From 46122082a61ef5bb2871d2d9158739133ad0e113 Mon Sep 17 00:00:00 2001 From: Jacek Caban Date: Tue, 13 Feb 2024 15:17:28 +0100 Subject: [PATCH 001/240] [Object][COFF][NFC] Make writeImportLibrary NativeExports argument optional. (#81600) It's not interesting for majority of downstream users. --- lld/COFF/Driver.cpp | 12 ++++++------ llvm/include/llvm/Object/COFFImportFile.h | 18 ++++++++++++++---- llvm/lib/Object/COFFImportFile.cpp | 4 ++-- .../ToolDrivers/llvm-dlltool/DlltoolDriver.cpp | 5 ++--- llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp | 5 ++--- 5 files changed, 26 insertions(+), 18 deletions(-) diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index 091aa0df207410..22ee2f133be98a 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -939,7 +939,7 @@ std::string LinkerDriver::getImportName(bool asLib) { void LinkerDriver::createImportLibrary(bool asLib) { llvm::TimeTraceScope timeScope("Create import library"); - std::vector exports, nativeExports; + std::vector exports; for (Export &e1 : ctx.config.exports) { COFFShortExport e2; e2.Name = std::string(e1.name); @@ -958,8 +958,8 @@ void LinkerDriver::createImportLibrary(bool asLib) { std::string path = getImplibPath(); if (!ctx.config.incremental) { - checkError(writeImportLibrary(libName, path, exports, nativeExports, - ctx.config.machine, ctx.config.mingw)); + checkError(writeImportLibrary(libName, path, exports, ctx.config.machine, + ctx.config.mingw)); return; } @@ -968,8 +968,8 @@ void LinkerDriver::createImportLibrary(bool asLib) { ErrorOr> oldBuf = MemoryBuffer::getFile( path, /*IsText=*/false, /*RequiresNullTerminator=*/false); if (!oldBuf) { - checkError(writeImportLibrary(libName, path, exports, nativeExports, - ctx.config.machine, ctx.config.mingw)); + checkError(writeImportLibrary(libName, path, exports, ctx.config.machine, + ctx.config.mingw)); return; } @@ -979,7 +979,7 @@ void LinkerDriver::createImportLibrary(bool asLib) { fatal("cannot create temporary file for import library " + 
path + ": " + ec.message()); - if (Error e = writeImportLibrary(libName, tmpName, exports, nativeExports, + if (Error e = writeImportLibrary(libName, tmpName, exports, ctx.config.machine, ctx.config.mingw)) { checkError(std::move(e)); return; diff --git a/llvm/include/llvm/Object/COFFImportFile.h b/llvm/include/llvm/Object/COFFImportFile.h index 23c3e6a1f0784a..402ded0d64fef2 100644 --- a/llvm/include/llvm/Object/COFFImportFile.h +++ b/llvm/include/llvm/Object/COFFImportFile.h @@ -135,10 +135,20 @@ struct COFFShortExport { } }; -Error writeImportLibrary(StringRef ImportName, StringRef Path, - ArrayRef Exports, - ArrayRef NativeExports, - COFF::MachineTypes Machine, bool MinGW); +/// Writes a COFF import library containing entries described by the Exports +/// array. +/// +/// For hybrid targets such as ARM64EC, additional native entry points can be +/// exposed using the NativeExports parameter. When NativeExports is used, the +/// output import library will expose these native ARM64 imports alongside the +/// entries described in the Exports array. Such a library can be used for +/// linking both ARM64EC and pure ARM64 objects, and the linker will pick only +/// the exports relevant to the target platform. For non-hybrid targets, +/// the NativeExports parameter should not be used. 
+Error writeImportLibrary( + StringRef ImportName, StringRef Path, ArrayRef Exports, + COFF::MachineTypes Machine, bool MinGW, + ArrayRef NativeExports = std::nullopt); } // namespace object } // namespace llvm diff --git a/llvm/lib/Object/COFFImportFile.cpp b/llvm/lib/Object/COFFImportFile.cpp index 9175c3ee2a2c4b..f6f6cf2a1602cf 100644 --- a/llvm/lib/Object/COFFImportFile.cpp +++ b/llvm/lib/Object/COFFImportFile.cpp @@ -625,8 +625,8 @@ NewArchiveMember ObjectFactory::createWeakExternal(StringRef Sym, Error writeImportLibrary(StringRef ImportName, StringRef Path, ArrayRef Exports, - ArrayRef NativeExports, - MachineTypes Machine, bool MinGW) { + MachineTypes Machine, bool MinGW, + ArrayRef NativeExports) { MachineTypes NativeMachine = isArm64EC(Machine) ? IMAGE_FILE_MACHINE_ARM64 : Machine; diff --git a/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp b/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp index 0749580c78a570..834903857a88eb 100644 --- a/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp +++ b/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp @@ -215,9 +215,8 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef ArgsArr) { } } - if (!Path.empty() && - writeImportLibrary(Def->OutputFile, Path, Def->Exports, std::nullopt, - Machine, /*MinGW=*/true)) + if (!Path.empty() && writeImportLibrary(Def->OutputFile, Path, Def->Exports, + Machine, /*MinGW=*/true)) return 1; return 0; } diff --git a/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp b/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp index 3baa0a08c73d1e..c3015d895230ea 100644 --- a/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp +++ b/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp @@ -418,9 +418,8 @@ int llvm::libDriverMain(ArrayRef ArgsArr) { OutputFile = std::move(NativeDef->OutputFile); } - return writeImportLibrary(OutputFile, OutputPath, Def->Exports, - NativeExports, LibMachine, - /*MinGW=*/false) + return writeImportLibrary(OutputFile, OutputPath, Def->Exports, LibMachine, + /*MinGW=*/false, NativeExports) ? 
1 : 0; } From d759618df76361a8e490eeae5c5399e0738cbfd0 Mon Sep 17 00:00:00 2001 From: OCHyams Date: Tue, 13 Feb 2024 08:51:57 +0000 Subject: [PATCH 002/240] Reapply "[DebugInfo][RemoveDIs] Turn on non-instrinsic debug-info by default" This reapplies commit bdde5f9 by undoing the revert bc66e0c. The previous reapplication 5c9f768 was reverted due to a crash (reproducer in comments for 5c9f768) which was fixed in #81595. As noted in the original commit, this commit may break downstream tests. If this commit is breaking your downstream tests, please see comment 12 in [0], which documents the kind of variation in tests we'd expect to see from this change and what to do about it. [0] https://discourse.llvm.org/t/rfc-instruction-api-changes-needed-to-eliminate-debug-intrinsics-from-ir/68939 --- llvm/lib/IR/BasicBlock.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp index fe9d0d08c5fe97..bf02eba9fb448d 100644 --- a/llvm/lib/IR/BasicBlock.cpp +++ b/llvm/lib/IR/BasicBlock.cpp @@ -34,7 +34,7 @@ cl::opt UseNewDbgInfoFormat("experimental-debuginfo-iterators", cl::desc("Enable communicating debuginfo positions " "through iterators, eliminating intrinsics"), - cl::init(false)); + cl::init(true)); DPMarker *BasicBlock::createMarker(Instruction *I) { assert(IsNewDbgInfoFormat && From f7cddf80062848fbbb358d7e913650cc550d2547 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Tue, 13 Feb 2024 14:31:54 +0000 Subject: [PATCH 003/240] [TableGen] Use std::move instead of swap. NFC. (#81606) Historically TableGen has used `A.swap(B)` to move containers without the expense of copying them. Perhaps this predated rvalue references. In any case `A = std::move(B)` seems like a more direct way to implement this when only A is required after the operation. 
--- llvm/utils/TableGen/AsmMatcherEmitter.cpp | 9 ++++----- llvm/utils/TableGen/CodeGenRegisters.cpp | 6 +++--- llvm/utils/TableGen/CodeGenSchedule.cpp | 2 +- llvm/utils/TableGen/GlobalISelMatchTable.cpp | 4 ++-- llvm/utils/TableGen/SubtargetEmitter.cpp | 2 +- 5 files changed, 11 insertions(+), 12 deletions(-) diff --git a/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/llvm/utils/TableGen/AsmMatcherEmitter.cpp index a2122659d4dd27..d6dc4b7881b830 100644 --- a/llvm/utils/TableGen/AsmMatcherEmitter.cpp +++ b/llvm/utils/TableGen/AsmMatcherEmitter.cpp @@ -1269,11 +1269,10 @@ void AsmMatcherInfo::buildRegisterClasses( } RegisterSet Tmp; - std::swap(Tmp, ContainingSet); - std::insert_iterator II(ContainingSet, - ContainingSet.begin()); - std::set_intersection(Tmp.begin(), Tmp.end(), RS.begin(), RS.end(), II, - LessRecordByID()); + std::insert_iterator II(Tmp, Tmp.begin()); + std::set_intersection(ContainingSet.begin(), ContainingSet.end(), + RS.begin(), RS.end(), II, LessRecordByID()); + ContainingSet = std::move(Tmp); } if (!ContainingSet.empty()) { diff --git a/llvm/utils/TableGen/CodeGenRegisters.cpp b/llvm/utils/TableGen/CodeGenRegisters.cpp index 5c74a6f3781222..25ef31097b53be 100644 --- a/llvm/utils/TableGen/CodeGenRegisters.cpp +++ b/llvm/utils/TableGen/CodeGenRegisters.cpp @@ -1964,9 +1964,9 @@ void CodeGenRegBank::pruneUnitSets() { for (unsigned i = 0, e = SuperSetIDs.size(); i != e; ++i) { unsigned SuperIdx = SuperSetIDs[i]; PrunedUnitSets[i].Name = RegUnitSets[SuperIdx].Name; - PrunedUnitSets[i].Units.swap(RegUnitSets[SuperIdx].Units); + PrunedUnitSets[i].Units = std::move(RegUnitSets[SuperIdx].Units); } - RegUnitSets.swap(PrunedUnitSets); + RegUnitSets = std::move(PrunedUnitSets); } // Create a RegUnitSet for each RegClass that contains all units in the class @@ -2139,7 +2139,7 @@ void CodeGenRegBank::computeRegUnitSets() { if (RCUnitSetsIdx == RegClassUnitSets.size()) { // Create a new list of UnitSets as a "fake" register class. 
RegClassUnitSets.resize(RCUnitSetsIdx + 1); - RegClassUnitSets[RCUnitSetsIdx].swap(RUSets); + RegClassUnitSets[RCUnitSetsIdx] = std::move(RUSets); } } } diff --git a/llvm/utils/TableGen/CodeGenSchedule.cpp b/llvm/utils/TableGen/CodeGenSchedule.cpp index 9cebc427dbdbc7..e56bf5bdee634b 100644 --- a/llvm/utils/TableGen/CodeGenSchedule.cpp +++ b/llvm/utils/TableGen/CodeGenSchedule.cpp @@ -1788,7 +1788,7 @@ void CodeGenSchedModels::inferFromRW(ArrayRef OperWrites, for (const PredTransition &Trans : LastTransitions) SubstitutedAny |= Transitions.substituteVariants(Trans); LLVM_DEBUG(Transitions.dump()); - LastTransitions.swap(Transitions.TransVec); + LastTransitions = std::move(Transitions.TransVec); } while (SubstitutedAny); // WARNING: We are about to mutate the SchedClasses vector. Do not refer to diff --git a/llvm/utils/TableGen/GlobalISelMatchTable.cpp b/llvm/utils/TableGen/GlobalISelMatchTable.cpp index f7166ead9adc3d..d1bdc30849a7f6 100644 --- a/llvm/utils/TableGen/GlobalISelMatchTable.cpp +++ b/llvm/utils/TableGen/GlobalISelMatchTable.cpp @@ -545,8 +545,8 @@ void GroupMatcher::optimize() { if (T != E) F = ++T; } - optimizeRules(Matchers, MatcherStorage).swap(Matchers); - optimizeRules(Matchers, MatcherStorage).swap(Matchers); + Matchers = optimizeRules(Matchers, MatcherStorage); + Matchers = optimizeRules(Matchers, MatcherStorage); } //===- SwitchMatcher ------------------------------------------------------===// diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp index b1502eaa20712a..ebe39167703c8c 100644 --- a/llvm/utils/TableGen/SubtargetEmitter.cpp +++ b/llvm/utils/TableGen/SubtargetEmitter.cpp @@ -1649,7 +1649,7 @@ static void collectProcessorIndices(const CodeGenSchedClass &SC, IdxVec PI; std::set_union(&T.ProcIndex, &T.ProcIndex + 1, ProcIndices.begin(), ProcIndices.end(), std::back_inserter(PI)); - ProcIndices.swap(PI); + ProcIndices = std::move(PI); } } From d1f510cca8e966bd1742bf17256bfec99dcdf229 Mon Sep 17 
00:00:00 2001 From: Mats Petersson Date: Tue, 13 Feb 2024 14:32:26 +0000 Subject: [PATCH 004/240] Fix warning by removing unused variable (#81604) Apparently, some compilers [correctly] warn that the variable that was created prior to this change is unused. This removes the variable. --- flang/lib/Semantics/canonicalize-omp.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index 01adcf53728424..0481b3d41f501d 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -92,7 +92,7 @@ class CanonicalizationOfOmp { nextIt = it; while (++nextIt != block.end()) { // Ignore compiler directives. - if (auto *directive{GetConstructIf(*nextIt)}) + if (GetConstructIf(*nextIt)) continue; if (auto *doCons{GetConstructIf(*nextIt)}) { From 5e5e51e9062895bed9fcf0dbb157d868be0adf8d Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Tue, 13 Feb 2024 14:34:12 +0000 Subject: [PATCH 005/240] Make use of std::inserter. NFC. --- llvm/lib/Support/DeltaAlgorithm.cpp | 6 +++--- llvm/utils/TableGen/AsmMatcherEmitter.cpp | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Support/DeltaAlgorithm.cpp b/llvm/lib/Support/DeltaAlgorithm.cpp index 341de244547cf4..d763cded6e7ea3 100644 --- a/llvm/lib/Support/DeltaAlgorithm.cpp +++ b/llvm/lib/Support/DeltaAlgorithm.cpp @@ -83,9 +83,9 @@ bool DeltaAlgorithm::Search(const changeset_ty &Changes, if (Sets.size() > 2) { // FIXME: This is really slow. 
changeset_ty Complement; - std::set_difference( - Changes.begin(), Changes.end(), it->begin(), it->end(), - std::insert_iterator(Complement, Complement.begin())); + std::set_difference(Changes.begin(), Changes.end(), it->begin(), + it->end(), + std::inserter(Complement, Complement.begin())); if (GetTestResult(Complement)) { changesetlist_ty ComplementSets; ComplementSets.insert(ComplementSets.end(), Sets.begin(), it); diff --git a/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/llvm/utils/TableGen/AsmMatcherEmitter.cpp index d6dc4b7881b830..2c2c39a3ef54ff 100644 --- a/llvm/utils/TableGen/AsmMatcherEmitter.cpp +++ b/llvm/utils/TableGen/AsmMatcherEmitter.cpp @@ -1269,9 +1269,9 @@ void AsmMatcherInfo::buildRegisterClasses( } RegisterSet Tmp; - std::insert_iterator II(Tmp, Tmp.begin()); std::set_intersection(ContainingSet.begin(), ContainingSet.end(), - RS.begin(), RS.end(), II, LessRecordByID()); + RS.begin(), RS.end(), + std::inserter(Tmp, Tmp.begin()), LessRecordByID()); ContainingSet = std::move(Tmp); } From 38c706e30f5f339bfb0bfb26fd7b5c2d5086064a Mon Sep 17 00:00:00 2001 From: David Spickett Date: Tue, 13 Feb 2024 14:52:02 +0000 Subject: [PATCH 006/240] [GitHub][workflows] Ask reviewers to merge PRs when author cannot (#81142) This uses https://pygithub.readthedocs.io/en/stable/github_objects/Repository.html?highlight=get_collaborator_permission#github.Repository.Repository.get_collaborator_permission. Which does https://docs.github.com/en/rest/collaborators/collaborators?apiVersion=2022-11-28#get-repository-permissions-for-a-user and returns the top level "permission" key. This is less detailed than the user/permissions key but should be fine for this use case. When a review is submitted we check: * If it's an approval. * Whether we have already left a merge on behalf comment (by looking for a hidden HTML comment). * Whether the author has permissions to merge their own PR. * Whether the reviewer has permissions to merge. 
If needed we leave a comment tagging the reviewer. If the reviewer also doesn't have merge permission, then it asks them to find someone else who does. --- .github/workflows/approved-prs.yml | 39 +++++++++++++++++ llvm/utils/git/github-automation.py | 65 +++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+) create mode 100644 .github/workflows/approved-prs.yml diff --git a/.github/workflows/approved-prs.yml b/.github/workflows/approved-prs.yml new file mode 100644 index 00000000000000..309a9217e42d31 --- /dev/null +++ b/.github/workflows/approved-prs.yml @@ -0,0 +1,39 @@ +name: "Prompt reviewers to merge PRs on behalf of authors" + +permissions: + contents: read + +on: + pull_request_review: + types: + - submitted + +jobs: + merge-on-behalf-information-comment: + runs-on: ubuntu-latest + permissions: + pull-requests: write + if: >- + (github.repository == 'llvm/llvm-project') && + (github.event.review.state == 'APPROVED') + steps: + - name: Checkout Automation Script + uses: actions/checkout@v4 + with: + sparse-checkout: llvm/utils/git/ + ref: main + + - name: Setup Automation Script + working-directory: ./llvm/utils/git/ + run: | + pip install -r requirements.txt + + - name: Add Merge On Behalf Comment + working-directory: ./llvm/utils/git/ + run: | + python3 ./github-automation.py \ + --token '${{ secrets.GITHUB_TOKEN }}' \ + pr-merge-on-behalf-information \ + --issue-number "${{ github.event.pull_request.number }}" \ + --author "${{ github.event.pull_request.user.login }}" \ + --reviewer "${{ github.event.review.user.login }}" diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py index b475eff06fc3eb..ccef274c4c1f7c 100755 --- a/llvm/utils/git/github-automation.py +++ b/llvm/utils/git/github-automation.py @@ -298,6 +298,55 @@ def run(self) -> bool: return True +class PRMergeOnBehalfInformation: + COMMENT_TAG = "\n" + + def __init__( + self, token: str, repo: str, pr_number: int, author: str, reviewer: str + ): + 
self.repo = github.Github(token).get_repo(repo) + self.pr = self.repo.get_issue(pr_number).as_pull_request() + self.author = author + self.reviewer = reviewer + + def can_merge(self, user: str) -> bool: + try: + return self.repo.get_collaborator_permission(user) in ["admin", "write"] + # There is a UnknownObjectException for this scenario, but this method + # does not use it. + except github.GithubException as e: + # 404 means the author was not found in the collaborator list, so we + # know they don't have push permissions. Anything else is a real API + # issue, raise it so it is visible. + if e.status != 404: + raise e + return False + + def run(self) -> bool: + # Check this first because it only costs 1 API point. + if self.can_merge(self.author): + return + + # A review can be approved more than once, only comment the first time. + for comment in self.pr.as_issue().get_comments(): + if self.COMMENT_TAG in comment.body: + return + + # This text is using Markdown formatting. + if self.can_merge(self.reviewer): + comment = f"""\ +{self.COMMENT_TAG} +@{self.reviewer} the PR author does not have permission to merge their own PRs yet. Please merge on their behalf.""" + else: + comment = f"""\ +{self.COMMENT_TAG} +@{self.reviewer} the author of this PR does not have permission to merge and neither do you. +Please find someone who has merge permissions who can merge it on the author's behalf. 
This could be one of the other reviewers or you can ask on [Discord](https://discord.com/invite/xS7Z362).""" + + self.pr.as_issue().create_comment(comment) + return True + + def setup_llvmbot_git(git_dir="."): """ Configure the git repo in `git_dir` with the llvmbot account so @@ -665,6 +714,17 @@ def execute_command(self) -> bool: pr_buildbot_information_parser.add_argument("--issue-number", type=int, required=True) pr_buildbot_information_parser.add_argument("--author", type=str, required=True) +pr_merge_on_behalf_information_parser = subparsers.add_parser( + "pr-merge-on-behalf-information" +) +pr_merge_on_behalf_information_parser.add_argument( + "--issue-number", type=int, required=True +) +pr_merge_on_behalf_information_parser.add_argument("--author", type=str, required=True) +pr_merge_on_behalf_information_parser.add_argument( + "--reviewer", type=str, required=True +) + release_workflow_parser = subparsers.add_parser("release-workflow") release_workflow_parser.add_argument( "--llvm-project-dir", @@ -724,6 +784,11 @@ def execute_command(self) -> bool: args.token, args.repo, args.issue_number, args.author ) pr_buildbot_information.run() +elif args.command == "pr-merge-on-behalf-information": + pr_merge_on_behalf_information = PRMergeOnBehalfInformation( + args.token, args.repo, args.issue_number, args.author, args.reviewer + ) + pr_merge_on_behalf_information.run() elif args.command == "release-workflow": release_workflow = ReleaseWorkflow( args.token, From 89c1bf1230e011f2f0e43554c278205fa1819de5 Mon Sep 17 00:00:00 2001 From: James Westwood Date: Tue, 13 Feb 2024 15:12:35 +0000 Subject: [PATCH 007/240] [ARM] __ARM_ARCH macro definition fix (#81493) This patch changes how the macro __ARM_ARCH is defined to match its definition in the ACLE. In ACLE 5.4.1, __ARM_ARCH is defined as equal to the major architecture version for ISAs up to and including v8. 
From v8.1 onwards, its definition is changed to include minor versions, such that for an architecture vX.Y, __ARM_ARCH = X*100 + Y. Before this patch, LLVM defined __ARM_ARCH using only the major architecture version for all architecture versions. This patch adds functionality to define __ARM_ARCH correctly for architectures greater than or equal to v8.1. --- clang/docs/ReleaseNotes.rst | 2 + clang/lib/Basic/Targets/AArch64.cpp | 16 ++++- clang/lib/Basic/Targets/ARM.cpp | 14 ++++- clang/lib/Basic/Targets/ARM.h | 1 + clang/test/Preprocessor/arm-target-features.c | 34 +++++------ .../llvm/TargetParser/ARMTargetParser.h | 1 + llvm/lib/TargetParser/ARMTargetParser.cpp | 58 +++++++++++++++++++ .../TargetParser/TargetParserTest.cpp | 8 +++ 8 files changed, 112 insertions(+), 22 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index dd790236e03bb7..5c245b7b1bf488 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -255,6 +255,8 @@ X86 Support Arm and AArch64 Support ^^^^^^^^^^^^^^^^^^^^^^^ +- Fixed the incorrect definition of the __ARM_ARCH macro for architectures greater than or equal to v8.1. + Android Support ^^^^^^^^^^^^^^^ diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index 68032961451d90..dd0218e6ebed81 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -367,8 +367,20 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts, // ACLE predefines. Many can only have one possible value on v8 AArch64. Builder.defineMacro("__ARM_ACLE", "200"); - Builder.defineMacro("__ARM_ARCH", - std::to_string(ArchInfo->Version.getMajor())); + + // __ARM_ARCH is defined as an integer value indicating the current ARM ISA. + // For ISAs up to and including v8, __ARM_ARCH is equal to the major version + // number. For ISAs from v8.1 onwards, __ARM_ARCH is scaled up to include the + // minor version number, e.g. 
for ARM architecture ARMvX.Y: + // __ARM_ARCH = X * 100 + Y. + if (ArchInfo->Version.getMajor() == 8 && ArchInfo->Version.getMinor() == 0) + Builder.defineMacro("__ARM_ARCH", + std::to_string(ArchInfo->Version.getMajor())); + else + Builder.defineMacro("__ARM_ARCH", + std::to_string(ArchInfo->Version.getMajor() * 100 + + ArchInfo->Version.getMinor().value())); + Builder.defineMacro("__ARM_ARCH_PROFILE", std::string("'") + (char)ArchInfo->Profile + "'"); diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp index 55b71557452fa0..cd7fb95259d9db 100644 --- a/clang/lib/Basic/Targets/ARM.cpp +++ b/clang/lib/Basic/Targets/ARM.cpp @@ -130,6 +130,7 @@ void ARMTargetInfo::setArchInfo(llvm::ARM::ArchKind Kind) { SubArch = llvm::ARM::getSubArch(ArchKind); ArchProfile = llvm::ARM::parseArchProfile(SubArch); ArchVersion = llvm::ARM::parseArchVersion(SubArch); + ArchMinorVersion = llvm::ARM::parseArchMinorVersion(SubArch); // cache CPU related strings CPUAttr = getCPUAttr(); @@ -736,9 +737,16 @@ void ARMTargetInfo::getTargetDefines(const LangOptions &Opts, if (!CPUAttr.empty()) Builder.defineMacro("__ARM_ARCH_" + CPUAttr + "__"); - // ACLE 6.4.1 ARM/Thumb instruction set architecture - // __ARM_ARCH is defined as an integer value indicating the current ARM ISA - Builder.defineMacro("__ARM_ARCH", Twine(ArchVersion)); + // __ARM_ARCH is defined as an integer value indicating the current ARM ISA. + // For ISAs up to and including v8, __ARM_ARCH is equal to the major version + // number. For ISAs from v8.1 onwards, __ARM_ARCH is scaled up to include the + // minor version number, e.g. for ARM architecture ARMvX.Y: + // __ARM_ARCH = X * 100 + Y. 
+ if (ArchVersion >= 9 || ArchMinorVersion != 0) + Builder.defineMacro("__ARM_ARCH", + Twine(ArchVersion * 100 + ArchMinorVersion)); + else + Builder.defineMacro("__ARM_ARCH", Twine(ArchVersion)); if (ArchVersion >= 8) { // ACLE 6.5.7 Crypto Extension diff --git a/clang/lib/Basic/Targets/ARM.h b/clang/lib/Basic/Targets/ARM.h index 71322a094f5edb..df06e4d120637a 100644 --- a/clang/lib/Basic/Targets/ARM.h +++ b/clang/lib/Basic/Targets/ARM.h @@ -60,6 +60,7 @@ class LLVM_LIBRARY_VISIBILITY ARMTargetInfo : public TargetInfo { llvm::ARM::ArchKind ArchKind = llvm::ARM::ArchKind::ARMV4T; llvm::ARM::ProfileKind ArchProfile; unsigned ArchVersion; + unsigned ArchMinorVersion; LLVM_PREFERRED_TYPE(FPUMode) unsigned FPU : 5; diff --git a/clang/test/Preprocessor/arm-target-features.c b/clang/test/Preprocessor/arm-target-features.c index 236c9f2479b705..733d068b09b1fe 100644 --- a/clang/test/Preprocessor/arm-target-features.c +++ b/clang/test/Preprocessor/arm-target-features.c @@ -737,7 +737,7 @@ // Test whether predefines are as expected when targeting cortex-m55 (softfp FP ABI as default). 
// RUN: %clang -target arm-eabi -mcpu=cortex-m55 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=M55 %s -// M55: #define __ARM_ARCH 8 +// M55: #define __ARM_ARCH 801 // M55: #define __ARM_ARCH_8_1M_MAIN__ 1 // M55: #define __ARM_ARCH_EXT_IDIV__ 1 // M55-NOT: __ARM_ARCH_ISA_ARM @@ -764,7 +764,7 @@ // KRAIT-ALLOW-FP-INSTR:#define __ARM_VFPV4__ 1 // RUN: %clang -target arm-arm-none-eabi -march=armv8.1-m.main -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V81M %s -// CHECK-V81M: #define __ARM_ARCH 8 +// CHECK-V81M: #define __ARM_ARCH 801 // CHECK-V81M: #define __ARM_ARCH_8_1M_MAIN__ 1 // CHECK-V81M: #define __ARM_ARCH_ISA_THUMB 2 // CHECK-V81M: #define __ARM_ARCH_PROFILE 'M' @@ -821,14 +821,14 @@ // CHECK-V8M-CDE-MASK2: #define __ARM_FEATURE_CDE_COPROC 0xff // RUN: %clang -target armv8.1a-none-none-eabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V81A %s -// CHECK-V81A: #define __ARM_ARCH 8 +// CHECK-V81A: #define __ARM_ARCH 801 // CHECK-V81A: #define __ARM_ARCH_8_1A__ 1 // CHECK-V81A: #define __ARM_ARCH_PROFILE 'A' // CHECK-V81A: #define __ARM_FEATURE_QRDMX 1 // CHECK-V81A: #define __ARM_FP 0xe // RUN: %clang -target armv8.2a-none-none-eabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V82A %s -// CHECK-V82A: #define __ARM_ARCH 8 +// CHECK-V82A: #define __ARM_ARCH 802 // CHECK-V82A: #define __ARM_ARCH_8_2A__ 1 // CHECK-V82A: #define __ARM_ARCH_PROFILE 'A' // CHECK-V82A: #define __ARM_FEATURE_QRDMX 1 @@ -838,67 +838,67 @@ // CHECK-DRIVERKIT-NOT: #define __ARM_PCS_VFP 1 // RUN: %clang -target armv8.3a-none-none-eabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V83A %s -// CHECK-V83A: #define __ARM_ARCH 8 +// CHECK-V83A: #define __ARM_ARCH 803 // CHECK-V83A: #define __ARM_ARCH_8_3A__ 1 // CHECK-V83A: #define __ARM_ARCH_PROFILE 'A' // RUN: %clang -target armv8.4a-none-none-eabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V84A %s 
-// CHECK-V84A: #define __ARM_ARCH 8 +// CHECK-V84A: #define __ARM_ARCH 804 // CHECK-V84A: #define __ARM_ARCH_8_4A__ 1 // CHECK-V84A: #define __ARM_ARCH_PROFILE 'A' // RUN: %clang -target armv8.5a-none-none-eabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V85A %s -// CHECK-V85A: #define __ARM_ARCH 8 +// CHECK-V85A: #define __ARM_ARCH 805 // CHECK-V85A: #define __ARM_ARCH_8_5A__ 1 // CHECK-V85A: #define __ARM_ARCH_PROFILE 'A' // RUN: %clang -target armv8.6a-none-none-eabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V86A %s -// CHECK-V86A: #define __ARM_ARCH 8 +// CHECK-V86A: #define __ARM_ARCH 806 // CHECK-V86A: #define __ARM_ARCH_8_6A__ 1 // CHECK-V86A: #define __ARM_ARCH_PROFILE 'A' // RUN: %clang -target armv8.7a-none-none-eabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V87A %s -// CHECK-V87A: #define __ARM_ARCH 8 +// CHECK-V87A: #define __ARM_ARCH 807 // CHECK-V87A: #define __ARM_ARCH_8_7A__ 1 // CHECK-V87A: #define __ARM_ARCH_PROFILE 'A' // RUN: %clang -target armv8.8a-none-none-eabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V88A %s -// CHECK-V88A: #define __ARM_ARCH 8 +// CHECK-V88A: #define __ARM_ARCH 808 // CHECK-V88A: #define __ARM_ARCH_8_8A__ 1 // CHECK-V88A: #define __ARM_ARCH_PROFILE 'A' // RUN: %clang -target armv8.9a-none-none-eabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V89A %s -// CHECK-V89A: #define __ARM_ARCH 8 +// CHECK-V89A: #define __ARM_ARCH 809 // CHECK-V89A: #define __ARM_ARCH_8_9A__ 1 // CHECK-V89A: #define __ARM_ARCH_PROFILE 'A' // RUN: %clang -target armv9a-none-none-eabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V9A %s -// CHECK-V9A: #define __ARM_ARCH 9 +// CHECK-V9A: #define __ARM_ARCH 900 // CHECK-V9A: #define __ARM_ARCH_9A__ 1 // CHECK-V9A: #define __ARM_ARCH_PROFILE 'A' // RUN: %clang -target armv9.1a-none-none-eabi -x c -E -dM %s -o - | FileCheck -match-full-lines 
--check-prefix=CHECK-V91A %s -// CHECK-V91A: #define __ARM_ARCH 9 +// CHECK-V91A: #define __ARM_ARCH 901 // CHECK-V91A: #define __ARM_ARCH_9_1A__ 1 // CHECK-V91A: #define __ARM_ARCH_PROFILE 'A' // RUN: %clang -target armv9.2a-none-none-eabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V92A %s -// CHECK-V92A: #define __ARM_ARCH 9 +// CHECK-V92A: #define __ARM_ARCH 902 // CHECK-V92A: #define __ARM_ARCH_9_2A__ 1 // CHECK-V92A: #define __ARM_ARCH_PROFILE 'A' // RUN: %clang -target armv9.3a-none-none-eabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V93A %s -// CHECK-V93A: #define __ARM_ARCH 9 +// CHECK-V93A: #define __ARM_ARCH 903 // CHECK-V93A: #define __ARM_ARCH_9_3A__ 1 // CHECK-V93A: #define __ARM_ARCH_PROFILE 'A' // RUN: %clang -target armv9.4a-none-none-eabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V94A %s -// CHECK-V94A: #define __ARM_ARCH 9 +// CHECK-V94A: #define __ARM_ARCH 904 // CHECK-V94A: #define __ARM_ARCH_9_4A__ 1 // CHECK-V94A: #define __ARM_ARCH_PROFILE 'A' // RUN: %clang -target armv9.5a-none-none-eabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V95A %s -// CHECK-V95A: #define __ARM_ARCH 9 +// CHECK-V95A: #define __ARM_ARCH 905 // CHECK-V95A: #define __ARM_ARCH_9_5A__ 1 // CHECK-V95A: #define __ARM_ARCH_PROFILE 'A' diff --git a/llvm/include/llvm/TargetParser/ARMTargetParser.h b/llvm/include/llvm/TargetParser/ARMTargetParser.h index c42d66f048fccc..ec3817134a5ac0 100644 --- a/llvm/include/llvm/TargetParser/ARMTargetParser.h +++ b/llvm/include/llvm/TargetParser/ARMTargetParser.h @@ -258,6 +258,7 @@ uint64_t parseArchExt(StringRef ArchExt); ArchKind parseCPUArch(StringRef CPU); ProfileKind parseArchProfile(StringRef Arch); unsigned parseArchVersion(StringRef Arch); +unsigned parseArchMinorVersion(StringRef Arch); void fillValidCPUArchList(SmallVectorImpl &Values); StringRef computeDefaultTargetABI(const Triple &TT, StringRef CPU); diff --git 
a/llvm/lib/TargetParser/ARMTargetParser.cpp b/llvm/lib/TargetParser/ARMTargetParser.cpp index 67f937ebc33f9f..fac701946e282c 100644 --- a/llvm/lib/TargetParser/ARMTargetParser.cpp +++ b/llvm/lib/TargetParser/ARMTargetParser.cpp @@ -94,6 +94,64 @@ unsigned ARM::parseArchVersion(StringRef Arch) { llvm_unreachable("Unhandled architecture"); } +unsigned ARM::parseArchMinorVersion(StringRef Arch) { + Arch = getCanonicalArchName(Arch); + switch (parseArch(Arch)) { + case ArchKind::ARMV4: + case ArchKind::ARMV4T: + case ArchKind::ARMV5T: + case ArchKind::ARMV5TE: + case ArchKind::IWMMXT: + case ArchKind::IWMMXT2: + case ArchKind::XSCALE: + case ArchKind::ARMV5TEJ: + case ArchKind::ARMV6: + case ArchKind::ARMV6K: + case ArchKind::ARMV6T2: + case ArchKind::ARMV6KZ: + case ArchKind::ARMV6M: + case ArchKind::ARMV7A: + case ArchKind::ARMV7VE: + case ArchKind::ARMV7R: + case ArchKind::ARMV7M: + case ArchKind::ARMV7S: + case ArchKind::ARMV7EM: + case ArchKind::ARMV7K: + case ArchKind::ARMV8A: + case ArchKind::ARMV8R: + case ArchKind::ARMV8MBaseline: + case ArchKind::ARMV8MMainline: + case ArchKind::ARMV9A: + case ArchKind::INVALID: + return 0; + case ArchKind::ARMV8_1A: + case ArchKind::ARMV8_1MMainline: + case ArchKind::ARMV9_1A: + return 1; + case ArchKind::ARMV8_2A: + case ArchKind::ARMV9_2A: + return 2; + case ArchKind::ARMV8_3A: + case ArchKind::ARMV9_3A: + return 3; + case ArchKind::ARMV8_4A: + case ArchKind::ARMV9_4A: + return 4; + case ArchKind::ARMV8_5A: + case ArchKind::ARMV9_5A: + return 5; + case ArchKind::ARMV8_6A: + return 6; + case ArchKind::ARMV8_7A: + return 7; + case ArchKind::ARMV8_8A: + return 8; + case ArchKind::ARMV8_9A: + return 9; + } + llvm_unreachable("Unhandled architecture"); +} + static ARM::ProfileKind getProfileKind(ARM::ArchKind AK) { switch (AK) { case ARM::ArchKind::ARMV6M: diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp index e89fc687451cd7..c6ee39fa416021 100644 --- 
a/llvm/unittests/TargetParser/TargetParserTest.cpp +++ b/llvm/unittests/TargetParser/TargetParserTest.cpp @@ -976,6 +976,14 @@ TEST(TargetParserTest, ARMparseArchVersion) { EXPECT_EQ(5u, ARM::parseArchVersion(ARMArch[i])); } +TEST(TargetParserTest, ARMparseArchMinorVersion) { + for (unsigned i = 0; i < std::size(ARMArch); i++) + if (((std::string)ARMArch[i]).find(".") == 5) + EXPECT_EQ((ARMArch[i][6] - 48u), ARM::parseArchMinorVersion(ARMArch[i])); + else + EXPECT_EQ(0u, ARM::parseArchMinorVersion(ARMArch[i])); +} + TEST(TargetParserTest, getARMCPUForArch) { // Platform specific defaults. { From 25b9ed6e4964344e3710359bec4c831e5a8448b9 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 13 Feb 2024 16:41:00 +0100 Subject: [PATCH 008/240] [DAGCombine] Fix multi-use miscompile in load combine (#81586) The load combine replaces a number of original loads with one new load and also replaces the output chains of the original loads with the output chain of the new load. This is incorrect if the original load is retained (due to multi-use), as it may get incorrectly reordered. Fix this by using makeEquivalentMemoryOrdering() instead, which will create a TokenFactor with both chains. Fixes https://github.com/llvm/llvm-project/issues/80911. --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +- llvm/test/CodeGen/X86/load-combine.ll | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index d3cd9b1671e1b9..52011e593f2e0a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -9252,7 +9252,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { // Transfer chain users from old loads to the new load.
for (LoadSDNode *L : Loads) - DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1)); + DAG.makeEquivalentMemoryOrdering(L, NewLoad); if (!NeedsBswap) return NewLoad; diff --git a/llvm/test/CodeGen/X86/load-combine.ll b/llvm/test/CodeGen/X86/load-combine.ll index 7e4e11fcc75c20..b5f3e789918813 100644 --- a/llvm/test/CodeGen/X86/load-combine.ll +++ b/llvm/test/CodeGen/X86/load-combine.ll @@ -1283,7 +1283,6 @@ define i32 @zext_load_i32_by_i8_bswap_shl_16(ptr %arg) { ret i32 %tmp8 } -; FIXME: This is a miscompile. define i32 @pr80911_vector_load_multiuse(ptr %ptr, ptr %clobber) nounwind { ; CHECK-LABEL: pr80911_vector_load_multiuse: ; CHECK: # %bb.0: @@ -1299,9 +1298,9 @@ define i32 @pr80911_vector_load_multiuse(ptr %ptr, ptr %clobber) nounwind { ; ; CHECK64-LABEL: pr80911_vector_load_multiuse: ; CHECK64: # %bb.0: +; CHECK64-NEXT: movl (%rdi), %ecx ; CHECK64-NEXT: movzwl (%rdi), %eax ; CHECK64-NEXT: movl $0, (%rsi) -; CHECK64-NEXT: movl (%rdi), %ecx ; CHECK64-NEXT: movl %ecx, (%rdi) ; CHECK64-NEXT: retq %load = load <4 x i8>, ptr %ptr, align 16 From 4ad9f5be8348374ed2bfff32842f395f6e5f41a4 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 13 Feb 2024 07:45:55 -0800 Subject: [PATCH 009/240] ci: Temporarily disable the buildkite job on Windows (#81538) The failure rate is too high. 
See https://discourse.llvm.org/t/rfc-future-of-windows-pre-commit-ci/76840 --- .ci/generate-buildkite-pipeline-premerge | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.ci/generate-buildkite-pipeline-premerge b/.ci/generate-buildkite-pipeline-premerge index 4ebf304e23d587..c14ec464a43a66 100755 --- a/.ci/generate-buildkite-pipeline-premerge +++ b/.ci/generate-buildkite-pipeline-premerge @@ -233,7 +233,10 @@ linux_projects=$(add-dependencies ${linux_projects_to_test} | sort | uniq) windows_projects_to_test=$(exclude-windows $(compute-projects-to-test ${modified_projects})) windows_check_targets=$(check-targets ${windows_projects_to_test} | sort | uniq) -windows_projects=$(add-dependencies ${windows_projects_to_test} | sort | uniq) +# Temporary disable the windows job. +# See https://discourse.llvm.org/t/rfc-future-of-windows-pre-commit-ci/76840 +#windows_projects=$(add-dependencies ${windows_projects_to_test} | sort | uniq) +windows_projects="" # Generate the appropriate pipeline if [[ "${linux_projects}" != "" ]]; then From 192c23b0c7e5d3daefc2ad7d86c095509749eacd Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 13 Feb 2024 15:56:38 +0000 Subject: [PATCH 010/240] [SLP] Add X86 version of non-power-of-2 vectorization tests. Extra X86 tests for https://github.com/llvm/llvm-project/pull/77790. 
--- .../Transforms/SLPVectorizer/X86/vec3-base.ll | 317 +++++++++++ .../SLPVectorizer/X86/vec3-calls.ll | 60 ++ .../X86/vec3-gather-some-loads.ll | 66 +++ .../X86/vec3-reorder-reshuffle.ll | 513 ++++++++++++++++++ 4 files changed, 956 insertions(+) create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/vec3-calls.ll create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/vec3-gather-some-loads.ll create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/vec3-reorder-reshuffle.ll diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll new file mode 100644 index 00000000000000..6560fc6a145264 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll @@ -0,0 +1,317 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=slp-vectorizer -mtriple=x86_64-apple-macosx -S %s | FileCheck %s + +define void @v3_load_i32_mul_by_constant_store(ptr %src, ptr %dst) { +; CHECK-LABEL: @v3_load_i32_mul_by_constant_store( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0 +; CHECK-NEXT: [[L_SRC_0:%.*]] = load i32, ptr [[GEP_SRC_0]], align 4 +; CHECK-NEXT: [[MUL_0:%.*]] = mul nsw i32 [[L_SRC_0]], 10 +; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 1 +; CHECK-NEXT: [[L_SRC_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 4 +; CHECK-NEXT: [[MUL_1:%.*]] = mul nsw i32 [[L_SRC_1]], 10 +; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 2 +; CHECK-NEXT: [[L_SRC_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 4 +; CHECK-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_2]], 10 +; CHECK-NEXT: store i32 [[MUL_0]], ptr [[DST:%.*]], align 4 +; CHECK-NEXT: [[DST_1:%.*]] = getelementptr i32, ptr [[DST]], i32 1 +; CHECK-NEXT: store i32 [[MUL_1]], ptr [[DST_1]], align 4 +; CHECK-NEXT: [[DST_2:%.*]] = 
getelementptr i32, ptr [[DST]], i32 2 +; CHECK-NEXT: store i32 [[MUL_2]], ptr [[DST_2]], align 4 +; CHECK-NEXT: ret void +; +entry: + %gep.src.0 = getelementptr inbounds i32, ptr %src, i32 0 + %l.src.0 = load i32, ptr %gep.src.0, align 4 + %mul.0 = mul nsw i32 %l.src.0, 10 + + %gep.src.1 = getelementptr inbounds i32, ptr %src, i32 1 + %l.src.1 = load i32, ptr %gep.src.1, align 4 + %mul.1 = mul nsw i32 %l.src.1, 10 + + %gep.src.2 = getelementptr inbounds i32, ptr %src, i32 2 + %l.src.2 = load i32, ptr %gep.src.2, align 4 + %mul.2 = mul nsw i32 %l.src.2, 10 + + store i32 %mul.0, ptr %dst + + %dst.1 = getelementptr i32, ptr %dst, i32 1 + store i32 %mul.1, ptr %dst.1 + + %dst.2 = getelementptr i32, ptr %dst, i32 2 + store i32 %mul.2, ptr %dst.2 + + ret void +} + +define void @v3_load_i32_mul_store(ptr %src.1, ptr %src.2, ptr %dst) { +; CHECK-LABEL: @v3_load_i32_mul_store( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[GEP_SRC_1_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_1:%.*]], i32 0 +; CHECK-NEXT: [[L_SRC_1_0:%.*]] = load i32, ptr [[GEP_SRC_1_0]], align 4 +; CHECK-NEXT: [[GEP_SRC_2_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_2:%.*]], i32 0 +; CHECK-NEXT: [[L_SRC_2_0:%.*]] = load i32, ptr [[GEP_SRC_2_0]], align 4 +; CHECK-NEXT: [[MUL_0:%.*]] = mul nsw i32 [[L_SRC_1_0]], [[L_SRC_2_0]] +; CHECK-NEXT: [[GEP_SRC_1_1:%.*]] = getelementptr inbounds i32, ptr [[SRC_1]], i32 1 +; CHECK-NEXT: [[L_SRC_1_1:%.*]] = load i32, ptr [[GEP_SRC_1_1]], align 4 +; CHECK-NEXT: [[GEP_SRC_2_1:%.*]] = getelementptr inbounds i32, ptr [[SRC_2]], i32 1 +; CHECK-NEXT: [[L_SRC_2_1:%.*]] = load i32, ptr [[GEP_SRC_2_1]], align 4 +; CHECK-NEXT: [[MUL_1:%.*]] = mul nsw i32 [[L_SRC_1_1]], [[L_SRC_2_1]] +; CHECK-NEXT: [[GEP_SRC_1_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_1]], i32 2 +; CHECK-NEXT: [[L_SRC_1_2:%.*]] = load i32, ptr [[GEP_SRC_1_2]], align 4 +; CHECK-NEXT: [[GEP_SRC_2_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_2]], i32 2 +; CHECK-NEXT: [[L_SRC_2_2:%.*]] = load i32, ptr 
[[GEP_SRC_2_2]], align 4 +; CHECK-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_1_2]], [[L_SRC_2_2]] +; CHECK-NEXT: store i32 [[MUL_0]], ptr [[DST:%.*]], align 4 +; CHECK-NEXT: [[DST_1:%.*]] = getelementptr i32, ptr [[DST]], i32 1 +; CHECK-NEXT: store i32 [[MUL_1]], ptr [[DST_1]], align 4 +; CHECK-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2 +; CHECK-NEXT: store i32 [[MUL_2]], ptr [[DST_2]], align 4 +; CHECK-NEXT: ret void +; +entry: + %gep.src.1.0 = getelementptr inbounds i32, ptr %src.1, i32 0 + %l.src.1.0 = load i32, ptr %gep.src.1.0, align 4 + %gep.src.2.0 = getelementptr inbounds i32, ptr %src.2, i32 0 + %l.src.2.0 = load i32, ptr %gep.src.2.0, align 4 + %mul.0 = mul nsw i32 %l.src.1.0, %l.src.2.0 + + %gep.src.1.1 = getelementptr inbounds i32, ptr %src.1, i32 1 + %l.src.1.1 = load i32, ptr %gep.src.1.1, align 4 + %gep.src.2.1 = getelementptr inbounds i32, ptr %src.2, i32 1 + %l.src.2.1 = load i32, ptr %gep.src.2.1, align 4 + %mul.1 = mul nsw i32 %l.src.1.1, %l.src.2.1 + + %gep.src.1.2 = getelementptr inbounds i32, ptr %src.1, i32 2 + %l.src.1.2 = load i32, ptr %gep.src.1.2, align 4 + %gep.src.2.2 = getelementptr inbounds i32, ptr %src.2, i32 2 + %l.src.2.2 = load i32, ptr %gep.src.2.2, align 4 + %mul.2 = mul nsw i32 %l.src.1.2, %l.src.2.2 + + store i32 %mul.0, ptr %dst + + %dst.1 = getelementptr i32, ptr %dst, i32 1 + store i32 %mul.1, ptr %dst.1 + + %dst.2 = getelementptr i32, ptr %dst, i32 2 + store i32 %mul.2, ptr %dst.2 + + ret void +} + +define void @v3_load_i32_mul_add_const_store(ptr %src.1, ptr %src.2, ptr %dst) { +; CHECK-LABEL: @v3_load_i32_mul_add_const_store( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[GEP_SRC_1_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_1:%.*]], i32 0 +; CHECK-NEXT: [[L_SRC_1_0:%.*]] = load i32, ptr [[GEP_SRC_1_0]], align 4 +; CHECK-NEXT: [[GEP_SRC_2_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_2:%.*]], i32 0 +; CHECK-NEXT: [[L_SRC_2_0:%.*]] = load i32, ptr [[GEP_SRC_2_0]], align 4 +; CHECK-NEXT: [[MUL_0:%.*]] = mul 
nsw i32 [[L_SRC_1_0]], [[L_SRC_2_0]] +; CHECK-NEXT: [[ADD_0:%.*]] = add i32 [[MUL_0]], 9 +; CHECK-NEXT: [[GEP_SRC_1_1:%.*]] = getelementptr inbounds i32, ptr [[SRC_1]], i32 1 +; CHECK-NEXT: [[L_SRC_1_1:%.*]] = load i32, ptr [[GEP_SRC_1_1]], align 4 +; CHECK-NEXT: [[GEP_SRC_2_1:%.*]] = getelementptr inbounds i32, ptr [[SRC_2]], i32 1 +; CHECK-NEXT: [[L_SRC_2_1:%.*]] = load i32, ptr [[GEP_SRC_2_1]], align 4 +; CHECK-NEXT: [[MUL_1:%.*]] = mul nsw i32 [[L_SRC_1_1]], [[L_SRC_2_1]] +; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[MUL_1]], 9 +; CHECK-NEXT: [[GEP_SRC_1_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_1]], i32 2 +; CHECK-NEXT: [[L_SRC_1_2:%.*]] = load i32, ptr [[GEP_SRC_1_2]], align 4 +; CHECK-NEXT: [[GEP_SRC_2_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_2]], i32 2 +; CHECK-NEXT: [[L_SRC_2_2:%.*]] = load i32, ptr [[GEP_SRC_2_2]], align 4 +; CHECK-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_1_2]], [[L_SRC_2_2]] +; CHECK-NEXT: [[ADD_2:%.*]] = add i32 [[MUL_2]], 9 +; CHECK-NEXT: store i32 [[ADD_0]], ptr [[DST:%.*]], align 4 +; CHECK-NEXT: [[DST_1:%.*]] = getelementptr i32, ptr [[DST]], i32 1 +; CHECK-NEXT: store i32 [[ADD_1]], ptr [[DST_1]], align 4 +; CHECK-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2 +; CHECK-NEXT: store i32 [[ADD_2]], ptr [[DST_2]], align 4 +; CHECK-NEXT: ret void +; +entry: + %gep.src.1.0 = getelementptr inbounds i32, ptr %src.1, i32 0 + %l.src.1.0 = load i32, ptr %gep.src.1.0, align 4 + %gep.src.2.0 = getelementptr inbounds i32, ptr %src.2, i32 0 + %l.src.2.0 = load i32, ptr %gep.src.2.0, align 4 + %mul.0 = mul nsw i32 %l.src.1.0, %l.src.2.0 + %add.0 = add i32 %mul.0, 9 + + %gep.src.1.1 = getelementptr inbounds i32, ptr %src.1, i32 1 + %l.src.1.1 = load i32, ptr %gep.src.1.1, align 4 + %gep.src.2.1 = getelementptr inbounds i32, ptr %src.2, i32 1 + %l.src.2.1 = load i32, ptr %gep.src.2.1, align 4 + %mul.1 = mul nsw i32 %l.src.1.1, %l.src.2.1 + %add.1 = add i32 %mul.1, 9 + + %gep.src.1.2 = getelementptr inbounds i32, ptr %src.1, i32 
2 + %l.src.1.2 = load i32, ptr %gep.src.1.2, align 4 + %gep.src.2.2 = getelementptr inbounds i32, ptr %src.2, i32 2 + %l.src.2.2 = load i32, ptr %gep.src.2.2, align 4 + %mul.2 = mul nsw i32 %l.src.1.2, %l.src.2.2 + %add.2 = add i32 %mul.2, 9 + + store i32 %add.0, ptr %dst + + %dst.1 = getelementptr i32, ptr %dst, i32 1 + store i32 %add.1, ptr %dst.1 + + %dst.2 = getelementptr i32, ptr %dst, i32 2 + store i32 %add.2, ptr %dst.2 + + ret void +} + +define void @v3_load_f32_fadd_fadd_by_constant_store(ptr %src, ptr %dst) { +; CHECK-LABEL: @v3_load_f32_fadd_fadd_by_constant_store( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i32 0 +; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds float, ptr [[SRC]], i32 2 +; CHECK-NEXT: [[L_SRC_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 4 +; CHECK-NEXT: [[FADD_2:%.*]] = fadd float [[L_SRC_2]], 1.000000e+01 +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[GEP_SRC_0]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[TMP0]], +; CHECK-NEXT: store <2 x float> [[TMP1]], ptr [[DST:%.*]], align 4 +; CHECK-NEXT: [[DST_2:%.*]] = getelementptr float, ptr [[DST]], i32 2 +; CHECK-NEXT: store float [[FADD_2]], ptr [[DST_2]], align 4 +; CHECK-NEXT: ret void +; +entry: + %gep.src.0 = getelementptr inbounds float, ptr %src, i32 0 + %l.src.0 = load float , ptr %gep.src.0, align 4 + %fadd.0 = fadd float %l.src.0, 10.0 + + %gep.src.1 = getelementptr inbounds float , ptr %src, i32 1 + %l.src.1 = load float, ptr %gep.src.1, align 4 + %fadd.1 = fadd float %l.src.1, 10.0 + + %gep.src.2 = getelementptr inbounds float, ptr %src, i32 2 + %l.src.2 = load float, ptr %gep.src.2, align 4 + %fadd.2 = fadd float %l.src.2, 10.0 + + store float %fadd.0, ptr %dst + + %dst.1 = getelementptr float, ptr %dst, i32 1 + store float %fadd.1, ptr %dst.1 + + %dst.2 = getelementptr float, ptr %dst, i32 2 + store float %fadd.2, ptr %dst.2 + + ret void +} + +define void @phi_store3(ptr %dst) { +; 
CHECK-LABEL: @phi_store3( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: invoke.cont8.loopexit: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[P_2:%.*]] = phi i32 [ 3, [[ENTRY:%.*]] ], [ 0, [[INVOKE_CONT8_LOOPEXIT:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ , [[ENTRY]] ], [ poison, [[INVOKE_CONT8_LOOPEXIT]] ] +; CHECK-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 2 +; CHECK-NEXT: store <2 x i32> [[TMP0]], ptr [[DST]], align 4 +; CHECK-NEXT: store i32 [[P_2]], ptr [[DST_2]], align 4 +; CHECK-NEXT: ret void +; +entry: + br label %exit + +invoke.cont8.loopexit: ; No predecessors! + br label %exit + +exit: + %p.0 = phi i32 [ 1, %entry ], [ 0, %invoke.cont8.loopexit ] + %p.1 = phi i32 [ 2, %entry ], [ 0, %invoke.cont8.loopexit ] + %p.2 = phi i32 [ 3, %entry ], [ 0, %invoke.cont8.loopexit ] + + %dst.1 = getelementptr i32, ptr %dst, i32 1 + %dst.2 = getelementptr i32, ptr %dst, i32 2 + + store i32 %p.0, ptr %dst, align 4 + store i32 %p.1, ptr %dst.1, align 4 + store i32 %p.2, ptr %dst.2, align 4 + ret void +} + +define void @store_try_reorder(ptr %dst) { +; CHECK-LABEL: @store_try_reorder( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD:%.*]] = add i32 0, 0 +; CHECK-NEXT: store i32 [[ADD]], ptr [[DST:%.*]], align 4 +; CHECK-NEXT: [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1 +; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4 +; CHECK-NEXT: ret void +; +entry: + %add = add i32 0, 0 + store i32 %add, ptr %dst, align 4 + %add207 = sub i32 0, 0 + %arrayidx.i1887 = getelementptr i32, ptr %dst, i64 1 + store i32 %add207, ptr %arrayidx.i1887, align 4 + %add216 = sub i32 0, 0 + %arrayidx.i1891 = getelementptr i32, ptr %dst, i64 2 + store i32 %add216, ptr %arrayidx.i1891, align 4 + ret void +} + +define void @vec3_fpext_cost(ptr %Colour, float %0) { +; CHECK-LABEL: @vec3_fpext_cost( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARRAYIDX80:%.*]] = getelementptr float, ptr 
[[COLOUR:%.*]], i64 2 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[TMP0:%.*]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = fpext <2 x float> [[TMP2]] to <2 x double> +; CHECK-NEXT: [[TMP4:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP3]], <2 x double> zeroinitializer, <2 x double> zeroinitializer) +; CHECK-NEXT: [[TMP5:%.*]] = fptrunc <2 x double> [[TMP4]] to <2 x float> +; CHECK-NEXT: store <2 x float> [[TMP5]], ptr [[COLOUR]], align 4 +; CHECK-NEXT: [[CONV78:%.*]] = fpext float [[TMP0]] to double +; CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.fmuladd.f64(double [[CONV78]], double 0.000000e+00, double 0.000000e+00) +; CHECK-NEXT: [[CONV82:%.*]] = fptrunc double [[TMP6]] to float +; CHECK-NEXT: store float [[CONV82]], ptr [[ARRAYIDX80]], align 4 +; CHECK-NEXT: ret void +; +entry: + %arrayidx72 = getelementptr float, ptr %Colour, i64 1 + %arrayidx80 = getelementptr float, ptr %Colour, i64 2 + %conv62 = fpext float %0 to double + %1 = call double @llvm.fmuladd.f64(double %conv62, double 0.000000e+00, double 0.000000e+00) + %conv66 = fptrunc double %1 to float + store float %conv66, ptr %Colour, align 4 + %conv70 = fpext float %0 to double + %2 = call double @llvm.fmuladd.f64(double %conv70, double 0.000000e+00, double 0.000000e+00) + %conv74 = fptrunc double %2 to float + store float %conv74, ptr %arrayidx72, align 4 + %conv78 = fpext float %0 to double + %3 = call double @llvm.fmuladd.f64(double %conv78, double 0.000000e+00, double 0.000000e+00) + %conv82 = fptrunc double %3 to float + store float %conv82, ptr %arrayidx80, align 4 + ret void +} + +define void @fpext_gather(ptr %dst, double %conv) { +; CHECK-LABEL: @fpext_gather( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[CONV:%.*]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> 
poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = fptrunc <2 x double> [[TMP1]] to <2 x float> +; CHECK-NEXT: [[LENGTHS:%.*]] = getelementptr float, ptr [[DST:%.*]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 +; CHECK-NEXT: store float [[TMP3]], ptr [[LENGTHS]], align 4 +; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr float, ptr [[DST]], i64 1 +; CHECK-NEXT: store <2 x float> [[TMP2]], ptr [[ARRAYIDX32]], align 4 +; CHECK-NEXT: ret void +; +entry: + %conv25 = fptrunc double %conv to float + %Lengths = getelementptr float, ptr %dst, i64 0 + store float %conv25, ptr %Lengths, align 4 + %arrayidx32 = getelementptr float, ptr %dst, i64 1 + store float %conv25, ptr %arrayidx32, align 4 + %conv34 = fptrunc double %conv to float + %arrayidx37 = getelementptr float, ptr %dst, i64 2 + store float %conv34, ptr %arrayidx37, align 4 + ret void +} + +declare float @llvm.fmuladd.f32(float, float, float) + +declare double @llvm.fmuladd.f64(double, double, double) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec3-calls.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec3-calls.ll new file mode 100644 index 00000000000000..71b9315839ecff --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec3-calls.ll @@ -0,0 +1,60 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=slp-vectorizer -mtriple=x86_64-apple-macosx -S %s | FileCheck %s + +define void @vec3_vectorize_call(ptr %Colour, float %0) { +; CHECK-LABEL: @vec3_vectorize_call( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[COLOUR:%.*]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP1]], <2 x float> zeroinitializer, <2 x float> zeroinitializer) +; CHECK-NEXT: store <2 x float> [[TMP2]], ptr [[COLOUR]], align 4 +; CHECK-NEXT: [[ARRAYIDX99_I1:%.*]] = getelementptr float, ptr [[COLOUR]], i64 2 +; CHECK-NEXT: [[TMP3:%.*]] = call float 
@llvm.fmuladd.f32(float [[TMP0:%.*]], float 0.000000e+00, float 0.000000e+00) +; CHECK-NEXT: store float [[TMP3]], ptr [[ARRAYIDX99_I1]], align 4 +; CHECK-NEXT: ret void +; +entry: + %1 = load float, ptr %Colour, align 4 + %2 = call float @llvm.fmuladd.f32(float %1, float 0.000000e+00, float 0.000000e+00) + store float %2, ptr %Colour, align 4 + %arrayidx91.i = getelementptr float, ptr %Colour, i64 1 + %3 = load float, ptr %arrayidx91.i, align 4 + %4 = call float @llvm.fmuladd.f32(float %3, float 0.000000e+00, float 0.000000e+00) + store float %4, ptr %arrayidx91.i, align 4 + %arrayidx99.i1 = getelementptr float, ptr %Colour, i64 2 + %5 = call float @llvm.fmuladd.f32(float %0, float 0.000000e+00, float 0.000000e+00) + store float %5, ptr %arrayidx99.i1, align 4 + ret void +} + +define void @vec3_fmuladd_64(ptr %Colour, double %0) { +; CHECK-LABEL: @vec3_fmuladd_64( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARRAYIDX80:%.*]] = getelementptr float, ptr [[COLOUR:%.*]], i64 2 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[TMP0:%.*]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP2]], <2 x double> zeroinitializer, <2 x double> zeroinitializer) +; CHECK-NEXT: [[TMP4:%.*]] = fptrunc <2 x double> [[TMP3]] to <2 x float> +; CHECK-NEXT: store <2 x float> [[TMP4]], ptr [[COLOUR]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.fmuladd.f64(double [[TMP0]], double 0.000000e+00, double 0.000000e+00) +; CHECK-NEXT: [[CONV82:%.*]] = fptrunc double [[TMP5]] to float +; CHECK-NEXT: store float [[CONV82]], ptr [[ARRAYIDX80]], align 4 +; CHECK-NEXT: ret void +; +entry: + %arrayidx72 = getelementptr float, ptr %Colour, i64 1 + %arrayidx80 = getelementptr float, ptr %Colour, i64 2 + %1 = call double @llvm.fmuladd.f64(double %0, double 0.000000e+00, double 0.000000e+00) + %conv66 = fptrunc double %1 
to float + store float %conv66, ptr %Colour, align 4 + %2 = call double @llvm.fmuladd.f64(double %0, double 0.000000e+00, double 0.000000e+00) + %conv74 = fptrunc double %2 to float + store float %conv74, ptr %arrayidx72, align 4 + %3 = call double @llvm.fmuladd.f64(double %0, double 0.000000e+00, double 0.000000e+00) + %conv82 = fptrunc double %3 to float + store float %conv82, ptr %arrayidx80, align 4 + ret void +} + +declare float @llvm.fmuladd.f32(float, float, float) + +declare double @llvm.fmuladd.f64(double, double, double) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec3-gather-some-loads.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec3-gather-some-loads.ll new file mode 100644 index 00000000000000..1411f9416f69df --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec3-gather-some-loads.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -passes=slp-vectorizer -mtriple=x86_64-apple-macosx -S %s | FileCheck %s + +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" + +define void @test_insert_loads(ptr %A, ptr noalias %B, float %0) #0 { +; CHECK-LABEL: define void @test_insert_loads( +; CHECK-SAME: ptr [[A:%.*]], ptr noalias [[B:%.*]], float [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MULADD_0:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP0]], float 1.000000e+00, float 1.000000e+00) +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP2]], <2 x float> , <2 x float> ) +; CHECK-NEXT: [[A_28:%.*]] = getelementptr i8, ptr [[A]], i64 28 +; CHECK-NEXT: [[L_A_28:%.*]] = load float, ptr [[A_28]], align 4 +; CHECK-NEXT: [[A_12:%.*]] = getelementptr i8, ptr [[A]], i64 
12 +; CHECK-NEXT: [[L_A_12:%.*]] = load float, ptr [[A_12]], align 4 +; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr i8, ptr [[B]], i64 4 +; CHECK-NEXT: [[L_B_0:%.*]] = load float, ptr [[B]], align 4 +; CHECK-NEXT: [[GEP_28:%.*]] = getelementptr i8, ptr [[B]], i64 28 +; CHECK-NEXT: [[GEP_20:%.*]] = getelementptr i8, ptr [[B]], i64 20 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> , float [[L_A_12]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float [[L_A_28]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> , float [[L_B_0]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP5]], <4 x float> [[TMP8]], <4 x float> [[TMP9]]) +; CHECK-NEXT: store <4 x float> [[TMP10]], ptr [[GEP_4]], align 4 +; CHECK-NEXT: store <2 x float> [[TMP3]], ptr [[GEP_20]], align 4 +; CHECK-NEXT: store float [[MULADD_0]], ptr [[GEP_28]], align 4 +; CHECK-NEXT: ret void +; +entry: + %muladd.0 = tail call float @llvm.fmuladd.f32(float %0, float 1.000000e+00, float 1.000000e+00) + %muladd.1 = tail call float @llvm.fmuladd.f32(float %0, float 2.000000e+00, float 2.000000e+00) + %muladd.2 = tail call float @llvm.fmuladd.f32(float %0, float 3.000000e+00, float 3.000000e+00) + %muladd.3 = tail call float @llvm.fmuladd.f32(float %0, float 4.000000e+00, float 4.000000e+00) + %A.28 = getelementptr i8, ptr %A, i64 28 + %l.A.28 = load float, ptr %A.28, align 4 + %muladd.4 = tail call float @llvm.fmuladd.f32(float %0, float %l.A.28, float 0.000000e+00) + %muladd.5 = tail call float @llvm.fmuladd.f32(float %0, float %l.A.28, float 0.000000e+00) + %A.12 = getelementptr i8, ptr %A, i64 12 + %l.A.12 = load float, ptr %A.12, align 4 + 
%gep.4 = getelementptr i8, ptr %B, i64 4 + %gep.12 = getelementptr i8, ptr %B, i64 12 + %l.B.0 = load float, ptr %B, align 4 + %muladd.6 = tail call float @llvm.fmuladd.f32(float %0, float %l.A.12, float %l.B.0) + %gep.28 = getelementptr i8, ptr %B, i64 28 + %gep.24 = getelementptr i8, ptr %B, i64 24 + %gep.20 = getelementptr i8, ptr %B, i64 20 + %gep.16 = getelementptr i8, ptr %B, i64 16 + %gep.8 = getelementptr i8, ptr %B, i64 8 + store float %muladd.6, ptr %gep.4, align 4 + store float %muladd.5, ptr %gep.8, align 8 + store float %muladd.4, ptr %gep.12, align 4 + store float %muladd.3, ptr %gep.16, align 16 + store float %muladd.2, ptr %gep.20, align 4 + store float %muladd.1, ptr %gep.24, align 8 + store float %muladd.0, ptr %gep.28, align 4 + ret void +} + +declare float @llvm.fmuladd.f32(float, float, float) + +attributes #0 = { "target-cpu"="skylake-avx512" } diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec3-reorder-reshuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec3-reorder-reshuffle.ll new file mode 100644 index 00000000000000..9584a663b2d486 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec3-reorder-reshuffle.ll @@ -0,0 +1,513 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -passes=slp-vectorizer -mtriple=x86_64-apple-macosx -S %s | FileCheck %s + +%struct.zot = type { i32, i32, i32 } + +define i1 @reorder_results(ptr %arg, i1 %arg1, ptr %arg2, i64 %arg3, ptr %arg4) { +; CHECK-LABEL: define i1 @reorder_results( +; CHECK-SAME: ptr [[ARG:%.*]], i1 [[ARG1:%.*]], ptr [[ARG2:%.*]], i64 [[ARG3:%.*]], ptr [[ARG4:%.*]]) { +; CHECK-NEXT: bb: +; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr [[ARG4]], align 8 +; CHECK-NEXT: [[LOAD4:%.*]] = load i32, ptr [[LOAD]], align 4 +; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i8, ptr [[LOAD]], i64 4 +; CHECK-NEXT: [[LOAD5:%.*]] = load i32, ptr [[GETELEMENTPTR]], align 4 +; CHECK-NEXT: [[GETELEMENTPTR6:%.*]] = getelementptr i8, ptr 
[[LOAD]], i64 8 +; CHECK-NEXT: [[LOAD7:%.*]] = load i32, ptr [[GETELEMENTPTR6]], align 4 +; CHECK-NEXT: br i1 [[ARG1]], label [[BB12:%.*]], label [[BB9:%.*]] +; CHECK: bb8: +; CHECK-NEXT: ret i1 false +; CHECK: bb9: +; CHECK-NEXT: [[FREEZE:%.*]] = freeze ptr [[ARG]] +; CHECK-NEXT: store i32 [[LOAD4]], ptr [[FREEZE]], align 4 +; CHECK-NEXT: [[GETELEMENTPTR10:%.*]] = getelementptr i8, ptr [[FREEZE]], i64 4 +; CHECK-NEXT: store i32 [[LOAD7]], ptr [[GETELEMENTPTR10]], align 4 +; CHECK-NEXT: [[GETELEMENTPTR11:%.*]] = getelementptr i8, ptr [[FREEZE]], i64 8 +; CHECK-NEXT: store i32 [[LOAD5]], ptr [[GETELEMENTPTR11]], align 4 +; CHECK-NEXT: br label [[BB8:%.*]] +; CHECK: bb12: +; CHECK-NEXT: [[GETELEMENTPTR13:%.*]] = getelementptr [[STRUCT_ZOT:%.*]], ptr [[ARG2]], i64 [[ARG3]] +; CHECK-NEXT: store i32 [[LOAD4]], ptr [[GETELEMENTPTR13]], align 4 +; CHECK-NEXT: [[GETELEMENTPTR14:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR13]], i64 4 +; CHECK-NEXT: store i32 [[LOAD7]], ptr [[GETELEMENTPTR14]], align 4 +; CHECK-NEXT: [[GETELEMENTPTR15:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR13]], i64 8 +; CHECK-NEXT: store i32 [[LOAD5]], ptr [[GETELEMENTPTR15]], align 4 +; CHECK-NEXT: br label [[BB8]] +; +bb: + %load = load ptr, ptr %arg4, align 8 + %load4 = load i32, ptr %load, align 4 + %getelementptr = getelementptr i8, ptr %load, i64 4 + %load5 = load i32, ptr %getelementptr, align 4 + %getelementptr6 = getelementptr i8, ptr %load, i64 8 + %load7 = load i32, ptr %getelementptr6, align 4 + br i1 %arg1, label %bb12, label %bb9 + +bb8: ; preds = %bb12, %bb9 + ret i1 false + +bb9: ; preds = %bb + %freeze = freeze ptr %arg + store i32 %load4, ptr %freeze, align 4 + %getelementptr10 = getelementptr i8, ptr %freeze, i64 4 + store i32 %load7, ptr %getelementptr10, align 4 + %getelementptr11 = getelementptr i8, ptr %freeze, i64 8 + store i32 %load5, ptr %getelementptr11, align 4 + br label %bb8 + +bb12: ; preds = %bb + %getelementptr13 = getelementptr %struct.zot, ptr %arg2, i64 %arg3 + 
store i32 %load4, ptr %getelementptr13, align 4 + %getelementptr14 = getelementptr i8, ptr %getelementptr13, i64 4 + store i32 %load7, ptr %getelementptr14, align 4 + %getelementptr15 = getelementptr i8, ptr %getelementptr13, i64 8 + store i32 %load5, ptr %getelementptr15, align 4 + br label %bb8 +} + +define void @extract_mask(ptr %object, double %conv503, double %conv520) { +; CHECK-LABEL: define void @extract_mask( +; CHECK-SAME: ptr [[OBJECT:%.*]], double [[CONV503:%.*]], double [[CONV520:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OBJECT]], align 8 +; CHECK-NEXT: [[BBOX483:%.*]] = getelementptr float, ptr [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[BBOX483]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[CONV503]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = fcmp ogt <2 x double> [[TMP4]], +; CHECK-NEXT: [[TMP6:%.*]] = select <2 x i1> [[TMP5]], <2 x double> [[TMP3]], <2 x double> +; CHECK-NEXT: [[TMP7:%.*]] = fsub <2 x double> zeroinitializer, [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = fptrunc <2 x double> [[TMP7]] to <2 x float> +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1 +; CHECK-NEXT: [[MUL646:%.*]] = fmul float [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[CMP663:%.*]] = fcmp olt float [[MUL646]], 0.000000e+00 +; CHECK-NEXT: br i1 [[CMP663]], label [[IF_THEN665:%.*]], label [[IF_END668:%.*]] +; CHECK: if.then665: +; CHECK-NEXT: [[ARRAYIDX656:%.*]] = getelementptr float, ptr [[OBJECT]], i64 10 +; CHECK-NEXT: [[BBOX651:%.*]] = getelementptr float, ptr [[OBJECT]] +; CHECK-NEXT: [[CONV613:%.*]] = fptrunc double [[CONV503]] to float +; CHECK-NEXT: store float [[CONV613]], ptr [[BBOX651]], align 8 +; CHECK-NEXT: 
[[BBOX_SROA_6_0_BBOX666_SROA_IDX:%.*]] = getelementptr float, ptr [[OBJECT]], i64 1 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x double> [[TMP6]], double [[CONV520]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = fptrunc <2 x double> [[TMP11]] to <2 x float> +; CHECK-NEXT: store <2 x float> [[TMP12]], ptr [[BBOX_SROA_6_0_BBOX666_SROA_IDX]], align 4 +; CHECK-NEXT: store <2 x float> [[TMP8]], ptr [[ARRAYIDX656]], align 8 +; CHECK-NEXT: br label [[IF_END668]] +; CHECK: if.end668: +; CHECK-NEXT: ret void +; +entry: + %0 = load ptr, ptr %object, align 8 + %bbox483 = getelementptr float, ptr %0 + %1 = load float, ptr %bbox483, align 8 + %conv486 = fpext float %1 to double + %cmp487 = fcmp ogt double %conv486, -2.000000e+10 + %conv486.2 = select i1 %cmp487, double %conv486, double -2.000000e+10 + %arrayidx502 = getelementptr float, ptr %0, i64 1 + %2 = load float, ptr %arrayidx502, align 4 + %conv5033 = fpext float %2 to double + %cmp504 = fcmp ogt double %conv503, 0.000000e+00 + %cond514 = select i1 %cmp504, double %conv5033, double 0.000000e+00 + %sub626 = fsub double 0.000000e+00, %conv486.2 + %conv627 = fptrunc double %sub626 to float + %sub632 = fsub double 0.000000e+00, %cond514 + %conv633 = fptrunc double %sub632 to float + %mul646 = fmul float %conv633, %conv627 + %cmp663 = fcmp olt float %mul646, 0.000000e+00 + br i1 %cmp663, label %if.then665, label %if.end668 + +if.then665: ; preds = %entry + %arrayidx656 = getelementptr float, ptr %object, i64 10 + %lengths652 = getelementptr float, ptr %object, i64 11 + %bbox651 = getelementptr float, ptr %object + %conv621 = fptrunc double %conv520 to float + %conv617 = fptrunc double %cond514 to float + %conv613 = fptrunc double %conv503 to float + store float %conv613, ptr %bbox651, align 8 + %bbox.sroa.6.0.bbox666.sroa_idx = getelementptr float, ptr %object, i64 1 + store float %conv617, ptr %bbox.sroa.6.0.bbox666.sroa_idx, align 4 + %bbox.sroa.8.0.bbox666.sroa_idx = getelementptr float, ptr %object, i64 2 + store float 
%conv621, ptr %bbox.sroa.8.0.bbox666.sroa_idx, align 8 + store float %conv627, ptr %lengths652, align 4 + store float %conv633, ptr %arrayidx656, align 8 + br label %if.end668 + +if.end668: ; preds = %if.then665, %entry + ret void +} + +define void @gather_2(ptr %mat1, float %0, float %1) { +; CHECK-LABEL: define void @gather_2( +; CHECK-SAME: ptr [[MAT1:%.*]], float [[TMP0:%.*]], float [[TMP1:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> , <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP3]], <2 x float> [[TMP4]], <2 x float> zeroinitializer) +; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.fmuladd.f32(float [[TMP0]], float [[TMP1]], float 0.000000e+00) +; CHECK-NEXT: [[TMP7:%.*]] = fmul float [[TMP6]], 0.000000e+00 +; CHECK-NEXT: [[ARRAYIDX163:%.*]] = getelementptr [4 x [4 x float]], ptr [[MAT1]], i64 0, i64 1 +; CHECK-NEXT: [[ARRAYIDX5_I_I_I280:%.*]] = getelementptr [4 x [4 x float]], ptr [[MAT1]], i64 0, i64 1, i64 2 +; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x float> [[TMP5]], zeroinitializer +; CHECK-NEXT: store <2 x float> [[TMP8]], ptr [[ARRAYIDX163]], align 4 +; CHECK-NEXT: store float [[TMP7]], ptr [[ARRAYIDX5_I_I_I280]], align 4 +; CHECK-NEXT: ret void +; +entry: + %2 = call float @llvm.fmuladd.f32(float %0, float 0.000000e+00, float 0.000000e+00) + %3 = call float @llvm.fmuladd.f32(float %1, float %0, float 0.000000e+00) + %4 = call float @llvm.fmuladd.f32(float %0, float %1, float 0.000000e+00) + %5 = fmul float %2, 0.000000e+00 + %6 = fmul float %3, 0.000000e+00 + %7 = fmul float %4, 0.000000e+00 + %arrayidx163 = getelementptr [4 x [4 x float]], ptr %mat1, i64 0, i64 1 + %arrayidx2.i.i.i278 = getelementptr [4 x [4 x float]], ptr %mat1, i64 0, i64 1, i64 1 + %arrayidx5.i.i.i280 = 
getelementptr [4 x [4 x float]], ptr %mat1, i64 0, i64 1, i64 2 + store float %5, ptr %arrayidx163, align 4 + store float %6, ptr %arrayidx2.i.i.i278, align 4 + store float %7, ptr %arrayidx5.i.i.i280, align 4 + ret void +} + +define i32 @reorder_indices_1(float %0) { +; CHECK-LABEL: define i32 @reorder_indices_1( +; CHECK-SAME: float [[TMP0:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[NOR1:%.*]] = alloca [0 x [3 x float]], i32 0, align 4 +; CHECK-NEXT: [[ARRAYIDX2_I265:%.*]] = getelementptr float, ptr [[NOR1]], i64 2 +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX2_I265]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[NOR1]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = fneg float [[TMP3]] +; CHECK-NEXT: [[NEG11_I:%.*]] = fmul float [[TMP4]], [[TMP0]] +; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.fmuladd.f32(float [[TMP1]], float 0.000000e+00, float [[NEG11_I]]) +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP2]], <2 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = fneg <2 x float> [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = fmul <2 x float> [[TMP8]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP2]], <2 x float> zeroinitializer, <2 x float> [[TMP12]]) +; CHECK-NEXT: [[TMP14:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP10]], <2 x float> [[TMP13]], <2 x float> zeroinitializer) +; CHECK-NEXT: [[TMP15:%.*]] = call float @llvm.fmuladd.f32(float [[TMP0]], float [[TMP5]], float 0.000000e+00) +; CHECK-NEXT: 
[[TMP16:%.*]] = fmul <2 x float> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[MUL6_I_I_I:%.*]] = fmul float [[TMP15]], 0.000000e+00 +; CHECK-NEXT: store <2 x float> [[TMP16]], ptr [[NOR1]], align 4 +; CHECK-NEXT: store float [[MUL6_I_I_I]], ptr [[ARRAYIDX2_I265]], align 4 +; CHECK-NEXT: ret i32 0 +; +entry: + %nor1 = alloca [0 x [3 x float]], i32 0, align 4 + %arrayidx.i = getelementptr float, ptr %nor1, i64 1 + %1 = load float, ptr %arrayidx.i, align 4 + %arrayidx2.i265 = getelementptr float, ptr %nor1, i64 2 + %2 = load float, ptr %arrayidx2.i265, align 4 + %3 = fneg float %2 + %neg.i267 = fmul float %3, %0 + %4 = call float @llvm.fmuladd.f32(float %1, float 0.000000e+00, float %neg.i267) + %5 = load float, ptr %nor1, align 4 + %6 = fneg float %5 + %neg11.i = fmul float %6, %0 + %7 = call float @llvm.fmuladd.f32(float %2, float 0.000000e+00, float %neg11.i) + %8 = fneg float %1 + %neg18.i = fmul float %8, %0 + %9 = call float @llvm.fmuladd.f32(float %5, float 0.000000e+00, float %neg18.i) + %10 = call float @llvm.fmuladd.f32(float %0, float %9, float 0.000000e+00) + %11 = call float @llvm.fmuladd.f32(float %0, float %4, float 0.000000e+00) + %12 = call float @llvm.fmuladd.f32(float %0, float %7, float 0.000000e+00) + %mul.i.i.i = fmul float %10, 0.000000e+00 + %mul3.i.i.i = fmul float %11, 0.000000e+00 + %mul6.i.i.i = fmul float %12, 0.000000e+00 + store float %mul.i.i.i, ptr %nor1, align 4 + store float %mul3.i.i.i, ptr %arrayidx.i, align 4 + store float %mul6.i.i.i, ptr %arrayidx2.i265, align 4 + ret i32 0 +} + +define void @reorder_indices_2(ptr %spoint) { +; CHECK-LABEL: define void @reorder_indices_2( +; CHECK-SAME: ptr [[SPOINT:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x float> zeroinitializer, i64 0 +; CHECK-NEXT: [[TMP1:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP0]], float 0.000000e+00, float 0.000000e+00) +; CHECK-NEXT: [[MUL4_I461:%.*]] = fmul float [[TMP1]], 0.000000e+00 +; CHECK-NEXT: [[DSCO:%.*]] = 
getelementptr float, ptr [[SPOINT]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> zeroinitializer, <2 x float> zeroinitializer, <2 x float> zeroinitializer) +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[TMP2]], zeroinitializer +; CHECK-NEXT: store <2 x float> [[TMP3]], ptr [[DSCO]], align 4 +; CHECK-NEXT: [[ARRAYIDX5_I476:%.*]] = getelementptr float, ptr [[SPOINT]], i64 2 +; CHECK-NEXT: store float [[MUL4_I461]], ptr [[ARRAYIDX5_I476]], align 4 +; CHECK-NEXT: ret void +; +entry: + %0 = extractelement <3 x float> zeroinitializer, i64 1 + %1 = extractelement <3 x float> zeroinitializer, i64 2 + %2 = extractelement <3 x float> zeroinitializer, i64 0 + %3 = tail call float @llvm.fmuladd.f32(float %0, float 0.000000e+00, float 0.000000e+00) + %4 = tail call float @llvm.fmuladd.f32(float %1, float 0.000000e+00, float 0.000000e+00) + %5 = tail call float @llvm.fmuladd.f32(float %2, float 0.000000e+00, float 0.000000e+00) + %mul.i457 = fmul float %3, 0.000000e+00 + %mul2.i459 = fmul float %4, 0.000000e+00 + %mul4.i461 = fmul float %5, 0.000000e+00 + %dsco = getelementptr float, ptr %spoint, i64 0 + store float %mul.i457, ptr %dsco, align 4 + %arrayidx3.i474 = getelementptr float, ptr %spoint, i64 1 + store float %mul2.i459, ptr %arrayidx3.i474, align 4 + %arrayidx5.i476 = getelementptr float, ptr %spoint, i64 2 + store float %mul4.i461, ptr %arrayidx5.i476, align 4 + ret void +} + +define void @reorder_indices_2x_load(ptr %png_ptr, ptr %info_ptr) { +; CHECK-LABEL: define void @reorder_indices_2x_load( +; CHECK-SAME: ptr [[PNG_PTR:%.*]], ptr [[INFO_PTR:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BIT_DEPTH:%.*]] = getelementptr i8, ptr [[INFO_PTR]], i64 0 +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[BIT_DEPTH]], align 4 +; CHECK-NEXT: [[COLOR_TYPE:%.*]] = getelementptr i8, ptr [[INFO_PTR]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[COLOR_TYPE]], align 1 +; CHECK-NEXT: [[BIT_DEPTH37_I:%.*]] = getelementptr i8, ptr 
[[PNG_PTR]], i64 11 +; CHECK-NEXT: store i8 [[TMP0]], ptr [[BIT_DEPTH37_I]], align 1 +; CHECK-NEXT: [[COLOR_TYPE39_I:%.*]] = getelementptr i8, ptr [[PNG_PTR]], i64 10 +; CHECK-NEXT: store i8 [[TMP1]], ptr [[COLOR_TYPE39_I]], align 2 +; CHECK-NEXT: [[USR_BIT_DEPTH_I:%.*]] = getelementptr i8, ptr [[PNG_PTR]], i64 12 +; CHECK-NEXT: store i8 [[TMP0]], ptr [[USR_BIT_DEPTH_I]], align 8 +; CHECK-NEXT: ret void +; +entry: + %bit_depth = getelementptr i8, ptr %info_ptr, i64 0 + %0 = load i8, ptr %bit_depth, align 4 + %color_type = getelementptr i8, ptr %info_ptr, i64 1 + %1 = load i8, ptr %color_type, align 1 + %bit_depth37.i = getelementptr i8, ptr %png_ptr, i64 11 + store i8 %0, ptr %bit_depth37.i, align 1 + %color_type39.i = getelementptr i8, ptr %png_ptr, i64 10 + store i8 %1, ptr %color_type39.i, align 2 + %usr_bit_depth.i = getelementptr i8, ptr %png_ptr, i64 12 + store i8 %0, ptr %usr_bit_depth.i, align 8 + ret void +} + +define void @reuse_shuffle_indidces_1(ptr %col, float %0, float %1) { +; CHECK-LABEL: define void @reuse_shuffle_indidces_1( +; CHECK-SAME: ptr [[COL:%.*]], float [[TMP0:%.*]], float [[TMP1:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x float> [[TMP4]], zeroinitializer +; CHECK-NEXT: store <2 x float> [[TMP5]], ptr [[COL]], align 4 +; CHECK-NEXT: [[ARRAYIDX33:%.*]] = getelementptr float, ptr [[COL]], i64 2 +; CHECK-NEXT: [[MUL38:%.*]] = fmul float [[TMP0]], 0.000000e+00 +; CHECK-NEXT: [[TMP6:%.*]] = fadd float [[MUL38]], 0.000000e+00 +; CHECK-NEXT: store float [[TMP6]], ptr [[ARRAYIDX33]], align 4 +; CHECK-NEXT: ret void +; +entry: + %mul24 = fmul float %1, 0.000000e+00 + %2 = fadd float %mul24, 0.000000e+00 + store float %2, ptr %col, align 4 + %arrayidx26 = getelementptr float, ptr 
%col, i64 1 + %mul31 = fmul float %0, 0.000000e+00 + %3 = fadd float %mul31, 0.000000e+00 + store float %3, ptr %arrayidx26, align 4 + %arrayidx33 = getelementptr float, ptr %col, i64 2 + %mul38 = fmul float %0, 0.000000e+00 + %4 = fadd float %mul38, 0.000000e+00 + store float %4, ptr %arrayidx33, align 4 + ret void +} + +define void @reuse_shuffle_indices_2(ptr %inertia, double %0) { +; CHECK-LABEL: define void @reuse_shuffle_indices_2( +; CHECK-SAME: ptr [[INERTIA:%.*]], double [[TMP0:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = fptrunc <2 x double> [[TMP2]] to <2 x float> +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP5]], +; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x float> [[TMP6]], +; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x float> [[TMP7]], +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <3 x i32> +; CHECK-NEXT: store <3 x float> [[TMP9]], ptr [[INERTIA]], align 4 +; CHECK-NEXT: ret void +; +entry: + %1 = insertelement <2 x double> poison, double %0, i32 0 + %2 = shufflevector <2 x double> %1, <2 x double> poison, <2 x i32> zeroinitializer + %3 = fptrunc <2 x double> %2 to <2 x float> + %4 = fmul <2 x float> %3, zeroinitializer + %5 = shufflevector <2 x float> %4, <2 x float> poison, <4 x i32> + %6 = fadd <4 x float> %5, + %7 = fmul <4 x float> %6, + %8 = fadd <4 x float> %7, + %9 = shufflevector <4 x float> %8, <4 x float> poison, <3 x i32> + store <3 x float> %9, ptr %inertia, align 4 + ret void +} + +define void @reuse_shuffle_indices_cost_crash_2(ptr %bezt, float %0) { +; CHECK-LABEL: define void @reuse_shuffle_indices_cost_crash_2( +; 
CHECK-SAME: ptr [[BEZT:%.*]], float [[TMP0:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[FNEG:%.*]] = fmul float [[TMP0]], 0.000000e+00 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[FNEG]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP2]], <2 x float> [[TMP4]], <2 x float> zeroinitializer) +; CHECK-NEXT: store <2 x float> [[TMP5]], ptr [[BEZT]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = tail call float @llvm.fmuladd.f32(float [[FNEG]], float 0.000000e+00, float 0.000000e+00) +; CHECK-NEXT: [[ARRAYIDX8_I831:%.*]] = getelementptr float, ptr [[BEZT]], i64 2 +; CHECK-NEXT: store float [[TMP6]], ptr [[ARRAYIDX8_I831]], align 4 +; CHECK-NEXT: ret void +; +entry: + %fneg = fmul float %0, 0.000000e+00 + %1 = tail call float @llvm.fmuladd.f32(float %0, float %fneg, float 0.000000e+00) + store float %1, ptr %bezt, align 4 + %2 = tail call float @llvm.fmuladd.f32(float %0, float %fneg, float 0.000000e+00) + %arrayidx5.i = getelementptr float, ptr %bezt, i64 1 + store float %2, ptr %arrayidx5.i, align 4 + %3 = tail call float @llvm.fmuladd.f32(float %fneg, float 0.000000e+00, float 0.000000e+00) + %arrayidx8.i831 = getelementptr float, ptr %bezt, i64 2 + store float %3, ptr %arrayidx8.i831, align 4 + ret void +} + +define void @reuse_shuffle_indices_cost_crash_3(ptr %m, double %conv, double %conv2) { +; CHECK-LABEL: define void @reuse_shuffle_indices_cost_crash_3( +; CHECK-SAME: ptr [[M:%.*]], double [[CONV:%.*]], double [[CONV2:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SUB19:%.*]] = fsub double 0.000000e+00, [[CONV2]] +; CHECK-NEXT: [[CONV20:%.*]] = fptrunc double [[SUB19]] to float +; CHECK-NEXT: 
store float [[CONV20]], ptr [[M]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 0.000000e+00 +; CHECK-NEXT: [[CONV239:%.*]] = fptrunc double [[ADD]] to float +; CHECK-NEXT: [[ARRAYIDX25:%.*]] = getelementptr [4 x float], ptr [[M]], i64 0, i64 1 +; CHECK-NEXT: store float [[CONV239]], ptr [[ARRAYIDX25]], align 4 +; CHECK-NEXT: [[ADD26:%.*]] = fsub double [[CONV]], [[CONV]] +; CHECK-NEXT: [[CONV27:%.*]] = fptrunc double [[ADD26]] to float +; CHECK-NEXT: [[ARRAYIDX29:%.*]] = getelementptr [4 x float], ptr [[M]], i64 0, i64 2 +; CHECK-NEXT: store float [[CONV27]], ptr [[ARRAYIDX29]], align 4 +; CHECK-NEXT: ret void +; +entry: + %sub19 = fsub double 0.000000e+00, %conv2 + %conv20 = fptrunc double %sub19 to float + store float %conv20, ptr %m, align 4 + %add = fadd double %conv, 0.000000e+00 + %conv239 = fptrunc double %add to float + %arrayidx25 = getelementptr [4 x float], ptr %m, i64 0, i64 1 + store float %conv239, ptr %arrayidx25, align 4 + %add26 = fsub double %conv, %conv + %conv27 = fptrunc double %add26 to float + %arrayidx29 = getelementptr [4 x float], ptr %m, i64 0, i64 2 + store float %conv27, ptr %arrayidx29, align 4 + ret void +} + +define void @reuse_shuffle_indices_cost_crash_4(double %conv7.i) { +; CHECK-LABEL: define void @reuse_shuffle_indices_cost_crash_4( +; CHECK-SAME: double [[CONV7_I:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[DATA_I111:%.*]] = alloca [0 x [0 x [0 x [3 x float]]]], i32 0, align 4 +; CHECK-NEXT: [[ARRAYIDX_2_I:%.*]] = getelementptr [3 x float], ptr [[DATA_I111]], i64 0, i64 2 +; CHECK-NEXT: [[MUL17_I_US:%.*]] = fmul double [[CONV7_I]], 0.000000e+00 +; CHECK-NEXT: [[MUL_2_I_I_US:%.*]] = fmul double [[MUL17_I_US]], 0.000000e+00 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[CONV7_I]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], zeroinitializer +; 
CHECK-NEXT: [[ADD_2_I_I_US:%.*]] = fadd double [[MUL_2_I_I_US]], 0.000000e+00 +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = fptrunc <2 x double> [[TMP4]] to <2 x float> +; CHECK-NEXT: store <2 x float> [[TMP5]], ptr [[DATA_I111]], align 4 +; CHECK-NEXT: [[CONV_2_I46_US:%.*]] = fptrunc double [[ADD_2_I_I_US]] to float +; CHECK-NEXT: store float [[CONV_2_I46_US]], ptr [[ARRAYIDX_2_I]], align 4 +; CHECK-NEXT: [[CALL2_I_US:%.*]] = load volatile ptr, ptr [[DATA_I111]], align 8 +; CHECK-NEXT: ret void +; +entry: + %data.i111 = alloca [0 x [0 x [0 x [3 x float]]]], i32 0, align 4 + %arrayidx.1.i = getelementptr [3 x float], ptr %data.i111, i64 0, i64 1 + %arrayidx.2.i = getelementptr [3 x float], ptr %data.i111, i64 0, i64 2 + %mul17.i.us = fmul double %conv7.i, 0.000000e+00 + %mul.2.i.i.us = fmul double %mul17.i.us, 0.000000e+00 + %add.i.i82.i.us = fadd double %conv7.i, 0.000000e+00 + %add.1.i.i84.i.us = fadd double %conv7.i, 0.000000e+00 + %mul.i.i91.i.us = fmul double %add.i.i82.i.us, %conv7.i + %mul.1.i.i92.i.us = fmul double %add.1.i.i84.i.us, %conv7.i + %add.i96.i.us = fadd double %mul.i.i91.i.us, 0.000000e+00 + %add.1.i.i.us = fadd double %mul.1.i.i92.i.us, 0.000000e+00 + %add.2.i.i.us = fadd double %mul.2.i.i.us, 0.000000e+00 + %conv.i42.us = fptrunc double %add.i96.i.us to float + store float %conv.i42.us, ptr %data.i111, align 4 + %conv.1.i44.us = fptrunc double %add.1.i.i.us to float + store float %conv.1.i44.us, ptr %arrayidx.1.i, align 4 + %conv.2.i46.us = fptrunc double %add.2.i.i.us to float + store float %conv.2.i46.us, ptr %arrayidx.2.i, align 4 + %call2.i.us = load volatile ptr, ptr %data.i111, align 8 + ret void +} + +define void @common_mask(ptr %m, double %conv, double %conv2) { +; CHECK-LABEL: define void @common_mask( +; CHECK-SAME: ptr [[M:%.*]], double [[CONV:%.*]], double [[CONV2:%.*]]) { +; CHECK-NEXT: entry: +; 
CHECK-NEXT: [[SUB19:%.*]] = fsub double [[CONV]], [[CONV]] +; CHECK-NEXT: [[CONV20:%.*]] = fptrunc double [[SUB19]] to float +; CHECK-NEXT: store float [[CONV20]], ptr [[M]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = fadd double [[CONV2]], 0.000000e+00 +; CHECK-NEXT: [[CONV239:%.*]] = fptrunc double [[ADD]] to float +; CHECK-NEXT: [[ARRAYIDX25:%.*]] = getelementptr [4 x float], ptr [[M]], i64 0, i64 1 +; CHECK-NEXT: store float [[CONV239]], ptr [[ARRAYIDX25]], align 4 +; CHECK-NEXT: [[ADD26:%.*]] = fsub double 0.000000e+00, [[CONV]] +; CHECK-NEXT: [[CONV27:%.*]] = fptrunc double [[ADD26]] to float +; CHECK-NEXT: [[ARRAYIDX29:%.*]] = getelementptr [4 x float], ptr [[M]], i64 0, i64 2 +; CHECK-NEXT: store float [[CONV27]], ptr [[ARRAYIDX29]], align 4 +; CHECK-NEXT: ret void +; +entry: + %sub19 = fsub double %conv, %conv + %conv20 = fptrunc double %sub19 to float + store float %conv20, ptr %m, align 4 + %add = fadd double %conv2, 0.000000e+00 + %conv239 = fptrunc double %add to float + %arrayidx25 = getelementptr [4 x float], ptr %m, i64 0, i64 1 + store float %conv239, ptr %arrayidx25, align 4 + %add26 = fsub double 0.000000e+00, %conv + %conv27 = fptrunc double %add26 to float + %arrayidx29 = getelementptr [4 x float], ptr %m, i64 0, i64 2 + store float %conv27, ptr %arrayidx29, align 4 + ret void +} + +define void @vec3_extract(<3 x i16> %pixel.sroa.0.4.vec.insert606, ptr %call3.i536) { +; CHECK-LABEL: define void @vec3_extract( +; CHECK-SAME: <3 x i16> [[PIXEL_SROA_0_4_VEC_INSERT606:%.*]], ptr [[CALL3_I536:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[PIXEL_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <3 x i16> [[PIXEL_SROA_0_4_VEC_INSERT606]], i64 2 +; CHECK-NEXT: [[RED668:%.*]] = getelementptr i16, ptr [[CALL3_I536]], i64 2 +; CHECK-NEXT: store i16 [[PIXEL_SROA_0_4_VEC_EXTRACT]], ptr [[RED668]], align 2 +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <3 x i16> [[PIXEL_SROA_0_4_VEC_INSERT606]], <3 x i16> poison, <2 x i32> +; CHECK-NEXT: store <2 x i16> [[TMP0]], ptr 
[[CALL3_I536]], align 2 +; CHECK-NEXT: ret void +; +entry: + %pixel.sroa.0.4.vec.extract = extractelement <3 x i16> %pixel.sroa.0.4.vec.insert606, i64 2 + %red668 = getelementptr i16, ptr %call3.i536, i64 2 + store i16 %pixel.sroa.0.4.vec.extract, ptr %red668, align 2 + %pixel.sroa.0.2.vec.extract = extractelement <3 x i16> %pixel.sroa.0.4.vec.insert606, i64 1 + %green670 = getelementptr i16, ptr %call3.i536, i64 1 + store i16 %pixel.sroa.0.2.vec.extract, ptr %green670, align 2 + %pixel.sroa.0.0.vec.extract = extractelement <3 x i16> %pixel.sroa.0.4.vec.insert606, i64 0 + store i16 %pixel.sroa.0.0.vec.extract, ptr %call3.i536, align 2 + ret void +} + +declare float @llvm.fmuladd.f32(float, float, float) From 485ebbff55f41bd12ad768c2974d3280cb581307 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Tue, 13 Feb 2024 15:52:34 +0000 Subject: [PATCH 011/240] [TableGen] Use emplace_back instead of resize to size() + 1. NFC. --- llvm/utils/TableGen/CodeGenRegisters.cpp | 24 +++++++++++------------- llvm/utils/TableGen/CodeGenRegisters.h | 7 +++---- llvm/utils/TableGen/SubtargetEmitter.cpp | 10 ++++------ 3 files changed, 18 insertions(+), 23 deletions(-) diff --git a/llvm/utils/TableGen/CodeGenRegisters.cpp b/llvm/utils/TableGen/CodeGenRegisters.cpp index 25ef31097b53be..dd1850752aad61 100644 --- a/llvm/utils/TableGen/CodeGenRegisters.cpp +++ b/llvm/utils/TableGen/CodeGenRegisters.cpp @@ -1986,15 +1986,15 @@ void CodeGenRegBank::computeRegUnitSets() { continue; // Speculatively grow the RegUnitSets to hold the new set. - RegUnitSets.resize(RegUnitSets.size() + 1); - RegUnitSets.back().Name = RC.getName(); + RegUnitSet &RUSet = RegUnitSets.emplace_back(); + RUSet.Name = RC.getName(); // Compute a sorted list of units in this class. - RC.buildRegUnitSet(*this, RegUnitSets.back().Units); + RC.buildRegUnitSet(*this, RUSet.Units); // Find an existing RegUnitSet. 
std::vector::const_iterator SetI = - findRegUnitSet(RegUnitSets, RegUnitSets.back()); + findRegUnitSet(RegUnitSets, RUSet); if (SetI != std::prev(RegUnitSets.end())) RegUnitSets.pop_back(); } @@ -2043,27 +2043,26 @@ void CodeGenRegBank::computeRegUnitSets() { continue; // Speculatively grow the RegUnitSets to hold the new set. - RegUnitSets.resize(RegUnitSets.size() + 1); - RegUnitSets.back().Name = + RegUnitSet &RUSet = RegUnitSets.emplace_back(); + RUSet.Name = RegUnitSets[Idx].Name + "_with_" + RegUnitSets[SearchIdx].Name; std::set_union(RegUnitSets[Idx].Units.begin(), RegUnitSets[Idx].Units.end(), RegUnitSets[SearchIdx].Units.begin(), RegUnitSets[SearchIdx].Units.end(), - std::inserter(RegUnitSets.back().Units, - RegUnitSets.back().Units.begin())); + std::inserter(RUSet.Units, RUSet.Units.begin())); // Find an existing RegUnitSet, or add the union to the unique sets. std::vector::const_iterator SetI = - findRegUnitSet(RegUnitSets, RegUnitSets.back()); + findRegUnitSet(RegUnitSets, RUSet); if (SetI != std::prev(RegUnitSets.end())) RegUnitSets.pop_back(); else { LLVM_DEBUG(dbgs() << "UnitSet " << RegUnitSets.size() - 1 << " " - << RegUnitSets.back().Name << ":"; + << RUSet.Name << ":"; for (auto &U - : RegUnitSets.back().Units) printRegUnitName(U); + : RUSet.Units) printRegUnitName(U); dbgs() << "\n";); } } @@ -2138,8 +2137,7 @@ void CodeGenRegBank::computeRegUnitSets() { RegUnits[UnitIdx].RegClassUnitSetsIdx = RCUnitSetsIdx; if (RCUnitSetsIdx == RegClassUnitSets.size()) { // Create a new list of UnitSets as a "fake" register class. 
- RegClassUnitSets.resize(RCUnitSetsIdx + 1); - RegClassUnitSets[RCUnitSetsIdx] = std::move(RUSets); + RegClassUnitSets.push_back(std::move(RUSets)); } } } diff --git a/llvm/utils/TableGen/CodeGenRegisters.h b/llvm/utils/TableGen/CodeGenRegisters.h index cfc6d87c4ce3a9..fc5cd67e2d553d 100644 --- a/llvm/utils/TableGen/CodeGenRegisters.h +++ b/llvm/utils/TableGen/CodeGenRegisters.h @@ -712,8 +712,7 @@ class CodeGenRegBank { // Create a native register unit that is associated with one or two root // registers. unsigned newRegUnit(CodeGenRegister *R0, CodeGenRegister *R1 = nullptr) { - RegUnits.resize(RegUnits.size() + 1); - RegUnit &RU = RegUnits.back(); + RegUnit &RU = RegUnits.emplace_back(); RU.Roots[0] = R0; RU.Roots[1] = R1; RU.Artificial = R0->Artificial; @@ -725,8 +724,8 @@ class CodeGenRegBank { // Create a new non-native register unit that can be adopted by a register // to increase its pressure. Note that NumNativeRegUnits is not increased. unsigned newRegUnit(unsigned Weight) { - RegUnits.resize(RegUnits.size() + 1); - RegUnits.back().Weight = Weight; + RegUnit &RU = RegUnits.emplace_back(); + RU.Weight = Weight; return RegUnits.size() - 1; } diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp index ebe39167703c8c..2707f54eed6e9b 100644 --- a/llvm/utils/TableGen/SubtargetEmitter.cpp +++ b/llvm/utils/TableGen/SubtargetEmitter.cpp @@ -486,11 +486,10 @@ void SubtargetEmitter::EmitStageAndOperandCycleData( std::map ItinStageMap, ItinOperandMap; for (const CodeGenProcModel &ProcModel : SchedModels.procModels()) { // Add process itinerary to the list. - ProcItinLists.resize(ProcItinLists.size() + 1); + std::vector &ItinList = ProcItinLists.emplace_back(); // If this processor defines no itineraries, then leave the itinerary list // empty. - std::vector &ItinList = ProcItinLists.back(); if (!ProcModel.hasItineraries()) continue; @@ -1029,17 +1028,16 @@ void SubtargetEmitter::ExpandProcResources( // tables. 
Must be called for each processor in order. void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel, SchedClassTables &SchedTables) { - SchedTables.ProcSchedClasses.resize(SchedTables.ProcSchedClasses.size() + 1); + std::vector &SCTab = + SchedTables.ProcSchedClasses.emplace_back(); if (!ProcModel.hasInstrSchedModel()) return; - std::vector &SCTab = SchedTables.ProcSchedClasses.back(); LLVM_DEBUG(dbgs() << "\n+++ SCHED CLASSES (GenSchedClassTables) +++\n"); for (const CodeGenSchedClass &SC : SchedModels.schedClasses()) { LLVM_DEBUG(SC.dump(&SchedModels)); - SCTab.resize(SCTab.size() + 1); - MCSchedClassDesc &SCDesc = SCTab.back(); + MCSchedClassDesc &SCDesc = SCTab.emplace_back(); // SCDesc.Name is guarded by NDEBUG SCDesc.NumMicroOps = 0; SCDesc.BeginGroup = false; From 4f13f353cc8dc472a3f00932bc42179776f0f684 Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Tue, 13 Feb 2024 15:02:21 +0000 Subject: [PATCH 012/240] [NFC][LLVM][AsmWriter] Extract logic to write out ConstantFP from WriteConstantInternal. This makes it easier to extend the code to support vector types. --- llvm/lib/IR/AsmWriter.cpp | 182 ++++++++++++++++++++------------------ 1 file changed, 94 insertions(+), 88 deletions(-) diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index c6ef332403fd6d..0ae720e8b7ce8c 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -1406,6 +1406,99 @@ static void WriteOptimizationInfo(raw_ostream &Out, const User *U) { } } +static void WriteAPFloatInternal(raw_ostream &Out, const APFloat &APF) { + if (&APF.getSemantics() == &APFloat::IEEEsingle() || + &APF.getSemantics() == &APFloat::IEEEdouble()) { + // We would like to output the FP constant value in exponential notation, + // but we cannot do this if doing so will lose precision. Check here to + // make sure that we only output it in exponential format if we can parse + // the value back and get the same value.
+ // + bool ignored; + bool isDouble = &APF.getSemantics() == &APFloat::IEEEdouble(); + bool isInf = APF.isInfinity(); + bool isNaN = APF.isNaN(); + + if (!isInf && !isNaN) { + double Val = APF.convertToDouble(); + SmallString<128> StrVal; + APF.toString(StrVal, 6, 0, false); + // Check to make sure that the stringized number is not some string like + // "Inf" or NaN, that atof will accept, but the lexer will not. Check + // that the string matches the "[-+]?[0-9]" regex. + // + assert((isDigit(StrVal[0]) || + ((StrVal[0] == '-' || StrVal[0] == '+') && isDigit(StrVal[1]))) && + "[-+]?[0-9] regex does not match!"); + // Reparse stringized version! + if (APFloat(APFloat::IEEEdouble(), StrVal).convertToDouble() == Val) { + Out << StrVal; + return; + } + } + + // Otherwise we could not reparse it to exactly the same value, so we must + // output the string in hexadecimal format! Note that loading and storing + // floating point types changes the bits of NaNs on some hosts, notably + // x86, so we must not use these types. + static_assert(sizeof(double) == sizeof(uint64_t), + "assuming that double is 64 bits!"); + APFloat apf = APF; + + // Floats are represented in ASCII IR as double, convert. + // FIXME: We should allow 32-bit hex float and remove this. + if (!isDouble) { + // A signaling NaN is quieted on conversion, so we need to recreate the + // expected value after convert (quiet bit of the payload is clear). + bool IsSNAN = apf.isSignaling(); + apf.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, + &ignored); + if (IsSNAN) { + APInt Payload = apf.bitcastToAPInt(); + apf = + APFloat::getSNaN(APFloat::IEEEdouble(), apf.isNegative(), &Payload); + } + } + + Out << format_hex(apf.bitcastToAPInt().getZExtValue(), 0, /*Upper=*/true); + return; + } + + // Either half, bfloat or some form of long double. + // These appear as a magic letter identifying the type, then a + // fixed number of hex digits. 
+ Out << "0x"; + APInt API = APF.bitcastToAPInt(); + if (&APF.getSemantics() == &APFloat::x87DoubleExtended()) { + Out << 'K'; + Out << format_hex_no_prefix(API.getHiBits(16).getZExtValue(), 4, + /*Upper=*/true); + Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16, + /*Upper=*/true); + } else if (&APF.getSemantics() == &APFloat::IEEEquad()) { + Out << 'L'; + Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16, + /*Upper=*/true); + Out << format_hex_no_prefix(API.getHiBits(64).getZExtValue(), 16, + /*Upper=*/true); + } else if (&APF.getSemantics() == &APFloat::PPCDoubleDouble()) { + Out << 'M'; + Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16, + /*Upper=*/true); + Out << format_hex_no_prefix(API.getHiBits(64).getZExtValue(), 16, + /*Upper=*/true); + } else if (&APF.getSemantics() == &APFloat::IEEEhalf()) { + Out << 'H'; + Out << format_hex_no_prefix(API.getZExtValue(), 4, + /*Upper=*/true); + } else if (&APF.getSemantics() == &APFloat::BFloat()) { + Out << 'R'; + Out << format_hex_no_prefix(API.getZExtValue(), 4, + /*Upper=*/true); + } else + llvm_unreachable("Unsupported floating point type"); +} + static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, AsmWriterContext &WriterCtx) { if (const ConstantInt *CI = dyn_cast(CV)) { @@ -1418,94 +1511,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, } if (const ConstantFP *CFP = dyn_cast(CV)) { - const APFloat &APF = CFP->getValueAPF(); - if (&APF.getSemantics() == &APFloat::IEEEsingle() || - &APF.getSemantics() == &APFloat::IEEEdouble()) { - // We would like to output the FP constant value in exponential notation, - // but we cannot do this if doing so will lose precision. Check here to - // make sure that we only output it in exponential format if we can parse - // the value back and get the same value. 
- // - bool ignored; - bool isDouble = &APF.getSemantics() == &APFloat::IEEEdouble(); - bool isInf = APF.isInfinity(); - bool isNaN = APF.isNaN(); - if (!isInf && !isNaN) { - double Val = APF.convertToDouble(); - SmallString<128> StrVal; - APF.toString(StrVal, 6, 0, false); - // Check to make sure that the stringized number is not some string like - // "Inf" or NaN, that atof will accept, but the lexer will not. Check - // that the string matches the "[-+]?[0-9]" regex. - // - assert((isDigit(StrVal[0]) || ((StrVal[0] == '-' || StrVal[0] == '+') && - isDigit(StrVal[1]))) && - "[-+]?[0-9] regex does not match!"); - // Reparse stringized version! - if (APFloat(APFloat::IEEEdouble(), StrVal).convertToDouble() == Val) { - Out << StrVal; - return; - } - } - // Otherwise we could not reparse it to exactly the same value, so we must - // output the string in hexadecimal format! Note that loading and storing - // floating point types changes the bits of NaNs on some hosts, notably - // x86, so we must not use these types. - static_assert(sizeof(double) == sizeof(uint64_t), - "assuming that double is 64 bits!"); - APFloat apf = APF; - // Floats are represented in ASCII IR as double, convert. - // FIXME: We should allow 32-bit hex float and remove this. - if (!isDouble) { - // A signaling NaN is quieted on conversion, so we need to recreate the - // expected value after convert (quiet bit of the payload is clear). - bool IsSNAN = apf.isSignaling(); - apf.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, - &ignored); - if (IsSNAN) { - APInt Payload = apf.bitcastToAPInt(); - apf = APFloat::getSNaN(APFloat::IEEEdouble(), apf.isNegative(), - &Payload); - } - } - Out << format_hex(apf.bitcastToAPInt().getZExtValue(), 0, /*Upper=*/true); - return; - } - - // Either half, bfloat or some form of long double. - // These appear as a magic letter identifying the type, then a - // fixed number of hex digits. 
- Out << "0x"; - APInt API = APF.bitcastToAPInt(); - if (&APF.getSemantics() == &APFloat::x87DoubleExtended()) { - Out << 'K'; - Out << format_hex_no_prefix(API.getHiBits(16).getZExtValue(), 4, - /*Upper=*/true); - Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16, - /*Upper=*/true); - return; - } else if (&APF.getSemantics() == &APFloat::IEEEquad()) { - Out << 'L'; - Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16, - /*Upper=*/true); - Out << format_hex_no_prefix(API.getHiBits(64).getZExtValue(), 16, - /*Upper=*/true); - } else if (&APF.getSemantics() == &APFloat::PPCDoubleDouble()) { - Out << 'M'; - Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16, - /*Upper=*/true); - Out << format_hex_no_prefix(API.getHiBits(64).getZExtValue(), 16, - /*Upper=*/true); - } else if (&APF.getSemantics() == &APFloat::IEEEhalf()) { - Out << 'H'; - Out << format_hex_no_prefix(API.getZExtValue(), 4, - /*Upper=*/true); - } else if (&APF.getSemantics() == &APFloat::BFloat()) { - Out << 'R'; - Out << format_hex_no_prefix(API.getZExtValue(), 4, - /*Upper=*/true); - } else - llvm_unreachable("Unsupported floating point type"); + WriteAPFloatInternal(Out, CFP->getValueAPF()); return; } From 987258f5c7801ebb4f7ce7c6a035634b275a5759 Mon Sep 17 00:00:00 2001 From: Daniel Chen Date: Tue, 13 Feb 2024 11:03:54 -0500 Subject: [PATCH 013/240] [Flang] Add __powerpc__ macro to set c_intmax_t to c_int64_t rather than c_int128_t as PowerPC only supports up to c_int64_t. (#81222) PowerPC only supports up to `c_int64_t`. Add macro `__powerpc__` and preprocess it for setting `c_intmax_t` in `iso_c_binding` intrinsic module. 
--- flang/lib/Frontend/CompilerInvocation.cpp | 15 ++++++++++++++- flang/module/iso_c_binding.f90 | 4 ++++ flang/test/Driver/predefined-macros-powerpc.f90 | 11 +++++++++++ 3 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 flang/test/Driver/predefined-macros-powerpc.f90 diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index ffde7f50087e52..4707de0e976ca7 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -1326,10 +1326,23 @@ void CompilerInvocation::setDefaultPredefinitions() { Fortran::common::setOpenMPMacro(getLangOpts().OpenMPVersion, fortranOptions.predefinitions); } + llvm::Triple targetTriple{llvm::Triple(this->targetOpts.triple)}; - if (targetTriple.getArch() == llvm::Triple::ArchType::x86_64) { + switch (targetTriple.getArch()) { + default: + break; + case llvm::Triple::ArchType::x86_64: fortranOptions.predefinitions.emplace_back("__x86_64__", "1"); fortranOptions.predefinitions.emplace_back("__x86_64", "1"); + break; + case llvm::Triple::ArchType::ppc: + case llvm::Triple::ArchType::ppcle: + case llvm::Triple::ArchType::ppc64: + case llvm::Triple::ArchType::ppc64le: + // '__powerpc__' is a generic macro for any PowerPC cases. e.g. Max integer + // size. 
+ fortranOptions.predefinitions.emplace_back("__powerpc__", "1"); + break; } } diff --git a/flang/module/iso_c_binding.f90 b/flang/module/iso_c_binding.f90 index 9a7e68f3314463..1661fd5a6dcf6a 100644 --- a/flang/module/iso_c_binding.f90 +++ b/flang/module/iso_c_binding.f90 @@ -47,7 +47,11 @@ module iso_c_binding c_long_long = c_int64_t, & c_signed_char = c_int8_t, & c_size_t = kind(c_sizeof(1)), & +#if __powerpc__ + c_intmax_t = c_int64_t, & +#else c_intmax_t = c_int128_t, & +#endif c_intptr_t = c_size_t, & c_ptrdiff_t = c_size_t integer, parameter, public :: & diff --git a/flang/test/Driver/predefined-macros-powerpc.f90 b/flang/test/Driver/predefined-macros-powerpc.f90 new file mode 100644 index 00000000000000..b3d2b617fb1fad --- /dev/null +++ b/flang/test/Driver/predefined-macros-powerpc.f90 @@ -0,0 +1,11 @@ +! Test predefined macro for PowerPC architecture + +! RUN: %flang_fc1 -cpp -E %s | FileCheck %s +! REQUIRES: target=powerpc{{.*}} + +! CHECK: integer :: var1 = 1 + +#if __powerpc__ + integer :: var1 = __powerpc__ +#endif +end program From 381a00de4fdcccd904dac6a0856fb44f12ba0abb Mon Sep 17 00:00:00 2001 From: David Spickett Date: Tue, 13 Feb 2024 16:01:28 +0000 Subject: [PATCH 014/240] [clang][Driver][HLSL] Fix formatting of clang-dxc options group title Some extra `<>` and a missing full stop. 
--- clang/include/clang/Driver/Options.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index d5017b901d2906..187b845ddf3c7b 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -8466,8 +8466,8 @@ def _SLASH_ZW : CLJoined<"ZW">; // clang-dxc Options //===----------------------------------------------------------------------===// -def dxc_Group : OptionGroup<"">, Visibility<[DXCOption]>, - HelpText<"dxc compatibility options">; +def dxc_Group : OptionGroup<"clang-dxc options">, Visibility<[DXCOption]>, + HelpText<"dxc compatibility options.">; class DXCFlag : Option<["/", "-"], name, KIND_FLAG>, Group, Visibility<[DXCOption]>; class DXCJoinedOrSeparate : Option<["/", "-"], name, From 11fcae69dbea4860e20ab799ecca9b0432d7f19d Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Tue, 13 Feb 2024 10:06:25 -0600 Subject: [PATCH 015/240] [LLVM] Add `__builtin_readsteadycounter` intrinsic and builtin for realtime clocks (#81331) Summary: This patch adds a new intrinsic and builtin function mirroring the existing `__builtin_readcyclecounter`. The difference is that this implementation targets a separate counter that some targets have which returns a fixed frequency clock that can be used to determine elapsed time, this is different compared to the cycle counter which often has variable frequency. This patch only adds support for the NVPTX and AMDGPU targets. This is done as a new and separate builtin rather than an argument to `readcyclecounter` to avoid needing to change existing code and to make the separation more explicit. 
--- clang/docs/LanguageExtensions.rst | 33 ++++++++++ clang/docs/ReleaseNotes.rst | 3 + clang/include/clang/Basic/Builtins.td | 6 ++ clang/lib/CodeGen/CGBuiltin.cpp | 4 ++ clang/test/CodeGen/builtins.c | 6 ++ llvm/include/llvm/CodeGen/ISDOpcodes.h | 6 ++ llvm/include/llvm/IR/Intrinsics.td | 2 + llvm/include/llvm/Support/TargetOpcodes.def | 3 + llvm/include/llvm/Target/GenericOpcodes.td | 6 ++ .../Target/GlobalISel/SelectionDAGCompat.td | 1 + .../include/llvm/Target/TargetSelectionDAG.td | 3 + llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 2 + llvm/lib/CodeGen/IntrinsicLowering.cpp | 6 ++ llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 6 +- .../SelectionDAG/LegalizeIntegerTypes.cpp | 7 ++- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 2 +- .../SelectionDAG/SelectionDAGBuilder.cpp | 8 +++ .../SelectionDAG/SelectionDAGDumper.cpp | 1 + llvm/lib/CodeGen/TargetLoweringBase.cpp | 3 + .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 2 + .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 1 + llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 4 ++ llvm/lib/Target/AMDGPU/SMInstructions.td | 14 +++++ llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 2 + llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 1 - llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 1 + .../GlobalISel/legalizer-info-validation.mir | 3 + llvm/test/CodeGen/AMDGPU/readsteadycounter.ll | 24 +++++++ llvm/test/CodeGen/NVPTX/intrinsics.ll | 12 ++++ .../builtins/match-table-replacerreg.td | 24 +++---- .../match-table-imms.td | 32 +++++----- .../match-table-intrinsics.td | 5 +- .../match-table-patfrag-root.td | 4 +- .../GlobalISelCombinerEmitter/match-table.td | 62 +++++++++---------- llvm/test/TableGen/GlobalISelEmitter.td | 2 +- 35 files changed, 229 insertions(+), 72 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/readsteadycounter.ll diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index e91156837290f7..ca78a5c39cf736 100644 --- a/clang/docs/LanguageExtensions.rst +++ 
b/clang/docs/LanguageExtensions.rst @@ -2764,6 +2764,39 @@ Query for this feature with ``__has_builtin(__builtin_readcyclecounter)``. Note that even if present, its use may depend on run-time privilege or other OS controlled state. +``__builtin_readsteadycounter`` +------------------------------- + +``__builtin_readsteadycounter`` is used to access the fixed frequency counter +register (or a similar steady-rate clock) on those targets that support it. +The function is similar to ``__builtin_readcyclecounter`` above except that the +frequency is fixed, making it suitable for measuring elapsed time. + +**Syntax**: + +.. code-block:: c++ + + __builtin_readsteadycounter() + +**Example of Use**: + +.. code-block:: c++ + + unsigned long long t0 = __builtin_readsteadycounter(); + do_something(); + unsigned long long t1 = __builtin_readsteadycounter(); + unsigned long long secs_to_do_something = (t1 - t0) / tick_rate; + +**Description**: + +The ``__builtin_readsteadycounter()`` builtin returns the frequency counter value. +When not supported by the target, the return value is always zero. This builtin +takes no arguments and produces an unsigned long long result. The builtin does +not guarantee any particular frequency, only that it is stable. Knowledge of the +counter's true frequency will need to be provided by the user. + +Query for this feature with ``__has_builtin(__builtin_readsteadycounter)``. + ``__builtin_dump_struct`` ------------------------- diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 5c245b7b1bf488..dc2fb3b25e3a54 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -117,6 +117,9 @@ C23 Feature Support Non-comprehensive list of changes in this release ------------------------------------------------- +- Added ``__builtin_readsteadycounter`` for reading fixed frequency hardware + counters. 
+ New Compiler Flags ------------------ diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 31a2bdeb2d3e5e..193d5851f9f29f 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -1110,6 +1110,12 @@ def ReadCycleCounter : Builtin { let Prototype = "unsigned long long int()"; } +def ReadSteadyCounter : Builtin { + let Spellings = ["__builtin_readsteadycounter"]; + let Attributes = [NoThrow]; + let Prototype = "unsigned long long int()"; +} + def Trap : Builtin { let Spellings = ["__builtin_trap"]; let Attributes = [NoThrow, NoReturn]; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index a7a410dab1a018..ee0b7504769622 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -3443,6 +3443,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter); return RValue::get(Builder.CreateCall(F)); } + case Builtin::BI__builtin_readsteadycounter: { + Function *F = CGM.getIntrinsic(Intrinsic::readsteadycounter); + return RValue::get(Builder.CreateCall(F)); + } case Builtin::BI__builtin___clear_cache: { Value *Begin = EmitScalarExpr(E->getArg(0)); Value *End = EmitScalarExpr(E->getArg(1)); diff --git a/clang/test/CodeGen/builtins.c b/clang/test/CodeGen/builtins.c index ed03233b6f1a96..88282120283b8a 100644 --- a/clang/test/CodeGen/builtins.c +++ b/clang/test/CodeGen/builtins.c @@ -496,6 +496,12 @@ long long test_builtin_readcyclecounter(void) { return __builtin_readcyclecounter(); } +// CHECK-LABEL: define{{.*}} i64 @test_builtin_readsteadycounter +long long test_builtin_readsteadycounter(void) { + // CHECK: call i64 @llvm.readsteadycounter() + return __builtin_readsteadycounter(); +} + /// __builtin_launder should be a NOP in C since there are no vtables. 
// CHECK-LABEL: define{{.*}} void @test_builtin_launder void test_builtin_launder(int *p) { diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h index 349d1286c8dc4f..8cb0bc9fd98133 100644 --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -1179,6 +1179,12 @@ enum NodeType { /// counter-like register (or other high accuracy low latency clock source). READCYCLECOUNTER, + /// READSTEADYCOUNTER - This corresponds to the readsteadycounter intrinsic. + /// It has the same semantics as the READCYCLECOUNTER implementation except + /// that the result is the content of the architecture-specific fixed + /// frequency counter suitable for measuring elapsed time. + READSTEADYCOUNTER, + /// HANDLENODE node - Used as a handle for various purposes. HANDLENODE, diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 3c19c7b063652c..4becdd71cd440d 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -870,6 +870,8 @@ def int_pcmarker : DefaultAttrsIntrinsic<[], [llvm_i32_ty]>; def int_readcyclecounter : DefaultAttrsIntrinsic<[llvm_i64_ty]>; +def int_readsteadycounter : DefaultAttrsIntrinsic<[llvm_i64_ty]>; + // The assume intrinsic is marked InaccessibleMemOnly so that proper control // dependencies will be maintained. 
def int_assume : DefaultAttrsIntrinsic< diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def index abb237083d254e..42cb854d950502 100644 --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -352,6 +352,9 @@ HANDLE_TARGET_OPCODE(G_INTRINSIC_ROUNDEVEN) /// INTRINSIC readcyclecounter HANDLE_TARGET_OPCODE(G_READCYCLECOUNTER) +/// INTRINSIC readsteadycounter +HANDLE_TARGET_OPCODE(G_READSTEADYCOUNTER) + /// Generic load (including anyext load) HANDLE_TARGET_OPCODE(G_LOAD) diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td index 2c73b67f9e1af0..19197f50d9dff9 100644 --- a/llvm/include/llvm/Target/GenericOpcodes.td +++ b/llvm/include/llvm/Target/GenericOpcodes.td @@ -1101,6 +1101,12 @@ def G_READCYCLECOUNTER : GenericInstruction { let hasSideEffects = true; } +def G_READSTEADYCOUNTER : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins); + let hasSideEffects = true; +} + //------------------------------------------------------------------------------ // Memory ops //------------------------------------------------------------------------------ diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td index 6bc19421fb0169..b1f3c500a1b6c5 100644 --- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -168,6 +168,7 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index 22360353790dbc..5f8bf0d448105d 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -657,6 +657,9 @@ def 
prefetch : SDNode<"ISD::PREFETCH" , SDTPrefetch, def readcyclecounter : SDNode<"ISD::READCYCLECOUNTER", SDTIntLeaf, [SDNPHasChain, SDNPSideEffect]>; +def readsteadycounter : SDNode<"ISD::READSTEADYCOUNTER", SDTIntLeaf, + [SDNPHasChain, SDNPSideEffect]>; + def membarrier : SDNode<"ISD::MEMBARRIER", SDTNone, [SDNPHasChain, SDNPSideEffect]>; diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index c1d8e890a66edb..311dd9d9739a6d 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1885,6 +1885,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) { return TargetOpcode::G_INTRINSIC_TRUNC; case Intrinsic::readcyclecounter: return TargetOpcode::G_READCYCLECOUNTER; + case Intrinsic::readsteadycounter: + return TargetOpcode::G_READSTEADYCOUNTER; case Intrinsic::ptrmask: return TargetOpcode::G_PTRMASK; case Intrinsic::lrint: diff --git a/llvm/lib/CodeGen/IntrinsicLowering.cpp b/llvm/lib/CodeGen/IntrinsicLowering.cpp index 61920a0e04ab59..fe450cba4a3332 100644 --- a/llvm/lib/CodeGen/IntrinsicLowering.cpp +++ b/llvm/lib/CodeGen/IntrinsicLowering.cpp @@ -312,6 +312,12 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0)); break; } + case Intrinsic::readsteadycounter: { + errs() << "WARNING: this target does not support the llvm.readsteadycounter" + << " intrinsic. 
It is being lowered to a constant 0\n"; + CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0)); + break; + } case Intrinsic::dbg_declare: case Intrinsic::dbg_label: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 892bfbd62f0d02..252b6e9997a710 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1127,8 +1127,9 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Action = TargetLowering::Custom; break; case ISD::READCYCLECOUNTER: - // READCYCLECOUNTER returns an i64, even if type legalization might have - // expanded that to several smaller types. + case ISD::READSTEADYCOUNTER: + // READCYCLECOUNTER and READSTEADYCOUNTER return an i64, even if type + // legalization might have expanded that to several smaller types. Action = TLI.getOperationAction(Node->getOpcode(), MVT::i64); break; case ISD::READ_REGISTER: @@ -3080,6 +3081,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Node->getOperand(0)); break; case ISD::READCYCLECOUNTER: + case ISD::READSTEADYCOUNTER: // If the target didn't expand this, just return 'zero' and preserve the // chain. 
Results.append(Node->getNumValues() - 1, diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index e73a0921a46f5d..a4ba261686c688 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2648,7 +2648,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::LLRINT: ExpandIntRes_XROUND_XRINT(N, Lo, Hi); break; case ISD::LOAD: ExpandIntRes_LOAD(cast(N), Lo, Hi); break; case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break; - case ISD::READCYCLECOUNTER: ExpandIntRes_READCYCLECOUNTER(N, Lo, Hi); break; + case ISD::READCYCLECOUNTER: + case ISD::READSTEADYCOUNTER: ExpandIntRes_READCOUNTER(N, Lo, Hi); break; case ISD::SDIV: ExpandIntRes_SDIV(N, Lo, Hi); break; case ISD::SIGN_EXTEND: ExpandIntRes_SIGN_EXTEND(N, Lo, Hi); break; case ISD::SIGN_EXTEND_INREG: ExpandIntRes_SIGN_EXTEND_INREG(N, Lo, Hi); break; @@ -4031,8 +4032,8 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, Lo, Hi); } -void DAGTypeLegalizer::ExpandIntRes_READCYCLECOUNTER(SDNode *N, SDValue &Lo, - SDValue &Hi) { +void DAGTypeLegalizer::ExpandIntRes_READCOUNTER(SDNode *N, SDValue &Lo, + SDValue &Hi) { SDLoc DL(N); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDVTList VTs = DAG.getVTList(NVT, NVT, MVT::Other); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 09f0bca8b8611e..91149871628574 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -439,7 +439,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_LOAD (LoadSDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandIntRes_READCYCLECOUNTER (SDNode *N, SDValue &Lo, SDValue &Hi); + void 
ExpandIntRes_READCOUNTER (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_SIGN_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 5ce1013f30fd1b..28664b2ed9052d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6781,6 +6781,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, DAG.setRoot(Res.getValue(1)); return; } + case Intrinsic::readsteadycounter: { + SDValue Op = getRoot(); + Res = DAG.getNode(ISD::READSTEADYCOUNTER, sdl, + DAG.getVTList(MVT::i64, MVT::Other), Op); + setValue(&I, Res); + DAG.setRoot(Res.getValue(1)); + return; + } case Intrinsic::bitreverse: setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl, getValue(I.getArgOperand(0)).getValueType(), diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index a28d834f0522f2..0fbd999694f104 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -104,6 +104,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::ATOMIC_STORE: return "AtomicStore"; case ISD::PCMARKER: return "PCMarker"; case ISD::READCYCLECOUNTER: return "ReadCycleCounter"; + case ISD::READSTEADYCOUNTER: return "ReadSteadyCounter"; case ISD::SRCVALUE: return "SrcValue"; case ISD::MDNODE_SDNODE: return "MDNode"; case ISD::EntryToken: return "EntryToken"; diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index d8302ba2b42608..dc766928e5dc94 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -964,6 +964,9 @@ void 
TargetLoweringBase::initActions() { // Most targets also ignore the @llvm.readcyclecounter intrinsic. setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Expand); + // Most targets also ignore the @llvm.readsteadycounter intrinsic. + setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64, Expand); + // ConstantFP nodes default to expand. Targets can either change this to // Legal, in which case all fp constants are legal, or use isFPImmLegal() // to optimize expansions for certain constants. diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 5458dfc68f3dfc..0d3b158a0df731 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1988,6 +1988,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, getActionDefinitionsBuilder(G_READCYCLECOUNTER) .legalFor({S64}); + getActionDefinitionsBuilder(G_READSTEADYCOUNTER).legalFor({S64}); + getActionDefinitionsBuilder(G_FENCE) .alwaysLegal(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 5323e4fc58de80..b174d57bd57656 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -4051,6 +4051,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case AMDGPU::G_CONSTANT: case AMDGPU::G_GLOBAL_VALUE: case AMDGPU::G_BLOCK_ADDR: + case AMDGPU::G_READSTEADYCOUNTER: case AMDGPU::G_READCYCLECOUNTER: { unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 83221f7ead37e1..56f0e716423955 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -468,6 +468,10 @@ SITargetLowering::SITargetLowering(const 
TargetMachine &TM, // On SI this is s_memtime and s_memrealtime on VI. setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal); + + if (Subtarget->hasSMemRealTime() || + Subtarget->getGeneration() >= AMDGPUSubtarget::GFX11) + setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64, Legal); setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Custom); if (Subtarget->has16BitInsts()) { diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td index f3096962e2f3e8..29651a8390399c 100644 --- a/llvm/lib/Target/AMDGPU/SMInstructions.td +++ b/llvm/lib/Target/AMDGPU/SMInstructions.td @@ -1068,6 +1068,20 @@ def : GCNPat < } } // let OtherPredicates = [HasShaderCyclesRegister] +let OtherPredicates = [HasSMemRealTime] in { +def : GCNPat < + (i64 (readsteadycounter)), + (S_MEMREALTIME) +>; +} // let OtherPredicates = [HasSMemRealTime] + +let SubtargetPredicate = isGFX11Plus in { +def : GCNPat < + (i64 (readsteadycounter)), + (S_SENDMSG_RTN_B64 (i32 /*MSG_RTN_GET_REALTIME=*/0x83)) +>; +} // let SubtargetPredicate = [isGFX11Plus] + def i32imm_zero : TImmLeaf ; diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 80a67ca8e368c6..7f58b312e7a201 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -490,6 +490,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f16, Expand); setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal); + if (STI.getSmVersion() >= 30 && STI.getPTXVersion() > 31) + setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64, Legal); setFP16OperationAction(ISD::SETCC, MVT::f16, Legal, Promote); setFP16OperationAction(ISD::SETCC, MVT::v2f16, Legal, Expand); diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 4322eaef9f467e..631136ad621464 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td 
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -3805,7 +3805,6 @@ def CALL_PROTOTYPE : include "NVPTXIntrinsics.td" - //----------------------------------- // Notes //----------------------------------- diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 133e2827008a87..477789a164ead2 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -6382,6 +6382,7 @@ def INT_PTX_SREG_GLOBALTIMER : } def: Pat <(i64 (readcyclecounter)), (INT_PTX_SREG_CLOCK64)>; +def: Pat <(i64 (readsteadycounter)), (INT_PTX_SREG_GLOBALTIMER)>; def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>; def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>; diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index c90c31aa27ef57..aaf2fef9545908 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -163,6 +163,9 @@ # DEBUG-NEXT: G_READCYCLECOUNTER (opcode {{[0-9]+}}): 1 type index, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: G_READSTEADYCOUNTER (opcode {{[0-9]+}}): 1 type index, 0 imm indices +# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_LOAD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. 
type index coverage check SKIPPED: user-defined predicate detected diff --git a/llvm/test/CodeGen/AMDGPU/readsteadycounter.ll b/llvm/test/CodeGen/AMDGPU/readsteadycounter.ll new file mode 100644 index 00000000000000..15f664c98182ae --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/readsteadycounter.ll @@ -0,0 +1,24 @@ +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX700 +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX900 +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX900 +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX1100 +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX1100 + +declare i64 @llvm.readsteadycounter() #0 + +; GCN-LABEL: {{^}}test_readsteadycounter: +; GFX700: s_mov_b32 s[[REG:[0-9]+]], 0 +; GFX900: s_memrealtime s[[[LO:[0-9]+]]:[[HI:[0-9]+]]] +; GFX900: s_memrealtime s[[[LO:[0-9]+]]:[[HI:[0-9]+]]] +; GFX1100: s_sendmsg_rtn_b64 s[[[LO:[0-9]+]]:[[HI:[0-9]+]]], sendmsg(MSG_RTN_GET_REALTIME) +; GFX1100: s_sendmsg_rtn_b64 s[[[LO:[0-9]+]]:[[HI:[0-9]+]]], sendmsg(MSG_RTN_GET_REALTIME) +define amdgpu_kernel void @test_readsteadycounter(ptr addrspace(1) %out) #0 { + %cycle0 = call i64 @llvm.readsteadycounter() + store volatile i64 %cycle0, ptr addrspace(1) %out + + %cycle1 = call i64 @llvm.readsteadycounter() + store volatile i64 %cycle1, ptr addrspace(1) %out + ret void +} + +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/NVPTX/intrinsics.ll b/llvm/test/CodeGen/NVPTX/intrinsics.ll index d84ee6754281e5..e16028074f4151 100644 --- a/llvm/test/CodeGen/NVPTX/intrinsics.ll +++ b/llvm/test/CodeGen/NVPTX/intrinsics.ll @@ -162,6 +162,17 @@ define i64 @test_cyclecounter() { ret i64 %ret } +; 
CHECK-LABEL: test_steadycounter +define i64 @test_steadycounter() { +; CHECK: mov.u64 %r{{.*}}, %globaltimer; + %a = tail call i64 @llvm.readsteadycounter() +; CHECK: mov.u64 %r{{.*}}, %globaltimer; + %b = tail call i64 @llvm.readsteadycounter() + %ret = add i64 %a, %b +; CHECK: ret + ret i64 %ret +} + declare float @llvm.fabs.f32(float) declare double @llvm.fabs.f64(double) declare float @llvm.nvvm.sqrt.f(float) @@ -178,3 +189,4 @@ declare i64 @llvm.nvvm.read.ptx.sreg.clock64() declare void @llvm.nvvm.exit() declare i64 @llvm.nvvm.read.ptx.sreg.globaltimer() declare i64 @llvm.readcyclecounter() +declare i64 @llvm.readsteadycounter() diff --git a/llvm/test/TableGen/GlobalISelCombinerEmitter/builtins/match-table-replacerreg.td b/llvm/test/TableGen/GlobalISelCombinerEmitter/builtins/match-table-replacerreg.td index cf57a247bc797d..622d1df7b381a8 100644 --- a/llvm/test/TableGen/GlobalISelCombinerEmitter/builtins/match-table-replacerreg.td +++ b/llvm/test/TableGen/GlobalISelCombinerEmitter/builtins/match-table-replacerreg.td @@ -26,13 +26,13 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ ReplaceTemp ]>; -// CHECK: const uint8_t *GenMyCombiner::getMatchTable() const { +// CHECK: const uint8_t *GenMyCombiner::getMatchTable() const { // CHECK-NEXT: constexpr static uint8_t MatchTable0[] = { -// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(65), GIMT_Encode2(181), /*)*//*default:*//*Label 2*/ GIMT_Encode4(558), -// CHECK-NEXT: /*TargetOpcode::G_UNMERGE_VALUES*//*Label 0*/ GIMT_Encode4(474), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), 
GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), -// CHECK-NEXT: /*TargetOpcode::G_FNEG*//*Label 1*/ GIMT_Encode4(526), -// CHECK-NEXT: // Label 0: @474 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 3*/ GIMT_Encode4(525), // Rule ID 1 // +// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(65), GIMT_Encode2(182), /*)*//*default:*//*Label 2*/ GIMT_Encode4(562), +// CHECK-NEXT: /*TargetOpcode::G_UNMERGE_VALUES*//*Label 0*/ GIMT_Encode4(478), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), 
GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), +// CHECK-NEXT: /*TargetOpcode::G_FNEG*//*Label 1*/ GIMT_Encode4(530), +// CHECK-NEXT: // Label 0: @478 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 3*/ 
GIMT_Encode4(529), // Rule ID 1 // // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule1Enabled), // CHECK-NEXT: GIM_CheckNumOperands, /*MI*/0, /*Expected*/3, // CHECK-NEXT: // MIs[0] a @@ -57,10 +57,10 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIR_ReplaceRegWithTempReg, /*OldInsnID*/0, /*OldOpIdx*/1, /*TempRegID*/0, // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, // CHECK-NEXT: GIR_Done, -// CHECK-NEXT: // Label 3: @525 +// CHECK-NEXT: // Label 3: @529 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 1: @526 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 4*/ GIMT_Encode4(557), // Rule ID 0 // +// CHECK-NEXT: // Label 1: @530 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 4*/ GIMT_Encode4(561), // Rule ID 0 // // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule0Enabled), // CHECK-NEXT: // MIs[0] dst // CHECK-NEXT: // No operand predicates @@ -75,10 +75,10 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIR_ReplaceReg, /*OldInsnID*/0, /*OldOpIdx*/0, /*NewInsnId*/1, /*NewOpIdx*/1, // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, // CHECK-NEXT: GIR_Done, -// CHECK-NEXT: // Label 4: @557 +// CHECK-NEXT: // Label 4: @561 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 2: @558 +// CHECK-NEXT: // Label 2: @562 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: }; // Size: 559 bytes +// CHECK-NEXT: }; // Size: 563 bytes // CHECK-NEXT: return MatchTable0; // CHECK-NEXT: } diff --git a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-imms.td b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-imms.td index d0c0eba9e3974a..f0ca65a87b76bc 100644 --- a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-imms.td +++ b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-imms.td @@ -32,14 +32,14 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ CImmInstTest1 ]>; -// CHECK: const uint8_t *GenMyCombiner::getMatchTable() const { +// CHECK: const uint8_t 
*GenMyCombiner::getMatchTable() const { // CHECK-NEXT: constexpr static uint8_t MatchTable0[] = { -// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(19), GIMT_Encode2(127), /*)*//*default:*//*Label 3*/ GIMT_Encode4(559), -// CHECK-NEXT: /*TargetOpcode::COPY*//*Label 0*/ GIMT_Encode4(442), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), 
-// CHECK-NEXT: /*TargetOpcode::G_CONSTANT*//*Label 1*/ GIMT_Encode4(473), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), -// CHECK-NEXT: /*TargetOpcode::G_ZEXT*//*Label 2*/ GIMT_Encode4(519), -// CHECK-NEXT: // Label 0: @442 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 4*/ GIMT_Encode4(472), // Rule ID 0 // +// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(19), GIMT_Encode2(128), /*)*//*default:*//*Label 3*/ GIMT_Encode4(563), +// CHECK-NEXT: /*TargetOpcode::COPY*//*Label 0*/ GIMT_Encode4(446), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), 
GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), +// CHECK-NEXT: /*TargetOpcode::G_CONSTANT*//*Label 1*/ GIMT_Encode4(477), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), +// CHECK-NEXT: /*TargetOpcode::G_ZEXT*//*Label 2*/ GIMT_Encode4(523), +// CHECK-NEXT: // Label 0: @446 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 4*/ GIMT_Encode4(476), // Rule ID 0 // // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule0Enabled), // CHECK-NEXT: GIM_CheckType, /*MI*/0, /*Op*/1, /*Type*/GILLT_s32, // CHECK-NEXT: // MIs[0] a @@ -51,10 +51,10 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIR_AddImm8, /*InsnID*/0, /*Imm*/0, // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, // CHECK-NEXT: GIR_Done, -// CHECK-NEXT: // Label 4: @472 +// CHECK-NEXT: // Label 4: @476 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 1: @473 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 5*/ GIMT_Encode4(518), // Rule ID 2 // +// CHECK-NEXT: // Label 1: @477 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 5*/ GIMT_Encode4(522), // Rule ID 2 // // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule2Enabled), // CHECK-NEXT: GIM_CheckType, /*MI*/0, /*Op*/1, /*Type*/GILLT_s32, // CHECK-NEXT: // MIs[0] a @@ -66,10 +66,10 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIR_AddCImm, /*InsnID*/0, /*Type*/GILLT_s32, /*Imm*/GIMT_Encode8(42), // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, // CHECK-NEXT: GIR_Done, -// CHECK-NEXT: // Label 5: @518 +// CHECK-NEXT: // Label 5: @522 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 2: @519 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 6*/ GIMT_Encode4(558), // Rule ID 1 // +// CHECK-NEXT: // Label 2: @523 +// 
CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 6*/ GIMT_Encode4(562), // Rule ID 1 // // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule1Enabled), // CHECK-NEXT: // MIs[0] a // CHECK-NEXT: // No operand predicates @@ -83,10 +83,10 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIR_AddSimpleTempRegister, /*InsnID*/0, /*TempRegID*/0, // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, // CHECK-NEXT: GIR_Done, -// CHECK-NEXT: // Label 6: @558 +// CHECK-NEXT: // Label 6: @562 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 3: @559 +// CHECK-NEXT: // Label 3: @563 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: }; // Size: 560 bytes +// CHECK-NEXT: }; // Size: 564 bytes // CHECK-NEXT: return MatchTable0; // CHECK-NEXT: } diff --git a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-intrinsics.td b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-intrinsics.td index 94cc3e58dfc9a1..a446fb72298c25 100644 --- a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-intrinsics.td +++ b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-intrinsics.td @@ -27,10 +27,9 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ SpecialIntrins ]>; - -// CHECK: const uint8_t *GenMyCombiner::getMatchTable() const { +// CHECK: const uint8_t *GenMyCombiner::getMatchTable() const { // CHECK-NEXT: constexpr static uint8_t MatchTable0[] = { -// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(114), GIMT_Encode2(116), /*)*//*default:*//*Label 2*/ GIMT_Encode4(132), +// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(115), GIMT_Encode2(117), /*)*//*default:*//*Label 2*/ GIMT_Encode4(132), // CHECK-NEXT: /*TargetOpcode::G_INTRINSIC*//*Label 0*/ GIMT_Encode4(18), // CHECK-NEXT: /*TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS*//*Label 1*/ GIMT_Encode4(73), // CHECK-NEXT: // Label 0: @18 diff --git a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-patfrag-root.td 
b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-patfrag-root.td index fdcb31e9679787..d3c202c4cb01d4 100644 --- a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-patfrag-root.td +++ b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-patfrag-root.td @@ -26,9 +26,9 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ Test0 ]>; -// CHECK: const uint8_t *GenMyCombiner::getMatchTable() const { +// CHECK: const uint8_t *GenMyCombiner::getMatchTable() const { // CHECK-NEXT: constexpr static uint8_t MatchTable0[] = { -// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(119), GIMT_Encode2(182), /*)*//*default:*//*Label 3*/ GIMT_Encode4(380), +// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(120), GIMT_Encode2(183), /*)*//*default:*//*Label 3*/ GIMT_Encode4(380), // CHECK-NEXT: /*TargetOpcode::G_TRUNC*//*Label 0*/ GIMT_Encode4(262), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), // CHECK-NEXT: /*TargetOpcode::G_ZEXT*//*Label 1*/ GIMT_Encode4(298), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), // CHECK-NEXT: 
/*TargetOpcode::G_FPEXT*//*Label 2*/ GIMT_Encode4(344), diff --git a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td index 5ec44b5e08d855..57ad0009b5bd6a 100644 --- a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td +++ b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td @@ -132,15 +132,15 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // Verify match table. // CHECK: const uint8_t *GenMyCombiner::getMatchTable() const { // CHECK-NEXT: constexpr static uint8_t MatchTable0[] = { -// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(19), GIMT_Encode2(127), /*)*//*default:*//*Label 6*/ GIMT_Encode4(657), -// CHECK-NEXT: /*TargetOpcode::COPY*//*Label 0*/ GIMT_Encode4(442), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), -// CHECK-NEXT: /*TargetOpcode::G_AND*//*Label 1*/ GIMT_Encode4(484), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), -// 
CHECK-NEXT: /*TargetOpcode::G_STORE*//*Label 2*/ GIMT_Encode4(537), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), -// CHECK-NEXT: /*TargetOpcode::G_TRUNC*//*Label 3*/ GIMT_Encode4(579), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), -// CHECK-NEXT: /*TargetOpcode::G_SEXT*//*Label 4*/ GIMT_Encode4(604), GIMT_Encode4(0), -// CHECK-NEXT: /*TargetOpcode::G_ZEXT*//*Label 5*/ GIMT_Encode4(617), -// CHECK-NEXT: // Label 0: @442 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 7*/ GIMT_Encode4(471), // Rule ID 4 // +// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(19), GIMT_Encode2(128), /*)*//*default:*//*Label 6*/ GIMT_Encode4(661), +// CHECK-NEXT: /*TargetOpcode::COPY*//*Label 0*/ GIMT_Encode4(446), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), +// CHECK-NEXT: /*TargetOpcode::G_AND*//*Label 1*/ GIMT_Encode4(488), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), 
GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), +// CHECK-NEXT: /*TargetOpcode::G_STORE*//*Label 2*/ GIMT_Encode4(541), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), +// CHECK-NEXT: /*TargetOpcode::G_TRUNC*//*Label 3*/ GIMT_Encode4(583), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), +// CHECK-NEXT: /*TargetOpcode::G_SEXT*//*Label 4*/ GIMT_Encode4(608), GIMT_Encode4(0), +// CHECK-NEXT: /*TargetOpcode::G_ZEXT*//*Label 5*/ GIMT_Encode4(621), +// CHECK-NEXT: // Label 0: @446 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 7*/ GIMT_Encode4(475), // Rule ID 4 // // CHECK-NEXT: GIM_CheckFeatures, GIMT_Encode2(GIFBS_HasAnswerToEverything), // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule3Enabled), // CHECK-NEXT: // MIs[0] a @@ -155,8 +155,8 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: // Combiner Rule #3: InstTest1 // CHECK-NEXT: GIR_CustomAction, GIMT_Encode2(GICXXCustomAction_CombineApplyGICombiner0), // CHECK-NEXT: GIR_Done, -// CHECK-NEXT: // Label 7: @471 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 8*/ GIMT_Encode4(483), // Rule ID 3 // +// CHECK-NEXT: 
// Label 7: @475 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 8*/ GIMT_Encode4(487), // Rule ID 3 // // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule2Enabled), // CHECK-NEXT: // MIs[0] a // CHECK-NEXT: // No operand predicates @@ -165,10 +165,10 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: // Combiner Rule #2: InstTest0 // CHECK-NEXT: GIR_CustomAction, GIMT_Encode2(GICXXCustomAction_CombineApplyGICombiner1), // CHECK-NEXT: GIR_Done, -// CHECK-NEXT: // Label 8: @483 +// CHECK-NEXT: // Label 8: @487 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 1: @484 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 9*/ GIMT_Encode4(536), // Rule ID 6 // +// CHECK-NEXT: // Label 1: @488 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 9*/ GIMT_Encode4(540), // Rule ID 6 // // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule5Enabled), // CHECK-NEXT: GIM_CheckType, /*MI*/0, /*Op*/2, /*Type*/GILLT_s32, // CHECK-NEXT: // MIs[0] dst @@ -186,10 +186,10 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/1, /*OpIdx*/1, // z // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, // CHECK-NEXT: GIR_Done, -// CHECK-NEXT: // Label 9: @536 +// CHECK-NEXT: // Label 9: @540 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 2: @537 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 10*/ GIMT_Encode4(578), // Rule ID 5 // +// CHECK-NEXT: // Label 2: @541 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 10*/ GIMT_Encode4(582), // Rule ID 5 // // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule4Enabled), // CHECK-NEXT: // MIs[0] tmp // CHECK-NEXT: GIM_RecordInsnIgnoreCopies, /*DefineMI*/1, /*MI*/0, /*OpIdx*/0, // MIs[1] @@ -207,32 +207,32 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIR_CustomAction, GIMT_Encode2(GICXXCustomAction_CombineApplyGICombiner2), // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, // CHECK-NEXT: GIR_Done, 
-// CHECK-NEXT: // Label 10: @578 +// CHECK-NEXT: // Label 10: @582 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 3: @579 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 11*/ GIMT_Encode4(591), // Rule ID 0 // +// CHECK-NEXT: // Label 3: @583 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 11*/ GIMT_Encode4(595), // Rule ID 0 // // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule0Enabled), // CHECK-NEXT: // Combiner Rule #0: WipOpcodeTest0; wip_match_opcode 'G_TRUNC' // CHECK-NEXT: GIR_CustomAction, GIMT_Encode2(GICXXCustomAction_CombineApplyGICombiner0), // CHECK-NEXT: GIR_Done, -// CHECK-NEXT: // Label 11: @591 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 12*/ GIMT_Encode4(603), // Rule ID 1 // +// CHECK-NEXT: // Label 11: @595 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 12*/ GIMT_Encode4(607), // Rule ID 1 // // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule1Enabled), // CHECK-NEXT: // Combiner Rule #1: WipOpcodeTest1; wip_match_opcode 'G_TRUNC' // CHECK-NEXT: GIR_CustomAction, GIMT_Encode2(GICXXCustomAction_CombineApplyGICombiner0), // CHECK-NEXT: GIR_Done, -// CHECK-NEXT: // Label 12: @603 +// CHECK-NEXT: // Label 12: @607 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 4: @604 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 13*/ GIMT_Encode4(616), // Rule ID 2 // +// CHECK-NEXT: // Label 4: @608 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 13*/ GIMT_Encode4(620), // Rule ID 2 // // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule1Enabled), // CHECK-NEXT: // Combiner Rule #1: WipOpcodeTest1; wip_match_opcode 'G_SEXT' // CHECK-NEXT: GIR_CustomAction, GIMT_Encode2(GICXXCustomAction_CombineApplyGICombiner0), // CHECK-NEXT: GIR_Done, -// CHECK-NEXT: // Label 13: @616 +// CHECK-NEXT: // Label 13: @620 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 5: @617 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 14*/ GIMT_Encode4(656), // Rule ID 7 // +// 
CHECK-NEXT: // Label 5: @621 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 14*/ GIMT_Encode4(660), // Rule ID 7 // // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule6Enabled), // CHECK-NEXT: // MIs[0] dst // CHECK-NEXT: // No operand predicates @@ -247,10 +247,10 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIR_AddSimpleTempRegister, /*InsnID*/0, /*TempRegID*/0, // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, // CHECK-NEXT: GIR_Done, -// CHECK-NEXT: // Label 14: @656 +// CHECK-NEXT: // Label 14: @660 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 6: @657 +// CHECK-NEXT: // Label 6: @661 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: }; // Size: 658 bytes +// CHECK-NEXT: }; // Size: 662 bytes // CHECK-NEXT: return MatchTable0; // CHECK-NEXT: } diff --git a/llvm/test/TableGen/GlobalISelEmitter.td b/llvm/test/TableGen/GlobalISelEmitter.td index 3e651265b352ec..f79b792b37a36c 100644 --- a/llvm/test/TableGen/GlobalISelEmitter.td +++ b/llvm/test/TableGen/GlobalISelEmitter.td @@ -518,7 +518,7 @@ def : Pat<(frag GPR32:$src1, complex:$src2, complex:$src3), // R00O-NEXT: GIM_Reject, // R00O: // Label [[DEFAULT_NUM]]: @[[DEFAULT]] // R00O-NEXT: GIM_Reject, -// R00O-NEXT: }; // Size: 2019 bytes +// R00O-NEXT: }; // Size: 2023 bytes def INSNBOB : I<(outs GPR32:$dst), (ins GPR32:$src1, GPR32:$src2, GPR32:$src3, GPR32:$src4), [(set GPR32:$dst, From 1f90af183d7a007584fac041eaca9f126a1a942f Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Tue, 13 Feb 2024 16:04:41 +0000 Subject: [PATCH 016/240] [TableGen] Do not speculatively grow RegUnitSets. NFC. This seems to be a trick to avoid copying a RegUnitSet, but it can be done more simply using std::move. 
--- llvm/utils/TableGen/CodeGenRegisters.cpp | 25 ++++++++---------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/llvm/utils/TableGen/CodeGenRegisters.cpp b/llvm/utils/TableGen/CodeGenRegisters.cpp index dd1850752aad61..0b671271bf0c9c 100644 --- a/llvm/utils/TableGen/CodeGenRegisters.cpp +++ b/llvm/utils/TableGen/CodeGenRegisters.cpp @@ -1985,18 +1985,14 @@ void CodeGenRegBank::computeRegUnitSets() { if (!RC.Allocatable || RC.Artificial || !RC.GeneratePressureSet) continue; - // Speculatively grow the RegUnitSets to hold the new set. - RegUnitSet &RUSet = RegUnitSets.emplace_back(); - RUSet.Name = RC.getName(); - // Compute a sorted list of units in this class. + RegUnitSet RUSet; + RUSet.Name = RC.getName(); RC.buildRegUnitSet(*this, RUSet.Units); // Find an existing RegUnitSet. - std::vector<RegUnitSet>::const_iterator SetI = - findRegUnitSet(RegUnitSets, RUSet); - if (SetI != std::prev(RegUnitSets.end())) - RegUnitSets.pop_back(); + if (findRegUnitSet(RegUnitSets, RUSet) == RegUnitSets.end()) + RegUnitSets.push_back(std::move(RUSet)); } if (RegUnitSets.empty()) @@ -2042,11 +2038,9 @@ void CodeGenRegBank::computeRegUnitSets() { if (Intersection.empty()) continue; - // Speculatively grow the RegUnitSets to hold the new set. - RegUnitSet &RUSet = RegUnitSets.emplace_back(); + RegUnitSet RUSet; RUSet.Name = RegUnitSets[Idx].Name + "_with_" + RegUnitSets[SearchIdx].Name; - std::set_union(RegUnitSets[Idx].Units.begin(), RegUnitSets[Idx].Units.end(), RegUnitSets[SearchIdx].Units.begin(), @@ -2054,16 +2048,13 @@ void CodeGenRegBank::computeRegUnitSets() { std::inserter(RUSet.Units, RUSet.Units.begin())); // Find an existing RegUnitSet, or add the union to the unique sets. 
- std::vector<RegUnitSet>::const_iterator SetI = - findRegUnitSet(RegUnitSets, RUSet); - if (SetI != std::prev(RegUnitSets.end())) - RegUnitSets.pop_back(); - else { - LLVM_DEBUG(dbgs() << "UnitSet " << RegUnitSets.size() - 1 << " " + if (findRegUnitSet(RegUnitSets, RUSet) == RegUnitSets.end()) { + LLVM_DEBUG(dbgs() << "UnitSet " << RegUnitSets.size() << " " << RUSet.Name << ":"; for (auto &U : RUSet.Units) printRegUnitName(U); dbgs() << "\n";); + RegUnitSets.push_back(std::move(RUSet)); } } } From 8ba4ff392538dac7b803cfdf5bde217ff538a644 Mon Sep 17 00:00:00 2001 From: "S. Bharadwaj Yadavalli" Date: Tue, 13 Feb 2024 11:12:03 -0500 Subject: [PATCH 017/240] [DirectX][NFC] Change specification of overload types and attribute in DXIL.td (#81184) - Specify overload types of DXIL Operation as list of types instead of a string. - Add supported DXIL type record definitions to `DXIL.td` leveraging `LLVMType` to avoid duplicate definitions. - Spell out DXIL Operation Attribute specification string. - Make corresponding changes to process the records in DXILEmitter.cpp --- llvm/lib/Target/DirectX/DXIL.td | 44 +++++--- llvm/utils/TableGen/DXILEmitter.cpp | 160 ++++++++++++++++++---------- 2 files changed, 133 insertions(+), 71 deletions(-) diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td index 3f3ace5a1a3a36..52158139a2584e 100644 --- a/llvm/lib/Target/DirectX/DXIL.td +++ b/llvm/lib/Target/DirectX/DXIL.td @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// include "llvm/IR/Intrinsics.td" +include "llvm/IR/Attributes.td" // Abstract representation of the class a DXIL Operation belongs to. 
class DXILOpClass { @@ -34,12 +35,29 @@ def BinaryUintCategory : DXILOpCategory<"Binary uint">; def UnaryFloatCategory : DXILOpCategory<"Unary float">; def ComputeIDCategory : DXILOpCategory<"Compute/Mesh/Amplification shader">; +// Following are the scalar types supported by DXIL operations and are synonymous +// to llvm_*_ty defined for readability and ease of use in the context of this file. + +def voidTy : LLVMType; + +// Floating point types +def f16Ty : LLVMType; +def f32Ty : LLVMType; +def f64Ty : LLVMType; + +// Integer types +def i1Ty : LLVMType; +def i8Ty : LLVMType; +def i16Ty : LLVMType; +def i32Ty : LLVMType; +def i64Ty : LLVMType; + // The parameter description for a DXIL operation class DXILOpParameter { int Pos = pos; // Position in parameter list - string LLVMType = type; // LLVM type name, $o for overload, $r for resource + string Type = type; // LLVM type name, $o for overload, $r for resource // type, $cb for legacy cbuffer, $u4 for u4 struct string Name = name; // Short, unique parameter name string Doc = doc; // Description of this parameter @@ -56,9 +74,11 @@ class DXILOperationDesc { DXILOpCategory OpCategory; // Category of the operation string Doc = ""; // Description of the operation list Params = []; // Parameter list of the operation - string OverloadTypes = ""; // Overload types, if applicable - string Attributes = ""; // Attribute shorthands: rn=does not access - // memory,ro=only reads from memory, + list OverloadTypes = []; // Overload types, if applicable + EnumAttr Attribute; // Operation Attribute. Leverage attributes defined in Attributes.td + // ReadNone - operation does not access memory. + // ReadOnly - only reads from memory. 
+ // "ReadMemory" - reads memory bit IsDerivative = 0; // Whether this is some kind of derivative bit IsGradient = 0; // Whether this requires a gradient calculation bit IsFeedback = 0; // Whether this is a sampler feedback operation @@ -71,7 +91,7 @@ class DXILOperationDesc { } class DXILOperation params, + list oloadTypes, EnumAttr attrs, list params, list statsGroup = []> : DXILOperationDesc { let OpName = name; let OpCode = opCode; @@ -80,7 +100,7 @@ class DXILOperation { Intrinsic llvm_intrinsic = llvm_intrinsic_; } def Sin : DXILOperation<"Sin", 13, UnaryClass, UnaryFloatCategory, "returns sine(theta) for theta in radians.", - "half;float;", "rn", + [f16Ty,f32Ty], ReadNone, [ DXILOpParameter<0, "$o", "", "operation result">, DXILOpParameter<1, "i32", "opcode", "DXIL opcode">, @@ -98,7 +118,7 @@ def Sin : DXILOperation<"Sin", 13, UnaryClass, UnaryFloatCategory, "returns sine LLVMIntrinsic; def UMax : DXILOperation< "UMax", 39, BinaryClass, BinaryUintCategory, "unsigned integer maximum. UMax(a,b) = a > b ? 
a : b", - "i16;i32;i64;", "rn", + [i16Ty,i32Ty,i64Ty], ReadNone, [ DXILOpParameter<0, "$o", "", "operation result">, DXILOpParameter<1, "i32", "opcode", "DXIL opcode">, @@ -108,7 +128,7 @@ def UMax : DXILOperation< "UMax", 39, BinaryClass, BinaryUintCategory, "unsign ["uints"]>, LLVMIntrinsic; -def ThreadId : DXILOperation< "ThreadId", 93, ThreadIdClass, ComputeIDCategory, "reads the thread ID", "i32;", "rn", +def ThreadId : DXILOperation< "ThreadId", 93, ThreadIdClass, ComputeIDCategory, "reads the thread ID", [i32Ty], ReadNone, [ DXILOpParameter<0, "i32", "", "thread ID component">, DXILOpParameter<1, "i32", "opcode", "DXIL opcode">, @@ -116,7 +136,7 @@ def ThreadId : DXILOperation< "ThreadId", 93, ThreadIdClass, ComputeIDCategory, ]>, LLVMIntrinsic; -def GroupId : DXILOperation< "GroupId", 94, GroupIdClass, ComputeIDCategory, "reads the group ID (SV_GroupID)", "i32;", "rn", +def GroupId : DXILOperation< "GroupId", 94, GroupIdClass, ComputeIDCategory, "reads the group ID (SV_GroupID)", [i32Ty], ReadNone, [ DXILOpParameter<0, "i32", "", "group ID component">, DXILOpParameter<1, "i32", "opcode", "DXIL opcode">, @@ -125,7 +145,7 @@ def GroupId : DXILOperation< "GroupId", 94, GroupIdClass, ComputeIDCategory, "r LLVMIntrinsic; def ThreadIdInGroup : DXILOperation< "ThreadIdInGroup", 95, ThreadIdInGroupClass, ComputeIDCategory, - "reads the thread ID within the group (SV_GroupThreadID)", "i32;", "rn", + "reads the thread ID within the group (SV_GroupThreadID)", [i32Ty], ReadNone, [ DXILOpParameter<0, "i32", "", "thread ID in group component">, DXILOpParameter<1, "i32", "opcode", "DXIL opcode">, @@ -134,7 +154,7 @@ def ThreadIdInGroup : DXILOperation< "ThreadIdInGroup", 95, ThreadIdInGroupClas LLVMIntrinsic; def FlattenedThreadIdInGroup : DXILOperation< "FlattenedThreadIdInGroup", 96, FlattenedThreadIdInGroupClass, ComputeIDCategory, - "provides a flattened index for a given thread within a given group (SV_GroupIndex)", "i32;", "rn", + "provides a flattened index for a 
given thread within a given group (SV_GroupIndex)", [i32Ty], ReadNone, [ DXILOpParameter<0, "i32", "", "result">, DXILOpParameter<1, "i32", "opcode", "DXIL opcode"> diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp index 25e818a0c54325..3378a904ac404d 100644 --- a/llvm/utils/TableGen/DXILEmitter.cpp +++ b/llvm/utils/TableGen/DXILEmitter.cpp @@ -49,11 +49,11 @@ struct DXILOperationDesc { StringRef Doc; // the documentation description of this instruction SmallVector Params; // the operands that this instruction takes - StringRef OverloadTypes; // overload types if applicable - StringRef FnAttr; // attribute shorthands: rn=does not access - // memory,ro=only reads from memory + SmallVector OverloadTypes; // overload types if applicable + StringRef Attr; // operation attribute; reference to string representation + // of llvm::Attribute::AttrKind StringRef Intrinsic; // The llvm intrinsic map to OpName. Default is "" which - // means no map exist + // means no map exists bool IsDeriv = false; // whether this is some kind of derivative bool IsGradient = false; // whether this requires a gradient calculation bool IsFeedback = false; // whether this is a sampler feedback op @@ -70,37 +70,32 @@ struct DXILOperationDesc { int OverloadParamIndex; // parameter index which control the overload. // When < 0, should be only 1 overload type. SmallVector counters; // counters for this inst. - DXILOperationDesc(const Record *R) { - OpName = R->getValueAsString("OpName"); - OpCode = R->getValueAsInt("OpCode"); - OpClass = R->getValueAsDef("OpClass")->getValueAsString("Name"); - Category = R->getValueAsDef("OpCategory")->getValueAsString("Name"); - - if (R->getValue("llvm_intrinsic")) { - auto *IntrinsicDef = R->getValueAsDef("llvm_intrinsic"); - auto DefName = IntrinsicDef->getName(); - assert(DefName.starts_with("int_") && "invalid intrinsic name"); - // Remove the int_ from intrinsic name. 
- Intrinsic = DefName.substr(4); - } - - Doc = R->getValueAsString("Doc"); - - ListInit *ParamList = R->getValueAsListInit("Params"); - OverloadParamIndex = -1; - for (unsigned I = 0; I < ParamList->size(); ++I) { - Record *Param = ParamList->getElementAsRecord(I); - Params.emplace_back(DXILParameter(Param)); - auto &CurParam = Params.back(); - if (CurParam.Kind >= ParameterKind::OVERLOAD) - OverloadParamIndex = I; - } - OverloadTypes = R->getValueAsString("OverloadTypes"); - FnAttr = R->getValueAsString("Attributes"); - } + DXILOperationDesc(const Record *); }; } // end anonymous namespace +// Convert DXIL type name string to dxil::ParameterKind +// +// @param typeNameStr Type name string +// @return ParameterKind as defined in llvm/Support/DXILABI.h +static ParameterKind getDXILTypeNameToKind(StringRef typeNameStr) { + return StringSwitch(typeNameStr) + .Case("voidTy", ParameterKind::VOID) + .Case("f16Ty", ParameterKind::HALF) + .Case("f32Ty", ParameterKind::FLOAT) + .Case("f64Ty", ParameterKind::DOUBLE) + .Case("i1Ty", ParameterKind::I1) + .Case("i8Ty", ParameterKind::I8) + .Case("i16Ty", ParameterKind::I16) + .Case("i32Ty", ParameterKind::I32) + .Case("i64Ty", ParameterKind::I64) + .Case("overloadTy", ParameterKind::OVERLOAD) + .Case("handleTy", ParameterKind::DXIL_HANDLE) + .Case("cbufferRetTy", ParameterKind::CBUFFER_RET) + .Case("resourceRetTy", ParameterKind::RESOURCE_RET) + .Default(ParameterKind::INVALID); +} + static ParameterKind parameterTypeNameToKind(StringRef Name) { return StringSwitch(Name) .Case("void", ParameterKind::VOID) @@ -119,10 +114,44 @@ static ParameterKind parameterTypeNameToKind(StringRef Name) { .Default(ParameterKind::INVALID); } +DXILOperationDesc::DXILOperationDesc(const Record *R) { + OpName = R->getValueAsString("OpName"); + OpCode = R->getValueAsInt("OpCode"); + OpClass = R->getValueAsDef("OpClass")->getValueAsString("Name"); + Category = R->getValueAsDef("OpCategory")->getValueAsString("Name"); + + if 
(R->getValue("llvm_intrinsic")) { + auto *IntrinsicDef = R->getValueAsDef("llvm_intrinsic"); + auto DefName = IntrinsicDef->getName(); + assert(DefName.starts_with("int_") && "invalid intrinsic name"); + // Remove the int_ from intrinsic name. + Intrinsic = DefName.substr(4); + } + + Doc = R->getValueAsString("Doc"); + + ListInit *ParamList = R->getValueAsListInit("Params"); + OverloadParamIndex = -1; + for (unsigned I = 0; I < ParamList->size(); ++I) { + Record *Param = ParamList->getElementAsRecord(I); + Params.emplace_back(DXILParameter(Param)); + auto &CurParam = Params.back(); + if (CurParam.Kind >= ParameterKind::OVERLOAD) + OverloadParamIndex = I; + } + ListInit *OverloadTypeList = R->getValueAsListInit("OverloadTypes"); + + for (unsigned I = 0; I < OverloadTypeList->size(); ++I) { + Record *R = OverloadTypeList->getElementAsRecord(I); + OverloadTypes.emplace_back(getDXILTypeNameToKind(R->getNameInitAsString())); + } + Attr = StringRef(R->getValue("Attribute")->getNameInitAsString()); +} + DXILParameter::DXILParameter(const Record *R) { Name = R->getValueAsString("Name"); Pos = R->getValueAsInt("Pos"); - Kind = parameterTypeNameToKind(R->getValueAsString("LLVMType")); + Kind = parameterTypeNameToKind(R->getValueAsString("Type")); if (R->getValue("Doc")) Doc = R->getValueAsString("Doc"); IsConst = R->getValueAsBit("IsConstant"); @@ -267,38 +296,51 @@ static void emitDXILIntrinsicMap(std::vector &Ops, OS << "\n"; } -static std::string emitDXILOperationFnAttr(StringRef FnAttr) { - return StringSwitch(FnAttr) - .Case("rn", "Attribute::ReadNone") - .Case("ro", "Attribute::ReadOnly") +// Convert operation attribute string to Attribute enum +// +// @param Attr string reference +// @return std::string Attribute enum string +static std::string emitDXILOperationAttr(StringRef Attr) { + return StringSwitch(Attr) + .Case("ReadNone", "Attribute::ReadNone") + .Case("ReadOnly", "Attribute::ReadOnly") .Default("Attribute::None"); } -static std::string 
getOverloadKind(StringRef Overload) { - return StringSwitch(Overload) - .Case("half", "OverloadKind::HALF") - .Case("float", "OverloadKind::FLOAT") - .Case("double", "OverloadKind::DOUBLE") - .Case("i1", "OverloadKind::I1") - .Case("i16", "OverloadKind::I16") - .Case("i32", "OverloadKind::I32") - .Case("i64", "OverloadKind::I64") - .Case("udt", "OverloadKind::UserDefineType") - .Case("obj", "OverloadKind::ObjectType") - .Default("OverloadKind::VOID"); +static std::string overloadKindStr(ParameterKind Overload) { + switch (Overload) { + case ParameterKind::HALF: + return "OverloadKind::HALF"; + case ParameterKind::FLOAT: + return "OverloadKind::FLOAT"; + case ParameterKind::DOUBLE: + return "OverloadKind::DOUBLE"; + case ParameterKind::I1: + return "OverloadKind::I1"; + case ParameterKind::I8: + return "OverloadKind::I8"; + case ParameterKind::I16: + return "OverloadKind::I16"; + case ParameterKind::I32: + return "OverloadKind::I32"; + case ParameterKind::I64: + return "OverloadKind::I64"; + case ParameterKind::VOID: + return "OverloadKind::VOID"; + default: + return "OverloadKind::UNKNOWN"; + } } -static std::string getDXILOperationOverload(StringRef Overloads) { - SmallVector OverloadStrs; - Overloads.split(OverloadStrs, ';', /*MaxSplit*/ -1, /*KeepEmpty*/ false); +static std::string +getDXILOperationOverloads(SmallVector Overloads) { // Format is: OverloadKind::FLOAT | OverloadKind::HALF - assert(!OverloadStrs.empty() && "Invalid overloads"); - auto It = OverloadStrs.begin(); + auto It = Overloads.begin(); std::string Result; raw_string_ostream OS(Result); - OS << getOverloadKind(*It); - for (++It; It != OverloadStrs.end(); ++It) { - OS << " | " << getOverloadKind(*It); + OS << overloadKindStr(*It); + for (++It; It != Overloads.end(); ++It) { + OS << " | " << overloadKindStr(*It); } return OS.str(); } @@ -368,8 +410,8 @@ static void emitDXILOperationTable(std::vector &Ops, OS << " { dxil::OpCode::" << Op.OpName << ", " << OpStrings.get(Op.OpName.str()) << ", 
OpCodeClass::" << Op.OpClass << ", " << OpClassStrings.get(getDXILOpClassName(Op.OpClass)) << ", " - << getDXILOperationOverload(Op.OverloadTypes) << ", " - << emitDXILOperationFnAttr(Op.FnAttr) << ", " << Op.OverloadParamIndex + << getDXILOperationOverloads(Op.OverloadTypes) << ", " + << emitDXILOperationAttr(Op.Attr) << ", " << Op.OverloadParamIndex << ", " << Op.Params.size() << ", " << Parameters.get(ParameterMap[Op.OpClass]) << " },\n"; } From 1d8479225a8c1efc8c90511e6c7fe608ff38163c Mon Sep 17 00:00:00 2001 From: David Spickett Date: Tue, 13 Feb 2024 16:14:03 +0000 Subject: [PATCH 018/240] [clang][Driver] Small correction to print-runtime-dir --- clang/include/clang/Driver/Options.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 187b845ddf3c7b..c625d0dd1c0c72 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5339,7 +5339,7 @@ def print_rocm_search_dirs : Flag<["-", "--"], "print-rocm-search-dirs">, HelpText<"Print the paths used for finding ROCm installation">, Visibility<[ClangOption, CLOption]>; def print_runtime_dir : Flag<["-", "--"], "print-runtime-dir">, - HelpText<"Print the directory pathname containing clangs runtime libraries">, + HelpText<"Print the directory pathname containing Clang's runtime libraries">, Visibility<[ClangOption, CLOption]>; def print_diagnostic_options : Flag<["-", "--"], "print-diagnostic-options">, HelpText<"Print all of Clang's warning options">, From d58c128bc42b8a9cc45516ba9fe9e6a3c322d7b3 Mon Sep 17 00:00:00 2001 From: Zequan Wu Date: Tue, 13 Feb 2024 11:38:02 -0500 Subject: [PATCH 019/240] [lldb-dap][NFC] Add Breakpoint struct to share common logic. (#80753) This adds a layer between `SourceBreakpoint`/`FunctionBreakpoint` and `BreakpointBase` to have better separation and encapsulation so we are not directly operating on `SBBreakpoint`.
I basically moved the `SBBreakpoint` and the methods that require it from `BreakpointBase` to `Breakpoint`. This makes it easier to add support for data watchpoints by sharing the logic inside `BreakpointBase`. --- lldb/tools/lldb-dap/Breakpoint.cpp | 76 +++++ lldb/tools/lldb-dap/Breakpoint.h | 33 ++ lldb/tools/lldb-dap/BreakpointBase.cpp | 299 +---------------- lldb/tools/lldb-dap/BreakpointBase.h | 33 +- lldb/tools/lldb-dap/CMakeLists.txt | 1 + lldb/tools/lldb-dap/FunctionBreakpoint.cpp | 12 +- lldb/tools/lldb-dap/FunctionBreakpoint.h | 4 +- lldb/tools/lldb-dap/JSONUtils.cpp | 46 +-- lldb/tools/lldb-dap/JSONUtils.h | 5 +- lldb/tools/lldb-dap/SourceBreakpoint.cpp | 304 +++++++++++++++++- lldb/tools/lldb-dap/SourceBreakpoint.h | 30 +- lldb/tools/lldb-dap/lldb-dap.cpp | 17 +- .../gn/secondary/lldb/tools/lldb-dap/BUILD.gn | 1 + 13 files changed, 459 insertions(+), 402 deletions(-) create mode 100644 lldb/tools/lldb-dap/Breakpoint.cpp create mode 100644 lldb/tools/lldb-dap/Breakpoint.h diff --git a/lldb/tools/lldb-dap/Breakpoint.cpp b/lldb/tools/lldb-dap/Breakpoint.cpp new file mode 100644 index 00000000000000..0c33d4b114d760 --- /dev/null +++ b/lldb/tools/lldb-dap/Breakpoint.cpp @@ -0,0 +1,76 @@ +//===-- Breakpoint.cpp ----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Breakpoint.h" +#include "DAP.h" +#include "JSONUtils.h" +#include "llvm/ADT/StringExtras.h" + +using namespace lldb_dap; + +void Breakpoint::SetCondition() { bp.SetCondition(condition.c_str()); } + +void Breakpoint::SetHitCondition() { + uint64_t hitCount = 0; + if (llvm::to_integer(hitCondition, hitCount)) + bp.SetIgnoreCount(hitCount - 1); +} + +void Breakpoint::CreateJsonObject(llvm::json::Object &object) { + // Each breakpoint location is treated as a separate breakpoint for VS code. + // They don't have the notion of a single breakpoint with multiple locations. + if (!bp.IsValid()) + return; + object.try_emplace("verified", bp.GetNumResolvedLocations() > 0); + object.try_emplace("id", bp.GetID()); + // VS Code DAP doesn't currently allow one breakpoint to have multiple + // locations so we just report the first one. If we report all locations + // then the IDE starts showing the wrong line numbers and locations for + // other source file and line breakpoints in the same file. + + // Below we search for the first resolved location in a breakpoint and report + // this as the breakpoint location since it will have a complete location + // that is at least loaded in the current process. + lldb::SBBreakpointLocation bp_loc; + const auto num_locs = bp.GetNumLocations(); + for (size_t i = 0; i < num_locs; ++i) { + bp_loc = bp.GetLocationAtIndex(i); + if (bp_loc.IsResolved()) + break; + } + // If no locations are resolved, use the first location.
+ if (!bp_loc.IsResolved()) + bp_loc = bp.GetLocationAtIndex(0); + auto bp_addr = bp_loc.GetAddress(); + + if (bp_addr.IsValid()) { + std::string formatted_addr = + "0x" + llvm::utohexstr(bp_addr.GetLoadAddress(g_dap.target)); + object.try_emplace("instructionReference", formatted_addr); + auto line_entry = bp_addr.GetLineEntry(); + const auto line = line_entry.GetLine(); + if (line != UINT32_MAX) + object.try_emplace("line", line); + const auto column = line_entry.GetColumn(); + if (column != 0) + object.try_emplace("column", column); + object.try_emplace("source", CreateSource(line_entry)); + } +} + +bool Breakpoint::MatchesName(const char *name) { return bp.MatchesName(name); } + +void Breakpoint::SetBreakpoint() { + // See comments in BreakpointBase::GetBreakpointLabel() for details of why + // we add a label to our breakpoints. + bp.AddName(GetBreakpointLabel()); + if (!condition.empty()) + SetCondition(); + if (!hitCondition.empty()) + SetHitCondition(); +} diff --git a/lldb/tools/lldb-dap/Breakpoint.h b/lldb/tools/lldb-dap/Breakpoint.h new file mode 100644 index 00000000000000..47a9d9c59ae2b7 --- /dev/null +++ b/lldb/tools/lldb-dap/Breakpoint.h @@ -0,0 +1,33 @@ +//===-- Breakpoint.h --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_TOOLS_LLDB_DAP_BREAKPOINT_H +#define LLDB_TOOLS_LLDB_DAP_BREAKPOINT_H + +#include "BreakpointBase.h" + +namespace lldb_dap { + +struct Breakpoint : public BreakpointBase { + // The LLDB breakpoint associated with this source breakpoint + lldb::SBBreakpoint bp; + + Breakpoint() = default; + Breakpoint(const llvm::json::Object &obj) : BreakpointBase(obj){}; + Breakpoint(lldb::SBBreakpoint bp) : bp(bp) {} + + void SetCondition() override; + void SetHitCondition() override; + void CreateJsonObject(llvm::json::Object &object) override; + + bool MatchesName(const char *name); + void SetBreakpoint(); +}; +} // namespace lldb_dap + +#endif diff --git a/lldb/tools/lldb-dap/BreakpointBase.cpp b/lldb/tools/lldb-dap/BreakpointBase.cpp index fb4b27fbe315fc..519729f5519ffc 100644 --- a/lldb/tools/lldb-dap/BreakpointBase.cpp +++ b/lldb/tools/lldb-dap/BreakpointBase.cpp @@ -8,306 +8,13 @@ #include "BreakpointBase.h" #include "DAP.h" -#include "JSONUtils.h" #include "llvm/ADT/StringExtras.h" using namespace lldb_dap; BreakpointBase::BreakpointBase(const llvm::json::Object &obj) : condition(std::string(GetString(obj, "condition"))), - hitCondition(std::string(GetString(obj, "hitCondition"))), - logMessage(std::string(GetString(obj, "logMessage"))) {} - -void BreakpointBase::SetCondition() { bp.SetCondition(condition.c_str()); } - -void BreakpointBase::SetHitCondition() { - uint64_t hitCount = 0; - if (llvm::to_integer(hitCondition, hitCount)) - bp.SetIgnoreCount(hitCount - 1); -} - -lldb::SBError BreakpointBase::AppendLogMessagePart(llvm::StringRef part, - bool is_expr) { - if (is_expr) { - logMessageParts.emplace_back(part, is_expr); - } else { - std::string formatted; - lldb::SBError error = FormatLogText(part, formatted); - if (error.Fail()) - return error; - logMessageParts.emplace_back(formatted, is_expr); - } - return
lldb::SBError(); -} - -// TODO: consolidate this code with the implementation in -// FormatEntity::ParseInternal(). -lldb::SBError BreakpointBase::FormatLogText(llvm::StringRef text, - std::string &formatted) { - lldb::SBError error; - while (!text.empty()) { - size_t backslash_pos = text.find_first_of('\\'); - if (backslash_pos == std::string::npos) { - formatted += text.str(); - return error; - } - - formatted += text.substr(0, backslash_pos).str(); - // Skip the characters before and including '\'. - text = text.drop_front(backslash_pos + 1); - - if (text.empty()) { - error.SetErrorString( - "'\\' character was not followed by another character"); - return error; - } - - const char desens_char = text[0]; - text = text.drop_front(); // Skip the desensitized char character - switch (desens_char) { - case 'a': - formatted.push_back('\a'); - break; - case 'b': - formatted.push_back('\b'); - break; - case 'f': - formatted.push_back('\f'); - break; - case 'n': - formatted.push_back('\n'); - break; - case 'r': - formatted.push_back('\r'); - break; - case 't': - formatted.push_back('\t'); - break; - case 'v': - formatted.push_back('\v'); - break; - case '\'': - formatted.push_back('\''); - break; - case '\\': - formatted.push_back('\\'); - break; - case '0': - // 1 to 3 octal chars - { - if (text.empty()) { - error.SetErrorString("missing octal number following '\\0'"); - return error; - } - - // Make a string that can hold onto the initial zero char, up to 3 - // octal digits, and a terminating NULL. 
- char oct_str[5] = {0, 0, 0, 0, 0}; - - size_t i; - for (i = 0; - i < text.size() && i < 4 && (text[i] >= '0' && text[i] <= '7'); - ++i) { - oct_str[i] = text[i]; - } - - text = text.drop_front(i); - unsigned long octal_value = ::strtoul(oct_str, nullptr, 8); - if (octal_value <= UINT8_MAX) { - formatted.push_back((char)octal_value); - } else { - error.SetErrorString("octal number is larger than a single byte"); - return error; - } - } - break; - - case 'x': { - if (text.empty()) { - error.SetErrorString("missing hex number following '\\x'"); - return error; - } - // hex number in the text - if (isxdigit(text[0])) { - // Make a string that can hold onto two hex chars plus a - // NULL terminator - char hex_str[3] = {0, 0, 0}; - hex_str[0] = text[0]; - - text = text.drop_front(); - - if (!text.empty() && isxdigit(text[0])) { - hex_str[1] = text[0]; - text = text.drop_front(); - } - - unsigned long hex_value = strtoul(hex_str, nullptr, 16); - if (hex_value <= UINT8_MAX) { - formatted.push_back((char)hex_value); - } else { - error.SetErrorString("hex number is larger than a single byte"); - return error; - } - } else { - formatted.push_back(desens_char); - } - break; - } - - default: - // Just desensitize any other character by just printing what came - // after the '\' - formatted.push_back(desens_char); - break; - } - } - return error; -} - -// logMessage will be divided into array of LogMessagePart as two kinds: -// 1. raw print text message, and -// 2. interpolated expression for evaluation which is inside matching curly -// braces. -// -// The function tries to parse logMessage into a list of LogMessageParts -// for easy later access in BreakpointHitCallback. -void BreakpointBase::SetLogMessage() { - logMessageParts.clear(); - - // Contains unmatched open curly braces indices. - std::vector unmatched_curly_braces; - - // Contains all matched curly braces in logMessage. 
- // Loop invariant: matched_curly_braces_ranges are sorted by start index in - // ascending order without any overlap between them. - std::vector> matched_curly_braces_ranges; - - lldb::SBError error; - // Part1 - parse matched_curly_braces_ranges. - // locating all curly braced expression ranges in logMessage. - // The algorithm takes care of nested and imbalanced curly braces. - for (size_t i = 0; i < logMessage.size(); ++i) { - if (logMessage[i] == '{') { - unmatched_curly_braces.push_back(i); - } else if (logMessage[i] == '}') { - if (unmatched_curly_braces.empty()) - // Nothing to match. - continue; - - int last_unmatched_index = unmatched_curly_braces.back(); - unmatched_curly_braces.pop_back(); - - // Erase any matched ranges included in the new match. - while (!matched_curly_braces_ranges.empty()) { - assert(matched_curly_braces_ranges.back().first != - last_unmatched_index && - "How can a curley brace be matched twice?"); - if (matched_curly_braces_ranges.back().first < last_unmatched_index) - break; - - // This is a nested range let's earse it. - assert((size_t)matched_curly_braces_ranges.back().second < i); - matched_curly_braces_ranges.pop_back(); - } - - // Assert invariant. - assert(matched_curly_braces_ranges.empty() || - matched_curly_braces_ranges.back().first < last_unmatched_index); - matched_curly_braces_ranges.emplace_back(last_unmatched_index, i); - } - } - - // Part2 - parse raw text and expresions parts. - // All expression ranges have been parsed in matched_curly_braces_ranges. - // The code below uses matched_curly_braces_ranges to divide logMessage - // into raw text parts and expression parts. - int last_raw_text_start = 0; - for (const std::pair &curly_braces_range : - matched_curly_braces_ranges) { - // Raw text before open curly brace. 
- assert(curly_braces_range.first >= last_raw_text_start); - size_t raw_text_len = curly_braces_range.first - last_raw_text_start; - if (raw_text_len > 0) { - error = AppendLogMessagePart( - llvm::StringRef(logMessage.c_str() + last_raw_text_start, - raw_text_len), - /*is_expr=*/false); - if (error.Fail()) { - NotifyLogMessageError(error.GetCString()); - return; - } - } - - // Expression between curly braces. - assert(curly_braces_range.second > curly_braces_range.first); - size_t expr_len = curly_braces_range.second - curly_braces_range.first - 1; - error = AppendLogMessagePart( - llvm::StringRef(logMessage.c_str() + curly_braces_range.first + 1, - expr_len), - /*is_expr=*/true); - if (error.Fail()) { - NotifyLogMessageError(error.GetCString()); - return; - } - - last_raw_text_start = curly_braces_range.second + 1; - } - // Trailing raw text after close curly brace. - assert(last_raw_text_start >= 0); - if (logMessage.size() > (size_t)last_raw_text_start) { - error = AppendLogMessagePart( - llvm::StringRef(logMessage.c_str() + last_raw_text_start, - logMessage.size() - last_raw_text_start), - /*is_expr=*/false); - if (error.Fail()) { - NotifyLogMessageError(error.GetCString()); - return; - } - } - - bp.SetCallback(BreakpointBase::BreakpointHitCallback, this); -} - -void BreakpointBase::NotifyLogMessageError(llvm::StringRef error) { - std::string message = "Log message has error: "; - message += error; - g_dap.SendOutput(OutputType::Console, message); -} - -/*static*/ -bool BreakpointBase::BreakpointHitCallback( - void *baton, lldb::SBProcess &process, lldb::SBThread &thread, - lldb::SBBreakpointLocation &location) { - if (!baton) - return true; - - BreakpointBase *bp = (BreakpointBase *)baton; - lldb::SBFrame frame = thread.GetSelectedFrame(); - - std::string output; - for (const BreakpointBase::LogMessagePart &messagePart : - bp->logMessageParts) { - if (messagePart.is_expr) { - // Try local frame variables first before fall back to expression - // evaluation - 
const std::string &expr_str = messagePart.text; - const char *expr = expr_str.c_str(); - lldb::SBValue value = - frame.GetValueForVariablePath(expr, lldb::eDynamicDontRunTarget); - if (value.GetError().Fail()) - value = frame.EvaluateExpression(expr); - output += VariableDescription(value).display_value; - } else { - output += messagePart.text; - } - } - if (!output.empty() && output.back() != '\n') - output.push_back('\n'); // Ensure log message has line break. - g_dap.SendOutput(OutputType::Console, output.c_str()); - - // Do not stop. - return false; -} + hitCondition(std::string(GetString(obj, "hitCondition"))) {} void BreakpointBase::UpdateBreakpoint(const BreakpointBase &request_bp) { if (condition != request_bp.condition) { @@ -318,10 +25,6 @@ void BreakpointBase::UpdateBreakpoint(const BreakpointBase &request_bp) { hitCondition = request_bp.hitCondition; SetHitCondition(); } - if (logMessage != request_bp.logMessage) { - logMessage = request_bp.logMessage; - SetLogMessage(); - } } const char *BreakpointBase::GetBreakpointLabel() { diff --git a/lldb/tools/lldb-dap/BreakpointBase.h b/lldb/tools/lldb-dap/BreakpointBase.h index 41787f78610215..5a04bb201615fc 100644 --- a/lldb/tools/lldb-dap/BreakpointBase.h +++ b/lldb/tools/lldb-dap/BreakpointBase.h @@ -9,7 +9,6 @@ #ifndef LLDB_TOOLS_LLDB_DAP_BREAKPOINTBASE_H #define LLDB_TOOLS_LLDB_DAP_BREAKPOINTBASE_H -#include "JSONUtils.h" #include "lldb/API/SBBreakpoint.h" #include "llvm/Support/JSON.h" #include @@ -18,44 +17,24 @@ namespace lldb_dap { struct BreakpointBase { - // logMessage part can be either a raw text or an expression. - struct LogMessagePart { - LogMessagePart(llvm::StringRef text, bool is_expr) - : text(text), is_expr(is_expr) {} - std::string text; - bool is_expr; - }; + // An optional expression for conditional breakpoints. std::string condition; // An optional expression that controls how many hits of the breakpoint are // ignored. 
The backend is expected to interpret the expression as needed std::string hitCondition; - // If this attribute exists and is non-empty, the backend must not 'break' - // (stop) but log the message instead. Expressions within {} are - // interpolated. - std::string logMessage; - std::vector logMessageParts; - // The LLDB breakpoint associated wit this source breakpoint - lldb::SBBreakpoint bp; BreakpointBase() = default; BreakpointBase(const llvm::json::Object &obj); + virtual ~BreakpointBase() = default; - void SetCondition(); - void SetHitCondition(); - void SetLogMessage(); - void UpdateBreakpoint(const BreakpointBase &request_bp); + virtual void SetCondition() = 0; + virtual void SetHitCondition() = 0; + virtual void CreateJsonObject(llvm::json::Object &object) = 0; - // Format \param text and return formatted text in \param formatted. - // \return any formatting failures. - lldb::SBError FormatLogText(llvm::StringRef text, std::string &formatted); - lldb::SBError AppendLogMessagePart(llvm::StringRef part, bool is_expr); - void NotifyLogMessageError(llvm::StringRef error); + void UpdateBreakpoint(const BreakpointBase &request_bp); static const char *GetBreakpointLabel(); - static bool BreakpointHitCallback(void *baton, lldb::SBProcess &process, - lldb::SBThread &thread, - lldb::SBBreakpointLocation &location); }; } // namespace lldb_dap diff --git a/lldb/tools/lldb-dap/CMakeLists.txt b/lldb/tools/lldb-dap/CMakeLists.txt index 554567eb3b0e23..f8c0e4ecf36c2f 100644 --- a/lldb/tools/lldb-dap/CMakeLists.txt +++ b/lldb/tools/lldb-dap/CMakeLists.txt @@ -24,6 +24,7 @@ tablegen(LLVM Options.inc -gen-opt-parser-defs) add_public_tablegen_target(LLDBDAPOptionsTableGen) add_lldb_tool(lldb-dap lldb-dap.cpp + Breakpoint.cpp BreakpointBase.cpp ExceptionBreakpoint.cpp FifoFiles.cpp diff --git a/lldb/tools/lldb-dap/FunctionBreakpoint.cpp b/lldb/tools/lldb-dap/FunctionBreakpoint.cpp index d4bdb976500ecd..21743bf908706d 100644 --- a/lldb/tools/lldb-dap/FunctionBreakpoint.cpp +++ 
b/lldb/tools/lldb-dap/FunctionBreakpoint.cpp @@ -12,21 +12,13 @@ namespace lldb_dap { FunctionBreakpoint::FunctionBreakpoint(const llvm::json::Object &obj) - : BreakpointBase(obj), functionName(std::string(GetString(obj, "name"))) {} + : Breakpoint(obj), functionName(std::string(GetString(obj, "name"))) {} void FunctionBreakpoint::SetBreakpoint() { if (functionName.empty()) return; bp = g_dap.target.BreakpointCreateByName(functionName.c_str()); - // See comments in BreakpointBase::GetBreakpointLabel() for details of why - // we add a label to our breakpoints. - bp.AddName(GetBreakpointLabel()); - if (!condition.empty()) - SetCondition(); - if (!hitCondition.empty()) - SetHitCondition(); - if (!logMessage.empty()) - SetLogMessage(); + Breakpoint::SetBreakpoint(); } } // namespace lldb_dap diff --git a/lldb/tools/lldb-dap/FunctionBreakpoint.h b/lldb/tools/lldb-dap/FunctionBreakpoint.h index fc23e94e128763..b15ff1931a6b22 100644 --- a/lldb/tools/lldb-dap/FunctionBreakpoint.h +++ b/lldb/tools/lldb-dap/FunctionBreakpoint.h @@ -9,11 +9,11 @@ #ifndef LLDB_TOOLS_LLDB_DAP_FUNCTIONBREAKPOINT_H #define LLDB_TOOLS_LLDB_DAP_FUNCTIONBREAKPOINT_H -#include "BreakpointBase.h" +#include "Breakpoint.h" namespace lldb_dap { -struct FunctionBreakpoint : public BreakpointBase { +struct FunctionBreakpoint : public Breakpoint { std::string functionName; FunctionBreakpoint() = default; diff --git a/lldb/tools/lldb-dap/JSONUtils.cpp b/lldb/tools/lldb-dap/JSONUtils.cpp index a8b438d9d6df39..878449a91aa66a 100644 --- a/lldb/tools/lldb-dap/JSONUtils.cpp +++ b/lldb/tools/lldb-dap/JSONUtils.cpp @@ -364,54 +364,14 @@ llvm::json::Value CreateScope(const llvm::StringRef name, // }, // "required": [ "verified" ] // } -llvm::json::Value CreateBreakpoint(lldb::SBBreakpoint &bp, +llvm::json::Value CreateBreakpoint(BreakpointBase *bp, std::optional request_path, std::optional request_line, std::optional request_column) { - // Each breakpoint location is treated as a separate breakpoint for VS code. 
- // They don't have the notion of a single breakpoint with multiple locations. llvm::json::Object object; - if (!bp.IsValid()) - return llvm::json::Value(std::move(object)); - - object.try_emplace("verified", bp.GetNumResolvedLocations() > 0); - object.try_emplace("id", bp.GetID()); - // VS Code DAP doesn't currently allow one breakpoint to have multiple - // locations so we just report the first one. If we report all locations - // then the IDE starts showing the wrong line numbers and locations for - // other source file and line breakpoints in the same file. - - // Below we search for the first resolved location in a breakpoint and report - // this as the breakpoint location since it will have a complete location - // that is at least loaded in the current process. - lldb::SBBreakpointLocation bp_loc; - const auto num_locs = bp.GetNumLocations(); - for (size_t i = 0; i < num_locs; ++i) { - bp_loc = bp.GetLocationAtIndex(i); - if (bp_loc.IsResolved()) - break; - } - // If not locations are resolved, use the first location. 
- if (!bp_loc.IsResolved()) - bp_loc = bp.GetLocationAtIndex(0); - auto bp_addr = bp_loc.GetAddress(); - if (request_path) object.try_emplace("source", CreateSource(*request_path)); - - if (bp_addr.IsValid()) { - std::string formatted_addr = - "0x" + llvm::utohexstr(bp_addr.GetLoadAddress(g_dap.target)); - object.try_emplace("instructionReference", formatted_addr); - auto line_entry = bp_addr.GetLineEntry(); - const auto line = line_entry.GetLine(); - if (line != UINT32_MAX) - object.try_emplace("line", line); - const auto column = line_entry.GetColumn(); - if (column != 0) - object.try_emplace("column", column); - object.try_emplace("source", CreateSource(line_entry)); - } + bp->CreateJsonObject(object); // We try to add request_line as a fallback if (request_line) object.try_emplace("line", *request_line); @@ -506,7 +466,7 @@ llvm::json::Value CreateModule(lldb::SBModule &module) { return llvm::json::Value(std::move(object)); } -void AppendBreakpoint(lldb::SBBreakpoint &bp, llvm::json::Array &breakpoints, +void AppendBreakpoint(BreakpointBase *bp, llvm::json::Array &breakpoints, std::optional request_path, std::optional request_line) { breakpoints.emplace_back(CreateBreakpoint(bp, request_path, request_line)); diff --git a/lldb/tools/lldb-dap/JSONUtils.h b/lldb/tools/lldb-dap/JSONUtils.h index 62338548890c0c..1515f5ba2e5f4d 100644 --- a/lldb/tools/lldb-dap/JSONUtils.h +++ b/lldb/tools/lldb-dap/JSONUtils.h @@ -9,6 +9,7 @@ #ifndef LLDB_TOOLS_LLDB_DAP_JSONUTILS_H #define LLDB_TOOLS_LLDB_DAP_JSONUTILS_H +#include "BreakpointBase.h" #include "DAPForward.h" #include "lldb/API/SBModule.h" #include "llvm/ADT/StringRef.h" @@ -191,7 +192,7 @@ void FillResponse(const llvm::json::Object &request, /// provided by the setBreakpoints request are returned to the IDE as a /// fallback. 
void AppendBreakpoint( - lldb::SBBreakpoint &bp, llvm::json::Array &breakpoints, + BreakpointBase *bp, llvm::json::Array &breakpoints, std::optional request_path = std::nullopt, std::optional request_line = std::nullopt); @@ -223,7 +224,7 @@ void AppendBreakpoint( /// A "Breakpoint" JSON object with that follows the formal JSON /// definition outlined by Microsoft. llvm::json::Value -CreateBreakpoint(lldb::SBBreakpoint &bp, +CreateBreakpoint(BreakpointBase *bp, std::optional request_path = std::nullopt, std::optional request_line = std::nullopt, std::optional request_column = std::nullopt); diff --git a/lldb/tools/lldb-dap/SourceBreakpoint.cpp b/lldb/tools/lldb-dap/SourceBreakpoint.cpp index 3bd83c0a6874de..f5dd1346cb9e54 100644 --- a/lldb/tools/lldb-dap/SourceBreakpoint.cpp +++ b/lldb/tools/lldb-dap/SourceBreakpoint.cpp @@ -12,22 +12,308 @@ namespace lldb_dap { SourceBreakpoint::SourceBreakpoint(const llvm::json::Object &obj) - : BreakpointBase(obj), line(GetUnsigned(obj, "line", 0)), - column(GetUnsigned(obj, "column", 0)) {} + : Breakpoint(obj), logMessage(std::string(GetString(obj, "logMessage"))), + line(GetUnsigned(obj, "line", 0)), column(GetUnsigned(obj, "column", 0)) { +} void SourceBreakpoint::SetBreakpoint(const llvm::StringRef source_path) { lldb::SBFileSpecList module_list; bp = g_dap.target.BreakpointCreateByLocation(source_path.str().c_str(), line, column, 0, module_list); - // See comments in BreakpointBase::GetBreakpointLabel() for details of why - // we add a label to our breakpoints. 
- bp.AddName(GetBreakpointLabel()); - if (!condition.empty()) - SetCondition(); - if (!hitCondition.empty()) - SetHitCondition(); if (!logMessage.empty()) SetLogMessage(); + Breakpoint::SetBreakpoint(); +} + +void SourceBreakpoint::UpdateBreakpoint(const SourceBreakpoint &request_bp) { + if (logMessage != request_bp.logMessage) { + logMessage = request_bp.logMessage; + SetLogMessage(); + } + BreakpointBase::UpdateBreakpoint(request_bp); +} + +lldb::SBError SourceBreakpoint::AppendLogMessagePart(llvm::StringRef part, + bool is_expr) { + if (is_expr) { + logMessageParts.emplace_back(part, is_expr); + } else { + std::string formatted; + lldb::SBError error = FormatLogText(part, formatted); + if (error.Fail()) + return error; + logMessageParts.emplace_back(formatted, is_expr); + } + return lldb::SBError(); +} + +// TODO: consolidate this code with the implementation in +// FormatEntity::ParseInternal(). +lldb::SBError SourceBreakpoint::FormatLogText(llvm::StringRef text, + std::string &formatted) { + lldb::SBError error; + while (!text.empty()) { + size_t backslash_pos = text.find_first_of('\\'); + if (backslash_pos == std::string::npos) { + formatted += text.str(); + return error; + } + + formatted += text.substr(0, backslash_pos).str(); + // Skip the characters before and including '\'. 
+ text = text.drop_front(backslash_pos + 1); + + if (text.empty()) { + error.SetErrorString( + "'\\' character was not followed by another character"); + return error; + } + + const char desens_char = text[0]; + text = text.drop_front(); // Skip the desensitized char character + switch (desens_char) { + case 'a': + formatted.push_back('\a'); + break; + case 'b': + formatted.push_back('\b'); + break; + case 'f': + formatted.push_back('\f'); + break; + case 'n': + formatted.push_back('\n'); + break; + case 'r': + formatted.push_back('\r'); + break; + case 't': + formatted.push_back('\t'); + break; + case 'v': + formatted.push_back('\v'); + break; + case '\'': + formatted.push_back('\''); + break; + case '\\': + formatted.push_back('\\'); + break; + case '0': + // 1 to 3 octal chars + { + if (text.empty()) { + error.SetErrorString("missing octal number following '\\0'"); + return error; + } + + // Make a string that can hold onto the initial zero char, up to 3 + // octal digits, and a terminating NULL. 
+ char oct_str[5] = {0, 0, 0, 0, 0}; + + size_t i; + for (i = 0; + i < text.size() && i < 4 && (text[i] >= '0' && text[i] <= '7'); + ++i) { + oct_str[i] = text[i]; + } + + text = text.drop_front(i); + unsigned long octal_value = ::strtoul(oct_str, nullptr, 8); + if (octal_value <= UINT8_MAX) { + formatted.push_back((char)octal_value); + } else { + error.SetErrorString("octal number is larger than a single byte"); + return error; + } + } + break; + + case 'x': { + if (text.empty()) { + error.SetErrorString("missing hex number following '\\x'"); + return error; + } + // hex number in the text + if (isxdigit(text[0])) { + // Make a string that can hold onto two hex chars plus a + // NULL terminator + char hex_str[3] = {0, 0, 0}; + hex_str[0] = text[0]; + + text = text.drop_front(); + + if (!text.empty() && isxdigit(text[0])) { + hex_str[1] = text[0]; + text = text.drop_front(); + } + + unsigned long hex_value = strtoul(hex_str, nullptr, 16); + if (hex_value <= UINT8_MAX) { + formatted.push_back((char)hex_value); + } else { + error.SetErrorString("hex number is larger than a single byte"); + return error; + } + } else { + formatted.push_back(desens_char); + } + break; + } + + default: + // Just desensitize any other character by just printing what came + // after the '\' + formatted.push_back(desens_char); + break; + } + } + return error; +} + +// logMessage will be divided into array of LogMessagePart as two kinds: +// 1. raw print text message, and +// 2. interpolated expression for evaluation which is inside matching curly +// braces. +// +// The function tries to parse logMessage into a list of LogMessageParts +// for easy later access in BreakpointHitCallback. +void SourceBreakpoint::SetLogMessage() { + logMessageParts.clear(); + + // Contains unmatched open curly braces indices. + std::vector unmatched_curly_braces; + + // Contains all matched curly braces in logMessage. 
+ // Loop invariant: matched_curly_braces_ranges are sorted by start index in + // ascending order without any overlap between them. + std::vector> matched_curly_braces_ranges; + + lldb::SBError error; + // Part1 - parse matched_curly_braces_ranges. + // locating all curly braced expression ranges in logMessage. + // The algorithm takes care of nested and imbalanced curly braces. + for (size_t i = 0; i < logMessage.size(); ++i) { + if (logMessage[i] == '{') { + unmatched_curly_braces.push_back(i); + } else if (logMessage[i] == '}') { + if (unmatched_curly_braces.empty()) + // Nothing to match. + continue; + + int last_unmatched_index = unmatched_curly_braces.back(); + unmatched_curly_braces.pop_back(); + + // Erase any matched ranges included in the new match. + while (!matched_curly_braces_ranges.empty()) { + assert(matched_curly_braces_ranges.back().first != + last_unmatched_index && + "How can a curley brace be matched twice?"); + if (matched_curly_braces_ranges.back().first < last_unmatched_index) + break; + + // This is a nested range let's earse it. + assert((size_t)matched_curly_braces_ranges.back().second < i); + matched_curly_braces_ranges.pop_back(); + } + + // Assert invariant. + assert(matched_curly_braces_ranges.empty() || + matched_curly_braces_ranges.back().first < last_unmatched_index); + matched_curly_braces_ranges.emplace_back(last_unmatched_index, i); + } + } + + // Part2 - parse raw text and expresions parts. + // All expression ranges have been parsed in matched_curly_braces_ranges. + // The code below uses matched_curly_braces_ranges to divide logMessage + // into raw text parts and expression parts. + int last_raw_text_start = 0; + for (const std::pair &curly_braces_range : + matched_curly_braces_ranges) { + // Raw text before open curly brace. 
+ assert(curly_braces_range.first >= last_raw_text_start); + size_t raw_text_len = curly_braces_range.first - last_raw_text_start; + if (raw_text_len > 0) { + error = AppendLogMessagePart( + llvm::StringRef(logMessage.c_str() + last_raw_text_start, + raw_text_len), + /*is_expr=*/false); + if (error.Fail()) { + NotifyLogMessageError(error.GetCString()); + return; + } + } + + // Expression between curly braces. + assert(curly_braces_range.second > curly_braces_range.first); + size_t expr_len = curly_braces_range.second - curly_braces_range.first - 1; + error = AppendLogMessagePart( + llvm::StringRef(logMessage.c_str() + curly_braces_range.first + 1, + expr_len), + /*is_expr=*/true); + if (error.Fail()) { + NotifyLogMessageError(error.GetCString()); + return; + } + + last_raw_text_start = curly_braces_range.second + 1; + } + // Trailing raw text after close curly brace. + assert(last_raw_text_start >= 0); + if (logMessage.size() > (size_t)last_raw_text_start) { + error = AppendLogMessagePart( + llvm::StringRef(logMessage.c_str() + last_raw_text_start, + logMessage.size() - last_raw_text_start), + /*is_expr=*/false); + if (error.Fail()) { + NotifyLogMessageError(error.GetCString()); + return; + } + } + + bp.SetCallback(BreakpointHitCallback, this); +} + +void SourceBreakpoint::NotifyLogMessageError(llvm::StringRef error) { + std::string message = "Log message has error: "; + message += error; + g_dap.SendOutput(OutputType::Console, message); +} + +/*static*/ +bool SourceBreakpoint::BreakpointHitCallback( + void *baton, lldb::SBProcess &process, lldb::SBThread &thread, + lldb::SBBreakpointLocation &location) { + if (!baton) + return true; + + SourceBreakpoint *bp = (SourceBreakpoint *)baton; + lldb::SBFrame frame = thread.GetSelectedFrame(); + + std::string output; + for (const SourceBreakpoint::LogMessagePart &messagePart : + bp->logMessageParts) { + if (messagePart.is_expr) { + // Try local frame variables first before fall back to expression + // evaluation + const 
std::string &expr_str = messagePart.text; + const char *expr = expr_str.c_str(); + lldb::SBValue value = + frame.GetValueForVariablePath(expr, lldb::eDynamicDontRunTarget); + if (value.GetError().Fail()) + value = frame.EvaluateExpression(expr); + output += VariableDescription(value).display_value; + } else { + output += messagePart.text; + } + } + if (!output.empty() && output.back() != '\n') + output.push_back('\n'); // Ensure log message has line break. + g_dap.SendOutput(OutputType::Console, output.c_str()); + + // Do not stop. + return false; } } // namespace lldb_dap diff --git a/lldb/tools/lldb-dap/SourceBreakpoint.h b/lldb/tools/lldb-dap/SourceBreakpoint.h index f4b54a44fc6875..aa3fbe6d0f96d2 100644 --- a/lldb/tools/lldb-dap/SourceBreakpoint.h +++ b/lldb/tools/lldb-dap/SourceBreakpoint.h @@ -9,21 +9,45 @@ #ifndef LLDB_TOOLS_LLDB_DAP_SOURCEBREAKPOINT_H #define LLDB_TOOLS_LLDB_DAP_SOURCEBREAKPOINT_H -#include "BreakpointBase.h" +#include "Breakpoint.h" #include "llvm/ADT/StringRef.h" namespace lldb_dap { -struct SourceBreakpoint : public BreakpointBase { +struct SourceBreakpoint : public Breakpoint { + // logMessage part can be either a raw text or an expression. + struct LogMessagePart { + LogMessagePart(llvm::StringRef text, bool is_expr) + : text(text), is_expr(is_expr) {} + std::string text; + bool is_expr; + }; + // If this attribute exists and is non-empty, the backend must not 'break' + // (stop) but log the message instead. Expressions within {} are + // interpolated. 
+ std::string logMessage; + std::vector logMessageParts; uint32_t line; ///< The source line of the breakpoint or logpoint uint32_t column; ///< An optional source column of the breakpoint - SourceBreakpoint() : BreakpointBase(), line(0), column(0) {} + SourceBreakpoint() : Breakpoint(), line(0), column(0) {} SourceBreakpoint(const llvm::json::Object &obj); // Set this breakpoint in LLDB as a new breakpoint void SetBreakpoint(const llvm::StringRef source_path); + void UpdateBreakpoint(const SourceBreakpoint &request_bp); + + void SetLogMessage(); + // Format \param text and return formatted text in \param formatted. + // \return any formatting failures. + lldb::SBError FormatLogText(llvm::StringRef text, std::string &formatted); + lldb::SBError AppendLogMessagePart(llvm::StringRef part, bool is_expr); + void NotifyLogMessageError(llvm::StringRef error); + + static bool BreakpointHitCallback(void *baton, lldb::SBProcess &process, + lldb::SBThread &thread, + lldb::SBBreakpointLocation &location); }; inline bool operator<(const SourceBreakpoint &lhs, diff --git a/lldb/tools/lldb-dap/lldb-dap.cpp b/lldb/tools/lldb-dap/lldb-dap.cpp index 01494dcc7da00f..67022347e6d624 100644 --- a/lldb/tools/lldb-dap/lldb-dap.cpp +++ b/lldb/tools/lldb-dap/lldb-dap.cpp @@ -525,7 +525,8 @@ void EventThreadFunction() { if (event_mask & lldb::SBTarget::eBroadcastBitBreakpointChanged) { auto event_type = lldb::SBBreakpoint::GetBreakpointEventTypeFromEvent(event); - auto bp = lldb::SBBreakpoint::GetBreakpointFromEvent(event); + auto bp = + Breakpoint(lldb::SBBreakpoint::GetBreakpointFromEvent(event)); // If the breakpoint was originated from the IDE, it will have the // BreakpointBase::GetBreakpointLabel() label attached. Regardless // of wether the locations were added or removed, the breakpoint @@ -541,7 +542,7 @@ void EventThreadFunction() { // mapped. Note that CreateBreakpoint doesn't apply source mapping. 
// Besides, the current implementation of VSCode ignores the // "source" element of breakpoint events. - llvm::json::Value source_bp = CreateBreakpoint(bp); + llvm::json::Value source_bp = CreateBreakpoint(&bp); source_bp.getAsObject()->erase("source"); body.try_emplace("breakpoint", source_bp); @@ -2345,7 +2346,7 @@ void request_setBreakpoints(const llvm::json::Object &request) { existing_source_bps->second.find(src_bp.line); if (existing_bp != existing_source_bps->second.end()) { existing_bp->second.UpdateBreakpoint(src_bp); - AppendBreakpoint(existing_bp->second.bp, response_breakpoints, path, + AppendBreakpoint(&existing_bp->second, response_breakpoints, path, src_bp.line); continue; } @@ -2354,7 +2355,7 @@ void request_setBreakpoints(const llvm::json::Object &request) { g_dap.source_breakpoints[path][src_bp.line] = src_bp; SourceBreakpoint &new_bp = g_dap.source_breakpoints[path][src_bp.line]; new_bp.SetBreakpoint(path.data()); - AppendBreakpoint(new_bp.bp, response_breakpoints, path, new_bp.line); + AppendBreakpoint(&new_bp, response_breakpoints, path, new_bp.line); } } } @@ -2567,7 +2568,7 @@ void request_setFunctionBreakpoints(const llvm::json::Object &request) { // handled it here and we don't need to set a new breakpoint below. 
request_bps.erase(request_pos); // Add this breakpoint info to the response - AppendBreakpoint(pair.second.bp, response_breakpoints); + AppendBreakpoint(&pair.second, response_breakpoints); } } // Remove any breakpoints that are no longer in our list @@ -2581,7 +2582,7 @@ void request_setFunctionBreakpoints(const llvm::json::Object &request) { g_dap.function_breakpoints[pair.first()] = std::move(pair.second); FunctionBreakpoint &new_bp = g_dap.function_breakpoints[pair.first()]; new_bp.SetBreakpoint(); - AppendBreakpoint(new_bp.bp, response_breakpoints); + AppendBreakpoint(&new_bp, response_breakpoints); } llvm::json::Object body; @@ -3582,8 +3583,8 @@ void request__testGetTargetBreakpoints(const llvm::json::Object &request) { FillResponse(request, response); llvm::json::Array response_breakpoints; for (uint32_t i = 0; g_dap.target.GetBreakpointAtIndex(i).IsValid(); ++i) { - auto bp = g_dap.target.GetBreakpointAtIndex(i); - AppendBreakpoint(bp, response_breakpoints); + auto bp = Breakpoint(g_dap.target.GetBreakpointAtIndex(i)); + AppendBreakpoint(&bp, response_breakpoints); } llvm::json::Object body; body.try_emplace("breakpoints", std::move(response_breakpoints)); diff --git a/llvm/utils/gn/secondary/lldb/tools/lldb-dap/BUILD.gn b/llvm/utils/gn/secondary/lldb/tools/lldb-dap/BUILD.gn index d8292df8c0e74f..98c2068f6da291 100644 --- a/llvm/utils/gn/secondary/lldb/tools/lldb-dap/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/tools/lldb-dap/BUILD.gn @@ -38,6 +38,7 @@ executable("lldb-dap") { # FIXME: rpath/install_name stuff on macOS for framework on macOS sources = [ + "Breakpoint.cpp", "BreakpointBase.cpp", "DAP.cpp", "ExceptionBreakpoint.cpp", From 7a5c1a4abc750fef335c2ee5191d59ebe9e4bf18 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Tue, 13 Feb 2024 16:39:29 +0000 Subject: [PATCH 020/240] [clang][docs] Fix warning in LanguageExtensions build-llvm/tools/clang/docs/LanguageExtensions.rst:2768: WARNING: Title underline too short. 
--- clang/docs/LanguageExtensions.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index ca78a5c39cf736..ee1d25396ca865 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -2765,7 +2765,7 @@ that even if present, its use may depend on run-time privilege or other OS controlled state. ``__builtin_readsteadycounter`` ------------------------------- +------------------------------- ``__builtin_readsteadycounter`` is used to access the fixed frequency counter register (or a similar steady-rate clock) on those targets that support it. From 7a471133ef56bf6059b3e35125f86420ebbf3a33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andrzej=20Warzy=C5=84ski?= Date: Tue, 13 Feb 2024 16:42:50 +0000 Subject: [PATCH 021/240] [mlir][nfc] Add tests for linalg.mmt4d (#81422) linalg.mmt4d was added a while back (https://reviews.llvm.org/D105244), but there are virtually no tests in-tree. In the spirit of documenting through test, this PR adds a few basic examples. 
--- mlir/test/Dialect/Linalg/invalid.mlir | 26 +++++++ mlir/test/Dialect/Linalg/named-ops.mlir | 11 +++ .../Linalg/transform-op-mmt4d-to-fma.mlir | 70 +++++++++++++++++++ mlir/test/Dialect/Linalg/vectorization.mlir | 25 +++++++ 4 files changed, 132 insertions(+) create mode 100644 mlir/test/Dialect/Linalg/transform-op-mmt4d-to-fma.mlir diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir index 56890df3f3ee52..916c04f33e9c67 100644 --- a/mlir/test/Dialect/Linalg/invalid.mlir +++ b/mlir/test/Dialect/Linalg/invalid.mlir @@ -744,3 +744,29 @@ func.func @illegal_softmax_output_shape(%arg0: tensor<2x16x32xf32>) -> tensor<2x -> tensor<2x16xf32> return %1 : tensor<2x16xf32> } + +// ----- + +func.func @mmt4d_dims_mismatch(%A: tensor<16x16x8x1xf32>, + %B: tensor<16x16x8x1xf32>, + %C_in: tensor<16x16x8x1xf32>) -> tensor<16x16x8x1xf32> { + // expected-error @+1 {{inferred input/output operand #2 has shape's dimension #3 to be 8, but found 1}} + %res = linalg.mmt4d + ins(%A, %B: tensor<16x16x8x1xf32>, tensor<16x16x8x1xf32>) + outs(%C_in: tensor<16x16x8x1xf32>) + -> tensor<16x16x8x1xf32> + return %res : tensor<16x16x8x1xf32> +} + +// ----- + +func.func @mmt4d_rank_mismatch(%A: tensor<16x16x8x1xf32>, + %B: tensor<16x16x8x1xf32>, + %C_in: tensor<8x8xf32>) -> tensor<8x8xf32> { + // expected-error @+1 {{expected operand rank (2) to match the result rank of indexing_map #2 (4)}} + %res = linalg.mmt4d + ins(%A, %B: tensor<16x16x8x1xf32>, tensor<16x16x8x1xf32>) + outs(%C_in: tensor<8x8xf32>) + -> tensor<8x8xf32> + return %res : tensor<8x8xf32> +} diff --git a/mlir/test/Dialect/Linalg/named-ops.mlir b/mlir/test/Dialect/Linalg/named-ops.mlir index 29977a71dbb864..7064e1b3f9dc76 100644 --- a/mlir/test/Dialect/Linalg/named-ops.mlir +++ b/mlir/test/Dialect/Linalg/named-ops.mlir @@ -1219,6 +1219,17 @@ func.func @batchmatmul_transpose_b(%arg0: memref<2x3x5xf32>, %arg1: memref<2x7x5 // ----- +// CHECK-LABEL: func @mmt4d +func.func @mmt4d(%A: 
tensor<10x32x8x1xf32>, %B: tensor<80x32x4x1xf32>, %C: tensor<10x80x8x4xf32>) -> tensor<10x80x8x4xf32> { + // CHECK: %{{.+}} = linalg.mmt4d + // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<10x32x8x1xf32>, tensor<80x32x4x1xf32>) + // CHECK-SAME: outs(%{{.+}} : tensor<10x80x8x4xf32>) -> tensor<10x80x8x4xf32> + %0 = linalg.mmt4d ins(%A, %B : tensor<10x32x8x1xf32>, tensor<80x32x4x1xf32>) outs(%C: tensor<10x80x8x4xf32>) -> tensor<10x80x8x4xf32> + return %0: tensor<10x80x8x4xf32> +} + +// ----- + // CHECK-LABEL: func @batch_mmt4d func.func @batch_mmt4d(%arg0: tensor<128x10x32x8x1xf32>, %arg1: tensor<128x80x32x4x1xf32>, %arg2: tensor<128x10x80x8x4xf32>) -> tensor<128x10x80x8x4xf32> { // CHECK: %{{.+}} = linalg.batch_mmt4d diff --git a/mlir/test/Dialect/Linalg/transform-op-mmt4d-to-fma.mlir b/mlir/test/Dialect/Linalg/transform-op-mmt4d-to-fma.mlir new file mode 100644 index 00000000000000..61e13d1bfa9c62 --- /dev/null +++ b/mlir/test/Dialect/Linalg/transform-op-mmt4d-to-fma.mlir @@ -0,0 +1,70 @@ +// RUN: mlir-opt %s -transform-interpreter | FileCheck %s + +func.func @mmt4d_to_fma(%A: tensor<16x16x8x1xf32>, %B: tensor<16x16x8x1xf32>, %C_in: tensor<16x16x8x8xf32>) -> tensor<16x16x8x8xf32> { + %res = linalg.mmt4d + ins(%A, %B: tensor<16x16x8x1xf32>, tensor<16x16x8x1xf32>) + outs(%C_in: tensor<16x16x8x8xf32>) + -> tensor<16x16x8x8xf32> + return %res : tensor<16x16x8x8xf32> +} + + +// CHECK-LABEL: @mmt4d_to_fma +// CHECK-COUNT-8: vector.fma + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%module: !transform.any_op {transform.readonly}) { + %func = transform.structured.match ops{["func.func"]} in %module : (!transform.any_op) -> !transform.op<"func.func"> + + %mmt4d = transform.structured.match ops{["linalg.mmt4d"]} in %func + + // Step 1: Tile + : (!transform.op<"func.func">) -> !transform.any_op + // Tile parallel dims + %tiled_linalg_op_p, %loops:4 = transform.structured.tile_using_for %mmt4d[1, 1, 0, 8, 8, 0] + : 
(!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op) + // Tile reduction dims + %tiled_linalg_op_r, %loops2:2 = transform.structured.tile_using_for %tiled_linalg_op_p[0, 0, 1, 0, 0, 1] + : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) + + // Step 2: Vectorize + transform.structured.vectorize %tiled_linalg_op_r : !transform.any_op + + // Step 3: Simplify + // vector.multi_reduction --> vector.contract + // Generates a 6-dim vector.contract with the dim matching the original MMT4D Op + // and with the following split into parallel and reduction dims: + // * parallel, parallel, reduction, parallel, parallel, reduction + transform.apply_patterns to %func { + transform.apply_patterns.vector.reduction_to_contract + // Reduce the rank of xfer ops. This transforms vector.contract to be + // more matmul-like and to enable the lowering to outer product Ops. + transform.apply_patterns.vector.transfer_permutation_patterns + } : !transform.op<"func.func"> + + // Hoisting and LICM - not strictly required + %func_h = transform.structured.hoist_redundant_vector_transfers %func + : (!transform.op<"func.func">) -> !transform.op<"func.func"> + %all_loops = transform.structured.match interface{LoopLikeInterface} in %func_h + : (!transform.op<"func.func">) -> !transform.any_op + transform.apply_licm to %all_loops : !transform.any_op + transform.loop.hoist_loop_invariant_subsets %all_loops : !transform.any_op + + // Simplify the 6-dim vector.contract into a 3-dim matmul-like + // vector.contract with the following split into parallel and reduction + // dims: + // * parallel, parallel, reduction + transform.apply_patterns to %func_h { + transform.apply_patterns.vector.reduction_to_contract + transform.apply_patterns.vector.cast_away_vector_leading_one_dim + transform.apply_patterns.canonicalization + } : !transform.op<"func.func"> + + // Step 4: Lower vector.contract to vector.fma via 
vector.outerproduct + transform.apply_patterns to %func_h { + transform.apply_patterns.vector.lower_contraction lowering_strategy = "outerproduct" + transform.apply_patterns.vector.lower_outerproduct + } : !transform.op<"func.func"> + transform.yield + } +} diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir index 5d1bef478ee987..0272ac599aa3db 100644 --- a/mlir/test/Dialect/Linalg/vectorization.mlir +++ b/mlir/test/Dialect/Linalg/vectorization.mlir @@ -639,6 +639,31 @@ module attributes {transform.with_named_sequence} { // ----- +func.func @mmt4d(%A: memref<16x16x8x1xf32>, %B: memref<16x16x8x1xf32>, %C_in: memref<16x16x8x8xf32>) { + linalg.mmt4d ins(%A, %B: memref<16x16x8x1xf32>, memref<16x16x8x1xf32>) + outs(%C_in: memref<16x16x8x8xf32>) + return +} + +// CHECK-LABEL: func.func @mmt4d( +// CHECK-SAME: %[[A:.*]]: memref<16x16x8x1xf32>, %[[B:.*]]: memref<16x16x8x1xf32>, %[[C:.*]]: memref<16x16x8x8xf32>) { +// CHECK: %[[VEC_A:.*]] = vector.transfer_read %[[A]]{{.*}} : memref<16x16x8x1xf32>, vector<16x16x16x8x8x1xf32> +// CHECK: %[[VEC_B:.*]] = vector.transfer_read %[[B]]{{.*}} : memref<16x16x8x1xf32>, vector<16x16x16x8x8x1xf32> +// CHECK: %[[VEC_C:.*]] = vector.transfer_read %[[C]]{{.*}} : memref<16x16x8x8xf32>, vector<16x16x8x8xf32> +// CHECK: %[[MUL:.*]] = arith.mulf %[[VEC_A]], %[[VEC_B]] : vector<16x16x16x8x8x1xf32> +// CHECK: %[[RED:.*]] = vector.multi_reduction , %[[MUL]], %[[VEC_C]] [2, 5] : vector<16x16x16x8x8x1xf32> to vector<16x16x8x8xf32> +// CHECK: vector.transfer_write %[[RED]], %[[C]]{{.*}} : vector<16x16x8x8xf32>, memref<16x16x8x8xf32> + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %mmt4d = transform.structured.match ops{["linalg.mmt4d"]} in %arg1 : (!transform.any_op) -> !transform.any_op + transform.structured.vectorize %mmt4d : !transform.any_op + transform.yield + } +} + +// ----- + func.func 
@matmul_scalable(%A: memref, %B: memref, %C: memref) { linalg.matmul ins(%A, %B: memref, memref) outs(%C: memref) From f879ac0385d4c5f7b2b9f4807cd7bd4a78556c1c Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Tue, 13 Feb 2024 10:45:43 -0600 Subject: [PATCH 022/240] [libc] Rework the RPC interface to accept runtime wave sizes (#80914) Summary: The RPC interface needs to handle an entire warp or wavefront at once. This is currently done by using a compile time constant indicating the size of the buffer, which right now defaults to some value on the client (GPU) side. However, there are currently attempts to move the `libc` library to a single IR build. This is problematic as the size of the wavefronts changes between ISAs on AMDGPU. The builtin `__builtin_amdgcn_wavefrontsize()` will return the appropriate value, but it is only known at runtime now. In order to support this, this patch restructures the packet. Now instead of having an array of arrays, we simply have a large array of buffers and slice it according to the runtime value if we don't know it ahead of time. This also somewhat has the advantage of making the buffer contiguous within a page now that the header has been moved out of it. --- libc/src/__support/GPU/amdgpu/utils.h | 10 +- libc/src/__support/GPU/generic/utils.h | 4 +- libc/src/__support/GPU/nvptx/utils.h | 7 +- libc/src/__support/RPC/rpc.h | 152 +++++++++--------- .../test/src/__support/RPC/rpc_smoke_test.cpp | 10 +- libc/utils/gpu/server/rpc_server.cpp | 56 +++---- 6 files changed, 108 insertions(+), 131 deletions(-) diff --git a/libc/src/__support/GPU/amdgpu/utils.h b/libc/src/__support/GPU/amdgpu/utils.h index 58bbe29cb3a7d7..9432b7b39f7836 100644 --- a/libc/src/__support/GPU/amdgpu/utils.h +++ b/libc/src/__support/GPU/amdgpu/utils.h @@ -17,9 +17,6 @@ namespace LIBC_NAMESPACE { namespace gpu { -/// The number of threads that execute in lock-step in a lane.
-constexpr const uint64_t LANE_SIZE = __AMDGCN_WAVEFRONT_SIZE; - /// Type aliases to the address spaces used by the AMDGPU backend. template using Private = [[clang::opencl_private]] T; template using Constant = [[clang::opencl_constant]] T; @@ -108,8 +105,11 @@ LIBC_INLINE uint64_t get_thread_id() { get_num_threads_x() * get_num_threads_y() * get_thread_id_z(); } -/// Returns the size of an AMD wavefront. Either 32 or 64 depending on hardware. -LIBC_INLINE uint32_t get_lane_size() { return LANE_SIZE; } +/// Returns the size of an AMD wavefront, either 32 or 64 depending on hardware +/// and compilation options. +LIBC_INLINE uint32_t get_lane_size() { + return __builtin_amdgcn_wavefrontsize(); +} /// Returns the id of the thread inside of an AMD wavefront executing together. [[clang::convergent]] LIBC_INLINE uint32_t get_lane_id() { diff --git a/libc/src/__support/GPU/generic/utils.h b/libc/src/__support/GPU/generic/utils.h index 00b59837ccc671..58db88dce1ca8c 100644 --- a/libc/src/__support/GPU/generic/utils.h +++ b/libc/src/__support/GPU/generic/utils.h @@ -16,8 +16,6 @@ namespace LIBC_NAMESPACE { namespace gpu { -constexpr const uint64_t LANE_SIZE = 1; - template using Private = T; template using Constant = T; template using Shared = T; @@ -55,7 +53,7 @@ LIBC_INLINE uint32_t get_thread_id_z() { return 0; } LIBC_INLINE uint64_t get_thread_id() { return 0; } -LIBC_INLINE uint32_t get_lane_size() { return LANE_SIZE; } +LIBC_INLINE uint32_t get_lane_size() { return 1; } LIBC_INLINE uint32_t get_lane_id() { return 0; } diff --git a/libc/src/__support/GPU/nvptx/utils.h b/libc/src/__support/GPU/nvptx/utils.h index e7e297adf7ecca..6c4bb5a7720a50 100644 --- a/libc/src/__support/GPU/nvptx/utils.h +++ b/libc/src/__support/GPU/nvptx/utils.h @@ -16,9 +16,6 @@ namespace LIBC_NAMESPACE { namespace gpu { -/// The number of threads that execute in lock-step in a warp. -constexpr const uint64_t LANE_SIZE = 32; - /// Type aliases to the address spaces used by the NVPTX backend. 
template using Private = [[clang::opencl_private]] T; template using Constant = [[clang::opencl_constant]] T; @@ -95,8 +92,8 @@ LIBC_INLINE uint64_t get_thread_id() { get_num_threads_x() * get_num_threads_y() * get_thread_id_z(); } -/// Returns the size of a CUDA warp. -LIBC_INLINE uint32_t get_lane_size() { return LANE_SIZE; } +/// Returns the size of a CUDA warp, always 32 on NVIDIA hardware. +LIBC_INLINE uint32_t get_lane_size() { return 32; } /// Returns the id of the thread inside of a CUDA warp executing together. [[clang::convergent]] LIBC_INLINE uint32_t get_lane_id() { diff --git a/libc/src/__support/RPC/rpc.h b/libc/src/__support/RPC/rpc.h index 7924d4cec2ac84..5ed39ae0d7f7a9 100644 --- a/libc/src/__support/RPC/rpc.h +++ b/libc/src/__support/RPC/rpc.h @@ -43,19 +43,6 @@ struct Header { uint16_t opcode; }; -/// The data payload for the associated packet. We provide enough space for each -/// thread in the cooperating lane to have a buffer. -template struct Payload { - Buffer slot[lane_size]; -}; - -/// A packet used to share data between the client and server across an entire -/// lane. We use a lane as the minimum granularity for execution. -template struct alignas(64) Packet { - Header header; - Payload payload; -}; - /// The maximum number of parallel ports that the RPC interface can support. constexpr uint64_t MAX_PORT_COUNT = 4096; @@ -71,7 +58,7 @@ constexpr uint64_t MAX_PORT_COUNT = 4096; /// - The client will always start with a 'send' operation. /// - The server will always start with a 'recv' operation. /// - Every 'send' or 'recv' call is mirrored by the other process. 
-template struct Process { +template struct Process { LIBC_INLINE Process() = default; LIBC_INLINE Process(const Process &) = delete; LIBC_INLINE Process &operator=(const Process &) = delete; @@ -82,7 +69,8 @@ template struct Process { uint32_t port_count = 0; cpp::Atomic *inbox = nullptr; cpp::Atomic *outbox = nullptr; - Packet *packet = nullptr; + Header *header = nullptr; + Buffer *packet = nullptr; static constexpr uint64_t NUM_BITS_IN_WORD = sizeof(uint32_t) * 8; cpp::Atomic lock[MAX_PORT_COUNT / NUM_BITS_IN_WORD] = {0}; @@ -92,7 +80,9 @@ template struct Process { advance(buffer, inbox_offset(port_count)))), outbox(reinterpret_cast *>( advance(buffer, outbox_offset(port_count)))), - packet(reinterpret_cast( + header(reinterpret_cast
( + advance(buffer, header_offset(port_count)))), + packet(reinterpret_cast( advance(buffer, buffer_offset(port_count)))) {} /// Allocate a memory buffer sufficient to store the following equivalent @@ -101,10 +91,12 @@ template struct Process { /// struct Equivalent { /// Atomic primary[port_count]; /// Atomic secondary[port_count]; - /// Packet buffer[port_count]; + /// Header header[port_count]; + /// Buffer packet[port_count][lane_size]; /// }; - LIBC_INLINE static constexpr uint64_t allocation_size(uint32_t port_count) { - return buffer_offset(port_count) + buffer_bytes(port_count); + LIBC_INLINE static constexpr uint64_t allocation_size(uint32_t port_count, + uint32_t lane_size) { + return buffer_offset(port_count) + buffer_bytes(port_count, lane_size); } /// Retrieve the inbox state from memory shared between processes. @@ -144,6 +136,13 @@ template struct Process { atomic_thread_fence(cpp::MemoryOrder::ACQUIRE); } + /// The packet is a linearly allocated array of buffers used to communicate + /// with the other process. This function returns the appropriate slot in this + /// array such that the process can operate on an entire warp or wavefront. + LIBC_INLINE Buffer *get_packet(uint32_t index, uint32_t lane_size) { + return &packet[index * lane_size]; + } + /// Determines if this process needs to wait for ownership of the buffer. We /// invert the condition on one of the processes to indicate that if one /// process owns the buffer then the other does not. @@ -219,8 +218,9 @@ template struct Process { } /// Number of bytes to allocate for the buffer containing the packets. - LIBC_INLINE static constexpr uint64_t buffer_bytes(uint32_t port_count) { - return port_count * sizeof(Packet); + LIBC_INLINE static constexpr uint64_t buffer_bytes(uint32_t port_count, + uint32_t lane_size) { + return port_count * lane_size * sizeof(Buffer); } /// Offset of the inbox in memory. This is the same as the outbox if inverted. 
@@ -233,9 +233,15 @@ template struct Process { return Invert ? 0 : mailbox_bytes(port_count); } + /// Offset of the buffer containing the packets after the inbox and outbox. + LIBC_INLINE static constexpr uint64_t header_offset(uint32_t port_count) { + return align_up(2 * mailbox_bytes(port_count), alignof(Header)); + } + /// Offset of the buffer containing the packets after the inbox and outbox. LIBC_INLINE static constexpr uint64_t buffer_offset(uint32_t port_count) { - return align_up(2 * mailbox_bytes(port_count), alignof(Packet)); + return align_up(header_offset(port_count) + port_count * sizeof(Header), + alignof(Buffer)); } /// Conditionally set the n-th bit in the atomic bitfield. @@ -262,39 +268,39 @@ template struct Process { }; /// Invokes a function accross every active buffer across the total lane size. -template static LIBC_INLINE void invoke_rpc(cpp::function fn, - Packet &packet) { + uint32_t lane_size, uint64_t lane_mask, + Buffer *slot) { if constexpr (is_process_gpu()) { - fn(&packet.payload.slot[gpu::get_lane_id()]); + fn(&slot[gpu::get_lane_id()]); } else { for (uint32_t i = 0; i < lane_size; i += gpu::get_lane_size()) - if (packet.header.mask & 1ul << i) - fn(&packet.payload.slot[i]); + if (lane_mask & (1ul << i)) + fn(&slot[i]); } } /// Alternate version that also provides the index of the current lane. -template static LIBC_INLINE void invoke_rpc(cpp::function fn, - Packet &packet) { + uint32_t lane_size, uint64_t lane_mask, + Buffer *slot) { if constexpr (is_process_gpu()) { - fn(&packet.payload.slot[gpu::get_lane_id()], gpu::get_lane_id()); + fn(&slot[gpu::get_lane_id()], gpu::get_lane_id()); } else { for (uint32_t i = 0; i < lane_size; i += gpu::get_lane_size()) - if (packet.header.mask & 1ul << i) - fn(&packet.payload.slot[i], i); + if (lane_mask & (1ul << i)) + fn(&slot[i], i); } } /// The port provides the interface to communicate between the multiple /// processes. 
A port is conceptually an index into the memory provided by the /// underlying process that is guarded by a lock bit. -template struct Port { - LIBC_INLINE Port(Process &process, uint64_t lane_mask, uint32_t index, - uint32_t out) - : process(process), lane_mask(lane_mask), index(index), out(out), - receive(false), owns_buffer(true) {} +template struct Port { + LIBC_INLINE Port(Process &process, uint64_t lane_mask, uint32_t lane_size, + uint32_t index, uint32_t out) + : process(process), lane_mask(lane_mask), lane_size(lane_size), + index(index), out(out), receive(false), owns_buffer(true) {} LIBC_INLINE ~Port() = default; private: @@ -305,7 +311,7 @@ template struct Port { friend struct Client; template friend struct Server; - friend class cpp::optional>; + friend class cpp::optional>; public: template LIBC_INLINE void recv(U use); @@ -319,7 +325,7 @@ template struct Port { LIBC_INLINE void recv_n(void **dst, uint64_t *size, A &&alloc); LIBC_INLINE uint16_t get_opcode() const { - return process.packet[index].header.opcode; + return process.header[index].opcode; } LIBC_INLINE uint16_t get_index() const { return index; } @@ -333,8 +339,9 @@ template struct Port { } private: - Process &process; + Process &process; uint64_t lane_mask; + uint32_t lane_size; uint32_t index; uint32_t out; bool receive; @@ -351,15 +358,14 @@ struct Client { LIBC_INLINE Client(uint32_t port_count, void *buffer) : process(port_count, buffer) {} - using Port = rpc::Port>; + using Port = rpc::Port; template LIBC_INLINE Port open(); private: - Process> process; + Process process; }; static_assert(cpp::is_trivially_copyable::value && - sizeof(Process>) == - sizeof(Process>), + sizeof(Process) == sizeof(Process), "The client is not trivially copyable from the server"); /// The RPC server used to respond to the client. 
@@ -372,38 +378,35 @@ template struct Server { LIBC_INLINE Server(uint32_t port_count, void *buffer) : process(port_count, buffer) {} - using Port = rpc::Port>; + using Port = rpc::Port; LIBC_INLINE cpp::optional try_open(uint32_t start = 0); LIBC_INLINE Port open(); LIBC_INLINE static uint64_t allocation_size(uint32_t port_count) { - return Process>::allocation_size(port_count); + return Process::allocation_size(port_count, lane_size); } private: - Process> process; + Process process; }; /// Applies \p fill to the shared buffer and initiates a send operation. -template -template -LIBC_INLINE void Port::send(F fill) { +template template LIBC_INLINE void Port::send(F fill) { uint32_t in = owns_buffer ? out ^ T : process.load_inbox(lane_mask, index); // We need to wait until we own the buffer before sending. process.wait_for_ownership(lane_mask, index, out, in); // Apply the \p fill function to initialize the buffer and release the memory. - invoke_rpc(fill, process.packet[index]); + invoke_rpc(fill, lane_size, process.header[index].mask, + process.get_packet(index, lane_size)); out = process.invert_outbox(index, out); owns_buffer = false; receive = false; } /// Applies \p use to the shared buffer and acknowledges the send. -template -template -LIBC_INLINE void Port::recv(U use) { +template template LIBC_INLINE void Port::recv(U use) { // We only exchange ownership of the buffer during a receive if we are waiting // for a previous receive to finish. if (receive) { @@ -417,15 +420,16 @@ LIBC_INLINE void Port::recv(U use) { process.wait_for_ownership(lane_mask, index, out, in); // Apply the \p use function to read the memory out of the buffer. - invoke_rpc(use, process.packet[index]); + invoke_rpc(use, lane_size, process.header[index].mask, + process.get_packet(index, lane_size)); receive = true; owns_buffer = true; } /// Combines a send and receive into a single function. 
-template +template template -LIBC_INLINE void Port::send_and_recv(F fill, U use) { +LIBC_INLINE void Port::send_and_recv(F fill, U use) { send(fill); recv(use); } @@ -433,17 +437,17 @@ LIBC_INLINE void Port::send_and_recv(F fill, U use) { /// Combines a receive and send operation into a single function. The \p work /// function modifies the buffer in-place and the send is only used to initiate /// the copy back. -template +template template -LIBC_INLINE void Port::recv_and_send(W work) { +LIBC_INLINE void Port::recv_and_send(W work) { recv(work); send([](Buffer *) { /* no-op */ }); } /// Helper routine to simplify the interface when sending from the GPU using /// thread private pointers to the underlying value. -template -LIBC_INLINE void Port::send_n(const void *src, uint64_t size) { +template +LIBC_INLINE void Port::send_n(const void *src, uint64_t size) { const void **src_ptr = &src; uint64_t *size_ptr = &size; send_n(src_ptr, size_ptr); @@ -451,8 +455,8 @@ LIBC_INLINE void Port::send_n(const void *src, uint64_t size) { /// Sends an arbitrarily sized data buffer \p src across the shared channel in /// multiples of the packet length. 
-template -LIBC_INLINE void Port::send_n(const void *const *src, uint64_t *size) { +template +LIBC_INLINE void Port::send_n(const void *const *src, uint64_t *size) { uint64_t num_sends = 0; send([&](Buffer *buffer, uint32_t id) { reinterpret_cast(buffer->data)[0] = lane_value(size, id); @@ -465,7 +469,7 @@ LIBC_INLINE void Port::send_n(const void *const *src, uint64_t *size) { rpc_memcpy(&buffer->data[1], lane_value(src, id), len); }); uint64_t idx = sizeof(Buffer::data) - sizeof(uint64_t); - uint64_t mask = process.packet[index].header.mask; + uint64_t mask = process.header[index].mask; while (gpu::ballot(mask, idx < num_sends)) { send([=](Buffer *buffer, uint32_t id) { uint64_t len = lane_value(size, id) - idx > sizeof(Buffer::data) @@ -481,9 +485,9 @@ LIBC_INLINE void Port::send_n(const void *const *src, uint64_t *size) { /// Receives an arbitrarily sized data buffer across the shared channel in /// multiples of the packet length. The \p alloc function is called with the /// size of the data so that we can initialize the size of the \p dst buffer. -template +template template -LIBC_INLINE void Port::recv_n(void **dst, uint64_t *size, A &&alloc) { +LIBC_INLINE void Port::recv_n(void **dst, uint64_t *size, A &&alloc) { uint64_t num_recvs = 0; recv([&](Buffer *buffer, uint32_t id) { lane_value(size, id) = reinterpret_cast(buffer->data)[0]; @@ -498,7 +502,7 @@ LIBC_INLINE void Port::recv_n(void **dst, uint64_t *size, A &&alloc) { rpc_memcpy(lane_value(dst, id), &buffer->data[1], len); }); uint64_t idx = sizeof(Buffer::data) - sizeof(uint64_t); - uint64_t mask = process.packet[index].header.mask; + uint64_t mask = process.header[index].mask; while (gpu::ballot(mask, idx < num_recvs)) { recv([=](Buffer *buffer, uint32_t id) { uint64_t len = lane_value(size, id) - idx > sizeof(Buffer::data) @@ -515,8 +519,10 @@ LIBC_INLINE void Port::recv_n(void **dst, uint64_t *size, A &&alloc) { /// only open a port if we find an index that is in a valid sending state. 
That /// is, there are send operations pending that haven't been serviced on this /// port. Each port instance uses an associated \p opcode to tell the server -/// what to do. -template LIBC_INLINE Client::Port Client::open() { +/// what to do. The Client interface provides the appropriate lane size to the +/// port using the platform's returned value. +template +[[clang::convergent]] LIBC_INLINE Client::Port Client::open() { // Repeatedly perform a naive linear scan for a port that can be opened to // send data. for (uint32_t index = gpu::get_cluster_id();; ++index) { @@ -540,11 +546,11 @@ template LIBC_INLINE Client::Port Client::open() { } if (gpu::is_first_lane(lane_mask)) { - process.packet[index].header.opcode = opcode; - process.packet[index].header.mask = lane_mask; + process.header[index].opcode = opcode; + process.header[index].mask = lane_mask; } gpu::sync_lane(lane_mask); - return Port(process, lane_mask, index, out); + return Port(process, lane_mask, gpu::get_lane_size(), index, out); } } @@ -577,7 +583,7 @@ template continue; } - return Port(process, lane_mask, index, out); + return Port(process, lane_mask, lane_size, index, out); } return cpp::nullopt; } diff --git a/libc/test/src/__support/RPC/rpc_smoke_test.cpp b/libc/test/src/__support/RPC/rpc_smoke_test.cpp index 54821e21f9ccf7..58b318c7cfa61b 100644 --- a/libc/test/src/__support/RPC/rpc_smoke_test.cpp +++ b/libc/test/src/__support/RPC/rpc_smoke_test.cpp @@ -13,12 +13,8 @@ namespace { enum { lane_size = 8, port_count = 4 }; -struct Packet { - uint64_t unused; -}; - -using ProcAType = LIBC_NAMESPACE::rpc::Process; -using ProcBType = LIBC_NAMESPACE::rpc::Process; +using ProcAType = LIBC_NAMESPACE::rpc::Process; +using ProcBType = LIBC_NAMESPACE::rpc::Process; static_assert(ProcAType::inbox_offset(port_count) == ProcBType::outbox_offset(port_count)); @@ -26,7 +22,7 @@ static_assert(ProcAType::inbox_offset(port_count) == static_assert(ProcAType::outbox_offset(port_count) == 
ProcBType::inbox_offset(port_count)); -enum { alloc_size = ProcAType::allocation_size(port_count) }; +enum { alloc_size = ProcAType::allocation_size(port_count, 1) }; alignas(64) char buffer[alloc_size] = {0}; } // namespace diff --git a/libc/utils/gpu/server/rpc_server.cpp b/libc/utils/gpu/server/rpc_server.cpp index a2e5d0fd5a833f..4e535a294a19e4 100644 --- a/libc/utils/gpu/server/rpc_server.cpp +++ b/libc/utils/gpu/server/rpc_server.cpp @@ -396,62 +396,42 @@ const void *rpc_get_client_buffer(uint32_t device_id) { uint64_t rpc_get_client_size() { return sizeof(rpc::Client); } -using ServerPort = std::variant::Port *, rpc::Server<32>::Port *, - rpc::Server<64>::Port *>; +using ServerPort = std::variant::Port *>; ServerPort get_port(rpc_port_t ref) { - if (ref.lane_size == 1) - return reinterpret_cast::Port *>(ref.handle); - else if (ref.lane_size == 32) - return reinterpret_cast::Port *>(ref.handle); - else if (ref.lane_size == 64) - return reinterpret_cast::Port *>(ref.handle); - else - __builtin_unreachable(); + return reinterpret_cast::Port *>(ref.handle); } void rpc_send(rpc_port_t ref, rpc_port_callback_ty callback, void *data) { - auto port = get_port(ref); - std::visit( - [=](auto &port) { - port->send([=](rpc::Buffer *buffer) { - callback(reinterpret_cast(buffer), data); - }); - }, - port); + auto port = reinterpret_cast::Port *>(ref.handle); + port->send([=](rpc::Buffer *buffer) { + callback(reinterpret_cast(buffer), data); + }); } void rpc_send_n(rpc_port_t ref, const void *const *src, uint64_t *size) { - auto port = get_port(ref); - std::visit([=](auto &port) { port->send_n(src, size); }, port); + auto port = reinterpret_cast::Port *>(ref.handle); + port->send_n(src, size); } void rpc_recv(rpc_port_t ref, rpc_port_callback_ty callback, void *data) { - auto port = get_port(ref); - std::visit( - [=](auto &port) { - port->recv([=](rpc::Buffer *buffer) { - callback(reinterpret_cast(buffer), data); - }); - }, - port); + auto port = reinterpret_cast::Port 
*>(ref.handle); + port->recv([=](rpc::Buffer *buffer) { + callback(reinterpret_cast(buffer), data); + }); } void rpc_recv_n(rpc_port_t ref, void **dst, uint64_t *size, rpc_alloc_ty alloc, void *data) { - auto port = get_port(ref); + auto port = reinterpret_cast::Port *>(ref.handle); auto alloc_fn = [=](uint64_t size) { return alloc(size, data); }; - std::visit([=](auto &port) { port->recv_n(dst, size, alloc_fn); }, port); + port->recv_n(dst, size, alloc_fn); } void rpc_recv_and_send(rpc_port_t ref, rpc_port_callback_ty callback, void *data) { - auto port = get_port(ref); - std::visit( - [=](auto &port) { - port->recv_and_send([=](rpc::Buffer *buffer) { - callback(reinterpret_cast(buffer), data); - }); - }, - port); + auto port = reinterpret_cast::Port *>(ref.handle); + port->recv_and_send([=](rpc::Buffer *buffer) { + callback(reinterpret_cast(buffer), data); + }); } From d79c3c50c45f2bd0acc0269dbedde9ddeed2d50e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Tue, 13 Feb 2024 08:52:13 -0800 Subject: [PATCH 023/240] [flang][cuda] Lower launch_bounds values (#81537) This PR adds a new attribute to carry over the information from `launch_bounds`. The new attribute `CUDALaunchBoundsAttr` holds 2 to 3 integer attributes and is added to the `func.func` operation.
--- .../flang/Optimizer/Dialect/FIRAttr.td | 12 +++++ .../flang/Optimizer/Dialect/FIROpsSupport.h | 5 +++ flang/lib/Lower/CallInterface.cpp | 45 ++++++++++++++++--- flang/lib/Optimizer/Dialect/FIRAttr.cpp | 3 +- flang/test/Lower/CUDA/cuda-proc-attribute.cuf | 6 +++ 5 files changed, 64 insertions(+), 7 deletions(-) diff --git a/flang/include/flang/Optimizer/Dialect/FIRAttr.td b/flang/include/flang/Optimizer/Dialect/FIRAttr.td index 00e293e2f04278..3602c67de1412a 100644 --- a/flang/include/flang/Optimizer/Dialect/FIRAttr.td +++ b/flang/include/flang/Optimizer/Dialect/FIRAttr.td @@ -113,4 +113,16 @@ def fir_CUDAProcAttributeAttr : let assemblyFormat = [{ ```<` $value `>` }]; } +def fir_CUDALaunchBoundsAttr : fir_Attr<"CUDALaunchBounds"> { + let mnemonic = "launch_bounds"; + + let parameters = (ins + "mlir::IntegerAttr":$maxTPB, + "mlir::IntegerAttr":$minBPM, + OptionalParameter<"mlir::IntegerAttr">:$upperBoundClusterSize + ); + + let assemblyFormat = "`<` struct(params) `>`"; +} + #endif // FIR_DIALECT_FIR_ATTRS diff --git a/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h b/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h index 6ac6a3116d40b0..29fa57cd7a0d8a 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h +++ b/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h @@ -75,6 +75,11 @@ static constexpr llvm::StringRef getTargetAttrName() { return "fir.target"; } /// Attribute to mark Fortran entities with the CUDA attribute. static constexpr llvm::StringRef getCUDAAttrName() { return "fir.cuda_attr"; } +/// Attribute to carry CUDA launch_bounds values. +static constexpr llvm::StringRef getCUDALaunchBoundsAttrName() { + return "fir.cuda_launch_bounds"; +} + /// Attribute to mark that a function argument is a character dummy procedure. /// Character dummy procedure have special ABI constraints. 
static constexpr llvm::StringRef getCharacterProcedureDummyAttrName() { diff --git a/flang/lib/Lower/CallInterface.cpp b/flang/lib/Lower/CallInterface.cpp index 41597c1b15386e..f990e0b7ce4dcf 100644 --- a/flang/lib/Lower/CallInterface.cpp +++ b/flang/lib/Lower/CallInterface.cpp @@ -524,6 +524,43 @@ static void addSymbolAttribute(mlir::func::FuncOp func, mlir::StringAttr::get(&mlirContext, name)); } +static void +setCUDAAttributes(mlir::func::FuncOp func, + const Fortran::semantics::Symbol *sym, + std::optional + characteristic) { + if (characteristic && characteristic->cudaSubprogramAttrs) { + func.getOperation()->setAttr( + fir::getCUDAAttrName(), + fir::getCUDAProcAttribute(func.getContext(), + *characteristic->cudaSubprogramAttrs)); + } + + if (sym) { + if (auto details = + sym->GetUltimate() + .detailsIf()) { + if (!details->cudaLaunchBounds().empty()) { + assert(details->cudaLaunchBounds().size() >= 2 && + "expect at least 2 values"); + mlir::Type i64Ty = mlir::IntegerType::get(func.getContext(), 64); + auto maxTPBAttr = + mlir::IntegerAttr::get(i64Ty, details->cudaLaunchBounds()[0]); + auto minBPMAttr = + mlir::IntegerAttr::get(i64Ty, details->cudaLaunchBounds()[1]); + mlir::IntegerAttr ubAttr; + if (details->cudaLaunchBounds().size() > 2) + ubAttr = + mlir::IntegerAttr::get(i64Ty, details->cudaLaunchBounds()[2]); + func.getOperation()->setAttr( + fir::getCUDALaunchBoundsAttrName(), + fir::CUDALaunchBoundsAttr::get(func.getContext(), maxTPBAttr, + minBPMAttr, ubAttr)); + } + } + } +} + /// Declare drives the different actions to be performed while analyzing the /// signature and building/finding the mlir::func::FuncOp. 
template @@ -559,12 +596,8 @@ void Fortran::lower::CallInterface::declare() { if (!placeHolder.value().attributes.empty()) func.setArgAttrs(placeHolder.index(), placeHolder.value().attributes); side().setFuncAttrs(func); - } - if (characteristic && characteristic->cudaSubprogramAttrs) { - func.getOperation()->setAttr( - fir::getCUDAAttrName(), - fir::getCUDAProcAttribute(func.getContext(), - *characteristic->cudaSubprogramAttrs)); + + setCUDAAttributes(func, side().getProcedureSymbol(), characteristic); } } } diff --git a/flang/lib/Optimizer/Dialect/FIRAttr.cpp b/flang/lib/Optimizer/Dialect/FIRAttr.cpp index 8df7a6c5cfc5d5..8d780e03dcbe73 100644 --- a/flang/lib/Optimizer/Dialect/FIRAttr.cpp +++ b/flang/lib/Optimizer/Dialect/FIRAttr.cpp @@ -298,5 +298,6 @@ void fir::printFirAttribute(FIROpsDialect *dialect, mlir::Attribute attr, void FIROpsDialect::registerAttributes() { addAttributes(); + UpperBoundAttr, CUDADataAttributeAttr, CUDAProcAttributeAttr, + CUDALaunchBoundsAttr>(); } diff --git a/flang/test/Lower/CUDA/cuda-proc-attribute.cuf b/flang/test/Lower/CUDA/cuda-proc-attribute.cuf index 050731086d8525..9eb2b85aaf0b83 100644 --- a/flang/test/Lower/CUDA/cuda-proc-attribute.cuf +++ b/flang/test/Lower/CUDA/cuda-proc-attribute.cuf @@ -32,3 +32,9 @@ attributes(host) attributes(device) integer function fct_host_device; end attributes(device) attributes(host) integer function fct_device_host; end ! CHECK: func.func @_QPfct_device_host() -> i32 attributes {fir.cuda_attr = #fir.cuda_proc} + +attributes(global) launch_bounds(1, 2) subroutine sub_lbounds1(); end +! CHECK: func.func @_QPsub_lbounds1() attributes {fir.cuda_attr = #fir.cuda_proc, fir.cuda_launch_bounds = #fir.launch_bounds} + +attributes(global) launch_bounds(1, 2, 3) subroutine sub_lbounds2(); end +! 
CHECK: func.func @_QPsub_lbounds2() attributes {fir.cuda_attr = #fir.cuda_proc, fir.cuda_launch_bounds = #fir.launch_bounds} From 1dacfd119071af50eaef21a97a46076ee6ff20fd Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Tue, 13 Feb 2024 10:55:28 -0600 Subject: [PATCH 024/240] [libc] Round up time for GPU nanosleep implementation (#81630) Summary: The GPU `nanosleep` tests would occasionally fail. This was due to the fact that we used integer division to determine how many ticks we had to sleep for. This would then truncate, leaving us with a value just slightly below the requested value. This would then occasionally leave us with a return value of `-1`. This patch just changes the code to round up by 1 so we always sleep for at least the requested value. --- libc/src/time/gpu/nanosleep.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/libc/src/time/gpu/nanosleep.cpp b/libc/src/time/gpu/nanosleep.cpp index e84fe622100e80..34ff904c49c65b 100644 --- a/libc/src/time/gpu/nanosleep.cpp +++ b/libc/src/time/gpu/nanosleep.cpp @@ -12,18 +12,19 @@ namespace LIBC_NAMESPACE { -constexpr uint64_t TICKS_PER_NS = 1000000000UL; +constexpr uint64_t TICKS_PER_SEC = 1000000000UL; LLVM_LIBC_FUNCTION(int, nanosleep, (const struct timespec *req, struct timespec *rem)) { if (!GPU_CLOCKS_PER_SEC || !req) return -1; - uint64_t nsecs = req->tv_nsec + req->tv_sec * TICKS_PER_NS; + uint64_t nsecs = req->tv_nsec + req->tv_sec * TICKS_PER_SEC; + uint64_t tick_rate = TICKS_PER_SEC / GPU_CLOCKS_PER_SEC; uint64_t start = gpu::fixed_frequency_clock(); #if defined(LIBC_TARGET_ARCH_IS_NVPTX) && __CUDA_ARCH__ >= 700 - uint64_t end = start + nsecs / (TICKS_PER_NS / GPU_CLOCKS_PER_SEC); + uint64_t end = start + (nsecs + tick_rate - 1) / tick_rate; uint64_t cur = gpu::fixed_frequency_clock(); // The NVPTX architecture supports sleeping and guaruntees the actual time // slept will be somewhere between zero and twice the requested amount. 
Here @@ -34,7 +35,7 @@ LLVM_LIBC_FUNCTION(int, nanosleep, nsecs -= nsecs > cur - start ? cur - start : 0; } #elif defined(LIBC_TARGET_ARCH_IS_AMDGPU) - uint64_t end = start + nsecs / (TICKS_PER_NS / GPU_CLOCKS_PER_SEC); + uint64_t end = start + (nsecs + tick_rate - 1) / tick_rate; uint64_t cur = gpu::fixed_frequency_clock(); // The AMDGPU architecture does not provide a sleep implementation with a // known delay so we simply repeatedly sleep with a large value of ~960 clock @@ -56,11 +57,11 @@ LLVM_LIBC_FUNCTION(int, nanosleep, // Check to make sure we slept for at least the desired duration and set the // remaining time if not. - uint64_t elapsed = (stop - start) * (TICKS_PER_NS / GPU_CLOCKS_PER_SEC); + uint64_t elapsed = (stop - start) * tick_rate; if (elapsed < nsecs) { if (rem) { - rem->tv_sec = (nsecs - elapsed) / TICKS_PER_NS; - rem->tv_nsec = (nsecs - elapsed) % TICKS_PER_NS; + rem->tv_sec = (nsecs - elapsed) / TICKS_PER_SEC; + rem->tv_nsec = (nsecs - elapsed) % TICKS_PER_SEC; } return -1; } From e847abc5b47210de63455f67e58225121617873b Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Tue, 13 Feb 2024 16:47:45 +0000 Subject: [PATCH 025/240] [TableGen] Remove trivial helper function hasRegUnit. NFC. --- llvm/utils/TableGen/CodeGenRegisters.cpp | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/llvm/utils/TableGen/CodeGenRegisters.cpp b/llvm/utils/TableGen/CodeGenRegisters.cpp index 0b671271bf0c9c..e29bc50118f0e6 100644 --- a/llvm/utils/TableGen/CodeGenRegisters.cpp +++ b/llvm/utils/TableGen/CodeGenRegisters.cpp @@ -253,11 +253,6 @@ CodeGenRegister::RegUnitList RegUnitIterator::Sentinel; } // end anonymous namespace -// Return true of this unit appears in RegUnits. -static bool hasRegUnit(CodeGenRegister::RegUnitList &RegUnits, unsigned Unit) { - return RegUnits.test(Unit); -} - // Inherit register units from subregisters. // Return true if the RegUnits changed. 
bool CodeGenRegister::inheritRegUnits(CodeGenRegBank &RegBank) { @@ -1842,9 +1837,8 @@ static bool normalizeWeight(CodeGenRegister *Reg, // for this register, has not been used to normalize a subregister's set, // and has not already been used to singularly determine this UberRegSet. unsigned AdjustUnit = *Reg->getRegUnits().begin(); - if (Reg->getRegUnits().count() != 1 || - hasRegUnit(NormalUnits, AdjustUnit) || - hasRegUnit(UberSet->SingularDeterminants, AdjustUnit)) { + if (Reg->getRegUnits().count() != 1 || NormalUnits.test(AdjustUnit) || + UberSet->SingularDeterminants.test(AdjustUnit)) { // We don't have an adjustable unit, so adopt a new one. AdjustUnit = RegBank.newRegUnit(UberSet->Weight - RegWeight); Reg->adoptRegUnit(AdjustUnit); From a7cebadc10948ca1b9df1a740370f8ef7cef7e77 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Tue, 13 Feb 2024 17:16:25 +0000 Subject: [PATCH 026/240] [TableGen] Trivial simplification in computeRegUnitSets. NFC. --- llvm/utils/TableGen/CodeGenRegisters.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/llvm/utils/TableGen/CodeGenRegisters.cpp b/llvm/utils/TableGen/CodeGenRegisters.cpp index e29bc50118f0e6..7d266c8896d8e3 100644 --- a/llvm/utils/TableGen/CodeGenRegisters.cpp +++ b/llvm/utils/TableGen/CodeGenRegisters.cpp @@ -2107,10 +2107,8 @@ void CodeGenRegBank::computeRegUnitSets() { ++UnitIdx) { std::vector RUSets; for (unsigned i = 0, e = RegUnitSets.size(); i != e; ++i) { - RegUnitSet &RUSet = RegUnitSets[i]; - if (!is_contained(RUSet.Units, UnitIdx)) - continue; - RUSets.push_back(i); + if (is_contained(RegUnitSets[i].Units, UnitIdx)) + RUSets.push_back(i); } unsigned RCUnitSetsIdx = 0; for (unsigned e = RegClassUnitSets.size(); RCUnitSetsIdx != e; From 9be7b0a539f673081bf8d1d5a5b08135190fd46d Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 13 Feb 2024 09:46:50 -0800 Subject: [PATCH 027/240] [IRGen][AArch64][RISCV] Generalize bitcast between i1 predicate vector and i8 fixed vector. 
(#76548) Instead of only handling vscale x 16 x i1 predicate vectors, handle any scalable i1 vector where the known minimum is divisible by 8. This is used on RISC-V where we have multiple sizes of predicate types. --- clang/lib/CodeGen/CGCall.cpp | 50 +++++++++--------- clang/lib/CodeGen/CGExprScalar.cpp | 51 +++++++++---------- .../attr-riscv-rvv-vector-bits-bitcast.c | 36 ++++++------- .../CodeGen/attr-riscv-rvv-vector-bits-call.c | 26 ++-------- .../CodeGen/attr-riscv-rvv-vector-bits-cast.c | 14 ++--- .../attr-riscv-rvv-vector-bits-codegen.c | 21 ++++---- .../attr-riscv-rvv-vector-bits-globals.c | 26 ++++------ 7 files changed, 94 insertions(+), 130 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index cd26a3df78602c..d05cf1c6e1814e 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -1301,27 +1301,25 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty, // If coercing a fixed vector to a scalable vector for ABI compatibility, and // the types match, use the llvm.vector.insert intrinsic to perform the // conversion. - if (auto *ScalableDst = dyn_cast(Ty)) { - if (auto *FixedSrc = dyn_cast(SrcTy)) { - // If we are casting a fixed i8 vector to a scalable 16 x i1 predicate + if (auto *ScalableDstTy = dyn_cast(Ty)) { + if (auto *FixedSrcTy = dyn_cast(SrcTy)) { + // If we are casting a fixed i8 vector to a scalable i1 predicate // vector, use a vector insert and bitcast the result. 
- bool NeedsBitcast = false; - auto PredType = - llvm::ScalableVectorType::get(CGF.Builder.getInt1Ty(), 16); - llvm::Type *OrigType = Ty; - if (ScalableDst == PredType && - FixedSrc->getElementType() == CGF.Builder.getInt8Ty()) { - ScalableDst = llvm::ScalableVectorType::get(CGF.Builder.getInt8Ty(), 2); - NeedsBitcast = true; + if (ScalableDstTy->getElementType()->isIntegerTy(1) && + ScalableDstTy->getElementCount().isKnownMultipleOf(8) && + FixedSrcTy->getElementType()->isIntegerTy(8)) { + ScalableDstTy = llvm::ScalableVectorType::get( + FixedSrcTy->getElementType(), + ScalableDstTy->getElementCount().getKnownMinValue() / 8); } - if (ScalableDst->getElementType() == FixedSrc->getElementType()) { + if (ScalableDstTy->getElementType() == FixedSrcTy->getElementType()) { auto *Load = CGF.Builder.CreateLoad(Src); - auto *UndefVec = llvm::UndefValue::get(ScalableDst); + auto *UndefVec = llvm::UndefValue::get(ScalableDstTy); auto *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty); llvm::Value *Result = CGF.Builder.CreateInsertVector( - ScalableDst, UndefVec, Load, Zero, "cast.scalable"); - if (NeedsBitcast) - Result = CGF.Builder.CreateBitCast(Result, OrigType); + ScalableDstTy, UndefVec, Load, Zero, "cast.scalable"); + if (ScalableDstTy != Ty) + Result = CGF.Builder.CreateBitCast(Result, Ty); return Result; } } @@ -3199,13 +3197,14 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, llvm::Value *Coerced = Fn->getArg(FirstIRArg); if (auto *VecTyFrom = dyn_cast(Coerced->getType())) { - // If we are casting a scalable 16 x i1 predicate vector to a fixed i8 + // If we are casting a scalable i1 predicate vector to a fixed i8 // vector, bitcast the source and use a vector extract. 
- auto PredType = - llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16); - if (VecTyFrom == PredType && + if (VecTyFrom->getElementType()->isIntegerTy(1) && + VecTyFrom->getElementCount().isKnownMultipleOf(8) && VecTyTo->getElementType() == Builder.getInt8Ty()) { - VecTyFrom = llvm::ScalableVectorType::get(Builder.getInt8Ty(), 2); + VecTyFrom = llvm::ScalableVectorType::get( + VecTyTo->getElementType(), + VecTyFrom->getElementCount().getKnownMinValue() / 8); Coerced = Builder.CreateBitCast(Coerced, VecTyFrom); } if (VecTyFrom->getElementType() == VecTyTo->getElementType()) { @@ -5877,12 +5876,13 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // If coercing a fixed vector from a scalable vector for ABI // compatibility, and the types match, use the llvm.vector.extract // intrinsic to perform the conversion. - if (auto *FixedDst = dyn_cast(RetIRTy)) { + if (auto *FixedDstTy = dyn_cast(RetIRTy)) { llvm::Value *V = CI; - if (auto *ScalableSrc = dyn_cast(V->getType())) { - if (FixedDst->getElementType() == ScalableSrc->getElementType()) { + if (auto *ScalableSrcTy = + dyn_cast(V->getType())) { + if (FixedDstTy->getElementType() == ScalableSrcTy->getElementType()) { llvm::Value *Zero = llvm::Constant::getNullValue(CGM.Int64Ty); - V = Builder.CreateExtractVector(FixedDst, V, Zero, "cast.fixed"); + V = Builder.CreateExtractVector(FixedDstTy, V, Zero, "cast.fixed"); return RValue::get(V); } } diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index fa03163bbde577..aa805f291d1757 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -2137,26 +2137,24 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { // If Src is a fixed vector and Dst is a scalable vector, and both have the // same element type, use the llvm.vector.insert intrinsic to perform the // bitcast. 
- if (const auto *FixedSrc = dyn_cast(SrcTy)) { - if (const auto *ScalableDst = dyn_cast(DstTy)) { - // If we are casting a fixed i8 vector to a scalable 16 x i1 predicate + if (auto *FixedSrcTy = dyn_cast(SrcTy)) { + if (auto *ScalableDstTy = dyn_cast(DstTy)) { + // If we are casting a fixed i8 vector to a scalable i1 predicate // vector, use a vector insert and bitcast the result. - bool NeedsBitCast = false; - auto PredType = llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16); - llvm::Type *OrigType = DstTy; - if (ScalableDst == PredType && - FixedSrc->getElementType() == Builder.getInt8Ty()) { - DstTy = llvm::ScalableVectorType::get(Builder.getInt8Ty(), 2); - ScalableDst = cast(DstTy); - NeedsBitCast = true; + if (ScalableDstTy->getElementType()->isIntegerTy(1) && + ScalableDstTy->getElementCount().isKnownMultipleOf(8) && + FixedSrcTy->getElementType()->isIntegerTy(8)) { + ScalableDstTy = llvm::ScalableVectorType::get( + FixedSrcTy->getElementType(), + ScalableDstTy->getElementCount().getKnownMinValue() / 8); } - if (FixedSrc->getElementType() == ScalableDst->getElementType()) { - llvm::Value *UndefVec = llvm::UndefValue::get(DstTy); + if (FixedSrcTy->getElementType() == ScalableDstTy->getElementType()) { + llvm::Value *UndefVec = llvm::UndefValue::get(ScalableDstTy); llvm::Value *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty); llvm::Value *Result = Builder.CreateInsertVector( - DstTy, UndefVec, Src, Zero, "cast.scalable"); - if (NeedsBitCast) - Result = Builder.CreateBitCast(Result, OrigType); + ScalableDstTy, UndefVec, Src, Zero, "cast.scalable"); + if (Result->getType() != DstTy) + Result = Builder.CreateBitCast(Result, DstTy); return Result; } } @@ -2165,18 +2163,19 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { // If Src is a scalable vector and Dst is a fixed vector, and both have the // same element type, use the llvm.vector.extract intrinsic to perform the // bitcast. 
- if (const auto *ScalableSrc = dyn_cast(SrcTy)) { - if (const auto *FixedDst = dyn_cast(DstTy)) { - // If we are casting a scalable 16 x i1 predicate vector to a fixed i8 + if (auto *ScalableSrcTy = dyn_cast(SrcTy)) { + if (auto *FixedDstTy = dyn_cast(DstTy)) { + // If we are casting a scalable i1 predicate vector to a fixed i8 // vector, bitcast the source and use a vector extract. - auto PredType = llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16); - if (ScalableSrc == PredType && - FixedDst->getElementType() == Builder.getInt8Ty()) { - SrcTy = llvm::ScalableVectorType::get(Builder.getInt8Ty(), 2); - ScalableSrc = cast(SrcTy); - Src = Builder.CreateBitCast(Src, SrcTy); + if (ScalableSrcTy->getElementType()->isIntegerTy(1) && + ScalableSrcTy->getElementCount().isKnownMultipleOf(8) && + FixedDstTy->getElementType()->isIntegerTy(8)) { + ScalableSrcTy = llvm::ScalableVectorType::get( + FixedDstTy->getElementType(), + ScalableSrcTy->getElementCount().getKnownMinValue() / 8); + Src = Builder.CreateBitCast(Src, ScalableSrcTy); } - if (ScalableSrc->getElementType() == FixedDst->getElementType()) { + if (ScalableSrcTy->getElementType() == FixedDstTy->getElementType()) { llvm::Value *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty); return Builder.CreateExtractVector(DstTy, Src, Zero, "cast.fixed"); } diff --git a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-bitcast.c b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-bitcast.c index a7b3123e61cd52..20fb4a04564c75 100644 --- a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-bitcast.c +++ b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-bitcast.c @@ -177,29 +177,26 @@ void write_float64m1(struct struct_float64m1 *s, vfloat64m1_t x) { // CHECK-64-LABEL: @read_bool1( // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[SAVED_VALUE:%.*]] = alloca <8 x i8>, align 8 // CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 8 // CHECK-64-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[Y]], align 8, !tbaa [[TBAA4]] -// 
CHECK-64-NEXT: store <8 x i8> [[TMP0]], ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA4]] -// CHECK-64-NEXT: [[TMP1:%.*]] = load , ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA4]] +// CHECK-64-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv8i8.v8i8( undef, <8 x i8> [[TMP0]], i64 0) +// CHECK-64-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to // CHECK-64-NEXT: ret [[TMP1]] // // CHECK-128-LABEL: @read_bool1( // CHECK-128-NEXT: entry: -// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <16 x i8>, align 16 // CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 16 // CHECK-128-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Y]], align 8, !tbaa [[TBAA4]] -// CHECK-128-NEXT: store <16 x i8> [[TMP0]], ptr [[SAVED_VALUE]], align 16, !tbaa [[TBAA4]] -// CHECK-128-NEXT: [[TMP1:%.*]] = load , ptr [[SAVED_VALUE]], align 16, !tbaa [[TBAA4]] +// CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv8i8.v16i8( undef, <16 x i8> [[TMP0]], i64 0) +// CHECK-128-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to // CHECK-128-NEXT: ret [[TMP1]] // // CHECK-256-LABEL: @read_bool1( // CHECK-256-NEXT: entry: -// CHECK-256-NEXT: [[SAVED_VALUE:%.*]] = alloca <32 x i8>, align 32 // CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 32 // CHECK-256-NEXT: [[TMP0:%.*]] = load <32 x i8>, ptr [[Y]], align 8, !tbaa [[TBAA4]] -// CHECK-256-NEXT: store <32 x i8> [[TMP0]], ptr [[SAVED_VALUE]], align 32, !tbaa [[TBAA4]] -// CHECK-256-NEXT: [[TMP1:%.*]] = load , ptr [[SAVED_VALUE]], align 32, !tbaa [[TBAA4]] +// CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[TMP0]], i64 0) +// CHECK-256-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to // CHECK-256-NEXT: ret [[TMP1]] // vbool1_t read_bool1(struct struct_bool1 *s) { @@ -208,29 +205,26 @@ vbool1_t read_bool1(struct struct_bool1 *s) { // CHECK-64-LABEL: @write_bool1( // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[SAVED_VALUE:%.*]] = 
alloca , align 8 -// CHECK-64-NEXT: store [[X:%.*]], ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA7:![0-9]+]] -// CHECK-64-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA4]] +// CHECK-64-NEXT: [[TMP0:%.*]] = bitcast [[X:%.*]] to +// CHECK-64-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x i8> @llvm.vector.extract.v8i8.nxv8i8( [[TMP0]], i64 0) // CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 8 -// CHECK-64-NEXT: store <8 x i8> [[TMP0]], ptr [[Y]], align 8, !tbaa [[TBAA4]] +// CHECK-64-NEXT: store <8 x i8> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[TBAA4]] // CHECK-64-NEXT: ret void // // CHECK-128-LABEL: @write_bool1( // CHECK-128-NEXT: entry: -// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca , align 16 -// CHECK-128-NEXT: store [[X:%.*]], ptr [[SAVED_VALUE]], align 16, !tbaa [[TBAA7:![0-9]+]] -// CHECK-128-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[SAVED_VALUE]], align 16, !tbaa [[TBAA4]] +// CHECK-128-NEXT: [[TMP0:%.*]] = bitcast [[X:%.*]] to +// CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8( [[TMP0]], i64 0) // CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 16 -// CHECK-128-NEXT: store <16 x i8> [[TMP0]], ptr [[Y]], align 8, !tbaa [[TBAA4]] +// CHECK-128-NEXT: store <16 x i8> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[TBAA4]] // CHECK-128-NEXT: ret void // // CHECK-256-LABEL: @write_bool1( // CHECK-256-NEXT: entry: -// CHECK-256-NEXT: [[SAVED_VALUE:%.*]] = alloca , align 8 -// CHECK-256-NEXT: store [[X:%.*]], ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA7:![0-9]+]] -// CHECK-256-NEXT: [[TMP0:%.*]] = load <32 x i8>, ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA4]] +// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast [[X:%.*]] to +// CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[TMP0]], i64 0) // CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 32 -// CHECK-256-NEXT: store <32 x i8> 
[[TMP0]], ptr [[Y]], align 8, !tbaa [[TBAA4]] +// CHECK-256-NEXT: store <32 x i8> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[TBAA4]] // CHECK-256-NEXT: ret void // void write_bool1(struct struct_bool1 *s, vbool1_t x) { diff --git a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-call.c b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-call.c index 888abe1a7bc3fb..1824d97d04dda8 100644 --- a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-call.c +++ b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-call.c @@ -70,13 +70,7 @@ fixed_float64m1_t call_float64_ff(fixed_float64m1_t op1, fixed_float64m1_t op2) // CHECK-LABEL: @call_bool1_ff( // CHECK-NEXT: entry: -// CHECK-NEXT: [[SAVED_VALUE4:%.*]] = alloca , align 8 -// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 8 -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv64i1.i64( [[OP1_COERCE:%.*]], [[OP2_COERCE:%.*]], i64 256) -// CHECK-NEXT: store [[TMP0]], ptr [[SAVED_VALUE4]], align 8, !tbaa [[TBAA4:![0-9]+]] -// CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr [[SAVED_VALUE4]], align 8, !tbaa [[TBAA8:![0-9]+]] -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load , ptr [[RETVAL_COERCE]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.riscv.vmand.nxv64i1.i64( [[TMP0:%.*]], [[TMP1:%.*]], i64 256) // CHECK-NEXT: ret [[TMP2]] // fixed_bool1_t call_bool1_ff(fixed_bool1_t op1, fixed_bool1_t op2) { @@ -116,14 +110,8 @@ fixed_float64m1_t call_float64_fs(fixed_float64m1_t op1, vfloat64m1_t op2) { // CHECK-LABEL: @call_bool1_fs( // CHECK-NEXT: entry: -// CHECK-NEXT: [[SAVED_VALUE2:%.*]] = alloca , align 8 -// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 8 -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv64i1.i64( [[OP1_COERCE:%.*]], [[OP2:%.*]], i64 256) -// CHECK-NEXT: store [[TMP0]], ptr [[SAVED_VALUE2]], align 8, !tbaa [[TBAA4]] -// CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr [[SAVED_VALUE2]], align 8, !tbaa [[TBAA8]] -// CHECK-NEXT: 
store <32 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load , ptr [[RETVAL_COERCE]], align 8 -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.riscv.vmand.nxv64i1.i64( [[TMP0:%.*]], [[OP2:%.*]], i64 256) +// CHECK-NEXT: ret [[TMP1]] // fixed_bool1_t call_bool1_fs(fixed_bool1_t op1, vbool1_t op2) { return __riscv_vmand(op1, op2, __riscv_v_fixed_vlen); @@ -162,14 +150,8 @@ fixed_float64m1_t call_float64_ss(vfloat64m1_t op1, vfloat64m1_t op2) { // CHECK-LABEL: @call_bool1_ss( // CHECK-NEXT: entry: -// CHECK-NEXT: [[SAVED_VALUE:%.*]] = alloca , align 8 -// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 8 // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv64i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 256) -// CHECK-NEXT: store [[TMP0]], ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA4]] -// CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA8]] -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load , ptr [[RETVAL_COERCE]], align 8 -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: ret [[TMP0]] // fixed_bool1_t call_bool1_ss(vbool1_t op1, vbool1_t op2) { return __riscv_vmand(op1, op2, __riscv_v_fixed_vlen); diff --git a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-cast.c b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-cast.c index fe278174bf6817..3806c3e1b30bbf 100644 --- a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-cast.c +++ b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-cast.c @@ -65,13 +65,7 @@ fixed_float64m1_t from_vfloat64m1_t(vfloat64m1_t type) { // CHECK-LABEL: @from_vbool1_t( // CHECK-NEXT: entry: -// CHECK-NEXT: [[SAVED_VALUE:%.*]] = alloca , align 8 -// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 8 -// CHECK-NEXT: store [[TYPE:%.*]], ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA4:![0-9]+]] -// CHECK-NEXT: [[TMP0:%.*]] = load <32 x i8>, ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA8:![0-9]+]] -// CHECK-NEXT: 
store <32 x i8> [[TMP0]], ptr [[RETVAL_COERCE]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load , ptr [[RETVAL_COERCE]], align 8 -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: ret [[TYPE:%.*]] // fixed_bool1_t from_vbool1_t(vbool1_t type) { return type; @@ -79,7 +73,7 @@ fixed_bool1_t from_vbool1_t(vbool1_t type) { // CHECK-LABEL: @to_vbool1_t( // CHECK-NEXT: entry: -// CHECK-NEXT: ret [[TYPE_COERCE:%.*]] +// CHECK-NEXT: ret [[TMP0:%.*]] // vbool1_t to_vbool1_t(fixed_bool1_t type) { return type; @@ -105,8 +99,8 @@ vbool4_t to_vbool4_t(fixed_bool4_t type) { // CHECK-NEXT: entry: // CHECK-NEXT: [[SAVED_VALUE:%.*]] = alloca , align 1 // CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 1 -// CHECK-NEXT: store [[TYPE:%.*]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA9:![0-9]+]] -// CHECK-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA8]] +// CHECK-NEXT: store [[TYPE:%.*]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA4:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA8:![0-9]+]] // CHECK-NEXT: store <1 x i8> [[TMP0]], ptr [[RETVAL_COERCE]], align 1 // CHECK-NEXT: [[TMP1:%.*]] = load , ptr [[RETVAL_COERCE]], align 1 // CHECK-NEXT: ret [[TMP1]] diff --git a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-codegen.c b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-codegen.c index ac22bdce0da3e5..eb769fadda9a85 100644 --- a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-codegen.c +++ b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-codegen.c @@ -53,25 +53,24 @@ fixed_bool32_t global_bool32; // CHECK-NEXT: [[M_ADDR:%.*]] = alloca , align 1 // CHECK-NEXT: [[VEC_ADDR:%.*]] = alloca , align 1 // CHECK-NEXT: [[MASK:%.*]] = alloca , align 1 -// CHECK-NEXT: [[SAVED_VALUE:%.*]] = alloca <32 x i8>, align 32 // CHECK-NEXT: store [[M:%.*]], ptr [[M_ADDR]], align 1 // CHECK-NEXT: store [[VEC:%.*]], ptr [[VEC_ADDR]], align 1 // CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[M_ADDR]], align 1 // CHECK-NEXT: [[TMP1:%.*]] = load 
<32 x i8>, ptr @global_bool1, align 8 -// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[SAVED_VALUE]], align 32 -// CHECK-NEXT: [[TMP2:%.*]] = load , ptr [[SAVED_VALUE]], align 32 +// CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast [[CAST_SCALABLE]] to // CHECK-NEXT: [[TMP3:%.*]] = call @llvm.riscv.vmand.nxv64i1.i64( [[TMP0]], [[TMP2]], i64 256) // CHECK-NEXT: store [[TMP3]], ptr [[MASK]], align 1 // CHECK-NEXT: [[TMP4:%.*]] = load , ptr [[MASK]], align 1 // CHECK-NEXT: [[TMP5:%.*]] = load , ptr [[VEC_ADDR]], align 1 // CHECK-NEXT: [[TMP6:%.*]] = load <256 x i8>, ptr @global_vec_int8m8, align 8 -// CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv64i8.v256i8( undef, <256 x i8> [[TMP6]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = call @llvm.riscv.vadd.mask.nxv64i8.nxv64i8.i64( poison, [[TMP5]], [[CAST_SCALABLE]], [[TMP4]], i64 256, i64 3) +// CHECK-NEXT: [[CAST_SCALABLE1:%.*]] = call @llvm.vector.insert.nxv64i8.v256i8( undef, <256 x i8> [[TMP6]], i64 0) +// CHECK-NEXT: [[TMP7:%.*]] = call @llvm.riscv.vadd.mask.nxv64i8.nxv64i8.i64( poison, [[TMP5]], [[CAST_SCALABLE1]], [[TMP4]], i64 256, i64 3) // CHECK-NEXT: [[CAST_FIXED:%.*]] = call <256 x i8> @llvm.vector.extract.v256i8.nxv64i8( [[TMP7]], i64 0) // CHECK-NEXT: store <256 x i8> [[CAST_FIXED]], ptr [[RETVAL]], align 8 // CHECK-NEXT: [[TMP8:%.*]] = load <256 x i8>, ptr [[RETVAL]], align 8 -// CHECK-NEXT: [[CAST_SCALABLE1:%.*]] = call @llvm.vector.insert.nxv64i8.v256i8( undef, <256 x i8> [[TMP8]], i64 0) -// CHECK-NEXT: ret [[CAST_SCALABLE1]] +// CHECK-NEXT: [[CAST_SCALABLE2:%.*]] = call @llvm.vector.insert.nxv64i8.v256i8( undef, <256 x i8> [[TMP8]], i64 0) +// CHECK-NEXT: ret [[CAST_SCALABLE2]] // fixed_int8m8_t test_bool1(vbool1_t m, vint8m8_t vec) { vbool1_t mask = __riscv_vmand(m, global_bool1, __riscv_v_fixed_vlen); @@ -181,15 +180,15 @@ fixed_int32m1_t array_arg(fixed_int32m1_t arr[]) { // CHECK-NEXT: 
[[RETVAL:%.*]] = alloca <32 x i8>, align 8 // CHECK-NEXT: [[ARR:%.*]] = alloca [3 x <32 x i8>], align 8 // CHECK-NEXT: [[PARR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 8 // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <32 x i8>], ptr [[ARR]], i64 0, i64 0 // CHECK-NEXT: store ptr [[ARRAYIDX]], ptr [[PARR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PARR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 8 // CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[RETVAL]], align 8 -// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL_COERCE]], ptr align 8 [[RETVAL]], i64 32, i1 false) -// CHECK-NEXT: [[TMP2:%.*]] = load , ptr [[RETVAL_COERCE]], align 8 -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr [[RETVAL]], align 8 +// CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast [[CAST_SCALABLE]] to +// CHECK-NEXT: ret [[TMP3]] // fixed_bool1_t address_of_array_idx_bool1() { fixed_bool1_t arr[3]; diff --git a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-globals.c b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-globals.c index d7df1a24bbfb00..31a245dcb22405 100644 --- a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-globals.c +++ b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-globals.c @@ -56,18 +56,16 @@ void write_global_i64(vint64m1_t v) { global_i64 = v; } // CHECK-64-LABEL: @write_global_bool1( // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[SAVED_VALUE:%.*]] = alloca , align 8 -// CHECK-64-NEXT: store [[V:%.*]], ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA7:![0-9]+]] -// CHECK-64-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA4]] -// CHECK-64-NEXT: store <8 x i8> [[TMP0]], ptr @global_bool1, align 8, !tbaa [[TBAA4]] +// CHECK-64-NEXT: [[TMP0:%.*]] = bitcast [[V:%.*]] to +// CHECK-64-NEXT: [[CAST_FIXED:%.*]] = tail 
call <8 x i8> @llvm.vector.extract.v8i8.nxv8i8( [[TMP0]], i64 0) +// CHECK-64-NEXT: store <8 x i8> [[CAST_FIXED]], ptr @global_bool1, align 8, !tbaa [[TBAA4]] // CHECK-64-NEXT: ret void // // CHECK-256-LABEL: @write_global_bool1( // CHECK-256-NEXT: entry: -// CHECK-256-NEXT: [[SAVED_VALUE:%.*]] = alloca , align 8 -// CHECK-256-NEXT: store [[V:%.*]], ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA7:![0-9]+]] -// CHECK-256-NEXT: [[TMP0:%.*]] = load <32 x i8>, ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA4]] -// CHECK-256-NEXT: store <32 x i8> [[TMP0]], ptr @global_bool1, align 8, !tbaa [[TBAA4]] +// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast [[V:%.*]] to +// CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[TMP0]], i64 0) +// CHECK-256-NEXT: store <32 x i8> [[CAST_FIXED]], ptr @global_bool1, align 8, !tbaa [[TBAA4]] // CHECK-256-NEXT: ret void // void write_global_bool1(vbool1_t v) { global_bool1 = v; } @@ -92,7 +90,7 @@ void write_global_bool4(vbool4_t v) { global_bool4 = v; } // CHECK-256-LABEL: @write_global_bool32( // CHECK-256-NEXT: entry: // CHECK-256-NEXT: [[SAVED_VALUE:%.*]] = alloca , align 1 -// CHECK-256-NEXT: store [[V:%.*]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA9:![0-9]+]] +// CHECK-256-NEXT: store [[V:%.*]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA7:![0-9]+]] // CHECK-256-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA4]] // CHECK-256-NEXT: store <1 x i8> [[TMP0]], ptr @global_bool32, align 1, !tbaa [[TBAA4]] // CHECK-256-NEXT: ret void @@ -120,18 +118,16 @@ vint64m1_t read_global_i64() { return global_i64; } // CHECK-64-LABEL: @read_global_bool1( // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[SAVED_VALUE:%.*]] = alloca <8 x i8>, align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr @global_bool1, align 8, !tbaa [[TBAA4]] -// CHECK-64-NEXT: store <8 x i8> [[TMP0]], ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA4]] -// CHECK-64-NEXT: [[TMP1:%.*]] = load , ptr [[SAVED_VALUE]], align 8, 
!tbaa [[TBAA4]] +// CHECK-64-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv8i8.v8i8( undef, <8 x i8> [[TMP0]], i64 0) +// CHECK-64-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to // CHECK-64-NEXT: ret [[TMP1]] // // CHECK-256-LABEL: @read_global_bool1( // CHECK-256-NEXT: entry: -// CHECK-256-NEXT: [[SAVED_VALUE:%.*]] = alloca <32 x i8>, align 32 // CHECK-256-NEXT: [[TMP0:%.*]] = load <32 x i8>, ptr @global_bool1, align 8, !tbaa [[TBAA4]] -// CHECK-256-NEXT: store <32 x i8> [[TMP0]], ptr [[SAVED_VALUE]], align 32, !tbaa [[TBAA4]] -// CHECK-256-NEXT: [[TMP1:%.*]] = load , ptr [[SAVED_VALUE]], align 32, !tbaa [[TBAA4]] +// CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[TMP0]], i64 0) +// CHECK-256-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to // CHECK-256-NEXT: ret [[TMP1]] // vbool1_t read_global_bool1() { return global_bool1; } From 742a06f577b4c3b1c1f994e91bb6579ae89fe4b0 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Tue, 13 Feb 2024 10:49:22 -0700 Subject: [PATCH 028/240] [clang] Remove #undef alloca workaround (#81534) Added in 26670dcba1609574cba5942aff78ff97b567c5f3 to workaround #4885. Windows CI and a local Windows build are happy with this change, so it seems like this has been properly fixed at some point. If this does break somebody, this can be easily reverted. (Also, Linux does the same `#define alloca` in system headers, so I'm not sure why it'd be different on Windows) This is tech debt that caused breakages, see comments on #71709. 
--- clang/include/clang/Basic/Builtins.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/clang/include/clang/Basic/Builtins.h b/clang/include/clang/Basic/Builtins.h index f955d21169556a..6700d1903a0088 100644 --- a/clang/include/clang/Basic/Builtins.h +++ b/clang/include/clang/Basic/Builtins.h @@ -20,10 +20,6 @@ #include "llvm/ADT/StringRef.h" #include -// VC++ defines 'alloca' as an object-like macro, which interferes with our -// builtins. -#undef alloca - namespace clang { class TargetInfo; class IdentifierTable; From 9838c8512bc29e3a1b8edeb0eb2541160e4c727f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 12 Feb 2024 21:17:53 -0800 Subject: [PATCH 029/240] [RISCV] Copy typepromotion-overflow.ll from AArch64. NFC --- .../CodeGen/RISCV/typepromotion-overflow.ll | 388 ++++++++++++++++++ 1 file changed, 388 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/typepromotion-overflow.ll diff --git a/llvm/test/CodeGen/RISCV/typepromotion-overflow.ll b/llvm/test/CodeGen/RISCV/typepromotion-overflow.ll new file mode 100644 index 00000000000000..fad9e6c0756b36 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/typepromotion-overflow.ll @@ -0,0 +1,388 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+m %s -o - | FileCheck %s + +define zeroext i16 @overflow_add(i16 zeroext %a, i16 zeroext %b) { +; CHECK-LABEL: overflow_add: +; CHECK: # %bb.0: +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: ori a0, a0, 1 +; CHECK-NEXT: slli a0, a0, 48 +; CHECK-NEXT: srli a1, a0, 48 +; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: li a0, 2 +; CHECK-NEXT: bltu a2, a1, .LBB0_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a0, 5 +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: ret + %add = add i16 %b, %a + %or = or i16 %add, 1 + %cmp = icmp ugt i16 %or, 1024 + %res = select i1 %cmp, i16 2, i16 5 + ret i16 %res +} + +define zeroext i16 @overflow_sub(i16 zeroext %a, i16 zeroext %b) { +; CHECK-LABEL: overflow_sub: +; CHECK: # %bb.0: +; 
CHECK-NEXT: subw a0, a0, a1 +; CHECK-NEXT: ori a0, a0, 1 +; CHECK-NEXT: slli a0, a0, 48 +; CHECK-NEXT: srli a1, a0, 48 +; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: li a0, 2 +; CHECK-NEXT: bltu a2, a1, .LBB1_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a0, 5 +; CHECK-NEXT: .LBB1_2: +; CHECK-NEXT: ret + %add = sub i16 %a, %b + %or = or i16 %add, 1 + %cmp = icmp ugt i16 %or, 1024 + %res = select i1 %cmp, i16 2, i16 5 + ret i16 %res +} + +define zeroext i16 @overflow_mul(i16 zeroext %a, i16 zeroext %b) { +; CHECK-LABEL: overflow_mul: +; CHECK: # %bb.0: +; CHECK-NEXT: mul a0, a1, a0 +; CHECK-NEXT: ori a0, a0, 1 +; CHECK-NEXT: slli a0, a0, 48 +; CHECK-NEXT: srli a1, a0, 48 +; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: li a0, 2 +; CHECK-NEXT: bltu a2, a1, .LBB2_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a0, 5 +; CHECK-NEXT: .LBB2_2: +; CHECK-NEXT: ret + %add = mul i16 %b, %a + %or = or i16 %add, 1 + %cmp = icmp ugt i16 %or, 1024 + %res = select i1 %cmp, i16 2, i16 5 + ret i16 %res +} + +define zeroext i16 @overflow_shl(i16 zeroext %a, i16 zeroext %b) { +; CHECK-LABEL: overflow_shl: +; CHECK: # %bb.0: +; CHECK-NEXT: sll a0, a0, a1 +; CHECK-NEXT: ori a0, a0, 1 +; CHECK-NEXT: slli a0, a0, 48 +; CHECK-NEXT: srli a1, a0, 48 +; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: li a0, 2 +; CHECK-NEXT: bltu a2, a1, .LBB3_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a0, 5 +; CHECK-NEXT: .LBB3_2: +; CHECK-NEXT: ret + %add = shl i16 %a, %b + %or = or i16 %add, 1 + %cmp = icmp ugt i16 %or, 1024 + %res = select i1 %cmp, i16 2, i16 5 + ret i16 %res +} + +define i32 @overflow_add_no_consts(i8 zeroext %a, i8 zeroext %b, i8 zeroext %limit) { +; CHECK-LABEL: overflow_add_no_consts: +; CHECK: # %bb.0: +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: andi a1, a0, 255 +; CHECK-NEXT: li a0, 8 +; CHECK-NEXT: bltu a2, a1, .LBB4_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: .LBB4_2: +; CHECK-NEXT: ret + %add = add i8 %b, %a + %cmp = icmp ugt i8 %add, %limit + %res = select i1 %cmp, i32 8, i32 16 + 
ret i32 %res +} + +define i32 @overflow_add_const_limit(i8 zeroext %a, i8 zeroext %b) { +; CHECK-LABEL: overflow_add_const_limit: +; CHECK: # %bb.0: +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: andi a1, a0, 255 +; CHECK-NEXT: li a2, 128 +; CHECK-NEXT: li a0, 8 +; CHECK-NEXT: bltu a2, a1, .LBB5_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: .LBB5_2: +; CHECK-NEXT: ret + %add = add i8 %b, %a + %cmp = icmp ugt i8 %add, -128 + %res = select i1 %cmp, i32 8, i32 16 + ret i32 %res +} + +define i32 @overflow_add_positive_const_limit(i8 zeroext %a) { +; CHECK-LABEL: overflow_add_positive_const_limit: +; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 56 +; CHECK-NEXT: srai a1, a0, 56 +; CHECK-NEXT: li a2, -1 +; CHECK-NEXT: li a0, 8 +; CHECK-NEXT: blt a1, a2, .LBB6_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: .LBB6_2: +; CHECK-NEXT: ret + %cmp = icmp slt i8 %a, -1 + %res = select i1 %cmp, i32 8, i32 16 + ret i32 %res +} + +define i32 @unsafe_add_underflow(i8 zeroext %a) { +; CHECK-LABEL: unsafe_add_underflow: +; CHECK: # %bb.0: +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: li a2, 1 +; CHECK-NEXT: li a0, 8 +; CHECK-NEXT: beq a1, a2, .LBB7_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: .LBB7_2: +; CHECK-NEXT: ret + %cmp = icmp eq i8 %a, 1 + %res = select i1 %cmp, i32 8, i32 16 + ret i32 %res +} + +define i32 @safe_add_underflow(i8 zeroext %a) { +; CHECK-LABEL: safe_add_underflow: +; CHECK: # %bb.0: +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: li a0, 8 +; CHECK-NEXT: beqz a1, .LBB8_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: .LBB8_2: +; CHECK-NEXT: ret + %cmp = icmp eq i8 %a, 0 + %res = select i1 %cmp, i32 8, i32 16 + ret i32 %res +} + +define i32 @safe_add_underflow_neg(i8 zeroext %a) { +; CHECK-LABEL: safe_add_underflow_neg: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, a0, -2 +; CHECK-NEXT: andi a1, a0, 255 +; CHECK-NEXT: li a2, 251 +; CHECK-NEXT: li a0, 8 +; CHECK-NEXT: bltu a1, a2, .LBB9_2 +; CHECK-NEXT: # 
%bb.1: +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: .LBB9_2: +; CHECK-NEXT: ret + %add = add i8 %a, -2 + %cmp = icmp ult i8 %add, -5 + %res = select i1 %cmp, i32 8, i32 16 + ret i32 %res +} + +define i32 @overflow_sub_negative_const_limit(i8 zeroext %a) { +; CHECK-LABEL: overflow_sub_negative_const_limit: +; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 56 +; CHECK-NEXT: srai a1, a0, 56 +; CHECK-NEXT: li a2, -1 +; CHECK-NEXT: li a0, 8 +; CHECK-NEXT: blt a1, a2, .LBB10_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: .LBB10_2: +; CHECK-NEXT: ret + %cmp = icmp slt i8 %a, -1 + %res = select i1 %cmp, i32 8, i32 16 + ret i32 %res +} + +; This is valid so long as the icmp immediate is sext. +define i32 @sext_sub_underflow(i8 zeroext %a) { +; CHECK-LABEL: sext_sub_underflow: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, a0, -6 +; CHECK-NEXT: andi a1, a0, 255 +; CHECK-NEXT: li a2, 250 +; CHECK-NEXT: li a0, 8 +; CHECK-NEXT: bltu a2, a1, .LBB11_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: .LBB11_2: +; CHECK-NEXT: ret + %sub = add i8 %a, -6 + %cmp = icmp ugt i8 %sub, -6 + %res = select i1 %cmp, i32 8, i32 16 + ret i32 %res +} + +define i32 @safe_sub_underflow(i8 zeroext %a) { +; CHECK-LABEL: safe_sub_underflow: +; CHECK: # %bb.0: +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: beqz a1, .LBB12_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a0, 8 +; CHECK-NEXT: .LBB12_2: +; CHECK-NEXT: ret + %cmp.not = icmp eq i8 %a, 0 + %res = select i1 %cmp.not, i32 16, i32 8 + ret i32 %res +} + +define i32 @safe_sub_underflow_neg(i8 zeroext %a) { +; CHECK-LABEL: safe_sub_underflow_neg: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, a0, -4 +; CHECK-NEXT: andi a1, a0, 255 +; CHECK-NEXT: li a2, 250 +; CHECK-NEXT: li a0, 8 +; CHECK-NEXT: bltu a2, a1, .LBB13_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: .LBB13_2: +; CHECK-NEXT: ret + %sub = add i8 %a, -4 + %cmp = icmp ugt i8 %sub, -6 + %res = select i1 %cmp, i32 8, i32 16 + ret i32 %res +} 
+ +; This is valid so long as the icmp immediate is sext. +define i32 @sext_sub_underflow_neg(i8 zeroext %a) { +; CHECK-LABEL: sext_sub_underflow_neg: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, a0, -4 +; CHECK-NEXT: andi a1, a0, 255 +; CHECK-NEXT: li a2, 253 +; CHECK-NEXT: li a0, 8 +; CHECK-NEXT: bltu a1, a2, .LBB14_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: .LBB14_2: +; CHECK-NEXT: ret + %sub = add i8 %a, -4 + %cmp = icmp ult i8 %sub, -3 + %res = select i1 %cmp, i32 8, i32 16 + ret i32 %res +} + +define i32 @safe_sub_imm_var(ptr nocapture readonly %b) local_unnamed_addr #1 { +; CHECK-LABEL: safe_sub_imm_var: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a0, 0 +; CHECK-NEXT: ret +entry: + ret i32 0 +} + +define i32 @safe_sub_var_imm(ptr nocapture readonly %b) local_unnamed_addr #1 { +; CHECK-LABEL: safe_sub_var_imm: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lbu a0, 0(a0) +; CHECK-NEXT: addi a0, a0, 8 +; CHECK-NEXT: andi a0, a0, 255 +; CHECK-NEXT: sltiu a0, a0, 253 +; CHECK-NEXT: xori a0, a0, 1 +; CHECK-NEXT: ret +entry: + %0 = load i8, ptr %b, align 1 + %sub = add nsw i8 %0, 8 + %cmp = icmp ugt i8 %sub, -4 + %conv4 = zext i1 %cmp to i32 + ret i32 %conv4 +} + +define i32 @safe_add_imm_var(ptr nocapture readnone %b) { +; CHECK-LABEL: safe_add_imm_var: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a0, 1 +; CHECK-NEXT: ret +entry: + ret i32 1 +} + +define i32 @safe_add_var_imm(ptr nocapture readnone %b) { +; CHECK-LABEL: safe_add_var_imm: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a0, 1 +; CHECK-NEXT: ret +entry: + ret i32 1 +} + +define i8 @convert_add_order(i8 zeroext %arg) { +; CHECK-LABEL: convert_add_order: +; CHECK: # %bb.0: +; CHECK-NEXT: ori a1, a0, 1 +; CHECK-NEXT: sltiu a2, a1, 50 +; CHECK-NEXT: addi a1, a1, -40 +; CHECK-NEXT: andi a1, a1, 255 +; CHECK-NEXT: sltiu a1, a1, 20 +; CHECK-NEXT: li a3, 2 +; CHECK-NEXT: sub a3, a3, a1 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: or a2, a2, a3 +; CHECK-NEXT: and a0, a2, a0 +; CHECK-NEXT: 
ret + %shl = or i8 %arg, 1 + %cmp.0 = icmp ult i8 %shl, 50 + %sub = add nsw i8 %shl, -40 + %cmp.1 = icmp ult i8 %sub, 20 + %mask.sel.v = select i1 %cmp.1, i8 1, i8 2 + %mask.sel = select i1 %cmp.0, i8 %mask.sel.v, i8 -1 + %res = and i8 %mask.sel, %arg + ret i8 %res +} + +define i8 @underflow_if_sub(i32 %arg, i8 zeroext %arg1) { +; CHECK-LABEL: underflow_if_sub: +; CHECK: # %bb.0: +; CHECK-NEXT: sext.w a2, a0 +; CHECK-NEXT: sgtz a2, a2 +; CHECK-NEXT: and a0, a2, a0 +; CHECK-NEXT: addi a0, a0, -11 +; CHECK-NEXT: andi a2, a0, 247 +; CHECK-NEXT: bltu a2, a1, .LBB20_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a0, 100 +; CHECK-NEXT: .LBB20_2: +; CHECK-NEXT: ret + %cmp = icmp sgt i32 %arg, 0 + %conv = zext i1 %cmp to i32 + %and = and i32 %conv, %arg + %trunc = trunc i32 %and to i8 + %conv1 = add nuw nsw i8 %trunc, -11 + %cmp.1 = icmp ult i8 %conv1, %arg1 + %res = select i1 %cmp.1, i8 %conv1, i8 100 + ret i8 %res +} + +define i8 @underflow_if_sub_signext(i32 %arg, i8 signext %arg1) { +; CHECK-LABEL: underflow_if_sub_signext: +; CHECK: # %bb.0: +; CHECK-NEXT: sext.w a2, a0 +; CHECK-NEXT: sgtz a2, a2 +; CHECK-NEXT: and a0, a2, a0 +; CHECK-NEXT: addi a0, a0, -11 +; CHECK-NEXT: bltu a0, a1, .LBB21_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a0, 100 +; CHECK-NEXT: .LBB21_2: +; CHECK-NEXT: ret + %cmp = icmp sgt i32 %arg, 0 + %conv = zext i1 %cmp to i32 + %and = and i32 %conv, %arg + %trunc = trunc i32 %and to i8 + %conv1 = add nuw nsw i8 %trunc, -11 + %cmp.1 = icmp ult i8 %conv1, %arg1 + %res = select i1 %cmp.1, i8 %conv1, i8 100 + ret i8 %res +} From 7d40ea85d5ea5cc837536f61e3b4f80ea69f14d0 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 12 Feb 2024 21:43:35 -0800 Subject: [PATCH 030/240] [RISCV] Enable the TypePromotion pass from AArch64/ARM. This pass looks for unsigned icmps that have illegal types and tries to widen the use/def graph to improve the placement of the zero extends that type legalization would need to insert. 
I've explicitly disabled it for i32 by adding a check for isSExtCheaperThanZExt to the pass. The generated code isn't perfect, but my data shows a net dynamic instruction count improvement on spec2017 for both base and Zba+Zbb+Zbs. --- llvm/lib/CodeGen/TypePromotion.cpp | 2 + llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 7 ++ llvm/test/CodeGen/RISCV/O3-pipeline.ll | 1 + .../RISCV/lack-of-signed-truncation-check.ll | 92 +++++++++++----- llvm/test/CodeGen/RISCV/signbit-test.ll | 30 ++++- .../CodeGen/RISCV/signed-truncation-check.ll | 104 ++++++++++++------ .../CodeGen/RISCV/typepromotion-overflow.ll | 41 ++++--- 7 files changed, 190 insertions(+), 87 deletions(-) diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp index 053caf518bd1f7..7a3bc6c2043f4c 100644 --- a/llvm/lib/CodeGen/TypePromotion.cpp +++ b/llvm/lib/CodeGen/TypePromotion.cpp @@ -937,6 +937,8 @@ bool TypePromotionImpl::run(Function &F, const TargetMachine *TM, return 0; EVT PromotedVT = TLI->getTypeToTransformTo(*Ctx, SrcVT); + if (TLI->isSExtCheaperThanZExt(SrcVT, PromotedVT)) + return 0; if (RegisterBitWidth < PromotedVT.getFixedSizeInBits()) { LLVM_DEBUG(dbgs() << "IR Promotion: Couldn't find target register " << "for promoted type\n"); diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index 4c3da3ad311168..adef40e19cba4a 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -366,6 +366,7 @@ class RISCVPassConfig : public TargetPassConfig { void addIRPasses() override; bool addPreISel() override; + void addCodeGenPrepare() override; bool addInstSelector() override; bool addIRTranslator() override; void addPreLegalizeMachineIR() override; @@ -452,6 +453,12 @@ bool RISCVPassConfig::addPreISel() { return false; } +void RISCVPassConfig::addCodeGenPrepare() { + if (getOptLevel() != CodeGenOptLevel::None) + addPass(createTypePromotionLegacyPass()); + 
TargetPassConfig::addCodeGenPrepare(); +} + bool RISCVPassConfig::addInstSelector() { addPass(createRISCVISelDag(getRISCVTargetMachine(), getOptLevel())); diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll index e7db8ef9d5aff3..364c1e430b9156 100644 --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -68,6 +68,7 @@ ; CHECK-NEXT: Expand reduction intrinsics ; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: TLS Variable Hoist +; CHECK-NEXT: Type Promotion ; CHECK-NEXT: CodeGen Prepare ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Exception handling preparation diff --git a/llvm/test/CodeGen/RISCV/lack-of-signed-truncation-check.ll b/llvm/test/CodeGen/RISCV/lack-of-signed-truncation-check.ll index 9e7f2e9525d3b4..6e3a50542939f1 100644 --- a/llvm/test/CodeGen/RISCV/lack-of-signed-truncation-check.ll +++ b/llvm/test/CodeGen/RISCV/lack-of-signed-truncation-check.ll @@ -254,21 +254,39 @@ define i1 @shifts_necmp_i64_i8(i64 %x) nounwind { ; ---------------------------------------------------------------------------- ; define i1 @add_ultcmp_i16_i8(i16 %x) nounwind { -; RV32-LABEL: add_ultcmp_i16_i8: -; RV32: # %bb.0: -; RV32-NEXT: addi a0, a0, -128 -; RV32-NEXT: slli a0, a0, 16 -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: sltiu a0, a0, 255 -; RV32-NEXT: ret +; RV32I-LABEL: add_ultcmp_i16_i8: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: srli a0, a0, 16 +; RV32I-NEXT: addi a0, a0, -128 +; RV32I-NEXT: srli a0, a0, 8 +; RV32I-NEXT: sltiu a0, a0, 255 +; RV32I-NEXT: ret ; -; RV64-LABEL: add_ultcmp_i16_i8: -; RV64: # %bb.0: -; RV64-NEXT: addi a0, a0, -128 -; RV64-NEXT: slli a0, a0, 48 -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: sltiu a0, a0, 255 -; RV64-NEXT: ret +; RV64I-LABEL: add_ultcmp_i16_i8: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srli a0, a0, 48 +; RV64I-NEXT: addi a0, a0, -128 +; RV64I-NEXT: srli a0, a0, 8 +; RV64I-NEXT: sltiu a0, a0, 
255 +; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: add_ultcmp_i16_i8: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: zext.h a0, a0 +; RV32ZBB-NEXT: addi a0, a0, -128 +; RV32ZBB-NEXT: srli a0, a0, 8 +; RV32ZBB-NEXT: sltiu a0, a0, 255 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: add_ultcmp_i16_i8: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: zext.h a0, a0 +; RV64ZBB-NEXT: addi a0, a0, -128 +; RV64ZBB-NEXT: srli a0, a0, 8 +; RV64ZBB-NEXT: sltiu a0, a0, 255 +; RV64ZBB-NEXT: ret %tmp0 = add i16 %x, -128 ; ~0U << (8-1) %tmp1 = icmp ult i16 %tmp0, -256 ; ~0U << 8 ret i1 %tmp1 @@ -421,21 +439,39 @@ define i1 @add_ultcmp_i64_i8(i64 %x) nounwind { ; Slightly more canonical variant define i1 @add_ulecmp_i16_i8(i16 %x) nounwind { -; RV32-LABEL: add_ulecmp_i16_i8: -; RV32: # %bb.0: -; RV32-NEXT: addi a0, a0, -128 -; RV32-NEXT: slli a0, a0, 16 -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: sltiu a0, a0, 255 -; RV32-NEXT: ret +; RV32I-LABEL: add_ulecmp_i16_i8: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: srli a0, a0, 16 +; RV32I-NEXT: addi a0, a0, -128 +; RV32I-NEXT: srli a0, a0, 8 +; RV32I-NEXT: sltiu a0, a0, 255 +; RV32I-NEXT: ret ; -; RV64-LABEL: add_ulecmp_i16_i8: -; RV64: # %bb.0: -; RV64-NEXT: addi a0, a0, -128 -; RV64-NEXT: slli a0, a0, 48 -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: sltiu a0, a0, 255 -; RV64-NEXT: ret +; RV64I-LABEL: add_ulecmp_i16_i8: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srli a0, a0, 48 +; RV64I-NEXT: addi a0, a0, -128 +; RV64I-NEXT: srli a0, a0, 8 +; RV64I-NEXT: sltiu a0, a0, 255 +; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: add_ulecmp_i16_i8: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: zext.h a0, a0 +; RV32ZBB-NEXT: addi a0, a0, -128 +; RV32ZBB-NEXT: srli a0, a0, 8 +; RV32ZBB-NEXT: sltiu a0, a0, 255 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: add_ulecmp_i16_i8: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: zext.h a0, a0 +; RV64ZBB-NEXT: addi a0, a0, -128 +; RV64ZBB-NEXT: srli a0, a0, 8 +; RV64ZBB-NEXT: sltiu a0, a0, 255 +; RV64ZBB-NEXT: ret %tmp0 = add i16 
%x, -128 ; ~0U << (8-1) %tmp1 = icmp ule i16 %tmp0, -257 ; ~0U << 8 - 1 ret i1 %tmp1 diff --git a/llvm/test/CodeGen/RISCV/signbit-test.ll b/llvm/test/CodeGen/RISCV/signbit-test.ll index 69a9026d9af9e2..4e10fae06d8860 100644 --- a/llvm/test/CodeGen/RISCV/signbit-test.ll +++ b/llvm/test/CodeGen/RISCV/signbit-test.ll @@ -303,7 +303,10 @@ define i16 @test_clear_mask_i16_i8(i16 %x) nounwind { ; RV32-NEXT: bnez a1, .LBB10_2 ; RV32-NEXT: # %bb.1: # %t ; RV32-NEXT: li a0, 42 -; RV32-NEXT: .LBB10_2: # %f +; RV32-NEXT: ret +; RV32-NEXT: .LBB10_2: +; RV32-NEXT: slli a0, a0, 16 +; RV32-NEXT: srli a0, a0, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: test_clear_mask_i16_i8: @@ -312,7 +315,10 @@ define i16 @test_clear_mask_i16_i8(i16 %x) nounwind { ; RV64-NEXT: bnez a1, .LBB10_2 ; RV64-NEXT: # %bb.1: # %t ; RV64-NEXT: li a0, 42 -; RV64-NEXT: .LBB10_2: # %f +; RV64-NEXT: ret +; RV64-NEXT: .LBB10_2: +; RV64-NEXT: slli a0, a0, 48 +; RV64-NEXT: srli a0, a0, 48 ; RV64-NEXT: ret entry: %a = and i16 %x, 128 @@ -332,7 +338,10 @@ define i16 @test_set_mask_i16_i8(i16 %x) nounwind { ; RV32-NEXT: beqz a1, .LBB11_2 ; RV32-NEXT: # %bb.1: # %t ; RV32-NEXT: li a0, 42 -; RV32-NEXT: .LBB11_2: # %f +; RV32-NEXT: ret +; RV32-NEXT: .LBB11_2: +; RV32-NEXT: slli a0, a0, 16 +; RV32-NEXT: srli a0, a0, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: test_set_mask_i16_i8: @@ -341,7 +350,10 @@ define i16 @test_set_mask_i16_i8(i16 %x) nounwind { ; RV64-NEXT: beqz a1, .LBB11_2 ; RV64-NEXT: # %bb.1: # %t ; RV64-NEXT: li a0, 42 -; RV64-NEXT: .LBB11_2: # %f +; RV64-NEXT: ret +; RV64-NEXT: .LBB11_2: +; RV64-NEXT: slli a0, a0, 48 +; RV64-NEXT: srli a0, a0, 48 ; RV64-NEXT: ret entry: %a = and i16 %x, 128 @@ -361,7 +373,10 @@ define i16 @test_set_mask_i16_i7(i16 %x) nounwind { ; RV32-NEXT: beqz a1, .LBB12_2 ; RV32-NEXT: # %bb.1: # %t ; RV32-NEXT: li a0, 42 -; RV32-NEXT: .LBB12_2: # %f +; RV32-NEXT: ret +; RV32-NEXT: .LBB12_2: +; RV32-NEXT: slli a0, a0, 16 +; RV32-NEXT: srli a0, a0, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: 
test_set_mask_i16_i7: @@ -370,7 +385,10 @@ define i16 @test_set_mask_i16_i7(i16 %x) nounwind { ; RV64-NEXT: beqz a1, .LBB12_2 ; RV64-NEXT: # %bb.1: # %t ; RV64-NEXT: li a0, 42 -; RV64-NEXT: .LBB12_2: # %f +; RV64-NEXT: ret +; RV64-NEXT: .LBB12_2: +; RV64-NEXT: slli a0, a0, 48 +; RV64-NEXT: srli a0, a0, 48 ; RV64-NEXT: ret entry: %a = and i16 %x, 64 diff --git a/llvm/test/CodeGen/RISCV/signed-truncation-check.ll b/llvm/test/CodeGen/RISCV/signed-truncation-check.ll index 0860853ae9c0af..de36bcdb910609 100644 --- a/llvm/test/CodeGen/RISCV/signed-truncation-check.ll +++ b/llvm/test/CodeGen/RISCV/signed-truncation-check.ll @@ -254,23 +254,43 @@ define i1 @shifts_eqcmp_i64_i8(i64 %x) nounwind { ; ---------------------------------------------------------------------------- ; define i1 @add_ugecmp_i16_i8(i16 %x) nounwind { -; RV32-LABEL: add_ugecmp_i16_i8: -; RV32: # %bb.0: -; RV32-NEXT: addi a0, a0, -128 -; RV32-NEXT: slli a0, a0, 16 -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: sltiu a0, a0, 255 -; RV32-NEXT: xori a0, a0, 1 -; RV32-NEXT: ret +; RV32I-LABEL: add_ugecmp_i16_i8: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: srli a0, a0, 16 +; RV32I-NEXT: addi a0, a0, -128 +; RV32I-NEXT: srli a0, a0, 8 +; RV32I-NEXT: sltiu a0, a0, 255 +; RV32I-NEXT: xori a0, a0, 1 +; RV32I-NEXT: ret ; -; RV64-LABEL: add_ugecmp_i16_i8: -; RV64: # %bb.0: -; RV64-NEXT: addi a0, a0, -128 -; RV64-NEXT: slli a0, a0, 48 -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: sltiu a0, a0, 255 -; RV64-NEXT: xori a0, a0, 1 -; RV64-NEXT: ret +; RV64I-LABEL: add_ugecmp_i16_i8: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srli a0, a0, 48 +; RV64I-NEXT: addi a0, a0, -128 +; RV64I-NEXT: srli a0, a0, 8 +; RV64I-NEXT: sltiu a0, a0, 255 +; RV64I-NEXT: xori a0, a0, 1 +; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: add_ugecmp_i16_i8: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: zext.h a0, a0 +; RV32ZBB-NEXT: addi a0, a0, -128 +; RV32ZBB-NEXT: srli a0, a0, 8 +; RV32ZBB-NEXT: sltiu a0, a0, 255 +; 
RV32ZBB-NEXT: xori a0, a0, 1 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: add_ugecmp_i16_i8: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: zext.h a0, a0 +; RV64ZBB-NEXT: addi a0, a0, -128 +; RV64ZBB-NEXT: srli a0, a0, 8 +; RV64ZBB-NEXT: sltiu a0, a0, 255 +; RV64ZBB-NEXT: xori a0, a0, 1 +; RV64ZBB-NEXT: ret %tmp0 = add i16 %x, -128 ; ~0U << (8-1) %tmp1 = icmp uge i16 %tmp0, -256 ; ~0U << 8 ret i1 %tmp1 @@ -471,23 +491,43 @@ define i1 @add_ugecmp_i64_i8(i64 %x) nounwind { ; Slightly more canonical variant define i1 @add_ugtcmp_i16_i8(i16 %x) nounwind { -; RV32-LABEL: add_ugtcmp_i16_i8: -; RV32: # %bb.0: -; RV32-NEXT: addi a0, a0, -128 -; RV32-NEXT: slli a0, a0, 16 -; RV32-NEXT: srli a0, a0, 24 -; RV32-NEXT: sltiu a0, a0, 255 -; RV32-NEXT: xori a0, a0, 1 -; RV32-NEXT: ret +; RV32I-LABEL: add_ugtcmp_i16_i8: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: srli a0, a0, 16 +; RV32I-NEXT: addi a0, a0, -128 +; RV32I-NEXT: srli a0, a0, 8 +; RV32I-NEXT: sltiu a0, a0, 255 +; RV32I-NEXT: xori a0, a0, 1 +; RV32I-NEXT: ret ; -; RV64-LABEL: add_ugtcmp_i16_i8: -; RV64: # %bb.0: -; RV64-NEXT: addi a0, a0, -128 -; RV64-NEXT: slli a0, a0, 48 -; RV64-NEXT: srli a0, a0, 56 -; RV64-NEXT: sltiu a0, a0, 255 -; RV64-NEXT: xori a0, a0, 1 -; RV64-NEXT: ret +; RV64I-LABEL: add_ugtcmp_i16_i8: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srli a0, a0, 48 +; RV64I-NEXT: addi a0, a0, -128 +; RV64I-NEXT: srli a0, a0, 8 +; RV64I-NEXT: sltiu a0, a0, 255 +; RV64I-NEXT: xori a0, a0, 1 +; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: add_ugtcmp_i16_i8: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: zext.h a0, a0 +; RV32ZBB-NEXT: addi a0, a0, -128 +; RV32ZBB-NEXT: srli a0, a0, 8 +; RV32ZBB-NEXT: sltiu a0, a0, 255 +; RV32ZBB-NEXT: xori a0, a0, 1 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: add_ugtcmp_i16_i8: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: zext.h a0, a0 +; RV64ZBB-NEXT: addi a0, a0, -128 +; RV64ZBB-NEXT: srli a0, a0, 8 +; RV64ZBB-NEXT: sltiu a0, a0, 255 +; RV64ZBB-NEXT: xori a0, a0, 1 +; 
RV64ZBB-NEXT: ret %tmp0 = add i16 %x, -128 ; ~0U << (8-1) %tmp1 = icmp ugt i16 %tmp0, -257 ; ~0U << 8 - 1 ret i1 %tmp1 diff --git a/llvm/test/CodeGen/RISCV/typepromotion-overflow.ll b/llvm/test/CodeGen/RISCV/typepromotion-overflow.ll index fad9e6c0756b36..3740dc675949fa 100644 --- a/llvm/test/CodeGen/RISCV/typepromotion-overflow.ll +++ b/llvm/test/CodeGen/RISCV/typepromotion-overflow.ll @@ -171,8 +171,7 @@ define i32 @safe_add_underflow(i8 zeroext %a) { define i32 @safe_add_underflow_neg(i8 zeroext %a) { ; CHECK-LABEL: safe_add_underflow_neg: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -2 -; CHECK-NEXT: andi a1, a0, 255 +; CHECK-NEXT: addi a1, a0, -2 ; CHECK-NEXT: li a2, 251 ; CHECK-NEXT: li a0, 8 ; CHECK-NEXT: bltu a1, a2, .LBB9_2 @@ -207,9 +206,8 @@ define i32 @overflow_sub_negative_const_limit(i8 zeroext %a) { define i32 @sext_sub_underflow(i8 zeroext %a) { ; CHECK-LABEL: sext_sub_underflow: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -6 -; CHECK-NEXT: andi a1, a0, 255 -; CHECK-NEXT: li a2, 250 +; CHECK-NEXT: addi a1, a0, -6 +; CHECK-NEXT: li a2, -6 ; CHECK-NEXT: li a0, 8 ; CHECK-NEXT: bltu a2, a1, .LBB11_2 ; CHECK-NEXT: # %bb.1: @@ -240,8 +238,7 @@ define i32 @safe_sub_underflow(i8 zeroext %a) { define i32 @safe_sub_underflow_neg(i8 zeroext %a) { ; CHECK-LABEL: safe_sub_underflow_neg: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -4 -; CHECK-NEXT: andi a1, a0, 255 +; CHECK-NEXT: addi a1, a0, -4 ; CHECK-NEXT: li a2, 250 ; CHECK-NEXT: li a0, 8 ; CHECK-NEXT: bltu a2, a1, .LBB13_2 @@ -259,9 +256,8 @@ define i32 @safe_sub_underflow_neg(i8 zeroext %a) { define i32 @sext_sub_underflow_neg(i8 zeroext %a) { ; CHECK-LABEL: sext_sub_underflow_neg: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -4 -; CHECK-NEXT: andi a1, a0, 255 -; CHECK-NEXT: li a2, 253 +; CHECK-NEXT: addi a1, a0, -4 +; CHECK-NEXT: li a2, -3 ; CHECK-NEXT: li a0, 8 ; CHECK-NEXT: bltu a1, a2, .LBB14_2 ; CHECK-NEXT: # %bb.1: @@ -322,15 +318,18 @@ define i8 @convert_add_order(i8 zeroext %arg) { ; 
CHECK-LABEL: convert_add_order: ; CHECK: # %bb.0: ; CHECK-NEXT: ori a1, a0, 1 -; CHECK-NEXT: sltiu a2, a1, 50 +; CHECK-NEXT: li a2, 50 +; CHECK-NEXT: bltu a1, a2, .LBB19_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a1, 255 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB19_2: ; CHECK-NEXT: addi a1, a1, -40 -; CHECK-NEXT: andi a1, a1, 255 ; CHECK-NEXT: sltiu a1, a1, 20 -; CHECK-NEXT: li a3, 2 -; CHECK-NEXT: sub a3, a3, a1 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: or a2, a2, a3 -; CHECK-NEXT: and a0, a2, a0 +; CHECK-NEXT: li a2, 2 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: ret %shl = or i8 %arg, 1 %cmp.0 = icmp ult i8 %shl, 50 @@ -348,9 +347,8 @@ define i8 @underflow_if_sub(i32 %arg, i8 zeroext %arg1) { ; CHECK-NEXT: sext.w a2, a0 ; CHECK-NEXT: sgtz a2, a2 ; CHECK-NEXT: and a0, a2, a0 -; CHECK-NEXT: addi a0, a0, -11 -; CHECK-NEXT: andi a2, a0, 247 -; CHECK-NEXT: bltu a2, a1, .LBB20_2 +; CHECK-NEXT: addi a0, a0, 245 +; CHECK-NEXT: bltu a0, a1, .LBB20_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 100 ; CHECK-NEXT: .LBB20_2: @@ -369,9 +367,10 @@ define i8 @underflow_if_sub_signext(i32 %arg, i8 signext %arg1) { ; CHECK-LABEL: underflow_if_sub_signext: ; CHECK: # %bb.0: ; CHECK-NEXT: sext.w a2, a0 +; CHECK-NEXT: andi a1, a1, 255 ; CHECK-NEXT: sgtz a2, a2 ; CHECK-NEXT: and a0, a2, a0 -; CHECK-NEXT: addi a0, a0, -11 +; CHECK-NEXT: addi a0, a0, 245 ; CHECK-NEXT: bltu a0, a1, .LBB21_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 100 From 5e3c7e3aa48356a62a4b70d5d9d3e4ddd055a390 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Tue, 13 Feb 2024 10:02:52 -0800 Subject: [PATCH 031/240] [flang][cuda] Lower cluster_dims values (#81636) This PR adds a new attribute to carry over the information from `cluster_dims`. 
The new attribute `CUDAClusterDimsAttr` holds 3 integer attributes and is added to `func.func` operation. --- flang/include/flang/Optimizer/Dialect/FIRAttr.td | 12 ++++++++++++ .../flang/Optimizer/Dialect/FIROpsSupport.h | 5 +++++ flang/lib/Lower/CallInterface.cpp | 16 +++++++++++++++- flang/lib/Optimizer/Dialect/FIRAttr.cpp | 2 +- flang/test/Lower/CUDA/cuda-proc-attribute.cuf | 3 +++ 5 files changed, 36 insertions(+), 2 deletions(-) diff --git a/flang/include/flang/Optimizer/Dialect/FIRAttr.td b/flang/include/flang/Optimizer/Dialect/FIRAttr.td index 3602c67de1412a..66d6cd471116b0 100644 --- a/flang/include/flang/Optimizer/Dialect/FIRAttr.td +++ b/flang/include/flang/Optimizer/Dialect/FIRAttr.td @@ -125,4 +125,16 @@ def fir_CUDALaunchBoundsAttr : fir_Attr<"CUDALaunchBounds"> { let assemblyFormat = "`<` struct(params) `>`"; } +def fir_CUDAClusterDimsAttr : fir_Attr<"CUDAClusterDims"> { + let mnemonic = "cluster_dims"; + + let parameters = (ins + "mlir::IntegerAttr":$x, + "mlir::IntegerAttr":$y, + "mlir::IntegerAttr":$z + ); + + let assemblyFormat = "`<` struct(params) `>`"; +} + #endif // FIR_DIALECT_FIR_ATTRS diff --git a/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h b/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h index 29fa57cd7a0d8a..e8226b6df58ca2 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h +++ b/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h @@ -80,6 +80,11 @@ static constexpr llvm::StringRef getCUDALaunchBoundsAttrName() { return "fir.cuda_launch_bounds"; } +/// Attribute to carry CUDA cluster_dims values. +static constexpr llvm::StringRef getCUDAClusterDimsAttrName() { + return "fir.cuda_cluster_dims"; +} + /// Attribute to mark that a function argument is a character dummy procedure. /// Character dummy procedure have special ABI constraints. 
static constexpr llvm::StringRef getCharacterProcedureDummyAttrName() { diff --git a/flang/lib/Lower/CallInterface.cpp b/flang/lib/Lower/CallInterface.cpp index f990e0b7ce4dcf..6b71aabf7fdc89 100644 --- a/flang/lib/Lower/CallInterface.cpp +++ b/flang/lib/Lower/CallInterface.cpp @@ -540,10 +540,10 @@ setCUDAAttributes(mlir::func::FuncOp func, if (auto details = sym->GetUltimate() .detailsIf()) { + mlir::Type i64Ty = mlir::IntegerType::get(func.getContext(), 64); if (!details->cudaLaunchBounds().empty()) { assert(details->cudaLaunchBounds().size() >= 2 && "expect at least 2 values"); - mlir::Type i64Ty = mlir::IntegerType::get(func.getContext(), 64); auto maxTPBAttr = mlir::IntegerAttr::get(i64Ty, details->cudaLaunchBounds()[0]); auto minBPMAttr = @@ -557,6 +557,20 @@ setCUDAAttributes(mlir::func::FuncOp func, fir::CUDALaunchBoundsAttr::get(func.getContext(), maxTPBAttr, minBPMAttr, ubAttr)); } + + if (!details->cudaClusterDims().empty()) { + assert(details->cudaClusterDims().size() == 3 && "expect 3 values"); + auto xAttr = + mlir::IntegerAttr::get(i64Ty, details->cudaClusterDims()[0]); + auto yAttr = + mlir::IntegerAttr::get(i64Ty, details->cudaClusterDims()[1]); + auto zAttr = + mlir::IntegerAttr::get(i64Ty, details->cudaClusterDims()[2]); + func.getOperation()->setAttr( + fir::getCUDAClusterDimsAttrName(), + fir::CUDAClusterDimsAttr::get(func.getContext(), xAttr, yAttr, + zAttr)); + } } } } diff --git a/flang/lib/Optimizer/Dialect/FIRAttr.cpp b/flang/lib/Optimizer/Dialect/FIRAttr.cpp index 8d780e03dcbe73..0cf8dfb9f784c3 100644 --- a/flang/lib/Optimizer/Dialect/FIRAttr.cpp +++ b/flang/lib/Optimizer/Dialect/FIRAttr.cpp @@ -299,5 +299,5 @@ void FIROpsDialect::registerAttributes() { addAttributes(); + CUDALaunchBoundsAttr, CUDAClusterDimsAttr>(); } diff --git a/flang/test/Lower/CUDA/cuda-proc-attribute.cuf b/flang/test/Lower/CUDA/cuda-proc-attribute.cuf index 9eb2b85aaf0b83..d9765f6cd2fe8c 100644 --- a/flang/test/Lower/CUDA/cuda-proc-attribute.cuf +++ 
b/flang/test/Lower/CUDA/cuda-proc-attribute.cuf @@ -38,3 +38,6 @@ attributes(global) launch_bounds(1, 2) subroutine sub_lbounds1(); end attributes(global) launch_bounds(1, 2, 3) subroutine sub_lbounds2(); end ! CHECK: func.func @_QPsub_lbounds2() attributes {fir.cuda_attr = #fir.cuda_proc, fir.cuda_launch_bounds = #fir.launch_bounds} + +attributes(global) cluster_dims(1, 2, 3) subroutine sub_clusterdims1(); end +! CHECK: func.func @_QPsub_clusterdims1() attributes {fir.cuda_attr = #fir.cuda_proc, fir.cuda_cluster_dims = #fir.cluster_dims} From 502a88bae799694d0ed90e1839cd7a0aacb6bc9d Mon Sep 17 00:00:00 2001 From: Alex Langford Date: Tue, 13 Feb 2024 10:13:35 -0800 Subject: [PATCH 032/240] [lldb][NFCI] Add header guard to PlatformRemoteAppleXR.h (#81565) --- lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleXR.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleXR.h b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleXR.h index 4fed6e15eda31c..2fbb6caad8110f 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleXR.h +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleXR.h @@ -6,6 +6,9 @@ // //===----------------------------------------------------------------------===// +#ifndef LLDB_SOURCE_PLUGINS_PLATFORM_MACOSX_PLATFORMREMOTEAPPLEXR_H +#define LLDB_SOURCE_PLUGINS_PLATFORM_MACOSX_PLATFORMREMOTEAPPLEXR_H + #include "PlatformRemoteDarwinDevice.h" namespace lldb_private { @@ -36,3 +39,5 @@ class PlatformRemoteAppleXR : public PlatformRemoteDarwinDevice { llvm::StringRef GetPlatformName() override; }; } // namespace lldb_private + +#endif // LLDB_SOURCE_PLUGINS_PLATFORM_MACOSX_PLATFORMREMOTEAPPLEXR_H From 63198e0682058c81cd546cb1851e785cec1387ef Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Tue, 13 Feb 2024 12:26:45 -0600 Subject: [PATCH 033/240] [libc] Remove remaining GPU architecture dependent instructions (#81612) Summary: Recent patches have added solutions to the 
remaining sources of divergence. This patch simply removes the last occurrences of things like `has_builtin`, `ifdef` or builtins with feature requirements. The one exception here is `nanosleep`, but I made changes in the `__nvvm_reflect` pass to make usage like this actually work at O0. Depends on https://github.com/llvm/llvm-project/pull/81331 --- libc/src/__support/GPU/amdgpu/utils.h | 9 +-------- libc/src/__support/GPU/nvptx/utils.h | 6 ++---- libc/src/__support/RPC/rpc_util.h | 5 +++-- libc/src/time/gpu/nanosleep.cpp | 5 +++-- libc/src/time/gpu/time_utils.h | 21 +++++---------------- 5 files changed, 14 insertions(+), 32 deletions(-) diff --git a/libc/src/__support/GPU/amdgpu/utils.h b/libc/src/__support/GPU/amdgpu/utils.h index 9432b7b39f7836..75f0b5744ebd72 100644 --- a/libc/src/__support/GPU/amdgpu/utils.h +++ b/libc/src/__support/GPU/amdgpu/utils.h @@ -152,14 +152,7 @@ LIBC_INLINE uint64_t processor_clock() { return __builtin_readcyclecounter(); } /// Returns a fixed-frequency timestamp. The actual frequency is dependent on /// the card and can only be queried via the driver. LIBC_INLINE uint64_t fixed_frequency_clock() { - if constexpr (LIBC_HAS_BUILTIN(__builtin_amdgcn_s_sendmsg_rtnl)) - return __builtin_amdgcn_s_sendmsg_rtnl(0x83); - else if constexpr (LIBC_HAS_BUILTIN(__builtin_amdgcn_s_memrealtime)) - return __builtin_amdgcn_s_memrealtime(); - else if constexpr (LIBC_HAS_BUILTIN(__builtin_amdgcn_s_memtime)) - return __builtin_amdgcn_s_memtime(); - else - return 0; + return __builtin_readsteadycounter(); } /// Terminates execution of the associated wavefront. diff --git a/libc/src/__support/GPU/nvptx/utils.h b/libc/src/__support/GPU/nvptx/utils.h index 6c4bb5a7720a50..22a46e87cfc05d 100644 --- a/libc/src/__support/GPU/nvptx/utils.h +++ b/libc/src/__support/GPU/nvptx/utils.h @@ -135,13 +135,11 @@ LIBC_INLINE uint32_t get_lane_size() { return 32; } } /// Returns the current value of the GPU's processor clock. 
-LIBC_INLINE uint64_t processor_clock() { - return __nvvm_read_ptx_sreg_clock64(); -} +LIBC_INLINE uint64_t processor_clock() { return __builtin_readcyclecounter(); } /// Returns a global fixed-frequency timer at nanosecond frequency. LIBC_INLINE uint64_t fixed_frequency_clock() { - return __nvvm_read_ptx_sreg_globaltimer(); + return __builtin_readsteadycounter(); } /// Terminates execution of the calling thread. diff --git a/libc/src/__support/RPC/rpc_util.h b/libc/src/__support/RPC/rpc_util.h index ff9569298a1ed7..cc2a11a1108e01 100644 --- a/libc/src/__support/RPC/rpc_util.h +++ b/libc/src/__support/RPC/rpc_util.h @@ -21,8 +21,9 @@ namespace rpc { /// Suspend the thread briefly to assist the thread scheduler during busy loops. LIBC_INLINE void sleep_briefly() { -#if defined(LIBC_TARGET_ARCH_IS_NVPTX) && __CUDA_ARCH__ >= 700 - __nvvm_nanosleep(64); +#if defined(LIBC_TARGET_ARCH_IS_NVPTX) + if (__nvvm_reflect("__CUDA_ARCH") >= 700) + LIBC_INLINE_ASM("nanosleep.u32 64;" ::: "memory"); #elif defined(LIBC_TARGET_ARCH_IS_AMDGPU) __builtin_amdgcn_s_sleep(2); #elif defined(LIBC_TARGET_ARCH_IS_X86) diff --git a/libc/src/time/gpu/nanosleep.cpp b/libc/src/time/gpu/nanosleep.cpp index 34ff904c49c65b..dd669ff46c75c8 100644 --- a/libc/src/time/gpu/nanosleep.cpp +++ b/libc/src/time/gpu/nanosleep.cpp @@ -23,14 +23,15 @@ LLVM_LIBC_FUNCTION(int, nanosleep, uint64_t tick_rate = TICKS_PER_SEC / GPU_CLOCKS_PER_SEC; uint64_t start = gpu::fixed_frequency_clock(); -#if defined(LIBC_TARGET_ARCH_IS_NVPTX) && __CUDA_ARCH__ >= 700 +#if defined(LIBC_TARGET_ARCH_IS_NVPTX) uint64_t end = start + (nsecs + tick_rate - 1) / tick_rate; uint64_t cur = gpu::fixed_frequency_clock(); // The NVPTX architecture supports sleeping and guaruntees the actual time // slept will be somewhere between zero and twice the requested amount. Here // we will sleep again if we undershot the time. 
while (cur < end) { - __nvvm_nanosleep(static_cast(nsecs)); + if (__nvvm_reflect("__CUDA_ARCH") >= 700) + LIBC_INLINE_ASM("nanosleep.u32 %0;" ::"r"(nsecs)); cur = gpu::fixed_frequency_clock(); nsecs -= nsecs > cur - start ? cur - start : 0; } diff --git a/libc/src/time/gpu/time_utils.h b/libc/src/time/gpu/time_utils.h index 531a748665b07b..8a9a5f0f65b89c 100644 --- a/libc/src/time/gpu/time_utils.h +++ b/libc/src/time/gpu/time_utils.h @@ -15,24 +15,13 @@ namespace LIBC_NAMESPACE { #if defined(LIBC_TARGET_ARCH_IS_AMDGPU) // AMDGPU does not have a single set frequency. Different architectures and -// cards can have vary values. Here we default to a few known values, but for -// complete support the frequency needs to be read from the kernel driver. -#if defined(__GFX10__) || defined(__GFX11__) || defined(__GFX12__) || \ - defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__) -// These architectures use a 100 MHz fixed frequency clock. -constexpr uint64_t clock_freq = 100000000; -#elif defined(__GFX9__) -// These architectures use a 25 MHz fixed frequency clock expect for Vega 10 -// which is actually 27 Mhz. We default to 25 MHz in all cases anyway. -constexpr uint64_t clock_freq = 25000000; -#else -// The frequency for these architecture is unknown. We simply default to zero. -constexpr uint64_t clock_freq = 0; -#endif +// cards can have different values. The actualy frequency needs to be read from +// the kernel driver and will be between 25 MHz and 100 MHz on most cards. All +// cards following the GFX9 ISAs use a 100 MHz clock so we will default to that. +constexpr uint64_t clock_freq = 100000000UL; // We provide an externally visible symbol such that the runtime can set -// this to the correct value. If it is not set we try to default to the -// known values. +// this to the correct value. 
extern "C" [[gnu::visibility("protected")]] uint64_t [[clang::address_space(4)]] __llvm_libc_clock_freq; #define GPU_CLOCKS_PER_SEC static_cast(__llvm_libc_clock_freq) From 16140ff219b68f61fedf92df13019d89a4990a47 Mon Sep 17 00:00:00 2001 From: Giuseppe Rossini Date: Tue, 13 Feb 2024 18:29:49 +0000 Subject: [PATCH 034/240] [mlir][ROCDL] Add synchronization primitives (#80888) This PR adds two LLVM intrinsics to MLIR: - llvm.amdgcn.s.setprio which sets the priority of a wave for the GPU scheduler - llvm.amdgcn.sched.barrier which sets a software barrier so that the scheduler cannot move instructions around --- mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 16 ++++++++++++++++ mlir/test/Dialect/LLVMIR/rocdl.mlir | 12 ++++++++++++ mlir/test/Target/LLVMIR/rocdl.mlir | 16 ++++++++++++++++ 3 files changed, 44 insertions(+) diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td index 638e46a2f9c752..962c159e68a2ee 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td @@ -192,6 +192,22 @@ def ROCDL_BarrierOp : ROCDL_Op<"barrier"> { let assemblyFormat = "attr-dict"; } +def ROCDL_SetPrioOp : ROCDL_IntrOp<"s.setprio", [], [], [], 0>, + Arguments<(ins I16Attr:$priority)> { + let results = (outs); + let assemblyFormat = "$priority attr-dict"; + string llvmBuilder = + "createIntrinsicCall(builder, llvm::Intrinsic::amdgcn_s_setprio,builder.getInt16(op.getPriority()));"; +} + +def ROCDL_SchedBarrier : ROCDL_IntrOp<"sched.barrier", [], [], [], 0>, + Arguments<(ins I32Attr:$mask)> { + let results = (outs); + let assemblyFormat = "$mask attr-dict"; + string llvmBuilder = + "createIntrinsicCall(builder, llvm::Intrinsic::amdgcn_sched_barrier,builder.getInt32(op.getMask()));"; +} + //===---------------------------------------------------------------------===// // Xdlops intrinsics diff --git a/mlir/test/Dialect/LLVMIR/rocdl.mlir b/mlir/test/Dialect/LLVMIR/rocdl.mlir index 
5a14df9ef9f8dc..89e8e7836c3a0c 100644 --- a/mlir/test/Dialect/LLVMIR/rocdl.mlir +++ b/mlir/test/Dialect/LLVMIR/rocdl.mlir @@ -35,6 +35,18 @@ func.func @rocdl.barrier() { llvm.return } +func.func @rocdl.sched_barrier() { + // CHECK: rocdl.sched.barrier + rocdl.sched.barrier 0 + llvm.return +} + +func.func @rocdl.setprio() { + // CHECK: rocdl.s.setprio + rocdl.s.setprio 0 + llvm.return +} + func.func @rocdl.xdlops(%arg0 : f32, %arg1 : f32, %arg2 : vector<32xf32>, %arg3 : i32, %arg4 : vector<16xf32>, %arg5 : vector<4xf32>, diff --git a/mlir/test/Target/LLVMIR/rocdl.mlir b/mlir/test/Target/LLVMIR/rocdl.mlir index 26123300d74888..06b78650c8d010 100644 --- a/mlir/test/Target/LLVMIR/rocdl.mlir +++ b/mlir/test/Target/LLVMIR/rocdl.mlir @@ -90,6 +90,22 @@ llvm.func @rocdl.barrier() { llvm.return } +llvm.func @rocdl.setprio() { + // CHECK: call void @llvm.amdgcn.s.setprio(i16 0) + rocdl.s.setprio 0 + // CHECK-NEXT: call void @llvm.amdgcn.s.setprio(i16 1) + rocdl.s.setprio 1 + llvm.return +} + +llvm.func @rocdl.schedbarrier() { + // CHECK: call void @llvm.amdgcn.sched.barrier(i32 0) + rocdl.sched.barrier 0 + // CHECK-NEXT: call void @llvm.amdgcn.sched.barrier(i32 1) + rocdl.sched.barrier 1 + llvm.return +} + llvm.func @rocdl.xdlops(%arg0 : f32, %arg1 : f32, %arg2 : vector<32 x f32>, %arg3: i32, %arg4 : vector<16 x f32>, %arg5 : vector<4xf32>, From c830c1205dc164b645edb9c40cccbe768d5b337c Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Tue, 13 Feb 2024 12:43:44 -0600 Subject: [PATCH 035/240] [libc] Remove leftover target dependent intrinsic Summary: I forgot to remove these because I thought I did it already. This caused the build to fail when actually linked. 
--- libc/src/__support/GPU/nvptx/utils.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/libc/src/__support/GPU/nvptx/utils.h b/libc/src/__support/GPU/nvptx/utils.h index 22a46e87cfc05d..a92c8847b6ecdf 100644 --- a/libc/src/__support/GPU/nvptx/utils.h +++ b/libc/src/__support/GPU/nvptx/utils.h @@ -110,21 +110,13 @@ LIBC_INLINE uint32_t get_lane_size() { return 32; } uint32_t x) { uint32_t mask = static_cast(lane_mask); uint32_t id = __builtin_ffs(mask) - 1; -#if __CUDA_ARCH__ >= 600 return __nvvm_shfl_sync_idx_i32(mask, x, id, get_lane_size() - 1); -#else - return __nvvm_shfl_idx_i32(x, id, get_lane_size() - 1); -#endif } /// Returns a bitmask of threads in the current lane for which \p x is true. [[clang::convergent]] LIBC_INLINE uint64_t ballot(uint64_t lane_mask, bool x) { uint32_t mask = static_cast(lane_mask); -#if __CUDA_ARCH__ >= 600 return __nvvm_vote_ballot_sync(mask, x); -#else - return mask & __nvvm_vote_ballot(x); -#endif } /// Waits for all the threads in the block to converge and issues a fence. [[clang::convergent]] LIBC_INLINE void sync_threads() { __syncthreads(); } From 2422e969bf0a05b9b5cb4a6233a5f8dd335c2de5 Mon Sep 17 00:00:00 2001 From: Mingming Liu Date: Tue, 13 Feb 2024 10:49:35 -0800 Subject: [PATCH 036/240] [NFC][InstrProf]Factor out getCanonicalName to compute the canonical name given a pgo name. (#81547) - Also update the `InstrProf::addFuncWithName` to call the newly added `getCanonicalName`. --- llvm/include/llvm/ProfileData/InstrProf.h | 11 +++++ llvm/lib/ProfileData/InstrProf.cpp | 49 ++++++++++++++--------- 2 files changed, 41 insertions(+), 19 deletions(-) diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index aa08e949b5eaf2..a928ba6961f367 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -449,6 +449,17 @@ class InstrProfSymtab { return "** External Symbol **"; } + // Returns the canonial name of the given PGOName. 
In a canonical name, all + // suffixes that begins with "." except ".__uniq." are stripped. + // FIXME: Unify this with `FunctionSamples::getCanonicalFnName`. + static StringRef getCanonicalName(StringRef PGOName); + + // Add the function into the symbol table, by creating the following + // map entries: + // name-set = {PGOFuncName} + {getCanonicalName(PGOFuncName)} if the canonical + // name is different from pgo name + // - In MD5NameMap: for name in name-set + // - In MD5FuncMap: for name in name-set Error addFuncWithName(Function &F, StringRef PGOFuncName); // If the symtab is created by a series of calls to \c addFuncName, \c diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index d26004e2385bca..2eeeff987399de 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -517,35 +517,46 @@ Error InstrProfSymtab::create(StringRef NameStrings) { std::bind(&InstrProfSymtab::addFuncName, this, std::placeholders::_1)); } -Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) { - if (Error E = addFuncName(PGOFuncName)) - return E; - MD5FuncMap.emplace_back(Function::getGUID(PGOFuncName), &F); +StringRef InstrProfSymtab::getCanonicalName(StringRef PGOName) { // In ThinLTO, local function may have been promoted to global and have // suffix ".llvm." added to the function name. We need to add the // stripped function name to the symbol table so that we can find a match // from profile. // - // We may have other suffixes similar as ".llvm." which are needed to - // be stripped before the matching, but ".__uniq." suffix which is used - // to differentiate internal linkage functions in different modules - // should be kept. Now this is the only suffix with the pattern ".xxx" - // which is kept before matching. + // ".__uniq." suffix is used to differentiate internal linkage functions in + // different modules and should be kept. 
This is the only suffix with the + // pattern ".xxx" which is kept before matching, other suffixes similar as + // ".llvm." will be stripped. const std::string UniqSuffix = ".__uniq."; - auto pos = PGOFuncName.find(UniqSuffix); - // Search '.' after ".__uniq." if ".__uniq." exists, otherwise - // search '.' from the beginning. - if (pos != std::string::npos) + size_t pos = PGOName.find(UniqSuffix); + if (pos != StringRef::npos) pos += UniqSuffix.length(); else pos = 0; - pos = PGOFuncName.find('.', pos); - if (pos != std::string::npos && pos != 0) { - StringRef OtherFuncName = PGOFuncName.substr(0, pos); - if (Error E = addFuncName(OtherFuncName)) + + // Search '.' after ".__uniq." if ".__uniq." exists, otherwise search '.' from + // the beginning. + pos = PGOName.find('.', pos); + if (pos != StringRef::npos && pos != 0) + return PGOName.substr(0, pos); + + return PGOName; +} + +Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) { + auto mapName = [&](StringRef Name) -> Error { + if (Error E = addFuncName(Name)) return E; - MD5FuncMap.emplace_back(Function::getGUID(OtherFuncName), &F); - } + MD5FuncMap.emplace_back(Function::getGUID(Name), &F); + return Error::success(); + }; + if (Error E = mapName(PGOFuncName)) + return E; + + StringRef CanonicalFuncName = getCanonicalName(PGOFuncName); + if (CanonicalFuncName != PGOFuncName) + return mapName(CanonicalFuncName); + return Error::success(); } From 79ce933114e46c891a5632f7ad4a004b93a5b808 Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Tue, 13 Feb 2024 00:42:24 -0600 Subject: [PATCH 037/240] [InstCombine] Extend `(lshr/shl (shl/lshr -1, x), x)` -> `(lshr/shl -1, x)` for multi-use We previously did this iff the inner `(shl/lshr -1, x)` was one-use. No instructions are added even if the inner `(shl/lshr -1, x)` is multi-use and this canonicalization both makes the resulting instruction easier to analyze and shrinks its dependency chain. 
Closes #81576 --- .../InstCombine/InstCombineShifts.cpp | 12 ++++++++++ ...low-bit-mask-v4-and-icmp-eq-to-icmp-ule.ll | 20 ++++++++-------- ...low-bit-mask-v4-and-icmp-ne-to-icmp-ugt.ll | 20 ++++++++-------- ...nput-masking-after-truncation-variant-d.ll | 14 +++++------ ...dant-left-shift-input-masking-variant-d.ll | 10 ++++---- ...nput-masking-after-truncation-variant-d.ll | 10 ++++---- ...dant-left-shift-input-masking-variant-d.ll | 24 +++++++++---------- 7 files changed, 61 insertions(+), 49 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index 3fbe98fae0b61b..eafd2889ec50bd 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -1206,6 +1206,12 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) { return BinaryOperator::CreateAnd(Mask, X); } + // Transform (-1 >> y) << y to -1 << y + if (match(Op0, m_LShr(m_AllOnes(), m_Specific(Op1)))) { + Constant *AllOnes = ConstantInt::getAllOnesValue(Ty); + return BinaryOperator::CreateShl(AllOnes, Op1); + } + Constant *C1; if (match(Op1, m_Constant(C1))) { Constant *C2; @@ -1493,6 +1499,12 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) { return BinaryOperator::CreateAnd(Mask, X); } + // Transform (-1 << y) >> y to -1 >> y + if (match(Op0, m_Shl(m_AllOnes(), m_Specific(Op1)))) { + Constant *AllOnes = ConstantInt::getAllOnesValue(Ty); + return BinaryOperator::CreateLShr(AllOnes, Op1); + } + if (Instruction *Overflow = foldLShrOverflowBit(I)) return Overflow; diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-eq-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-eq-to-icmp-ule.ll index 81f9fe4a3d181a..d13129c1248a49 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-eq-to-icmp-ule.ll +++ 
b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-eq-to-icmp-ule.ll @@ -22,7 +22,7 @@ define i1 @p0(i8 %x, i8 %y) { ; CHECK-LABEL: @p0( ; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]] ; CHECK-NEXT: call void @use8(i8 [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = lshr exact i8 [[T0]], [[Y]] +; CHECK-NEXT: [[T1:%.*]] = lshr i8 -1, [[Y]] ; CHECK-NEXT: [[RET:%.*]] = icmp uge i8 [[T1]], [[X:%.*]] ; CHECK-NEXT: ret i1 [[RET]] ; @@ -42,7 +42,7 @@ define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @p1_vec( ; CHECK-NEXT: [[T0:%.*]] = shl nsw <2 x i8> , [[Y:%.*]] ; CHECK-NEXT: call void @use2i8(<2 x i8> [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = lshr exact <2 x i8> [[T0]], [[Y]] +; CHECK-NEXT: [[T1:%.*]] = lshr <2 x i8> , [[Y]] ; CHECK-NEXT: [[RET:%.*]] = icmp uge <2 x i8> [[T1]], [[X:%.*]] ; CHECK-NEXT: ret <2 x i1> [[RET]] ; @@ -58,7 +58,7 @@ define <3 x i1> @p2_vec_undef0(<3 x i8> %x, <3 x i8> %y) { ; CHECK-LABEL: @p2_vec_undef0( ; CHECK-NEXT: [[T0:%.*]] = shl <3 x i8> , [[Y:%.*]] ; CHECK-NEXT: call void @use3i8(<3 x i8> [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = lshr exact <3 x i8> [[T0]], [[Y]] +; CHECK-NEXT: [[T1:%.*]] = lshr <3 x i8> , [[Y]] ; CHECK-NEXT: [[RET:%.*]] = icmp uge <3 x i8> [[T1]], [[X:%.*]] ; CHECK-NEXT: ret <3 x i1> [[RET]] ; @@ -80,7 +80,7 @@ define i1 @c0(i8 %y) { ; CHECK-LABEL: @c0( ; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]] ; CHECK-NEXT: call void @use8(i8 [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = lshr exact i8 [[T0]], [[Y]] +; CHECK-NEXT: [[T1:%.*]] = lshr i8 -1, [[Y]] ; CHECK-NEXT: [[X:%.*]] = call i8 @gen8() ; CHECK-NEXT: [[RET:%.*]] = icmp ule i8 [[X]], [[T1]] ; CHECK-NEXT: ret i1 [[RET]] @@ -98,7 +98,7 @@ define i1 @c1(i8 %y) { ; CHECK-LABEL: @c1( ; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]] ; CHECK-NEXT: call void @use8(i8 [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = lshr exact i8 [[T0]], [[Y]] +; CHECK-NEXT: [[T1:%.*]] = lshr i8 -1, [[Y]] ; CHECK-NEXT: [[X:%.*]] = call i8 @gen8() ; CHECK-NEXT: [[RET:%.*]] = icmp ule i8 
[[X]], [[T1]] ; CHECK-NEXT: ret i1 [[RET]] @@ -116,7 +116,7 @@ define i1 @c2(i8 %y) { ; CHECK-LABEL: @c2( ; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]] ; CHECK-NEXT: call void @use8(i8 [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = lshr exact i8 [[T0]], [[Y]] +; CHECK-NEXT: [[T1:%.*]] = lshr i8 -1, [[Y]] ; CHECK-NEXT: [[X:%.*]] = call i8 @gen8() ; CHECK-NEXT: [[RET:%.*]] = icmp ule i8 [[X]], [[T1]] ; CHECK-NEXT: ret i1 [[RET]] @@ -138,7 +138,7 @@ define i1 @oneuse0(i8 %x, i8 %y) { ; CHECK-LABEL: @oneuse0( ; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]] ; CHECK-NEXT: call void @use8(i8 [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = lshr exact i8 [[T0]], [[Y]] +; CHECK-NEXT: [[T1:%.*]] = lshr i8 -1, [[Y]] ; CHECK-NEXT: call void @use8(i8 [[T1]]) ; CHECK-NEXT: [[RET:%.*]] = icmp uge i8 [[T1]], [[X:%.*]] ; CHECK-NEXT: ret i1 [[RET]] @@ -156,7 +156,7 @@ define i1 @oneuse1(i8 %x, i8 %y) { ; CHECK-LABEL: @oneuse1( ; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]] ; CHECK-NEXT: call void @use8(i8 [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = lshr exact i8 [[T0]], [[Y]] +; CHECK-NEXT: [[T1:%.*]] = lshr i8 -1, [[Y]] ; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]] ; CHECK-NEXT: call void @use8(i8 [[T2]]) ; CHECK-NEXT: [[RET:%.*]] = icmp uge i8 [[T1]], [[X]] @@ -175,7 +175,7 @@ define i1 @oneuse2(i8 %x, i8 %y) { ; CHECK-LABEL: @oneuse2( ; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]] ; CHECK-NEXT: call void @use8(i8 [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = lshr exact i8 [[T0]], [[Y]] +; CHECK-NEXT: [[T1:%.*]] = lshr i8 -1, [[Y]] ; CHECK-NEXT: call void @use8(i8 [[T1]]) ; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]] ; CHECK-NEXT: call void @use8(i8 [[T2]]) @@ -200,7 +200,7 @@ define i1 @n0(i8 %x, i8 %y, i8 %notx) { ; CHECK-LABEL: @n0( ; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]] ; CHECK-NEXT: call void @use8(i8 [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = lshr exact i8 [[T0]], [[Y]] +; CHECK-NEXT: [[T1:%.*]] = lshr i8 -1, [[Y]] ; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]] ; 
CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[T2]], [[NOTX:%.*]] ; CHECK-NEXT: ret i1 [[RET]] diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-ne-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-ne-to-icmp-ugt.ll index 321a1159d0af7a..a1517b36d0b9d9 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-ne-to-icmp-ugt.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-ne-to-icmp-ugt.ll @@ -22,7 +22,7 @@ define i1 @p0(i8 %x, i8 %y) { ; CHECK-LABEL: @p0( ; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]] ; CHECK-NEXT: call void @use8(i8 [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = lshr exact i8 [[T0]], [[Y]] +; CHECK-NEXT: [[T1:%.*]] = lshr i8 -1, [[Y]] ; CHECK-NEXT: [[RET:%.*]] = icmp ult i8 [[T1]], [[X:%.*]] ; CHECK-NEXT: ret i1 [[RET]] ; @@ -42,7 +42,7 @@ define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @p1_vec( ; CHECK-NEXT: [[T0:%.*]] = shl nsw <2 x i8> , [[Y:%.*]] ; CHECK-NEXT: call void @use2i8(<2 x i8> [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = lshr exact <2 x i8> [[T0]], [[Y]] +; CHECK-NEXT: [[T1:%.*]] = lshr <2 x i8> , [[Y]] ; CHECK-NEXT: [[RET:%.*]] = icmp ult <2 x i8> [[T1]], [[X:%.*]] ; CHECK-NEXT: ret <2 x i1> [[RET]] ; @@ -58,7 +58,7 @@ define <3 x i1> @p2_vec_undef0(<3 x i8> %x, <3 x i8> %y) { ; CHECK-LABEL: @p2_vec_undef0( ; CHECK-NEXT: [[T0:%.*]] = shl <3 x i8> , [[Y:%.*]] ; CHECK-NEXT: call void @use3i8(<3 x i8> [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = lshr exact <3 x i8> [[T0]], [[Y]] +; CHECK-NEXT: [[T1:%.*]] = lshr <3 x i8> , [[Y]] ; CHECK-NEXT: [[RET:%.*]] = icmp ult <3 x i8> [[T1]], [[X:%.*]] ; CHECK-NEXT: ret <3 x i1> [[RET]] ; @@ -80,7 +80,7 @@ define i1 @c0(i8 %y) { ; CHECK-LABEL: @c0( ; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]] ; CHECK-NEXT: call void @use8(i8 [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = lshr exact i8 [[T0]], [[Y]] +; CHECK-NEXT: [[T1:%.*]] = lshr i8 -1, [[Y]] ; CHECK-NEXT: [[X:%.*]] = call i8 @gen8() ; 
CHECK-NEXT: [[RET:%.*]] = icmp ugt i8 [[X]], [[T1]] ; CHECK-NEXT: ret i1 [[RET]] @@ -98,7 +98,7 @@ define i1 @c1(i8 %y) { ; CHECK-LABEL: @c1( ; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]] ; CHECK-NEXT: call void @use8(i8 [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = lshr exact i8 [[T0]], [[Y]] +; CHECK-NEXT: [[T1:%.*]] = lshr i8 -1, [[Y]] ; CHECK-NEXT: [[X:%.*]] = call i8 @gen8() ; CHECK-NEXT: [[RET:%.*]] = icmp ugt i8 [[X]], [[T1]] ; CHECK-NEXT: ret i1 [[RET]] @@ -116,7 +116,7 @@ define i1 @c2(i8 %y) { ; CHECK-LABEL: @c2( ; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]] ; CHECK-NEXT: call void @use8(i8 [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = lshr exact i8 [[T0]], [[Y]] +; CHECK-NEXT: [[T1:%.*]] = lshr i8 -1, [[Y]] ; CHECK-NEXT: [[X:%.*]] = call i8 @gen8() ; CHECK-NEXT: [[RET:%.*]] = icmp ugt i8 [[X]], [[T1]] ; CHECK-NEXT: ret i1 [[RET]] @@ -138,7 +138,7 @@ define i1 @oneuse0(i8 %x, i8 %y) { ; CHECK-LABEL: @oneuse0( ; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]] ; CHECK-NEXT: call void @use8(i8 [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = lshr exact i8 [[T0]], [[Y]] +; CHECK-NEXT: [[T1:%.*]] = lshr i8 -1, [[Y]] ; CHECK-NEXT: call void @use8(i8 [[T1]]) ; CHECK-NEXT: [[RET:%.*]] = icmp ult i8 [[T1]], [[X:%.*]] ; CHECK-NEXT: ret i1 [[RET]] @@ -156,7 +156,7 @@ define i1 @oneuse1(i8 %x, i8 %y) { ; CHECK-LABEL: @oneuse1( ; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]] ; CHECK-NEXT: call void @use8(i8 [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = lshr exact i8 [[T0]], [[Y]] +; CHECK-NEXT: [[T1:%.*]] = lshr i8 -1, [[Y]] ; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]] ; CHECK-NEXT: call void @use8(i8 [[T2]]) ; CHECK-NEXT: [[RET:%.*]] = icmp ult i8 [[T1]], [[X]] @@ -175,7 +175,7 @@ define i1 @oneuse2(i8 %x, i8 %y) { ; CHECK-LABEL: @oneuse2( ; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]] ; CHECK-NEXT: call void @use8(i8 [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = lshr exact i8 [[T0]], [[Y]] +; CHECK-NEXT: [[T1:%.*]] = lshr i8 -1, [[Y]] ; CHECK-NEXT: call void @use8(i8 [[T1]]) ; CHECK-NEXT: 
[[T2:%.*]] = and i8 [[T1]], [[X:%.*]] ; CHECK-NEXT: call void @use8(i8 [[T2]]) @@ -200,7 +200,7 @@ define i1 @n0(i8 %x, i8 %y, i8 %notx) { ; CHECK-LABEL: @n0( ; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]] ; CHECK-NEXT: call void @use8(i8 [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = lshr exact i8 [[T0]], [[Y]] +; CHECK-NEXT: [[T1:%.*]] = lshr i8 -1, [[Y]] ; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]] ; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[T2]], [[NOTX:%.*]] ; CHECK-NEXT: ret i1 [[RET]] diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll index 6b7061ff9bd064..48873852cfc7cd 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll @@ -18,7 +18,7 @@ define i32 @t0_basic(i64 %x, i32 %nbits) { ; CHECK-LABEL: @t0_basic( ; CHECK-NEXT: [[T0:%.*]] = zext i32 [[NBITS:%.*]] to i64 ; CHECK-NEXT: [[T1:%.*]] = shl nsw i64 -1, [[T0]] -; CHECK-NEXT: [[T2:%.*]] = lshr exact i64 [[T1]], [[T0]] +; CHECK-NEXT: [[T2:%.*]] = lshr i64 -1, [[T0]] ; CHECK-NEXT: [[T3:%.*]] = add i32 [[NBITS]], -33 ; CHECK-NEXT: call void @use64(i64 [[T0]]) ; CHECK-NEXT: call void @use64(i64 [[T1]]) @@ -54,7 +54,7 @@ define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t1_vec_splat( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> ; CHECK-NEXT: [[T1:%.*]] = shl nsw <8 x i64> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = lshr exact <8 x i64> [[T1]], [[T0]] +; CHECK-NEXT: [[T2:%.*]] = lshr <8 x i64> , [[T0]] ; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]]) @@ -85,7 +85,7 @@ define <8 x i32> @t2_vec_splat_undef(<8 x 
i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t2_vec_splat_undef( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> ; CHECK-NEXT: [[T1:%.*]] = shl <8 x i64> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = lshr exact <8 x i64> [[T1]], [[T0]] +; CHECK-NEXT: [[T2:%.*]] = lshr <8 x i64> , [[T0]] ; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]]) @@ -116,7 +116,7 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t3_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> ; CHECK-NEXT: [[T1:%.*]] = shl <8 x i64> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = lshr exact <8 x i64> [[T1]], [[T0]] +; CHECK-NEXT: [[T2:%.*]] = lshr <8 x i64> , [[T0]] ; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]]) @@ -149,7 +149,7 @@ define i32 @n4_extrause0(i64 %x, i32 %nbits) { ; CHECK-LABEL: @n4_extrause0( ; CHECK-NEXT: [[T0:%.*]] = zext i32 [[NBITS:%.*]] to i64 ; CHECK-NEXT: [[T1:%.*]] = shl nsw i64 -1, [[T0]] -; CHECK-NEXT: [[T2:%.*]] = lshr exact i64 [[T1]], [[T0]] +; CHECK-NEXT: [[T2:%.*]] = lshr i64 -1, [[T0]] ; CHECK-NEXT: [[T3:%.*]] = add i32 [[NBITS]], -33 ; CHECK-NEXT: call void @use64(i64 [[T0]]) ; CHECK-NEXT: call void @use64(i64 [[T1]]) @@ -182,7 +182,7 @@ define i32 @n5_extrause1(i64 %x, i32 %nbits) { ; CHECK-LABEL: @n5_extrause1( ; CHECK-NEXT: [[T0:%.*]] = zext i32 [[NBITS:%.*]] to i64 ; CHECK-NEXT: [[T1:%.*]] = shl nsw i64 -1, [[T0]] -; CHECK-NEXT: [[T2:%.*]] = lshr exact i64 [[T1]], [[T0]] +; CHECK-NEXT: [[T2:%.*]] = lshr i64 -1, [[T0]] ; CHECK-NEXT: [[T3:%.*]] = add i32 [[NBITS]], -33 ; CHECK-NEXT: call void @use64(i64 [[T0]]) ; CHECK-NEXT: call void @use64(i64 [[T1]]) @@ -215,7 +215,7 @@ define i32 @n6_extrause2(i64 %x, i32 %nbits) { ; CHECK-LABEL: @n6_extrause2( ; CHECK-NEXT: [[T0:%.*]] = zext i32 
[[NBITS:%.*]] to i64 ; CHECK-NEXT: [[T1:%.*]] = shl nsw i64 -1, [[T0]] -; CHECK-NEXT: [[T2:%.*]] = lshr exact i64 [[T1]], [[T0]] +; CHECK-NEXT: [[T2:%.*]] = lshr i64 -1, [[T0]] ; CHECK-NEXT: [[T3:%.*]] = add i32 [[NBITS]], -33 ; CHECK-NEXT: call void @use64(i64 [[T0]]) ; CHECK-NEXT: call void @use64(i64 [[T1]]) diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll index 92805c620683c0..9c096d1418a5b4 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll @@ -16,7 +16,7 @@ declare void @use32(i32) define i32 @t0_basic(i32 %x, i32 %nbits) { ; CHECK-LABEL: @t0_basic( ; CHECK-NEXT: [[T0:%.*]] = shl nsw i32 -1, [[NBITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = lshr exact i32 [[T0]], [[NBITS]] +; CHECK-NEXT: [[T1:%.*]] = lshr i32 -1, [[NBITS]] ; CHECK-NEXT: [[T3:%.*]] = add i32 [[NBITS]], -1 ; CHECK-NEXT: call void @use32(i32 [[T0]]) ; CHECK-NEXT: call void @use32(i32 [[T1]]) @@ -43,7 +43,7 @@ declare void @use8xi32(<8 x i32>) define <8 x i32> @t2_vec_splat(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t2_vec_splat( ; CHECK-NEXT: [[T0:%.*]] = shl nsw <8 x i32> , [[NBITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = lshr exact <8 x i32> [[T0]], [[NBITS]] +; CHECK-NEXT: [[T1:%.*]] = lshr <8 x i32> , [[NBITS]] ; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T1]]) @@ -66,7 +66,7 @@ define <8 x i32> @t2_vec_splat(<8 x i32> %x, <8 x i32> %nbits) { define <8 x i32> @t2_vec_splat_undef(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t2_vec_splat_undef( ; CHECK-NEXT: [[T0:%.*]] = shl <8 x i32> , [[NBITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = lshr exact <8 x i32> [[T0]], [[NBITS]] +; CHECK-NEXT: [[T1:%.*]] = lshr <8 x 
i32> , [[NBITS]] ; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T1]]) @@ -89,7 +89,7 @@ define <8 x i32> @t2_vec_splat_undef(<8 x i32> %x, <8 x i32> %nbits) { define <8 x i32> @t2_vec_nonsplat(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t2_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = shl nsw <8 x i32> , [[NBITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = lshr exact <8 x i32> [[T0]], [[NBITS]] +; CHECK-NEXT: [[T1:%.*]] = lshr <8 x i32> , [[NBITS]] ; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T1]]) @@ -114,7 +114,7 @@ define <8 x i32> @t2_vec_nonsplat(<8 x i32> %x, <8 x i32> %nbits) { define i32 @n3_extrause(i32 %x, i32 %nbits) { ; CHECK-LABEL: @n3_extrause( ; CHECK-NEXT: [[T0:%.*]] = shl nsw i32 -1, [[NBITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = lshr exact i32 [[T0]], [[NBITS]] +; CHECK-NEXT: [[T1:%.*]] = lshr i32 -1, [[NBITS]] ; CHECK-NEXT: [[T2:%.*]] = and i32 [[T1]], [[X:%.*]] ; CHECK-NEXT: [[T3:%.*]] = add i32 [[NBITS]], -1 ; CHECK-NEXT: call void @use32(i32 [[T0]]) diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-d.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-d.ll index bdc7bebf00ecac..1a977f67a6a5a8 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-d.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-d.ll @@ -18,7 +18,7 @@ define i32 @t0_basic(i64 %x, i32 %nbits) { ; CHECK-LABEL: @t0_basic( ; CHECK-NEXT: [[T0:%.*]] = zext i32 [[NBITS:%.*]] to i64 ; CHECK-NEXT: [[T1:%.*]] = shl nsw i64 -1, [[T0]] -; CHECK-NEXT: [[T2:%.*]] = lshr exact i64 [[T1]], [[T0]] +; CHECK-NEXT: [[T2:%.*]] = lshr i64 -1, [[T0]] ; CHECK-NEXT: [[T3:%.*]] = add i32 [[NBITS]], -32 ; 
CHECK-NEXT: [[T4:%.*]] = and i64 [[T2]], [[X:%.*]] ; CHECK-NEXT: call void @use64(i64 [[T0]]) @@ -56,7 +56,7 @@ define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t1_vec_splat( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> ; CHECK-NEXT: [[T1:%.*]] = shl nsw <8 x i64> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = lshr exact <8 x i64> [[T1]], [[T0]] +; CHECK-NEXT: [[T2:%.*]] = lshr <8 x i64> , [[T0]] ; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], ; CHECK-NEXT: [[T4:%.*]] = and <8 x i64> [[T2]], [[X:%.*]] ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) @@ -89,7 +89,7 @@ define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t2_vec_splat_undef( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> ; CHECK-NEXT: [[T1:%.*]] = shl <8 x i64> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = lshr exact <8 x i64> [[T1]], [[T0]] +; CHECK-NEXT: [[T2:%.*]] = lshr <8 x i64> , [[T0]] ; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], ; CHECK-NEXT: [[T4:%.*]] = and <8 x i64> [[T2]], [[X:%.*]] ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) @@ -122,7 +122,7 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t3_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> ; CHECK-NEXT: [[T1:%.*]] = shl <8 x i64> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = lshr exact <8 x i64> [[T1]], [[T0]] +; CHECK-NEXT: [[T2:%.*]] = lshr <8 x i64> , [[T0]] ; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], ; CHECK-NEXT: [[T4:%.*]] = and <8 x i64> [[T2]], [[X:%.*]] ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) @@ -157,7 +157,7 @@ define i32 @n4_extrause(i64 %x, i32 %nbits) { ; CHECK-LABEL: @n4_extrause( ; CHECK-NEXT: [[T0:%.*]] = zext i32 [[NBITS:%.*]] to i64 ; CHECK-NEXT: [[T1:%.*]] = shl nsw i64 -1, [[T0]] -; CHECK-NEXT: [[T2:%.*]] = lshr exact i64 [[T1]], [[T0]] +; CHECK-NEXT: [[T2:%.*]] = lshr i64 -1, [[T0]] ; CHECK-NEXT: [[T3:%.*]] = add i32 
[[NBITS]], -32 ; CHECK-NEXT: [[T4:%.*]] = and i64 [[T2]], [[X:%.*]] ; CHECK-NEXT: call void @use64(i64 [[T0]]) diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-d.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-d.ll index c91e5a0cc577c8..549729fe8b59c5 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-d.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-d.ll @@ -18,7 +18,7 @@ declare void @use32(i32) define i32 @t0_basic(i32 %x, i32 %nbits) { ; CHECK-LABEL: @t0_basic( ; CHECK-NEXT: [[T0:%.*]] = shl nsw i32 -1, [[NBITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = lshr exact i32 [[T0]], [[NBITS]] +; CHECK-NEXT: [[T1:%.*]] = lshr i32 -1, [[NBITS]] ; CHECK-NEXT: [[T2:%.*]] = and i32 [[T1]], [[X:%.*]] ; CHECK-NEXT: call void @use32(i32 [[T0]]) ; CHECK-NEXT: call void @use32(i32 [[T1]]) @@ -39,7 +39,7 @@ define i32 @t0_basic(i32 %x, i32 %nbits) { define i32 @t1_bigger_shift(i32 %x, i32 %nbits) { ; CHECK-LABEL: @t1_bigger_shift( ; CHECK-NEXT: [[T0:%.*]] = shl nsw i32 -1, [[NBITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = lshr exact i32 [[T0]], [[NBITS]] +; CHECK-NEXT: [[T1:%.*]] = lshr i32 -1, [[NBITS]] ; CHECK-NEXT: [[T2:%.*]] = and i32 [[T1]], [[X:%.*]] ; CHECK-NEXT: [[T3:%.*]] = add i32 [[NBITS]], 1 ; CHECK-NEXT: call void @use32(i32 [[T0]]) @@ -68,7 +68,7 @@ declare void @use3xi32(<3 x i32>) define <3 x i32> @t2_vec_splat(<3 x i32> %x, <3 x i32> %nbits) { ; CHECK-LABEL: @t2_vec_splat( ; CHECK-NEXT: [[T0:%.*]] = shl nsw <3 x i32> , [[NBITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = lshr exact <3 x i32> [[T0]], [[NBITS]] +; CHECK-NEXT: [[T1:%.*]] = lshr <3 x i32> , [[NBITS]] ; CHECK-NEXT: [[T2:%.*]] = and <3 x i32> [[T1]], [[X:%.*]] ; CHECK-NEXT: [[T3:%.*]] = add <3 x i32> [[NBITS]], ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T0]]) @@ -93,7 +93,7 @@ define <3 x i32> @t2_vec_splat(<3 x i32> %x, <3 x i32> %nbits) { define <3 x i32> @t3_vec_nonsplat(<3 x 
i32> %x, <3 x i32> %nbits) { ; CHECK-LABEL: @t3_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = shl nsw <3 x i32> , [[NBITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = lshr exact <3 x i32> [[T0]], [[NBITS]] +; CHECK-NEXT: [[T1:%.*]] = lshr <3 x i32> , [[NBITS]] ; CHECK-NEXT: [[T2:%.*]] = and <3 x i32> [[T1]], [[X:%.*]] ; CHECK-NEXT: [[T3:%.*]] = add <3 x i32> [[NBITS]], ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T0]]) @@ -118,7 +118,7 @@ define <3 x i32> @t3_vec_nonsplat(<3 x i32> %x, <3 x i32> %nbits) { define <3 x i32> @t4_vec_undef(<3 x i32> %x, <3 x i32> %nbits) { ; CHECK-LABEL: @t4_vec_undef( ; CHECK-NEXT: [[T0:%.*]] = shl <3 x i32> , [[NBITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = lshr exact <3 x i32> [[T0]], [[NBITS]] +; CHECK-NEXT: [[T1:%.*]] = lshr <3 x i32> , [[NBITS]] ; CHECK-NEXT: [[T2:%.*]] = and <3 x i32> [[T1]], [[X:%.*]] ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T0]]) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T1]]) @@ -147,7 +147,7 @@ define i32 @t5_commutativity0(i32 %nbits) { ; CHECK-LABEL: @t5_commutativity0( ; CHECK-NEXT: [[X:%.*]] = call i32 @gen32() ; CHECK-NEXT: [[T0:%.*]] = shl nsw i32 -1, [[NBITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = lshr exact i32 [[T0]], [[NBITS]] +; CHECK-NEXT: [[T1:%.*]] = lshr i32 -1, [[NBITS]] ; CHECK-NEXT: [[T2:%.*]] = and i32 [[X]], [[T1]] ; CHECK-NEXT: call void @use32(i32 [[T0]]) ; CHECK-NEXT: call void @use32(i32 [[T1]]) @@ -169,7 +169,7 @@ define i32 @t5_commutativity0(i32 %nbits) { define i32 @t6_commutativity1(i32 %nbits0, i32 %nbits1) { ; CHECK-LABEL: @t6_commutativity1( ; CHECK-NEXT: [[T0:%.*]] = shl nsw i32 -1, [[NBITS0:%.*]] -; CHECK-NEXT: [[T1:%.*]] = lshr exact i32 [[T0]], [[NBITS0]] +; CHECK-NEXT: [[T1:%.*]] = lshr i32 -1, [[NBITS0]] ; CHECK-NEXT: [[T2:%.*]] = shl nsw i32 -1, [[NBITS1:%.*]] ; CHECK-NEXT: [[T3:%.*]] = lshr i32 [[T0]], [[NBITS1]] ; CHECK-NEXT: [[T4:%.*]] = and i32 [[T3]], [[T1]] @@ -197,7 +197,7 @@ define i32 @t6_commutativity1(i32 %nbits0, i32 %nbits1) { define i32 @t7_commutativity2(i32 %nbits0, i32 
%nbits1) { ; CHECK-LABEL: @t7_commutativity2( ; CHECK-NEXT: [[T0:%.*]] = shl nsw i32 -1, [[NBITS0:%.*]] -; CHECK-NEXT: [[T1:%.*]] = lshr exact i32 [[T0]], [[NBITS0]] +; CHECK-NEXT: [[T1:%.*]] = lshr i32 -1, [[NBITS0]] ; CHECK-NEXT: [[T2:%.*]] = shl nsw i32 -1, [[NBITS1:%.*]] ; CHECK-NEXT: [[T3:%.*]] = lshr i32 [[T0]], [[NBITS1]] ; CHECK-NEXT: [[T4:%.*]] = and i32 [[T3]], [[T1]] @@ -228,7 +228,7 @@ define i32 @t7_commutativity2(i32 %nbits0, i32 %nbits1) { define i32 @t8_nuw(i32 %x, i32 %nbits) { ; CHECK-LABEL: @t8_nuw( ; CHECK-NEXT: [[T0:%.*]] = shl nsw i32 -1, [[NBITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = lshr exact i32 [[T0]], [[NBITS]] +; CHECK-NEXT: [[T1:%.*]] = lshr i32 -1, [[NBITS]] ; CHECK-NEXT: [[T2:%.*]] = and i32 [[T1]], [[X:%.*]] ; CHECK-NEXT: call void @use32(i32 [[T0]]) ; CHECK-NEXT: call void @use32(i32 [[T1]]) @@ -249,7 +249,7 @@ define i32 @t8_nuw(i32 %x, i32 %nbits) { define i32 @t9_nsw(i32 %x, i32 %nbits) { ; CHECK-LABEL: @t9_nsw( ; CHECK-NEXT: [[T0:%.*]] = shl nsw i32 -1, [[NBITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = lshr exact i32 [[T0]], [[NBITS]] +; CHECK-NEXT: [[T1:%.*]] = lshr i32 -1, [[NBITS]] ; CHECK-NEXT: [[T2:%.*]] = and i32 [[T1]], [[X:%.*]] ; CHECK-NEXT: call void @use32(i32 [[T0]]) ; CHECK-NEXT: call void @use32(i32 [[T1]]) @@ -270,7 +270,7 @@ define i32 @t9_nsw(i32 %x, i32 %nbits) { define i32 @t10_nuw_nsw(i32 %x, i32 %nbits) { ; CHECK-LABEL: @t10_nuw_nsw( ; CHECK-NEXT: [[T0:%.*]] = shl nsw i32 -1, [[NBITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = lshr exact i32 [[T0]], [[NBITS]] +; CHECK-NEXT: [[T1:%.*]] = lshr i32 -1, [[NBITS]] ; CHECK-NEXT: [[T2:%.*]] = and i32 [[T1]], [[X:%.*]] ; CHECK-NEXT: call void @use32(i32 [[T0]]) ; CHECK-NEXT: call void @use32(i32 [[T1]]) @@ -298,7 +298,7 @@ define i32 @t11_assume_uge(i32 %x, i32 %masknbits, i32 %shiftnbits) { ; CHECK-NEXT: [[CMP:%.*]] = icmp uge i32 [[SHIFTNBITS:%.*]], [[MASKNBITS:%.*]] ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) ; CHECK-NEXT: [[T0:%.*]] = shl nsw i32 -1, [[MASKNBITS]] -; CHECK-NEXT: 
[[T1:%.*]] = lshr exact i32 [[T0]], [[MASKNBITS]] +; CHECK-NEXT: [[T1:%.*]] = lshr i32 -1, [[MASKNBITS]] ; CHECK-NEXT: [[T2:%.*]] = and i32 [[T1]], [[X:%.*]] ; CHECK-NEXT: call void @use32(i32 [[T0]]) ; CHECK-NEXT: call void @use32(i32 [[T1]]) From f79f58d5f1be370ccc212236e1c10b55835eb3c7 Mon Sep 17 00:00:00 2001 From: Prabhuk Date: Tue, 13 Feb 2024 10:57:39 -0800 Subject: [PATCH 038/240] Revert "[clang] Remove #undef alloca workaround" (#81649) Reverts llvm/llvm-project#81534 llvm/llvm-project#81534 breaks building (Fuchsia) Clang toolchain on Windows. Log: https://logs.chromium.org/logs/fuchsia/buildbucket/cr-buildbucket/8756186536543250705/+/u/clang/install/stdout Builder: https://ci.chromium.org/ui/p/fuchsia/builders/toolchain.ci/clang-windows-x64/b8756186536543250705/overview ``` FAILED: tools/clang/tools/extra/clang-include-fixer/tool/CMakeFiles/clang-include-fixer.dir/ClangIncludeFixer.cpp.obj C:\b\s\w\ir\x\w\cipd\bin\clang-cl.exe /nologo -TP -DCLANG_REPOSITORY_STRING=\"https://llvm.googlesource.com/llvm-project\" -DGTEST_HAS_RTTI=0 -DUNICODE -D_CRT_NONSTDC_NO_DEPRECATE -D_CRT_NONSTDC_NO_WARNINGS -D_CRT_SECURE_NO_DEPRECATE -D_CRT_SECURE_NO_WARNINGS -D_GLIBCXX_ASSERTIONS -D_HAS_EXCEPTIONS=0 -D_SCL_SECURE_NO_DEPRECATE -D_SCL_SECURE_NO_WARNINGS -D_UNICODE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -IC:\b\s\w\ir\x\w\llvm_build\tools\clang\tools\extra\clang-include-fixer\tool -IC:\b\s\w\ir\x\w\llvm-llvm-project\clang-tools-extra\clang-include-fixer\tool -IC:\b\s\w\ir\x\w\llvm-llvm-project\clang\include -IC:\b\s\w\ir\x\w\llvm_build\tools\clang\include -IC:\b\s\w\ir\x\w\recipe_cleanup\tensorflow-venv\store\python_venv-q9i5kpsp0iun0ktmqgab125ti8\contents\Lib\site-packages\tensorflow\include -IC:\b\s\w\ir\x\w\llvm_build\include -IC:\b\s\w\ir\x\w\llvm-llvm-project\llvm\include -IC:\b\s\w\ir\x\w\llvm-llvm-project\clang-tools-extra\clang-include-fixer\tool\.. 
-imsvcC:\b\s\w\ir\x\w\zlib_install_target\include -imsvcC:\b\s\w\ir\x\w\zstd_install\include /DWIN32 /D_WINDOWS /Zc:inline /Zc:__cplusplus /Oi /Brepro /bigobj /permissive- /W4 -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wmissing-field-initializers -Wimplicit-fallthrough -Wcovered-switch-default -Wno-noexcept-type -Wnon-virtual-dtor -Wdelete-non-virtual-dtor -Wsuggest-override -Wstring-conversion -Wmisleading-indentation -Wctad-maybe-unsupported /Gw -no-canonical-prefixes /O2 /Ob2 -std:c++17 -MT /EHs-c- /GR- -UNDEBUG /showIncludes /Fotools\clang\tools\extra\clang-include-fixer\tool\CMakeFiles\clang-include-fixer.dir\ClangIncludeFixer.cpp.obj /Fdtools\clang\tools\extra\clang-include-fixer\tool\CMakeFiles\clang-include-fixer.dir\ -c -- C:\b\s\w\ir\x\w\llvm-llvm-project\clang-tools-extra\clang-include-fixer\tool\ClangIncludeFixer.cpp In file included from C:\b\s\w\ir\x\w\llvm-llvm-project\clang-tools-extra\clang-include-fixer\tool\ClangIncludeFixer.cpp:11: In file included from C:\b\s\w\ir\x\w\llvm-llvm-project\clang-tools-extra\clang-include-fixer\tool\..\IncludeFixer.h:15: In file included from C:\b\s\w\ir\x\w\llvm-llvm-project\clang\include\clang/Sema/ExternalSemaSource.h:15: In file included from C:\b\s\w\ir\x\w\llvm-llvm-project\clang\include\clang/AST/ExternalASTSource.h:18: In file included from C:\b\s\w\ir\x\w\llvm-llvm-project\clang\include\clang/AST/DeclBase.h:18: In file included from C:\b\s\w\ir\x\w\llvm-llvm-project\clang\include\clang/AST/DeclarationName.h:18: In file included from C:\b\s\w\ir\x\w\llvm-llvm-project\clang\include\clang/Basic/IdentifierTable.h:18: In file included from C:\b\s\w\ir\x\w\llvm-llvm-project\clang\include\clang/Basic/Builtins.h:63: C:\b\s\w\ir\x\w\llvm_build\tools\clang\include\clang/Basic/Builtins.inc(151,1): error: redefinition of enumerator 'BI_alloca' 151 | LANGBUILTIN(_alloca, "v*z", "n", ALL_MS_LANGUAGES) | ^ C:\b\s\w\ir\x\w\llvm_build\tools\clang\include\clang/Basic/Builtins.inc(15,54): note: expanded from 
macro 'LANGBUILTIN' 15 | # define LANGBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG) BUILTIN(ID, TYPE, ATTRS) | ^ C:\b\s\w\ir\x\w\llvm-llvm-project\clang\include\clang/Basic/Builtins.h(62,34): note: expanded from macro 'BUILTIN' 62 | #define BUILTIN(ID, TYPE, ATTRS) BI##ID, | ^ (72,1): note: expanded from here 72 | BI_alloca | ^ C:\b\s\w\ir\x\w\llvm_build\tools\clang\include\clang/Basic/Builtins.inc(150,1): note: previous definition is here 150 | LIBBUILTIN(alloca, "v*z", "fn", STDLIB_H, ALL_GNU_LANGUAGES) | ^ C:\b\s\w\ir\x\w\llvm_build\tools\clang\include\clang/Basic/Builtins.inc(11,61): note: expanded from macro 'LIBBUILTIN' 11 | # define LIBBUILTIN(ID, TYPE, ATTRS, HEADER, BUILTIN_LANG) BUILTIN(ID, TYPE, ATTRS) | ^ C:\b\s\w\ir\x\w\llvm-llvm-project\clang\include\clang/Basic/Builtins.h(62,34): note: expanded from macro 'BUILTIN' 62 | #define BUILTIN(ID, TYPE, ATTRS) BI##ID, | ^ (71,1): note: expanded from here 71 | BI_alloca | ^ ``` --- clang/include/clang/Basic/Builtins.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/clang/include/clang/Basic/Builtins.h b/clang/include/clang/Basic/Builtins.h index 6700d1903a0088..f955d21169556a 100644 --- a/clang/include/clang/Basic/Builtins.h +++ b/clang/include/clang/Basic/Builtins.h @@ -20,6 +20,10 @@ #include "llvm/ADT/StringRef.h" #include +// VC++ defines 'alloca' as an object-like macro, which interferes with our +// builtins. +#undef alloca + namespace clang { class TargetInfo; class IdentifierTable; From e20462a069670c24ff512cca32688a29803852f4 Mon Sep 17 00:00:00 2001 From: Danila Malyutin Date: Tue, 13 Feb 2024 22:58:01 +0400 Subject: [PATCH 039/240] [StatepointLowering] Use Constant instead of TargetConstant for undef value (#81635) Prevents isel errors when trying to lower gc relocate of undef value (which turns into CopyToReg of TargetConstant). Such relocates may occur after DCE (e.g. after GVN removes some dead blocks) if there are not passes like instcombine scheduled after to clean them up. 
Fixes #80294 --------- Co-authored-by: Matt Arsenault --- .../SelectionDAG/StatepointLowering.cpp | 2 +- .../CodeGen/X86/statepoint-relocate-undef.ll | 32 +++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/X86/statepoint-relocate-undef.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 1320479915e361..d7f4d1c8937563 100644 --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -1287,7 +1287,7 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { if (SD.isUndef() && SD.getValueType().getSizeInBits() <= 64) { // Lowering relocate(undef) as arbitrary constant. Current constant value // is chosen such that it's unlikely to be a valid pointer. - setValue(&Relocate, DAG.getTargetConstant(0xFEFEFEFE, SDLoc(SD), MVT::i64)); + setValue(&Relocate, DAG.getConstant(0xFEFEFEFE, SDLoc(SD), MVT::i64)); return; } diff --git a/llvm/test/CodeGen/X86/statepoint-relocate-undef.ll b/llvm/test/CodeGen/X86/statepoint-relocate-undef.ll new file mode 100644 index 00000000000000..69e6976c32033b --- /dev/null +++ b/llvm/test/CodeGen/X86/statepoint-relocate-undef.ll @@ -0,0 +1,32 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs < %s | FileCheck %s + +target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" + +declare void @use(...) +declare void @f() +declare token @llvm.experimental.gc.statepoint.p0(i64, i32, ptr, i32, i32, ...) 
+declare ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token, i32, i32) + +;; Check that llvm doesn't crash if relocate with undef base/derived ptr survives until isel +define void @test_gcrelocate_undef(ptr addrspace(1) %ptr) gc "statepoint-example" { +; CHECK-LABEL: test_gcrelocate_undef: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: movq %rdi, (%rsp) +; CHECK-NEXT: callq f@PLT +; CHECK-NEXT: .Ltmp0: +; CHECK-NEXT: movl $4278124286, %edi # imm = 0xFEFEFEFE +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: callq use@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq + %tok = tail call token (i64, i32, ptr, i32, i32, ...) + @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void ()) @f, i32 0, i32 0, i32 0, i32 0) ["gc-live" (ptr addrspace(1) %ptr, ptr addrspace(1) undef), "deopt" (ptr addrspace(1) %ptr, i32 undef)] + %a = call ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token %tok, i32 0, i32 1) + call void (...) @use(ptr addrspace(1) %a) + ret void +} From 9dd2c59312bfae3526cee5e836a6b67b2e9b4989 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 14 Feb 2024 00:28:46 +0530 Subject: [PATCH 040/240] InstCombine: Enable SimplifyDemandedUseFPClass and remove flag (#81108) This completes the unrevert of ef388334ee5a3584255b9ef5b3fefdb244fa3fd7. 
--- clang/test/Headers/__clang_hip_math.hip | 56 +++++++++++++++---- .../InstCombine/InstructionCombining.cpp | 9 --- .../InstCombine/simplify-demanded-fpclass.ll | 2 +- 3 files changed, 45 insertions(+), 22 deletions(-) diff --git a/clang/test/Headers/__clang_hip_math.hip b/clang/test/Headers/__clang_hip_math.hip index e9a9cb4bb3c746..37099de74fb8ec 100644 --- a/clang/test/Headers/__clang_hip_math.hip +++ b/clang/test/Headers/__clang_hip_math.hip @@ -2557,33 +2557,65 @@ extern "C" __device__ double test_nan(const char *tag) { return nan(tag); } -// CHECK-LABEL: @test_nanf_emptystr( -// CHECK-NEXT: entry: -// CHECK-NEXT: ret float 0x7FF8000000000000 +// DEFAULT-LABEL: @test_nanf_emptystr( +// DEFAULT-NEXT: entry: +// DEFAULT-NEXT: ret float 0x7FF8000000000000 +// +// FINITEONLY-LABEL: @test_nanf_emptystr( +// FINITEONLY-NEXT: entry: +// FINITEONLY-NEXT: ret float poison +// +// APPROX-LABEL: @test_nanf_emptystr( +// APPROX-NEXT: entry: +// APPROX-NEXT: ret float 0x7FF8000000000000 // extern "C" __device__ float test_nanf_emptystr() { return nanf(""); } -// CHECK-LABEL: @test_nan_emptystr( -// CHECK-NEXT: entry: -// CHECK-NEXT: ret double 0x7FF8000000000000 +// DEFAULT-LABEL: @test_nan_emptystr( +// DEFAULT-NEXT: entry: +// DEFAULT-NEXT: ret double 0x7FF8000000000000 +// +// FINITEONLY-LABEL: @test_nan_emptystr( +// FINITEONLY-NEXT: entry: +// FINITEONLY-NEXT: ret double poison +// +// APPROX-LABEL: @test_nan_emptystr( +// APPROX-NEXT: entry: +// APPROX-NEXT: ret double 0x7FF8000000000000 // extern "C" __device__ double test_nan_emptystr() { return nan(""); } -// CHECK-LABEL: @test_nanf_fill( -// CHECK-NEXT: entry: -// CHECK-NEXT: ret float 0x7FF8000000000000 +// DEFAULT-LABEL: @test_nanf_fill( +// DEFAULT-NEXT: entry: +// DEFAULT-NEXT: ret float 0x7FF8000000000000 +// +// FINITEONLY-LABEL: @test_nanf_fill( +// FINITEONLY-NEXT: entry: +// FINITEONLY-NEXT: ret float poison +// +// APPROX-LABEL: @test_nanf_fill( +// APPROX-NEXT: entry: +// APPROX-NEXT: ret float 
0x7FF8000000000000 // extern "C" __device__ float test_nanf_fill() { return nanf("0x456"); } -// CHECK-LABEL: @test_nan_fill( -// CHECK-NEXT: entry: -// CHECK-NEXT: ret double 0x7FF8000000000000 +// DEFAULT-LABEL: @test_nan_fill( +// DEFAULT-NEXT: entry: +// DEFAULT-NEXT: ret double 0x7FF8000000000000 +// +// FINITEONLY-LABEL: @test_nan_fill( +// FINITEONLY-NEXT: entry: +// FINITEONLY-NEXT: ret double poison +// +// APPROX-LABEL: @test_nan_fill( +// APPROX-NEXT: entry: +// APPROX-NEXT: ret double 0x7FF8000000000000 // extern "C" __device__ double test_nan_fill() { return nan("0x123"); diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index b1e2262fac4794..7450f39c1e7641 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -142,12 +142,6 @@ static cl::opt MaxArraySize("instcombine-maxarray-size", cl::init(1024), cl::desc("Maximum array size considered when doing a combine")); -// TODO: Remove this option -static cl::opt EnableSimplifyDemandedUseFPClass( - "instcombine-simplify-demanded-fp-class", - cl::desc("Enable demanded floating-point class optimizations"), - cl::init(false)); - // FIXME: Remove this flag when it is no longer necessary to convert // llvm.dbg.declare to avoid inaccurate debug info. Setting this to false // increases variable availability at the cost of accuracy. 
Variables that @@ -3111,9 +3105,6 @@ Instruction *InstCombinerImpl::visitFree(CallInst &FI, Value *Op) { } Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) { - if (!EnableSimplifyDemandedUseFPClass) - return nullptr; - Value *RetVal = RI.getReturnValue(); if (!RetVal || !AttributeFuncs::isNoFPClassCompatibleType(RetVal->getType())) return nullptr; diff --git a/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll b/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll index dd9b71415bd6d9..5dfeb0734fbbed 100644 --- a/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll +++ b/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 -; RUN: opt -S -passes=instcombine -instcombine-simplify-demanded-fp-class < %s | FileCheck %s +; RUN: opt -S -passes=instcombine < %s | FileCheck %s declare float @llvm.fabs.f32(float) declare float @llvm.copysign.f32(float, float) From fc0e9c8315564288f9079a633892abadace534cf Mon Sep 17 00:00:00 2001 From: Mark de Wever Date: Tue, 13 Feb 2024 20:04:34 +0100 Subject: [PATCH 041/240] [libc++][modules] Re-add build dir CMakeLists.txt. (#81370) This CMakeLists.txt is used to build modules without build system support. This was removed in d06ae33ec32122bb526fb35025c1f0cf979f1090. This is used in the documentation how to use modules. Made some minor changes to make it work with the std.compat module using the std module. Note the CMakeLists.txt in the build dir should be removed once build system support is generally available. 
--- libcxx/docs/Modules.rst | 4 ++ libcxx/modules/CMakeLists.txt | 20 ++++++++ libcxx/modules/CMakeLists.txt.in | 88 ++++++++++++++++++++++++++++++++ 3 files changed, 112 insertions(+) create mode 100644 libcxx/modules/CMakeLists.txt.in diff --git a/libcxx/docs/Modules.rst b/libcxx/docs/Modules.rst index 533c3fbd2a1eea..ee2b81d3b9e7ca 100644 --- a/libcxx/docs/Modules.rst +++ b/libcxx/docs/Modules.rst @@ -218,9 +218,13 @@ Building this project is done with the following steps, assuming the files $ mkdir build $ cmake -G Ninja -S . -B build -DCMAKE_CXX_COMPILER= -DLIBCXX_BUILD= + $ ninja -j1 std -C build $ ninja -C build $ build/main +.. note:: The ``std`` dependency of ``std.compat`` is not always resolved when + building the ``std`` target using multiple jobs. + .. warning:: ```` should point point to the real binary and not to a symlink. diff --git a/libcxx/modules/CMakeLists.txt b/libcxx/modules/CMakeLists.txt index 0388c048dacb8b..0dea8cfca94ac3 100644 --- a/libcxx/modules/CMakeLists.txt +++ b/libcxx/modules/CMakeLists.txt @@ -137,6 +137,25 @@ set(LIBCXX_MODULE_STD_COMPAT_SOURCES std.compat/cwctype.inc ) +# TODO MODULES the CMakeLists.txt in the build directory is only temporary. +# This allows using as available in the build directory. Once build systems +# have proper support for the installed files this will be removed. +if ("${LIBCXX_GENERATED_INCLUDE_DIR}" STREQUAL "${LIBCXX_GENERATED_INCLUDE_TARGET_DIR}") + # This typically happens when the target is not installed. + set(LIBCXX_CONFIGURED_INCLUDE_DIRS "${LIBCXX_GENERATED_INCLUDE_DIR}") +else() + # It's important that the arch directory be included first so that its header files + # which interpose on the default include dir be included instead of the default ones. 
+ set(LIBCXX_CONFIGURED_INCLUDE_DIRS + "${LIBCXX_GENERATED_INCLUDE_TARGET_DIR};${LIBCXX_GENERATED_INCLUDE_DIR}" + ) +endif() +configure_file( + "CMakeLists.txt.in" + "${LIBCXX_GENERATED_MODULE_DIR}/CMakeLists.txt" + @ONLY +) + set(LIBCXX_MODULE_STD_INCLUDE_SOURCES) foreach(file ${LIBCXX_MODULE_STD_SOURCES}) set( @@ -166,6 +185,7 @@ configure_file( ) set(_all_modules) +list(APPEND _all_modules "${LIBCXX_GENERATED_MODULE_DIR}/CMakeLists.txt") list(APPEND _all_modules "${LIBCXX_GENERATED_MODULE_DIR}/std.cppm") list(APPEND _all_modules "${LIBCXX_GENERATED_MODULE_DIR}/std.compat.cppm") foreach(file ${LIBCXX_MODULE_STD_SOURCES} ${LIBCXX_MODULE_STD_COMPAT_SOURCES}) diff --git a/libcxx/modules/CMakeLists.txt.in b/libcxx/modules/CMakeLists.txt.in new file mode 100644 index 00000000000000..e332d70cc16333 --- /dev/null +++ b/libcxx/modules/CMakeLists.txt.in @@ -0,0 +1,88 @@ +cmake_minimum_required(VERSION 3.26) + +project(libc++-modules LANGUAGES CXX) + +# Enable CMake's module support +if(CMAKE_VERSION VERSION_LESS "3.28.0") + if(CMAKE_VERSION VERSION_LESS "3.27.0") + set(CMAKE_EXPERIMENTAL_CXX_MODULE_CMAKE_API "2182bf5c-ef0d-489a-91da-49dbc3090d2a") + else() + set(CMAKE_EXPERIMENTAL_CXX_MODULE_CMAKE_API "aa1f7df0-828a-4fcd-9afc-2dc80491aca7") + endif() + set(CMAKE_EXPERIMENTAL_CXX_MODULE_DYNDEP 1) +else() + cmake_policy(VERSION 3.28) +endif() + +# Default to C++ extensions being off. Libc++'s modules support have trouble +# with extensions right now. +set(CMAKE_CXX_EXTENSIONS OFF) + +# Propagates the CMake options to the modules. +# +# This uses the std module hard-coded since the std.compat module does not +# depend on these flags. 
+macro(compile_define_if_not condition def) + if (NOT ${condition}) + target_compile_definitions(std PRIVATE ${def}) + endif() +endmacro() +macro(compile_define_if condition def) + if (${condition}) + target_compile_definitions(std PRIVATE ${def}) + endif() +endmacro() + +### STD + +add_library(std) +target_sources(std + PUBLIC FILE_SET cxx_modules TYPE CXX_MODULES FILES + std.cppm +) + +target_include_directories(std SYSTEM PRIVATE @LIBCXX_CONFIGURED_INCLUDE_DIRS@) + +if (NOT @LIBCXX_ENABLE_EXCEPTIONS@) + target_compile_options(std PUBLIC -fno-exceptions) +endif() + +target_compile_options(std + PUBLIC + -nostdinc++ + -Wno-reserved-module-identifier + -Wno-reserved-user-defined-literal + @LIBCXX_COMPILE_FLAGS@ +) +set_target_properties(std + PROPERTIES + OUTPUT_NAME "c++std" +) + +### STD.COMPAT + +add_library(std.compat) +target_sources(std.compat + PUBLIC FILE_SET cxx_modules TYPE CXX_MODULES FILES + std.compat.cppm +) + +target_include_directories(std.compat SYSTEM PRIVATE @LIBCXX_CONFIGURED_INCLUDE_DIRS@) + +if (NOT @LIBCXX_ENABLE_EXCEPTIONS@) + target_compile_options(std.compat PUBLIC -fno-exceptions) +endif() + +target_compile_options(std.compat + PUBLIC + -nostdinc++ + -Wno-reserved-module-identifier + -Wno-reserved-user-defined-literal + -fmodule-file=std=${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/std.dir/std.pcm + @LIBCXX_COMPILE_FLAGS@ +) +set_target_properties(std.compat + PROPERTIES + OUTPUT_NAME "c++std.compat" +) +add_dependencies(std.compat std) From a04c6366b156f508cdf84a32ef4484b53a6dabee Mon Sep 17 00:00:00 2001 From: jimingham Date: Tue, 13 Feb 2024 11:06:32 -0800 Subject: [PATCH 042/240] Don't count all the frames just to skip the current inlined ones. (#80918) The algorithm to find the DW_OP_entry_value requires you to find the nearest non-inlined frame. It did that by counting the number of stack frames so that it could use that as a loop stopper. That is unnecessary and inefficient. 
Unnecessary because GetFrameAtIndex will return a null frame when you step past the oldest frame, so you already have the "got to the end" signal without counting all the stack frames. And counting all the stack frames can be expensive. --- lldb/include/lldb/Target/Thread.h | 7 +++++++ lldb/source/Expression/DWARFExpression.cpp | 7 +++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/lldb/include/lldb/Target/Thread.h b/lldb/include/lldb/Target/Thread.h index e423dd4a6d2baa..30863ad4c90299 100644 --- a/lldb/include/lldb/Target/Thread.h +++ b/lldb/include/lldb/Target/Thread.h @@ -390,6 +390,13 @@ class Thread : public std::enable_shared_from_this, /// and having the thread call the SystemRuntime again. virtual bool ThreadHasQueueInformation() const { return false; } + /// GetStackFrameCount can be expensive. Stacks can get very deep, and they + /// require memory reads for each frame. So only use GetStackFrameCount when + /// you need to know the depth of the stack. When iterating over frames, it's + /// better to generate the frames one by one with GetFrameAtIndex, and when + /// that returns NULL, you are at the end of the stack. That way your loop + /// will only do the work it needs to, without forcing lldb to realize + /// StackFrames you weren't going to look at. 
virtual uint32_t GetStackFrameCount() { return GetStackFrameList()->GetNumFrames(); } diff --git a/lldb/source/Expression/DWARFExpression.cpp b/lldb/source/Expression/DWARFExpression.cpp index fe4928d4f43a43..c061fd1140fff7 100644 --- a/lldb/source/Expression/DWARFExpression.cpp +++ b/lldb/source/Expression/DWARFExpression.cpp @@ -608,11 +608,10 @@ static bool Evaluate_DW_OP_entry_value(std::vector &stack, StackFrameSP parent_frame = nullptr; addr_t return_pc = LLDB_INVALID_ADDRESS; uint32_t current_frame_idx = current_frame->GetFrameIndex(); - uint32_t num_frames = thread->GetStackFrameCount(); - for (uint32_t parent_frame_idx = current_frame_idx + 1; - parent_frame_idx < num_frames; ++parent_frame_idx) { + + for (uint32_t parent_frame_idx = current_frame_idx + 1;;parent_frame_idx++) { parent_frame = thread->GetStackFrameAtIndex(parent_frame_idx); - // Require a valid sequence of frames. + // If this is null, we're at the end of the stack. if (!parent_frame) break; From a69ecb2420f644e31f18fcc61a07b3ca627e8939 Mon Sep 17 00:00:00 2001 From: jimingham Date: Tue, 13 Feb 2024 11:09:47 -0800 Subject: [PATCH 043/240] Add the ability to define a Python based command that uses CommandObjectParsed (#70734) This allows you to specify options and arguments and their definitions and then have lldb handle the completions, help, etc. in the same way that lldb does for its parsed commands internally. This feature has some design considerations as well as the code, so I've also set up an RFC, but I did this one first and will put the RFC address in here once I've pushed it... Note, the lldb "ParsedCommand interface" doesn't actually do all the work that it should. For instance, saying the type of an option that has a completer doesn't automatically hook up the completer, and ditto for argument values. We also do almost no work to verify that the arguments match their definition, or do auto-completion for them. 
This patch allows you to make a command that's bug-for-bug compatible with built-in ones, but I didn't want to stall it on getting the auto-command checking to work all the way correctly. As an overall design note, my primary goal here was to make an interface that worked well in the script language. For that I needed, for instance, to have a property-based way to get all the option values that were specified. It was much more convenient to do that by making a fairly bare-bones C interface to define the options and arguments of a command, and set their values, and then wrap that in a Python class (installed along with the other bits of the lldb python module) which you can then derive from to make your new command. This approach will also make it easier to experiment. See the file test_commands.py in the test case for examples of how this works. --- lldb/bindings/python/CMakeLists.txt | 4 +- lldb/bindings/python/python-wrapper.swig | 31 +- lldb/examples/python/cmdtemplate.py | 129 ++-- lldb/examples/python/templates/parsed_cmd.py | 360 +++++++++ lldb/include/lldb/Interpreter/CommandObject.h | 5 +- .../lldb/Interpreter/ScriptInterpreter.h | 29 + .../source/Commands/CommandObjectCommands.cpp | 729 +++++++++++++++++- lldb/source/Commands/Options.td | 22 +- lldb/source/Interpreter/CommandObject.cpp | 17 + .../Python/PythonDataObjects.h | 2 + .../Python/SWIGPythonBridge.h | 7 + .../Python/ScriptInterpreterPython.cpp | 253 +++++- .../Python/ScriptInterpreterPythonImpl.h | 21 + .../script/add/TestAddParsedCommand.py | 146 ++++ .../command/script/add/test_commands.py | 174 +++++ .../Python/PythonTestSuite.cpp | 8 + 16 files changed, 1831 insertions(+), 106 deletions(-) create mode 100644 lldb/examples/python/templates/parsed_cmd.py create mode 100644 lldb/test/API/commands/command/script/add/TestAddParsedCommand.py create mode 100644 lldb/test/API/commands/command/script/add/test_commands.py diff --git a/lldb/bindings/python/CMakeLists.txt 
b/lldb/bindings/python/CMakeLists.txt index c941f764dfc92a..73b1239495e22e 100644 --- a/lldb/bindings/python/CMakeLists.txt +++ b/lldb/bindings/python/CMakeLists.txt @@ -96,13 +96,15 @@ function(finish_swig_python swig_target lldb_python_bindings_dir lldb_python_tar ${lldb_python_target_dir} "utils" FILES "${LLDB_SOURCE_DIR}/examples/python/in_call_stack.py" - "${LLDB_SOURCE_DIR}/examples/python/symbolication.py") + "${LLDB_SOURCE_DIR}/examples/python/symbolication.py" + ) create_python_package( ${swig_target} ${lldb_python_target_dir} "plugins" FILES + "${LLDB_SOURCE_DIR}/examples/python/templates/parsed_cmd.py" "${LLDB_SOURCE_DIR}/examples/python/templates/scripted_process.py" "${LLDB_SOURCE_DIR}/examples/python/templates/scripted_platform.py" "${LLDB_SOURCE_DIR}/examples/python/templates/operating_system.py") diff --git a/lldb/bindings/python/python-wrapper.swig b/lldb/bindings/python/python-wrapper.swig index 17bc7b1f219870..1370afc885d43f 100644 --- a/lldb/bindings/python/python-wrapper.swig +++ b/lldb/bindings/python/python-wrapper.swig @@ -287,12 +287,12 @@ PythonObject lldb_private::python::SWIGBridge::LLDBSwigPythonCreateScriptedThrea } bool lldb_private::python::SWIGBridge::LLDBSWIGPythonCallThreadPlan( - void *implementor, const char *method_name, lldb_private::Event *event, + void *implementer, const char *method_name, lldb_private::Event *event, bool &got_error) { got_error = false; PyErr_Cleaner py_err_cleaner(false); - PythonObject self(PyRefType::Borrowed, static_cast(implementor)); + PythonObject self(PyRefType::Borrowed, static_cast(implementer)); auto pfunc = self.ResolveName(method_name); if (!pfunc.IsAllocated()) @@ -325,12 +325,12 @@ bool lldb_private::python::SWIGBridge::LLDBSWIGPythonCallThreadPlan( } bool lldb_private::python::SWIGBridge::LLDBSWIGPythonCallThreadPlan( - void *implementor, const char *method_name, lldb_private::Stream *stream, + void *implementer, const char *method_name, lldb_private::Stream *stream, bool &got_error) { 
got_error = false; PyErr_Cleaner py_err_cleaner(false); - PythonObject self(PyRefType::Borrowed, static_cast(implementor)); + PythonObject self(PyRefType::Borrowed, static_cast(implementer)); auto pfunc = self.ResolveName(method_name); if (!pfunc.IsAllocated()) @@ -831,6 +831,29 @@ bool lldb_private::python::SWIGBridge::LLDBSwigPythonCallCommandObject( return true; } +#include "lldb/Interpreter/CommandReturnObject.h" + +bool lldb_private::python::SWIGBridge::LLDBSwigPythonCallParsedCommandObject( + PyObject *implementor, lldb::DebuggerSP debugger, lldb_private::StructuredDataImpl &args_impl, + lldb_private::CommandReturnObject &cmd_retobj, + lldb::ExecutionContextRefSP exe_ctx_ref_sp) { + + PyErr_Cleaner py_err_cleaner(true); + + PythonObject self(PyRefType::Borrowed, implementor); + auto pfunc = self.ResolveName("__call__"); + + if (!pfunc.IsAllocated()) { + cmd_retobj.AppendError("Could not find '__call__' method in implementation class"); + return false; + } + + pfunc(SWIGBridge::ToSWIGWrapper(std::move(debugger)), SWIGBridge::ToSWIGWrapper(args_impl), + SWIGBridge::ToSWIGWrapper(exe_ctx_ref_sp), SWIGBridge::ToSWIGWrapper(cmd_retobj).obj()); + + return true; +} + PythonObject lldb_private::python::SWIGBridge::LLDBSWIGPythonCreateOSPlugin( const char *python_class_name, const char *session_dictionary_name, const lldb::ProcessSP &process_sp) { diff --git a/lldb/examples/python/cmdtemplate.py b/lldb/examples/python/cmdtemplate.py index a3c30f30afea08..49a08365268f83 100644 --- a/lldb/examples/python/cmdtemplate.py +++ b/lldb/examples/python/cmdtemplate.py @@ -11,115 +11,84 @@ import inspect import lldb -import optparse -import shlex import sys +from lldb.plugins.parsed_cmd import ParsedCommand - -class FrameStatCommand: +class FrameStatCommand(ParsedCommand): program = "framestats" @classmethod def register_lldb_command(cls, debugger, module_name): - parser = cls.create_options() - cls.__doc__ = parser.format_help() - # Add any commands contained in this module to 
LLDB - command = "command script add -o -c %s.%s %s" % ( - module_name, - cls.__name__, - cls.program, - ) - debugger.HandleCommand(command) + ParsedCommandBase.do_register_cmd(cls, debugger, module_name) print( 'The "{0}" command has been installed, type "help {0}" or "{0} ' '--help" for detailed help.'.format(cls.program) ) - @classmethod - def create_options(cls): - usage = "usage: %prog [options]" - description = ( - "This command is meant to be an example of how to make " - "an LLDB command that does something useful, follows " - "best practices, and exploits the SB API. " - "Specifically, this command computes the aggregate " - "and average size of the variables in the current " - "frame and allows you to tweak exactly which variables " - "are to be accounted in the computation." - ) + def setup_command_definition(self): - # Pass add_help_option = False, since this keeps the command in line - # with lldb commands, and we wire up "help command" to work by - # providing the long & short help methods below. 
- parser = optparse.OptionParser( - description=description, - prog=cls.program, - usage=usage, - add_help_option=False, + self.ov_parser.add_option( + "i", + "in-scope", + help = "in_scope_only = True", + value_type = lldb.eArgTypeBoolean, + dest = "bool_arg", + default = True, ) - parser.add_option( - "-i", - "--in-scope", - action="store_true", - dest="inscope", - help="in_scope_only = True", + self.ov_parser.add_option( + "i", + "in-scope", + help = "in_scope_only = True", + value_type = lldb.eArgTypeBoolean, + dest = "inscope", default=True, ) - - parser.add_option( - "-a", - "--arguments", - action="store_true", - dest="arguments", - help="arguments = True", - default=True, + + self.ov_parser.add_option( + "a", + "arguments", + help = "arguments = True", + value_type = lldb.eArgTypeBoolean, + dest = "arguments", + default = True, ) - parser.add_option( - "-l", - "--locals", - action="store_true", - dest="locals", - help="locals = True", - default=True, + self.ov_parser.add_option( + "l", + "locals", + help = "locals = True", + value_type = lldb.eArgTypeBoolean, + dest = "locals", + default = True, ) - parser.add_option( - "-s", - "--statics", - action="store_true", - dest="statics", - help="statics = True", - default=True, + self.ov_parser.add_option( + "s", + "statics", + help = "statics = True", + value_type = lldb.eArgTypeBoolean, + dest = "statics", + default = True, ) - return parser - def get_short_help(self): return "Example command for use in debugging" def get_long_help(self): - return self.help_string + return ("This command is meant to be an example of how to make " + "an LLDB command that does something useful, follows " + "best practices, and exploits the SB API. 
" + "Specifically, this command computes the aggregate " + "and average size of the variables in the current " + "frame and allows you to tweak exactly which variables " + "are to be accounted in the computation.") + def __init__(self, debugger, unused): - self.parser = self.create_options() - self.help_string = self.parser.format_help() + super().__init__(debugger, unused) def __call__(self, debugger, command, exe_ctx, result): - # Use the Shell Lexer to properly parse up command options just like a - # shell would - command_args = shlex.split(command) - - try: - (options, args) = self.parser.parse_args(command_args) - except: - # if you don't handle exceptions, passing an incorrect argument to - # the OptionParser will cause LLDB to exit (courtesy of OptParse - # dealing with argument errors by throwing SystemExit) - result.SetError("option parsing failed") - return - # Always get program state from the lldb.SBExecutionContext passed # in as exe_ctx frame = exe_ctx.GetFrame() @@ -128,7 +97,7 @@ def __call__(self, debugger, command, exe_ctx, result): return variables_list = frame.GetVariables( - options.arguments, options.locals, options.statics, options.inscope + self.ov_parser.arguments, self.ov_parser.locals, self.ov_parser.statics, self.ov_parser.inscope ) variables_count = variables_list.GetSize() if variables_count == 0: diff --git a/lldb/examples/python/templates/parsed_cmd.py b/lldb/examples/python/templates/parsed_cmd.py new file mode 100644 index 00000000000000..61ea57c275aae4 --- /dev/null +++ b/lldb/examples/python/templates/parsed_cmd.py @@ -0,0 +1,360 @@ +""" +This module implements a couple of utility classes to make writing +lldb parsed commands more Pythonic. +The way to use it is to make a class for your command that inherits from ParsedCommandBase. +That will make an LLDBOptionValueParser which you will use for your +option definition, and to fetch option values for the current invocation +of your command. 
Access to the OV parser is through: + +ParsedCommandBase.get_parser() + +Next, implement setup_command_definition() in your new command class, and call: + + self.get_parser().add_option() + +to add all your options. The order doesn't matter for options, lldb will sort them +alphabetically for you when it prints help. + +Similarly you can define the arguments with: + + self.get_parser().add_argument() + +At present, lldb doesn't do as much work as it should verifying arguments, it +only checks that commands that take no arguments don't get passed arguments. + +Then implement the execute function for your command as: + + def __call__(self, debugger, args_list, exe_ctx, result): + +The arguments will be a list of strings. + +You can access the option values using the 'dest' string you passed in when defining the option. +And if you need to know whether a given option was set by the user or not, you can +use the was_set API. + +So for instance, if you have an option whose "dest" is "my_option", then: + + self.get_parser().my_option + +will fetch the value, and: + + self.get_parser().was_set("my_option") + +will return True if the user set this option, and False if it was left at its default +value. + +There are example commands in the lldb testsuite at: + +llvm-project/lldb/test/API/commands/command/script/add/test_commands.py +""" +import inspect +import lldb +import sys +from abc import abstractmethod + +class LLDBOptionValueParser: + """ + This class holds the option definitions for the command, and when + the command is run, you can ask the parser for the current values. """ + + def __init__(self): + # This is a dictionary of dictionaries. The key is the long option + # name, and the value is the rest of the definition. + self.options_dict = {} + self.args_array = [] + + # Some methods to translate common value types. Should return a + # tuple of the value and an error value (True => error) if the + # type can't be converted. 
These are called internally when the + # command line is parsed into the 'dest' properties, you should + # not need to call them directly. + # FIXME: Need a way to push the conversion error string back to lldb. + @staticmethod + def to_bool(in_value): + error = True + value = False + if type(in_value) != str or len(in_value) == 0: + return (value, error) + + low_in = in_value.lower() + if low_in in ["y", "yes", "t", "true", "1"]: + value = True + error = False + + if not value and low_in in ["n", "no", "f", "false", "0"]: + value = False + error = False + + return (value, error) + + @staticmethod + def to_int(in_value): + #FIXME: Not doing errors yet... + return (int(in_value), False) + + @staticmethod + def to_unsigned(in_value): + # FIXME: find an unsigned converter... + # And handle errors. + return (int(in_value), False) + + translators = { + lldb.eArgTypeBoolean : to_bool, + lldb.eArgTypeBreakpointID : to_unsigned, + lldb.eArgTypeByteSize : to_unsigned, + lldb.eArgTypeCount : to_unsigned, + lldb.eArgTypeFrameIndex : to_unsigned, + lldb.eArgTypeIndex : to_unsigned, + lldb.eArgTypeLineNum : to_unsigned, + lldb.eArgTypeNumLines : to_unsigned, + lldb.eArgTypeNumberPerLine : to_unsigned, + lldb.eArgTypeOffset : to_int, + lldb.eArgTypeThreadIndex : to_unsigned, + lldb.eArgTypeUnsignedInteger : to_unsigned, + lldb.eArgTypeWatchpointID : to_unsigned, + lldb.eArgTypeColumnNum : to_unsigned, + lldb.eArgTypeRecognizerID : to_unsigned, + lldb.eArgTypeTargetID : to_unsigned, + lldb.eArgTypeStopHookID : to_unsigned + } + + @classmethod + def translate_value(cls, value_type, value): + try: + return cls.translators[value_type](value) + except KeyError: + # If we don't have a translator, return the string value. + return (value, False) + + # FIXME: would this be better done on the C++ side? + # The common completers are missing some useful ones. + # For instance there really should be a common Type completer + # And an "lldb command name" completer. 
+ completion_table = { + lldb.eArgTypeAddressOrExpression : lldb.eVariablePathCompletion, + lldb.eArgTypeArchitecture : lldb.eArchitectureCompletion, + lldb.eArgTypeBreakpointID : lldb.eBreakpointCompletion, + lldb.eArgTypeBreakpointIDRange : lldb.eBreakpointCompletion, + lldb.eArgTypeBreakpointName : lldb.eBreakpointNameCompletion, + lldb.eArgTypeClassName : lldb.eSymbolCompletion, + lldb.eArgTypeDirectoryName : lldb.eDiskDirectoryCompletion, + lldb.eArgTypeExpression : lldb.eVariablePathCompletion, + lldb.eArgTypeExpressionPath : lldb.eVariablePathCompletion, + lldb.eArgTypeFilename : lldb.eDiskFileCompletion, + lldb.eArgTypeFrameIndex : lldb.eFrameIndexCompletion, + lldb.eArgTypeFunctionName : lldb.eSymbolCompletion, + lldb.eArgTypeFunctionOrSymbol : lldb.eSymbolCompletion, + lldb.eArgTypeLanguage : lldb.eTypeLanguageCompletion, + lldb.eArgTypePath : lldb.eDiskFileCompletion, + lldb.eArgTypePid : lldb.eProcessIDCompletion, + lldb.eArgTypeProcessName : lldb.eProcessNameCompletion, + lldb.eArgTypeRegisterName : lldb.eRegisterCompletion, + lldb.eArgTypeRunArgs : lldb.eDiskFileCompletion, + lldb.eArgTypeShlibName : lldb.eModuleCompletion, + lldb.eArgTypeSourceFile : lldb.eSourceFileCompletion, + lldb.eArgTypeSymbol : lldb.eSymbolCompletion, + lldb.eArgTypeThreadIndex : lldb.eThreadIndexCompletion, + lldb.eArgTypeVarName : lldb.eVariablePathCompletion, + lldb.eArgTypePlatform : lldb.ePlatformPluginCompletion, + lldb.eArgTypeWatchpointID : lldb.eWatchpointIDCompletion, + lldb.eArgTypeWatchpointIDRange : lldb.eWatchpointIDCompletion, + lldb.eArgTypeModuleUUID : lldb.eModuleUUIDCompletion, + lldb.eArgTypeStopHookID : lldb.eStopHookIDCompletion + } + + @classmethod + def determine_completion(cls, arg_type): + return cls.completion_table.get(arg_type, lldb.eNoCompletion) + + def add_argument_set(self, arguments): + self.args_array.append(arguments) + + def get_option_element(self, long_name): + return self.options_dict.get(long_name, None) + + def is_enum_opt(self, 
opt_name): + elem = self.get_option_element(opt_name) + if not elem: + return False + return "enum_values" in elem + + def option_parsing_started(self): + """ This makes the ivars for all the "dest" values in the array and gives them + their default values. You should not have to call this by hand, though if + you have some option that needs to do some work when a new command invocation + starts, you can override this to handle your special option. """ + for key, elem in self.options_dict.items(): + elem['_value_set'] = False + try: + object.__setattr__(self, elem["dest"], elem["default"]) + except AttributeError: + # It isn't an error not to have a "dest" variable name, you'll + # just have to manage this option's value on your own. + continue + + def set_enum_value(self, enum_values, input): + """ This sets the value for an enum option, you should not have to call this + by hand. """ + candidates = [] + for candidate in enum_values: + # The enum_values are a two element list of value & help string. + value = candidate[0] + if value.startswith(input): + candidates.append(value) + + if len(candidates) == 1: + return (candidates[0], False) + else: + return (input, True) + + def set_option_value(self, exe_ctx, opt_name, opt_value): + """ This sets a single option value. This will handle most option + value types, but if you have an option that has some complex behavior, + you can override this to implement that behavior, and then pass the + rest of the options to the base class implementation. 
""" + elem = self.get_option_element(opt_name) + if not elem: + return False + + if "enum_values" in elem: + (value, error) = self.set_enum_value(elem["enum_values"], opt_value) + else: + (value, error) = __class__.translate_value(elem["value_type"], opt_value) + + if error: + return False + + object.__setattr__(self, elem["dest"], value) + elem["_value_set"] = True + return True + + def was_set(self, opt_name): + """ Call this in the __call__ method of your command to determine + whether this option was set on the command line. It is sometimes + useful to know whether an option has the default value because the + user set it explicitly (was_set -> True) or not. """ + + elem = self.get_option_element(opt_name) + if not elem: + return False + try: + return elem["_value_set"] + except AttributeError: + return False + + def add_option(self, short_option, long_option, help, default, + dest = None, required=False, groups = None, + value_type=lldb.eArgTypeNone, completion_type=None, + enum_values=None): + """ + short_option: one character, must be unique, not required + long_option: no spaces, must be unique, required + help: a usage string for this option, will print in the command help + default: the initial value for this option (if it has a value) + dest: the name of the property that gives you access to the value for + this value. Defaults to the long option if not provided. + required: if true, this option must be provided or the command will error out + groups: Which "option groups" does this option belong to + value_type: one of the lldb.eArgType enum values. Some of the common arg + types also have default completers, which will be applied automatically. + completion_type: currently these are values form the lldb.CompletionType enum, I + haven't done custom completions yet. + enum_values: An array of duples: ["element_name", "element_help"]. If provided, + only one of the enum elements is allowed. 
The value will be the + element_name for the chosen enum element as a string. + """ + if not dest: + dest = long_option + + if not completion_type: + completion_type = self.determine_completion(value_type) + + dict = {"short_option" : short_option, + "required" : required, + "help" : help, + "value_type" : value_type, + "completion_type" : completion_type, + "dest" : dest, + "default" : default} + + if enum_values: + dict["enum_values"] = enum_values + if groups: + dict["groups"] = groups + + self.options_dict[long_option] = dict + + def make_argument_element(self, arg_type, repeat = "optional", groups = None): + element = {"arg_type" : arg_type, "repeat" : repeat} + if groups: + element["groups"] = groups + return element + +class ParsedCommand: + def __init__(self, debugger, unused): + self.debugger = debugger + self.ov_parser = LLDBOptionValueParser() + self.setup_command_definition() + + def get_options_definition(self): + return self.get_parser().options_dict + + def get_flags(self): + return 0 + + def get_args_definition(self): + return self.get_parser().args_array + + # The base class will handle calling these methods + # when appropriate. + + def option_parsing_started(self): + self.get_parser().option_parsing_started() + + def set_option_value(self, exe_ctx, opt_name, opt_value): + return self.get_parser().set_option_value(exe_ctx, opt_name, opt_value) + + def get_parser(self): + """Returns the option value parser for this command. + When defining the command, use the parser to add + argument and option definitions to the command. + When you are in the command callback, the parser + gives you access to the options passes to this + invocation""" + + return self.ov_parser + + # These are the two "pure virtual" methods: + @abstractmethod + def __call__(self, debugger, args_array, exe_ctx, result): + """This is the command callback. The option values are + provided by the 'dest' properties on the parser. + + args_array: This is the list of arguments provided. 
+ exe_ctx: Gives the SBExecutionContext on which the + command should operate. + result: Any results of the command should be + written into this SBCommandReturnObject. + """ + raise NotImplementedError() + + @abstractmethod + def setup_command_definition(self): + """This will be called when your command is added to + the command interpreter. Here is where you add your + options and argument definitions for the command.""" + raise NotImplementedError() + + @staticmethod + def do_register_cmd(cls, debugger, module_name): + """ Add any commands contained in this module to LLDB """ + command = "command script add -o -p -c %s.%s %s" % ( + module_name, + cls.__name__, + cls.program, + ) + debugger.HandleCommand(command) + print( + 'The "{0}" command has been installed, type "help {0}"' + 'for detailed help.'.format(cls.program) + ) diff --git a/lldb/include/lldb/Interpreter/CommandObject.h b/lldb/include/lldb/Interpreter/CommandObject.h index 7b427de0264f75..b99de56f534469 100644 --- a/lldb/include/lldb/Interpreter/CommandObject.h +++ b/lldb/include/lldb/Interpreter/CommandObject.h @@ -224,7 +224,10 @@ class CommandObject : public std::enable_shared_from_this { void GetFormattedCommandArguments(Stream &str, uint32_t opt_set_mask = LLDB_OPT_SET_ALL); - bool IsPairType(ArgumentRepetitionType arg_repeat_type); + static bool IsPairType(ArgumentRepetitionType arg_repeat_type); + + static std::optional + ArgRepetitionFromString(llvm::StringRef string); bool ParseOptions(Args &args, CommandReturnObject &result); diff --git a/lldb/include/lldb/Interpreter/ScriptInterpreter.h b/lldb/include/lldb/Interpreter/ScriptInterpreter.h index b941f6012a117b..932eaa8b8a4a28 100644 --- a/lldb/include/lldb/Interpreter/ScriptInterpreter.h +++ b/lldb/include/lldb/Interpreter/ScriptInterpreter.h @@ -473,6 +473,14 @@ class ScriptInterpreter : public PluginInterface { return false; } + virtual bool RunScriptBasedParsedCommand( + StructuredData::GenericSP impl_obj_sp, Args& args, + 
ScriptedCommandSynchronicity synchronicity, + lldb_private::CommandReturnObject &cmd_retobj, Status &error, + const lldb_private::ExecutionContext &exe_ctx) { + return false; + } + virtual bool RunScriptFormatKeyword(const char *impl_function, Process *process, std::string &output, Status &error) { @@ -517,6 +525,27 @@ class ScriptInterpreter : public PluginInterface { dest.clear(); return false; } + + virtual StructuredData::ObjectSP + GetOptionsForCommandObject(StructuredData::GenericSP cmd_obj_sp) { + return {}; + } + + virtual StructuredData::ObjectSP + GetArgumentsForCommandObject(StructuredData::GenericSP cmd_obj_sp) { + return {}; + } + + virtual bool SetOptionValueForCommandObject( + StructuredData::GenericSP cmd_obj_sp, ExecutionContext *exe_ctx, + llvm::StringRef long_option, llvm::StringRef value) { + return false; + } + + virtual void OptionParsingStartedForCommandObject( + StructuredData::GenericSP cmd_obj_sp) { + return; + } virtual uint32_t GetFlagsForCommandObject(StructuredData::GenericSP cmd_obj_sp) { diff --git a/lldb/source/Commands/CommandObjectCommands.cpp b/lldb/source/Commands/CommandObjectCommands.cpp index a51e5ab1af30c6..3dfd452b92509d 100644 --- a/lldb/source/Commands/CommandObjectCommands.cpp +++ b/lldb/source/Commands/CommandObjectCommands.cpp @@ -1151,13 +1151,16 @@ class CommandObjectPythonFunction : public CommandObjectRaw { CompletionType m_completion_type = eNoCompletion; }; -class CommandObjectScriptingObject : public CommandObjectRaw { +/// This class implements a "raw" scripted command. lldb does no parsing of the +/// command line, instead passing the line unaltered (except for backtick +/// substitution). 
+class CommandObjectScriptingObjectRaw : public CommandObjectRaw { public: - CommandObjectScriptingObject(CommandInterpreter &interpreter, - std::string name, - StructuredData::GenericSP cmd_obj_sp, - ScriptedCommandSynchronicity synch, - CompletionType completion_type) + CommandObjectScriptingObjectRaw(CommandInterpreter &interpreter, + std::string name, + StructuredData::GenericSP cmd_obj_sp, + ScriptedCommandSynchronicity synch, + CompletionType completion_type) : CommandObjectRaw(interpreter, name), m_cmd_obj_sp(cmd_obj_sp), m_synchro(synch), m_fetched_help_short(false), m_fetched_help_long(false), m_completion_type(completion_type) { @@ -1168,7 +1171,7 @@ class CommandObjectScriptingObject : public CommandObjectRaw { GetFlags().Set(scripter->GetFlagsForCommandObject(cmd_obj_sp)); } - ~CommandObjectScriptingObject() override = default; + ~CommandObjectScriptingObjectRaw() override = default; void HandleArgumentCompletion(CompletionRequest &request, @@ -1246,6 +1249,699 @@ class CommandObjectScriptingObject : public CommandObjectRaw { CompletionType m_completion_type = eNoCompletion; }; + +/// This command implements a lldb parsed scripted command. The command +/// provides a definition of the options and arguments, and a option value +/// setting callback, and then the command's execution function gets passed +/// just the parsed arguments. +/// Note, implementing a command in Python using these base interfaces is a bit +/// of a pain, but it is much easier to export this low level interface, and +/// then make it nicer on the Python side, than to try to do that in a +/// script language neutral way. +/// So I've also added a base class in Python that provides a table-driven +/// way of defining the options and arguments, which automatically fills the +/// option values, making them available as properties in Python. 
+/// +class CommandObjectScriptingObjectParsed : public CommandObjectParsed { +private: + class CommandOptions : public Options { + public: + CommandOptions(CommandInterpreter &interpreter, + StructuredData::GenericSP cmd_obj_sp) : m_interpreter(interpreter), + m_cmd_obj_sp(cmd_obj_sp) {} + + ~CommandOptions() override = default; + + Status SetOptionValue(uint32_t option_idx, llvm::StringRef option_arg, + ExecutionContext *execution_context) override { + Status error; + ScriptInterpreter *scripter = + m_interpreter.GetDebugger().GetScriptInterpreter(); + if (!scripter) { + error.SetErrorString("No script interpreter for SetOptionValue."); + return error; + } + if (!m_cmd_obj_sp) { + error.SetErrorString("SetOptionValue called with empty cmd_obj."); + return error; + } + if (!m_options_definition_up) { + error.SetErrorString("SetOptionValue called before options definitions " + "were created."); + return error; + } + // Pass the long option, since you aren't actually required to have a + // short_option, and for those options the index or short option character + // aren't meaningful on the python side. 
+ const char * long_option = + m_options_definition_up.get()[option_idx].long_option; + bool success = scripter->SetOptionValueForCommandObject(m_cmd_obj_sp, + execution_context, long_option, option_arg); + if (!success) + error.SetErrorStringWithFormatv("Error setting option: {0} to {1}", + long_option, option_arg); + return error; + } + + void OptionParsingStarting(ExecutionContext *execution_context) override { + ScriptInterpreter *scripter = + m_interpreter.GetDebugger().GetScriptInterpreter(); + if (!scripter || !m_cmd_obj_sp) + return; + + scripter->OptionParsingStartedForCommandObject(m_cmd_obj_sp); + } + + llvm::ArrayRef GetDefinitions() override { + if (!m_options_definition_up) + return {}; + return llvm::ArrayRef(m_options_definition_up.get(), m_num_options); + } + + static Status ParseUsageMaskFromArray(StructuredData::ObjectSP obj_sp, + size_t counter, uint32_t &usage_mask) { + // If the usage entry is not provided, we use LLDB_OPT_SET_ALL. + // If the usage mask is a UINT, the option belongs to that group. + // If the usage mask is a vector of UINT's, the option belongs to all the + // groups listed. + // If a subelement of the vector is a vector of two ints, then the option + // belongs to the inclusive range from the first to the second element. 
+ Status error; + if (!obj_sp) { + usage_mask = LLDB_OPT_SET_ALL; + return error; + } + + usage_mask = 0; + + StructuredData::UnsignedInteger *uint_val = + obj_sp->GetAsUnsignedInteger(); + if (uint_val) { + // If this is an integer, then this specifies a single group: + uint32_t value = uint_val->GetValue(); + if (value == 0) { + error.SetErrorStringWithFormatv( + "0 is not a valid group for option {0}", counter); + return error; + } + usage_mask = (1 << (value - 1)); + return error; + } + // Otherwise it has to be an array: + StructuredData::Array *array_val = obj_sp->GetAsArray(); + if (!array_val) { + error.SetErrorStringWithFormatv( + "required field is not a array for option {0}", counter); + return error; + } + // This is the array ForEach for accumulating a group usage mask from + // an array of string descriptions of groups. + auto groups_accumulator + = [counter, &usage_mask, &error] + (StructuredData::Object *obj) -> bool { + StructuredData::UnsignedInteger *int_val = obj->GetAsUnsignedInteger(); + if (int_val) { + uint32_t value = int_val->GetValue(); + if (value == 0) { + error.SetErrorStringWithFormatv( + "0 is not a valid group for element {0}", counter); + return false; + } + usage_mask |= (1 << (value - 1)); + return true; + } + StructuredData::Array *arr_val = obj->GetAsArray(); + if (!arr_val) { + error.SetErrorStringWithFormatv( + "Group element not an int or array of integers for element {0}", + counter); + return false; + } + size_t num_range_elem = arr_val->GetSize(); + if (num_range_elem != 2) { + error.SetErrorStringWithFormatv( + "Subranges of a group not a start and a stop for element {0}", + counter); + return false; + } + int_val = arr_val->GetItemAtIndex(0)->GetAsUnsignedInteger(); + if (!int_val) { + error.SetErrorStringWithFormatv("Start element of a subrange of a " + "group not unsigned int for element {0}", counter); + return false; + } + uint32_t start = int_val->GetValue(); + int_val = 
arr_val->GetItemAtIndex(1)->GetAsUnsignedInteger(); + if (!int_val) { + error.SetErrorStringWithFormatv("End element of a subrange of a group" + " not unsigned int for element {0}", counter); + return false; + } + uint32_t end = int_val->GetValue(); + if (start == 0 || end == 0 || start > end) { + error.SetErrorStringWithFormatv("Invalid subrange of a group: {0} - " + "{1} for element {2}", start, end, counter); + return false; + } + for (uint32_t i = start; i <= end; i++) { + usage_mask |= (1 << (i - 1)); + } + return true; + }; + array_val->ForEach(groups_accumulator); + return error; + } + + + Status SetOptionsFromArray(StructuredData::Dictionary &options) { + Status error; + m_num_options = options.GetSize(); + m_options_definition_up.reset(new OptionDefinition[m_num_options]); + // We need to hand out pointers to contents of these vectors; we reserve + // as much as we'll need up front so they don't get freed on resize... + m_usage_container.reserve(m_num_options); + m_enum_storage.reserve(m_num_options); + m_enum_vector.reserve(m_num_options); + + size_t counter = 0; + size_t short_opt_counter = 0; + // This is the Array::ForEach function for adding option elements: + auto add_element = [this, &error, &counter, &short_opt_counter] + (llvm::StringRef long_option, StructuredData::Object *object) -> bool { + StructuredData::Dictionary *opt_dict = object->GetAsDictionary(); + if (!opt_dict) { + error.SetErrorString("Value in options dictionary is not a dictionary"); + return false; + } + OptionDefinition &option_def = m_options_definition_up.get()[counter]; + + // We aren't exposing the validator yet, set it to null + option_def.validator = nullptr; + // We don't require usage masks, so set it to one group by default: + option_def.usage_mask = 1; + + // Now set the fields of the OptionDefinition Array from the dictionary: + // + // Note that I don't check for unknown fields in the option dictionaries + // so a scriptor can add extra elements that are helpful when 
they go to + // do "set_option_value" + + // Usage Mask: + StructuredData::ObjectSP obj_sp = opt_dict->GetValueForKey("groups"); + if (obj_sp) { + error = ParseUsageMaskFromArray(obj_sp, counter, + option_def.usage_mask); + if (error.Fail()) + return false; + } + + // Required: + option_def.required = false; + obj_sp = opt_dict->GetValueForKey("required"); + if (obj_sp) { + StructuredData::Boolean *boolean_val = obj_sp->GetAsBoolean(); + if (!boolean_val) { + error.SetErrorStringWithFormatv("'required' field is not a boolean " + "for option {0}", counter); + return false; + } + option_def.required = boolean_val->GetValue(); + } + + // Short Option: + int short_option; + obj_sp = opt_dict->GetValueForKey("short_option"); + if (obj_sp) { + // The value is a string, so pull the + llvm::StringRef short_str = obj_sp->GetStringValue(); + if (short_str.empty()) { + error.SetErrorStringWithFormatv("short_option field empty for " + "option {0}", counter); + return false; + } else if (short_str.size() != 1) { + error.SetErrorStringWithFormatv("short_option field has extra " + "characters for option {0}", counter); + return false; + } + short_option = (int) short_str[0]; + } else { + // If the short option is not provided, then we need a unique value + // less than the lowest printable ASCII character. 
+ short_option = short_opt_counter++; + } + option_def.short_option = short_option; + + // Long Option is the key from the outer dict: + if (long_option.empty()) { + error.SetErrorStringWithFormatv("empty long_option for option {0}", + counter); + return false; + } + auto inserted = g_string_storer.insert(long_option.str()); + option_def.long_option = ((*(inserted.first)).data()); + + // Value Type: + obj_sp = opt_dict->GetValueForKey("value_type"); + if (obj_sp) { + StructuredData::UnsignedInteger *uint_val + = obj_sp->GetAsUnsignedInteger(); + if (!uint_val) { + error.SetErrorStringWithFormatv("Value type must be an unsigned " + "integer"); + return false; + } + uint64_t val_type = uint_val->GetValue(); + if (val_type >= eArgTypeLastArg) { + error.SetErrorStringWithFormatv("Value type {0} beyond the " + "CommandArgumentType bounds", val_type); + return false; + } + option_def.argument_type = (CommandArgumentType) val_type; + option_def.option_has_arg = true; + } else { + option_def.argument_type = eArgTypeNone; + option_def.option_has_arg = false; + } + + // Completion Type: + obj_sp = opt_dict->GetValueForKey("completion_type"); + if (obj_sp) { + StructuredData::UnsignedInteger *uint_val = obj_sp->GetAsUnsignedInteger(); + if (!uint_val) { + error.SetErrorStringWithFormatv("Completion type must be an " + "unsigned integer for option {0}", counter); + return false; + } + uint64_t completion_type = uint_val->GetValue(); + if (completion_type > eCustomCompletion) { + error.SetErrorStringWithFormatv("Completion type for option {0} " + "beyond the CompletionType bounds", completion_type); + return false; + } + option_def.completion_type = (CommandArgumentType) completion_type; + } else + option_def.completion_type = eNoCompletion; + + // Usage Text: + std::string usage_text; + obj_sp = opt_dict->GetValueForKey("help"); + if (!obj_sp) { + error.SetErrorStringWithFormatv("required usage missing from option " + "{0}", counter); + return false; + } + llvm::StringRef 
usage_stref; + usage_stref = obj_sp->GetStringValue(); + if (usage_stref.empty()) { + error.SetErrorStringWithFormatv("empty usage text for option {0}", + counter); + return false; + } + m_usage_container[counter] = usage_stref.str().c_str(); + option_def.usage_text = m_usage_container[counter].data(); + + // Enum Values: + + obj_sp = opt_dict->GetValueForKey("enum_values"); + if (obj_sp) { + StructuredData::Array *array = obj_sp->GetAsArray(); + if (!array) { + error.SetErrorStringWithFormatv("enum values must be an array for " + "option {0}", counter); + return false; + } + size_t num_elem = array->GetSize(); + size_t enum_ctr = 0; + m_enum_storage[counter] = std::vector(num_elem); + std::vector &curr_elem = m_enum_storage[counter]; + + // This is the Array::ForEach function for adding enum elements: + // Since there are only two fields to specify the enum, use a simple + // two element array with value first, usage second. + // counter is only used for reporting so I pass it by value here. 
+ auto add_enum = [&enum_ctr, &curr_elem, counter, &error] + (StructuredData::Object *object) -> bool { + StructuredData::Array *enum_arr = object->GetAsArray(); + if (!enum_arr) { + error.SetErrorStringWithFormatv("Enum values for option {0} not " + "an array", counter); + return false; + } + size_t num_enum_elements = enum_arr->GetSize(); + if (num_enum_elements != 2) { + error.SetErrorStringWithFormatv("Wrong number of elements: {0} " + "for enum {1} in option {2}", + num_enum_elements, enum_ctr, counter); + return false; + } + // Enum Value: + StructuredData::ObjectSP obj_sp = enum_arr->GetItemAtIndex(0); + llvm::StringRef val_stref = obj_sp->GetStringValue(); + std::string value_cstr_str = val_stref.str().c_str(); + + // Enum Usage: + obj_sp = enum_arr->GetItemAtIndex(1); + if (!obj_sp) { + error.SetErrorStringWithFormatv("No usage for enum {0} in option " + "{1}", enum_ctr, counter); + return false; + } + llvm::StringRef usage_stref = obj_sp->GetStringValue(); + std::string usage_cstr_str = usage_stref.str().c_str(); + curr_elem[enum_ctr] = EnumValueStorage(value_cstr_str, + usage_cstr_str, enum_ctr); + + enum_ctr++; + return true; + }; // end of add_enum + + array->ForEach(add_enum); + if (!error.Success()) + return false; + // We have to have a vector of elements to set in the options, make + // that here: + for (auto &elem : curr_elem) + m_enum_vector[counter].emplace_back(elem.element); + + option_def.enum_values = llvm::ArrayRef(m_enum_vector[counter]); + } + counter++; + return true; + }; // end of add_element + + options.ForEach(add_element); + return error; + } + + private: + struct EnumValueStorage { + EnumValueStorage() { + element.string_value = "value not set"; + element.usage = "usage not set"; + element.value = 0; + } + + EnumValueStorage(std::string in_str_val, std::string in_usage, + size_t in_value) : value(std::move(in_str_val)), usage(std::move(in_usage)) { + SetElement(in_value); + } + + EnumValueStorage(const EnumValueStorage &in) : 
value(in.value), + usage(in.usage) { + SetElement(in.element.value); + } + + EnumValueStorage &operator=(const EnumValueStorage &in) { + value = in.value; + usage = in.usage; + SetElement(in.element.value); + return *this; + } + + void SetElement(size_t in_value) { + element.value = in_value; + element.string_value = value.data(); + element.usage = usage.data(); + } + + std::string value; + std::string usage; + OptionEnumValueElement element; + }; + // We have to provide char * values for the long option, usage and enum + // values, that's what the option definitions hold. + // The long option strings are quite likely to be reused in other added + // commands, so those are stored in a global set: g_string_storer. + // But the usages are much less likely to be reused, so those are stored in + // a vector in the command instance. It gets resized to the correct size + // and then filled with null-terminated strings in the std::string, so the + // are valid C-strings that won't move around. + // The enum values and descriptions are treated similarly - these aren't + // all that common so it's not worth the effort to dedup them. + size_t m_num_options = 0; + std::unique_ptr m_options_definition_up; + std::vector> m_enum_storage; + std::vector> m_enum_vector; + std::vector m_usage_container; + CommandInterpreter &m_interpreter; + StructuredData::GenericSP m_cmd_obj_sp; + static std::unordered_set g_string_storer; + }; + +public: + static CommandObjectSP Create(CommandInterpreter &interpreter, + std::string name, + StructuredData::GenericSP cmd_obj_sp, + ScriptedCommandSynchronicity synch, + CommandReturnObject &result) { + CommandObjectSP new_cmd_sp(new CommandObjectScriptingObjectParsed( + interpreter, name, cmd_obj_sp, synch)); + + CommandObjectScriptingObjectParsed *parsed_cmd + = static_cast(new_cmd_sp.get()); + // Now check all the failure modes, and report if found. 
+ Status opt_error = parsed_cmd->GetOptionsError(); + Status arg_error = parsed_cmd->GetArgsError(); + + if (opt_error.Fail()) + result.AppendErrorWithFormat("failed to parse option definitions: %s", + opt_error.AsCString()); + if (arg_error.Fail()) + result.AppendErrorWithFormat("%sfailed to parse argument definitions: %s", + opt_error.Fail() ? ", also " : "", + arg_error.AsCString()); + + if (!result.Succeeded()) + return {}; + + return new_cmd_sp; + } + + CommandObjectScriptingObjectParsed(CommandInterpreter &interpreter, + std::string name, + StructuredData::GenericSP cmd_obj_sp, + ScriptedCommandSynchronicity synch) + : CommandObjectParsed(interpreter, name.c_str()), + m_cmd_obj_sp(cmd_obj_sp), m_synchro(synch), + m_options(interpreter, cmd_obj_sp), m_fetched_help_short(false), + m_fetched_help_long(false) { + StreamString stream; + ScriptInterpreter *scripter = GetDebugger().GetScriptInterpreter(); + if (!scripter) { + m_options_error.SetErrorString("No script interpreter"); + return; + } + + // Set the flags: + GetFlags().Set(scripter->GetFlagsForCommandObject(cmd_obj_sp)); + + // Now set up the options definitions from the options: + StructuredData::ObjectSP options_object_sp + = scripter->GetOptionsForCommandObject(cmd_obj_sp); + // It's okay not to have an options dict. + if (options_object_sp) { + // The options come as a dictionary of dictionaries. The key of the + // outer dict is the long option name (since that's required). The + // value holds all the other option specification bits. + StructuredData::Dictionary *options_dict + = options_object_sp->GetAsDictionary(); + // but if it exists, it has to be an array. + if (options_dict) { + m_options_error = m_options.SetOptionsFromArray(*(options_dict)); + // If we got an error don't bother with the arguments... + if (m_options_error.Fail()) + return; + } else { + m_options_error.SetErrorString("Options array not an array"); + return; + } + } + // Then fetch the args. 
Since the arguments can have usage masks you need + // an array of arrays. + StructuredData::ObjectSP args_object_sp + = scripter->GetArgumentsForCommandObject(cmd_obj_sp); + if (args_object_sp) { + StructuredData::Array *args_array = args_object_sp->GetAsArray(); + if (!args_array) { + m_args_error.SetErrorString("Argument specification is not an array"); + return; + } + size_t counter = 0; + + // This is the Array::ForEach function that handles the + // CommandArgumentEntry arrays one by one: + auto arg_array_adder = [this, &counter] (StructuredData::Object *object) + -> bool { + // This is the Array::ForEach function to add argument entries: + CommandArgumentEntry this_entry; + size_t elem_counter = 0; + auto args_adder = [this, counter, &elem_counter, &this_entry] + (StructuredData::Object *object) -> bool { + // The arguments definition has three fields, the argument type, the + // repeat and the usage mask. + CommandArgumentType arg_type = eArgTypeNone; + ArgumentRepetitionType arg_repetition = eArgRepeatOptional; + uint32_t arg_opt_set_association; + + auto report_error = [this, elem_counter, counter] + (const char *err_txt) -> bool { + m_args_error.SetErrorStringWithFormatv("Element {0} of arguments " + "list element {1}: %s.", elem_counter, counter, err_txt); + return false; + }; + + StructuredData::Dictionary *arg_dict = object->GetAsDictionary(); + if (!arg_dict) { + report_error("is not a dictionary."); + return false; + } + // Argument Type: + StructuredData::ObjectSP obj_sp + = arg_dict->GetValueForKey("arg_type"); + if (obj_sp) { + StructuredData::UnsignedInteger *uint_val + = obj_sp->GetAsUnsignedInteger(); + if (!uint_val) { + report_error("value type must be an unsigned integer"); + return false; + } + uint64_t arg_type_int = uint_val->GetValue(); + if (arg_type_int >= eArgTypeLastArg) { + report_error("value type beyond ArgumentRepetitionType bounds"); + return false; + } + arg_type = (CommandArgumentType) arg_type_int; + } + // Repeat Value: + 
obj_sp = arg_dict->GetValueForKey("repeat"); + std::optional repeat; + if (obj_sp) { + llvm::StringRef repeat_str = obj_sp->GetStringValue(); + if (repeat_str.empty()) { + report_error("repeat value is empty"); + return false; + } + repeat = ArgRepetitionFromString(repeat_str); + if (!repeat) { + report_error("invalid repeat value"); + return false; + } + arg_repetition = *repeat; + } + + // Usage Mask: + obj_sp = arg_dict->GetValueForKey("groups"); + m_args_error = CommandOptions::ParseUsageMaskFromArray(obj_sp, + counter, arg_opt_set_association); + this_entry.emplace_back(arg_type, arg_repetition, + arg_opt_set_association); + elem_counter++; + return true; + }; + StructuredData::Array *args_array = object->GetAsArray(); + if (!args_array) { + m_args_error.SetErrorStringWithFormatv("Argument definition element " + "{0} is not an array", counter); + } + + args_array->ForEach(args_adder); + if (m_args_error.Fail()) + return false; + if (this_entry.empty()) { + m_args_error.SetErrorStringWithFormatv("Argument definition element " + "{0} is empty", counter); + return false; + } + m_arguments.push_back(this_entry); + counter++; + return true; + }; // end of arg_array_adder + // Here we actually parse the args definition: + args_array->ForEach(arg_array_adder); + } + } + + ~CommandObjectScriptingObjectParsed() override = default; + + Status GetOptionsError() { return m_options_error; } + Status GetArgsError() { return m_args_error; } + bool WantsCompletion() override { return true; } + + bool IsRemovable() const override { return true; } + + ScriptedCommandSynchronicity GetSynchronicity() { return m_synchro; } + + llvm::StringRef GetHelp() override { + if (m_fetched_help_short) + return CommandObjectParsed::GetHelp(); + ScriptInterpreter *scripter = GetDebugger().GetScriptInterpreter(); + if (!scripter) + return CommandObjectParsed::GetHelp(); + std::string docstring; + m_fetched_help_short = + scripter->GetShortHelpForCommandObject(m_cmd_obj_sp, docstring); + if 
(!docstring.empty()) + SetHelp(docstring); + + return CommandObjectParsed::GetHelp(); + } + + llvm::StringRef GetHelpLong() override { + if (m_fetched_help_long) + return CommandObjectParsed::GetHelpLong(); + + ScriptInterpreter *scripter = GetDebugger().GetScriptInterpreter(); + if (!scripter) + return CommandObjectParsed::GetHelpLong(); + + std::string docstring; + m_fetched_help_long = + scripter->GetLongHelpForCommandObject(m_cmd_obj_sp, docstring); + if (!docstring.empty()) + SetHelpLong(docstring); + return CommandObjectParsed::GetHelpLong(); + } + + Options *GetOptions() override { return &m_options; } + + +protected: + void DoExecute(Args &args, + CommandReturnObject &result) override { + ScriptInterpreter *scripter = GetDebugger().GetScriptInterpreter(); + + Status error; + + result.SetStatus(eReturnStatusInvalid); + + if (!scripter || + !scripter->RunScriptBasedParsedCommand(m_cmd_obj_sp, args, + m_synchro, result, error, m_exe_ctx)) { + result.AppendError(error.AsCString()); + } else { + // Don't change the status if the command already set it... 
+ if (result.GetStatus() == eReturnStatusInvalid) { + if (result.GetOutputData().empty()) + result.SetStatus(eReturnStatusSuccessFinishNoResult); + else + result.SetStatus(eReturnStatusSuccessFinishResult); + } + } + } + +private: + StructuredData::GenericSP m_cmd_obj_sp; + ScriptedCommandSynchronicity m_synchro; + CommandOptions m_options; + Status m_options_error; + Status m_args_error; + bool m_fetched_help_short : 1; + bool m_fetched_help_long : 1; +}; + +std::unordered_set + CommandObjectScriptingObjectParsed::CommandOptions::g_string_storer; + // CommandObjectCommandsScriptImport #define LLDB_OPTIONS_script_import #include "CommandOptions.inc" @@ -1439,6 +2135,9 @@ class CommandObjectCommandsScriptAdd : public CommandObjectParsed, case 'o': m_overwrite_lazy = eLazyBoolYes; break; + case 'p': + m_parsed_command = true; + break; case 's': m_synchronicity = (ScriptedCommandSynchronicity)OptionArgParser::ToOptionEnum( @@ -1474,6 +2173,7 @@ class CommandObjectCommandsScriptAdd : public CommandObjectParsed, m_completion_type = eNoCompletion; m_overwrite_lazy = eLazyBoolCalculate; m_synchronicity = eScriptedCommandSynchronicitySynchronous; + m_parsed_command = false; } llvm::ArrayRef GetDefinitions() override { @@ -1489,6 +2189,7 @@ class CommandObjectCommandsScriptAdd : public CommandObjectParsed, ScriptedCommandSynchronicity m_synchronicity = eScriptedCommandSynchronicitySynchronous; CompletionType m_completion_type = eNoCompletion; + bool m_parsed_command = false; }; void IOHandlerActivated(IOHandler &io_handler, bool interactive) override { @@ -1628,10 +2329,16 @@ class CommandObjectCommandsScriptAdd : public CommandObjectParsed, "'{0}'", m_options.m_class_name); return; } - - new_cmd_sp.reset(new CommandObjectScriptingObject( - m_interpreter, m_cmd_name, cmd_obj_sp, m_synchronicity, - m_completion_type)); + + if (m_options.m_parsed_command) { + new_cmd_sp = CommandObjectScriptingObjectParsed::Create(m_interpreter, + m_cmd_name, cmd_obj_sp, m_synchronicity, 
result); + if (!result.Succeeded()) + return; + } else + new_cmd_sp.reset(new CommandObjectScriptingObjectRaw( + m_interpreter, m_cmd_name, cmd_obj_sp, m_synchronicity, + m_completion_type)); } // Assume we're going to succeed... diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td index a87f457105aac0..dd732e35220287 100644 --- a/lldb/source/Commands/Options.td +++ b/lldb/source/Commands/Options.td @@ -805,19 +805,25 @@ let Command = "script add" in { def script_add_function : Option<"function", "f">, Group<1>, Arg<"PythonFunction">, Desc<"Name of the Python function to bind to this command name.">; - def script_add_class : Option<"class", "c">, Group<2>, Arg<"PythonClass">, - Desc<"Name of the Python class to bind to this command name.">; + def script_add_class : Option<"class", "c">, Groups<[2,3]>, + Arg<"PythonClass">, + Desc<"Name of the Python class to bind to this command name.">; def script_add_help : Option<"help", "h">, Group<1>, Arg<"HelpText">, - Desc<"The help text to display for this command.">; - def script_add_overwrite : Option<"overwrite", "o">, Groups<[1,2]>, - Desc<"Overwrite an existing command at this node.">; + Desc<"The help text to display for this command.">; + def script_add_overwrite : Option<"overwrite", "o">, + Desc<"Overwrite an existing command at this node.">; def script_add_synchronicity : Option<"synchronicity", "s">, EnumArg<"ScriptedCommandSynchronicity">, Desc<"Set the synchronicity of this command's executions with regard to " "LLDB event system.">; - def completion_type : Option<"completion-type", "C">, - EnumArg<"CompletionType">, - Desc<"Specify which completion type the command should use - if none is specified, the command won't use auto-completion.">; + def script_add_completion_type : Option<"completion-type", "C">, + Groups<[1,2]>, EnumArg<"CompletionType">, + Desc<"Specify which completion type the command should use - if none is " + "specified, the command won't use auto-completion.">; + def 
script_add_parsed_command : Option<"parsed", "p">, Group<3>, + Desc<"Make a parsed command. The command class will provide the command " + "definition by implementing get_options and get_arguments.">; + } let Command = "container add" in { diff --git a/lldb/source/Interpreter/CommandObject.cpp b/lldb/source/Interpreter/CommandObject.cpp index 6324c7e701ed54..6ed0fd1f1ddbd9 100644 --- a/lldb/source/Interpreter/CommandObject.cpp +++ b/lldb/source/Interpreter/CommandObject.cpp @@ -447,6 +447,23 @@ bool CommandObject::IsPairType(ArgumentRepetitionType arg_repeat_type) { (arg_repeat_type == eArgRepeatPairRangeOptional); } +std::optional +CommandObject::ArgRepetitionFromString(llvm::StringRef string) { + return llvm::StringSwitch(string) + .Case("plain", eArgRepeatPlain) + .Case("optional", eArgRepeatOptional) + .Case("plus", eArgRepeatPlus) + .Case("star", eArgRepeatStar) + .Case("range", eArgRepeatRange) + .Case("pair-plain", eArgRepeatPairPlain) + .Case("pair-optional", eArgRepeatPairOptional) + .Case("pair-plus", eArgRepeatPairPlus) + .Case("pair-star", eArgRepeatPairStar) + .Case("pair-range", eArgRepeatPairRange) + .Case("pair-range-optional", eArgRepeatPairRangeOptional) + .Default({}); +} + static CommandObject::CommandArgumentEntry OptSetFiltered(uint32_t opt_set_mask, CommandObject::CommandArgumentEntry &cmd_arg_entry) { diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h index 82eee76e42b27a..88c1bb7e729e7f 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h @@ -194,6 +194,8 @@ template struct PassthroughFormat { }; template <> struct PythonFormat : PassthroughFormat {}; +template <> struct PythonFormat : + PassthroughFormat {}; template <> struct PythonFormat : PassthroughFormat {}; template <> struct PythonFormat : PassthroughFormat {}; diff --git 
a/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h b/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h index 7cdd5577919ba8..c1a11b9134d62b 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h @@ -32,6 +32,7 @@ class SBStream; class SBStructuredData; class SBFileSpec; class SBModuleSpec; +class SBStringList; } // namespace lldb namespace lldb_private { @@ -212,6 +213,12 @@ class SWIGBridge { lldb::DebuggerSP debugger, const char *args, lldb_private::CommandReturnObject &cmd_retobj, lldb::ExecutionContextRefSP exe_ctx_ref_sp); + static bool + LLDBSwigPythonCallParsedCommandObject(PyObject *implementor, + lldb::DebuggerSP debugger, + StructuredDataImpl &args_impl, + lldb_private::CommandReturnObject &cmd_retobj, + lldb::ExecutionContextRefSP exe_ctx_ref_sp); static bool LLDBSwigPythonCallModuleInit(const char *python_module_name, const char *session_dictionary_name, diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp index ef7a2c128a2207..dadcde612614ba 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp @@ -24,6 +24,7 @@ #include "ScriptInterpreterPythonImpl.h" #include "lldb/API/SBError.h" +#include "lldb/API/SBExecutionContext.h" #include "lldb/API/SBFrame.h" #include "lldb/API/SBValue.h" #include "lldb/Breakpoint/StoppointCallbackContext.h" @@ -531,7 +532,6 @@ void ScriptInterpreterPythonImpl::IOHandlerInputComplete(IOHandler &io_handler, break; data_up->user_source.SplitIntoLines(data); - StructuredData::ObjectSP empty_args_sp; if (GenerateBreakpointCommandCallbackData(data_up->user_source, data_up->script_source, /*has_extra_args=*/false, @@ -2766,6 +2766,58 @@ bool ScriptInterpreterPythonImpl::RunScriptBasedCommand( 
return ret_val; }

+/// Run a scripted "parsed" command implemented by \p impl_obj_sp.
+///
+/// Each entry of \p args is copied as a string into a StructuredData array,
+/// which is handed to SWIGBridge::LLDBSwigPythonCallParsedCommandObject
+/// together with the debugger, \p cmd_retobj and an ExecutionContextRef built
+/// from \p exe_ctx.  The Python lock and the synchronicity handler are held
+/// only for the duration of that call.
+///
+/// \return
+///     true if the bridge call succeeded and \p cmd_retobj was not left in
+///     the eReturnStatusFailed state (\p error is cleared in that case).
+///     false otherwise; \p error describes the failure when the implementor
+///     or debugger is invalid or when the bridge call itself fails.
+bool ScriptInterpreterPythonImpl::RunScriptBasedParsedCommand(
+    StructuredData::GenericSP impl_obj_sp, Args &args,
+    ScriptedCommandSynchronicity synchronicity,
+    lldb_private::CommandReturnObject &cmd_retobj, Status &error,
+    const lldb_private::ExecutionContext &exe_ctx) {
+  if (!impl_obj_sp || !impl_obj_sp->IsValid()) {
+    error.SetErrorString("no function to execute");
+    return false;
+  }
+
+  lldb::DebuggerSP debugger_sp = m_debugger.shared_from_this();
+  lldb::ExecutionContextRefSP exe_ctx_ref_sp(new ExecutionContextRef(exe_ctx));
+
+  if (!debugger_sp.get()) {
+    error.SetErrorString("invalid Debugger pointer");
+    return false;
+  }
+
+  bool ret_val = false;
+
+  {
+    Locker py_lock(this,
+                   Locker::AcquireLock | Locker::InitSession |
+                       (cmd_retobj.GetInteractive() ? 0 : Locker::NoSTDIN),
+                   Locker::FreeLock | Locker::TearDownSession);
+
+    SynchronicityHandler synch_handler(debugger_sp, synchronicity);
+
+    // The parsed-command entry point takes its arguments as a structured
+    // array of strings rather than as a single raw command line.
+    StructuredData::ArraySP args_arr_sp(new StructuredData::Array());
+
+    for (const Args::ArgEntry &entry : args) {
+      args_arr_sp->AddStringItem(entry.ref());
+    }
+    StructuredDataImpl args_impl(args_arr_sp);
+
+    ret_val = SWIGBridge::LLDBSwigPythonCallParsedCommandObject(
+        static_cast<PyObject *>(impl_obj_sp->GetValue()), debugger_sp,
+        args_impl, cmd_retobj, exe_ctx_ref_sp);
+  }
+
+  // Return right away on failure so the message set here reaches the caller
+  // instead of being wiped by the error.Clear() on the success path.
+  if (!ret_val) {
+    error.SetErrorString("unable to execute script function");
+    return false;
+  }
+  if (cmd_retobj.GetStatus() == eReturnStatusFailed)
+    return false;
+
+  error.Clear();
+  return true;
+}
+
+
 /// In Python, a special attribute __doc__ contains the docstring for an object
 /// (function, method, class, ...) if any is defined Otherwise, the attribute's
 /// value is None.
@@ -2884,6 +2936,205 @@ uint32_t ScriptInterpreterPythonImpl::GetFlagsForCommandObject( return result; } +StructuredData::ObjectSP +ScriptInterpreterPythonImpl::GetOptionsForCommandObject( + StructuredData::GenericSP cmd_obj_sp) { + StructuredData::ObjectSP result = {}; + + Locker py_lock(this, Locker::AcquireLock | Locker::NoSTDIN, Locker::FreeLock); + + static char callee_name[] = "get_options_definition"; + + if (!cmd_obj_sp) + return result; + + PythonObject implementor(PyRefType::Borrowed, + (PyObject *)cmd_obj_sp->GetValue()); + + if (!implementor.IsAllocated()) + return result; + + PythonObject pmeth(PyRefType::Owned, + PyObject_GetAttrString(implementor.get(), callee_name)); + + if (PyErr_Occurred()) + PyErr_Clear(); + + if (!pmeth.IsAllocated()) + return result; + + if (PyCallable_Check(pmeth.get()) == 0) { + if (PyErr_Occurred()) + PyErr_Clear(); + return result; + } + + if (PyErr_Occurred()) + PyErr_Clear(); + + PythonDictionary py_return = unwrapOrSetPythonException( + As(implementor.CallMethod(callee_name))); + + // if it fails, print the error but otherwise go on + if (PyErr_Occurred()) { + PyErr_Print(); + PyErr_Clear(); + return {}; + } + return py_return.CreateStructuredObject(); +} + +StructuredData::ObjectSP +ScriptInterpreterPythonImpl::GetArgumentsForCommandObject( + StructuredData::GenericSP cmd_obj_sp) { + StructuredData::ObjectSP result = {}; + + Locker py_lock(this, Locker::AcquireLock | Locker::NoSTDIN, Locker::FreeLock); + + static char callee_name[] = "get_args_definition"; + + if (!cmd_obj_sp) + return result; + + PythonObject implementor(PyRefType::Borrowed, + (PyObject *)cmd_obj_sp->GetValue()); + + if (!implementor.IsAllocated()) + return result; + + PythonObject pmeth(PyRefType::Owned, + PyObject_GetAttrString(implementor.get(), callee_name)); + + if (PyErr_Occurred()) + PyErr_Clear(); + + if (!pmeth.IsAllocated()) + return result; + + if (PyCallable_Check(pmeth.get()) == 0) { + if (PyErr_Occurred()) + PyErr_Clear(); + return 
result; + } + + if (PyErr_Occurred()) + PyErr_Clear(); + + PythonList py_return = unwrapOrSetPythonException( + As(implementor.CallMethod(callee_name))); + + // if it fails, print the error but otherwise go on + if (PyErr_Occurred()) { + PyErr_Print(); + PyErr_Clear(); + return {}; + } + return py_return.CreateStructuredObject(); +} + +void +ScriptInterpreterPythonImpl::OptionParsingStartedForCommandObject( + StructuredData::GenericSP cmd_obj_sp) { + + Locker py_lock(this, Locker::AcquireLock | Locker::NoSTDIN, Locker::FreeLock); + + static char callee_name[] = "option_parsing_started"; + + if (!cmd_obj_sp) + return ; + + PythonObject implementor(PyRefType::Borrowed, + (PyObject *)cmd_obj_sp->GetValue()); + + if (!implementor.IsAllocated()) + return; + + PythonObject pmeth(PyRefType::Owned, + PyObject_GetAttrString(implementor.get(), callee_name)); + + if (PyErr_Occurred()) + PyErr_Clear(); + + if (!pmeth.IsAllocated()) + return; + + if (PyCallable_Check(pmeth.get()) == 0) { + if (PyErr_Occurred()) + PyErr_Clear(); + return; + } + + if (PyErr_Occurred()) + PyErr_Clear(); + + // option_parsing_starting doesn't return anything, ignore anything but + // python errors. 
+ unwrapOrSetPythonException( + As(implementor.CallMethod(callee_name))); + + // if it fails, print the error but otherwise go on + if (PyErr_Occurred()) { + PyErr_Print(); + PyErr_Clear(); + return; + } +} + +bool +ScriptInterpreterPythonImpl::SetOptionValueForCommandObject( + StructuredData::GenericSP cmd_obj_sp, ExecutionContext *exe_ctx, + llvm::StringRef long_option, llvm::StringRef value) { + StructuredData::ObjectSP result = {}; + + Locker py_lock(this, Locker::AcquireLock | Locker::NoSTDIN, Locker::FreeLock); + + static char callee_name[] = "set_option_value"; + + if (!cmd_obj_sp) + return false; + + PythonObject implementor(PyRefType::Borrowed, + (PyObject *)cmd_obj_sp->GetValue()); + + if (!implementor.IsAllocated()) + return false; + + PythonObject pmeth(PyRefType::Owned, + PyObject_GetAttrString(implementor.get(), callee_name)); + + if (PyErr_Occurred()) + PyErr_Clear(); + + if (!pmeth.IsAllocated()) + return false; + + if (PyCallable_Check(pmeth.get()) == 0) { + if (PyErr_Occurred()) + PyErr_Clear(); + return false; + } + + if (PyErr_Occurred()) + PyErr_Clear(); + + lldb::ExecutionContextRefSP exe_ctx_ref_sp; + if (exe_ctx) + exe_ctx_ref_sp.reset(new ExecutionContextRef(exe_ctx)); + PythonObject ctx_ref_obj = SWIGBridge::ToSWIGWrapper(exe_ctx_ref_sp); + + bool py_return = unwrapOrSetPythonException( + As(implementor.CallMethod(callee_name, ctx_ref_obj, long_option.str().c_str(), + value.str().c_str()))); + + // if it fails, print the error but otherwise go on + if (PyErr_Occurred()) { + PyErr_Print(); + PyErr_Clear(); + return false; + } + return py_return; +} + bool ScriptInterpreterPythonImpl::GetLongHelpForCommandObject( StructuredData::GenericSP cmd_obj_sp, std::string &dest) { dest.clear(); diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h index a33499816d8d38..fcd21dff612b1e 100644 --- 
a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h @@ -182,6 +182,13 @@ class ScriptInterpreterPythonImpl : public ScriptInterpreterPython { lldb_private::CommandReturnObject &cmd_retobj, Status &error, const lldb_private::ExecutionContext &exe_ctx) override; + virtual bool RunScriptBasedParsedCommand( + StructuredData::GenericSP impl_obj_sp, Args& args, + ScriptedCommandSynchronicity synchronicity, + lldb_private::CommandReturnObject &cmd_retobj, Status &error, + const lldb_private::ExecutionContext &exe_ctx) override; + + Status GenerateFunction(const char *signature, const StringList &input, bool is_callback) override; @@ -212,6 +219,20 @@ class ScriptInterpreterPythonImpl : public ScriptInterpreterPython { bool GetLongHelpForCommandObject(StructuredData::GenericSP cmd_obj_sp, std::string &dest) override; + + StructuredData::ObjectSP + GetOptionsForCommandObject(StructuredData::GenericSP cmd_obj_sp) override; + + StructuredData::ObjectSP + GetArgumentsForCommandObject(StructuredData::GenericSP cmd_obj_sp) override; + + bool SetOptionValueForCommandObject(StructuredData::GenericSP cmd_obj_sp, + ExecutionContext *exe_ctx, + llvm::StringRef long_option, + llvm::StringRef value) override; + + void OptionParsingStartedForCommandObject( + StructuredData::GenericSP cmd_obj_sp) override; bool CheckObjectExists(const char *name) override { if (!name || !name[0]) diff --git a/lldb/test/API/commands/command/script/add/TestAddParsedCommand.py b/lldb/test/API/commands/command/script/add/TestAddParsedCommand.py new file mode 100644 index 00000000000000..7dba9c6937f211 --- /dev/null +++ b/lldb/test/API/commands/command/script/add/TestAddParsedCommand.py @@ -0,0 +1,146 @@ +""" +Test option and argument definitions in parsed script commands +""" + + +import sys +import os +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * + + +class 
ParsedCommandTestCase(TestBase): + NO_DEBUG_INFO_TESTCASE = True + + def test(self): + self.pycmd_tests() + + def check_help_options(self, cmd_name, opt_list, substrs = []): + """ + Pass the command name in cmd_name and a vector of the short option, type & long option. + This will append the checks for all the options and test "help command". + Any strings already in substrs will also be checked. + Any element in opt list that begin with "+" will be added to the checked strings as is. + """ + for elem in opt_list: + if elem[0] == "+": + substrs.append(elem[1:]) + else: + (short_opt, type, long_opt) = elem + substrs.append(f"-{short_opt} <{type}> ( --{long_opt} <{type}> )") + print(f"Opt Vec\n{substrs}") + self.expect("help " + cmd_name, substrs = substrs) + + def pycmd_tests(self): + source_dir = self.getSourceDir() + test_file_path = os.path.join(source_dir, "test_commands.py") + self.runCmd("command script import " + test_file_path) + self.expect("help", substrs = ["no-args", "one-arg-no-opt", "two-args"]) + + # Test that we did indeed add these commands as user commands: + + # This is the function to remove the custom commands in order to have a + # clean slate for the next test case. + def cleanup(): + self.runCmd("command script delete no-args one-arg-no-opt two-args", check=False) + + # Execute the cleanup function during test case tear down. + self.addTearDownHook(cleanup) + + # First test the no arguments command. 
Make sure the help is right: + no_arg_opts = [["b", "boolean", "bool-arg"], + "+a boolean arg, defaults to True", + ["d", "filename", "disk-file-name"], + "+An on disk filename", + ["e", "none", "enum-option"], + "+An enum, doesn't actually do anything", + "+Values: foo | bar | baz", + ["l", "linenum", "line-num"], + "+A line number", + ["s", "shlib-name", "shlib-name"], + "+A shared library name"] + substrs = ["Example command for use in debugging", + "Syntax: no-args "] + + self.check_help_options("no-args", no_arg_opts, substrs) + + # Make sure the command doesn't accept arguments: + self.expect("no-args an-arg", substrs=["'no-args' doesn't take any arguments."], + error=True) + + # Try setting the bool with the wrong value: + self.expect("no-args -b Something", + substrs=["Error setting option: bool-arg to Something"], + error=True) + # Try setting the enum to an illegal value as well: + self.expect("no-args --enum-option Something", + substrs=["error: Error setting option: enum-option to Something"], + error=True) + + # Check some of the command groups: + self.expect("no-args -b true -s Something -l 10", + substrs=["error: invalid combination of options for the given command"], + error=True) + + # Now set the bool arg correctly, note only the first option was set: + self.expect("no-args -b true", substrs=["bool-arg (set: True): True", + "shlib-name (set: False):", + "disk-file-name (set: False):", + "line-num (set: False):", + "enum-option (set: False):"]) + + # Now set the enum arg correctly, note only the first option was set: + self.expect("no-args -e foo", substrs=["bool-arg (set: False):", + "shlib-name (set: False):", + "disk-file-name (set: False):", + "line-num (set: False):", + "enum-option (set: True): foo"]) + # Try a pair together: + self.expect("no-args -b false -s Something", substrs=["bool-arg (set: True): False", + "shlib-name (set: True): Something", + "disk-file-name (set: False):", + "line-num (set: False):", + "enum-option (set: False):"]) 
+ + # Next try some completion tests: + + interp = self.dbg.GetCommandInterpreter() + matches = lldb.SBStringList() + descriptions = lldb.SBStringList() + + # First try an enum completion: + num_completions = interp.HandleCompletionWithDescriptions("no-args -e f", 12, 0, + 1000, matches, descriptions) + self.assertEqual(num_completions, 1, "Only one completion for foo") + self.assertEqual(matches.GetSize(), 2, "The first element is the complete additional text") + self.assertEqual(matches.GetStringAtIndex(0), "oo ", "And we got the right extra characters") + self.assertEqual(matches.GetStringAtIndex(1), "foo", "And we got the right match") + self.assertEqual(descriptions.GetSize(), 2, "descriptions matche the return length") + # FIXME: we don't return descriptions for enum elements + #self.assertEqual(descriptions.GetStringAtIndex(1), "does foo things", "And we got the right description") + + # Now try an internal completer, the on disk file one is handy: + partial_name = os.path.join(source_dir, "test_") + cmd_str = f"no-args -d '{partial_name}'" + + matches.Clear() + descriptions.Clear() + num_completions = interp.HandleCompletionWithDescriptions(cmd_str, len(cmd_str) - 1, 0, + 1000, matches, descriptions) + print(f"First: {matches.GetStringAtIndex(0)}\nSecond: {matches.GetStringAtIndex(1)}\nThird: {matches.GetStringAtIndex(2)}") + self.assertEqual(num_completions, 1, "Only one completion for source file") + self.assertEqual(matches.GetSize(), 2, "The first element is the complete line") + self.assertEqual(matches.GetStringAtIndex(0), "commands.py' ", "And we got the right extra characters") + self.assertEqual(matches.GetStringAtIndex(1), test_file_path, "And we got the right match") + self.assertEqual(descriptions.GetSize(), 2, "descriptions match the return length") + # FIXME: we don't return descriptions for enum elements + #self.assertEqual(descriptions.GetStringAtIndex(1), "does foo things", "And we got the right description") + + # Try a command with 
arguments. + # FIXME: It should be enough to define an argument and it's type to get the completer + # wired up for that argument type if it is a known type. But that isn't wired up in the + # command parser yet, so I don't have any tests for that. We also don't currently check + # that the arguments passed match the argument specifications, so here I just pass a couple + # sets of arguments and make sure we get back what we put in: + self.expect("two-args 'First Argument' 'Second Argument'", substrs=["0: First Argument", "1: Second Argument"]) diff --git a/lldb/test/API/commands/command/script/add/test_commands.py b/lldb/test/API/commands/command/script/add/test_commands.py new file mode 100644 index 00000000000000..801d58814ac373 --- /dev/null +++ b/lldb/test/API/commands/command/script/add/test_commands.py @@ -0,0 +1,174 @@ +""" +Test defining commands using the lldb command definitions +""" +import inspect +import sys +import lldb +from lldb.plugins.parsed_cmd import ParsedCommand + +class ReportingCmd(ParsedCommand): + def __init__(self, debugger, unused): + super().__init__(debugger, unused) + + def __call__(self, debugger, args_array, exe_ctx, result): + opt_def = self.get_options_definition() + if len(opt_def): + result.AppendMessage("Options:\n") + for long_option, elem in opt_def.items(): + dest = elem["dest"] + result.AppendMessage(f"{long_option} (set: {elem['_value_set']}): {object.__getattribute__(self.ov_parser, dest)}\n") + else: + result.AppendMessage("No options\n") + + num_args = args_array.GetSize() + if num_args > 0: + result.AppendMessage(f"{num_args} arguments:") + for idx in range(0,num_args): + result.AppendMessage(f"{idx}: {args_array.GetItemAtIndex(idx).GetStringValue(10000)}\n") + +class NoArgsCommand(ReportingCmd): + program = "no-args" + + def __init__(self, debugger, unused): + super().__init__(debugger, unused) + + @classmethod + def register_lldb_command(cls, debugger, module_name): + ParsedCommand.do_register_cmd(cls, debugger, 
module_name) + + def setup_command_definition(self): + self.ov_parser.add_option( + "b", + "bool-arg", + "a boolean arg, defaults to True", + value_type = lldb.eArgTypeBoolean, + groups = [1,2], + dest = "bool_arg", + default = True + ) + + self.ov_parser.add_option( + "s", + "shlib-name", + "A shared library name.", + value_type=lldb.eArgTypeShlibName, + groups = [1, [3,4]], + dest = "shlib_name", + default = None + ) + + self.ov_parser.add_option( + "d", + "disk-file-name", + "An on disk filename", + value_type = lldb.eArgTypeFilename, + dest = "disk_file_name", + default = None + ) + + self.ov_parser.add_option( + "l", + "line-num", + "A line number", + value_type = lldb.eArgTypeLineNum, + groups = 3, + dest = "line_num", + default = 0 + ) + + self.ov_parser.add_option( + "e", + "enum-option", + "An enum, doesn't actually do anything", + enum_values = [["foo", "does foo things"], + ["bar", "does bar things"], + ["baz", "does baz things"]], + groups = 4, + dest = "enum_option", + default = "foo" + ) + + def get_short_help(self): + return "Example command for use in debugging" + + def get_long_help(self): + return self.help_string + +class OneArgCommandNoOptions(ReportingCmd): + program = "one-arg-no-opt" + + def __init__(self, debugger, unused): + super().__init__(debugger, unused) + + @classmethod + def register_lldb_command(cls, debugger, module_name): + ParsedCommand.do_register_cmd(cls, debugger, module_name) + + def setup_command_definition(self): + self.ov_parser.add_argument_set([self.ov_parser.make_argument_element(lldb.eArgTypeSourceFile, "plain")]) + + def get_short_help(self): + return "Example command for use in debugging" + + def get_long_help(self): + return self.help_string + +class TwoArgGroupsCommand(ReportingCmd): + program = "two-args" + + def __init__(self, debugger, unused): + super().__init__(debugger, unused) + + @classmethod + def register_lldb_command(cls, debugger, module_name): + ParsedCommand.do_register_cmd(cls, debugger, module_name) 
+ + def setup_command_definition(self): + self.ov_parser.add_option( + "l", + "language", + "language defaults to None", + value_type = lldb.eArgTypeLanguage, + groups = [1,2], + dest = "language", + default = None + ) + + self.ov_parser.add_option( + "c", + "log-channel", + "log channel - defaults to lldb", + value_type=lldb.eArgTypeLogChannel, + groups = [1, 3], + dest = "log_channel", + default = "lldb" + ) + + self.ov_parser.add_option( + "p", + "process-name", + "A process name, defaults to None", + value_type = lldb.eArgTypeProcessName, + dest = "proc_name", + default = None + ) + + self.ov_parser.add_argument_set([self.ov_parser.make_argument_element(lldb.eArgTypeClassName, "plain", [1,2]), + self.ov_parser.make_argument_element(lldb.eArgTypeOffset, "optional", [1,2])]) + + self.ov_parser.add_argument_set([self.ov_parser.make_argument_element(lldb.eArgTypePythonClass, "plain", [3,4]), + self.ov_parser.make_argument_element(lldb.eArgTypePid, "optional", [3,4])]) + + def get_short_help(self): + return "Example command for use in debugging" + + def get_long_help(self): + return self.help_string + +def __lldb_init_module(debugger, dict): + # Register all classes that have a register_lldb_command method + for _name, cls in inspect.getmembers(sys.modules[__name__]): + if inspect.isclass(cls) and callable( + getattr(cls, "register_lldb_command", None) + ): + cls.register_lldb_command(debugger, __name__) diff --git a/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp b/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp index 7f3359f6bf26b2..5f0cc4c23db7b2 100644 --- a/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp +++ b/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp @@ -218,6 +218,14 @@ bool lldb_private::python::SWIGBridge::LLDBSwigPythonCallCommandObject( return false; } +bool lldb_private::python::SWIGBridge::LLDBSwigPythonCallParsedCommandObject( + PyObject *implementor, lldb::DebuggerSP debugger, + StructuredDataImpl 
&args_impl, + lldb_private::CommandReturnObject &cmd_retobj, + lldb::ExecutionContextRefSP exe_ctx_ref_sp) { + return false; +} + bool lldb_private::python::SWIGBridge::LLDBSwigPythonCallModuleInit( const char *python_module_name, const char *session_dictionary_name, lldb::DebuggerSP debugger) { From be9f8ffd8141f7cadf04b1bef5dfc65a7b8a7fd7 Mon Sep 17 00:00:00 2001 From: David Truby Date: Tue, 13 Feb 2024 19:13:54 +0000 Subject: [PATCH 044/240] [mlir][flang][openmp] Rework wsloop reduction operations (#80019) This patch reworks the way that wsloop reduction operations function to better match the expected semantics from the OpenMP specification, following the rework of parallel reductions. The new semantics create a private reduction variable as a block argument which should be used normally for all operations on that variable in the region; this private variable is then combined with the others into the shared variable. This way no special omp.reduction operations are needed inside the region. These block arguments follow the loop control block arguments. 
--------- Co-authored-by: Kiran Chandramohan --- flang/lib/Lower/OpenMP.cpp | 66 ++- .../Fir/convert-to-llvm-openmp-and-fir.fir | 20 +- .../Lower/OpenMP/FIR/wsloop-reduction-add.f90 | 432 +++++++++------ .../OpenMP/FIR/wsloop-reduction-iand.f90 | 8 +- .../OpenMP/FIR/wsloop-reduction-ieor.f90 | 8 +- .../Lower/OpenMP/FIR/wsloop-reduction-ior.f90 | 6 +- .../FIR/wsloop-reduction-logical-and.f90 | 137 ----- .../FIR/wsloop-reduction-logical-eqv.f90 | 231 ++++---- .../FIR/wsloop-reduction-logical-neqv.f90 | 233 ++++---- .../FIR/wsloop-reduction-logical-or.f90 | 137 ----- .../Lower/OpenMP/FIR/wsloop-reduction-max.f90 | 13 +- .../Lower/OpenMP/FIR/wsloop-reduction-min.f90 | 12 +- .../Lower/OpenMP/FIR/wsloop-reduction-mul.f90 | 274 ---------- flang/test/Lower/OpenMP/default-clause.f90 | 2 +- .../OpenMP/wsloop-reduction-add-hlfir.f90 | 65 ++- .../Lower/OpenMP/wsloop-reduction-add.f90 | 499 +++++++++++------- .../Lower/OpenMP/wsloop-reduction-iand.f90 | 64 ++- .../Lower/OpenMP/wsloop-reduction-ieor.f90 | 9 +- .../Lower/OpenMP/wsloop-reduction-ior.f90 | 65 ++- .../OpenMP/wsloop-reduction-logical-and.f90 | 255 +++++---- .../OpenMP/wsloop-reduction-logical-eqv.f90 | 249 +++++---- .../OpenMP/wsloop-reduction-logical-neqv.f90 | 254 +++++---- .../OpenMP/wsloop-reduction-logical-or.f90 | 251 +++++---- .../Lower/OpenMP/wsloop-reduction-max-2.f90 | 3 +- .../OpenMP/wsloop-reduction-max-hlfir.f90 | 60 ++- .../Lower/OpenMP/wsloop-reduction-max.f90 | 153 ++++-- .../Lower/OpenMP/wsloop-reduction-min.f90 | 155 ++++-- .../Lower/OpenMP/wsloop-reduction-mul.f90 | 468 +++++++++------- mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 21 +- .../Conversion/SCFToOpenMP/SCFToOpenMP.cpp | 36 +- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 101 +++- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 38 +- .../OpenMPToLLVM/convert-to-llvmir.mlir | 14 +- .../Conversion/SCFToOpenMP/reductions.mlir | 30 +- mlir/test/Dialect/OpenMP/invalid.mlir | 35 +- mlir/test/Dialect/OpenMP/ops.mlir | 26 +- 
mlir/test/Target/LLVMIR/openmp-reduction.mlir | 44 +- 37 files changed, 2477 insertions(+), 1997 deletions(-) delete mode 100644 flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-and.f90 delete mode 100644 flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-or.f90 delete mode 100644 flang/test/Lower/OpenMP/FIR/wsloop-reduction-mul.f90 diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp index 06850bebd7d05a..f7f80ca9c62ee0 100644 --- a/flang/lib/Lower/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP.cpp @@ -3352,6 +3352,57 @@ genLoopVars(mlir::Operation *op, Fortran::lower::AbstractConverter &converter, return args; } +static llvm::SmallVector +genLoopAndReductionVars(mlir::Operation *op, Fortran::lower::AbstractConverter &converter, + mlir::Location &loc, + const llvm::SmallVector &loopArgs, + const llvm::SmallVector &reductionArgs, + llvm::SmallVector &reductionTypes) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + + llvm::SmallVector blockArgTypes; + llvm::SmallVector blockArgLocs; + blockArgTypes.reserve(loopArgs.size() + reductionArgs.size()); + blockArgLocs.reserve(blockArgTypes.size()); + mlir::Block *entryBlock; + + if (loopArgs.size()) { + std::size_t loopVarTypeSize = 0; + for (const Fortran::semantics::Symbol *arg : loopArgs) + loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size()); + mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); + std::fill_n(std::back_inserter(blockArgTypes), loopArgs.size(), + loopVarType); + std::fill_n(std::back_inserter(blockArgLocs), loopArgs.size(), loc); + } + if (reductionArgs.size()) { + llvm::copy(reductionTypes, std::back_inserter(blockArgTypes)); + std::fill_n(std::back_inserter(blockArgLocs), reductionArgs.size(), loc); + } + entryBlock = firOpBuilder.createBlock(&op->getRegion(0), {}, blockArgTypes, + blockArgLocs); + // The argument is not currently in memory, so make a temporary for the + // argument, and store it there, then bind that location to 
the argument. + if (loopArgs.size()) { + mlir::Operation *storeOp = nullptr; + for (auto [argIndex, argSymbol] : llvm::enumerate(loopArgs)) { + mlir::Value indexVal = + fir::getBase(op->getRegion(0).front().getArgument(argIndex)); + storeOp = + createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol); + } + firOpBuilder.setInsertionPointAfter(storeOp); + } + // Bind the reduction arguments to their block arguments + for (auto [arg, prv] : llvm::zip_equal( + reductionArgs, + llvm::drop_begin(entryBlock->getArguments(), loopArgs.size()))) { + converter.bindSymbol(*arg, prv); + } + + return loopArgs; +} + static void createSimdLoop(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, @@ -3429,6 +3480,7 @@ static void createWsLoop(Fortran::lower::AbstractConverter &converter, llvm::SmallVector linearVars, linearStepVars; llvm::SmallVector iv; llvm::SmallVector reductionDeclSymbols; + llvm::SmallVector reductionSymbols; mlir::omp::ClauseOrderKindAttr orderClauseOperand; mlir::omp::ClauseScheduleKindAttr scheduleValClauseOperand; mlir::UnitAttr nowaitClauseOperand, scheduleSimdClauseOperand; @@ -3440,7 +3492,8 @@ static void createWsLoop(Fortran::lower::AbstractConverter &converter, cp.processCollapse(loc, eval, lowerBound, upperBound, step, iv, loopVarTypeSize); cp.processScheduleChunk(stmtCtx, scheduleChunkClauseOperand); - cp.processReduction(loc, reductionVars, reductionDeclSymbols); + cp.processReduction(loc, reductionVars, reductionDeclSymbols, + &reductionSymbols); cp.processTODO(loc, ompDirective); @@ -3484,14 +3537,20 @@ static void createWsLoop(Fortran::lower::AbstractConverter &converter, auto *nestedEval = getCollapsedLoopEval( eval, Fortran::lower::getCollapseValue(beginClauseList)); + llvm::SmallVector reductionTypes; + reductionTypes.reserve(reductionVars.size()); + llvm::transform(reductionVars, std::back_inserter(reductionTypes), + [](mlir::Value v) { return v.getType(); }); + auto ivCallback = 
[&](mlir::Operation *op) { - return genLoopVars(op, converter, loc, iv); + return genLoopAndReductionVars(op, converter, loc, iv, reductionSymbols, reductionTypes); }; createBodyOfOp( wsLoopOp, OpWithBodyGenInfo(converter, semaCtx, loc, *nestedEval) .setClauses(&beginClauseList) .setDataSharingProcessor(&dsp) + .setReductions(&reductionSymbols, &reductionTypes) .setGenRegionEntryCb(ivCallback)); } @@ -3594,12 +3653,11 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, // 2.9.3.1 SIMD construct createSimdLoop(converter, semaCtx, eval, ompDirective, loopOpClauseList, currentLocation); + genOpenMPReduction(converter, semaCtx, loopOpClauseList); } else { createWsLoop(converter, semaCtx, eval, ompDirective, loopOpClauseList, endClauseList, currentLocation); } - - genOpenMPReduction(converter, semaCtx, loopOpClauseList); } static void diff --git a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir index beb399ec3ac05e..a1fc614334dbce 100644 --- a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir +++ b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir @@ -701,10 +701,17 @@ func.func @_QPsb() { // CHECK-SAME: %[[ARRAY_REF:.*]]: !llvm.ptr // CHECK: %[[RED_ACCUMULATOR:.*]] = llvm.alloca %2 x i32 {bindc_name = "x"} : (i64) -> !llvm.ptr // CHECK: omp.parallel { -// CHECK: omp.wsloop reduction(@[[EQV_REDUCTION]] -> %[[RED_ACCUMULATOR]] : !llvm.ptr) for +// CHECK: omp.wsloop reduction(@[[EQV_REDUCTION]] %[[RED_ACCUMULATOR]] -> %[[PRV:.+]] : !llvm.ptr) for // CHECK: %[[ARRAY_ELEM_REF:.*]] = llvm.getelementptr %[[ARRAY_REF]][0, %{{.*}}] : (!llvm.ptr, i64) -> !llvm.ptr // CHECK: %[[ARRAY_ELEM:.*]] = llvm.load %[[ARRAY_ELEM_REF]] : !llvm.ptr -> i32 -// CHECK: omp.reduction %[[ARRAY_ELEM]], %[[RED_ACCUMULATOR]] : i32, !llvm.ptr +// CHECK: %[[LPRV:.+]] = llvm.load %[[PRV]] : !llvm.ptr -> i32 +// CHECK: %[[ZERO_1:.*]] = llvm.mlir.constant(0 : i64) : i32 +// CHECK: %[[ARGVAL_1:.*]] = llvm.icmp "ne" %[[LPRV]], 
%[[ZERO_1]] : i32 +// CHECK: %[[ZERO_2:.*]] = llvm.mlir.constant(0 : i64) : i32 +// CHECK: %[[ARGVAL_2:.*]] = llvm.icmp "ne" %[[ARRAY_ELEM]], %[[ZERO_2]] : i32 +// CHECK: %[[RES:.*]] = llvm.icmp "eq" %[[ARGVAL_2]], %[[ARGVAL_1]] : i1 +// CHECK: %[[RES_EXT:.*]] = llvm.zext %[[RES]] : i1 to i32 +// CHECK: llvm.store %[[RES_EXT]], %[[PRV]] : i32, !llvm.ptr // CHECK: omp.yield // CHECK: omp.terminator // CHECK: llvm.return @@ -733,7 +740,7 @@ func.func @_QPsimple_reduction(%arg0: !fir.ref>> %c1_i32 = arith.constant 1 : i32 %c100_i32 = arith.constant 100 : i32 %c1_i32_0 = arith.constant 1 : i32 - omp.wsloop reduction(@eqv_reduction -> %1 : !fir.ref>) for (%arg1) : i32 = (%c1_i32) to (%c100_i32) inclusive step (%c1_i32_0) { + omp.wsloop reduction(@eqv_reduction %1 -> %prv : !fir.ref>) for (%arg1) : i32 = (%c1_i32) to (%c100_i32) inclusive step (%c1_i32_0) { fir.store %arg1 to %3 : !fir.ref %4 = fir.load %3 : !fir.ref %5 = fir.convert %4 : (i32) -> i64 @@ -741,7 +748,12 @@ func.func @_QPsimple_reduction(%arg0: !fir.ref>> %6 = arith.subi %5, %c1_i64 : i64 %7 = fir.coordinate_of %arg0, %6 : (!fir.ref>>, i64) -> !fir.ref> %8 = fir.load %7 : !fir.ref> - omp.reduction %8, %1 : !fir.logical<4>, !fir.ref> + %lprv = fir.load %prv : !fir.ref> + %lprv1 = fir.convert %lprv : (!fir.logical<4>) -> i1 + %9 = fir.convert %8 : (!fir.logical<4>) -> i1 + %10 = arith.cmpi eq, %9, %lprv1 : i1 + %11 = fir.convert %10 : (i1) -> !fir.logical<4> + fir.store %11 to %prv : !fir.ref> omp.yield } omp.terminator diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add.f90 index 62d9af31588e9e..5664529416fe87 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add.f90 @@ -1,66 +1,79 @@ ! RUN: bbc -emit-fir -hlfir=false -fopenmp %s -o - | FileCheck %s ! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp %s -o - | FileCheck %s +! 
NOTE: Assertions have been autogenerated by utils/generate-test-checks.py -!CHECK-LABEL: omp.reduction.declare -!CHECK-SAME: @[[RED_F64_NAME:.*]] : f64 init { -!CHECK: ^bb0(%{{.*}}: f64): -!CHECK: %[[C0_1:.*]] = arith.constant 0.000000e+00 : f64 -!CHECK: omp.yield(%[[C0_1]] : f64) -!CHECK: } combiner { -!CHECK: ^bb0(%[[ARG0:.*]]: f64, %[[ARG1:.*]]: f64): -!CHECK: %[[RES:.*]] = arith.addf %[[ARG0]], %[[ARG1]] {{.*}}: f64 -!CHECK: omp.yield(%[[RES]] : f64) -!CHECK: } +! The script is designed to make adding checks to +! a test case fast, it is *not* designed to be authoritative +! about what constitutes a good test! The CHECK should be +! minimized and named to reflect the test intent. -!CHECK-LABEL: omp.reduction.declare -!CHECK-SAME: @[[RED_I64_NAME:.*]] : i64 init { -!CHECK: ^bb0(%{{.*}}: i64): -!CHECK: %[[C0_1:.*]] = arith.constant 0 : i64 -!CHECK: omp.yield(%[[C0_1]] : i64) -!CHECK: } combiner { -!CHECK: ^bb0(%[[ARG0:.*]]: i64, %[[ARG1:.*]]: i64): -!CHECK: %[[RES:.*]] = arith.addi %[[ARG0]], %[[ARG1]] : i64 -!CHECK: omp.yield(%[[RES]] : i64) -!CHECK: } +! CHECK-LABEL: omp.reduction.declare @add_reduction_f_64 : f64 init { +! CHECK: ^bb0(%[[VAL_0:.*]]: f64): +! CHECK: %[[VAL_1:.*]] = arith.constant 0.000000e+00 : f64 +! CHECK: omp.yield(%[[VAL_1]] : f64) -!CHECK-LABEL: omp.reduction.declare -!CHECK-SAME: @[[RED_F32_NAME:.*]] : f32 init { -!CHECK: ^bb0(%{{.*}}: f32): -!CHECK: %[[C0_1:.*]] = arith.constant 0.000000e+00 : f32 -!CHECK: omp.yield(%[[C0_1]] : f32) -!CHECK: } combiner { -!CHECK: ^bb0(%[[ARG0:.*]]: f32, %[[ARG1:.*]]: f32): -!CHECK: %[[RES:.*]] = arith.addf %[[ARG0]], %[[ARG1]] {{.*}}: f32 -!CHECK: omp.yield(%[[RES]] : f32) -!CHECK: } +! CHECK-LABEL: } combiner { +! CHECK: ^bb0(%[[VAL_0:.*]]: f64, %[[VAL_1:.*]]: f64): +! CHECK: %[[VAL_2:.*]] = arith.addf %[[VAL_0]], %[[VAL_1]] fastmath : f64 +! CHECK: omp.yield(%[[VAL_2]] : f64) +! 
CHECK: } -!CHECK-LABEL: omp.reduction.declare -!CHECK-SAME: @[[RED_I32_NAME:.*]] : i32 init { -!CHECK: ^bb0(%{{.*}}: i32): -!CHECK: %[[C0_1:.*]] = arith.constant 0 : i32 -!CHECK: omp.yield(%[[C0_1]] : i32) -!CHECK: } combiner { -!CHECK: ^bb0(%[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32): -!CHECK: %[[RES:.*]] = arith.addi %[[ARG0]], %[[ARG1]] : i32 -!CHECK: omp.yield(%[[RES]] : i32) -!CHECK: } +! CHECK-LABEL: omp.reduction.declare @add_reduction_i_64 : i64 init { +! CHECK: ^bb0(%[[VAL_0:.*]]: i64): +! CHECK: %[[VAL_1:.*]] = arith.constant 0 : i64 +! CHECK: omp.yield(%[[VAL_1]] : i64) + +! CHECK-LABEL: } combiner { +! CHECK: ^bb0(%[[VAL_0:.*]]: i64, %[[VAL_1:.*]]: i64): +! CHECK: %[[VAL_2:.*]] = arith.addi %[[VAL_0]], %[[VAL_1]] : i64 +! CHECK: omp.yield(%[[VAL_2]] : i64) +! CHECK: } + +! CHECK-LABEL: omp.reduction.declare @add_reduction_f_32 : f32 init { +! CHECK: ^bb0(%[[VAL_0:.*]]: f32): +! CHECK: %[[VAL_1:.*]] = arith.constant 0.000000e+00 : f32 +! CHECK: omp.yield(%[[VAL_1]] : f32) + +! CHECK-LABEL: } combiner { +! CHECK: ^bb0(%[[VAL_0:.*]]: f32, %[[VAL_1:.*]]: f32): +! CHECK: %[[VAL_2:.*]] = arith.addf %[[VAL_0]], %[[VAL_1]] fastmath : f32 +! CHECK: omp.yield(%[[VAL_2]] : f32) +! CHECK: } + +! CHECK-LABEL: omp.reduction.declare @add_reduction_i_32 : i32 init { +! CHECK: ^bb0(%[[VAL_0:.*]]: i32): +! CHECK: %[[VAL_1:.*]] = arith.constant 0 : i32 +! CHECK: omp.yield(%[[VAL_1]] : i32) + +! CHECK-LABEL: } combiner { +! CHECK: ^bb0(%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32): +! CHECK: %[[VAL_2:.*]] = arith.addi %[[VAL_0]], %[[VAL_1]] : i32 +! CHECK: omp.yield(%[[VAL_2]] : i32) +! CHECK: } + +! CHECK-LABEL: func.func @_QPsimple_int_reduction() { +! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_int_reductionEi"} +! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_int_reductionEx"} +! CHECK: %[[VAL_2:.*]] = arith.constant 0 : i32 +! CHECK: fir.store %[[VAL_2]] to %[[VAL_1]] : !fir.ref +! CHECK: omp.parallel { +! 
CHECK: %[[VAL_3:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_5:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref) for (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) { +! CHECK: fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref +! CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_7]] : !fir.ref +! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_3]] : !fir.ref +! CHECK: %[[VAL_11:.*]] = arith.addi %[[VAL_9]], %[[VAL_10]] : i32 +! CHECK: fir.store %[[VAL_11]] to %[[VAL_7]] : !fir.ref +! CHECK: omp.yield +! CHECK: } +! CHECK: omp.terminator +! CHECK: } +! CHECK: return +! CHECK: } -!CHECK-LABEL: func.func @_QPsimple_int_reduction -!CHECK: %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_int_reductionEx"} -!CHECK: %[[C0_2:.*]] = arith.constant 0 : i32 -!CHECK: fir.store %[[C0_2]] to %[[XREF]] : !fir.ref -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C100:.*]] = arith.constant 100 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL]], %[[XREF]] : i32, !fir.ref -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return subroutine simple_int_reduction integer :: x x = 0 @@ -73,23 +86,31 @@ subroutine simple_int_reduction !$omp end parallel end subroutine -!CHECK-LABEL: func.func @_QPsimple_real_reduction -!CHECK: %[[XREF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFsimple_real_reductionEx"} -!CHECK: %[[C0_2:.*]] = arith.constant 
0.000000e+00 : f32 -!CHECK: fir.store %[[C0_2]] to %[[XREF]] : !fir.ref -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C100:.*]] = arith.constant 100 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_F32_NAME]] -> %[[XREF]] : !fir.ref) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL_i32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL_f32:.*]] = fir.convert %[[I_PVT_VAL_i32]] : (i32) -> f32 -!CHECK: omp.reduction %[[I_PVT_VAL_f32]], %[[XREF]] : f32, !fir.ref -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return + +! CHECK-LABEL: func.func @_QPsimple_real_reduction() { +! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_real_reductionEi"} +! CHECK: %[[VAL_1:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFsimple_real_reductionEx"} +! CHECK: %[[VAL_2:.*]] = arith.constant 0.000000e+00 : f32 +! CHECK: fir.store %[[VAL_2]] to %[[VAL_1]] : !fir.ref +! CHECK: omp.parallel { +! CHECK: %[[VAL_3:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_5:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref) for (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) { +! CHECK: fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref +! CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_7]] : !fir.ref +! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_3]] : !fir.ref +! CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (i32) -> f32 +! CHECK: %[[VAL_12:.*]] = arith.addf %[[VAL_9]], %[[VAL_11]] fastmath : f32 +! CHECK: fir.store %[[VAL_12]] to %[[VAL_7]] : !fir.ref +! CHECK: omp.yield +! CHECK: } +! 
CHECK: omp.terminator +! CHECK: } +! CHECK: return +! CHECK: } + subroutine simple_real_reduction real :: x x = 0.0 @@ -102,22 +123,29 @@ subroutine simple_real_reduction !$omp end parallel end subroutine -!CHECK-LABEL: func.func @_QPsimple_int_reduction_switch_order -!CHECK: %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_int_reduction_switch_orderEx"} -!CHECK: %[[C0_2:.*]] = arith.constant 0 : i32 -!CHECK: fir.store %[[C0_2]] to %[[XREF]] : !fir.ref -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C100:.*]] = arith.constant 100 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL]], %[[XREF]] : i32, !fir.ref -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return +! CHECK-LABEL: func.func @_QPsimple_int_reduction_switch_order() { +! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_int_reduction_switch_orderEi"} +! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_int_reduction_switch_orderEx"} +! CHECK: %[[VAL_2:.*]] = arith.constant 0 : i32 +! CHECK: fir.store %[[VAL_2]] to %[[VAL_1]] : !fir.ref +! CHECK: omp.parallel { +! CHECK: %[[VAL_3:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_5:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref) for (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) { +! CHECK: fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref +! 
CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_3]] : !fir.ref +! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_7]] : !fir.ref +! CHECK: %[[VAL_11:.*]] = arith.addi %[[VAL_9]], %[[VAL_10]] : i32 +! CHECK: fir.store %[[VAL_11]] to %[[VAL_7]] : !fir.ref +! CHECK: omp.yield +! CHECK: } +! CHECK: omp.terminator +! CHECK: } +! CHECK: return +! CHECK: } + subroutine simple_int_reduction_switch_order integer :: x x = 0 @@ -130,23 +158,30 @@ subroutine simple_int_reduction_switch_order !$omp end parallel end subroutine -!CHECK-LABEL: func.func @_QPsimple_real_reduction_switch_order -!CHECK: %[[XREF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFsimple_real_reduction_switch_orderEx"} -!CHECK: %[[C0_2:.*]] = arith.constant 0.000000e+00 : f32 -!CHECK: fir.store %[[C0_2]] to %[[XREF]] : !fir.ref -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C100:.*]] = arith.constant 100 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_F32_NAME]] -> %[[XREF]] : !fir.ref) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL_i32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL_f32:.*]] = fir.convert %[[I_PVT_VAL_i32]] : (i32) -> f32 -!CHECK: omp.reduction %[[I_PVT_VAL_f32]], %[[XREF]] : f32, !fir.ref -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return +! CHECK-LABEL: func.func @_QPsimple_real_reduction_switch_order() { +! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_real_reduction_switch_orderEi"} +! CHECK: %[[VAL_1:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFsimple_real_reduction_switch_orderEx"} +! CHECK: %[[VAL_2:.*]] = arith.constant 0.000000e+00 : f32 +! CHECK: fir.store %[[VAL_2]] to %[[VAL_1]] : !fir.ref +! CHECK: omp.parallel { +! 
CHECK: %[[VAL_3:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_5:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref) for (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) { +! CHECK: fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref +! CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_3]] : !fir.ref +! CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i32) -> f32 +! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_7]] : !fir.ref +! CHECK: %[[VAL_12:.*]] = arith.addf %[[VAL_10]], %[[VAL_11]] fastmath : f32 +! CHECK: fir.store %[[VAL_12]] to %[[VAL_7]] : !fir.ref +! CHECK: omp.yield +! CHECK: } +! CHECK: omp.terminator +! CHECK: } +! CHECK: return +! CHECK: } + subroutine simple_real_reduction_switch_order real :: x x = 0.0 @@ -159,23 +194,43 @@ subroutine simple_real_reduction_switch_order !$omp end parallel end subroutine -!CHECK-LABEL: func.func @_QPmultiple_int_reductions_same_type -!CHECK: %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_int_reductions_same_typeEx"} -!CHECK: %[[YREF:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFmultiple_int_reductions_same_typeEy"} -!CHECK: %[[ZREF:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFmultiple_int_reductions_same_typeEz"} -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref, @[[RED_I32_NAME]] -> %[[YREF]] : !fir.ref, @[[RED_I32_NAME]] -> %[[ZREF]] : !fir.ref) for (%[[IVAL]]) : i32 -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL1:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL1]], %[[XREF]] : i32, !fir.ref -!CHECK: %[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL2]], 
%[[YREF]] : i32, !fir.ref -!CHECK: %[[I_PVT_VAL3:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL3]], %[[ZREF]] : i32, !fir.ref -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return +! CHECK-LABEL: func.func @_QPmultiple_int_reductions_same_type() { +! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_int_reductions_same_typeEi"} +! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_int_reductions_same_typeEx"} +! CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFmultiple_int_reductions_same_typeEy"} +! CHECK: %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFmultiple_int_reductions_same_typeEz"} +! CHECK: %[[VAL_4:.*]] = arith.constant 0 : i32 +! CHECK: fir.store %[[VAL_4]] to %[[VAL_1]] : !fir.ref +! CHECK: %[[VAL_5:.*]] = arith.constant 0 : i32 +! CHECK: fir.store %[[VAL_5]] to %[[VAL_2]] : !fir.ref +! CHECK: %[[VAL_6:.*]] = arith.constant 0 : i32 +! CHECK: fir.store %[[VAL_6]] to %[[VAL_3]] : !fir.ref +! CHECK: omp.parallel { +! CHECK: %[[VAL_7:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_9:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_1]] -> %[[VAL_11:.*]] : !fir.ref, @add_reduction_i_32 %[[VAL_2]] -> %[[VAL_12:.*]] : !fir.ref, @add_reduction_i_32 %[[VAL_3]] -> %[[VAL_13:.*]] : !fir.ref) for (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { +! CHECK: fir.store %[[VAL_14]] to %[[VAL_7]] : !fir.ref +! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_11]] : !fir.ref +! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_7]] : !fir.ref +! CHECK: %[[VAL_17:.*]] = arith.addi %[[VAL_15]], %[[VAL_16]] : i32 +! CHECK: fir.store %[[VAL_17]] to %[[VAL_11]] : !fir.ref +! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_12]] : !fir.ref +! 
CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_7]] : !fir.ref +! CHECK: %[[VAL_20:.*]] = arith.addi %[[VAL_18]], %[[VAL_19]] : i32 +! CHECK: fir.store %[[VAL_20]] to %[[VAL_12]] : !fir.ref +! CHECK: %[[VAL_21:.*]] = fir.load %[[VAL_13]] : !fir.ref +! CHECK: %[[VAL_22:.*]] = fir.load %[[VAL_7]] : !fir.ref +! CHECK: %[[VAL_23:.*]] = arith.addi %[[VAL_21]], %[[VAL_22]] : i32 +! CHECK: fir.store %[[VAL_23]] to %[[VAL_13]] : !fir.ref +! CHECK: omp.yield +! CHECK: } +! CHECK: omp.terminator +! CHECK: } +! CHECK: return +! CHECK: } + subroutine multiple_int_reductions_same_type integer :: x,y,z x = 0 @@ -192,26 +247,46 @@ subroutine multiple_int_reductions_same_type !$omp end parallel end subroutine -!CHECK-LABEL: func.func @_QPmultiple_real_reductions_same_type -!CHECK: %[[XREF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFmultiple_real_reductions_same_typeEx"} -!CHECK: %[[YREF:.*]] = fir.alloca f32 {bindc_name = "y", uniq_name = "_QFmultiple_real_reductions_same_typeEy"} -!CHECK: %[[ZREF:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_real_reductions_same_typeEz"} -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: omp.wsloop reduction(@[[RED_F32_NAME]] -> %[[XREF]] : !fir.ref, @[[RED_F32_NAME]] -> %[[YREF]] : !fir.ref, @[[RED_F32_NAME]] -> %[[ZREF]] : !fir.ref) for (%[[IVAL]]) : i32 -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL1_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL1_F32:.*]] = fir.convert %[[I_PVT_VAL1_I32]] : (i32) -> f32 -!CHECK: omp.reduction %[[I_PVT_VAL1_F32]], %[[XREF]] : f32, !fir.ref -!CHECK: %[[I_PVT_VAL2_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL2_F32:.*]] = fir.convert %[[I_PVT_VAL2_I32]] : (i32) -> f32 -!CHECK: omp.reduction %[[I_PVT_VAL2_F32]], %[[YREF]] : f32, !fir.ref -!CHECK: %[[I_PVT_VAL3_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL3_F32:.*]] = fir.convert 
%[[I_PVT_VAL3_I32]] : (i32) -> f32 -!CHECK: omp.reduction %[[I_PVT_VAL3_F32]], %[[ZREF]] : f32, !fir.ref -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return +! CHECK-LABEL: func.func @_QPmultiple_real_reductions_same_type() { +! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_real_reductions_same_typeEi"} +! CHECK: %[[VAL_1:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFmultiple_real_reductions_same_typeEx"} +! CHECK: %[[VAL_2:.*]] = fir.alloca f32 {bindc_name = "y", uniq_name = "_QFmultiple_real_reductions_same_typeEy"} +! CHECK: %[[VAL_3:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_real_reductions_same_typeEz"} +! CHECK: %[[VAL_4:.*]] = arith.constant 0.000000e+00 : f32 +! CHECK: fir.store %[[VAL_4]] to %[[VAL_1]] : !fir.ref +! CHECK: %[[VAL_5:.*]] = arith.constant 0.000000e+00 : f32 +! CHECK: fir.store %[[VAL_5]] to %[[VAL_2]] : !fir.ref +! CHECK: %[[VAL_6:.*]] = arith.constant 0.000000e+00 : f32 +! CHECK: fir.store %[[VAL_6]] to %[[VAL_3]] : !fir.ref +! CHECK: omp.parallel { +! CHECK: %[[VAL_7:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_9:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_1]] -> %[[VAL_11:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_2]] -> %[[VAL_12:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_3]] -> %[[VAL_13:.*]] : !fir.ref) for (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { +! CHECK: fir.store %[[VAL_14]] to %[[VAL_7]] : !fir.ref +! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_11]] : !fir.ref +! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_7]] : !fir.ref +! CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (i32) -> f32 +! CHECK: %[[VAL_18:.*]] = arith.addf %[[VAL_15]], %[[VAL_17]] fastmath : f32 +! CHECK: fir.store %[[VAL_18]] to %[[VAL_11]] : !fir.ref +! 
CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_12]] : !fir.ref +! CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_7]] : !fir.ref +! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i32) -> f32 +! CHECK: %[[VAL_22:.*]] = arith.addf %[[VAL_19]], %[[VAL_21]] fastmath : f32 +! CHECK: fir.store %[[VAL_22]] to %[[VAL_12]] : !fir.ref +! CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_13]] : !fir.ref +! CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_7]] : !fir.ref +! CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (i32) -> f32 +! CHECK: %[[VAL_26:.*]] = arith.addf %[[VAL_23]], %[[VAL_25]] fastmath : f32 +! CHECK: fir.store %[[VAL_26]] to %[[VAL_13]] : !fir.ref +! CHECK: omp.yield +! CHECK: } +! CHECK: omp.terminator +! CHECK: } +! CHECK: return +! CHECK: } + subroutine multiple_real_reductions_same_type real :: x,y,z x = 0.0 @@ -228,29 +303,54 @@ subroutine multiple_real_reductions_same_type !$omp end parallel end subroutine -!CHECK-LABEL: func.func @_QPmultiple_reductions_different_type -!CHECK: %[[WREF:.*]] = fir.alloca f64 {bindc_name = "w", uniq_name = "_QFmultiple_reductions_different_typeEw"} -!CHECK: %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_reductions_different_typeEx"} -!CHECK: %[[YREF:.*]] = fir.alloca i64 {bindc_name = "y", uniq_name = "_QFmultiple_reductions_different_typeEy"} -!CHECK: %[[ZREF:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_reductions_different_typeEz"} -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref, @[[RED_I64_NAME]] -> %[[YREF]] : !fir.ref, @[[RED_F32_NAME]] -> %[[ZREF]] : !fir.ref, @[[RED_F64_NAME]] -> %[[WREF]] : !fir.ref) for (%[[IVAL:.*]]) : i32 -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL1_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL1_I32]], %[[XREF]] : i32, !fir.ref -!CHECK: %[[I_PVT_VAL2_I32:.*]] = fir.load %[[I_PVT_REF]] : 
!fir.ref -!CHECK: %[[I_PVT_VAL2_I64:.*]] = fir.convert %[[I_PVT_VAL2_I32]] : (i32) -> i64 -!CHECK: omp.reduction %[[I_PVT_VAL2_I64]], %[[YREF]] : i64, !fir.ref -!CHECK: %[[I_PVT_VAL3_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL3_F32:.*]] = fir.convert %[[I_PVT_VAL3_I32]] : (i32) -> f32 -!CHECK: omp.reduction %[[I_PVT_VAL3_F32]], %[[ZREF]] : f32, !fir.ref -!CHECK: %[[I_PVT_VAL4_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL4_F64:.*]] = fir.convert %[[I_PVT_VAL4_I32]] : (i32) -> f64 -!CHECK: omp.reduction %[[I_PVT_VAL4_F64]], %[[WREF]] : f64, !fir.ref -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return +! CHECK-LABEL: func.func @_QPmultiple_reductions_different_type() { +! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_reductions_different_typeEi"} +! CHECK: %[[VAL_1:.*]] = fir.alloca f64 {bindc_name = "w", uniq_name = "_QFmultiple_reductions_different_typeEw"} +! CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_reductions_different_typeEx"} +! CHECK: %[[VAL_3:.*]] = fir.alloca i64 {bindc_name = "y", uniq_name = "_QFmultiple_reductions_different_typeEy"} +! CHECK: %[[VAL_4:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_reductions_different_typeEz"} +! CHECK: %[[VAL_5:.*]] = arith.constant 0 : i32 +! CHECK: fir.store %[[VAL_5]] to %[[VAL_2]] : !fir.ref +! CHECK: %[[VAL_6:.*]] = arith.constant 0 : i64 +! CHECK: fir.store %[[VAL_6]] to %[[VAL_3]] : !fir.ref +! CHECK: %[[VAL_7:.*]] = arith.constant 0.000000e+00 : f32 +! CHECK: fir.store %[[VAL_7]] to %[[VAL_4]] : !fir.ref +! CHECK: %[[VAL_8:.*]] = arith.constant 0.000000e+00 : f64 +! CHECK: fir.store %[[VAL_8]] to %[[VAL_1]] : !fir.ref +! CHECK: omp.parallel { +! CHECK: %[[VAL_9:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_11:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 +! 
CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_2]] -> %[[VAL_13:.*]] : !fir.ref, @add_reduction_i_64 %[[VAL_3]] -> %[[VAL_14:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_4]] -> %[[VAL_15:.*]] : !fir.ref, @add_reduction_f_64 %[[VAL_1]] -> %[[VAL_16:.*]] : !fir.ref) for (%[[VAL_17:.*]]) : i32 = (%[[VAL_10]]) to (%[[VAL_11]]) inclusive step (%[[VAL_12]]) { +! CHECK: fir.store %[[VAL_17]] to %[[VAL_9]] : !fir.ref +! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_13]] : !fir.ref +! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_9]] : !fir.ref +! CHECK: %[[VAL_20:.*]] = arith.addi %[[VAL_18]], %[[VAL_19]] : i32 +! CHECK: fir.store %[[VAL_20]] to %[[VAL_13]] : !fir.ref +! CHECK: %[[VAL_21:.*]] = fir.load %[[VAL_14]] : !fir.ref +! CHECK: %[[VAL_22:.*]] = fir.load %[[VAL_9]] : !fir.ref +! CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_22]] : (i32) -> i64 +! CHECK: %[[VAL_24:.*]] = arith.addi %[[VAL_21]], %[[VAL_23]] : i64 +! CHECK: fir.store %[[VAL_24]] to %[[VAL_14]] : !fir.ref +! CHECK: %[[VAL_25:.*]] = fir.load %[[VAL_15]] : !fir.ref +! CHECK: %[[VAL_26:.*]] = fir.load %[[VAL_9]] : !fir.ref +! CHECK: %[[VAL_27:.*]] = fir.convert %[[VAL_26]] : (i32) -> f32 +! CHECK: %[[VAL_28:.*]] = arith.addf %[[VAL_25]], %[[VAL_27]] fastmath : f32 +! CHECK: fir.store %[[VAL_28]] to %[[VAL_15]] : !fir.ref +! CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_16]] : !fir.ref +! CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_9]] : !fir.ref +! CHECK: %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (i32) -> f64 +! CHECK: %[[VAL_32:.*]] = arith.addf %[[VAL_29]], %[[VAL_31]] fastmath : f64 +! CHECK: fir.store %[[VAL_32]] to %[[VAL_16]] : !fir.ref +! CHECK: omp.yield +! CHECK: } +! CHECK: omp.terminator +! CHECK: } +! CHECK: return +! 
CHECK: } + + subroutine multiple_reductions_different_type integer :: x integer(kind=8) :: y diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-iand.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-iand.f90 index ecbcac88141f52..9ce1725dbab046 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-iand.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-iand.f90 @@ -10,13 +10,15 @@ !CHECK: omp.yield(%[[IAND_VAL_I]] : i32) !CHECK-LABEL: @_QPreduction_iand -!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> +!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> !CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_iandEx"} !CHECK: omp.parallel -!CHECK: omp.wsloop reduction(@[[IAND_DECLARE_I]] -> %[[X_REF]] : !fir.ref) for +!CHECK: omp.wsloop reduction(@[[IAND_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) for +!CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]] !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref -!CHECK: omp.reduction %[[Y_I]], %[[X_REF]] : i32, !fir.ref +!CHECK: %[[RES:.+]] = arith.andi %[[LPRV]], %[[Y_I]] : i32 +!CHECK: fir.store %[[RES]] to %[[PRV]] : !fir.ref !CHECK: omp.yield !CHECK: omp.terminator diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ieor.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ieor.f90 index beb899fa287ebb..f6027416246af1 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ieor.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ieor.f90 @@ -10,13 +10,15 @@ !CHECK: omp.yield(%[[IEOR_VAL_I]] : i32) !CHECK-LABEL: @_QPreduction_ieor -!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> +!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> !CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_ieorEx"} !CHECK: omp.parallel -!CHECK: omp.wsloop reduction(@[[IEOR_DECLARE_I]] -> %[[X_REF]] : !fir.ref) for +!CHECK: omp.wsloop reduction(@[[IEOR_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) for +!CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] 
: !fir.ref !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]] !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref -!CHECK: omp.reduction %[[Y_I]], %[[X_REF]] : i32, !fir.ref +!CHECK: %[[RES:.+]] = arith.xori %[[LPRV]], %[[Y_I]] : i32 +!CHECK: fir.store %[[RES]] to %[[PRV]] : !fir.ref !CHECK: omp.yield !CHECK: omp.terminator diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ior.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ior.f90 index 50291d228990b9..bc143611abe8d9 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ior.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ior.f90 @@ -13,10 +13,12 @@ !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> !CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_iorEx"} !CHECK: omp.parallel -!CHECK: omp.wsloop reduction(@[[IOR_DECLARE_I]] -> %[[X_REF]] : !fir.ref) for +!CHECK: omp.wsloop reduction(@[[IOR_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) for +!CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]] !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref -!CHECK: omp.reduction %[[Y_I]], %[[X_REF]] : i32, !fir.ref +!CHECK: %[[RES:.+]] = arith.ori %[[LPRV]], %[[Y_I]] : i32 +!CHECK: fir.store %[[RES]] to %[[PRV]] : !fir.ref !CHECK: omp.yield !CHECK: omp.terminator diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-and.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-and.f90 deleted file mode 100644 index 3f40a0597ae51a..00000000000000 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-and.f90 +++ /dev/null @@ -1,137 +0,0 @@ -! RUN: bbc -emit-fir -hlfir=false -fopenmp %s -o - | FileCheck %s -! 
RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp %s -o - | FileCheck %s - -!CHECK-LABEL: omp.reduction.declare -!CHECK-SAME: @[[RED_NAME:.*]] : !fir.logical<4> init { -!CHECK: ^bb0(%{{.*}}: !fir.logical<4>): -!CHECK: %true = arith.constant true -!CHECK: %[[true_fir:.*]] = fir.convert %true : (i1) -> !fir.logical<4> -!CHECK: omp.yield(%[[true_fir]] : !fir.logical<4>) -!CHECK: } combiner { -!CHECK: ^bb0(%[[ARG0:.*]]: !fir.logical<4>, %[[ARG1:.*]]: !fir.logical<4>): -!CHECK: %[[arg0_i1:.*]] = fir.convert %[[ARG0]] : (!fir.logical<4>) -> i1 -!CHECK: %[[arg1_i1:.*]] = fir.convert %[[ARG1]] : (!fir.logical<4>) -> i1 -!CHECK: %[[RES:.*]] = arith.andi %[[arg0_i1]], %[[arg1_i1]] : i1 -!CHECK: %[[RES_logical:.*]] = fir.convert %[[RES]] : (i1) -> !fir.logical<4> -!CHECK: omp.yield(%[[RES_logical]] : !fir.logical<4>) -!CHECK: } - -!CHECK-LABEL: func.func @_QPsimple_reduction( -!CHECK-SAME: %[[ARRAY:.*]]: !fir.ref>> {fir.bindc_name = "y"}) { -!CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"} -!CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"} -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C100:.*]] = arith.constant 100 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64:.*]] = fir.convert %[[I_PVT_VAL]] : (i32) -> i64 -!CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 -!CHECK: %[[SUBI:.*]] = arith.subi %[[CONVI_64]], %[[C1_64]] : i64 -!CHECK: %[[Y_PVT_REF:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI]] : (!fir.ref>>, i64) -> !fir.ref> -!CHECK: %[[YVAL:.*]] = fir.load 
%[[Y_PVT_REF]] : !fir.ref> -!CHECK: omp.reduction %[[YVAL]], %[[XREF]] : !fir.logical<4>, !fir.ref> -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return -subroutine simple_reduction(y) - logical :: x, y(100) - x = .true. - !$omp parallel - !$omp do reduction(.and.:x) - do i=1, 100 - x = x .and. y(i) - end do - !$omp end do - !$omp end parallel -end subroutine - -!CHECK-LABEL: func.func @_QPsimple_reduction_switch_order( -!CHECK-SAME: %[[ARRAY:.*]]: !fir.ref>> {fir.bindc_name = "y"}) { -!CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"} -!CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"} -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C100:.*]] = arith.constant 100 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64:.*]] = fir.convert %[[I_PVT_VAL]] : (i32) -> i64 -!CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 -!CHECK: %[[SUBI:.*]] = arith.subi %[[CONVI_64]], %[[C1_64]] : i64 -!CHECK: %[[Y_PVT_REF:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI]] : (!fir.ref>>, i64) -> !fir.ref> -!CHECK: %[[YVAL:.*]] = fir.load %[[Y_PVT_REF]] : !fir.ref> -!CHECK: omp.reduction %[[YVAL]], %[[XREF]] : !fir.logical<4>, !fir.ref> -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return -subroutine simple_reduction_switch_order(y) - logical :: x, y(100) - x = .true. - !$omp parallel - !$omp do reduction(.and.:x) - do i=1, 100 - x = y(i) .and. 
x - end do - !$omp end do - !$omp end parallel -end subroutine - -!CHECK-LABEL: func.func @_QPmultiple_reductions -!CHECK-SAME %[[ARRAY:.*]]: !fir.ref>> {fir.bindc_name = "w"}) { -!CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_reductionsEi"} -!CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFmultiple_reductionsEx"} -!CHECK: %[[YREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"} -!CHECK: %[[ZREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name = "_QFmultiple_reductionsEz"} -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C100:.*]] = arith.constant 100 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref>, @[[RED_NAME]] -> %[[YREF]] : !fir.ref>, @[[RED_NAME]] -> %[[ZREF]] : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL1:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64_1:.*]] = fir.convert %[[I_PVT_VAL1]] : (i32) -> i64 -!CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 -!CHECK: %[[SUBI_1:.*]] = arith.subi %[[CONVI_64_1]], %[[C1_64]] : i64 -!CHECK: %[[W_PVT_REF_1:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_1]] : (!fir.ref>>, i64) -> !fir.ref> -!CHECK: %[[WVAL:.*]] = fir.load %[[W_PVT_REF_1]] : !fir.ref> -!CHECK: omp.reduction %[[WVAL]], %[[XREF]] : !fir.logical<4>, !fir.ref> -!CHECK: %[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64_2:.*]] = fir.convert %[[I_PVT_VAL2]] : (i32) -> i64 -!CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 -!CHECK: %[[SUBI_2:.*]] = arith.subi %[[CONVI_64_2]], %[[C1_64]] : i64 -!CHECK: %[[W_PVT_REF_2:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_2]] : (!fir.ref>>, i64) -> !fir.ref> 
-!CHECK: %[[WVAL:.*]] = fir.load %[[W_PVT_REF_2]] : !fir.ref> -!CHECK: omp.reduction %[[WVAL]], %[[YREF]] : !fir.logical<4>, !fir.ref> -!CHECK: %[[I_PVT_VAL3:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64_3:.*]] = fir.convert %[[I_PVT_VAL3]] : (i32) -> i64 -!CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 -!CHECK: %[[SUBI_3:.*]] = arith.subi %[[CONVI_64_3]], %[[C1_64]] : i64 -!CHECK: %[[W_PVT_REF_3:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_3]] : (!fir.ref>>, i64) -> !fir.ref> -!CHECK: %[[WVAL:.*]] = fir.load %[[W_PVT_REF_3]] : !fir.ref> -!CHECK: omp.reduction %[[WVAL]], %[[ZREF]] : !fir.logical<4>, !fir.ref> -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return -subroutine multiple_reductions(w) - logical :: x,y,z,w(100) - x = .true. - y = .true. - z = .true. - !$omp parallel - !$omp do reduction(.and.:x,y,z) - do i=1, 100 - x = x .and. w(i) - y = y .and. w(i) - z = z .and. w(i) - end do - !$omp end do - !$omp end parallel -end subroutine - diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv.f90 index 16180da3ed4894..d5aacd74d8b105 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv.f90 @@ -1,42 +1,53 @@ ! RUN: bbc -emit-fir -hlfir=false -fopenmp %s -o - | FileCheck %s ! 
RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp %s -o - | FileCheck %s -!CHECK-LABEL: omp.reduction.declare -!CHECK-SAME: @[[RED_NAME:.*]] : !fir.logical<4> init { -!CHECK: ^bb0(%{{.*}}: !fir.logical<4>): -!CHECK: %true = arith.constant true -!CHECK: %[[true_fir:.*]] = fir.convert %true : (i1) -> !fir.logical<4> -!CHECK: omp.yield(%[[true_fir]] : !fir.logical<4>) -!CHECK: } combiner { -!CHECK: ^bb0(%[[ARG0:.*]]: !fir.logical<4>, %[[ARG1:.*]]: !fir.logical<4>): -!CHECK: %[[arg0_i1:.*]] = fir.convert %[[ARG0]] : (!fir.logical<4>) -> i1 -!CHECK: %[[arg1_i1:.*]] = fir.convert %[[ARG1]] : (!fir.logical<4>) -> i1 -!CHECK: %[[RES:.*]] = arith.cmpi eq, %[[arg0_i1]], %[[arg1_i1]] : i1 -!CHECK: %[[RES_logical:.*]] = fir.convert %[[RES]] : (i1) -> !fir.logical<4> -!CHECK: omp.yield(%[[RES_logical]] : !fir.logical<4>) -!CHECK: } +! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py + +! CHECK-LABEL: omp.reduction.declare @eqv_reduction : !fir.logical<4> init { +! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.logical<4>): +! CHECK: %[[VAL_1:.*]] = arith.constant true +! CHECK: %[[VAL_2:.*]] = fir.convert %[[VAL_1]] : (i1) -> !fir.logical<4> +! CHECK: omp.yield(%[[VAL_2]] : !fir.logical<4>) + +! CHECK-LABEL: } combiner { +! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.logical<4>, %[[VAL_1:.*]]: !fir.logical<4>): +! CHECK: %[[VAL_2:.*]] = fir.convert %[[VAL_0]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_3:.*]] = fir.convert %[[VAL_1]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_4:.*]] = arith.cmpi eq, %[[VAL_2]], %[[VAL_3]] : i1 +! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : (i1) -> !fir.logical<4> +! CHECK: omp.yield(%[[VAL_5]] : !fir.logical<4>) +! CHECK: } + +! CHECK-LABEL: func.func @_QPsimple_reduction( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>> {fir.bindc_name = "y"}) { +! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"} +! 
CHECK: %[[VAL_2:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"} +! CHECK: %[[VAL_3:.*]] = arith.constant true +! CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (i1) -> !fir.logical<4> +! CHECK: fir.store %[[VAL_4]] to %[[VAL_2]] : !fir.ref> +! CHECK: omp.parallel { +! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_7:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref>) for (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { +! CHECK: fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref +! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref> +! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_5]] : !fir.ref +! CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i32) -> i64 +! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i64 +! CHECK: %[[VAL_15:.*]] = arith.subi %[[VAL_13]], %[[VAL_14]] : i64 +! CHECK: %[[VAL_16:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_15]] : (!fir.ref>>, i64) -> !fir.ref> +! CHECK: %[[VAL_17:.*]] = fir.load %[[VAL_16]] : !fir.ref> +! CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_11]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_17]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_20:.*]] = arith.cmpi eq, %[[VAL_18]], %[[VAL_19]] : i1 +! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i1) -> !fir.logical<4> +! CHECK: fir.store %[[VAL_21]] to %[[VAL_9]] : !fir.ref> +! CHECK: omp.yield +! CHECK: omp.terminator +! 
CHECK: return -!CHECK-LABEL: func.func @_QPsimple_reduction( -!CHECK-SAME: %[[ARRAY:.*]]: !fir.ref>> {fir.bindc_name = "y"}) { -!CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"} -!CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"} -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C100:.*]] = arith.constant 100 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64:.*]] = fir.convert %[[I_PVT_VAL]] : (i32) -> i64 -!CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 -!CHECK: %[[SUBI:.*]] = arith.subi %[[CONVI_64]], %[[C1_64]] : i64 -!CHECK: %[[Y_PVT_REF:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI]] : (!fir.ref>>, i64) -> !fir.ref> -!CHECK: %[[YVAL:.*]] = fir.load %[[Y_PVT_REF]] : !fir.ref> -!CHECK: omp.reduction %[[YVAL]], %[[XREF]] : !fir.logical<4>, !fir.ref> -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return subroutine simple_reduction(y) logical :: x, y(100) x = .true. 
@@ -49,27 +60,36 @@ subroutine simple_reduction(y) !$omp end parallel end subroutine -!CHECK-LABEL: func.func @_QPsimple_reduction_switch_order( -!CHECK-SAME: %[[ARRAY:.*]]: !fir.ref>> {fir.bindc_name = "y"}) { -!CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"} -!CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"} -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C100:.*]] = arith.constant 100 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64:.*]] = fir.convert %[[I_PVT_VAL]] : (i32) -> i64 -!CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 -!CHECK: %[[SUBI:.*]] = arith.subi %[[CONVI_64]], %[[C1_64]] : i64 -!CHECK: %[[Y_PVT_REF:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI]] : (!fir.ref>>, i64) -> !fir.ref> -!CHECK: %[[YVAL:.*]] = fir.load %[[Y_PVT_REF]] : !fir.ref> -!CHECK: omp.reduction %[[YVAL]], %[[XREF]] : !fir.logical<4>, !fir.ref> -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return +! CHECK-LABEL: func.func @_QPsimple_reduction_switch_order( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>> {fir.bindc_name = "y"}) { +! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"} +! CHECK: %[[VAL_2:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"} +! CHECK: %[[VAL_3:.*]] = arith.constant true +! CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (i1) -> !fir.logical<4> +! CHECK: fir.store %[[VAL_4]] to %[[VAL_2]] : !fir.ref> +! CHECK: omp.parallel { +! 
CHECK: %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_7:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref>) for (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { +! CHECK: fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref +! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_5]] : !fir.ref +! CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> i64 +! CHECK: %[[VAL_13:.*]] = arith.constant 1 : i64 +! CHECK: %[[VAL_14:.*]] = arith.subi %[[VAL_12]], %[[VAL_13]] : i64 +! CHECK: %[[VAL_15:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_14]] : (!fir.ref>>, i64) -> !fir.ref> +! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_15]] : !fir.ref> +! CHECK: %[[VAL_17:.*]] = fir.load %[[VAL_9]] : !fir.ref> +! CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_16]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_17]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_20:.*]] = arith.cmpi eq, %[[VAL_18]], %[[VAL_19]] : i1 +! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i1) -> !fir.logical<4> +! CHECK: fir.store %[[VAL_21]] to %[[VAL_9]] : !fir.ref> +! CHECK: omp.yield +! CHECK: omp.terminator +! CHECK: return + subroutine simple_reduction_switch_order(y) logical :: x, y(100) x = .true. 
@@ -82,43 +102,68 @@ subroutine simple_reduction_switch_order(y) !$omp end parallel end subroutine -!CHECK-LABEL: func.func @_QPmultiple_reductions -!CHECK-SAME %[[ARRAY:.*]]: !fir.ref>> {fir.bindc_name = "w"}) { -!CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_reductionsEi"} -!CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFmultiple_reductionsEx"} -!CHECK: %[[YREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"} -!CHECK: %[[ZREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name = "_QFmultiple_reductionsEz"} -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C100:.*]] = arith.constant 100 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref>, @[[RED_NAME]] -> %[[YREF]] : !fir.ref>, @[[RED_NAME]] -> %[[ZREF]] : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL1:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64_1:.*]] = fir.convert %[[I_PVT_VAL1]] : (i32) -> i64 -!CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 -!CHECK: %[[SUBI_1:.*]] = arith.subi %[[CONVI_64_1]], %[[C1_64]] : i64 -!CHECK: %[[W_PVT_REF_1:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_1]] : (!fir.ref>>, i64) -> !fir.ref> -!CHECK: %[[WVAL:.*]] = fir.load %[[W_PVT_REF_1]] : !fir.ref> -!CHECK: omp.reduction %[[WVAL]], %[[XREF]] : !fir.logical<4>, !fir.ref> -!CHECK: %[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64_2:.*]] = fir.convert %[[I_PVT_VAL2]] : (i32) -> i64 -!CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 -!CHECK: %[[SUBI_2:.*]] = arith.subi %[[CONVI_64_2]], %[[C1_64]] : i64 -!CHECK: %[[W_PVT_REF_2:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_2]] : 
(!fir.ref>>, i64) -> !fir.ref> -!CHECK: %[[WVAL:.*]] = fir.load %[[W_PVT_REF_2]] : !fir.ref> -!CHECK: omp.reduction %[[WVAL]], %[[YREF]] : !fir.logical<4>, !fir.ref> -!CHECK: %[[I_PVT_VAL3:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64_3:.*]] = fir.convert %[[I_PVT_VAL3]] : (i32) -> i64 -!CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 -!CHECK: %[[SUBI_3:.*]] = arith.subi %[[CONVI_64_3]], %[[C1_64]] : i64 -!CHECK: %[[W_PVT_REF_3:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_3]] : (!fir.ref>>, i64) -> !fir.ref> -!CHECK: %[[WVAL:.*]] = fir.load %[[W_PVT_REF_3]] : !fir.ref> -!CHECK: omp.reduction %[[WVAL]], %[[ZREF]] : !fir.logical<4>, !fir.ref> -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return +! CHECK-LABEL: func.func @_QPmultiple_reductions( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>> {fir.bindc_name = "w"}) { +! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_reductionsEi"} +! CHECK: %[[VAL_2:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFmultiple_reductionsEx"} +! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"} +! CHECK: %[[VAL_4:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name = "_QFmultiple_reductionsEz"} +! CHECK: %[[VAL_5:.*]] = arith.constant true +! CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_5]] : (i1) -> !fir.logical<4> +! CHECK: fir.store %[[VAL_6]] to %[[VAL_2]] : !fir.ref> +! CHECK: %[[VAL_7:.*]] = arith.constant true +! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i1) -> !fir.logical<4> +! CHECK: fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref> +! CHECK: %[[VAL_9:.*]] = arith.constant true +! CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i1) -> !fir.logical<4> +! CHECK: fir.store %[[VAL_10]] to %[[VAL_4]] : !fir.ref> +! CHECK: omp.parallel { +! CHECK: %[[VAL_11:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 +! 
CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_15:.*]] : !fir.ref>, @eqv_reduction %[[VAL_3]] -> %[[VAL_16:.*]] : !fir.ref>, @eqv_reduction %[[VAL_4]] -> %[[VAL_17:.*]] : !fir.ref>) for (%[[VAL_18:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: fir.store %[[VAL_18]] to %[[VAL_11]] : !fir.ref +! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_15]] : !fir.ref> +! CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_11]] : !fir.ref +! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i32) -> i64 +! CHECK: %[[VAL_22:.*]] = arith.constant 1 : i64 +! CHECK: %[[VAL_23:.*]] = arith.subi %[[VAL_21]], %[[VAL_22]] : i64 +! CHECK: %[[VAL_24:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_23]] : (!fir.ref>>, i64) -> !fir.ref> +! CHECK: %[[VAL_25:.*]] = fir.load %[[VAL_24]] : !fir.ref> +! CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_19]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_27:.*]] = fir.convert %[[VAL_25]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_28:.*]] = arith.cmpi eq, %[[VAL_26]], %[[VAL_27]] : i1 +! CHECK: %[[VAL_29:.*]] = fir.convert %[[VAL_28]] : (i1) -> !fir.logical<4> +! CHECK: fir.store %[[VAL_29]] to %[[VAL_15]] : !fir.ref> +! CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_16]] : !fir.ref> +! CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_11]] : !fir.ref +! CHECK: %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64 +! CHECK: %[[VAL_33:.*]] = arith.constant 1 : i64 +! CHECK: %[[VAL_34:.*]] = arith.subi %[[VAL_32]], %[[VAL_33]] : i64 +! CHECK: %[[VAL_35:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_34]] : (!fir.ref>>, i64) -> !fir.ref> +! CHECK: %[[VAL_36:.*]] = fir.load %[[VAL_35]] : !fir.ref> +! CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_38:.*]] = fir.convert %[[VAL_36]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_39:.*]] = arith.cmpi eq, %[[VAL_37]], %[[VAL_38]] : i1 +! 
CHECK: %[[VAL_40:.*]] = fir.convert %[[VAL_39]] : (i1) -> !fir.logical<4> +! CHECK: fir.store %[[VAL_40]] to %[[VAL_16]] : !fir.ref> +! CHECK: %[[VAL_41:.*]] = fir.load %[[VAL_17]] : !fir.ref> +! CHECK: %[[VAL_42:.*]] = fir.load %[[VAL_11]] : !fir.ref +! CHECK: %[[VAL_43:.*]] = fir.convert %[[VAL_42]] : (i32) -> i64 +! CHECK: %[[VAL_44:.*]] = arith.constant 1 : i64 +! CHECK: %[[VAL_45:.*]] = arith.subi %[[VAL_43]], %[[VAL_44]] : i64 +! CHECK: %[[VAL_46:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_45]] : (!fir.ref>>, i64) -> !fir.ref> +! CHECK: %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref> +! CHECK: %[[VAL_48:.*]] = fir.convert %[[VAL_41]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_49:.*]] = fir.convert %[[VAL_47]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_50:.*]] = arith.cmpi eq, %[[VAL_48]], %[[VAL_49]] : i1 +! CHECK: %[[VAL_51:.*]] = fir.convert %[[VAL_50]] : (i1) -> !fir.logical<4> +! CHECK: fir.store %[[VAL_51]] to %[[VAL_17]] : !fir.ref> +! CHECK: omp.yield +! CHECK: omp.terminator +! CHECK: return + subroutine multiple_reductions(w) logical :: x,y,z,w(100) x = .true. diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv.f90 index 372f131e3d9c4f..9f44e0e26d4073 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv.f90 @@ -1,42 +1,54 @@ ! RUN: bbc -emit-fir -hlfir=false -fopenmp %s -o - | FileCheck %s ! 
RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp %s -o - | FileCheck %s -!CHECK-LABEL: omp.reduction.declare -!CHECK-SAME: @[[RED_NAME:.*]] : !fir.logical<4> init { -!CHECK: ^bb0(%{{.*}}: !fir.logical<4>): -!CHECK: %false = arith.constant false -!CHECK: %[[false_fir:.*]] = fir.convert %false : (i1) -> !fir.logical<4> -!CHECK: omp.yield(%[[false_fir]] : !fir.logical<4>) -!CHECK: } combiner { -!CHECK: ^bb0(%[[ARG0:.*]]: !fir.logical<4>, %[[ARG1:.*]]: !fir.logical<4>): -!CHECK: %[[arg0_i1:.*]] = fir.convert %[[ARG0]] : (!fir.logical<4>) -> i1 -!CHECK: %[[arg1_i1:.*]] = fir.convert %[[ARG1]] : (!fir.logical<4>) -> i1 -!CHECK: %[[RES:.*]] = arith.cmpi ne, %[[arg0_i1]], %[[arg1_i1]] : i1 -!CHECK: %[[RES_logical:.*]] = fir.convert %[[RES]] : (i1) -> !fir.logical<4> -!CHECK: omp.yield(%[[RES_logical]] : !fir.logical<4>) -!CHECK: } +! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py + + +! CHECK-LABEL: omp.reduction.declare @neqv_reduction : !fir.logical<4> init { +! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.logical<4>): +! CHECK: %[[VAL_1:.*]] = arith.constant false +! CHECK: %[[VAL_2:.*]] = fir.convert %[[VAL_1]] : (i1) -> !fir.logical<4> +! CHECK: omp.yield(%[[VAL_2]] : !fir.logical<4>) + +! CHECK-LABEL: } combiner { +! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.logical<4>, %[[VAL_1:.*]]: !fir.logical<4>): +! CHECK: %[[VAL_2:.*]] = fir.convert %[[VAL_0]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_3:.*]] = fir.convert %[[VAL_1]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_4:.*]] = arith.cmpi ne, %[[VAL_2]], %[[VAL_3]] : i1 +! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : (i1) -> !fir.logical<4> +! CHECK: omp.yield(%[[VAL_5]] : !fir.logical<4>) +! CHECK: } + +! CHECK-LABEL: func.func @_QPsimple_reduction( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>> {fir.bindc_name = "y"}) { +! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"} +! 
CHECK: %[[VAL_2:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"} +! CHECK: %[[VAL_3:.*]] = arith.constant true +! CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (i1) -> !fir.logical<4> +! CHECK: fir.store %[[VAL_4]] to %[[VAL_2]] : !fir.ref> +! CHECK: omp.parallel { +! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_7:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref>) for (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { +! CHECK: fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref +! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref> +! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_5]] : !fir.ref +! CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i32) -> i64 +! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i64 +! CHECK: %[[VAL_15:.*]] = arith.subi %[[VAL_13]], %[[VAL_14]] : i64 +! CHECK: %[[VAL_16:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_15]] : (!fir.ref>>, i64) -> !fir.ref> +! CHECK: %[[VAL_17:.*]] = fir.load %[[VAL_16]] : !fir.ref> +! CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_11]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_17]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_20:.*]] = arith.cmpi ne, %[[VAL_18]], %[[VAL_19]] : i1 +! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i1) -> !fir.logical<4> +! CHECK: fir.store %[[VAL_21]] to %[[VAL_9]] : !fir.ref> +! CHECK: omp.yield +! CHECK: omp.terminator +! 
CHECK: return -!CHECK-LABEL: func.func @_QPsimple_reduction( -!CHECK-SAME: %[[ARRAY:.*]]: !fir.ref>> {fir.bindc_name = "y"}) { -!CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"} -!CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"} -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C100:.*]] = arith.constant 100 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64:.*]] = fir.convert %[[I_PVT_VAL]] : (i32) -> i64 -!CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 -!CHECK: %[[SUBI:.*]] = arith.subi %[[CONVI_64]], %[[C1_64]] : i64 -!CHECK: %[[Y_PVT_REF:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI]] : (!fir.ref>>, i64) -> !fir.ref> -!CHECK: %[[YVAL:.*]] = fir.load %[[Y_PVT_REF]] : !fir.ref> -!CHECK: omp.reduction %[[YVAL]], %[[XREF]] : !fir.logical<4>, !fir.ref> -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return subroutine simple_reduction(y) logical :: x, y(100) x = .true. 
@@ -49,27 +61,36 @@ subroutine simple_reduction(y) !$omp end parallel end subroutine -!CHECK-LABEL: func.func @_QPsimple_reduction_switch_order( -!CHECK-SAME: %[[ARRAY:.*]]: !fir.ref>> {fir.bindc_name = "y"}) { -!CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"} -!CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"} -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C100:.*]] = arith.constant 100 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64:.*]] = fir.convert %[[I_PVT_VAL]] : (i32) -> i64 -!CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 -!CHECK: %[[SUBI:.*]] = arith.subi %[[CONVI_64]], %[[C1_64]] : i64 -!CHECK: %[[Y_PVT_REF:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI]] : (!fir.ref>>, i64) -> !fir.ref> -!CHECK: %[[YVAL:.*]] = fir.load %[[Y_PVT_REF]] : !fir.ref> -!CHECK: omp.reduction %[[YVAL]], %[[XREF]] : !fir.logical<4>, !fir.ref> -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return +! CHECK-LABEL: func.func @_QPsimple_reduction_switch_order( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>> {fir.bindc_name = "y"}) { +! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"} +! CHECK: %[[VAL_2:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"} +! CHECK: %[[VAL_3:.*]] = arith.constant true +! CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (i1) -> !fir.logical<4> +! CHECK: fir.store %[[VAL_4]] to %[[VAL_2]] : !fir.ref> +! CHECK: omp.parallel { +! 
CHECK: %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_7:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref>) for (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { +! CHECK: fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref +! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_5]] : !fir.ref +! CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> i64 +! CHECK: %[[VAL_13:.*]] = arith.constant 1 : i64 +! CHECK: %[[VAL_14:.*]] = arith.subi %[[VAL_12]], %[[VAL_13]] : i64 +! CHECK: %[[VAL_15:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_14]] : (!fir.ref>>, i64) -> !fir.ref> +! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_15]] : !fir.ref> +! CHECK: %[[VAL_17:.*]] = fir.load %[[VAL_9]] : !fir.ref> +! CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_16]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_17]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_20:.*]] = arith.cmpi ne, %[[VAL_18]], %[[VAL_19]] : i1 +! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i1) -> !fir.logical<4> +! CHECK: fir.store %[[VAL_21]] to %[[VAL_9]] : !fir.ref> +! CHECK: omp.yield +! CHECK: omp.terminator +! CHECK: return + subroutine simple_reduction_switch_order(y) logical :: x, y(100) x = .true. 
@@ -82,43 +103,69 @@ subroutine simple_reduction_switch_order(y) !$omp end parallel end subroutine -!CHECK-LABEL: func.func @_QPmultiple_reductions -!CHECK-SAME %[[ARRAY:.*]]: !fir.ref>> {fir.bindc_name = "w"}) { -!CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_reductionsEi"} -!CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFmultiple_reductionsEx"} -!CHECK: %[[YREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"} -!CHECK: %[[ZREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name = "_QFmultiple_reductionsEz"} -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C100:.*]] = arith.constant 100 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref>, @[[RED_NAME]] -> %[[YREF]] : !fir.ref>, @[[RED_NAME]] -> %[[ZREF]] : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL1:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64_1:.*]] = fir.convert %[[I_PVT_VAL1]] : (i32) -> i64 -!CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 -!CHECK: %[[SUBI_1:.*]] = arith.subi %[[CONVI_64_1]], %[[C1_64]] : i64 -!CHECK: %[[W_PVT_REF_1:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_1]] : (!fir.ref>>, i64) -> !fir.ref> -!CHECK: %[[WVAL:.*]] = fir.load %[[W_PVT_REF_1]] : !fir.ref> -!CHECK: omp.reduction %[[WVAL]], %[[XREF]] : !fir.logical<4>, !fir.ref> -!CHECK: %[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64_2:.*]] = fir.convert %[[I_PVT_VAL2]] : (i32) -> i64 -!CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 -!CHECK: %[[SUBI_2:.*]] = arith.subi %[[CONVI_64_2]], %[[C1_64]] : i64 -!CHECK: %[[W_PVT_REF_2:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_2]] : 
(!fir.ref>>, i64) -> !fir.ref> -!CHECK: %[[WVAL:.*]] = fir.load %[[W_PVT_REF_2]] : !fir.ref> -!CHECK: omp.reduction %[[WVAL]], %[[YREF]] : !fir.logical<4>, !fir.ref> -!CHECK: %[[I_PVT_VAL3:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64_3:.*]] = fir.convert %[[I_PVT_VAL3]] : (i32) -> i64 -!CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 -!CHECK: %[[SUBI_3:.*]] = arith.subi %[[CONVI_64_3]], %[[C1_64]] : i64 -!CHECK: %[[W_PVT_REF_3:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_3]] : (!fir.ref>>, i64) -> !fir.ref> -!CHECK: %[[WVAL:.*]] = fir.load %[[W_PVT_REF_3]] : !fir.ref> -!CHECK: omp.reduction %[[WVAL]], %[[ZREF]] : !fir.logical<4>, !fir.ref> -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return +! CHECK-LABEL: func.func @_QPmultiple_reductions( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>> {fir.bindc_name = "w"}) { +! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_reductionsEi"} +! CHECK: %[[VAL_2:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFmultiple_reductionsEx"} +! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"} +! CHECK: %[[VAL_4:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name = "_QFmultiple_reductionsEz"} +! CHECK: %[[VAL_5:.*]] = arith.constant true +! CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_5]] : (i1) -> !fir.logical<4> +! CHECK: fir.store %[[VAL_6]] to %[[VAL_2]] : !fir.ref> +! CHECK: %[[VAL_7:.*]] = arith.constant true +! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i1) -> !fir.logical<4> +! CHECK: fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref> +! CHECK: %[[VAL_9:.*]] = arith.constant true +! CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i1) -> !fir.logical<4> +! CHECK: fir.store %[[VAL_10]] to %[[VAL_4]] : !fir.ref> +! CHECK: omp.parallel { +! CHECK: %[[VAL_11:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 +! 
CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_15:.*]] : !fir.ref>, @neqv_reduction %[[VAL_3]] -> %[[VAL_16:.*]] : !fir.ref>, @neqv_reduction %[[VAL_4]] -> %[[VAL_17:.*]] : !fir.ref>) for (%[[VAL_18:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: fir.store %[[VAL_18]] to %[[VAL_11]] : !fir.ref +! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_15]] : !fir.ref> +! CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_11]] : !fir.ref +! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i32) -> i64 +! CHECK: %[[VAL_22:.*]] = arith.constant 1 : i64 +! CHECK: %[[VAL_23:.*]] = arith.subi %[[VAL_21]], %[[VAL_22]] : i64 +! CHECK: %[[VAL_24:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_23]] : (!fir.ref>>, i64) -> !fir.ref> +! CHECK: %[[VAL_25:.*]] = fir.load %[[VAL_24]] : !fir.ref> +! CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_19]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_27:.*]] = fir.convert %[[VAL_25]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_28:.*]] = arith.cmpi ne, %[[VAL_26]], %[[VAL_27]] : i1 +! CHECK: %[[VAL_29:.*]] = fir.convert %[[VAL_28]] : (i1) -> !fir.logical<4> +! CHECK: fir.store %[[VAL_29]] to %[[VAL_15]] : !fir.ref> +! CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_16]] : !fir.ref> +! CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_11]] : !fir.ref +! CHECK: %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64 +! CHECK: %[[VAL_33:.*]] = arith.constant 1 : i64 +! CHECK: %[[VAL_34:.*]] = arith.subi %[[VAL_32]], %[[VAL_33]] : i64 +! CHECK: %[[VAL_35:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_34]] : (!fir.ref>>, i64) -> !fir.ref> +! CHECK: %[[VAL_36:.*]] = fir.load %[[VAL_35]] : !fir.ref> +! CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_38:.*]] = fir.convert %[[VAL_36]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_39:.*]] = arith.cmpi ne, %[[VAL_37]], %[[VAL_38]] : i1 +! 
CHECK: %[[VAL_40:.*]] = fir.convert %[[VAL_39]] : (i1) -> !fir.logical<4> +! CHECK: fir.store %[[VAL_40]] to %[[VAL_16]] : !fir.ref> +! CHECK: %[[VAL_41:.*]] = fir.load %[[VAL_17]] : !fir.ref> +! CHECK: %[[VAL_42:.*]] = fir.load %[[VAL_11]] : !fir.ref +! CHECK: %[[VAL_43:.*]] = fir.convert %[[VAL_42]] : (i32) -> i64 +! CHECK: %[[VAL_44:.*]] = arith.constant 1 : i64 +! CHECK: %[[VAL_45:.*]] = arith.subi %[[VAL_43]], %[[VAL_44]] : i64 +! CHECK: %[[VAL_46:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_45]] : (!fir.ref>>, i64) -> !fir.ref> +! CHECK: %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref> +! CHECK: %[[VAL_48:.*]] = fir.convert %[[VAL_41]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_49:.*]] = fir.convert %[[VAL_47]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_50:.*]] = arith.cmpi ne, %[[VAL_48]], %[[VAL_49]] : i1 +! CHECK: %[[VAL_51:.*]] = fir.convert %[[VAL_50]] : (i1) -> !fir.logical<4> +! CHECK: fir.store %[[VAL_51]] to %[[VAL_17]] : !fir.ref> +! CHECK: omp.yield +! CHECK: omp.terminator +! CHECK: return + + subroutine multiple_reductions(w) logical :: x,y,z,w(100) x = .true. diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-or.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-or.f90 deleted file mode 100644 index 597014c099686a..00000000000000 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-or.f90 +++ /dev/null @@ -1,137 +0,0 @@ -! RUN: bbc -emit-fir -hlfir=false -fopenmp %s -o - | FileCheck %s -! 
RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp %s -o - | FileCheck %s - -!CHECK-LABEL: omp.reduction.declare -!CHECK-SAME: @[[RED_NAME:.*]] : !fir.logical<4> init { -!CHECK: ^bb0(%{{.*}}: !fir.logical<4>): -!CHECK: %false = arith.constant false -!CHECK: %[[false_fir:.*]] = fir.convert %false : (i1) -> !fir.logical<4> -!CHECK: omp.yield(%[[false_fir]] : !fir.logical<4>) -!CHECK: } combiner { -!CHECK: ^bb0(%[[ARG0:.*]]: !fir.logical<4>, %[[ARG1:.*]]: !fir.logical<4>): -!CHECK: %[[arg0_i1:.*]] = fir.convert %[[ARG0]] : (!fir.logical<4>) -> i1 -!CHECK: %[[arg1_i1:.*]] = fir.convert %[[ARG1]] : (!fir.logical<4>) -> i1 -!CHECK: %[[RES:.*]] = arith.ori %[[arg0_i1]], %[[arg1_i1]] : i1 -!CHECK: %[[RES_logical:.*]] = fir.convert %[[RES]] : (i1) -> !fir.logical<4> -!CHECK: omp.yield(%[[RES_logical]] : !fir.logical<4>) -!CHECK: } - -!CHECK-LABEL: func.func @_QPsimple_reduction( -!CHECK-SAME: %[[ARRAY:.*]]: !fir.ref>> {fir.bindc_name = "y"}) { -!CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"} -!CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"} -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C100:.*]] = arith.constant 100 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64:.*]] = fir.convert %[[I_PVT_VAL]] : (i32) -> i64 -!CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 -!CHECK: %[[SUBI:.*]] = arith.subi %[[CONVI_64]], %[[C1_64]] : i64 -!CHECK: %[[Y_PVT_REF:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI]] : (!fir.ref>>, i64) -> !fir.ref> -!CHECK: %[[YVAL:.*]] = fir.load 
%[[Y_PVT_REF]] : !fir.ref> -!CHECK: omp.reduction %[[YVAL]], %[[XREF]] : !fir.logical<4>, !fir.ref> -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return -subroutine simple_reduction(y) - logical :: x, y(100) - x = .true. - !$omp parallel - !$omp do reduction(.or.:x) - do i=1, 100 - x = x .or. y(i) - end do - !$omp end do - !$omp end parallel -end subroutine - -!CHECK-LABEL: func.func @_QPsimple_reduction_switch_order( -!CHECK-SAME: %[[ARRAY:.*]]: !fir.ref>> {fir.bindc_name = "y"}) { -!CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"} -!CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"} -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C100:.*]] = arith.constant 100 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64:.*]] = fir.convert %[[I_PVT_VAL]] : (i32) -> i64 -!CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 -!CHECK: %[[SUBI:.*]] = arith.subi %[[CONVI_64]], %[[C1_64]] : i64 -!CHECK: %[[Y_PVT_REF:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI]] : (!fir.ref>>, i64) -> !fir.ref> -!CHECK: %[[YVAL:.*]] = fir.load %[[Y_PVT_REF]] : !fir.ref> -!CHECK: omp.reduction %[[YVAL]], %[[XREF]] : !fir.logical<4>, !fir.ref> -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return -subroutine simple_reduction_switch_order(y) - logical :: x, y(100) - x = .true. - !$omp parallel - !$omp do reduction(.or.:x) - do i=1, 100 - x = y(i) .or. 
x - end do - !$omp end do - !$omp end parallel -end subroutine - -!CHECK-LABEL: func.func @_QPmultiple_reductions -!CHECK-SAME %[[ARRAY:.*]]: !fir.ref>> {fir.bindc_name = "w"}) { -!CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_reductionsEi"} -!CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFmultiple_reductionsEx"} -!CHECK: %[[YREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"} -!CHECK: %[[ZREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name = "_QFmultiple_reductionsEz"} -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C100:.*]] = arith.constant 100 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref>, @[[RED_NAME]] -> %[[YREF]] : !fir.ref>, @[[RED_NAME]] -> %[[ZREF]] : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL1:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64_1:.*]] = fir.convert %[[I_PVT_VAL1]] : (i32) -> i64 -!CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 -!CHECK: %[[SUBI_1:.*]] = arith.subi %[[CONVI_64_1]], %[[C1_64]] : i64 -!CHECK: %[[W_PVT_REF_1:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_1]] : (!fir.ref>>, i64) -> !fir.ref> -!CHECK: %[[WVAL:.*]] = fir.load %[[W_PVT_REF_1]] : !fir.ref> -!CHECK: omp.reduction %[[WVAL]], %[[XREF]] : !fir.logical<4>, !fir.ref> -!CHECK: %[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64_2:.*]] = fir.convert %[[I_PVT_VAL2]] : (i32) -> i64 -!CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 -!CHECK: %[[SUBI_2:.*]] = arith.subi %[[CONVI_64_2]], %[[C1_64]] : i64 -!CHECK: %[[W_PVT_REF_2:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_2]] : (!fir.ref>>, i64) -> !fir.ref> 
-!CHECK: %[[WVAL:.*]] = fir.load %[[W_PVT_REF_2]] : !fir.ref> -!CHECK: omp.reduction %[[WVAL]], %[[YREF]] : !fir.logical<4>, !fir.ref> -!CHECK: %[[I_PVT_VAL3:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64_3:.*]] = fir.convert %[[I_PVT_VAL3]] : (i32) -> i64 -!CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 -!CHECK: %[[SUBI_3:.*]] = arith.subi %[[CONVI_64_3]], %[[C1_64]] : i64 -!CHECK: %[[W_PVT_REF_3:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_3]] : (!fir.ref>>, i64) -> !fir.ref> -!CHECK: %[[WVAL:.*]] = fir.load %[[W_PVT_REF_3]] : !fir.ref> -!CHECK: omp.reduction %[[WVAL]], %[[ZREF]] : !fir.logical<4>, !fir.ref> -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return -subroutine multiple_reductions(w) - logical :: x,y,z,w(100) - x = .true. - y = .true. - z = .true. - !$omp parallel - !$omp do reduction(.or.:x,y,z) - do i=1, 100 - x = x .or. w(i) - y = y .or. w(i) - z = z .or. w(i) - end do - !$omp end do - !$omp end parallel -end subroutine - diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max.f90 index 0f01b4697be86e..af79658491b568 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max.f90 @@ -21,21 +21,24 @@ !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> !CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_max_intEx"} !CHECK: omp.parallel -!CHECK: omp.wsloop reduction(@[[MAX_DECLARE_I]] -> %[[X_REF]] : !fir.ref) for +!CHECK: omp.wsloop reduction(@[[MAX_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) for +!CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]] !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref -!CHECK: omp.reduction %[[Y_I]], %[[X_REF]] : i32, !fir.ref -!CHECK: omp.yield +!CHECK: %[[RES:.+]] = arith.cmpi sgt, %[[LPRV]], %[[Y_I]] : i32 +!CHECK: %[[SEL:.+]] = arith.select %[[RES]], %[[LPRV]], %[[Y_I]] +!CHECK: fir.store 
%[[SEL]] to %[[PRV]] : !fir.ref !CHECK: omp.terminator !CHECK-LABEL: @_QPreduction_max_real !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> !CHECK: %[[X_REF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFreduction_max_realEx"} !CHECK: omp.parallel -!CHECK: omp.wsloop reduction(@[[MAX_DECLARE_F]] -> %[[X_REF]] : !fir.ref) for +!CHECK: omp.wsloop reduction(@[[MAX_DECLARE_F]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) for +!CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]] !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref -!CHECK: omp.reduction %[[Y_I]], %[[X_REF]] : f32, !fir.ref +!CHECK: %[[RES:.+]] = arith.cmpf ogt, %[[Y_I]], %[[LPRV]] {{.*}} : f32 !CHECK: omp.yield !CHECK: omp.terminator diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min.f90 index 22cdd41c95179b..1095718b4b13fd 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min.f90 @@ -21,10 +21,13 @@ !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> !CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_min_intEx"} !CHECK: omp.parallel -!CHECK: omp.wsloop reduction(@[[MIN_DECLARE_I]] -> %[[X_REF]] : !fir.ref) for +!CHECK: omp.wsloop reduction(@[[MIN_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) for +!CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]] !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref -!CHECK: omp.reduction %[[Y_I]], %[[X_REF]] : i32, !fir.ref +!CHECK: %[[RES:.+]] = arith.cmpi slt, %[[LPRV]], %[[Y_I]] : i32 +!CHECK: %[[SEL:.+]] = arith.select %[[RES]], %[[LPRV]], %[[Y_I]] +!CHECK: fir.store %[[SEL]] to %[[PRV]] : !fir.ref !CHECK: omp.yield !CHECK: omp.terminator @@ -32,10 +35,11 @@ !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> !CHECK: %[[X_REF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFreduction_min_realEx"} !CHECK: 
omp.parallel -!CHECK: omp.wsloop reduction(@[[MIN_DECLARE_F]] -> %[[X_REF]] : !fir.ref) for +!CHECK: omp.wsloop reduction(@[[MIN_DECLARE_F]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) for +!CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]] !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref -!CHECK: omp.reduction %[[Y_I]], %[[X_REF]] : f32, !fir.ref +!CHECK: %[[RES:.+]] = arith.cmpf ogt, %[[Y_I]], %[[LPRV]] {{.*}} : f32 !CHECK: omp.yield !CHECK: omp.terminator diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-mul.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-mul.f90 deleted file mode 100644 index 1c27f557fb30c9..00000000000000 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-mul.f90 +++ /dev/null @@ -1,274 +0,0 @@ -! RUN: bbc -emit-fir -hlfir=false -fopenmp %s -o - | FileCheck %s -! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp %s -o - | FileCheck %s - -!CHECK-LABEL: omp.reduction.declare -!CHECK-SAME: @[[RED_F64_NAME:.*]] : f64 init { -!CHECK: ^bb0(%{{.*}}: f64): -!CHECK: %[[C0_1:.*]] = arith.constant 1.000000e+00 : f64 -!CHECK: omp.yield(%[[C0_1]] : f64) -!CHECK: } combiner { -!CHECK: ^bb0(%[[ARG0:.*]]: f64, %[[ARG1:.*]]: f64): -!CHECK: %[[RES:.*]] = arith.mulf %[[ARG0]], %[[ARG1]] {{.*}}: f64 -!CHECK: omp.yield(%[[RES]] : f64) -!CHECK: } - -!CHECK-LABEL: omp.reduction.declare -!CHECK-SAME: @[[RED_I64_NAME:.*]] : i64 init { -!CHECK: ^bb0(%{{.*}}: i64): -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i64 -!CHECK: omp.yield(%[[C1_1]] : i64) -!CHECK: } combiner { -!CHECK: ^bb0(%[[ARG0:.*]]: i64, %[[ARG1:.*]]: i64): -!CHECK: %[[RES:.*]] = arith.muli %[[ARG0]], %[[ARG1]] : i64 -!CHECK: omp.yield(%[[RES]] : i64) -!CHECK: } - -!CHECK-LABEL: omp.reduction.declare -!CHECK-SAME: @[[RED_F32_NAME:.*]] : f32 init { -!CHECK: ^bb0(%{{.*}}: f32): -!CHECK: %[[C0_1:.*]] = arith.constant 1.000000e+00 : f32 -!CHECK: omp.yield(%[[C0_1]] : f32) -!CHECK: } combiner { -!CHECK: ^bb0(%[[ARG0:.*]]: f32, 
%[[ARG1:.*]]: f32): -!CHECK: %[[RES:.*]] = arith.mulf %[[ARG0]], %[[ARG1]] {{.*}}: f32 -!CHECK: omp.yield(%[[RES]] : f32) -!CHECK: } - -!CHECK-LABEL: omp.reduction.declare -!CHECK-SAME: @[[RED_I32_NAME:.*]] : i32 init { -!CHECK: ^bb0(%{{.*}}: i32): -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: omp.yield(%[[C1_1]] : i32) -!CHECK: } combiner { -!CHECK: ^bb0(%[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32): -!CHECK: %[[RES:.*]] = arith.muli %[[ARG0]], %[[ARG1]] : i32 -!CHECK: omp.yield(%[[RES]] : i32) -!CHECK: } - -!CHECK-LABEL: func.func @_QPsimple_int_reduction -!CHECK: %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_int_reductionEx"} -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: fir.store %[[C1_2]] to %[[XREF]] : !fir.ref -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C10:.*]] = arith.constant 10 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C10]]) inclusive step (%[[C1_2]]) -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL]], %[[XREF]] : i32, !fir.ref -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return - -subroutine simple_int_reduction - integer :: x - x = 1 - !$omp parallel - !$omp do reduction(*:x) - do i=1, 10 - x = x * i - end do - !$omp end do - !$omp end parallel -end subroutine - -!CHECK-LABEL: func.func @_QPsimple_real_reduction -!CHECK: %[[XREF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFsimple_real_reductionEx"} -!CHECK: %[[C0_2:.*]] = arith.constant 1.000000e+00 : f32 -!CHECK: fir.store %[[C0_2]] to %[[XREF]] : !fir.ref -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: 
%[[C100:.*]] = arith.constant 10 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_F32_NAME]] -> %[[XREF]] : !fir.ref) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL_i32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL_f32:.*]] = fir.convert %[[I_PVT_VAL_i32]] : (i32) -> f32 -!CHECK: omp.reduction %[[I_PVT_VAL_f32]], %[[XREF]] : f32, !fir.ref -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return -subroutine simple_real_reduction - real :: x - x = 1.0 - !$omp parallel - !$omp do reduction(*:x) - do i=1, 10 - x = x * i - end do - !$omp end do - !$omp end parallel -end subroutine - -!CHECK-LABEL: func.func @_QPsimple_int_reduction_switch_order -!CHECK: %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_int_reduction_switch_orderEx"} -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: fir.store %[[C1_2]] to %[[XREF]] : !fir.ref -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C10:.*]] = arith.constant 10 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C10]]) inclusive step (%[[C1_2]]) -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL]], %[[XREF]] : i32, !fir.ref -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return -subroutine simple_int_reduction_switch_order - integer :: x - x = 1 - !$omp parallel - !$omp do reduction(*:x) - do i=1, 10 - x = i * x - end do - !$omp end do - !$omp end parallel -end subroutine - -!CHECK-LABEL: func.func @_QPsimple_real_reduction_switch_order -!CHECK: %[[XREF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name 
= "_QFsimple_real_reduction_switch_orderEx"} -!CHECK: %[[C0_2:.*]] = arith.constant 1.000000e+00 : f32 -!CHECK: fir.store %[[C0_2]] to %[[XREF]] : !fir.ref -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C100:.*]] = arith.constant 10 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_F32_NAME]] -> %[[XREF]] : !fir.ref) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL_i32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL_f32:.*]] = fir.convert %[[I_PVT_VAL_i32]] : (i32) -> f32 -!CHECK: omp.reduction %[[I_PVT_VAL_f32]], %[[XREF]] : f32, !fir.ref -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return -subroutine simple_real_reduction_switch_order - real :: x - x = 1.0 - !$omp parallel - !$omp do reduction(*:x) - do i=1, 10 - x = i * x - end do - !$omp end do - !$omp end parallel -end subroutine - -!CHECK-LABEL: func.func @_QPmultiple_int_reductions_same_type -!CHECK: %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_int_reductions_same_typeEx"} -!CHECK: %[[YREF:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFmultiple_int_reductions_same_typeEy"} -!CHECK: %[[ZREF:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFmultiple_int_reductions_same_typeEz"} -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref, @[[RED_I32_NAME]] -> %[[YREF]] : !fir.ref, @[[RED_I32_NAME]] -> %[[ZREF]] : !fir.ref) for (%[[IVAL]]) : i32 -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL1:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL1]], %[[XREF]] : i32, !fir.ref -!CHECK: %[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_REF]] : 
!fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL2]], %[[YREF]] : i32, !fir.ref -!CHECK: %[[I_PVT_VAL3:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL3]], %[[ZREF]] : i32, !fir.ref -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return -subroutine multiple_int_reductions_same_type - integer :: x,y,z - x = 1 - y = 1 - z = 1 - !$omp parallel - !$omp do reduction(*:x,y,z) - do i=1, 10 - x = x * i - y = y * i - z = z * i - end do - !$omp end do - !$omp end parallel -end subroutine - -!CHECK-LABEL: func.func @_QPmultiple_real_reductions_same_type -!CHECK: %[[XREF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFmultiple_real_reductions_same_typeEx"} -!CHECK: %[[YREF:.*]] = fir.alloca f32 {bindc_name = "y", uniq_name = "_QFmultiple_real_reductions_same_typeEy"} -!CHECK: %[[ZREF:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_real_reductions_same_typeEz"} -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: omp.wsloop reduction(@[[RED_F32_NAME]] -> %[[XREF]] : !fir.ref, @[[RED_F32_NAME]] -> %[[YREF]] : !fir.ref, @[[RED_F32_NAME]] -> %[[ZREF]] : !fir.ref) for (%[[IVAL]]) : i32 -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL1_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL1_F32:.*]] = fir.convert %[[I_PVT_VAL1_I32]] : (i32) -> f32 -!CHECK: omp.reduction %[[I_PVT_VAL1_F32]], %[[XREF]] : f32, !fir.ref -!CHECK: %[[I_PVT_VAL2_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL2_F32:.*]] = fir.convert %[[I_PVT_VAL2_I32]] : (i32) -> f32 -!CHECK: omp.reduction %[[I_PVT_VAL2_F32]], %[[YREF]] : f32, !fir.ref -!CHECK: %[[I_PVT_VAL3_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL3_F32:.*]] = fir.convert %[[I_PVT_VAL3_I32]] : (i32) -> f32 -!CHECK: omp.reduction %[[I_PVT_VAL3_F32]], %[[ZREF]] : f32, !fir.ref -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return -subroutine 
multiple_real_reductions_same_type - real :: x,y,z - x = 1 - y = 1 - z = 1 - !$omp parallel - !$omp do reduction(*:x,y,z) - do i=1, 10 - x = x * i - y = y * i - z = z * i - end do - !$omp end do - !$omp end parallel -end subroutine - -!CHECK-LABEL: func.func @_QPmultiple_reductions_different_type -!CHECK: %[[WREF:.*]] = fir.alloca f64 {bindc_name = "w", uniq_name = "_QFmultiple_reductions_different_typeEw"} -!CHECK: %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_reductions_different_typeEx"} -!CHECK: %[[YREF:.*]] = fir.alloca i64 {bindc_name = "y", uniq_name = "_QFmultiple_reductions_different_typeEy"} -!CHECK: %[[ZREF:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_reductions_different_typeEz"} -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %2 : !fir.ref, @[[RED_I64_NAME]] -> %3 : !fir.ref, @[[RED_F32_NAME]] -> %4 : !fir.ref, @[[RED_F64_NAME]] -> %1 : !fir.ref) for (%[[IVAL:.*]]) : i32 -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL1_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL1_I32]], %[[XREF]] : i32, !fir.ref -!CHECK: %[[I_PVT_VAL2_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL2_I64:.*]] = fir.convert %[[I_PVT_VAL2_I32]] : (i32) -> i64 -!CHECK: omp.reduction %[[I_PVT_VAL2_I64]], %[[YREF]] : i64, !fir.ref -!CHECK: %[[I_PVT_VAL3_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL3_F32:.*]] = fir.convert %[[I_PVT_VAL3_I32]] : (i32) -> f32 -!CHECK: omp.reduction %[[I_PVT_VAL3_F32]], %[[ZREF]] : f32, !fir.ref -!CHECK: %[[I_PVT_VAL4_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL4_F64:.*]] = fir.convert %[[I_PVT_VAL4_I32]] : (i32) -> f64 -!CHECK: omp.reduction %[[I_PVT_VAL4_F64]], %[[WREF]] : f64, !fir.ref -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return -subroutine 
multiple_reductions_different_type - integer :: x - integer(kind=8) :: y - real :: z - real(kind=8) :: w - x = 1 - y = 1 - z = 1 - w = 1 - !$omp parallel - !$omp do reduction(*:x,y,z,w) - do i=1, 10 - x = x * i - y = y * i - z = z * i - w = w * i - end do - !$omp end do - !$omp end parallel -end subroutine diff --git a/flang/test/Lower/OpenMP/default-clause.f90 b/flang/test/Lower/OpenMP/default-clause.f90 index 0a7443eecf52d7..0e118742689d60 100644 --- a/flang/test/Lower/OpenMP/default-clause.f90 +++ b/flang/test/Lower/OpenMP/default-clause.f90 @@ -352,7 +352,7 @@ subroutine skipped_default_clause_checks() type(it)::iii !CHECK: omp.parallel { -!CHECK: omp.wsloop reduction(@min_i_32 -> %[[VAL_Z_DECLARE]]#0 : !fir.ref) for (%[[ARG:.*]]) {{.*}} { +!CHECK: omp.wsloop reduction(@min_i_32 %[[VAL_Z_DECLARE]]#0 -> %[[PRV:.+]] : !fir.ref) for (%[[ARG:.*]]) {{.*}} { !CHECK: omp.yield !CHECK: } !CHECK: omp.terminator diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir.f90 index 97ee665442e3a8..4d30282fc8c21f 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir.f90 @@ -1,35 +1,44 @@ ! RUN: bbc -emit-hlfir -fopenmp %s -o - | FileCheck %s ! RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s -!CHECK-LABEL: omp.reduction.declare -!CHECK-SAME: @[[RED_I32_NAME:.*]] : i32 init { -!CHECK: ^bb0(%{{.*}}: i32): -!CHECK: %[[C0_1:.*]] = arith.constant 0 : i32 -!CHECK: omp.yield(%[[C0_1]] : i32) -!CHECK: } combiner { -!CHECK: ^bb0(%[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32): -!CHECK: %[[RES:.*]] = arith.addi %[[ARG0]], %[[ARG1]] : i32 -!CHECK: omp.yield(%[[RES]] : i32) -!CHECK: } +! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py + +! CHECK-LABEL: omp.reduction.declare @add_reduction_i_32 : i32 init { +! CHECK: ^bb0(%[[VAL_0:.*]]: i32): +! CHECK: %[[VAL_1:.*]] = arith.constant 0 : i32 +! 
CHECK: omp.yield(%[[VAL_1]] : i32) + +! CHECK-LABEL: } combiner { +! CHECK: ^bb0(%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32): +! CHECK: %[[VAL_2:.*]] = arith.addi %[[VAL_0]], %[[VAL_1]] : i32 +! CHECK: omp.yield(%[[VAL_2]] : i32) +! CHECK: } + +! CHECK-LABEL: func.func @_QPsimple_int_reduction() +! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_int_reductionEi"} +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFsimple_int_reductionEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_int_reductionEx"} +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_4:.*]] = arith.constant 0 : i32 +! CHECK: hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : i32, !fir.ref +! CHECK: omp.parallel { +! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_int_reductionEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_8:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) +! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref +! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref +! CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref +! CHECK: %[[VAL_15:.*]] = arith.addi %[[VAL_13]], %[[VAL_14]] : i32 +! CHECK: hlfir.assign %[[VAL_15]] to %[[VAL_12]]#0 : i32, !fir.ref +! CHECK: omp.yield +! CHECK: omp.terminator +! 
CHECK: return + -!CHECK-LABEL: func.func @_QPsimple_int_reduction -!CHECK: %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_int_reductionEx"} -!CHECK: %[[XDECL:.*]]:2 = hlfir.declare %[[XREF]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[C0_2:.*]] = arith.constant 0 : i32 -!CHECK: hlfir.assign %[[C0_2]] to %[[XDECL]]#0 : i32, !fir.ref -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFsimple_int_reductionEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C100:.*]] = arith.constant 100 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[XDECL]]#0 : !fir.ref) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_DECL]]#1 : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL]], %[[XDECL]]#0 : i32, !fir.ref -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return subroutine simple_int_reduction integer :: x x = 0 diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-add.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-add.f90 index 92c0075b9f72a2..7df4f37b98df8c 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-add.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-add.f90 @@ -1,68 +1,87 @@ ! RUN: bbc -emit-hlfir -fopenmp %s -o - | FileCheck %s ! 
RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s -!CHECK-LABEL: omp.reduction.declare -!CHECK-SAME: @[[RED_F64_NAME:.*]] : f64 init { -!CHECK: ^bb0(%{{.*}}: f64): -!CHECK: %[[C0_1:.*]] = arith.constant 0.000000e+00 : f64 -!CHECK: omp.yield(%[[C0_1]] : f64) -!CHECK: } combiner { -!CHECK: ^bb0(%[[ARG0:.*]]: f64, %[[ARG1:.*]]: f64): -!CHECK: %[[RES:.*]] = arith.addf %[[ARG0]], %[[ARG1]] {{.*}}: f64 -!CHECK: omp.yield(%[[RES]] : f64) -!CHECK: } -!CHECK-LABEL: omp.reduction.declare -!CHECK-SAME: @[[RED_I64_NAME:.*]] : i64 init { -!CHECK: ^bb0(%{{.*}}: i64): -!CHECK: %[[C0_1:.*]] = arith.constant 0 : i64 -!CHECK: omp.yield(%[[C0_1]] : i64) -!CHECK: } combiner { -!CHECK: ^bb0(%[[ARG0:.*]]: i64, %[[ARG1:.*]]: i64): -!CHECK: %[[RES:.*]] = arith.addi %[[ARG0]], %[[ARG1]] : i64 -!CHECK: omp.yield(%[[RES]] : i64) -!CHECK: } +! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py -!CHECK-LABEL: omp.reduction.declare -!CHECK-SAME: @[[RED_F32_NAME:.*]] : f32 init { -!CHECK: ^bb0(%{{.*}}: f32): -!CHECK: %[[C0_1:.*]] = arith.constant 0.000000e+00 : f32 -!CHECK: omp.yield(%[[C0_1]] : f32) -!CHECK: } combiner { -!CHECK: ^bb0(%[[ARG0:.*]]: f32, %[[ARG1:.*]]: f32): -!CHECK: %[[RES:.*]] = arith.addf %[[ARG0]], %[[ARG1]] {{.*}}: f32 -!CHECK: omp.yield(%[[RES]] : f32) -!CHECK: } +! The script is designed to make adding checks to +! a test case fast, it is *not* designed to be authoritative +! about what constitutes a good test! The CHECK should be +! minimized and named to reflect the test intent. 
-!CHECK-LABEL: omp.reduction.declare -!CHECK-SAME: @[[RED_I32_NAME:.*]] : i32 init { -!CHECK: ^bb0(%{{.*}}: i32): -!CHECK: %[[C0_1:.*]] = arith.constant 0 : i32 -!CHECK: omp.yield(%[[C0_1]] : i32) -!CHECK: } combiner { -!CHECK: ^bb0(%[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32): -!CHECK: %[[RES:.*]] = arith.addi %[[ARG0]], %[[ARG1]] : i32 -!CHECK: omp.yield(%[[RES]] : i32) -!CHECK: } -!CHECK-LABEL: func.func @_QPsimple_int_reduction -!CHECK: %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_int_reductionEx"} -!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[XREF]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[C0_2:.*]] = arith.constant 0 : i32 -!CHECK: hlfir.assign %[[C0_2]] to %[[X_DECL]]#0 : i32, !fir.ref -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFsimple_int_reductionEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C100:.*]] = arith.constant 100 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[X_DECL]]#0 : !fir.ref) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_DECL]]#1 : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL]], %[[X_DECL]]#0 : i32, !fir.ref -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return + +! CHECK-LABEL: omp.reduction.declare @add_reduction_f_64 : f64 init { +! CHECK: ^bb0(%[[VAL_0:.*]]: f64): +! CHECK: %[[VAL_1:.*]] = arith.constant 0.000000e+00 : f64 +! CHECK: omp.yield(%[[VAL_1]] : f64) + +! CHECK-LABEL: } combiner { +! CHECK: ^bb0(%[[VAL_0:.*]]: f64, %[[VAL_1:.*]]: f64): +! CHECK: %[[VAL_2:.*]] = arith.addf %[[VAL_0]], %[[VAL_1]] fastmath : f64 +! CHECK: omp.yield(%[[VAL_2]] : f64) +! CHECK: } + +! 
CHECK-LABEL: omp.reduction.declare @add_reduction_i_64 : i64 init { +! CHECK: ^bb0(%[[VAL_0:.*]]: i64): +! CHECK: %[[VAL_1:.*]] = arith.constant 0 : i64 +! CHECK: omp.yield(%[[VAL_1]] : i64) + +! CHECK-LABEL: } combiner { +! CHECK: ^bb0(%[[VAL_0:.*]]: i64, %[[VAL_1:.*]]: i64): +! CHECK: %[[VAL_2:.*]] = arith.addi %[[VAL_0]], %[[VAL_1]] : i64 +! CHECK: omp.yield(%[[VAL_2]] : i64) +! CHECK: } + +! CHECK-LABEL: omp.reduction.declare @add_reduction_f_32 : f32 init { +! CHECK: ^bb0(%[[VAL_0:.*]]: f32): +! CHECK: %[[VAL_1:.*]] = arith.constant 0.000000e+00 : f32 +! CHECK: omp.yield(%[[VAL_1]] : f32) + +! CHECK-LABEL: } combiner { +! CHECK: ^bb0(%[[VAL_0:.*]]: f32, %[[VAL_1:.*]]: f32): +! CHECK: %[[VAL_2:.*]] = arith.addf %[[VAL_0]], %[[VAL_1]] fastmath : f32 +! CHECK: omp.yield(%[[VAL_2]] : f32) +! CHECK: } + +! CHECK-LABEL: omp.reduction.declare @add_reduction_i_32 : i32 init { +! CHECK: ^bb0(%[[VAL_0:.*]]: i32): +! CHECK: %[[VAL_1:.*]] = arith.constant 0 : i32 +! CHECK: omp.yield(%[[VAL_1]] : i32) + +! CHECK-LABEL: } combiner { +! CHECK: ^bb0(%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32): +! CHECK: %[[VAL_2:.*]] = arith.addi %[[VAL_0]], %[[VAL_1]] : i32 +! CHECK: omp.yield(%[[VAL_2]] : i32) +! CHECK: } + +! CHECK-LABEL: func.func @_QPsimple_int_reduction() { +! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_int_reductionEi"} +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFsimple_int_reductionEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_int_reductionEx"} +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_4:.*]] = arith.constant 0 : i32 +! CHECK: hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : i32, !fir.ref +! CHECK: omp.parallel { +! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! 
CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_int_reductionEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_8:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref +! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref +! CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref +! CHECK: %[[VAL_15:.*]] = arith.addi %[[VAL_13]], %[[VAL_14]] : i32 +! CHECK: hlfir.assign %[[VAL_15]] to %[[VAL_12]]#0 : i32, !fir.ref +! CHECK: omp.yield +! CHECK: } +! CHECK: omp.terminator +! CHECK: } +! CHECK: return +! CHECK: } + subroutine simple_int_reduction integer :: x x = 0 @@ -75,25 +94,35 @@ subroutine simple_int_reduction !$omp end parallel end subroutine -!CHECK-LABEL: func.func @_QPsimple_real_reduction -!CHECK: %[[XREF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFsimple_real_reductionEx"} -!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[XREF]] {uniq_name = "_QFsimple_real_reductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[C0_2:.*]] = arith.constant 0.000000e+00 : f32 -!CHECK: hlfir.assign %[[C0_2]] to %[[X_DECL]]#0 : f32, !fir.ref -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFsimple_real_reductionEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C100:.*]] = arith.constant 100 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_F32_NAME]] -> %[[X_DECL]]#0 : 
!fir.ref) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_DECL]]#1 : !fir.ref -!CHECK: %[[I_PVT_VAL_i32:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: %[[I_PVT_VAL_f32:.*]] = fir.convert %[[I_PVT_VAL_i32]] : (i32) -> f32 -!CHECK: omp.reduction %[[I_PVT_VAL_f32]], %[[X_DECL]]#0 : f32, !fir.ref -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return + +! CHECK-LABEL: func.func @_QPsimple_real_reduction() { +! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_real_reductionEi"} +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFsimple_real_reductionEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_2:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFsimple_real_reductionEx"} +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFsimple_real_reductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_4:.*]] = arith.constant 0.000000e+00 : f32 +! CHECK: hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : f32, !fir.ref +! CHECK: omp.parallel { +! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_real_reductionEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_8:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref +! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref +! CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref +! 
CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (i32) -> f32 +! CHECK: %[[VAL_16:.*]] = arith.addf %[[VAL_13]], %[[VAL_15]] fastmath : f32 +! CHECK: hlfir.assign %[[VAL_16]] to %[[VAL_12]]#0 : f32, !fir.ref +! CHECK: omp.yield +! CHECK: } +! CHECK: omp.terminator +! CHECK: } +! CHECK: return +! CHECK: } + subroutine simple_real_reduction real :: x x = 0.0 @@ -106,24 +135,34 @@ subroutine simple_real_reduction !$omp end parallel end subroutine -!CHECK-LABEL: func.func @_QPsimple_int_reduction_switch_order -!CHECK: %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_int_reduction_switch_orderEx"} -!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %2 {uniq_name = "_QFsimple_int_reduction_switch_orderEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[C0_2:.*]] = arith.constant 0 : i32 -!CHECK: hlfir.assign %[[C0_2]] to %[[X_DECL]]#0 : i32, !fir.ref -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFsimple_int_reduction_switch_orderEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C100:.*]] = arith.constant 100 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[X_DECL]]#0 : !fir.ref) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_DECL]]#1 : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL]], %[[X_DECL]]#0 : i32, !fir.ref -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return + +! CHECK-LABEL: func.func @_QPsimple_int_reduction_switch_order() { +! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_int_reduction_switch_orderEi"} +! 
CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFsimple_int_reduction_switch_orderEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_int_reduction_switch_orderEx"} +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFsimple_int_reduction_switch_orderEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_4:.*]] = arith.constant 0 : i32 +! CHECK: hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : i32, !fir.ref +! CHECK: omp.parallel { +! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_int_reduction_switch_orderEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_8:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref +! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reduction_switch_orderEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref +! CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref +! CHECK: %[[VAL_15:.*]] = arith.addi %[[VAL_13]], %[[VAL_14]] : i32 +! CHECK: hlfir.assign %[[VAL_15]] to %[[VAL_12]]#0 : i32, !fir.ref +! CHECK: omp.yield +! CHECK: } +! CHECK: omp.terminator +! CHECK: } +! CHECK: return +! 
CHECK: } + subroutine simple_int_reduction_switch_order integer :: x x = 0 @@ -136,25 +175,34 @@ subroutine simple_int_reduction_switch_order !$omp end parallel end subroutine -!CHECK-LABEL: func.func @_QPsimple_real_reduction_switch_order -!CHECK: %[[XREF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFsimple_real_reduction_switch_orderEx"} -!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[XREF]] {uniq_name = "_QFsimple_real_reduction_switch_orderEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[C0_2:.*]] = arith.constant 0.000000e+00 : f32 -!CHECK: hlfir.assign %[[C0_2]] to %[[X_DECL]]#0 : f32, !fir.ref -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFsimple_real_reduction_switch_orderEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C100:.*]] = arith.constant 100 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_F32_NAME]] -> %[[X_DECL]]#0 : !fir.ref) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_DECL]]#1 : !fir.ref -!CHECK: %[[I_PVT_VAL_i32:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: %[[I_PVT_VAL_f32:.*]] = fir.convert %[[I_PVT_VAL_i32]] : (i32) -> f32 -!CHECK: omp.reduction %[[I_PVT_VAL_f32]], %[[X_DECL]]#0 : f32, !fir.ref -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return +! CHECK-LABEL: func.func @_QPsimple_real_reduction_switch_order() { +! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_real_reduction_switch_orderEi"} +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFsimple_real_reduction_switch_orderEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_2:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFsimple_real_reduction_switch_orderEx"} +! 
CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFsimple_real_reduction_switch_orderEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_4:.*]] = arith.constant 0.000000e+00 : f32 +! CHECK: hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : f32, !fir.ref +! CHECK: omp.parallel { +! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_real_reduction_switch_orderEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_8:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref +! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reduction_switch_orderEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref +! CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i32) -> f32 +! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref +! CHECK: %[[VAL_16:.*]] = arith.addf %[[VAL_14]], %[[VAL_15]] fastmath : f32 +! CHECK: hlfir.assign %[[VAL_16]] to %[[VAL_12]]#0 : f32, !fir.ref +! CHECK: omp.yield +! CHECK: } +! CHECK: omp.terminator +! CHECK: } +! CHECK: return +! 
CHECK: } + subroutine simple_real_reduction_switch_order real :: x x = 0.0 @@ -167,27 +215,51 @@ subroutine simple_real_reduction_switch_order !$omp end parallel end subroutine -!CHECK-LABEL: func.func @_QPmultiple_int_reductions_same_type -!CHECK: %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_int_reductions_same_typeEx"} -!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[XREF]] {uniq_name = "_QFmultiple_int_reductions_same_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[YREF:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFmultiple_int_reductions_same_typeEy"} -!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[YREF]] {uniq_name = "_QFmultiple_int_reductions_same_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[ZREF:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFmultiple_int_reductions_same_typeEz"} -!CHECK: %[[Z_DECL:.*]]:2 = hlfir.declare %[[ZREF]] {uniq_name = "_QFmultiple_int_reductions_same_typeEz"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFmultiple_int_reductions_same_typeEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[X_DECL]]#0 : !fir.ref, @[[RED_I32_NAME]] -> %[[Y_DECL]]#0 : !fir.ref, @[[RED_I32_NAME]] -> %[[Z_DECL]]#0 : !fir.ref) for (%[[IVAL]]) : i32 -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_DECL]]#1 : !fir.ref -!CHECK: %[[I_PVT_VAL1:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL1]], %[[X_DECL]]#0 : i32, !fir.ref -!CHECK: %[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL2]], %[[Y_DECL]]#0 : i32, !fir.ref -!CHECK: %[[I_PVT_VAL3:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL3]], %[[Z_DECL]]#0 : i32, !fir.ref -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return +! 
CHECK-LABEL: func.func @_QPmultiple_int_reductions_same_type() { +! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_int_reductions_same_typeEi"} +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFmultiple_int_reductions_same_typeEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_int_reductions_same_typeEx"} +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFmultiple_int_reductions_same_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_4:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFmultiple_int_reductions_same_typeEy"} +! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFmultiple_int_reductions_same_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_6:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFmultiple_int_reductions_same_typeEz"} +! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFmultiple_int_reductions_same_typeEz"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_8:.*]] = arith.constant 0 : i32 +! CHECK: hlfir.assign %[[VAL_8]] to %[[VAL_3]]#0 : i32, !fir.ref +! CHECK: %[[VAL_9:.*]] = arith.constant 0 : i32 +! CHECK: hlfir.assign %[[VAL_9]] to %[[VAL_5]]#0 : i32, !fir.ref +! CHECK: %[[VAL_10:.*]] = arith.constant 0 : i32 +! CHECK: hlfir.assign %[[VAL_10]] to %[[VAL_7]]#0 : i32, !fir.ref +! CHECK: omp.parallel { +! CHECK: %[[VAL_11:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFmultiple_int_reductions_same_typeEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_13:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_14:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_15:.*]] = arith.constant 1 : i32 +! 
CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref, @add_reduction_i_32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref, @add_reduction_i_32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref) for (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) { +! CHECK: fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref +! CHECK: %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_int_reductions_same_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_int_reductions_same_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_int_reductions_same_typeEz"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_20]]#0 : !fir.ref +! CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref +! CHECK: %[[VAL_25:.*]] = arith.addi %[[VAL_23]], %[[VAL_24]] : i32 +! CHECK: hlfir.assign %[[VAL_25]] to %[[VAL_20]]#0 : i32, !fir.ref +! CHECK: %[[VAL_26:.*]] = fir.load %[[VAL_21]]#0 : !fir.ref +! CHECK: %[[VAL_27:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref +! CHECK: %[[VAL_28:.*]] = arith.addi %[[VAL_26]], %[[VAL_27]] : i32 +! CHECK: hlfir.assign %[[VAL_28]] to %[[VAL_21]]#0 : i32, !fir.ref +! CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref +! CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref +! CHECK: %[[VAL_31:.*]] = arith.addi %[[VAL_29]], %[[VAL_30]] : i32 +! CHECK: hlfir.assign %[[VAL_31]] to %[[VAL_22]]#0 : i32, !fir.ref +! CHECK: omp.yield +! CHECK: } +! CHECK: omp.terminator +! CHECK: } +! CHECK: return +! 
CHECK: } + subroutine multiple_int_reductions_same_type integer :: x,y,z x = 0 @@ -204,30 +276,54 @@ subroutine multiple_int_reductions_same_type !$omp end parallel end subroutine -!CHECK-LABEL: func.func @_QPmultiple_real_reductions_same_type -!CHECK: %[[XREF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFmultiple_real_reductions_same_typeEx"} -!CHECK: %[[X_DECL]]:2 = hlfir.declare %[[XREF]] {uniq_name = "_QFmultiple_real_reductions_same_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[YREF:.*]] = fir.alloca f32 {bindc_name = "y", uniq_name = "_QFmultiple_real_reductions_same_typeEy"} -!CHECK: %[[Y_DECL]]:2 = hlfir.declare %[[YREF]] {uniq_name = "_QFmultiple_real_reductions_same_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[ZREF:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_real_reductions_same_typeEz"} -!CHECK: %[[Z_DECL]]:2 = hlfir.declare %[[ZREF]] {uniq_name = "_QFmultiple_real_reductions_same_typeEz"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFmultiple_real_reductions_same_typeEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.wsloop reduction(@[[RED_F32_NAME]] -> %[[X_DECL]]#0 : !fir.ref, @[[RED_F32_NAME]] -> %[[Y_DECL]]#0 : !fir.ref, @[[RED_F32_NAME]] -> %[[Z_DECL]]#0 : !fir.ref) for (%[[IVAL]]) : i32 -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_DECL]]#1 : !fir.ref -!CHECK: %[[I_PVT_VAL1_I32:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: %[[I_PVT_VAL1_F32:.*]] = fir.convert %[[I_PVT_VAL1_I32]] : (i32) -> f32 -!CHECK: omp.reduction %[[I_PVT_VAL1_F32]], %[[X_DECL]]#0 : f32, !fir.ref -!CHECK: %[[I_PVT_VAL2_I32:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: %[[I_PVT_VAL2_F32:.*]] = fir.convert %[[I_PVT_VAL2_I32]] : (i32) -> f32 -!CHECK: omp.reduction %[[I_PVT_VAL2_F32]], %[[Y_DECL]]#0 : f32, !fir.ref -!CHECK: %[[I_PVT_VAL3_I32:.*]] = 
fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: %[[I_PVT_VAL3_F32:.*]] = fir.convert %[[I_PVT_VAL3_I32]] : (i32) -> f32 -!CHECK: omp.reduction %[[I_PVT_VAL3_F32]], %[[Z_DECL]]#0 : f32, !fir.ref -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return +! CHECK-LABEL: func.func @_QPmultiple_real_reductions_same_type() { +! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_real_reductions_same_typeEi"} +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFmultiple_real_reductions_same_typeEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_2:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFmultiple_real_reductions_same_typeEx"} +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFmultiple_real_reductions_same_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_4:.*]] = fir.alloca f32 {bindc_name = "y", uniq_name = "_QFmultiple_real_reductions_same_typeEy"} +! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFmultiple_real_reductions_same_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_6:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_real_reductions_same_typeEz"} +! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFmultiple_real_reductions_same_typeEz"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_8:.*]] = arith.constant 0.000000e+00 : f32 +! CHECK: hlfir.assign %[[VAL_8]] to %[[VAL_3]]#0 : f32, !fir.ref +! CHECK: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f32 +! CHECK: hlfir.assign %[[VAL_9]] to %[[VAL_5]]#0 : f32, !fir.ref +! CHECK: %[[VAL_10:.*]] = arith.constant 0.000000e+00 : f32 +! CHECK: hlfir.assign %[[VAL_10]] to %[[VAL_7]]#0 : f32, !fir.ref +! CHECK: omp.parallel { +! CHECK: %[[VAL_11:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFmultiple_real_reductions_same_typeEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! 
CHECK: %[[VAL_13:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_14:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_15:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref) for (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) { +! CHECK: fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref +! CHECK: %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_real_reductions_same_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_real_reductions_same_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_real_reductions_same_typeEz"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_20]]#0 : !fir.ref +! CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref +! CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (i32) -> f32 +! CHECK: %[[VAL_26:.*]] = arith.addf %[[VAL_23]], %[[VAL_25]] fastmath : f32 +! CHECK: hlfir.assign %[[VAL_26]] to %[[VAL_20]]#0 : f32, !fir.ref +! CHECK: %[[VAL_27:.*]] = fir.load %[[VAL_21]]#0 : !fir.ref +! CHECK: %[[VAL_28:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref +! CHECK: %[[VAL_29:.*]] = fir.convert %[[VAL_28]] : (i32) -> f32 +! CHECK: %[[VAL_30:.*]] = arith.addf %[[VAL_27]], %[[VAL_29]] fastmath : f32 +! CHECK: hlfir.assign %[[VAL_30]] to %[[VAL_21]]#0 : f32, !fir.ref +! CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref +! CHECK: %[[VAL_32:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref +! CHECK: %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (i32) -> f32 +! CHECK: %[[VAL_34:.*]] = arith.addf %[[VAL_31]], %[[VAL_33]] fastmath : f32 +! CHECK: hlfir.assign %[[VAL_34]] to %[[VAL_22]]#0 : f32, !fir.ref +! CHECK: omp.yield +! 
CHECK: } +! CHECK: omp.terminator +! CHECK: } +! CHECK: return +! CHECK: } + subroutine multiple_real_reductions_same_type real :: x,y,z x = 0.0 @@ -244,34 +340,63 @@ subroutine multiple_real_reductions_same_type !$omp end parallel end subroutine -!CHECK-LABEL: func.func @_QPmultiple_reductions_different_type -!CHECK: %[[WREF:.*]] = fir.alloca f64 {bindc_name = "w", uniq_name = "_QFmultiple_reductions_different_typeEw"} -!CHECK: %[[W_DECL:.*]]:2 = hlfir.declare %[[WREF]] {uniq_name = "_QFmultiple_reductions_different_typeEw"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_reductions_different_typeEx"} -!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[XREF]] {uniq_name = "_QFmultiple_reductions_different_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[YREF:.*]] = fir.alloca i64 {bindc_name = "y", uniq_name = "_QFmultiple_reductions_different_typeEy"} -!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[YREF]] {uniq_name = "_QFmultiple_reductions_different_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[ZREF:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_reductions_different_typeEz"} -!CHECK: %[[Z_DECL:.*]]:2 = hlfir.declare %[[ZREF]] {uniq_name = "_QFmultiple_reductions_different_typeEz"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFmultiple_reductions_different_typeEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[X_DECL]]#0 : !fir.ref, @[[RED_I64_NAME]] -> %[[Y_DECL]]#0 : !fir.ref, @[[RED_F32_NAME]] -> %[[Z_DECL]]#0 : !fir.ref, @[[RED_F64_NAME]] -> %[[W_DECL]]#0 : !fir.ref) for (%[[IVAL:.*]]) : i32 -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_DECL]]#1 : !fir.ref -!CHECK: %[[I_PVT_VAL1_I32:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: omp.reduction 
%[[I_PVT_VAL1_I32]], %[[X_DECL]]#0 : i32, !fir.ref -!CHECK: %[[I_PVT_VAL2_I32:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: %[[I_PVT_VAL2_I64:.*]] = fir.convert %[[I_PVT_VAL2_I32]] : (i32) -> i64 -!CHECK: omp.reduction %[[I_PVT_VAL2_I64]], %[[Y_DECL]]#0 : i64, !fir.ref -!CHECK: %[[I_PVT_VAL3_I32:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: %[[I_PVT_VAL3_F32:.*]] = fir.convert %[[I_PVT_VAL3_I32]] : (i32) -> f32 -!CHECK: omp.reduction %[[I_PVT_VAL3_F32]], %[[Z_DECL]]#0 : f32, !fir.ref -!CHECK: %[[I_PVT_VAL4_I32:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: %[[I_PVT_VAL4_F64:.*]] = fir.convert %[[I_PVT_VAL4_I32]] : (i32) -> f64 -!CHECK: omp.reduction %[[I_PVT_VAL4_F64]], %[[W_DECL]]#0 : f64, !fir.ref -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return +! CHECK-LABEL: func.func @_QPmultiple_reductions_different_type() { +! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_reductions_different_typeEi"} +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFmultiple_reductions_different_typeEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_2:.*]] = fir.alloca f64 {bindc_name = "w", uniq_name = "_QFmultiple_reductions_different_typeEw"} +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFmultiple_reductions_different_typeEw"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_4:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_reductions_different_typeEx"} +! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFmultiple_reductions_different_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_6:.*]] = fir.alloca i64 {bindc_name = "y", uniq_name = "_QFmultiple_reductions_different_typeEy"} +! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFmultiple_reductions_different_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! 
CHECK: %[[VAL_8:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_reductions_different_typeEz"} +! CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]] {uniq_name = "_QFmultiple_reductions_different_typeEz"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_10:.*]] = arith.constant 0 : i32 +! CHECK: hlfir.assign %[[VAL_10]] to %[[VAL_5]]#0 : i32, !fir.ref +! CHECK: %[[VAL_11:.*]] = arith.constant 0 : i64 +! CHECK: hlfir.assign %[[VAL_11]] to %[[VAL_7]]#0 : i64, !fir.ref +! CHECK: %[[VAL_12:.*]] = arith.constant 0.000000e+00 : f32 +! CHECK: hlfir.assign %[[VAL_12]] to %[[VAL_9]]#0 : f32, !fir.ref +! CHECK: %[[VAL_13:.*]] = arith.constant 0.000000e+00 : f64 +! CHECK: hlfir.assign %[[VAL_13]] to %[[VAL_3]]#0 : f64, !fir.ref +! CHECK: omp.parallel { +! CHECK: %[[VAL_14:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_14]] {uniq_name = "_QFmultiple_reductions_different_typeEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_16:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_17:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_18:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_5]]#0 -> %[[VAL_19:.*]] : !fir.ref, @add_reduction_i_64 %[[VAL_7]]#0 -> %[[VAL_20:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_9]]#0 -> %[[VAL_21:.*]] : !fir.ref, @add_reduction_f_64 %[[VAL_3]]#0 -> %[[VAL_22:.*]] : !fir.ref) for (%[[VAL_23:.*]]) : i32 = (%[[VAL_16]]) to (%[[VAL_17]]) inclusive step (%[[VAL_18]]) { +! CHECK: fir.store %[[VAL_23]] to %[[VAL_15]]#1 : !fir.ref +! CHECK: %[[VAL_24:.*]]:2 = hlfir.declare %[[VAL_19]] {uniq_name = "_QFmultiple_reductions_different_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_25:.*]]:2 = hlfir.declare %[[VAL_20]] {uniq_name = "_QFmultiple_reductions_different_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! 
CHECK: %[[VAL_26:.*]]:2 = hlfir.declare %[[VAL_21]] {uniq_name = "_QFmultiple_reductions_different_typeEz"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_22]] {uniq_name = "_QFmultiple_reductions_different_typeEw"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_28:.*]] = fir.load %[[VAL_24]]#0 : !fir.ref +! CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref +! CHECK: %[[VAL_30:.*]] = arith.addi %[[VAL_28]], %[[VAL_29]] : i32 +! CHECK: hlfir.assign %[[VAL_30]] to %[[VAL_24]]#0 : i32, !fir.ref +! CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_25]]#0 : !fir.ref +! CHECK: %[[VAL_32:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref +! CHECK: %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (i32) -> i64 +! CHECK: %[[VAL_34:.*]] = arith.addi %[[VAL_31]], %[[VAL_33]] : i64 +! CHECK: hlfir.assign %[[VAL_34]] to %[[VAL_25]]#0 : i64, !fir.ref +! CHECK: %[[VAL_35:.*]] = fir.load %[[VAL_26]]#0 : !fir.ref +! CHECK: %[[VAL_36:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref +! CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_36]] : (i32) -> f32 +! CHECK: %[[VAL_38:.*]] = arith.addf %[[VAL_35]], %[[VAL_37]] fastmath : f32 +! CHECK: hlfir.assign %[[VAL_38]] to %[[VAL_26]]#0 : f32, !fir.ref +! CHECK: %[[VAL_39:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref +! CHECK: %[[VAL_40:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref +! CHECK: %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> f64 +! CHECK: %[[VAL_42:.*]] = arith.addf %[[VAL_39]], %[[VAL_41]] fastmath : f64 +! CHECK: hlfir.assign %[[VAL_42]] to %[[VAL_27]]#0 : f64, !fir.ref +! CHECK: omp.yield +! CHECK: } +! CHECK: omp.terminator +! CHECK: } +! CHECK: return +! 
CHECK: } + subroutine multiple_reductions_different_type integer :: x integer(kind=8) :: y diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-iand.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-iand.f90 index 29cd53616b5cb4..9588531f6c909a 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-iand.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-iand.f90 @@ -1,32 +1,48 @@ ! RUN: bbc -emit-hlfir -fopenmp %s -o - | FileCheck %s ! RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s -!CHECK: omp.reduction.declare @[[IAND_DECLARE_I:.*]] : i32 init { -!CHECK: %[[ZERO_VAL_I:.*]] = arith.constant -1 : i32 -!CHECK: omp.yield(%[[ZERO_VAL_I]] : i32) -!CHECK: combiner -!CHECK: ^bb0(%[[ARG0_I:.*]]: i32, %[[ARG1_I:.*]]: i32): -!CHECK: %[[IAND_VAL_I:.*]] = arith.andi %[[ARG0_I]], %[[ARG1_I]] : i32 -!CHECK: omp.yield(%[[IAND_VAL_I]] : i32) +! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py + +! CHECK-LABEL: omp.reduction.declare @iand_i_32 : i32 init { +! CHECK: ^bb0(%[[VAL_0:.*]]: i32): +! CHECK: %[[VAL_1:.*]] = arith.constant -1 : i32 +! CHECK: omp.yield(%[[VAL_1]] : i32) + +! CHECK-LABEL: } combiner { +! CHECK: ^bb0(%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32): +! CHECK: %[[VAL_2:.*]] = arith.andi %[[VAL_0]], %[[VAL_1]] : i32 +! CHECK: omp.yield(%[[VAL_2]] : i32) +! CHECK: } + +! CHECK-LABEL: func.func @_QPreduction_iand( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "y"}) { +! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFreduction_iandEi"} +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFreduction_iandEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_iandEx"} +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFreduction_iandEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! 
CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFreduction_iandEy"} : (!fir.box>) -> (!fir.box>, !fir.box>) +! CHECK: %[[VAL_6:.*]] = arith.constant 0 : i32 +! CHECK: hlfir.assign %[[VAL_6]] to %[[VAL_4]]#0 : i32, !fir.ref +! CHECK: omp.parallel { +! CHECK: %[[VAL_7:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_iandEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@iand_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { +! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref +! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_iandEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref +! CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64 +! CHECK: %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]]) : (!fir.box>, i64) -> !fir.ref +! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref +! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref +! CHECK: %[[VAL_20:.*]] = arith.andi %[[VAL_18]], %[[VAL_19]] : i32 +! CHECK: hlfir.assign %[[VAL_20]] to %[[VAL_14]]#0 : i32, !fir.ref +! CHECK: omp.yield +! 
CHECK: omp.terminator -!CHECK-LABEL: @_QPreduction_iand -!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> -!CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_iandEx"} -!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X_REF]] {uniq_name = "_QFreduction_iandEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y_BOX]] {uniq_name = "_QFreduction_iandEy"} : (!fir.box>) -> (!fir.box>, !fir.box>) -!CHECK: omp.parallel -!CHECK: %[[I_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %[[I_REF]] {uniq_name = "_QFreduction_iandEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.wsloop reduction(@[[IAND_DECLARE_I]] -> %[[X_DECL]]#0 : !fir.ref) for -!CHECK: fir.store %{{.*}} to %[[I_DECL]]#1 : !fir.ref -!CHECK: %[[I_32:.*]] = fir.load %[[I_DECL]]#0 : !fir.ref -!CHECK: %[[I_64:.*]] = fir.convert %[[I_32]] : (i32) -> i64 -!CHECK: %[[Y_I_REF:.*]] = hlfir.designate %[[Y_DECL]]#0 (%[[I_64]]) : (!fir.box>, i64) -> !fir.ref -!CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref -!CHECK: omp.reduction %[[Y_I]], %[[X_DECL]]#0 : i32, !fir.ref -!CHECK: omp.yield -!CHECK: omp.terminator subroutine reduction_iand(y) integer :: x, y(:) diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-ieor.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-ieor.f90 index 3131d1b551737e..a14a37101874c8 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-ieor.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-ieor.f90 @@ -10,7 +10,7 @@ !CHECK: omp.yield(%[[IEOR_VAL_I]] : i32) !CHECK-LABEL: @_QPreduction_ieor -!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> +!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> !CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_ieorEx"} !CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X_REF]] {uniq_name = "_QFreduction_ieorEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y_BOX]] {uniq_name = "_QFreduction_ieorEy"} : (!fir.box>) -> 
(!fir.box>, !fir.box>) @@ -19,13 +19,16 @@ !CHECK: omp.parallel !CHECK: %[[I_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %[[I_REF]] {uniq_name = "_QFreduction_ieorEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.wsloop reduction(@[[IEOR_DECLARE_I]] -> %[[X_DECL]]#0 : !fir.ref) for +!CHECK: omp.wsloop reduction(@[[IEOR_DECLARE_I]] %[[X_DECL]]#0 -> %[[PRV:.+]] : !fir.ref) for !CHECK: fir.store %{{.*}} to %[[I_DECL]]#1 : !fir.ref +!CHECK: %[[PRV_DECL:.+]]:2 = hlfir.declare %[[PRV]] {{.*}} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK: %[[I_32:.*]] = fir.load %[[I_DECL]]#0 : !fir.ref !CHECK: %[[I_64:.*]] = fir.convert %[[I_32]] : (i32) -> i64 !CHECK: %[[Y_I_REF:.*]] = hlfir.designate %[[Y_DECL]]#0 (%[[I_64]]) : (!fir.box>, i64) -> !fir.ref +!CHECK: %[[LPRV:.+]] = fir.load %[[PRV_DECL]]#0 : !fir.ref !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref -!CHECK: omp.reduction %[[Y_I]], %[[X_DECL]]#0 : i32, !fir.ref +!CHECK: %[[RES:.+]] = arith.xori %[[LPRV]], %[[Y_I]] : i32 +!CHECK: hlfir.assign %[[RES]] to %[[PRV_DECL]]#0 : i32, !fir.ref !CHECK: omp.yield !CHECK: omp.terminator diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-ior.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-ior.f90 index 5e3d5bdd6c52c4..3b5e327439358d 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-ior.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-ior.f90 @@ -1,31 +1,48 @@ ! RUN: bbc -emit-hlfir -fopenmp %s -o - | FileCheck %s ! RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s -!CHECK: omp.reduction.declare @[[IOR_DECLARE_I:.*]] : i32 init { -!CHECK: %[[ZERO_VAL_I:.*]] = arith.constant 0 : i32 -!CHECK: omp.yield(%[[ZERO_VAL_I]] : i32) -!CHECK: combiner -!CHECK: ^bb0(%[[ARG0_I:.*]]: i32, %[[ARG1_I:.*]]: i32): -!CHECK: %[[IOR_VAL_I:.*]] = arith.ori %[[ARG0_I]], %[[ARG1_I]] : i32 -!CHECK: omp.yield(%[[IOR_VAL_I]] : i32) +! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py + +! 
CHECK-LABEL: omp.reduction.declare @ior_i_32 : i32 init { +! CHECK: ^bb0(%[[VAL_0:.*]]: i32): +! CHECK: %[[VAL_1:.*]] = arith.constant 0 : i32 +! CHECK: omp.yield(%[[VAL_1]] : i32) + +! CHECK-LABEL: } combiner { +! CHECK: ^bb0(%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32): +! CHECK: %[[VAL_2:.*]] = arith.ori %[[VAL_0]], %[[VAL_1]] : i32 +! CHECK: omp.yield(%[[VAL_2]] : i32) +! CHECK: } + +! CHECK-LABEL: func.func @_QPreduction_ior( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "y"}) { +! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFreduction_iorEi"} +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFreduction_iorEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_iorEx"} +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFreduction_iorEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFreduction_iorEy"} : (!fir.box>) -> (!fir.box>, !fir.box>) +! CHECK: %[[VAL_6:.*]] = arith.constant 0 : i32 +! CHECK: hlfir.assign %[[VAL_6]] to %[[VAL_4]]#0 : i32, !fir.ref +! CHECK: omp.parallel +! CHECK: %[[VAL_7:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_iorEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@ior_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) +! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref +! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_iorEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref +! 
CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64 +! CHECK: %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]]) : (!fir.box>, i64) -> !fir.ref +! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref +! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref +! CHECK: %[[VAL_20:.*]] = arith.ori %[[VAL_18]], %[[VAL_19]] : i32 +! CHECK: hlfir.assign %[[VAL_20]] to %[[VAL_14]]#0 : i32, !fir.ref +! CHECK: omp.yield +! CHECK: omp.terminator + -!CHECK-LABEL: @_QPreduction_ior -!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> -!CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_iorEx"} -!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X_REF]] {uniq_name = "_QFreduction_iorEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y_BOX]] {uniq_name = "_QFreduction_iorEy"} : (!fir.box>) -> (!fir.box>, !fir.box>) -!CHECK: omp.parallel -!CHECK: %[[I_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %[[I_REF]] {uniq_name = "_QFreduction_iorEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.wsloop reduction(@[[IOR_DECLARE_I]] -> %[[X_DECL]]#0 : !fir.ref) for -!CHECK: fir.store %{{.*}} to %[[I_DECL]]#1 : !fir.ref -!CHECK: %[[I_32:.*]] = fir.load %[[I_DECL]]#0 : !fir.ref -!CHECK: %[[I_64:.*]] = fir.convert %[[I_32]] : (i32) -> i64 -!CHECK: %[[Y_I_REF:.*]] = hlfir.designate %[[Y_DECL]]#0 (%[[I_64]]) : (!fir.box>, i64) -> !fir.ref -!CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref -!CHECK: omp.reduction %[[Y_I]], %[[X_DECL]]#0 : i32, !fir.ref -!CHECK: omp.yield -!CHECK: omp.terminator subroutine reduction_ior(y) integer :: x, y(:) diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-and.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-and.f90 index 243c8a1f874d51..17d321620cca8b 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-logical-and.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-and.f90 @@ -1,77 +1,106 @@ ! 
RUN: bbc -emit-hlfir -fopenmp %s -o - | FileCheck %s ! RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s -!CHECK-LABEL: omp.reduction.declare -!CHECK-SAME: @[[RED_NAME:.*]] : !fir.logical<4> init { -!CHECK: ^bb0(%{{.*}}: !fir.logical<4>): -!CHECK: %true = arith.constant true -!CHECK: %[[true_fir:.*]] = fir.convert %true : (i1) -> !fir.logical<4> -!CHECK: omp.yield(%[[true_fir]] : !fir.logical<4>) -!CHECK: } combiner { -!CHECK: ^bb0(%[[ARG0:.*]]: !fir.logical<4>, %[[ARG1:.*]]: !fir.logical<4>): -!CHECK: %[[arg0_i1:.*]] = fir.convert %[[ARG0]] : (!fir.logical<4>) -> i1 -!CHECK: %[[arg1_i1:.*]] = fir.convert %[[ARG1]] : (!fir.logical<4>) -> i1 -!CHECK: %[[RES:.*]] = arith.andi %[[arg0_i1]], %[[arg1_i1]] : i1 -!CHECK: %[[RES_logical:.*]] = fir.convert %[[RES]] : (i1) -> !fir.logical<4> -!CHECK: omp.yield(%[[RES_logical]] : !fir.logical<4>) -!CHECK: } +! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py + +! CHECK-LABEL: omp.reduction.declare @and_reduction : !fir.logical<4> init { +! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.logical<4>): +! CHECK: %[[VAL_1:.*]] = arith.constant true +! CHECK: %[[VAL_2:.*]] = fir.convert %[[VAL_1]] : (i1) -> !fir.logical<4> +! CHECK: omp.yield(%[[VAL_2]] : !fir.logical<4>) + +! CHECK-LABEL: } combiner { +! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.logical<4>, %[[VAL_1:.*]]: !fir.logical<4>): +! CHECK: %[[VAL_2:.*]] = fir.convert %[[VAL_0]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_3:.*]] = fir.convert %[[VAL_1]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_4:.*]] = arith.andi %[[VAL_2]], %[[VAL_3]] : i1 +! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : (i1) -> !fir.logical<4> +! CHECK: omp.yield(%[[VAL_5]] : !fir.logical<4>) +! CHECK: } + +! CHECK-LABEL: func.func @_QPsimple_reduction( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>> {fir.bindc_name = "y"}) { +! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"} +! 
CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"} +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_5:.*]] = arith.constant 100 : index +! CHECK: %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1> +! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_6]]) {uniq_name = "_QFsimple_reductionEy"} : (!fir.ref>>, !fir.shape<1>) -> (!fir.ref>>, !fir.ref>>) +! CHECK: %[[VAL_8:.*]] = arith.constant true +! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: omp.parallel { +! CHECK: %[[VAL_10:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@and_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref +! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref> +! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref +! CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (i32) -> i64 +! CHECK: %[[VAL_21:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_20]]) : (!fir.ref>>, i64) -> !fir.ref> +! CHECK: %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref> +! 
CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_18]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_25:.*]] = arith.andi %[[VAL_23]], %[[VAL_24]] : i1 +! CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: omp.yield +! CHECK: omp.terminator +! CHECK: return -!CHECK-LABEL: func.func @_QPsimple_reduction( -!CHECK-SAME: %[[ARRAY:.*]]: !fir.ref>> {fir.bindc_name = "y"}) { -!CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"} -!CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"} -!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[XREF]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) -!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[ARRAY]](%4) {uniq_name = "_QFsimple_reductionEy"} : (!fir.ref>>, !fir.shape<1>) -> (!fir.ref>>, !fir.ref>>) -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C100:.*]] = arith.constant 100 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[X_DECL]]#0 : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_DECL]]#1 : !fir.ref -!CHECK: %[[I_PVT:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: %[[I_PVT_64:.*]] = fir.convert %[[I_PVT]] : (i32) -> i64 -!CHECK: %[[Y_I_REF:.*]] = hlfir.designate %[[Y_DECL]]#0 (%[[I_PVT_64]]) : (!fir.ref>>, i64) -> !fir.ref> -!CHECK: %[[Y_I_VAL:.*]] = fir.load %[[Y_I_REF]] : !fir.ref> -!CHECK: omp.reduction %[[Y_I_VAL]], %[[X_DECL]]#0 : !fir.logical<4>, 
!fir.ref> -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return subroutine simple_reduction(y) logical :: x, y(100) x = .true. !$omp parallel !$omp do reduction(.and.:x) do i=1, 100 - x = x .and. y(i) + x = x .and. y(i) end do !$omp end do !$omp end parallel -end subroutine +end subroutine simple_reduction + + +! CHECK-LABEL: func.func @_QPsimple_reduction_switch_order( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>> {fir.bindc_name = "y"}) { +! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"} +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"} +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_5:.*]] = arith.constant 100 : index +! CHECK: %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1> +! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_6]]) {uniq_name = "_QFsimple_reduction_switch_orderEy"} : (!fir.ref>>, !fir.shape<1>) -> (!fir.ref>>, !fir.ref>>) +! CHECK: %[[VAL_8:.*]] = arith.constant true +! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: omp.parallel { +! CHECK: %[[VAL_10:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 +! 
CHECK: omp.wsloop reduction(@and_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref +! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref +! CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (i32) -> i64 +! CHECK: %[[VAL_20:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_19]]) : (!fir.ref>>, i64) -> !fir.ref> +! CHECK: %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref> +! CHECK: %[[VAL_22:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref> +! CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_21]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_25:.*]] = arith.andi %[[VAL_23]], %[[VAL_24]] : i1 +! CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: omp.yield +! CHECK: omp.terminator +! 
CHECK: return -!CHECK-LABEL: func.func @_QPsimple_reduction_switch_order( -!CHECK-SAME: %[[ARRAY:.*]]: !fir.ref>> {fir.bindc_name = "y"}) { -!CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"} -!CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"} -!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[XREF]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) -!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[ARRAY]](%{{.*}}) {uniq_name = "_QFsimple_reduction_switch_orderEy"} : (!fir.ref>>, !fir.shape<1>) -> (!fir.ref>>, !fir.ref>>) -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C100:.*]] = arith.constant 100 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[X_DECL]]#0 : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_DECL]]#1 : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: %[[CONVI_64:.*]] = fir.convert %[[I_PVT_VAL]] : (i32) -> i64 -!CHECK: %[[Y_I_REF:.*]] = hlfir.designate %[[Y_DECL]]#0 (%[[CONVI_64]]) : (!fir.ref>>, i64) -> !fir.ref> -!CHECK: %[[YVAL:.*]] = fir.load %[[Y_I_REF]] : !fir.ref> -!CHECK: omp.reduction %[[YVAL]], %[[X_DECL]]#0 : !fir.logical<4>, !fir.ref> -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return subroutine simple_reduction_switch_order(y) logical :: x, y(100) x = .true. 
@@ -84,43 +113,75 @@ subroutine simple_reduction_switch_order(y) !$omp end parallel end subroutine -!CHECK-LABEL: func.func @_QPmultiple_reductions -!CHECK-SAME %[[ARRAY:.*]]: !fir.ref>> {fir.bindc_name = "w"}) { -!CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_reductionsEi"} -!CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %[[IREF]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[W_DECL:.*]]:2 = hlfir.declare %[[ARRAY]](%{{.*}}) {uniq_name = "_QFmultiple_reductionsEw"} : (!fir.ref>>, !fir.shape<1>) -> (!fir.ref>>, !fir.ref>>) -!CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFmultiple_reductionsEx"} -!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[XREF]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) -!CHECK: %[[YREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"} -!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[YREF]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) -!CHECK: %[[ZREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name = "_QFmultiple_reductionsEz"} -!CHECK: %[[Z_DECL:.*]]:2 = hlfir.declare %[[ZREF]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C100:.*]] = arith.constant 100 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[X_DECL]]#0 : !fir.ref>, @[[RED_NAME]] -> %[[Y_DECL]]#0 : !fir.ref>, @[[RED_NAME]] -> %[[Z_DECL]]#0 : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to 
%[[I_PVT_DECL]]#1 : !fir.ref -!CHECK: %[[I_PVT_VAL1:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: %[[CONVI_64_1:.*]] = fir.convert %[[I_PVT_VAL1]] : (i32) -> i64 -!CHECK: %[[W_I_REF:.*]] = hlfir.designate %[[W_DECL]]#0 (%[[CONVI_64_1]]) : (!fir.ref>>, i64) -> !fir.ref> -!CHECK: %[[W_I_VAL:.*]] = fir.load %[[W_I_REF]] : !fir.ref> -!CHECK: omp.reduction %[[W_I_VAL]], %[[X_DECL]]#0 : !fir.logical<4>, !fir.ref> -!CHECK: %[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: %[[CONVI_64_2:.*]] = fir.convert %[[I_PVT_VAL2]] : (i32) -> i64 -!CHECK: %[[W_I_REF:.*]] = hlfir.designate %[[W_DECL]]#0 (%[[CONVI_64_2]]) : (!fir.ref>>, i64) -> !fir.ref> -!CHECK: %[[W_I_VAL:.*]] = fir.load %[[W_I_REF]] : !fir.ref> -!CHECK: omp.reduction %[[W_I_VAL]], %[[Y_DECL]]#0 : !fir.logical<4>, !fir.ref> -!CHECK: %[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: %[[CONVI_64_2:.*]] = fir.convert %[[I_PVT_VAL2]] : (i32) -> i64 -!CHECK: %[[W_I_REF:.*]] = hlfir.designate %[[W_DECL]]#0 (%[[CONVI_64_2]]) : (!fir.ref>>, i64) -> !fir.ref> -!CHECK: %[[W_I_VAL:.*]] = fir.load %[[W_I_REF]] : !fir.ref> -!CHECK: omp.reduction %[[W_I_VAL]], %[[Z_DECL]]#0 : !fir.logical<4>, !fir.ref> -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return +! CHECK-LABEL: func.func @_QPmultiple_reductions( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>> {fir.bindc_name = "w"}) { +! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_reductionsEi"} +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_3:.*]] = arith.constant 100 : index +! CHECK: %[[VAL_4:.*]] = fir.shape %[[VAL_3]] : (index) -> !fir.shape<1> +! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_4]]) {uniq_name = "_QFmultiple_reductionsEw"} : (!fir.ref>>, !fir.shape<1>) -> (!fir.ref>>, !fir.ref>>) +! 
CHECK: %[[VAL_6:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFmultiple_reductionsEx"} +! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_8:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"} +! CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_10:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name = "_QFmultiple_reductionsEz"} +! CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_12:.*]] = arith.constant true +! CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_13]] to %[[VAL_7]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: %[[VAL_14:.*]] = arith.constant true +! CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_15]] to %[[VAL_9]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: %[[VAL_16:.*]] = arith.constant true +! CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_17]] to %[[VAL_11]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: omp.parallel { +! CHECK: %[[VAL_18:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_20:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_21:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_22:.*]] = arith.constant 1 : i32 +! 
CHECK: omp.wsloop reduction(@and_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref>, @and_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref>, @and_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref>) for (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) { +! CHECK: fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref +! CHECK: %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_25]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref> +! CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref +! CHECK: %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64 +! CHECK: %[[VAL_33:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_32]]) : (!fir.ref>>, i64) -> !fir.ref> +! CHECK: %[[VAL_34:.*]] = fir.load %[[VAL_33]] : !fir.ref> +! CHECK: %[[VAL_35:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_36:.*]] = fir.convert %[[VAL_34]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_37:.*]] = arith.andi %[[VAL_35]], %[[VAL_36]] : i1 +! CHECK: %[[VAL_38:.*]] = fir.convert %[[VAL_37]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_38]] to %[[VAL_27]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: %[[VAL_39:.*]] = fir.load %[[VAL_28]]#0 : !fir.ref> +! CHECK: %[[VAL_40:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref +! CHECK: %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> i64 +! CHECK: %[[VAL_42:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_41]]) : (!fir.ref>>, i64) -> !fir.ref> +! CHECK: %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref> +! CHECK: %[[VAL_44:.*]] = fir.convert %[[VAL_39]] : (!fir.logical<4>) -> i1 +! 
CHECK: %[[VAL_45:.*]] = fir.convert %[[VAL_43]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_46:.*]] = arith.andi %[[VAL_44]], %[[VAL_45]] : i1 +! CHECK: %[[VAL_47:.*]] = fir.convert %[[VAL_46]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_47]] to %[[VAL_28]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: %[[VAL_48:.*]] = fir.load %[[VAL_29]]#0 : !fir.ref> +! CHECK: %[[VAL_49:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref +! CHECK: %[[VAL_50:.*]] = fir.convert %[[VAL_49]] : (i32) -> i64 +! CHECK: %[[VAL_51:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_50]]) : (!fir.ref>>, i64) -> !fir.ref> +! CHECK: %[[VAL_52:.*]] = fir.load %[[VAL_51]] : !fir.ref> +! CHECK: %[[VAL_53:.*]] = fir.convert %[[VAL_48]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_54:.*]] = fir.convert %[[VAL_52]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_55:.*]] = arith.andi %[[VAL_53]], %[[VAL_54]] : i1 +! CHECK: %[[VAL_56:.*]] = fir.convert %[[VAL_55]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_56]] to %[[VAL_29]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: omp.yield +! CHECK: omp.terminator +! CHECK: return + + + subroutine multiple_reductions(w) logical :: x,y,z,w(100) x = .true. diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv.f90 index f25ed6300501b2..8204e4c878cb04 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv.f90 @@ -1,43 +1,58 @@ ! RUN: bbc -emit-hlfir -fopenmp %s -o - | FileCheck %s ! 
RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s -!CHECK-LABEL: omp.reduction.declare -!CHECK-SAME: @[[RED_NAME:.*]] : !fir.logical<4> init { -!CHECK: ^bb0(%{{.*}}: !fir.logical<4>): -!CHECK: %true = arith.constant true -!CHECK: %[[true_fir:.*]] = fir.convert %true : (i1) -> !fir.logical<4> -!CHECK: omp.yield(%[[true_fir]] : !fir.logical<4>) -!CHECK: } combiner { -!CHECK: ^bb0(%[[ARG0:.*]]: !fir.logical<4>, %[[ARG1:.*]]: !fir.logical<4>): -!CHECK: %[[arg0_i1:.*]] = fir.convert %[[ARG0]] : (!fir.logical<4>) -> i1 -!CHECK: %[[arg1_i1:.*]] = fir.convert %[[ARG1]] : (!fir.logical<4>) -> i1 -!CHECK: %[[RES:.*]] = arith.cmpi eq, %[[arg0_i1]], %[[arg1_i1]] : i1 -!CHECK: %[[RES_logical:.*]] = fir.convert %[[RES]] : (i1) -> !fir.logical<4> -!CHECK: omp.yield(%[[RES_logical]] : !fir.logical<4>) -!CHECK: } +! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py + +! CHECK-LABEL: omp.reduction.declare @eqv_reduction : !fir.logical<4> init { +! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.logical<4>): +! CHECK: %[[VAL_1:.*]] = arith.constant true +! CHECK: %[[VAL_2:.*]] = fir.convert %[[VAL_1]] : (i1) -> !fir.logical<4> +! CHECK: omp.yield(%[[VAL_2]] : !fir.logical<4>) + +! CHECK-LABEL: } combiner { +! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.logical<4>, %[[VAL_1:.*]]: !fir.logical<4>): +! CHECK: %[[VAL_2:.*]] = fir.convert %[[VAL_0]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_3:.*]] = fir.convert %[[VAL_1]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_4:.*]] = arith.cmpi eq, %[[VAL_2]], %[[VAL_3]] : i1 +! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : (i1) -> !fir.logical<4> +! CHECK: omp.yield(%[[VAL_5]] : !fir.logical<4>) +! CHECK: } + +! CHECK-LABEL: func.func @_QPsimple_reduction( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>> {fir.bindc_name = "y"}) { +! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"} +! 
CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"} +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_5:.*]] = arith.constant 100 : index +! CHECK: %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1> +! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_6]]) {uniq_name = "_QFsimple_reductionEy"} : (!fir.ref>>, !fir.shape<1>) -> (!fir.ref>>, !fir.ref>>) +! CHECK: %[[VAL_8:.*]] = arith.constant true +! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: omp.parallel { +! CHECK: %[[VAL_10:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref +! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref> +! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref +! CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (i32) -> i64 +! CHECK: %[[VAL_21:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_20]]) : (!fir.ref>>, i64) -> !fir.ref> +! CHECK: %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref> +! 
CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_18]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_25:.*]] = arith.cmpi eq, %[[VAL_23]], %[[VAL_24]] : i1 +! CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: omp.yield +! CHECK: omp.terminator +! CHECK: return -!CHECK-LABEL: func.func @_QPsimple_reduction( -!CHECK-SAME: %[[ARRAY:.*]]: !fir.ref>> {fir.bindc_name = "y"}) { -!CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"} -!CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"} -!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[XREF]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) -!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[ARRAY]](%4) {uniq_name = "_QFsimple_reductionEy"} : (!fir.ref>>, !fir.shape<1>) -> (!fir.ref>>, !fir.ref>>) -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C100:.*]] = arith.constant 100 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[X_DECL]]#0 : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_DECL]]#1 : !fir.ref -!CHECK: %[[I_PVT:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: %[[I_PVT_64:.*]] = fir.convert %[[I_PVT]] : (i32) -> i64 -!CHECK: %[[Y_I_REF:.*]] = hlfir.designate %[[Y_DECL]]#0 (%[[I_PVT_64]]) : (!fir.ref>>, i64) -> !fir.ref> -!CHECK: %[[Y_I_VAL:.*]] = fir.load %[[Y_I_REF]] : !fir.ref> -!CHECK: omp.reduction %[[Y_I_VAL]], %[[X_DECL]]#0 : !fir.logical<4>, 
!fir.ref> -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return subroutine simple_reduction(y) logical :: x, y(100) x = .true. @@ -50,28 +65,41 @@ subroutine simple_reduction(y) !$omp end parallel end subroutine -!CHECK-LABEL: func.func @_QPsimple_reduction_switch_order( -!CHECK-SAME: %[[ARRAY:.*]]: !fir.ref>> {fir.bindc_name = "y"}) { -!CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"} -!CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"} -!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[XREF]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) -!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[ARRAY]](%{{.*}}) {uniq_name = "_QFsimple_reduction_switch_orderEy"} : (!fir.ref>>, !fir.shape<1>) -> (!fir.ref>>, !fir.ref>>) -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C100:.*]] = arith.constant 100 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[X_DECL]]#0 : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_DECL]]#1 : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: %[[CONVI_64:.*]] = fir.convert %[[I_PVT_VAL]] : (i32) -> i64 -!CHECK: %[[Y_I_REF:.*]] = hlfir.designate %[[Y_DECL]]#0 (%[[CONVI_64]]) : (!fir.ref>>, i64) -> !fir.ref> -!CHECK: %[[YVAL:.*]] = fir.load %[[Y_I_REF]] : !fir.ref> -!CHECK: omp.reduction %[[YVAL]], %[[X_DECL]]#0 : !fir.logical<4>, !fir.ref> -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return +! CHECK-LABEL: func.func @_QPsimple_reduction_switch_order( +! 
CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>> {fir.bindc_name = "y"}) { +! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"} +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"} +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_5:.*]] = arith.constant 100 : index +! CHECK: %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1> +! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_6]]) {uniq_name = "_QFsimple_reduction_switch_orderEy"} : (!fir.ref>>, !fir.shape<1>) -> (!fir.ref>>, !fir.ref>>) +! CHECK: %[[VAL_8:.*]] = arith.constant true +! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: omp.parallel { +! CHECK: %[[VAL_10:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref +! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref +! CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (i32) -> i64 +! 
CHECK: %[[VAL_20:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_19]]) : (!fir.ref>>, i64) -> !fir.ref> +! CHECK: %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref> +! CHECK: %[[VAL_22:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref> +! CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_21]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_25:.*]] = arith.cmpi eq, %[[VAL_23]], %[[VAL_24]] : i1 +! CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: omp.yield +! CHECK: omp.terminator +! CHECK: return + subroutine simple_reduction_switch_order(y) logical :: x, y(100) x = .true. @@ -84,44 +112,73 @@ subroutine simple_reduction_switch_order(y) !$omp end parallel end subroutine -!CHECK-LABEL: func.func @_QPmultiple_reductions -!CHECK-SAME %[[ARRAY:.*]]: !fir.ref>> {fir.bindc_name = "w"}) { -!CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_reductionsEi"} -!CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %[[IREF]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[W_DECL:.*]]:2 = hlfir.declare %[[ARRAY]](%{{.*}}) {uniq_name = "_QFmultiple_reductionsEw"} : (!fir.ref>>, !fir.shape<1>) -> (!fir.ref>>, !fir.ref>>) -!CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFmultiple_reductionsEx"} -!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[XREF]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) -!CHECK: %[[YREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"} -!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[YREF]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) -!CHECK: %[[ZREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name = "_QFmultiple_reductionsEz"} -!CHECK: %[[Z_DECL:.*]]:2 = hlfir.declare %[[ZREF]] 
{uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C100:.*]] = arith.constant 100 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[X_DECL]]#0 : !fir.ref>, @[[RED_NAME]] -> %[[Y_DECL]]#0 : -!!fir.ref>, @[[RED_NAME]] -> %[[Z_DECL]]#0 : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_DECL]]#1 : !fir.ref -!CHECK: %[[I_PVT_VAL1:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: %[[CONVI_64_1:.*]] = fir.convert %[[I_PVT_VAL1]] : (i32) -> i64 -!CHECK: %[[W_I_REF:.*]] = hlfir.designate %[[W_DECL]]#0 (%[[CONVI_64_1]]) : (!fir.ref>>, i64) -> !fir.ref> -!CHECK: %[[W_I_VAL:.*]] = fir.load %[[W_I_REF]] : !fir.ref> -!CHECK: omp.reduction %[[W_I_VAL]], %[[X_DECL]]#0 : !fir.logical<4>, !fir.ref> -!CHECK: %[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: %[[CONVI_64_2:.*]] = fir.convert %[[I_PVT_VAL2]] : (i32) -> i64 -!CHECK: %[[W_I_REF:.*]] = hlfir.designate %[[W_DECL]]#0 (%[[CONVI_64_2]]) : (!fir.ref>>, i64) -> !fir.ref> -!CHECK: %[[W_I_VAL:.*]] = fir.load %[[W_I_REF]] : !fir.ref> -!CHECK: omp.reduction %[[W_I_VAL]], %[[Y_DECL]]#0 : !fir.logical<4>, !fir.ref> -!CHECK: %[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: %[[CONVI_64_2:.*]] = fir.convert %[[I_PVT_VAL2]] : (i32) -> i64 -!CHECK: %[[W_I_REF:.*]] = hlfir.designate %[[W_DECL]]#0 (%[[CONVI_64_2]]) : (!fir.ref>>, i64) -> !fir.ref> -!CHECK: %[[W_I_VAL:.*]] = fir.load %[[W_I_REF]] : !fir.ref> -!CHECK: omp.reduction %[[W_I_VAL]], %[[Z_DECL]]#0 : !fir.logical<4>, !fir.ref> -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return +! 
CHECK-LABEL: func.func @_QPmultiple_reductions( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>> {fir.bindc_name = "w"}) { +! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_reductionsEi"} +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_3:.*]] = arith.constant 100 : index +! CHECK: %[[VAL_4:.*]] = fir.shape %[[VAL_3]] : (index) -> !fir.shape<1> +! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_4]]) {uniq_name = "_QFmultiple_reductionsEw"} : (!fir.ref>>, !fir.shape<1>) -> (!fir.ref>>, !fir.ref>>) +! CHECK: %[[VAL_6:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFmultiple_reductionsEx"} +! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_8:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"} +! CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_10:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name = "_QFmultiple_reductionsEz"} +! CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_12:.*]] = arith.constant true +! CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_13]] to %[[VAL_7]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: %[[VAL_14:.*]] = arith.constant true +! CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_15]] to %[[VAL_9]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: %[[VAL_16:.*]] = arith.constant true +! CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_17]] to %[[VAL_11]]#0 : !fir.logical<4>, !fir.ref> +! 
CHECK: omp.parallel { +! CHECK: %[[VAL_18:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_20:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_21:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_22:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref>, @eqv_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref>, @eqv_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref>) for (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) { +! CHECK: fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref +! CHECK: %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_25]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref> +! CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref +! CHECK: %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64 +! CHECK: %[[VAL_33:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_32]]) : (!fir.ref>>, i64) -> !fir.ref> +! CHECK: %[[VAL_34:.*]] = fir.load %[[VAL_33]] : !fir.ref> +! CHECK: %[[VAL_35:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_36:.*]] = fir.convert %[[VAL_34]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_37:.*]] = arith.cmpi eq, %[[VAL_35]], %[[VAL_36]] : i1 +! CHECK: %[[VAL_38:.*]] = fir.convert %[[VAL_37]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_38]] to %[[VAL_27]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: %[[VAL_39:.*]] = fir.load %[[VAL_28]]#0 : !fir.ref> +! 
CHECK: %[[VAL_40:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref +! CHECK: %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> i64 +! CHECK: %[[VAL_42:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_41]]) : (!fir.ref>>, i64) -> !fir.ref> +! CHECK: %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref> +! CHECK: %[[VAL_44:.*]] = fir.convert %[[VAL_39]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_45:.*]] = fir.convert %[[VAL_43]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_46:.*]] = arith.cmpi eq, %[[VAL_44]], %[[VAL_45]] : i1 +! CHECK: %[[VAL_47:.*]] = fir.convert %[[VAL_46]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_47]] to %[[VAL_28]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: %[[VAL_48:.*]] = fir.load %[[VAL_29]]#0 : !fir.ref> +! CHECK: %[[VAL_49:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref +! CHECK: %[[VAL_50:.*]] = fir.convert %[[VAL_49]] : (i32) -> i64 +! CHECK: %[[VAL_51:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_50]]) : (!fir.ref>>, i64) -> !fir.ref> +! CHECK: %[[VAL_52:.*]] = fir.load %[[VAL_51]] : !fir.ref> +! CHECK: %[[VAL_53:.*]] = fir.convert %[[VAL_48]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_54:.*]] = fir.convert %[[VAL_52]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_55:.*]] = arith.cmpi eq, %[[VAL_53]], %[[VAL_54]] : i1 +! CHECK: %[[VAL_56:.*]] = fir.convert %[[VAL_55]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_56]] to %[[VAL_29]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: omp.yield +! CHECK: omp.terminator +! CHECK: return + subroutine multiple_reductions(w) logical :: x,y,z,w(100) x = .true. diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv.f90 index 54227cbed56029..623368a50e8645 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv.f90 @@ -1,43 +1,58 @@ ! RUN: bbc -emit-hlfir -fopenmp %s -o - | FileCheck %s ! 
RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s -!CHECK-LABEL: omp.reduction.declare -!CHECK-SAME: @[[RED_NAME:.*]] : !fir.logical<4> init { -!CHECK: ^bb0(%{{.*}}: !fir.logical<4>): -!CHECK: %false = arith.constant false -!CHECK: %[[false_fir:.*]] = fir.convert %false : (i1) -> !fir.logical<4> -!CHECK: omp.yield(%[[false_fir]] : !fir.logical<4>) -!CHECK: } combiner { -!CHECK: ^bb0(%[[ARG0:.*]]: !fir.logical<4>, %[[ARG1:.*]]: !fir.logical<4>): -!CHECK: %[[arg0_i1:.*]] = fir.convert %[[ARG0]] : (!fir.logical<4>) -> i1 -!CHECK: %[[arg1_i1:.*]] = fir.convert %[[ARG1]] : (!fir.logical<4>) -> i1 -!CHECK: %[[RES:.*]] = arith.cmpi ne, %[[arg0_i1]], %[[arg1_i1]] : i1 -!CHECK: %[[RES_logical:.*]] = fir.convert %[[RES]] : (i1) -> !fir.logical<4> -!CHECK: omp.yield(%[[RES_logical]] : !fir.logical<4>) -!CHECK: } +! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py + +! CHECK-LABEL: omp.reduction.declare @neqv_reduction : !fir.logical<4> init { +! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.logical<4>): +! CHECK: %[[VAL_1:.*]] = arith.constant false +! CHECK: %[[VAL_2:.*]] = fir.convert %[[VAL_1]] : (i1) -> !fir.logical<4> +! CHECK: omp.yield(%[[VAL_2]] : !fir.logical<4>) + +! CHECK-LABEL: } combiner { +! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.logical<4>, %[[VAL_1:.*]]: !fir.logical<4>): +! CHECK: %[[VAL_2:.*]] = fir.convert %[[VAL_0]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_3:.*]] = fir.convert %[[VAL_1]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_4:.*]] = arith.cmpi ne, %[[VAL_2]], %[[VAL_3]] : i1 +! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : (i1) -> !fir.logical<4> +! CHECK: omp.yield(%[[VAL_5]] : !fir.logical<4>) +! CHECK: } + +! CHECK-LABEL: func.func @_QPsimple_reduction( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>> {fir.bindc_name = "y"}) { +! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"} +! 
CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"} +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_5:.*]] = arith.constant 100 : index +! CHECK: %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1> +! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_6]]) {uniq_name = "_QFsimple_reductionEy"} : (!fir.ref>>, !fir.shape<1>) -> (!fir.ref>>, !fir.ref>>) +! CHECK: %[[VAL_8:.*]] = arith.constant true +! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: omp.parallel { +! CHECK: %[[VAL_10:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref +! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref> +! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref +! CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (i32) -> i64 +! CHECK: %[[VAL_21:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_20]]) : (!fir.ref>>, i64) -> !fir.ref> +! CHECK: %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref> +! 
CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_18]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_25:.*]] = arith.cmpi ne, %[[VAL_23]], %[[VAL_24]] : i1 +! CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: omp.yield +! CHECK: omp.terminator +! CHECK: return -!CHECK-LABEL: func.func @_QPsimple_reduction( -!CHECK-SAME: %[[ARRAY:.*]]: !fir.ref>> {fir.bindc_name = "y"}) { -!CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"} -!CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"} -!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[XREF]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) -!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[ARRAY]](%4) {uniq_name = "_QFsimple_reductionEy"} : (!fir.ref>>, !fir.shape<1>) -> (!fir.ref>>, !fir.ref>>) -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C100:.*]] = arith.constant 100 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[X_DECL]]#0 : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_DECL]]#1 : !fir.ref -!CHECK: %[[I_PVT:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: %[[I_PVT_64:.*]] = fir.convert %[[I_PVT]] : (i32) -> i64 -!CHECK: %[[Y_I_REF:.*]] = hlfir.designate %[[Y_DECL]]#0 (%[[I_PVT_64]]) : (!fir.ref>>, i64) -> !fir.ref> -!CHECK: %[[Y_I_VAL:.*]] = fir.load %[[Y_I_REF]] : !fir.ref> -!CHECK: omp.reduction %[[Y_I_VAL]], %[[X_DECL]]#0 : !fir.logical<4>, 
!fir.ref> -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return subroutine simple_reduction(y) logical :: x, y(100) x = .true. @@ -50,28 +65,43 @@ subroutine simple_reduction(y) !$omp end parallel end subroutine -!CHECK-LABEL: func.func @_QPsimple_reduction_switch_order( -!CHECK-SAME: %[[ARRAY:.*]]: !fir.ref>> {fir.bindc_name = "y"}) { -!CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"} -!CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"} -!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[XREF]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) -!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[ARRAY]](%{{.*}}) {uniq_name = "_QFsimple_reduction_switch_orderEy"} : (!fir.ref>>, !fir.shape<1>) -> (!fir.ref>>, !fir.ref>>) -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C100:.*]] = arith.constant 100 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[X_DECL]]#0 : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_DECL]]#1 : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: %[[CONVI_64:.*]] = fir.convert %[[I_PVT_VAL]] : (i32) -> i64 -!CHECK: %[[Y_I_REF:.*]] = hlfir.designate %[[Y_DECL]]#0 (%[[CONVI_64]]) : (!fir.ref>>, i64) -> !fir.ref> -!CHECK: %[[YVAL:.*]] = fir.load %[[Y_I_REF]] : !fir.ref> -!CHECK: omp.reduction %[[YVAL]], %[[X_DECL]]#0 : !fir.logical<4>, !fir.ref> -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return + +! CHECK-LABEL: func.func @_QPsimple_reduction_switch_order( +! 
CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>> {fir.bindc_name = "y"}) { +! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"} +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"} +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_5:.*]] = arith.constant 100 : index +! CHECK: %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1> +! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_6]]) {uniq_name = "_QFsimple_reduction_switch_orderEy"} : (!fir.ref>>, !fir.shape<1>) -> (!fir.ref>>, !fir.ref>>) +! CHECK: %[[VAL_8:.*]] = arith.constant true +! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: omp.parallel { +! CHECK: %[[VAL_10:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref +! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref +! CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (i32) -> i64 +! 
CHECK: %[[VAL_20:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_19]]) : (!fir.ref>>, i64) -> !fir.ref> +! CHECK: %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref> +! CHECK: %[[VAL_22:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref> +! CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_21]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_25:.*]] = arith.cmpi ne, %[[VAL_23]], %[[VAL_24]] : i1 +! CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: omp.yield +! CHECK: omp.terminator +! CHECK: return + + subroutine simple_reduction_switch_order(y) logical :: x, y(100) x = .true. @@ -84,44 +114,76 @@ subroutine simple_reduction_switch_order(y) !$omp end parallel end subroutine -!CHECK-LABEL: func.func @_QPmultiple_reductions -!CHECK-SAME %[[ARRAY:.*]]: !fir.ref>> {fir.bindc_name = "w"}) { -!CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_reductionsEi"} -!CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %[[IREF]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[W_DECL:.*]]:2 = hlfir.declare %[[ARRAY]](%{{.*}}) {uniq_name = "_QFmultiple_reductionsEw"} : (!fir.ref>>, !fir.shape<1>) -> (!fir.ref>>, !fir.ref>>) -!CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFmultiple_reductionsEx"} -!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[XREF]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) -!CHECK: %[[YREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"} -!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[YREF]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) -!CHECK: %[[ZREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name = "_QFmultiple_reductionsEz"} -!CHECK: %[[Z_DECL:.*]]:2 = hlfir.declare %[[ZREF]] 
{uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C100:.*]] = arith.constant 100 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[X_DECL]]#0 : !fir.ref>, @[[RED_NAME]] -> %[[Y_DECL]]#0 : -!!fir.ref>, @[[RED_NAME]] -> %[[Z_DECL]]#0 : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_DECL]]#1 : !fir.ref -!CHECK: %[[I_PVT_VAL1:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: %[[CONVI_64_1:.*]] = fir.convert %[[I_PVT_VAL1]] : (i32) -> i64 -!CHECK: %[[W_I_REF:.*]] = hlfir.designate %[[W_DECL]]#0 (%[[CONVI_64_1]]) : (!fir.ref>>, i64) -> !fir.ref> -!CHECK: %[[W_I_VAL:.*]] = fir.load %[[W_I_REF]] : !fir.ref> -!CHECK: omp.reduction %[[W_I_VAL]], %[[X_DECL]]#0 : !fir.logical<4>, !fir.ref> -!CHECK: %[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: %[[CONVI_64_2:.*]] = fir.convert %[[I_PVT_VAL2]] : (i32) -> i64 -!CHECK: %[[W_I_REF:.*]] = hlfir.designate %[[W_DECL]]#0 (%[[CONVI_64_2]]) : (!fir.ref>>, i64) -> !fir.ref> -!CHECK: %[[W_I_VAL:.*]] = fir.load %[[W_I_REF]] : !fir.ref> -!CHECK: omp.reduction %[[W_I_VAL]], %[[Y_DECL]]#0 : !fir.logical<4>, !fir.ref> -!CHECK: %[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: %[[CONVI_64_2:.*]] = fir.convert %[[I_PVT_VAL2]] : (i32) -> i64 -!CHECK: %[[W_I_REF:.*]] = hlfir.designate %[[W_DECL]]#0 (%[[CONVI_64_2]]) : (!fir.ref>>, i64) -> !fir.ref> -!CHECK: %[[W_I_VAL:.*]] = fir.load %[[W_I_REF]] : !fir.ref> -!CHECK: omp.reduction %[[W_I_VAL]], %[[Z_DECL]]#0 : !fir.logical<4>, !fir.ref> -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return + +! 
CHECK-LABEL: func.func @_QPmultiple_reductions( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>> {fir.bindc_name = "w"}) { +! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_reductionsEi"} +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_3:.*]] = arith.constant 100 : index +! CHECK: %[[VAL_4:.*]] = fir.shape %[[VAL_3]] : (index) -> !fir.shape<1> +! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_4]]) {uniq_name = "_QFmultiple_reductionsEw"} : (!fir.ref>>, !fir.shape<1>) -> (!fir.ref>>, !fir.ref>>) +! CHECK: %[[VAL_6:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFmultiple_reductionsEx"} +! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_8:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"} +! CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_10:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name = "_QFmultiple_reductionsEz"} +! CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_12:.*]] = arith.constant true +! CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_13]] to %[[VAL_7]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: %[[VAL_14:.*]] = arith.constant true +! CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_15]] to %[[VAL_9]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: %[[VAL_16:.*]] = arith.constant true +! CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_17]] to %[[VAL_11]]#0 : !fir.logical<4>, !fir.ref> +! 
CHECK: omp.parallel { +! CHECK: %[[VAL_18:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_20:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_21:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_22:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref>, @neqv_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref>, @neqv_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref>) for (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) { +! CHECK: fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref +! CHECK: %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_25]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref> +! CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref +! CHECK: %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64 +! CHECK: %[[VAL_33:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_32]]) : (!fir.ref>>, i64) -> !fir.ref> +! CHECK: %[[VAL_34:.*]] = fir.load %[[VAL_33]] : !fir.ref> +! CHECK: %[[VAL_35:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_36:.*]] = fir.convert %[[VAL_34]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_37:.*]] = arith.cmpi ne, %[[VAL_35]], %[[VAL_36]] : i1 +! CHECK: %[[VAL_38:.*]] = fir.convert %[[VAL_37]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_38]] to %[[VAL_27]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: %[[VAL_39:.*]] = fir.load %[[VAL_28]]#0 : !fir.ref> +! 
CHECK: %[[VAL_40:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref +! CHECK: %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> i64 +! CHECK: %[[VAL_42:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_41]]) : (!fir.ref>>, i64) -> !fir.ref> +! CHECK: %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref> +! CHECK: %[[VAL_44:.*]] = fir.convert %[[VAL_39]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_45:.*]] = fir.convert %[[VAL_43]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_46:.*]] = arith.cmpi ne, %[[VAL_44]], %[[VAL_45]] : i1 +! CHECK: %[[VAL_47:.*]] = fir.convert %[[VAL_46]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_47]] to %[[VAL_28]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: %[[VAL_48:.*]] = fir.load %[[VAL_29]]#0 : !fir.ref> +! CHECK: %[[VAL_49:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref +! CHECK: %[[VAL_50:.*]] = fir.convert %[[VAL_49]] : (i32) -> i64 +! CHECK: %[[VAL_51:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_50]]) : (!fir.ref>>, i64) -> !fir.ref> +! CHECK: %[[VAL_52:.*]] = fir.load %[[VAL_51]] : !fir.ref> +! CHECK: %[[VAL_53:.*]] = fir.convert %[[VAL_48]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_54:.*]] = fir.convert %[[VAL_52]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_55:.*]] = arith.cmpi ne, %[[VAL_53]], %[[VAL_54]] : i1 +! CHECK: %[[VAL_56:.*]] = fir.convert %[[VAL_55]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_56]] to %[[VAL_29]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: omp.yield +! CHECK: omp.terminator +! CHECK: return +! CHECK: } + + subroutine multiple_reductions(w) logical :: x,y,z,w(100) x = .true. diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-or.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-or.f90 index 4f59ea778cf885..f1ae1bc687cd55 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-logical-or.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-or.f90 @@ -1,43 +1,58 @@ ! RUN: bbc -emit-hlfir -fopenmp %s -o - | FileCheck %s ! 
RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s -!CHECK-LABEL: omp.reduction.declare -!CHECK-SAME: @[[RED_NAME:.*]] : !fir.logical<4> init { -!CHECK: ^bb0(%{{.*}}: !fir.logical<4>): -!CHECK: %false = arith.constant false -!CHECK: %[[false_fir:.*]] = fir.convert %false : (i1) -> !fir.logical<4> -!CHECK: omp.yield(%[[false_fir]] : !fir.logical<4>) -!CHECK: } combiner { -!CHECK: ^bb0(%[[ARG0:.*]]: !fir.logical<4>, %[[ARG1:.*]]: !fir.logical<4>): -!CHECK: %[[arg0_i1:.*]] = fir.convert %[[ARG0]] : (!fir.logical<4>) -> i1 -!CHECK: %[[arg1_i1:.*]] = fir.convert %[[ARG1]] : (!fir.logical<4>) -> i1 -!CHECK: %[[RES:.*]] = arith.ori %[[arg0_i1]], %[[arg1_i1]] : i1 -!CHECK: %[[RES_logical:.*]] = fir.convert %[[RES]] : (i1) -> !fir.logical<4> -!CHECK: omp.yield(%[[RES_logical]] : !fir.logical<4>) -!CHECK: } +! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py + +! CHECK-LABEL: omp.reduction.declare @or_reduction : !fir.logical<4> init { +! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.logical<4>): +! CHECK: %[[VAL_1:.*]] = arith.constant false +! CHECK: %[[VAL_2:.*]] = fir.convert %[[VAL_1]] : (i1) -> !fir.logical<4> +! CHECK: omp.yield(%[[VAL_2]] : !fir.logical<4>) + +! CHECK-LABEL: } combiner { +! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.logical<4>, %[[VAL_1:.*]]: !fir.logical<4>): +! CHECK: %[[VAL_2:.*]] = fir.convert %[[VAL_0]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_3:.*]] = fir.convert %[[VAL_1]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_4:.*]] = arith.ori %[[VAL_2]], %[[VAL_3]] : i1 +! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : (i1) -> !fir.logical<4> +! CHECK: omp.yield(%[[VAL_5]] : !fir.logical<4>) +! CHECK: } + +! CHECK-LABEL: func.func @_QPsimple_reduction( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>> {fir.bindc_name = "y"}) { +! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"} +! 
CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"} +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_5:.*]] = arith.constant 100 : index +! CHECK: %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1> +! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_6]]) {uniq_name = "_QFsimple_reductionEy"} : (!fir.ref>>, !fir.shape<1>) -> (!fir.ref>>, !fir.ref>>) +! CHECK: %[[VAL_8:.*]] = arith.constant true +! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: omp.parallel { +! CHECK: %[[VAL_10:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@or_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref +! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref> +! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref +! CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (i32) -> i64 +! CHECK: %[[VAL_21:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_20]]) : (!fir.ref>>, i64) -> !fir.ref> +! CHECK: %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref> +! 
CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_18]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_25:.*]] = arith.ori %[[VAL_23]], %[[VAL_24]] : i1 +! CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: omp.yield +! CHECK: omp.terminator +! CHECK: return -!CHECK-LABEL: func.func @_QPsimple_reduction( -!CHECK-SAME: %[[ARRAY:.*]]: !fir.ref>> {fir.bindc_name = "y"}) { -!CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"} -!CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"} -!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[XREF]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) -!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[ARRAY]](%4) {uniq_name = "_QFsimple_reductionEy"} : (!fir.ref>>, !fir.shape<1>) -> (!fir.ref>>, !fir.ref>>) -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFsimple_reductionEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C100:.*]] = arith.constant 100 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[X_DECL]]#0 : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_DECL]]#1 : !fir.ref -!CHECK: %[[I_PVT:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: %[[I_PVT_64:.*]] = fir.convert %[[I_PVT]] : (i32) -> i64 -!CHECK: %[[Y_I_REF:.*]] = hlfir.designate %[[Y_DECL]]#0 (%[[I_PVT_64]]) : (!fir.ref>>, i64) -> !fir.ref> -!CHECK: %[[Y_I_VAL:.*]] = fir.load %[[Y_I_REF]] : !fir.ref> -!CHECK: omp.reduction %[[Y_I_VAL]], %[[X_DECL]]#0 : !fir.logical<4>, 
!fir.ref> -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return subroutine simple_reduction(y) logical :: x, y(100) x = .true. @@ -50,28 +65,41 @@ subroutine simple_reduction(y) !$omp end parallel end subroutine -!CHECK-LABEL: func.func @_QPsimple_reduction_switch_order( -!CHECK-SAME: %[[ARRAY:.*]]: !fir.ref>> {fir.bindc_name = "y"}) { -!CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"} -!CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"} -!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[XREF]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) -!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[ARRAY]](%{{.*}}) {uniq_name = "_QFsimple_reduction_switch_orderEy"} : (!fir.ref>>, !fir.shape<1>) -> (!fir.ref>>, !fir.ref>>) -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C100:.*]] = arith.constant 100 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[X_DECL]]#0 : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_DECL]]#1 : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: %[[CONVI_64:.*]] = fir.convert %[[I_PVT_VAL]] : (i32) -> i64 -!CHECK: %[[Y_I_REF:.*]] = hlfir.designate %[[Y_DECL]]#0 (%[[CONVI_64]]) : (!fir.ref>>, i64) -> !fir.ref> -!CHECK: %[[YVAL:.*]] = fir.load %[[Y_I_REF]] : !fir.ref> -!CHECK: omp.reduction %[[YVAL]], %[[X_DECL]]#0 : !fir.logical<4>, !fir.ref> -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return +! CHECK-LABEL: func.func @_QPsimple_reduction_switch_order( +! 
CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>> {fir.bindc_name = "y"}) { +! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"} +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"} +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_5:.*]] = arith.constant 100 : index +! CHECK: %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1> +! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_6]]) {uniq_name = "_QFsimple_reduction_switch_orderEy"} : (!fir.ref>>, !fir.shape<1>) -> (!fir.ref>>, !fir.ref>>) +! CHECK: %[[VAL_8:.*]] = arith.constant true +! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: omp.parallel { +! CHECK: %[[VAL_10:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_reduction_switch_orderEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@or_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref +! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref +! CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (i32) -> i64 +! 
CHECK: %[[VAL_20:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_19]]) : (!fir.ref>>, i64) -> !fir.ref> +! CHECK: %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref> +! CHECK: %[[VAL_22:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref> +! CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_21]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_25:.*]] = arith.ori %[[VAL_23]], %[[VAL_24]] : i1 +! CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_26]] to %[[VAL_17]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: omp.yield +! CHECK: omp.terminator +! CHECK: return + subroutine simple_reduction_switch_order(y) logical :: x, y(100) x = .true. @@ -84,44 +112,75 @@ subroutine simple_reduction_switch_order(y) !$omp end parallel end subroutine -!CHECK-LABEL: func.func @_QPmultiple_reductions -!CHECK-SAME %[[ARRAY:.*]]: !fir.ref>> {fir.bindc_name = "w"}) { -!CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_reductionsEi"} -!CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %[[IREF]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[W_DECL:.*]]:2 = hlfir.declare %[[ARRAY]](%{{.*}}) {uniq_name = "_QFmultiple_reductionsEw"} : (!fir.ref>>, !fir.shape<1>) -> (!fir.ref>>, !fir.ref>>) -!CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFmultiple_reductionsEx"} -!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[XREF]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) -!CHECK: %[[YREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"} -!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[YREF]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) -!CHECK: %[[ZREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name = "_QFmultiple_reductionsEz"} -!CHECK: %[[Z_DECL:.*]]:2 = hlfir.declare %[[ZREF]] 
{uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C100:.*]] = arith.constant 100 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[X_DECL]]#0 : !fir.ref>, @[[RED_NAME]] -> %[[Y_DECL]]#0 : -!!fir.ref>, @[[RED_NAME]] -> %[[Z_DECL]]#0 : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_DECL]]#1 : !fir.ref -!CHECK: %[[I_PVT_VAL1:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: %[[CONVI_64_1:.*]] = fir.convert %[[I_PVT_VAL1]] : (i32) -> i64 -!CHECK: %[[W_I_REF:.*]] = hlfir.designate %[[W_DECL]]#0 (%[[CONVI_64_1]]) : (!fir.ref>>, i64) -> !fir.ref> -!CHECK: %[[W_I_VAL:.*]] = fir.load %[[W_I_REF]] : !fir.ref> -!CHECK: omp.reduction %[[W_I_VAL]], %[[X_DECL]]#0 : !fir.logical<4>, !fir.ref> -!CHECK: %[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: %[[CONVI_64_2:.*]] = fir.convert %[[I_PVT_VAL2]] : (i32) -> i64 -!CHECK: %[[W_I_REF:.*]] = hlfir.designate %[[W_DECL]]#0 (%[[CONVI_64_2]]) : (!fir.ref>>, i64) -> !fir.ref> -!CHECK: %[[W_I_VAL:.*]] = fir.load %[[W_I_REF]] : !fir.ref> -!CHECK: omp.reduction %[[W_I_VAL]], %[[Y_DECL]]#0 : !fir.logical<4>, !fir.ref> -!CHECK: %[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: %[[CONVI_64_2:.*]] = fir.convert %[[I_PVT_VAL2]] : (i32) -> i64 -!CHECK: %[[W_I_REF:.*]] = hlfir.designate %[[W_DECL]]#0 (%[[CONVI_64_2]]) : (!fir.ref>>, i64) -> !fir.ref> -!CHECK: %[[W_I_VAL:.*]] = fir.load %[[W_I_REF]] : !fir.ref> -!CHECK: omp.reduction %[[W_I_VAL]], %[[Z_DECL]]#0 : !fir.logical<4>, !fir.ref> -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return +! 
CHECK-LABEL: func.func @_QPmultiple_reductions( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>> {fir.bindc_name = "w"}) { +! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_reductionsEi"} +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_3:.*]] = arith.constant 100 : index +! CHECK: %[[VAL_4:.*]] = fir.shape %[[VAL_3]] : (index) -> !fir.shape<1> +! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_4]]) {uniq_name = "_QFmultiple_reductionsEw"} : (!fir.ref>>, !fir.shape<1>) -> (!fir.ref>>, !fir.ref>>) +! CHECK: %[[VAL_6:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFmultiple_reductionsEx"} +! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_8:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"} +! CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_10:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name = "_QFmultiple_reductionsEz"} +! CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_12:.*]] = arith.constant true +! CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_13]] to %[[VAL_7]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: %[[VAL_14:.*]] = arith.constant true +! CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_15]] to %[[VAL_9]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: %[[VAL_16:.*]] = arith.constant true +! CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_17]] to %[[VAL_11]]#0 : !fir.logical<4>, !fir.ref> +! 
CHECK: omp.parallel { +! CHECK: %[[VAL_18:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_reductionsEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_20:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_21:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_22:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@or_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref>, @or_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref>, @or_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref>) for (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) { +! CHECK: fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref +! CHECK: %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_25]] {uniq_name = "_QFmultiple_reductionsEz"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref> +! CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref +! CHECK: %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> i64 +! CHECK: %[[VAL_33:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_32]]) : (!fir.ref>>, i64) -> !fir.ref> +! CHECK: %[[VAL_34:.*]] = fir.load %[[VAL_33]] : !fir.ref> +! CHECK: %[[VAL_35:.*]] = fir.convert %[[VAL_30]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_36:.*]] = fir.convert %[[VAL_34]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_37:.*]] = arith.ori %[[VAL_35]], %[[VAL_36]] : i1 +! CHECK: %[[VAL_38:.*]] = fir.convert %[[VAL_37]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_38]] to %[[VAL_27]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: %[[VAL_39:.*]] = fir.load %[[VAL_28]]#0 : !fir.ref> +! CHECK: %[[VAL_40:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref +! 
CHECK: %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> i64 +! CHECK: %[[VAL_42:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_41]]) : (!fir.ref>>, i64) -> !fir.ref> +! CHECK: %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref> +! CHECK: %[[VAL_44:.*]] = fir.convert %[[VAL_39]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_45:.*]] = fir.convert %[[VAL_43]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_46:.*]] = arith.ori %[[VAL_44]], %[[VAL_45]] : i1 +! CHECK: %[[VAL_47:.*]] = fir.convert %[[VAL_46]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_47]] to %[[VAL_28]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: %[[VAL_48:.*]] = fir.load %[[VAL_29]]#0 : !fir.ref> +! CHECK: %[[VAL_49:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref +! CHECK: %[[VAL_50:.*]] = fir.convert %[[VAL_49]] : (i32) -> i64 +! CHECK: %[[VAL_51:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_50]]) : (!fir.ref>>, i64) -> !fir.ref> +! CHECK: %[[VAL_52:.*]] = fir.load %[[VAL_51]] : !fir.ref> +! CHECK: %[[VAL_53:.*]] = fir.convert %[[VAL_48]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_54:.*]] = fir.convert %[[VAL_52]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_55:.*]] = arith.ori %[[VAL_53]], %[[VAL_54]] : i1 +! CHECK: %[[VAL_56:.*]] = fir.convert %[[VAL_55]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_56]] to %[[VAL_29]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: omp.yield +! CHECK: omp.terminator +! CHECK: return + + + subroutine multiple_reductions(w) logical :: x,y,z,w(100) x = .true. diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-max-2.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-max-2.f90 index 7e079470df847f..1f4d61985689fc 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-max-2.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-max-2.f90 @@ -2,7 +2,8 @@ ! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s ! CHECK: omp.wsloop reduction(@max_i_32 -! CHECK: omp.reduction +! CHECK: arith.cmpi sgt +! 
CHECK: arith.select module m1 intrinsic max diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-max-hlfir.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-max-hlfir.f90 index 0c5d99226600bf..ed25cedae90c62 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-max-hlfir.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-max-hlfir.f90 @@ -1,26 +1,48 @@ ! RUN: bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s ! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s -!CHECK: omp.reduction.declare @[[MAX_DECLARE_I:.*]] : i32 init { -!CHECK: %[[MINIMUM_VAL_I:.*]] = arith.constant -2147483648 : i32 -!CHECK: omp.yield(%[[MINIMUM_VAL_I]] : i32) -!CHECK: combiner -!CHECK: ^bb0(%[[ARG0_I:.*]]: i32, %[[ARG1_I:.*]]: i32): -!CHECK: %[[COMB_VAL_I:.*]] = arith.maxsi %[[ARG0_I]], %[[ARG1_I]] : i32 -!CHECK: omp.yield(%[[COMB_VAL_I]] : i32) +! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py + +! CHECK-LABEL: omp.reduction.declare @max_i_32 : i32 init { +! CHECK: ^bb0(%[[VAL_0:.*]]: i32): +! CHECK: %[[VAL_1:.*]] = arith.constant -2147483648 : i32 +! CHECK: omp.yield(%[[VAL_1]] : i32) + +! CHECK-LABEL: } combiner { +! CHECK: ^bb0(%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32): +! CHECK: %[[VAL_2:.*]] = arith.maxsi %[[VAL_0]], %[[VAL_1]] : i32 +! CHECK: omp.yield(%[[VAL_2]] : i32) +! CHECK: } + +! CHECK-LABEL: func.func @_QPreduction_max_int( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "y"}) { +! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFreduction_max_intEi"} +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFreduction_max_intEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_max_intEx"} +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFreduction_max_intEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! 
CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFreduction_max_intEy"} : (!fir.box>) -> (!fir.box>, !fir.box>) +! CHECK: %[[VAL_6:.*]] = arith.constant 0 : i32 +! CHECK: hlfir.assign %[[VAL_6]] to %[[VAL_4]]#0 : i32, !fir.ref +! CHECK: omp.parallel { +! CHECK: %[[VAL_7:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_max_intEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@max_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { +! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref +! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_intEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref +! CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64 +! CHECK: %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]]) : (!fir.box>, i64) -> !fir.ref +! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref +! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref +! CHECK: %[[VAL_20:.*]] = arith.cmpi sgt, %[[VAL_18]], %[[VAL_19]] : i32 +! CHECK: %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : i32 +! CHECK: hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : i32, !fir.ref +! CHECK: omp.yield +! 
CHECK: omp.terminator -!CHECK-LABEL: @_QPreduction_max_int -!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> -!CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_max_intEx"} -!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X_REF]] {uniq_name = "_QFreduction_max_intEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y_BOX]] {uniq_name = "_QFreduction_max_intEy"} : (!fir.box>) -> (!fir.box>, !fir.box>) -!CHECK: omp.parallel -!CHECK: omp.wsloop reduction(@[[MAX_DECLARE_I]] -> %[[X_DECL]]#0 : !fir.ref) for -!CHECK: %[[Y_I_REF:.*]] = hlfir.designate %[[Y_DECL]]#0 ({{.*}}) : (!fir.box>, i64) -> !fir.ref -!CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref -!CHECK: omp.reduction %[[Y_I]], %[[X_DECL]]#0 : i32, !fir.ref -!CHECK: omp.yield -!CHECK: omp.terminator subroutine reduction_max_int(y) integer :: x, y(:) diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-max.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-max.f90 index a2c4b5470c26cf..ea3b1bebce0388 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-max.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-max.f90 @@ -1,56 +1,114 @@ ! RUN: bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s ! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s -!CHECK: omp.reduction.declare @[[MAX_DECLARE_F:.*]] : f32 init { -!CHECK: %[[MINIMUM_VAL_F:.*]] = arith.constant -3.40282347E+38 : f32 -!CHECK: omp.yield(%[[MINIMUM_VAL_F]] : f32) -!CHECK: combiner -!CHECK: ^bb0(%[[ARG0_F:.*]]: f32, %[[ARG1_F:.*]]: f32): -!CHECK: %[[COMB_VAL_F:.*]] = arith.maximumf %[[ARG0_F]], %[[ARG1_F]] {{.*}}: f32 -!CHECK: omp.yield(%[[COMB_VAL_F]] : f32) +! 
NOTE: Assertions have been autogenerated by utils/generate-test-checks.py -!CHECK: omp.reduction.declare @[[MAX_DECLARE_I:.*]] : i32 init { -!CHECK: %[[MINIMUM_VAL_I:.*]] = arith.constant -2147483648 : i32 -!CHECK: omp.yield(%[[MINIMUM_VAL_I]] : i32) -!CHECK: combiner -!CHECK: ^bb0(%[[ARG0_I:.*]]: i32, %[[ARG1_I:.*]]: i32): -!CHECK: %[[COMB_VAL_I:.*]] = arith.maxsi %[[ARG0_I]], %[[ARG1_I]] : i32 -!CHECK: omp.yield(%[[COMB_VAL_I]] : i32) +! CHECK-LABEL: omp.reduction.declare @max_f_32 : f32 init { +! CHECK: ^bb0(%[[VAL_0:.*]]: f32): +! CHECK: %[[VAL_1:.*]] = arith.constant -3.40282347E+38 : f32 +! CHECK: omp.yield(%[[VAL_1]] : f32) + +! CHECK-LABEL: } combiner { +! CHECK: ^bb0(%[[VAL_0:.*]]: f32, %[[VAL_1:.*]]: f32): +! CHECK: %[[VAL_2:.*]] = arith.maximumf %[[VAL_0]], %[[VAL_1]] fastmath : f32 +! CHECK: omp.yield(%[[VAL_2]] : f32) +! CHECK: } + +! CHECK-LABEL: omp.reduction.declare @max_i_32 : i32 init { +! CHECK: ^bb0(%[[VAL_0:.*]]: i32): +! CHECK: %[[VAL_1:.*]] = arith.constant -2147483648 : i32 +! CHECK: omp.yield(%[[VAL_1]] : i32) + +! CHECK-LABEL: } combiner { +! CHECK: ^bb0(%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32): +! CHECK: %[[VAL_2:.*]] = arith.maxsi %[[VAL_0]], %[[VAL_1]] : i32 +! CHECK: omp.yield(%[[VAL_2]] : i32) +! CHECK: } + +! CHECK-LABEL: func.func @_QPreduction_max_int( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "y"}) { +! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFreduction_max_intEi"} +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFreduction_max_intEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_max_intEx"} +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFreduction_max_intEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFreduction_max_intEy"} : (!fir.box>) -> (!fir.box>, !fir.box>) +! 
CHECK: %[[VAL_6:.*]] = arith.constant 0 : i32 +! CHECK: hlfir.assign %[[VAL_6]] to %[[VAL_4]]#0 : i32, !fir.ref +! CHECK: omp.parallel { +! CHECK: %[[VAL_7:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_max_intEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@max_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { +! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref +! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_intEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref +! CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64 +! CHECK: %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]]) : (!fir.box>, i64) -> !fir.ref +! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref +! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref +! CHECK: %[[VAL_20:.*]] = arith.cmpi sgt, %[[VAL_18]], %[[VAL_19]] : i32 +! CHECK: %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : i32 +! CHECK: hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : i32, !fir.ref +! CHECK: omp.yield +! CHECK: omp.terminator + +! CHECK-LABEL: func.func @_QPreduction_max_real( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "y"}) { +! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFreduction_max_realEi"} +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFreduction_max_realEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_3:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFreduction_max_realEx"} +! 
CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFreduction_max_realEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFreduction_max_realEy"} : (!fir.box>) -> (!fir.box>, !fir.box>) +! CHECK: %[[VAL_6:.*]] = arith.constant 0.000000e+00 : f32 +! CHECK: hlfir.assign %[[VAL_6]] to %[[VAL_4]]#0 : f32, !fir.ref +! CHECK: omp.parallel { +! CHECK: %[[VAL_7:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_max_realEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@max_f_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { +! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref +! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_realEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref +! CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64 +! CHECK: %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]]) : (!fir.box>, i64) -> !fir.ref +! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_17]] : !fir.ref +! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref +! CHECK: %[[VAL_20:.*]] = arith.cmpf ogt, %[[VAL_18]], %[[VAL_19]] fastmath : f32 +! CHECK: %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : f32 +! CHECK: hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : f32, !fir.ref +! CHECK: omp.yield +! CHECK: omp.terminator +! CHECK: omp.parallel { +! CHECK: %[[VAL_30:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_31:.*]]:2 = hlfir.declare %[[VAL_30]] {uniq_name = "_QFreduction_max_realEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! 
CHECK: %[[VAL_32:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_33:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_34:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@max_f_32 %[[VAL_4]]#0 -> %[[VAL_35:.*]] : !fir.ref) for (%[[VAL_36:.*]]) : i32 = (%[[VAL_32]]) to (%[[VAL_33]]) inclusive step (%[[VAL_34]]) { +! CHECK: fir.store %[[VAL_36]] to %[[VAL_31]]#1 : !fir.ref +! CHECK: %[[VAL_37:.*]]:2 = hlfir.declare %[[VAL_35]] {uniq_name = "_QFreduction_max_realEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_38:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref +! CHECK: %[[VAL_39:.*]] = fir.convert %[[VAL_38]] : (i32) -> i64 +! CHECK: %[[VAL_40:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_39]]) : (!fir.box>, i64) -> !fir.ref +! CHECK: %[[VAL_41:.*]] = fir.load %[[VAL_40]] : !fir.ref +! CHECK: %[[VAL_42:.*]] = fir.load %[[VAL_37]]#0 : !fir.ref +! CHECK: %[[VAL_43:.*]] = arith.cmpf ogt, %[[VAL_41]], %[[VAL_42]] fastmath : f32 +! CHECK: fir.if %[[VAL_43]] { +! CHECK: %[[VAL_44:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref +! CHECK: %[[VAL_45:.*]] = fir.convert %[[VAL_44]] : (i32) -> i64 +! CHECK: %[[VAL_46:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_45]]) : (!fir.box>, i64) -> !fir.ref +! CHECK: %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref +! CHECK: hlfir.assign %[[VAL_47]] to %[[VAL_37]]#0 : f32, !fir.ref +! CHECK: } else { +! CHECK: } +! CHECK: omp.yield +! 
CHECK: omp.terminator -!CHECK-LABEL: @_QPreduction_max_int -!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> -!CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_max_intEx"} -!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X_REF]] {uniq_name = "_QFreduction_max_intEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y_BOX]] {uniq_name = "_QFreduction_max_intEy"} : (!fir.box>) -> (!fir.box>, !fir.box>) -!CHECK: omp.parallel -!CHECK: %[[I_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %[[I_REF]] {uniq_name = "_QFreduction_max_intEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.wsloop reduction(@[[MAX_DECLARE_I]] -> %[[X_DECL]]#0 : !fir.ref) for -!CHECK: fir.store %arg1 to %[[I_DECL]]#1 : !fir.ref -!CHECK: %[[I_32:.*]] = fir.load %[[I_DECL]]#0 : !fir.ref -!CHECK: %[[I_64:.*]] = fir.convert %[[I_32]] : (i32) -> i64 -!CHECK: %[[Y_I_REF:.*]] = hlfir.designate %[[Y_DECL]]#0 (%[[I_64]]) : (!fir.box>, i64) -> !fir.ref -!CHECK: %[[Y_I_VAL:.*]] = fir.load %[[Y_I_REF]] : !fir.ref -!CHECK: omp.reduction %[[Y_I_VAL]], %[[X_DECL]]#0 : i32, !fir.ref -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK-LABEL: @_QPreduction_max_real -!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> -!CHECK: %[[X_REF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFreduction_max_realEx"} -!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X_REF]] {uniq_name = "_QFreduction_max_realEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y_BOX]] {uniq_name = "_QFreduction_max_realEy"} : (!fir.box>) -> (!fir.box>, !fir.box>) -!CHECK: omp.parallel -!CHECK: %[[I_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %[[I_REF]] {uniq_name = "_QFreduction_max_realEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.wsloop reduction(@[[MAX_DECLARE_F]] -> %[[X_DECL]]#0 : !fir.ref) for -!CHECK: fir.store %arg1 to %[[I_DECL]]#1 : !fir.ref -!CHECK: 
%[[I_32:.*]] = fir.load %[[I_DECL]]#0 : !fir.ref -!CHECK: %[[I_64:.*]] = fir.convert %[[I_32]] : (i32) -> i64 -!CHECK: %[[Y_I_REF:.*]] = hlfir.designate %[[Y_DECL]]#0 (%[[I_64]]) : (!fir.box>, i64) -> !fir.ref -!CHECK: %[[Y_I_VAL:.*]] = fir.load %[[Y_I_REF]] : !fir.ref -!CHECK: omp.yield -!CHECK: omp.terminator subroutine reduction_max_int(y) integer :: x, y(:) @@ -80,7 +138,6 @@ subroutine reduction_max_real(y) !$omp parallel !$omp do reduction(max:x) do i=1, 100 - !CHECK-NOT: omp.reduction if (y(i) .gt. x) x = y(i) end do !$omp end do diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-min.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-min.f90 index af7f718b0b26d0..3aa9001869dc59 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-min.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-min.f90 @@ -1,56 +1,116 @@ ! RUN: bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s ! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s -!CHECK: omp.reduction.declare @[[MIN_DECLARE_F:.*]] : f32 init { -!CHECK: %[[MAXIMUM_VAL_F:.*]] = arith.constant 3.40282347E+38 : f32 -!CHECK: omp.yield(%[[MAXIMUM_VAL_F]] : f32) -!CHECK: combiner -!CHECK: ^bb0(%[[ARG0_F:.*]]: f32, %[[ARG1_F:.*]]: f32): -!CHECK: %[[COMB_VAL_F:.*]] = arith.minimumf %[[ARG0_F]], %[[ARG1_F]] {{.*}}: f32 -!CHECK: omp.yield(%[[COMB_VAL_F]] : f32) +! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py -!CHECK: omp.reduction.declare @[[MIN_DECLARE_I:.*]] : i32 init { -!CHECK: %[[MAXIMUM_VAL_I:.*]] = arith.constant 2147483647 : i32 -!CHECK: omp.yield(%[[MAXIMUM_VAL_I]] : i32) -!CHECK: combiner -!CHECK: ^bb0(%[[ARG0_I:.*]]: i32, %[[ARG1_I:.*]]: i32): -!CHECK: %[[COMB_VAL_I:.*]] = arith.minsi %[[ARG0_I]], %[[ARG1_I]] : i32 -!CHECK: omp.yield(%[[COMB_VAL_I]] : i32) +! CHECK-LABEL: omp.reduction.declare @min_f_32 : f32 init { +! CHECK: ^bb0(%[[VAL_0:.*]]: f32): +! CHECK: %[[VAL_1:.*]] = arith.constant 3.40282347E+38 : f32 +! CHECK: omp.yield(%[[VAL_1]] : f32) + +! 
CHECK-LABEL: } combiner { +! CHECK: ^bb0(%[[VAL_0:.*]]: f32, %[[VAL_1:.*]]: f32): +! CHECK: %[[VAL_2:.*]] = arith.minimumf %[[VAL_0]], %[[VAL_1]] fastmath : f32 +! CHECK: omp.yield(%[[VAL_2]] : f32) +! CHECK: } + +! CHECK-LABEL: omp.reduction.declare @min_i_32 : i32 init { +! CHECK: ^bb0(%[[VAL_0:.*]]: i32): +! CHECK: %[[VAL_1:.*]] = arith.constant 2147483647 : i32 +! CHECK: omp.yield(%[[VAL_1]] : i32) + +! CHECK-LABEL: } combiner { +! CHECK: ^bb0(%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32): +! CHECK: %[[VAL_2:.*]] = arith.minsi %[[VAL_0]], %[[VAL_1]] : i32 +! CHECK: omp.yield(%[[VAL_2]] : i32) +! CHECK: } + +! CHECK-LABEL: func.func @_QPreduction_min_int( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "y"}) { +! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFreduction_min_intEi"} +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFreduction_min_intEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_min_intEx"} +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFreduction_min_intEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFreduction_min_intEy"} : (!fir.box>) -> (!fir.box>, !fir.box>) +! CHECK: %[[VAL_6:.*]] = arith.constant 0 : i32 +! CHECK: hlfir.assign %[[VAL_6]] to %[[VAL_4]]#0 : i32, !fir.ref +! CHECK: omp.parallel { +! CHECK: %[[VAL_7:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_min_intEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@min_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { +! 
CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref +! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_min_intEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref +! CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64 +! CHECK: %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]]) : (!fir.box>, i64) -> !fir.ref +! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref +! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref +! CHECK: %[[VAL_20:.*]] = arith.cmpi slt, %[[VAL_18]], %[[VAL_19]] : i32 +! CHECK: %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : i32 +! CHECK: hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : i32, !fir.ref +! CHECK: omp.yield +! CHECK: omp.terminator + +! CHECK-LABEL: func.func @_QPreduction_min_real( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "y"}) { +! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFreduction_min_realEi"} +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFreduction_min_realEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_3:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFreduction_min_realEx"} +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFreduction_min_realEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFreduction_min_realEy"} : (!fir.box>) -> (!fir.box>, !fir.box>) +! CHECK: %[[VAL_6:.*]] = arith.constant 0.000000e+00 : f32 +! CHECK: hlfir.assign %[[VAL_6]] to %[[VAL_4]]#0 : f32, !fir.ref +! CHECK: omp.parallel { +! CHECK: %[[VAL_7:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_min_realEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32 +! 
CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@min_f_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { +! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref +! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_min_realEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref +! CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64 +! CHECK: %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]]) : (!fir.box>, i64) -> !fir.ref +! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_17]] : !fir.ref +! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref +! CHECK: %[[VAL_20:.*]] = arith.cmpf olt, %[[VAL_18]], %[[VAL_19]] fastmath : f32 +! CHECK: %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : f32 +! CHECK: hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : f32, !fir.ref +! CHECK: omp.yield +! CHECK: } +! CHECK: omp.terminator +! CHECK: } +! CHECK: omp.parallel { +! CHECK: %[[VAL_30:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_31:.*]]:2 = hlfir.declare %[[VAL_30]] {uniq_name = "_QFreduction_min_realEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_32:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_33:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_34:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@min_f_32 %[[VAL_4]]#0 -> %[[VAL_35:.*]] : !fir.ref) for (%[[VAL_36:.*]]) : i32 = (%[[VAL_32]]) to (%[[VAL_33]]) inclusive step (%[[VAL_34]]) { +! CHECK: fir.store %[[VAL_36]] to %[[VAL_31]]#1 : !fir.ref +! CHECK: %[[VAL_37:.*]]:2 = hlfir.declare %[[VAL_35]] {uniq_name = "_QFreduction_min_realEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_38:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref +! CHECK: %[[VAL_39:.*]] = fir.convert %[[VAL_38]] : (i32) -> i64 +! 
CHECK: %[[VAL_40:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_39]]) : (!fir.box>, i64) -> !fir.ref +! CHECK: %[[VAL_41:.*]] = fir.load %[[VAL_40]] : !fir.ref +! CHECK: %[[VAL_42:.*]] = fir.load %[[VAL_37]]#0 : !fir.ref +! CHECK: %[[VAL_43:.*]] = arith.cmpf ogt, %[[VAL_41]], %[[VAL_42]] fastmath : f32 +! CHECK: fir.if %[[VAL_43]] { +! CHECK: %[[VAL_44:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref +! CHECK: %[[VAL_45:.*]] = fir.convert %[[VAL_44]] : (i32) -> i64 +! CHECK: %[[VAL_46:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_45]]) : (!fir.box>, i64) -> !fir.ref +! CHECK: %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref +! CHECK: hlfir.assign %[[VAL_47]] to %[[VAL_37]]#0 : f32, !fir.ref +! CHECK: } else { +! CHECK: } +! CHECK: omp.yield +! CHECK: omp.terminator -!CHECK-LABEL: @_QPreduction_min_int -!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> -!CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_min_intEx"} -!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X_REF]] {uniq_name = "_QFreduction_min_intEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y_BOX]] {uniq_name = "_QFreduction_min_intEy"} : (!fir.box>) -> (!fir.box>, !fir.box>) -!CHECK: omp.parallel -!CHECK: %[[I_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %[[I_REF]] {uniq_name = "_QFreduction_min_intEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.wsloop reduction(@[[MIN_DECLARE_I]] -> %[[X_DECL]]#0 : !fir.ref) for -!CHECK: fir.store %arg1 to %[[I_DECL]]#1 : !fir.ref -!CHECK: %[[I_32:.*]] = fir.load %[[I_DECL]]#0 : !fir.ref -!CHECK: %[[I_64:.*]] = fir.convert %[[I_32]] : (i32) -> i64 -!CHECK: %[[Y_I_REF:.*]] = hlfir.designate %[[Y_DECL]]#0 (%[[I_64]]) : (!fir.box>, i64) -> !fir.ref -!CHECK: %[[Y_I_VAL:.*]] = fir.load %[[Y_I_REF]] : !fir.ref -!CHECK: omp.reduction %[[Y_I_VAL]], %[[X_DECL]]#0 : i32, !fir.ref -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK-LABEL: @_QPreduction_min_real -!CHECK-SAME: 
%[[Y_BOX:.*]]: !fir.box> -!CHECK: %[[X_REF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFreduction_min_realEx"} -!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X_REF]] {uniq_name = "_QFreduction_min_realEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y_BOX]] {uniq_name = "_QFreduction_min_realEy"} : (!fir.box>) -> (!fir.box>, !fir.box>) -!CHECK: omp.parallel -!CHECK: %[[I_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %[[I_REF]] {uniq_name = "_QFreduction_min_realEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.wsloop reduction(@[[MIN_DECLARE_F]] -> %[[X_DECL]]#0 : !fir.ref) for -!CHECK: fir.store %arg1 to %[[I_DECL]]#1 : !fir.ref -!CHECK: %[[I_32:.*]] = fir.load %[[I_DECL]]#0 : !fir.ref -!CHECK: %[[I_64:.*]] = fir.convert %[[I_32]] : (i32) -> i64 -!CHECK: %[[Y_I_REF:.*]] = hlfir.designate %[[Y_DECL]]#0 (%[[I_64]]) : (!fir.box>, i64) -> !fir.ref -!CHECK: %[[Y_I_VAL:.*]] = fir.load %[[Y_I_REF]] : !fir.ref -!CHECK: omp.yield -!CHECK: omp.terminator subroutine reduction_min_int(y) integer :: x, y(:) @@ -80,7 +140,6 @@ subroutine reduction_min_real(y) !$omp parallel !$omp do reduction(min:x) do i=1, 100 - !CHECK-NOT: omp.reduction if (y(i) .gt. x) x = y(i) end do !$omp end do diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-mul.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-mul.f90 index 7dc8aeeb85592b..4774fba3f33e96 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-mul.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-mul.f90 @@ -1,68 +1,76 @@ ! RUN: bbc -emit-hlfir -fopenmp %s -o - | FileCheck %s ! 
RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s -!CHECK-LABEL: omp.reduction.declare -!CHECK-SAME: @[[RED_F64_NAME:.*]] : f64 init { -!CHECK: ^bb0(%{{.*}}: f64): -!CHECK: %[[C0_1:.*]] = arith.constant 1.000000e+00 : f64 -!CHECK: omp.yield(%[[C0_1]] : f64) -!CHECK: } combiner { -!CHECK: ^bb0(%[[ARG0:.*]]: f64, %[[ARG1:.*]]: f64): -!CHECK: %[[RES:.*]] = arith.mulf %[[ARG0]], %[[ARG1]] {{.*}}: f64 -!CHECK: omp.yield(%[[RES]] : f64) -!CHECK: } -!CHECK-LABEL: omp.reduction.declare -!CHECK-SAME: @[[RED_I64_NAME:.*]] : i64 init { -!CHECK: ^bb0(%{{.*}}: i64): -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i64 -!CHECK: omp.yield(%[[C1_1]] : i64) -!CHECK: } combiner { -!CHECK: ^bb0(%[[ARG0:.*]]: i64, %[[ARG1:.*]]: i64): -!CHECK: %[[RES:.*]] = arith.muli %[[ARG0]], %[[ARG1]] : i64 -!CHECK: omp.yield(%[[RES]] : i64) -!CHECK: } +! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py -!CHECK-LABEL: omp.reduction.declare -!CHECK-SAME: @[[RED_F32_NAME:.*]] : f32 init { -!CHECK: ^bb0(%{{.*}}: f32): -!CHECK: %[[C0_1:.*]] = arith.constant 1.000000e+00 : f32 -!CHECK: omp.yield(%[[C0_1]] : f32) -!CHECK: } combiner { -!CHECK: ^bb0(%[[ARG0:.*]]: f32, %[[ARG1:.*]]: f32): -!CHECK: %[[RES:.*]] = arith.mulf %[[ARG0]], %[[ARG1]] {{.*}}: f32 -!CHECK: omp.yield(%[[RES]] : f32) -!CHECK: } +! CHECK-LABEL: omp.reduction.declare @multiply_reduction_f_64 : f64 init { +! CHECK: ^bb0(%[[VAL_0:.*]]: f64): +! CHECK: %[[VAL_1:.*]] = arith.constant 1.000000e+00 : f64 +! CHECK: omp.yield(%[[VAL_1]] : f64) -!CHECK-LABEL: omp.reduction.declare -!CHECK-SAME: @[[RED_I32_NAME:.*]] : i32 init { -!CHECK: ^bb0(%{{.*}}: i32): -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: omp.yield(%[[C1_1]] : i32) -!CHECK: } combiner { -!CHECK: ^bb0(%[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32): -!CHECK: %[[RES:.*]] = arith.muli %[[ARG0]], %[[ARG1]] : i32 -!CHECK: omp.yield(%[[RES]] : i32) -!CHECK: } +! CHECK-LABEL: } combiner { +! CHECK: ^bb0(%[[VAL_0:.*]]: f64, %[[VAL_1:.*]]: f64): +! 
CHECK: %[[VAL_2:.*]] = arith.mulf %[[VAL_0]], %[[VAL_1]] fastmath : f64 +! CHECK: omp.yield(%[[VAL_2]] : f64) +! CHECK: } -!CHECK-LABEL: func.func @_QPsimple_int_reduction -!CHECK: %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_int_reductionEx"} -!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[XREF]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: hlfir.assign %[[C1_2]] to %[[X_DECL]]#0 : i32, !fir.ref -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFsimple_int_reductionEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C10:.*]] = arith.constant 10 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[X_DECL]]#0 : !fir.ref) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C10]]) inclusive step (%[[C1_2]]) -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_DECL]]#1 : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL]], %[[X_DECL]]#0 : i32, !fir.ref -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return +! CHECK-LABEL: omp.reduction.declare @multiply_reduction_i_64 : i64 init { +! CHECK: ^bb0(%[[VAL_0:.*]]: i64): +! CHECK: %[[VAL_1:.*]] = arith.constant 1 : i64 +! CHECK: omp.yield(%[[VAL_1]] : i64) + +! CHECK-LABEL: } combiner { +! CHECK: ^bb0(%[[VAL_0:.*]]: i64, %[[VAL_1:.*]]: i64): +! CHECK: %[[VAL_2:.*]] = arith.muli %[[VAL_0]], %[[VAL_1]] : i64 +! CHECK: omp.yield(%[[VAL_2]] : i64) +! CHECK: } + +! CHECK-LABEL: omp.reduction.declare @multiply_reduction_f_32 : f32 init { +! CHECK: ^bb0(%[[VAL_0:.*]]: f32): +! CHECK: %[[VAL_1:.*]] = arith.constant 1.000000e+00 : f32 +! CHECK: omp.yield(%[[VAL_1]] : f32) + +! CHECK-LABEL: } combiner { +! 
CHECK: ^bb0(%[[VAL_0:.*]]: f32, %[[VAL_1:.*]]: f32): +! CHECK: %[[VAL_2:.*]] = arith.mulf %[[VAL_0]], %[[VAL_1]] fastmath : f32 +! CHECK: omp.yield(%[[VAL_2]] : f32) +! CHECK: } + +! CHECK-LABEL: omp.reduction.declare @multiply_reduction_i_32 : i32 init { +! CHECK: ^bb0(%[[VAL_0:.*]]: i32): +! CHECK: %[[VAL_1:.*]] = arith.constant 1 : i32 +! CHECK: omp.yield(%[[VAL_1]] : i32) + +! CHECK-LABEL: } combiner { +! CHECK: ^bb0(%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32): +! CHECK: %[[VAL_2:.*]] = arith.muli %[[VAL_0]], %[[VAL_1]] : i32 +! CHECK: omp.yield(%[[VAL_2]] : i32) +! CHECK: } + +! CHECK-LABEL: func.func @_QPsimple_int_reduction() { +! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_int_reductionEi"} +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFsimple_int_reductionEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_int_reductionEx"} +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32 +! CHECK: hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : i32, !fir.ref +! CHECK: omp.parallel { +! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_int_reductionEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_8:.*]] = arith.constant 10 : i32 +! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@multiply_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref +! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! 
CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref +! CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref +! CHECK: %[[VAL_15:.*]] = arith.muli %[[VAL_13]], %[[VAL_14]] : i32 +! CHECK: hlfir.assign %[[VAL_15]] to %[[VAL_12]]#0 : i32, !fir.ref +! CHECK: omp.yield +! CHECK: omp.terminator +! CHECK: return subroutine simple_int_reduction integer :: x @@ -76,25 +84,31 @@ subroutine simple_int_reduction !$omp end parallel end subroutine -!CHECK-LABEL: func.func @_QPsimple_real_reduction -!CHECK: %[[XREF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFsimple_real_reductionEx"} -!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[XREF]] {uniq_name = "_QFsimple_real_reductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[C0_2:.*]] = arith.constant 1.000000e+00 : f32 -!CHECK: hlfir.assign %[[C0_2]] to %[[X_DECL]]#0 : f32, !fir.ref -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFsimple_real_reductionEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C100:.*]] = arith.constant 10 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_F32_NAME]] -> %[[X_DECL]]#0 : !fir.ref) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_DECL]]#1 : !fir.ref -!CHECK: %[[I_PVT_VAL_i32:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: %[[I_PVT_VAL_f32:.*]] = fir.convert %[[I_PVT_VAL_i32]] : (i32) -> f32 -!CHECK: omp.reduction %[[I_PVT_VAL_f32]], %[[X_DECL]]#0 : f32, !fir.ref -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return +! CHECK-LABEL: func.func @_QPsimple_real_reduction() { +! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_real_reductionEi"} +! 
CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFsimple_real_reductionEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_2:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFsimple_real_reductionEx"} +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFsimple_real_reductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_4:.*]] = arith.constant 1.000000e+00 : f32 +! CHECK: hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : f32, !fir.ref +! CHECK: omp.parallel { +! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_real_reductionEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_8:.*]] = arith.constant 10 : i32 +! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@multiply_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref +! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref +! CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref +! CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (i32) -> f32 +! CHECK: %[[VAL_16:.*]] = arith.mulf %[[VAL_13]], %[[VAL_15]] fastmath : f32 +! CHECK: hlfir.assign %[[VAL_16]] to %[[VAL_12]]#0 : f32, !fir.ref +! CHECK: omp.yield +! CHECK: omp.terminator +! 
CHECK: return + subroutine simple_real_reduction real :: x x = 1.0 @@ -107,24 +121,30 @@ subroutine simple_real_reduction !$omp end parallel end subroutine -!CHECK-LABEL: func.func @_QPsimple_int_reduction_switch_order -!CHECK: %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_int_reduction_switch_orderEx"} -!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[XREF]] {uniq_name = "_QFsimple_int_reduction_switch_orderEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: hlfir.assign %[[C1_2]] to %[[X_DECL]]#0 : i32, !fir.ref -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFsimple_int_reduction_switch_orderEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C10:.*]] = arith.constant 10 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[X_DECL]]#0 : !fir.ref) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C10]]) inclusive step (%[[C1_2]]) -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_DECL]]#1 : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL]], %[[X_DECL]]#0 : i32, !fir.ref -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return +! CHECK-LABEL: func.func @_QPsimple_int_reduction_switch_order() { +! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_int_reduction_switch_orderEi"} +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFsimple_int_reduction_switch_orderEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_int_reduction_switch_orderEx"} +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFsimple_int_reduction_switch_orderEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! 
CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32 +! CHECK: hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : i32, !fir.ref +! CHECK: omp.parallel { +! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_int_reduction_switch_orderEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_8:.*]] = arith.constant 10 : i32 +! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@multiply_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref +! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reduction_switch_orderEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref +! CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref +! CHECK: %[[VAL_15:.*]] = arith.muli %[[VAL_13]], %[[VAL_14]] : i32 +! CHECK: hlfir.assign %[[VAL_15]] to %[[VAL_12]]#0 : i32, !fir.ref +! CHECK: omp.yield +! CHECK: omp.terminator +! 
CHECK: return + subroutine simple_int_reduction_switch_order integer :: x x = 1 @@ -137,25 +157,31 @@ subroutine simple_int_reduction_switch_order !$omp end parallel end subroutine -!CHECK-LABEL: func.func @_QPsimple_real_reduction_switch_order -!CHECK: %[[XREF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFsimple_real_reduction_switch_orderEx"} -!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[XREF]] {uniq_name = "_QFsimple_real_reduction_switch_orderEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[C1_2:.*]] = arith.constant 1.000000e+00 : f32 -!CHECK: hlfir.assign %[[C1_2]] to %[[X_DECL]]#0 : f32, !fir.ref -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFsimple_real_reduction_switch_orderEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 -!CHECK: %[[C100:.*]] = arith.constant 10 : i32 -!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop reduction(@[[RED_F32_NAME]] -> %[[X_DECL]]#0 : !fir.ref) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_DECL]]#1 : !fir.ref -!CHECK: %[[I_PVT_VAL_i32:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: %[[I_PVT_VAL_f32:.*]] = fir.convert %[[I_PVT_VAL_i32]] : (i32) -> f32 -!CHECK: omp.reduction %[[I_PVT_VAL_f32]], %[[X_DECL]]#0 : f32, !fir.ref -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return +! CHECK-LABEL: func.func @_QPsimple_real_reduction_switch_order() { +! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_real_reduction_switch_orderEi"} +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFsimple_real_reduction_switch_orderEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_2:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFsimple_real_reduction_switch_orderEx"} +! 
CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFsimple_real_reduction_switch_orderEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_4:.*]] = arith.constant 1.000000e+00 : f32 +! CHECK: hlfir.assign %[[VAL_4]] to %[[VAL_3]]#0 : f32, !fir.ref +! CHECK: omp.parallel { +! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFsimple_real_reduction_switch_orderEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_8:.*]] = arith.constant 10 : i32 +! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@multiply_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref +! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reduction_switch_orderEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref +! CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i32) -> f32 +! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref +! CHECK: %[[VAL_16:.*]] = arith.mulf %[[VAL_14]], %[[VAL_15]] fastmath : f32 +! CHECK: hlfir.assign %[[VAL_16]] to %[[VAL_12]]#0 : f32, !fir.ref +! CHECK: omp.yield +! CHECK: omp.terminator +! 
CHECK: return + subroutine simple_real_reduction_switch_order real :: x x = 1.0 @@ -168,27 +194,48 @@ subroutine simple_real_reduction_switch_order !$omp end parallel end subroutine -!CHECK-LABEL: func.func @_QPmultiple_int_reductions_same_type -!CHECK: %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_int_reductions_same_typeEx"} -!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[XREF]] {uniq_name = "_QFmultiple_int_reductions_same_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[YREF:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFmultiple_int_reductions_same_typeEy"} -!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[YREF]] {uniq_name = "_QFmultiple_int_reductions_same_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[ZREF:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFmultiple_int_reductions_same_typeEz"} -!CHECK: %[[Z_DECL:.*]]:2 = hlfir.declare %[[ZREF]] {uniq_name = "_QFmultiple_int_reductions_same_typeEz"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFmultiple_int_reductions_same_typeEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[X_DECL]]#0 : !fir.ref, @[[RED_I32_NAME]] -> %[[Y_DECL]]#0 : !fir.ref, @[[RED_I32_NAME]] -> %[[Z_DECL]]#0 : !fir.ref) for (%[[IVAL]]) : i32 -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_DECL]]#1 : !fir.ref -!CHECK: %[[I_PVT_VAL1:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL1]], %[[X_DECL]]#0 : i32, !fir.ref -!CHECK: %[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL2]], %[[Y_DECL]]#0 : i32, !fir.ref -!CHECK: %[[I_PVT_VAL3:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL3]], %[[Z_DECL]]#0 : i32, !fir.ref -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return +! 
CHECK-LABEL: func.func @_QPmultiple_int_reductions_same_type() { +! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_int_reductions_same_typeEi"} +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFmultiple_int_reductions_same_typeEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_int_reductions_same_typeEx"} +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFmultiple_int_reductions_same_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_4:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFmultiple_int_reductions_same_typeEy"} +! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFmultiple_int_reductions_same_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_6:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFmultiple_int_reductions_same_typeEz"} +! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFmultiple_int_reductions_same_typeEz"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 +! CHECK: hlfir.assign %[[VAL_8]] to %[[VAL_3]]#0 : i32, !fir.ref +! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 +! CHECK: hlfir.assign %[[VAL_9]] to %[[VAL_5]]#0 : i32, !fir.ref +! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 +! CHECK: hlfir.assign %[[VAL_10]] to %[[VAL_7]]#0 : i32, !fir.ref +! CHECK: omp.parallel { +! CHECK: %[[VAL_11:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFmultiple_int_reductions_same_typeEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_13:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_14:.*]] = arith.constant 10 : i32 +! CHECK: %[[VAL_15:.*]] = arith.constant 1 : i32 +! 
CHECK: omp.wsloop reduction(@multiply_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref, @multiply_reduction_i_32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref, @multiply_reduction_i_32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref) for (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) { +! CHECK: fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref +! CHECK: %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_int_reductions_same_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_int_reductions_same_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_int_reductions_same_typeEz"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_20]]#0 : !fir.ref +! CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref +! CHECK: %[[VAL_25:.*]] = arith.muli %[[VAL_23]], %[[VAL_24]] : i32 +! CHECK: hlfir.assign %[[VAL_25]] to %[[VAL_20]]#0 : i32, !fir.ref +! CHECK: %[[VAL_26:.*]] = fir.load %[[VAL_21]]#0 : !fir.ref +! CHECK: %[[VAL_27:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref +! CHECK: %[[VAL_28:.*]] = arith.muli %[[VAL_26]], %[[VAL_27]] : i32 +! CHECK: hlfir.assign %[[VAL_28]] to %[[VAL_21]]#0 : i32, !fir.ref +! CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref +! CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref +! CHECK: %[[VAL_31:.*]] = arith.muli %[[VAL_29]], %[[VAL_30]] : i32 +! CHECK: hlfir.assign %[[VAL_31]] to %[[VAL_22]]#0 : i32, !fir.ref +! CHECK: omp.yield +! CHECK: omp.terminator +! 
CHECK: return + subroutine multiple_int_reductions_same_type integer :: x,y,z x = 1 @@ -205,30 +252,51 @@ subroutine multiple_int_reductions_same_type !$omp end parallel end subroutine -!CHECK-LABEL: func.func @_QPmultiple_real_reductions_same_type -!CHECK: %[[XREF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFmultiple_real_reductions_same_typeEx"} -!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[XREF]] {uniq_name = "_QFmultiple_real_reductions_same_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[YREF:.*]] = fir.alloca f32 {bindc_name = "y", uniq_name = "_QFmultiple_real_reductions_same_typeEy"} -!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[YREF]] {uniq_name = "_QFmultiple_real_reductions_same_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[ZREF:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_real_reductions_same_typeEz"} -!CHECK: %[[Z_DECL:.*]]:2 = hlfir.declare %[[ZREF]] {uniq_name = "_QFmultiple_real_reductions_same_typeEz"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFmultiple_real_reductions_same_typeEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.wsloop reduction(@[[RED_F32_NAME]] -> %[[X_DECL]]#0 : !fir.ref, @[[RED_F32_NAME]] -> %[[Y_DECL]]#0 : !fir.ref, @[[RED_F32_NAME]] -> %[[Z_DECL]]#0 : !fir.ref) for (%[[IVAL]]) : i32 -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_DECL]]#1 : !fir.ref -!CHECK: %[[I_PVT_VAL1_I32:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: %[[I_PVT_VAL1_F32:.*]] = fir.convert %[[I_PVT_VAL1_I32]] : (i32) -> f32 -!CHECK: omp.reduction %[[I_PVT_VAL1_F32]], %[[X_DECL]]#0 : f32, !fir.ref -!CHECK: %[[I_PVT_VAL2_I32:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: %[[I_PVT_VAL2_F32:.*]] = fir.convert %[[I_PVT_VAL2_I32]] : (i32) -> f32 -!CHECK: omp.reduction %[[I_PVT_VAL2_F32]], %[[Y_DECL]]#0 : f32, !fir.ref -!CHECK: 
%[[I_PVT_VAL3_I32:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: %[[I_PVT_VAL3_F32:.*]] = fir.convert %[[I_PVT_VAL3_I32]] : (i32) -> f32 -!CHECK: omp.reduction %[[I_PVT_VAL3_F32]], %[[Z_DECL]]#0 : f32, !fir.ref -!CHECK: omp.yield -!CHECK: omp.terminator -!CHECK: return +! CHECK-LABEL: func.func @_QPmultiple_real_reductions_same_type() { +! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_real_reductions_same_typeEi"} +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFmultiple_real_reductions_same_typeEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_2:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFmultiple_real_reductions_same_typeEx"} +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFmultiple_real_reductions_same_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_4:.*]] = fir.alloca f32 {bindc_name = "y", uniq_name = "_QFmultiple_real_reductions_same_typeEy"} +! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFmultiple_real_reductions_same_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_6:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_real_reductions_same_typeEz"} +! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFmultiple_real_reductions_same_typeEz"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_8:.*]] = arith.constant 1.000000e+00 : f32 +! CHECK: hlfir.assign %[[VAL_8]] to %[[VAL_3]]#0 : f32, !fir.ref +! CHECK: %[[VAL_9:.*]] = arith.constant 1.000000e+00 : f32 +! CHECK: hlfir.assign %[[VAL_9]] to %[[VAL_5]]#0 : f32, !fir.ref +! CHECK: %[[VAL_10:.*]] = arith.constant 1.000000e+00 : f32 +! CHECK: hlfir.assign %[[VAL_10]] to %[[VAL_7]]#0 : f32, !fir.ref +! CHECK: omp.parallel { +! CHECK: %[[VAL_11:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! 
CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFmultiple_real_reductions_same_typeEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_13:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_14:.*]] = arith.constant 10 : i32 +! CHECK: %[[VAL_15:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@multiply_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref, @multiply_reduction_f_32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref, @multiply_reduction_f_32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref) for (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) { +! CHECK: fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref +! CHECK: %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_real_reductions_same_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_real_reductions_same_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFmultiple_real_reductions_same_typeEz"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_20]]#0 : !fir.ref +! CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref +! CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (i32) -> f32 +! CHECK: %[[VAL_26:.*]] = arith.mulf %[[VAL_23]], %[[VAL_25]] fastmath : f32 +! CHECK: hlfir.assign %[[VAL_26]] to %[[VAL_20]]#0 : f32, !fir.ref +! CHECK: %[[VAL_27:.*]] = fir.load %[[VAL_21]]#0 : !fir.ref +! CHECK: %[[VAL_28:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref +! CHECK: %[[VAL_29:.*]] = fir.convert %[[VAL_28]] : (i32) -> f32 +! CHECK: %[[VAL_30:.*]] = arith.mulf %[[VAL_27]], %[[VAL_29]] fastmath : f32 +! CHECK: hlfir.assign %[[VAL_30]] to %[[VAL_21]]#0 : f32, !fir.ref +! CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref +! CHECK: %[[VAL_32:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref +! CHECK: %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (i32) -> f32 +! 
CHECK: %[[VAL_34:.*]] = arith.mulf %[[VAL_31]], %[[VAL_33]] fastmath : f32 +! CHECK: hlfir.assign %[[VAL_34]] to %[[VAL_22]]#0 : f32, !fir.ref +! CHECK: omp.yield +! CHECK: omp.terminator +! CHECK: return + subroutine multiple_real_reductions_same_type real :: x,y,z x = 1 @@ -245,33 +313,61 @@ subroutine multiple_real_reductions_same_type !$omp end parallel end subroutine -!CHECK-LABEL: func.func @_QPmultiple_reductions_different_type -!CHECK: %[[WREF:.*]] = fir.alloca f64 {bindc_name = "w", uniq_name = "_QFmultiple_reductions_different_typeEw"} -!CHECK: %[[W_DECL:.*]]:2 = hlfir.declare %[[WREF]] {uniq_name = "_QFmultiple_reductions_different_typeEw"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_reductions_different_typeEx"} -!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[XREF]] {uniq_name = "_QFmultiple_reductions_different_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[YREF:.*]] = fir.alloca i64 {bindc_name = "y", uniq_name = "_QFmultiple_reductions_different_typeEy"} -!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[YREF]] {uniq_name = "_QFmultiple_reductions_different_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[ZREF:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_reductions_different_typeEz"} -!CHECK: %[[Z_DECL:.*]]:2 = hlfir.declare %[[ZREF]] {uniq_name = "_QFmultiple_reductions_different_typeEz"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFmultiple_reductions_different_typeEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[X_DECL]]#0 : !fir.ref, @[[RED_I64_NAME]] -> %[[Y_DECL]]#0 : !fir.ref, @[[RED_F32_NAME]] -> %[[Z_DECL]]#0 : !fir.ref, @[[RED_F64_NAME]] -> %[[W_DECL]]#0 : !fir.ref) for (%[[IVAL:.*]]) : i32 -!CHECK: fir.store %[[IVAL]] to 
%[[I_PVT_DECL]]#1 : !fir.ref -!CHECK: %[[I_PVT_VAL1_I32:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL1_I32]], %[[X_DECL]]#0 : i32, !fir.ref -!CHECK: %[[I_PVT_VAL2_I32:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: %[[I_PVT_VAL2_I64:.*]] = fir.convert %[[I_PVT_VAL2_I32]] : (i32) -> i64 -!CHECK: omp.reduction %[[I_PVT_VAL2_I64]], %[[Y_DECL]]#0 : i64, !fir.ref -!CHECK: %[[I_PVT_VAL3_I32:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: %[[I_PVT_VAL3_F32:.*]] = fir.convert %[[I_PVT_VAL3_I32]] : (i32) -> f32 -!CHECK: omp.reduction %[[I_PVT_VAL3_F32]], %[[Z_DECL]]#0 : f32, !fir.ref -!CHECK: %[[I_PVT_VAL4_I32:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: %[[I_PVT_VAL4_F64:.*]] = fir.convert %[[I_PVT_VAL4_I32]] : (i32) -> f64 -!CHECK: omp.reduction %[[I_PVT_VAL4_F64]], %[[W_DECL]]#0 : f64, !fir.ref -!CHECK: omp.terminator -!CHECK: return +! CHECK-LABEL: func.func @_QPmultiple_reductions_different_type() { +! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFmultiple_reductions_different_typeEi"} +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFmultiple_reductions_different_typeEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_2:.*]] = fir.alloca f64 {bindc_name = "w", uniq_name = "_QFmultiple_reductions_different_typeEw"} +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFmultiple_reductions_different_typeEw"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_4:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_reductions_different_typeEx"} +! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFmultiple_reductions_different_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_6:.*]] = fir.alloca i64 {bindc_name = "y", uniq_name = "_QFmultiple_reductions_different_typeEy"} +! 
CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFmultiple_reductions_different_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_8:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_reductions_different_typeEz"} +! CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]] {uniq_name = "_QFmultiple_reductions_different_typeEz"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 +! CHECK: hlfir.assign %[[VAL_10]] to %[[VAL_5]]#0 : i32, !fir.ref +! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i64 +! CHECK: hlfir.assign %[[VAL_11]] to %[[VAL_7]]#0 : i64, !fir.ref +! CHECK: %[[VAL_12:.*]] = arith.constant 1.000000e+00 : f32 +! CHECK: hlfir.assign %[[VAL_12]] to %[[VAL_9]]#0 : f32, !fir.ref +! CHECK: %[[VAL_13:.*]] = arith.constant 1.000000e+00 : f64 +! CHECK: hlfir.assign %[[VAL_13]] to %[[VAL_3]]#0 : f64, !fir.ref +! CHECK: omp.parallel { +! CHECK: %[[VAL_14:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_14]] {uniq_name = "_QFmultiple_reductions_different_typeEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_16:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_17:.*]] = arith.constant 10 : i32 +! CHECK: %[[VAL_18:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(@multiply_reduction_i_32 %[[VAL_5]]#0 -> %[[VAL_19:.*]] : !fir.ref, @multiply_reduction_i_64 %[[VAL_7]]#0 -> %[[VAL_20:.*]] : !fir.ref, @multiply_reduction_f_32 %[[VAL_9]]#0 -> %[[VAL_21:.*]] : !fir.ref, @multiply_reduction_f_64 %[[VAL_3]]#0 -> %[[VAL_22:.*]] : !fir.ref) for (%[[VAL_23:.*]]) : i32 = (%[[VAL_16]]) to (%[[VAL_17]]) inclusive step (%[[VAL_18]]) { +! CHECK: fir.store %[[VAL_23]] to %[[VAL_15]]#1 : !fir.ref +! CHECK: %[[VAL_24:.*]]:2 = hlfir.declare %[[VAL_19]] {uniq_name = "_QFmultiple_reductions_different_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! 
CHECK: %[[VAL_25:.*]]:2 = hlfir.declare %[[VAL_20]] {uniq_name = "_QFmultiple_reductions_different_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_26:.*]]:2 = hlfir.declare %[[VAL_21]] {uniq_name = "_QFmultiple_reductions_different_typeEz"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_22]] {uniq_name = "_QFmultiple_reductions_different_typeEw"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_28:.*]] = fir.load %[[VAL_24]]#0 : !fir.ref +! CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref +! CHECK: %[[VAL_30:.*]] = arith.muli %[[VAL_28]], %[[VAL_29]] : i32 +! CHECK: hlfir.assign %[[VAL_30]] to %[[VAL_24]]#0 : i32, !fir.ref +! CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_25]]#0 : !fir.ref +! CHECK: %[[VAL_32:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref +! CHECK: %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (i32) -> i64 +! CHECK: %[[VAL_34:.*]] = arith.muli %[[VAL_31]], %[[VAL_33]] : i64 +! CHECK: hlfir.assign %[[VAL_34]] to %[[VAL_25]]#0 : i64, !fir.ref +! CHECK: %[[VAL_35:.*]] = fir.load %[[VAL_26]]#0 : !fir.ref +! CHECK: %[[VAL_36:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref +! CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_36]] : (i32) -> f32 +! CHECK: %[[VAL_38:.*]] = arith.mulf %[[VAL_35]], %[[VAL_37]] fastmath : f32 +! CHECK: hlfir.assign %[[VAL_38]] to %[[VAL_26]]#0 : f32, !fir.ref +! CHECK: %[[VAL_39:.*]] = fir.load %[[VAL_27]]#0 : !fir.ref +! CHECK: %[[VAL_40:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref +! CHECK: %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> f64 +! CHECK: %[[VAL_42:.*]] = arith.mulf %[[VAL_39]], %[[VAL_41]] fastmath : f64 +! CHECK: hlfir.assign %[[VAL_42]] to %[[VAL_27]]#0 : f64, !fir.ref +! CHECK: omp.yield +! CHECK: omp.terminator +! 
CHECK: return + + subroutine multiple_reductions_different_type integer :: x integer(kind=8) :: y diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index c7a32de256e2a5..0adf186ae0c7e9 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -525,13 +525,11 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments, accumulator variables in `reduction_vars` and symbols referring to reduction declarations in the `reductions` attribute. Each reduction is identified by the accumulator it uses and accumulators must not be repeated in the same - reduction. The `omp.reduction` operation accepts the accumulator and a - partial value which is considered to be produced by the current loop - iteration for the given reduction. If multiple values are produced for the - same accumulator, i.e. there are multiple `omp.reduction`s, the last value - is taken. The reduction declaration specifies how to combine the values from - each iteration into the final value, which is available in the accumulator - after the loop completes. + reduction. A private variable corresponding to the accumulator is used in + place of the accumulator inside the body of the worksharing-loop. The + reduction declaration specifies how to combine the values from each + iteration into the final value, which is available in the accumulator after + the loop completes. The optional `schedule_val` attribute specifies the loop schedule for this loop, determining how the loop is distributed across the parallel threads. 
@@ -597,12 +595,9 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments, |`nowait` $nowait |`ordered` `(` $ordered_val `)` |`order` `(` custom($order_val) `)` - |`reduction` `(` - custom( - $reduction_vars, type($reduction_vars), $reductions - ) `)` - ) `for` custom($region, $lowerBound, $upperBound, $step, - type($step), $inclusive) attr-dict + ) custom($region, $lowerBound, $upperBound, $step, + type($step), $reduction_vars, type($reduction_vars), $reductions, + $inclusive) attr-dict }]; let hasVerifier = 1; } diff --git a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp index 2f8b3f7e11de15..ea5f31ee8c6aa7 100644 --- a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp +++ b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp @@ -367,9 +367,11 @@ struct ParallelOpLowering : public OpRewritePattern { // TODO: consider checking it here is already a compatible reduction // declaration and use it instead of redeclaring. SmallVector reductionDeclSymbols; + SmallVector ompReductionDecls; auto reduce = cast(parallelOp.getBody()->getTerminator()); for (int64_t i = 0, e = parallelOp.getNumReductions(); i < e; ++i) { omp::ReductionDeclareOp decl = declareReduction(rewriter, reduce, i); + ompReductionDecls.push_back(decl); if (!decl) return failure(); reductionDeclSymbols.push_back( @@ -398,11 +400,39 @@ struct ParallelOpLowering : public OpRewritePattern { // Replace the reduction operations contained in this loop. Must be done // here rather than in a separate pattern to have access to the list of // reduction variables. 
- for (auto [x, y] : - llvm::zip_equal(reductionVariables, reduce.getOperands())) { + for (auto [x, y, rD] : llvm::zip_equal( + reductionVariables, reduce.getOperands(), ompReductionDecls)) { OpBuilder::InsertionGuard guard(rewriter); rewriter.setInsertionPoint(reduce); - rewriter.create(reduce.getLoc(), y, x); + Region &redRegion = rD.getReductionRegion(); + // The SCF dialect by definition contains only structured operations + // and hence the SCF reduction region will contain a single block. + // The ompReductionDecls region is a copy of the SCF reduction region + // and hence has the same property. + assert(redRegion.hasOneBlock() && + "expect reduction region to have one block"); + Value pvtRedVar = parallelOp.getRegion().addArgument(x.getType(), loc); + Value pvtRedVal = rewriter.create(reduce.getLoc(), + rD.getType(), pvtRedVar); + // Make a copy of the reduction combiner region in the body + mlir::OpBuilder builder(rewriter.getContext()); + builder.setInsertionPoint(reduce); + mlir::IRMapping mapper; + assert(redRegion.getNumArguments() == 2 && + "expect reduction region to have two arguments"); + mapper.map(redRegion.getArgument(0), pvtRedVal); + mapper.map(redRegion.getArgument(1), y); + for (auto &op : redRegion.getOps()) { + Operation *cloneOp = builder.clone(op, mapper); + if (auto yieldOp = dyn_cast(*cloneOp)) { + assert(yieldOp && yieldOp.getResults().size() == 1 && + "expect YieldOp in reduction region to return one result"); + Value redVal = yieldOp.getResults()[0]; + rewriter.create(loc, redVal, pvtRedVar); + rewriter.eraseOp(yieldOp); + break; + } + } } rewriter.eraseOp(reduce); diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 849449f9127dd8..13fc01d58eced5 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -29,6 +29,7 @@ #include "llvm/ADT/TypeSwitch.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" #include +#include 
#include #include "mlir/Dialect/OpenMP/OpenMPOpsDialect.cpp.inc" @@ -459,17 +460,16 @@ parseReductionClause(OpAsmParser &parser, Region &region, return success(); } -static void printReductionClause(OpAsmPrinter &p, Operation *op, Region &region, - ValueRange operands, TypeRange types, - ArrayAttr reductionSymbols) { +static void printReductionClause(OpAsmPrinter &p, Operation *op, + ValueRange reductionArgs, ValueRange operands, + TypeRange types, ArrayAttr reductionSymbols) { p << "reduction("; - llvm::interleaveComma(llvm::zip_equal(reductionSymbols, operands, - region.front().getArguments(), types), - p, [&p](auto t) { - auto [sym, op, arg, type] = t; - p << sym << " " << op << " -> " << arg << " : " - << type; - }); + llvm::interleaveComma( + llvm::zip_equal(reductionSymbols, operands, reductionArgs, types), p, + [&p](auto t) { + auto [sym, op, arg, type] = t; + p << sym << " " << op << " -> " << arg << " : " << type; + }); p << ") "; } @@ -490,7 +490,8 @@ static void printParallelRegion(OpAsmPrinter &p, Operation *op, Region &region, ValueRange operands, TypeRange types, ArrayAttr reductionSymbols) { if (reductionSymbols) - printReductionClause(p, op, region, operands, types, reductionSymbols); + printReductionClause(p, op, region.front().getArguments(), operands, types, + reductionSymbols); p.printRegion(region, /*printEntryBlockArgs=*/false); } @@ -1158,6 +1159,84 @@ LogicalResult SingleOp::verify() { // WsLoopOp //===----------------------------------------------------------------------===// +/// loop-control ::= `(` ssa-id-list `)` `:` type `=` loop-bounds +/// loop-bounds := `(` ssa-id-list `)` to `(` ssa-id-list `)` inclusive?
steps +/// steps := `step` `(`ssa-id-list`)` +ParseResult +parseWsLoop(OpAsmParser &parser, Region &region, + SmallVectorImpl &lowerBound, + SmallVectorImpl &upperBound, + SmallVectorImpl &steps, + SmallVectorImpl &loopVarTypes, + SmallVectorImpl &reductionOperands, + SmallVectorImpl &reductionTypes, ArrayAttr &reductionSymbols, + UnitAttr &inclusive) { + + // Parse an optional reduction clause + llvm::SmallVector privates; + bool hasReduction = succeeded( + parseReductionClause(parser, region, reductionOperands, reductionTypes, + reductionSymbols, privates)); + + if (parser.parseKeyword("for")) + return failure(); + + // Parse an opening `(` followed by induction variables followed by `)` + SmallVector ivs; + Type loopVarType; + if (parser.parseArgumentList(ivs, OpAsmParser::Delimiter::Paren) || + parser.parseColonType(loopVarType) || + // Parse loop bounds. + parser.parseEqual() || + parser.parseOperandList(lowerBound, ivs.size(), + OpAsmParser::Delimiter::Paren) || + parser.parseKeyword("to") || + parser.parseOperandList(upperBound, ivs.size(), + OpAsmParser::Delimiter::Paren)) + return failure(); + + if (succeeded(parser.parseOptionalKeyword("inclusive"))) + inclusive = UnitAttr::get(parser.getBuilder().getContext()); + + // Parse step values. + if (parser.parseKeyword("step") || + parser.parseOperandList(steps, ivs.size(), OpAsmParser::Delimiter::Paren)) + return failure(); + + // Now parse the body.
+ loopVarTypes = SmallVector(ivs.size(), loopVarType); + for (auto &iv : ivs) + iv.type = loopVarType; + + SmallVector regionArgs{ivs}; + if (hasReduction) + llvm::copy(privates, std::back_inserter(regionArgs)); + + return parser.parseRegion(region, regionArgs); +} + +void printWsLoop(OpAsmPrinter &p, Operation *op, Region &region, + ValueRange lowerBound, ValueRange upperBound, ValueRange steps, + TypeRange loopVarTypes, ValueRange reductionOperands, + TypeRange reductionTypes, ArrayAttr reductionSymbols, + UnitAttr inclusive) { + if (reductionSymbols) { + auto reductionArgs = + region.front().getArguments().drop_front(loopVarTypes.size()); + printReductionClause(p, op, reductionArgs, reductionOperands, + reductionTypes, reductionSymbols); + } + + p << " for "; + auto args = region.front().getArguments().drop_back(reductionOperands.size()); + p << " (" << args << ") : " << args[0].getType() << " = (" << lowerBound + << ") to (" << upperBound << ") "; + if (inclusive) + p << "inclusive "; + p << "step (" << steps << ") "; + p.printRegion(region, /*printEntryBlockArgs=*/false); +} + /// loop-control ::= `(` ssa-id-list `)` `:` type `=` loop-bounds /// loop-bounds := `(` ssa-id-list `)` to `(` ssa-id-list `)` inclusive?
steps /// steps := `step` `(`ssa-id-list`)` diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index c87e895bb5404c..78a2ad76a1e3b8 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -786,17 +786,17 @@ allocReductionVars(T loop, llvm::IRBuilderBase &builder, SmallVector &reductionDecls, SmallVector &privateReductionVariables, DenseMap &reductionVariableMap) { - unsigned numReductions = loop.getNumReductionVars(); - privateReductionVariables.reserve(numReductions); - if (numReductions != 0) { - llvm::IRBuilderBase::InsertPointGuard guard(builder); - builder.restoreIP(allocaIP); - for (unsigned i = 0; i < numReductions; ++i) { - llvm::Value *var = builder.CreateAlloca( - moduleTranslation.convertType(reductionDecls[i].getType())); - privateReductionVariables.push_back(var); - reductionVariableMap.try_emplace(loop.getReductionVars()[i], var); - } + llvm::IRBuilderBase::InsertPointGuard guard(builder); + builder.restoreIP(allocaIP); + auto args = + loop.getRegion().getArguments().take_back(loop.getNumReductionVars()); + + for (std::size_t i = 0; i < loop.getNumReductionVars(); ++i) { + llvm::Value *var = builder.CreateAlloca( + moduleTranslation.convertType(reductionDecls[i].getType())); + moduleTranslation.mapValue(args[i], var); + privateReductionVariables.push_back(var); + reductionVariableMap.try_emplace(loop.getReductionVars()[i], var); } } @@ -1018,19 +1018,9 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, // Allocate reduction vars SmallVector privateReductionVariables; DenseMap reductionVariableMap; - { - llvm::IRBuilderBase::InsertPointGuard guard(builder); - builder.restoreIP(allocaIP); - auto args = opInst.getRegion().getArguments(); - - for (std::size_t i = 0; i < opInst.getNumReductionVars(); ++i) { - llvm::Value *var = 
builder.CreateAlloca( - moduleTranslation.convertType(reductionDecls[i].getType())); - moduleTranslation.mapValue(args[i], var); - privateReductionVariables.push_back(var); - reductionVariableMap.try_emplace(opInst.getReductionVars()[i], var); - } - } + allocReductionVars(opInst, builder, moduleTranslation, allocaIP, + reductionDecls, privateReductionVariables, + reductionVariableMap); // Store the mapping between reduction variables and their private copies on // ModuleTranslation stack. It can be then recovered when translating diff --git a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir index 3fbeaebb592a4d..ae3bb6ccea7a8b 100644 --- a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir +++ b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir @@ -320,8 +320,11 @@ llvm.func @_QPsb() { // CHECK-LABEL: @_QPsimple_reduction // CHECK: %[[RED_ACCUMULATOR:.*]] = llvm.alloca %{{.*}} x i32 {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"} : (i64) -> !llvm.ptr // CHECK: omp.parallel -// CHECK: omp.wsloop reduction(@eqv_reduction -> %[[RED_ACCUMULATOR]] : !llvm.ptr) for -// CHECK: omp.reduction %{{.*}}, %[[RED_ACCUMULATOR]] : i32, !llvm.ptr +// CHECK: omp.wsloop reduction(@eqv_reduction %{{.+}} -> %[[PRV:.+]] : !llvm.ptr) for +// CHECK: %[[LPRV:.+]] = llvm.load %[[PRV]] : !llvm.ptr -> i32 +// CHECK: %[[CMP:.+]] = llvm.icmp "eq" %{{.*}}, %[[LPRV]] : i32 +// CHECK: %[[ZEXT:.+]] = llvm.zext %[[CMP]] : i1 to i32 +// CHECK: llvm.store %[[ZEXT]], %[[PRV]] : i32, !llvm.ptr // CHECK: omp.yield // CHECK: omp.terminator // CHECK: llvm.return @@ -350,14 +353,17 @@ llvm.func @_QPsimple_reduction(%arg0: !llvm.ptr {fir.bindc_name = "y"}) { llvm.store %5, %4 : i32, !llvm.ptr omp.parallel { %6 = llvm.alloca %3 x i32 {adapt.valuebyref, in_type = i32, operandSegmentSizes = array, pinned} : (i64) -> !llvm.ptr - omp.wsloop reduction(@eqv_reduction -> %4 : !llvm.ptr) for (%arg1) : i32 = (%1) to (%0) inclusive 
step (%1) { + omp.wsloop reduction(@eqv_reduction %4 -> %prv : !llvm.ptr) for (%arg1) : i32 = (%1) to (%0) inclusive step (%1) { llvm.store %arg1, %6 : i32, !llvm.ptr %7 = llvm.load %6 : !llvm.ptr -> i32 %8 = llvm.sext %7 : i32 to i64 %9 = llvm.sub %8, %3 : i64 %10 = llvm.getelementptr %arg0[0, %9] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<100 x i32> %11 = llvm.load %10 : !llvm.ptr -> i32 - omp.reduction %11, %4 : i32, !llvm.ptr + %12 = llvm.load %prv : !llvm.ptr -> i32 + %13 = llvm.icmp "eq" %11, %12 : i32 + %14 = llvm.zext %13 : i1 to i32 + llvm.store %14, %prv : i32, !llvm.ptr omp.yield } omp.terminator diff --git a/mlir/test/Conversion/SCFToOpenMP/reductions.mlir b/mlir/test/Conversion/SCFToOpenMP/reductions.mlir index faf5ec4aba7d4d..a6704644873f09 100644 --- a/mlir/test/Conversion/SCFToOpenMP/reductions.mlir +++ b/mlir/test/Conversion/SCFToOpenMP/reductions.mlir @@ -27,13 +27,15 @@ func.func @reduction1(%arg0 : index, %arg1 : index, %arg2 : index, %zero = arith.constant 0.0 : f32 // CHECK: omp.parallel // CHECK: omp.wsloop - // CHECK-SAME: reduction(@[[$REDF]] -> %[[BUF]] + // CHECK-SAME: reduction(@[[$REDF]] %[[BUF]] -> %[[PVT_BUF:[a-z0-9]+]] // CHECK: memref.alloca_scope scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %step) init (%zero) -> (f32) { // CHECK: %[[CST_INNER:.*]] = arith.constant 1.0 %one = arith.constant 1.0 : f32 - // CHECK: omp.reduction %[[CST_INNER]], %[[BUF]] + // CHECK: %[[PVT_VAL:.*]] = llvm.load %[[PVT_BUF]] : !llvm.ptr -> f32 + // CHECK: %[[ADD_RESULT:.*]] = arith.addf %[[PVT_VAL]], %[[CST_INNER]] : f32 + // CHECK: llvm.store %[[ADD_RESULT]], %[[PVT_BUF]] : f32, !llvm.ptr scf.reduce(%one : f32) { ^bb0(%lhs : f32, %rhs: f32): %res = arith.addf %lhs, %rhs : f32 @@ -103,10 +105,15 @@ func.func @reduction_muli(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index) { %step = arith.constant 1 : index %one = arith.constant 1 : i32 + // CHECK: %[[RED_VAR:.*]] = llvm.alloca %{{.*}} x i32 : (i64) 
-> !llvm.ptr + // CHECK: omp.wsloop reduction(@[[$REDI]] %[[RED_VAR]] -> %[[RED_PVT_VAR:.*]] : !llvm.ptr) scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %step) init (%one) -> (i32) { - // CHECK: omp.reduction + // CHECK: %[[C2:.*]] = arith.constant 2 : i32 %pow2 = arith.constant 2 : i32 + // CHECK: %[[RED_PVT_VAL:.*]] = llvm.load %[[RED_PVT_VAR]] : !llvm.ptr -> i32 + // CHECK: %[[MUL_RESULT:.*]] = arith.muli %[[RED_PVT_VAL]], %[[C2]] : i32 + // CHECK: llvm.store %[[MUL_RESULT]], %[[RED_PVT_VAR]] : i32, !llvm.ptr scf.reduce(%pow2 : i32) { ^bb0(%lhs : i32, %rhs: i32): %res = arith.muli %lhs, %rhs : i32 @@ -199,16 +206,23 @@ func.func @reduction4(%arg0 : index, %arg1 : index, %arg2 : index, // CHECK: omp.parallel // CHECK: omp.wsloop - // CHECK-SAME: reduction(@[[$REDF1]] -> %[[BUF1]] - // CHECK-SAME: @[[$REDF2]] -> %[[BUF2]] + // CHECK-SAME: reduction(@[[$REDF1]] %[[BUF1]] -> %[[PVT_BUF1:[a-z0-9]+]] + // CHECK-SAME: @[[$REDF2]] %[[BUF2]] -> %[[PVT_BUF2:[a-z0-9]+]] // CHECK: memref.alloca_scope %res:2 = scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %step) init (%zero, %ione) -> (f32, i64) { + // CHECK: %[[CST_ONE:.*]] = arith.constant 1.0{{.*}} : f32 %one = arith.constant 1.0 : f32 - // CHECK: arith.fptosi + // CHECK: %[[CST_INT_ONE:.*]] = arith.fptosi %1 = arith.fptosi %one : f32 to i64 - // CHECK: omp.reduction %{{.*}}, %[[BUF1]] - // CHECK: omp.reduction %{{.*}}, %[[BUF2]] + // CHECK: %[[PVT_VAL1:.*]] = llvm.load %[[PVT_BUF1]] : !llvm.ptr -> f32 + // CHECK: %[[TEMP1:.*]] = arith.cmpf oge, %[[PVT_VAL1]], %[[CST_ONE]] : f32 + // CHECK: %[[CMP_VAL1:.*]] = arith.select %[[TEMP1]], %[[PVT_VAL1]], %[[CST_ONE]] : f32 + // CHECK: llvm.store %[[CMP_VAL1]], %[[PVT_BUF1]] : f32, !llvm.ptr + // CHECK: %[[PVT_VAL2:.*]] = llvm.load %[[PVT_BUF2]] : !llvm.ptr -> i64 + // CHECK: %[[TEMP2:.*]] = arith.cmpi slt, %[[PVT_VAL2]], %[[CST_INT_ONE]] : i64 + // CHECK: %[[CMP_VAL2:.*]] = arith.select %[[TEMP2]], %[[CST_INT_ONE]], 
%[[PVT_VAL2]] : i64 + // CHECK: llvm.store %[[CMP_VAL2]], %[[PVT_BUF2]] : i64, !llvm.ptr scf.reduce(%one, %1 : f32, i64) { ^bb0(%lhs : f32, %rhs: f32): %cmp = arith.cmpf oge, %lhs, %rhs : f32 diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir index 1c1b6ea58e02ee..523a4038b7c32a 100644 --- a/mlir/test/Dialect/OpenMP/invalid.mlir +++ b/mlir/test/Dialect/OpenMP/invalid.mlir @@ -436,42 +436,13 @@ atomic { // ----- -omp.reduction.declare @add_f32 : f32 -init { -^bb0(%arg: f32): - %0 = arith.constant 0.0 : f32 - omp.yield (%0 : f32) -} -combiner { -^bb1(%arg0: f32, %arg1: f32): - %1 = arith.addf %arg0, %arg1 : f32 - omp.yield (%1 : f32) -} - -func.func @foo(%lb : index, %ub : index, %step : index) { - %c1 = arith.constant 1 : i32 - %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr - %1 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr - - omp.wsloop reduction(@add_f32 -> %0 : !llvm.ptr) - for (%iv) : index = (%lb) to (%ub) step (%step) { - %2 = arith.constant 2.0 : f32 - // expected-error @below {{accumulator is not used by the parent}} - omp.reduction %2, %1 : f32, !llvm.ptr - omp.yield - } - return -} - -// ----- - func.func @foo(%lb : index, %ub : index, %step : index) { %c1 = arith.constant 1 : i32 %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr %1 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr // expected-error @below {{expected symbol reference @foo to point to a reduction declaration}} - omp.wsloop reduction(@foo -> %0 : !llvm.ptr) + omp.wsloop reduction(@foo %0 -> %prv : !llvm.ptr) for (%iv) : index = (%lb) to (%ub) step (%step) { %2 = arith.constant 2.0 : f32 omp.reduction %2, %1 : f32, !llvm.ptr @@ -499,7 +470,7 @@ func.func @foo(%lb : index, %ub : index, %step : index) { %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr // expected-error @below {{accumulator variable used more than once}} - omp.wsloop reduction(@add_f32 -> %0 : !llvm.ptr, @add_f32 -> %0 : !llvm.ptr) + omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr, @add_f32 
%0 -> %prv1 : !llvm.ptr) for (%iv) : index = (%lb) to (%ub) step (%step) { %2 = arith.constant 2.0 : f32 omp.reduction %2, %0 : f32, !llvm.ptr @@ -532,7 +503,7 @@ func.func @foo(%lb : index, %ub : index, %step : index, %mem : memref<1xf32>) { %c1 = arith.constant 1 : i32 // expected-error @below {{expected accumulator ('memref<1xf32>') to be the same type as reduction declaration ('!llvm.ptr')}} - omp.wsloop reduction(@add_f32 -> %mem : memref<1xf32>) + omp.wsloop reduction(@add_f32 %mem -> %prv : memref<1xf32>) for (%iv) : index = (%lb) to (%ub) step (%step) { %2 = arith.constant 2.0 : f32 omp.reduction %2, %mem : f32, memref<1xf32> diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir index 3bb4a288376ede..99ca802089b8d3 100644 --- a/mlir/test/Dialect/OpenMP/ops.mlir +++ b/mlir/test/Dialect/OpenMP/ops.mlir @@ -625,12 +625,17 @@ atomic { func.func @wsloop_reduction(%lb : index, %ub : index, %step : index) { %c1 = arith.constant 1 : i32 %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr - // CHECK: reduction(@add_f32 -> %{{.+}} : !llvm.ptr) - omp.wsloop reduction(@add_f32 -> %0 : !llvm.ptr) + // CHECK: reduction(@add_f32 %{{.+}} -> %[[PRV:.+]] : !llvm.ptr) + omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr) for (%iv) : index = (%lb) to (%ub) step (%step) { - %1 = arith.constant 2.0 : f32 - // CHECK: omp.reduction %{{.+}}, %{{.+}} - omp.reduction %1, %0 : f32, !llvm.ptr + // CHECK: %[[CST:.+]] = arith.constant 2.0{{.*}} : f32 + %cst = arith.constant 2.0 : f32 + // CHECK: %[[LPRV:.+]] = llvm.load %[[PRV]] : !llvm.ptr -> f32 + %lprv = llvm.load %prv : !llvm.ptr -> f32 + // CHECK: %[[RES:.+]] = llvm.fadd %[[LPRV]], %[[CST]] : f32 + %res = llvm.fadd %lprv, %cst: f32 + // CHECK: llvm.store %[[RES]], %[[PRV]] : f32, !llvm.ptr + llvm.store %res, %prv : f32, !llvm.ptr omp.yield } return @@ -788,12 +793,15 @@ combiner { // CHECK-LABEL: func @wsloop_reduction2 func.func @wsloop_reduction2(%lb : index, %ub : index, %step : index) { %0 = 
memref.alloca() : memref<1xf32> - // CHECK: omp.wsloop reduction(@add2_f32 -> %{{.+}} : memref<1xf32>) - omp.wsloop reduction(@add2_f32 -> %0 : memref<1xf32>) + // CHECK: omp.wsloop reduction(@add2_f32 %{{.+}} -> %{{.+}} : memref<1xf32>) + omp.wsloop reduction(@add2_f32 %0 -> %prv : memref<1xf32>) for (%iv) : index = (%lb) to (%ub) step (%step) { %1 = arith.constant 2.0 : f32 - // CHECK: omp.reduction - omp.reduction %1, %0 : f32, memref<1xf32> + %2 = arith.constant 0 : index + %3 = memref.load %prv[%2] : memref<1xf32> + // CHECK: llvm.fadd + %4 = llvm.fadd %1, %3 : f32 + memref.store %4, %prv[%2] : memref<1xf32> omp.yield } return diff --git a/mlir/test/Target/LLVMIR/openmp-reduction.mlir b/mlir/test/Target/LLVMIR/openmp-reduction.mlir index dae83c0cf92ed8..8c3c9cd1aa26b8 100644 --- a/mlir/test/Target/LLVMIR/openmp-reduction.mlir +++ b/mlir/test/Target/LLVMIR/openmp-reduction.mlir @@ -26,10 +26,12 @@ llvm.func @simple_reduction(%lb : i64, %ub : i64, %step : i64) { %c1 = llvm.mlir.constant(1 : i32) : i32 %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr omp.parallel { - omp.wsloop reduction(@add_f32 -> %0 : !llvm.ptr) + omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr) for (%iv) : i64 = (%lb) to (%ub) step (%step) { %1 = llvm.mlir.constant(2.0 : f32) : f32 - omp.reduction %1, %0 : f32, !llvm.ptr + %2 = llvm.load %prv : !llvm.ptr -> f32 + %3 = llvm.fadd %1, %2 : f32 + llvm.store %3, %prv : f32, !llvm.ptr omp.yield } omp.terminator @@ -67,7 +69,7 @@ llvm.func @simple_reduction(%lb : i64, %ub : i64, %step : i64) { // Update of the private variable using the reduction region // (the body block currently comes after all the other blocks). // CHECK: %[[PARTIAL:.+]] = load float, ptr %[[PRIVATE]] -// CHECK: %[[UPDATED:.+]] = fadd float %[[PARTIAL]], 2.000000e+00 +// CHECK: %[[UPDATED:.+]] = fadd float 2.000000e+00, %[[PARTIAL]] // CHECK: store float %[[UPDATED]], ptr %[[PRIVATE]] // Reduction function. 
@@ -103,11 +105,15 @@ llvm.func @reuse_declaration(%lb : i64, %ub : i64, %step : i64) { %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr %2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr omp.parallel { - omp.wsloop reduction(@add_f32 -> %0 : !llvm.ptr, @add_f32 -> %2 : !llvm.ptr) + omp.wsloop reduction(@add_f32 %0 -> %prv0 : !llvm.ptr, @add_f32 %2 -> %prv1 : !llvm.ptr) for (%iv) : i64 = (%lb) to (%ub) step (%step) { %1 = llvm.mlir.constant(2.0 : f32) : f32 - omp.reduction %1, %0 : f32, !llvm.ptr - omp.reduction %1, %2 : f32, !llvm.ptr + %3 = llvm.load %prv0 : !llvm.ptr -> f32 + %4 = llvm.fadd %3, %1 : f32 + llvm.store %4, %prv0 : f32, !llvm.ptr + %5 = llvm.load %prv1 : !llvm.ptr -> f32 + %6 = llvm.fadd %5, %1 : f32 + llvm.store %6, %prv1 : f32, !llvm.ptr omp.yield } omp.terminator @@ -189,10 +195,12 @@ llvm.func @missing_omp_reduction(%lb : i64, %ub : i64, %step : i64) { %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr %2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr omp.parallel { - omp.wsloop reduction(@add_f32 -> %0 : !llvm.ptr, @add_f32 -> %2 : !llvm.ptr) + omp.wsloop reduction(@add_f32 %0 -> %prv0 : !llvm.ptr, @add_f32 %2 -> %prv1 : !llvm.ptr) for (%iv) : i64 = (%lb) to (%ub) step (%step) { %1 = llvm.mlir.constant(2.0 : f32) : f32 - omp.reduction %1, %0 : f32, !llvm.ptr + %3 = llvm.load %prv0 : !llvm.ptr -> f32 + %4 = llvm.fadd %3, %1 : f32 + llvm.store %4, %prv0 : f32, !llvm.ptr omp.yield } omp.terminator @@ -272,11 +280,15 @@ llvm.func @double_reference(%lb : i64, %ub : i64, %step : i64) { %c1 = llvm.mlir.constant(1 : i32) : i32 %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr omp.parallel { - omp.wsloop reduction(@add_f32 -> %0 : !llvm.ptr) + omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr) for (%iv) : i64 = (%lb) to (%ub) step (%step) { %1 = llvm.mlir.constant(2.0 : f32) : f32 - omp.reduction %1, %0 : f32, !llvm.ptr - omp.reduction %1, %0 : f32, !llvm.ptr + %2 = llvm.load %prv : !llvm.ptr -> f32 + %3 = llvm.fadd %2, %1 : f32 + llvm.store %3, %prv : f32, 
!llvm.ptr + %4 = llvm.load %prv : !llvm.ptr -> f32 + %5 = llvm.fadd %4, %1 : f32 + llvm.store %5, %prv : f32, !llvm.ptr omp.yield } omp.terminator @@ -362,11 +374,15 @@ llvm.func @no_atomic(%lb : i64, %ub : i64, %step : i64) { %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr %2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr omp.parallel { - omp.wsloop reduction(@add_f32 -> %0 : !llvm.ptr, @mul_f32 -> %2 : !llvm.ptr) + omp.wsloop reduction(@add_f32 %0 -> %prv0 : !llvm.ptr, @mul_f32 %2 -> %prv1 : !llvm.ptr) for (%iv) : i64 = (%lb) to (%ub) step (%step) { %1 = llvm.mlir.constant(2.0 : f32) : f32 - omp.reduction %1, %0 : f32, !llvm.ptr - omp.reduction %1, %2 : f32, !llvm.ptr + %3 = llvm.load %prv0 : !llvm.ptr -> f32 + %4 = llvm.fadd %3, %1 : f32 + llvm.store %4, %prv0 : f32, !llvm.ptr + %5 = llvm.load %prv1 : !llvm.ptr -> f32 + %6 = llvm.fmul %5, %1 : f32 + llvm.store %6, %prv1 : f32, !llvm.ptr omp.yield } omp.terminator From 3985eda8ee729b9fc36aa1b1ef6afa716df5bbe9 Mon Sep 17 00:00:00 2001 From: Richard Dzenis Date: Tue, 13 Feb 2024 21:19:49 +0200 Subject: [PATCH 045/240] [docs] Update links for C++ compiler supported features in CodingStandards (#81587) --- llvm/docs/CodingStandards.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/llvm/docs/CodingStandards.rst b/llvm/docs/CodingStandards.rst index 7ba20c09d04f68..63df5af2523db6 100644 --- a/llvm/docs/CodingStandards.rst +++ b/llvm/docs/CodingStandards.rst @@ -73,7 +73,10 @@ Each toolchain provides a good reference for what it accepts: * libstdc++: https://gcc.gnu.org/onlinedocs/libstdc++/manual/status.html#status.iso.2017 -* MSVC: https://msdn.microsoft.com/en-us/library/hh567368.aspx +* MSVC: https://learn.microsoft.com/cpp/overview/visual-cpp-language-conformance + +Additionally, there are compiler comparison tables of supported C++ features on +`cppreference.com `_. 
C++ Standard Library From 1b65742f8c71f576381fe85d5e34579b24f2d874 Mon Sep 17 00:00:00 2001 From: Jeffrey Byrnes Date: Tue, 13 Feb 2024 11:22:49 -0800 Subject: [PATCH 046/240] [SeparateConstOffsetFromGEP] Reorder trivial GEP chains to separate constants (#73056) In this case, a trivial GEP chain has the form: ``` %ptr = getelementptr sameType, %base, constant %val = getelementptr sameType, %ptr, %variable ``` That is, a one-index GEP consumes another (of the same basis and result type) one-index GEP, where the inner GEP uses a constant index and the outer GEP uses a variable index. For chains of this type, it is trivial to reorder them (by simply swapping the indexes). The result of doing so is better AddrMode matching for users of the ultimate ptr produced by GEP chain. Future patches can extend this to support non-trivial GEP chains (e.g. those with different basis types and/or multiple indices). --- .../Scalar/SeparateConstOffsetFromGEP.cpp | 73 ++++- ...ne-sink-temporal-divergence-swdev407790.ll | 34 ++- .../AMDGPU/splitkit-getsubrangeformask.ll | 251 ++++++++---------- llvm/test/CodeGen/PowerPC/licm-remat.ll | 2 +- .../AMDGPU/reorder-gep-inbounds.ll | 51 ++++ .../AMDGPU/reorder-gep.ll | 175 ++++++++++++ .../NVPTX/lower-gep-reorder.ll | 65 +++++ .../SeparateConstOffsetFromGEP/reorder-gep.ll | 188 +++++++++++++ 8 files changed, 682 insertions(+), 157 deletions(-) create mode 100644 llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll create mode 100644 llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll create mode 100644 llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lower-gep-reorder.ll create mode 100644 llvm/test/Transforms/SeparateConstOffsetFromGEP/reorder-gep.ll diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index 4481375054ecf1..5124909696aadb 100644 --- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ 
b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -391,6 +391,11 @@ class SeparateConstOffsetFromGEP { /// and returns true if the splitting succeeds. bool splitGEP(GetElementPtrInst *GEP); + /// Tries to reorder the given GEP with the GEP that produces the base if + /// doing so results in producing a constant offset as the outermost + /// index. + bool reorderGEP(GetElementPtrInst *GEP, TargetTransformInfo &TTI); + /// Lower a GEP with multiple indices into multiple GEPs with a single index. /// Function splitGEP already split the original GEP into a variadic part and /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the @@ -964,6 +969,66 @@ SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic, Variadic->eraseFromParent(); } +bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP, + TargetTransformInfo &TTI) { + Type *GEPType = GEP->getResultElementType(); + // TODO: support reordering for non-trivial GEP chains + if (GEPType->isAggregateType() || GEP->getNumIndices() != 1) + return false; + + auto PtrGEP = dyn_cast(GEP->getPointerOperand()); + if (!PtrGEP) + return false; + Type *PtrGEPType = PtrGEP->getResultElementType(); + // TODO: support reordering for non-trivial GEP chains + if (PtrGEPType->isAggregateType() || PtrGEP->getNumIndices() != 1) + return false; + + // TODO: support reordering for non-trivial GEP chains + if (PtrGEPType != GEPType || + PtrGEP->getSourceElementType() != GEP->getSourceElementType()) + return false; + + bool NestedNeedsExtraction; + int64_t NestedByteOffset = + accumulateByteOffset(PtrGEP, NestedNeedsExtraction); + if (!NestedNeedsExtraction) + return false; + + unsigned AddrSpace = PtrGEP->getPointerAddressSpace(); + if (!TTI.isLegalAddressingMode(GEP->getResultElementType(), + /*BaseGV=*/nullptr, NestedByteOffset, + /*HasBaseReg=*/true, /*Scale=*/0, AddrSpace)) + return false; + + IRBuilder<> Builder(GEP); + 
Builder.SetCurrentDebugLocation(GEP->getDebugLoc()); + bool GEPInBounds = GEP->isInBounds(); + bool PtrGEPInBounds = PtrGEP->isInBounds(); + bool IsChainInBounds = GEPInBounds && PtrGEPInBounds; + if (IsChainInBounds) { + auto GEPIdx = GEP->indices().begin(); + auto KnownGEPIdx = computeKnownBits(GEPIdx->get(), *DL); + IsChainInBounds &= KnownGEPIdx.isNonNegative(); + if (IsChainInBounds) { + auto PtrGEPIdx = GEP->indices().begin(); + auto KnownPtrGEPIdx = computeKnownBits(PtrGEPIdx->get(), *DL); + IsChainInBounds &= KnownPtrGEPIdx.isNonNegative(); + } + } + + // For trivial GEP chains, we can swap the indicies. + auto NewSrc = Builder.CreateGEP(PtrGEPType, PtrGEP->getPointerOperand(), + SmallVector(GEP->indices())); + cast(NewSrc)->setIsInBounds(IsChainInBounds); + auto NewGEP = Builder.CreateGEP(GEPType, NewSrc, + SmallVector(PtrGEP->indices())); + cast(NewGEP)->setIsInBounds(IsChainInBounds); + GEP->replaceAllUsesWith(NewGEP); + RecursivelyDeleteTriviallyDeadInstructions(GEP); + return true; +} + bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { // Skip vector GEPs. if (GEP->getType()->isVectorTy()) @@ -979,11 +1044,13 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { bool NeedsExtraction; int64_t AccumulativeByteOffset = accumulateByteOffset(GEP, NeedsExtraction); - if (!NeedsExtraction) - return Changed; - TargetTransformInfo &TTI = GetTTI(*GEP->getFunction()); + if (!NeedsExtraction) { + Changed |= reorderGEP(GEP, TTI); + return Changed; + } + // If LowerGEP is disabled, before really splitting the GEP, check whether the // backend supports the addressing mode we are about to produce. If no, this // splitting probably won't be beneficial. 
diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll index 138a6a86cee984..0bb5288f43efc8 100644 --- a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll +++ b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll @@ -273,11 +273,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CHECK-NEXT: ds_write_b32 v0, v58 ; CHECK-NEXT: s_branch .LBB0_7 -; CHECK-NEXT: .LBB0_16: ; %Flow43 +; CHECK-NEXT: .LBB0_16: ; %Flow45 ; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s53 ; CHECK-NEXT: v_mov_b32_e32 v57, v0 -; CHECK-NEXT: .LBB0_17: ; %Flow44 +; CHECK-NEXT: .LBB0_17: ; %Flow46 ; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s52 ; CHECK-NEXT: s_mov_b32 s49, exec_lo @@ -323,11 +323,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CHECK-NEXT: ds_write_b32 v0, v57 ; CHECK-NEXT: s_branch .LBB0_19 -; CHECK-NEXT: .LBB0_22: ; %Flow41 +; CHECK-NEXT: .LBB0_22: ; %Flow43 ; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: s_inst_prefetch 0x2 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s52 -; CHECK-NEXT: .LBB0_23: ; %Flow42 +; CHECK-NEXT: .LBB0_23: ; %Flow44 ; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s49 ; CHECK-NEXT: ; %bb.24: ; in Loop: Header=BB0_5 Depth=1 @@ -340,7 +340,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_or_b32 s43, s4, s43 ; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s43 ; CHECK-NEXT: s_cbranch_execnz .LBB0_5 -; CHECK-NEXT: .LBB0_25: ; %Flow49 +; CHECK-NEXT: .LBB0_25: ; %Flow51 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s42 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 ; CHECK-NEXT: v_mov_b32_e32 
v0, 1 @@ -362,12 +362,10 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: v_cmpx_gt_u32_e64 v47, v41 ; CHECK-NEXT: s_cbranch_execz .LBB0_33 ; CHECK-NEXT: ; %bb.26: -; CHECK-NEXT: s_add_u32 s42, s44, 8 -; CHECK-NEXT: s_addc_u32 s43, s45, 0 -; CHECK-NEXT: s_mov_b32 s44, 0 +; CHECK-NEXT: s_mov_b32 s42, 0 ; CHECK-NEXT: s_branch .LBB0_28 ; CHECK-NEXT: .LBB0_27: ; in Loop: Header=BB0_28 Depth=1 -; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s45 +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s43 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: s_add_u32 s8, s34, 40 @@ -383,12 +381,12 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7] ; CHECK-NEXT: v_add_co_u32 v41, vcc_lo, v0, v41 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc_lo, v47, v41 -; CHECK-NEXT: s_or_b32 s44, vcc_lo, s44 -; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s44 +; CHECK-NEXT: s_or_b32 s42, vcc_lo, s42 +; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s42 ; CHECK-NEXT: s_cbranch_execz .LBB0_33 ; CHECK-NEXT: .LBB0_28: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v41 -; CHECK-NEXT: s_mov_b32 s45, exec_lo +; CHECK-NEXT: s_mov_b32 s43, exec_lo ; CHECK-NEXT: ds_read_b32 v0, v0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: v_lshrrev_b32_e32 v63, 10, v0 @@ -397,15 +395,15 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: v_mul_u32_u24_e32 v1, 0x180, v63 ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 5, v62 ; CHECK-NEXT: v_lshlrev_b32_e32 v4, 5, v72 -; CHECK-NEXT: v_add_co_u32 v2, s4, s42, v1 -; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, s43, 0, s4 +; CHECK-NEXT: v_add_co_u32 v2, s4, s44, v1 +; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, s45, 0, s4 ; CHECK-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 ; CHECK-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v3, vcc_lo ; CHECK-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4 ; CHECK-NEXT: 
v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo ; CHECK-NEXT: s_clause 0x1 -; CHECK-NEXT: global_load_dwordx4 v[4:7], v[0:1], off -; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off +; CHECK-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:8 +; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off offset:8 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_xor_b32_e32 v46, v9, v5 ; CHECK-NEXT: v_xor_b32_e32 v45, v8, v4 @@ -417,8 +415,8 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_cbranch_execz .LBB0_27 ; CHECK-NEXT: ; %bb.29: ; in Loop: Header=BB0_28 Depth=1 ; CHECK-NEXT: s_clause 0x1 -; CHECK-NEXT: global_load_dwordx2 v[58:59], v[2:3], off offset:16 -; CHECK-NEXT: global_load_dwordx2 v[60:61], v[0:1], off offset:16 +; CHECK-NEXT: global_load_dwordx2 v[58:59], v[2:3], off offset:24 +; CHECK-NEXT: global_load_dwordx2 v[60:61], v[0:1], off offset:24 ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 4, v45 ; CHECK-NEXT: v_alignbit_b32 v1, v46, v45, 12 ; CHECK-NEXT: v_and_b32_e32 v2, 0xf0000, v45 diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll b/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll index 8c806e76bde6ec..b87439a9d6fae7 100644 --- a/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll +++ b/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll @@ -31,205 +31,188 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sgpr_32 = COPY $sgpr10 ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sgpr_32 = COPY $sgpr8 ; CHECK-NEXT: undef [[S_LOAD_DWORDX2_IMM:%[0-9]+]].sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 232, 0 :: (invariant load (s64) from %ir.39, addrspace 4) + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %125:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: KILL undef %125:sgpr_128 ; CHECK-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY5]], 4, 
implicit-def dead $scc ; CHECK-NEXT: [[S_LSHL_B32_1:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY4]], 4, implicit-def dead $scc ; CHECK-NEXT: [[S_LSHL_B32_2:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 4, implicit-def dead $scc ; CHECK-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_]], 31, implicit-def dead $scc ; CHECK-NEXT: [[S_ASHR_I32_1:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_1]], 31, implicit-def dead $scc - ; CHECK-NEXT: [[S_ASHR_I32_2:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_2]], 31, implicit-def dead $scc ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]].sub1:sgpr_128 = S_AND_B32 [[S_LOAD_DWORDX2_IMM]].sub1, 65535, implicit-def dead $scc + ; CHECK-NEXT: [[S_ASHR_I32_2:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_2]], 31, implicit-def dead $scc + ; CHECK-NEXT: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM]], 29, implicit-def dead $scc + ; CHECK-NEXT: [[S_SUB_I32_1:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM]], 30, implicit-def dead $scc ; CHECK-NEXT: undef [[S_ADD_U32_:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_2]], implicit-def $scc ; CHECK-NEXT: [[S_ADD_U32_:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_]], 16, 0 :: (invariant load (s128) from %ir.81, addrspace 4) ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM1:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM undef %74:sreg_64, 0, 0 :: (invariant load (s128) from `ptr addrspace(4) undef`, addrspace 4) - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %132:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: KILL undef %74:sreg_64 - ; CHECK-NEXT: KILL undef %132:sgpr_128 ; CHECK-NEXT: KILL [[S_ADD_U32_]].sub0, [[S_ADD_U32_]].sub1 ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM 
[[S_LOAD_DWORDX4_IMM]], 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; CHECK-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sgpr_128 = S_MOV_B32 0 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET undef %118:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], undef %89:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: KILL undef %89:sgpr_128 - ; CHECK-NEXT: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM]], 29, implicit-def dead $scc - ; CHECK-NEXT: [[S_SUB_I32_1:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM]], 30, implicit-def dead $scc + ; CHECK-NEXT: KILL undef %118:sgpr_128 ; CHECK-NEXT: [[S_SUB_I32_2:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM1]], 31, implicit-def dead $scc - ; CHECK-NEXT: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY6]], 64, implicit-def $scc - ; CHECK-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %54:sreg_32, 0, implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_2:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_1]], [[S_LSHL_B32_]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_2:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_3:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_1]], [[S_LSHL_B32_1]], implicit-def $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM2:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_2]], 0, 0 :: (invariant load (s128) from 
%ir.87, addrspace 4) - ; CHECK-NEXT: [[S_ADD_U32_3:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_4:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_1]], [[S_LSHL_B32_2]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_4:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_ASHR_I32_3:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 undef %171:sreg_32, 31, implicit-def dead $scc - ; CHECK-NEXT: undef [[S_ADD_U32_5:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_1]], undef %171:sreg_32, implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_5:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_6:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_6:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_7:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_1]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_7:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_8:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, undef %171:sreg_32, implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_8:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_ADD_U32_9:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY7]].sub0, 224, implicit-def $scc - ; CHECK-NEXT: [[S_ADDC_U32_1:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %51:sreg_32, 0, implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_10:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_9]], [[S_LSHL_B32_]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_10:%[0-9]+]].sub1:sreg_64 = 
S_ADDC_U32 [[S_ADDC_U32_1]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_11:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_9]], [[S_LSHL_B32_1]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_11:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_1]], [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_12:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_9]], [[S_LSHL_B32_2]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_12:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_1]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_ADD_U32_13:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY7]].sub0, 576, implicit-def $scc - ; CHECK-NEXT: [[S_ADDC_U32_2:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %51:sreg_32, 0, implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_14:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_13]], [[S_LSHL_B32_]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_14:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_2]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_15:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_13]], [[S_LSHL_B32_2]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_15:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_2]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_16:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_13]], undef %171:sreg_32, implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_16:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_2]], [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_17:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY8]], [[S_LSHL_B32_]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_17:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %48:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_18:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_1]], 
implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_18:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_19:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_2]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_19:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_1:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_1:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_2:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_1]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_2:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_3:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_2]], implicit-def $scc + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM2:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_1]], 64, 0 :: (invariant load (s128) from %ir.87, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM3:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_2]], 64, 0 :: (invariant load (s128) from %ir.93, addrspace 4) + ; CHECK-NEXT: KILL [[S_ADD_U32_1]].sub0, [[S_ADD_U32_1]].sub1 + ; CHECK-NEXT: KILL [[S_ADD_U32_2]].sub0, [[S_ADD_U32_2]].sub1 + ; CHECK-NEXT: [[S_ADD_U32_3:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_ASHR_I32_3:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 undef %169:sreg_32, 31, implicit-def dead $scc + ; CHECK-NEXT: undef [[S_ADD_U32_4:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], undef %169:sreg_32, implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_4:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_3]], implicit-def dead $scc, implicit 
$scc + ; CHECK-NEXT: undef [[S_ADD_U32_5:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_5:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_6:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_1]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_6:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_7:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, undef %169:sreg_32, implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_7:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_8:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_2]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_8:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_9:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY8]], [[S_LSHL_B32_]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_9:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %48:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_10:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_1]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_10:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_11:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_2]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_11:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_]], 16, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 
[[S_LSHL_B32_2]], 16, implicit-def dead $scc ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], undef %312:sreg_32, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], undef %301:sreg_32, 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[S_MOV_B32_]], 16, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET undef %118:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %367:sgpr_128, undef %368:sreg_32, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %378:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM3:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_3]], 0, 0 :: (invariant load (s128) from %ir.92, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM4:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_4]], 0, 0 :: (invariant load (s128) from %ir.97, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM5:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 0, 0 :: (invariant load (s128) from %ir.104, addrspace 4) - ; CHECK-NEXT: 
[[S_LOAD_DWORDX4_IMM6:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_6]], 0, 0 :: (invariant load (s128) from %ir.109, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM7:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_7]], 0, 0 :: (invariant load (s128) from %ir.114, addrspace 4) + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %356:sgpr_128, undef %357:sreg_32, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %367:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM4:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_3]], 64, 0 :: (invariant load (s128) from %ir.99, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM5:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_4]], 64, 0 :: (invariant load (s128) from %ir.107, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM6:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_6]], 0, 0 :: (invariant load (s128) from %ir.117, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM7:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_7]], 0, 0 :: (invariant load (s128) from %ir.124, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %362:sgpr_128, [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %373:sgpr_128, [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %351:sgpr_128, 
[[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %362:sgpr_128, [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM3]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_ADD_I32_2:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM]], -98, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_3:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM1]], -114, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_4:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM2]], -130, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_5:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM2]], -178, implicit-def dead $scc - ; CHECK-NEXT: undef [[S_ADD_U32_20:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY10]], [[S_LSHL_B32_]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_20:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %42:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_21:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_21:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_22:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_1]], implicit-def $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM8:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_8]], 0, 0 :: (invariant load (s128) from %ir.121, addrspace 4) - ; CHECK-NEXT: [[S_ADD_U32_22:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_23:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_2]], 
implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_23:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM8:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 0, 0 :: (invariant load (s128) from %ir.112, addrspace 4) + ; CHECK-NEXT: undef [[S_ADD_U32_12:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY10]], [[S_LSHL_B32_]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_12:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %42:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_13:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_13:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_14:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_1]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_14:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_15:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_2]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_15:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_LSHL_B32_3:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY12]], 4, implicit-def dead $scc - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM3]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_ADD_I32_6:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_3]], 16, implicit-def dead $scc - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %394:sgpr_128, [[S_ADD_I32_6]], 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: 
[[BUFFER_LOAD_FORMAT_X_IDXEN4:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM9:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY7]], 224, 0 :: (invariant load (s128) from %ir.126, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM10:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_10]], 0, 0 :: (invariant load (s128) from %ir.127, addrspace 4) + ; CHECK-NEXT: [[S_ADD_I32_6:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_3]], 16, implicit-def dead $scc + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %383:sgpr_128, [[S_ADD_I32_6]], 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN5:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM11:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_11]], 0, 0 :: (invariant load (s128) from %ir.132, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM12:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_12]], 0, 0 :: (invariant load (s128) from %ir.137, addrspace 4) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN6:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN7:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN8:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; 
CHECK-NEXT: [[S_LOAD_DWORDX4_IMM9:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 224, 0 :: (invariant load (s128) from %ir.129, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM10:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY7]], 224, 0 :: (invariant load (s128) from %ir.145, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM11:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 576, 0 :: (invariant load (s128) from %ir.150, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN6:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM12:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_6]], 224, 0 :: (invariant load (s128) from %ir.134, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM13:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_7]], 576, 0 :: (invariant load (s128) from %ir.162, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN7:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN8:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_ADD_I32_7:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM4]], -217, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_8:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -233, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_9:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM5]], -249, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_10:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM3]], -297, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_11:%[0-9]+]]:sreg_32 = S_ADD_I32 
[[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -313, implicit-def dead $scc + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM14:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_8]], 224, 0 :: (invariant load (s128) from %ir.140, addrspace 4) ; CHECK-NEXT: [[S_ADD_I32_12:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -329, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_13:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -345, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_14:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM6]], -441, implicit-def dead $scc - ; CHECK-NEXT: [[S_ADD_U32_24:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], 160, implicit-def $scc - ; CHECK-NEXT: [[S_ADDC_U32_3:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %36:sreg_32, 0, implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_25:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_24]], [[S_LSHL_B32_2]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_25:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_3]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_16:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY2]], [[S_LSHL_B32_2]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_16:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %36:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_LSHL_B32_4:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY13]], 4, implicit-def dead $scc - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN9:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN9:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_ASHR_I32_4:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_4]], 31, implicit-def dead $scc - ; CHECK-NEXT: 
undef [[S_ADD_U32_26:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_24]], [[S_LSHL_B32_4]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_26:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_3]], [[S_ASHR_I32_4]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_ADD_U32_27:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]].sub0, 168, implicit-def $scc - ; CHECK-NEXT: [[S_ADDC_U32_4:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %57:sreg_32, 0, implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM13:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_14]], 0, 0 :: (invariant load (s128) from %ir.147, addrspace 4) + ; CHECK-NEXT: undef [[S_ADD_U32_17:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY2]], [[S_LSHL_B32_4]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_17:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %36:sreg_32, [[S_ASHR_I32_4]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_LSHL_B32_5:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY5]], 3, implicit-def dead $scc - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN10:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM11]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN10:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_ASHR_I32_5:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_5]], 31, implicit-def dead $scc - ; CHECK-NEXT: undef [[S_ADD_U32_28:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_27]], [[S_LSHL_B32_5]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_28:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_5]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_ADD_U32_28]], 0, 0 :: (invariant load (s32) from %ir.269, align 8, addrspace 4) - ; CHECK-NEXT: 
[[S_LOAD_DWORDX4_IMM14:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_15]], 0, 0 :: (invariant load (s128) from %ir.154, addrspace 4) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN11:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM15:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_16]], 0, 0 :: (invariant load (s128) from %ir.159, addrspace 4) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN12:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN13:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM13]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: undef [[S_ADD_U32_18:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_5]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_18:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_5]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_ADD_U32_18]], 168, 0 :: (invariant load (s32) from %ir.273, align 8, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM15:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_8]], 576, 0 :: (invariant load (s128) from %ir.157, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN11:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN12:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; 
CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN13:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM11]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]].sub3:sgpr_128 = S_MOV_B32 553734060 ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]].sub2:sgpr_128 = S_MOV_B32 -1 ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]] - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM16:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_17]], 0, 0 :: (invariant load (s128) from %ir.167, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM16:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_9]], 0, 0 :: (invariant load (s128) from %ir.170, addrspace 4) ; CHECK-NEXT: [[COPY15:%[0-9]+]].sub1:sgpr_128 = COPY [[S_MOV_B32_]].sub1 ; CHECK-NEXT: [[COPY15:%[0-9]+]].sub0:sgpr_128 = COPY [[S_LOAD_DWORD_IMM]] ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY15]], 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN14:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN15:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM15]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM17:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_18]], 0, 0 :: (invariant load (s128) from %ir.175, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM18:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_19]], 0, 0 :: (invariant load (s128) from %ir.180, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN14:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM15]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 
(s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN15:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM13]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM17:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_10]], 0, 0 :: (invariant load (s128) from %ir.178, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM18:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_11]], 0, 0 :: (invariant load (s128) from %ir.183, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN16:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM16]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_LSHL_B32_6:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY4]], 3, implicit-def dead $scc ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_ASHR_I32_6:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_6]], 31, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_15:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM4]], -467, implicit-def dead $scc - ; CHECK-NEXT: undef [[S_ADD_U32_29:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_27]], [[S_LSHL_B32_6]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_29:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_6]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[S_ADD_U32_29]], 0, 0 :: (invariant load (s64) from %ir.277, addrspace 4) + ; CHECK-NEXT: undef [[S_ADD_U32_19:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_6]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_19:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_6]], implicit-def dead $scc, implicit $scc + ; 
CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[S_ADD_U32_19]], 168, 0 :: (invariant load (s64) from %ir.282, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM17]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM18]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM19:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_20]], 0, 0 :: (invariant load (s128) from %ir.202, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM20:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_21]], 0, 0 :: (invariant load (s128) from %ir.208, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM19:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_12]], 0, 0 :: (invariant load (s128) from %ir.205, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM20:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_13]], 0, 0 :: (invariant load (s128) from %ir.211, addrspace 4) ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]] - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM21:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_22]], 0, 0 :: (invariant load (s128) from %ir.213, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM21:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_14]], 0, 0 :: (invariant load (s128) from %ir.216, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM22:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_15]], 0, 0 :: (invariant load (s128) from %ir.221, addrspace 4) ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORDX2_IMM1]].sub1, 65535, implicit-def dead $scc ; CHECK-NEXT: [[COPY16:%[0-9]+]].sub0:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM1]].sub0 ; CHECK-NEXT: [[COPY16:%[0-9]+]].sub1:sgpr_128 = COPY [[S_AND_B32_]] ; CHECK-NEXT: 
[[S_BUFFER_LOAD_DWORD_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY16]], 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM22:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_23]], 0, 0 :: (invariant load (s128) from %ir.218, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN17:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM19]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN18:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM20]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_LSHL_B32_7:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 3, implicit-def dead $scc ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN19:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM21]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[S_LSHL_B32_7:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 3, implicit-def dead $scc + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN20:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_ASHR_I32_7:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_7]], 31, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_16:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM5]], -468, implicit-def dead $scc - ; CHECK-NEXT: undef [[S_ADD_U32_30:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_27]], [[S_LSHL_B32_7]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_30:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_7]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN20:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], 
[[S_LOAD_DWORDX4_IMM22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM2:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[S_ADD_U32_30]], 0, 0 :: (invariant load (s64) from %ir.287, addrspace 4) + ; CHECK-NEXT: undef [[S_ADD_U32_20:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_7]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_20:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_7]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM2:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[S_ADD_U32_20]], 168, 0 :: (invariant load (s64) from %ir.293, addrspace 4) ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]] ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORDX2_IMM2]].sub1, 65535, implicit-def dead $scc ; CHECK-NEXT: [[COPY17:%[0-9]+]].sub0:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM2]].sub0 ; CHECK-NEXT: [[COPY17:%[0-9]+]].sub1:sgpr_128 = COPY [[S_AND_B32_1]] ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY17]], 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_25]], 0, 0 :: (invariant load (s128) from %ir.253, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %484:sreg_64, 0, 0 :: (invariant load (s32) from `ptr addrspace(4) undef`, addrspace 4) - ; CHECK-NEXT: KILL [[S_ADD_U32_25]].sub0, [[S_ADD_U32_25]].sub1 - ; CHECK-NEXT: KILL undef %484:sreg_64 + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_16]], 160, 0 :: (invariant load (s128) from %ir.256, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %469:sreg_64, 0, 0 :: (invariant load (s32) from `ptr addrspace(4) undef`, addrspace 4) + ; CHECK-NEXT: KILL 
[[S_ADD_U32_16]].sub0, [[S_ADD_U32_16]].sub1 + ; CHECK-NEXT: KILL undef %469:sreg_64 ; CHECK-NEXT: KILL [[COPY17]].sub0_sub1_sub2, [[COPY17]].sub3 ; CHECK-NEXT: [[S_LSHL_B32_8:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY14]], 3, implicit-def dead $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_26]], 0, 0 :: (invariant load (s128) from %ir.261, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_17]], 160, 0 :: (invariant load (s128) from %ir.265, addrspace 4) ; CHECK-NEXT: [[S_ASHR_I32_8:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_8]], 31, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_17:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM6]], -469, implicit-def dead $scc - ; CHECK-NEXT: undef [[S_ADD_U32_31:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_27]], [[S_LSHL_B32_8]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_31:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_8]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_ADD_U32_31]], 0, 0 :: (invariant load (s32) from %ir.298, align 8, addrspace 4) + ; CHECK-NEXT: undef [[S_ADD_U32_21:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_8]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_21:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_8]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_ADD_U32_21]], 168, 0 :: (invariant load (s32) from %ir.305, align 8, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN21:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM23]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN22:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], 
[[S_LOAD_DWORDX4_IMM24]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM24]] ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM23]] + ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM24]] ; CHECK-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORD_IMM1]], 65535, implicit-def dead $scc ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]] ; CHECK-NEXT: [[COPY18:%[0-9]+]].sub1:sgpr_128 = COPY [[S_AND_B32_2]] @@ -241,24 +224,22 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: [[S_ADD_I32_21:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -507, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_22:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -539, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_23:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM7]], -473, implicit-def dead $scc - ; CHECK-NEXT: [[S_ADD_U32_32:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], 96, implicit-def $scc - ; CHECK-NEXT: [[S_ADDC_U32_5:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %33:sreg_32, 0, implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_33:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_32]], [[S_LSHL_B32_]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_33:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM25:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_33]], 0, 0 :: (invariant load (s128) from %ir.316, addrspace 4) - ; CHECK-NEXT: undef [[S_ADD_U32_34:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_32]], [[S_LSHL_B32_1]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_34:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM26:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_34]], 0, 0 :: (invariant load (s128) from 
%ir.321, addrspace 4) - ; CHECK-NEXT: undef [[S_ADD_U32_35:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_32]], [[S_LSHL_B32_2]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_35:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM27:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_35]], 0, 0 :: (invariant load (s128) from %ir.326, addrspace 4) + ; CHECK-NEXT: undef [[S_ADD_U32_22:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY1]], [[S_LSHL_B32_]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_22:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %33:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM25:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_22]], 96, 0 :: (invariant load (s128) from %ir.323, addrspace 4) + ; CHECK-NEXT: undef [[S_ADD_U32_23:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY1]], [[S_LSHL_B32_1]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_23:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %33:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM26:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_23]], 96, 0 :: (invariant load (s128) from %ir.329, addrspace 4) + ; CHECK-NEXT: undef [[S_ADD_U32_24:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY1]], [[S_LSHL_B32_2]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_24:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %33:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM27:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_24]], 96, 0 :: (invariant load (s128) from %ir.335, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN23:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM25]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN24:%[0-9]+]]:vgpr_32 = 
BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM26]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN25:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM27]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM25]] - ; CHECK-NEXT: KILL [[V_MOV_B32_e32_]] ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM26]] + ; CHECK-NEXT: KILL [[V_MOV_B32_e32_]] ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM27]] + ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM25]] ; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -2, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec ; CHECK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -1, [[BUFFER_LOAD_FORMAT_X_IDXEN1]], 0, implicit $exec ; CHECK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -3, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec @@ -370,13 +351,13 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: [[V_OR_B32_e64_64:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_63]], [[V_ADD_U32_e64_28]], implicit $exec ; CHECK-NEXT: [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -593, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec ; CHECK-NEXT: [[V_OR_B32_e64_65:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_64]], [[V_ADD_U32_e64_29]], implicit $exec - ; CHECK-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %559:sreg_64, 0, 0 :: (invariant load (s256) from `ptr addrspace(4) undef`, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %542:sreg_64, 0, 0 :: (invariant load (s256) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[V_OR_B32_e64_66:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_65]], [[V_ADD_U32_e64_30]], implicit $exec ; CHECK-NEXT: [[S_ADD_I32_24:%[0-9]+]]:sreg_32 = S_ADD_I32 
[[S_BUFFER_LOAD_DWORD_IMM8]], -594, implicit-def dead $scc ; CHECK-NEXT: [[V_OR_B32_e64_67:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_24]], [[V_OR_B32_e64_66]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 0, [[V_OR_B32_e64_67]], implicit $exec ; CHECK-NEXT: undef [[V_CNDMASK_B32_e64_:%[0-9]+]].sub3:vreg_128 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_EQ_U32_e64_]], implicit $exec - ; CHECK-NEXT: IMAGE_STORE_V4_V2_gfx10 [[V_CNDMASK_B32_e64_]], undef %573:vreg_64, [[S_LOAD_DWORDX8_IMM]], 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 8) + ; CHECK-NEXT: IMAGE_STORE_V4_V2_gfx10 [[V_CNDMASK_B32_e64_]], undef %556:vreg_64, [[S_LOAD_DWORDX8_IMM]], 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 .expVert: %0 = extractelement <31 x i32> %userData, i64 2 diff --git a/llvm/test/CodeGen/PowerPC/licm-remat.ll b/llvm/test/CodeGen/PowerPC/licm-remat.ll index ffdaf5d2481e3b..cf3e3ac089a498 100644 --- a/llvm/test/CodeGen/PowerPC/licm-remat.ll +++ b/llvm/test/CodeGen/PowerPC/licm-remat.ll @@ -21,7 +21,7 @@ define linkonce_odr void @ZN6snappyDecompressor_(ptr %this, ptr %writer) { ; CHECK-LABEL: ZN6snappyDecompressor_: ; CHECK: # %bb.0: # %entry ; CHECK: addis 4, 2, .L__ModuleStringPool@toc@ha -; CHECK: addi 25, 4, .L__ModuleStringPool@toc@l +; CHECK: addi 26, 4, .L__ModuleStringPool@toc@l ; CHECK: .LBB0_2: # %for.cond ; CHECK-NOT: addis {{[0-9]+}}, 2, .L__ModuleStringPool@toc@ha ; CHECK: bctrl diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll new file mode 100644 index 00000000000000..c24bbd5f658f94 --- /dev/null +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 
3 +; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -S -passes=separate-const-offset-from-gep < %s | FileCheck %s + +define void @inboundsPossiblyNegative(ptr %in.ptr, i32 %in.idx1) { +; CHECK-LABEL: define void @inboundsPossiblyNegative( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[IN_IDX1]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr <2 x i8>, ptr [[IN_PTR]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr <2 x i8>, ptr [[TMP0]], i32 1 +; CHECK-NEXT: ret void +; +entry: + %const1 = getelementptr inbounds <2 x i8>, ptr %in.ptr, i32 1 + %idx1 = getelementptr inbounds <2 x i8>, ptr %const1, i32 %in.idx1 + ret void +} + +define void @inboundsNonNegative(ptr %in.ptr, i32 %in.idx1) { +; CHECK-LABEL: define void @inboundsNonNegative( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i32 [[IN_IDX1]], 2147483647 +; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[IN_IDX1_NNEG]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <2 x i8>, ptr [[IN_PTR]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i8>, ptr [[TMP0]], i32 1 +; CHECK-NEXT: ret void +; +entry: + %in.idx1.nneg = and i32 %in.idx1, 2147483647 + %const1 = getelementptr inbounds <2 x i8>, ptr %in.ptr, i32 1 + %idx1 = getelementptr inbounds <2 x i8>, ptr %const1, i32 %in.idx1.nneg + ret void +} + +define void @inboundsNonchained(ptr %in.ptr, i32 %in.idx1) { +; CHECK-LABEL: define void @inboundsNonchained( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i32 [[IN_IDX1]], 2147483647 +; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[IN_IDX1_NNEG]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr <2 x i8>, ptr [[IN_PTR]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr <2 x i8>, ptr [[TMP0]], i32 1 +; 
CHECK-NEXT: ret void +; +entry: + %in.idx1.nneg = and i32 %in.idx1, 2147483647 + %const1 = getelementptr inbounds <2 x i8>, ptr %in.ptr, i32 1 + %idx1 = getelementptr <2 x i8>, ptr %const1, i32 %in.idx1.nneg + ret void +} diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll new file mode 100644 index 00000000000000..7137f0fb66fdb9 --- /dev/null +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll @@ -0,0 +1,175 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a --start-before=separate-const-offset-from-gep < %s | FileCheck %s + +define protected amdgpu_kernel void @sink_addr(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) { +; CHECK-LABEL: sink_addr: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_lshl_b32 s3, s1, 1 +; CHECK-NEXT: s_add_i32 s0, s0, s3 +; CHECK-NEXT: s_lshl_b32 s2, s2, 1 +; CHECK-NEXT: s_add_i32 s0, s0, s2 +; CHECK-NEXT: s_cmp_lg_u32 s1, 0 +; CHECK-NEXT: s_cbranch_scc1 .LBB0_2 +; CHECK-NEXT: ; %bb.1: ; %bb.1 +; CHECK-NEXT: v_mov_b32_e32 v12, s0 +; CHECK-NEXT: ds_read_b128 v[0:3], v12 +; CHECK-NEXT: ds_read_b128 v[4:7], v12 offset:512 +; CHECK-NEXT: ds_read_b128 v[8:11], v12 offset:1024 +; CHECK-NEXT: ds_read_b128 v[12:15], v12 offset:1536 +; CHECK-NEXT: s_waitcnt lgkmcnt(3) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[0:3] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_waitcnt lgkmcnt(2) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[4:7] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_waitcnt lgkmcnt(1) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[8:11] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[12:15] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: .LBB0_2: ; %end +; 
CHECK-NEXT: s_add_i32 s1, s0, 0x200 +; CHECK-NEXT: v_mov_b32_e32 v0, s0 +; CHECK-NEXT: s_add_i32 s2, s0, 0x400 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_mov_b32_e32 v0, s1 +; CHECK-NEXT: s_add_i32 s3, s0, 0x600 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_mov_b32_e32 v0, s2 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_mov_b32_e32 v0, s3 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_endpgm +entry: + %base = getelementptr half, ptr addrspace(3) %in.ptr, i32 %in.idx0 + %idx0 = getelementptr half, ptr addrspace(3) %base, i32 %in.idx1 + %const1 = getelementptr half, ptr addrspace(3) %base, i32 256 + %idx1 = getelementptr half, ptr addrspace(3) %const1, i32 %in.idx1 + %const2 = getelementptr half, ptr addrspace(3) %base, i32 512 + %idx2 = getelementptr half, ptr addrspace(3) %const2, i32 %in.idx1 + %const3 = getelementptr half, ptr addrspace(3) %base, i32 768 + %idx3 = getelementptr half, ptr addrspace(3) %const3, i32 %in.idx1 + %cmp0 = icmp eq i32 %in.idx0, 0 + br i1 %cmp0, label %bb.1, label %end + +bb.1: + %val0 = load <8 x half>, ptr addrspace(3) %idx0, align 16 + %val1 = load <8 x half>, ptr addrspace(3) %idx1, align 16 + %val2 = load <8 x half>, ptr addrspace(3) %idx2, align 16 + %val3 = load <8 x half>, ptr addrspace(3) %idx3, align 16 + call void asm sideeffect "; use $0", "v"(<8 x half> %val0) + call void asm sideeffect "; use $0", "v"(<8 x half> %val1) + call void asm sideeffect "; use $0", "v"(<8 x half> %val2) + call void asm sideeffect "; use $0", "v"(<8 x half> %val3) + br label %end + +end: + call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx0) + call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx1) + call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx2) + call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx3) + ret void 
+} + +define protected amdgpu_kernel void @illegal_addr_mode(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) { +; CHECK-LABEL: illegal_addr_mode: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_load_dwordx4 s[4:7], s[6:7], 0x0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_lshl_b32 s0, s5, 1 +; CHECK-NEXT: s_lshl_b32 s1, s6, 1 +; CHECK-NEXT: s_add_i32 s3, s4, s0 +; CHECK-NEXT: s_add_i32 s3, s3, s1 +; CHECK-NEXT: s_add_i32 s2, s3, 0x12a60 +; CHECK-NEXT: s_add_i32 s1, s3, 0x12c60 +; CHECK-NEXT: s_add_i32 s0, s3, 0x12ed8 +; CHECK-NEXT: s_cmp_lg_u32 s5, 0 +; CHECK-NEXT: s_cbranch_scc1 .LBB1_2 +; CHECK-NEXT: ; %bb.1: ; %bb.1 +; CHECK-NEXT: v_mov_b32_e32 v0, s3 +; CHECK-NEXT: v_mov_b32_e32 v4, s2 +; CHECK-NEXT: v_mov_b32_e32 v8, s1 +; CHECK-NEXT: v_mov_b32_e32 v12, s0 +; CHECK-NEXT: ds_read_b128 v[0:3], v0 +; CHECK-NEXT: ds_read_b128 v[4:7], v4 +; CHECK-NEXT: ds_read_b128 v[8:11], v8 +; CHECK-NEXT: ds_read_b128 v[12:15], v12 +; CHECK-NEXT: s_waitcnt lgkmcnt(3) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[0:3] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_waitcnt lgkmcnt(2) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[4:7] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_waitcnt lgkmcnt(1) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[8:11] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[12:15] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: .LBB1_2: ; %end +; CHECK-NEXT: v_mov_b32_e32 v0, s3 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_mov_b32_e32 v0, s2 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_mov_b32_e32 v0, s1 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_mov_b32_e32 v0, s0 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_endpgm +entry: + %base = getelementptr half, ptr addrspace(3) %in.ptr, i32 %in.idx0 + %idx0 = 
getelementptr half, ptr addrspace(3) %base, i32 %in.idx1 + %const1 = getelementptr half, ptr addrspace(3) %base, i32 38192 + %idx1 = getelementptr half, ptr addrspace(3) %const1, i32 %in.idx1 + %const2 = getelementptr half, ptr addrspace(3) %base, i32 38448 + %idx2 = getelementptr half, ptr addrspace(3) %const2, i32 %in.idx1 + %const3 = getelementptr half, ptr addrspace(3) %base, i32 38764 + %idx3 = getelementptr half, ptr addrspace(3) %const3, i32 %in.idx1 + %cmp0 = icmp eq i32 %in.idx0, 0 + br i1 %cmp0, label %bb.1, label %end + +bb.1: + %val0 = load <8 x half>, ptr addrspace(3) %idx0, align 16 + %val1 = load <8 x half>, ptr addrspace(3) %idx1, align 16 + %val2 = load <8 x half>, ptr addrspace(3) %idx2, align 16 + %val3 = load <8 x half>, ptr addrspace(3) %idx3, align 16 + call void asm sideeffect "; use $0", "v"(<8 x half> %val0) + call void asm sideeffect "; use $0", "v"(<8 x half> %val1) + call void asm sideeffect "; use $0", "v"(<8 x half> %val2) + call void asm sideeffect "; use $0", "v"(<8 x half> %val3) + br label %end + +end: + call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx0) + call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx1) + call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx2) + call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx3) + ret void +} diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lower-gep-reorder.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lower-gep-reorder.ll new file mode 100644 index 00000000000000..c46f4e79ba432a --- /dev/null +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lower-gep-reorder.ll @@ -0,0 +1,65 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -mtriple=nvptx64-nvidia-cuda -S -passes=separate-const-offset-from-gep < %s | FileCheck %s + +define protected amdgpu_kernel void @sink_addr(ptr %in.ptr, i64 %in.idx0, i64 %in.idx1) { +; CHECK-LABEL: define protected 
amdgpu_kernel void @sink_addr( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IDX0:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 [[IN_IDX0]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 [[IN_IDX0]], i64 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[TMP0]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[TMP1]], i64 256 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 [[IN_IDX0]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[TMP3]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[TMP4]], i64 512 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 [[IN_IDX0]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP6]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i64 768 +; CHECK-NEXT: [[CMP0:%.*]] = icmp eq i64 [[IN_IDX0]], 0 +; CHECK-NEXT: br i1 [[CMP0]], label [[BB_1:%.*]], label [[END:%.*]] +; CHECK: bb.1: +; CHECK-NEXT: [[VAL0:%.*]] = load <8 x i64>, ptr [[IDX0]], align 16 +; CHECK-NEXT: [[VAL1:%.*]] = load <8 x i64>, ptr [[TMP2]], align 16 +; CHECK-NEXT: [[VAL2:%.*]] = load <8 x i64>, ptr [[TMP5]], align 16 +; CHECK-NEXT: [[VAL3:%.*]] = load <8 x i64>, ptr [[TMP8]], align 16 +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: ret void +; +entry: + %idx0 = getelementptr [8192 x i64], ptr %in.ptr, i64 %in.idx0, i64 %in.idx1 + %const1 = getelementptr [8192 x i64], ptr %in.ptr, i64 %in.idx0, i64 256 + %idx1 = getelementptr i64, 
ptr %const1, i64 %in.idx1 + %const2 = getelementptr [8192 x i64], ptr %in.ptr, i64 %in.idx0, i64 512 + %idx2 = getelementptr i64, ptr %const2, i64 %in.idx1 + %const3 = getelementptr [8192 x i64], ptr %in.ptr, i64 %in.idx0, i64 768 + %idx3 = getelementptr i64, ptr %const3, i64 %in.idx1 + %cmp0 = icmp eq i64 %in.idx0, 0 + br i1 %cmp0, label %bb.1, label %end + +bb.1: + %val0 = load <8 x i64>, ptr %idx0, align 16 + %val1 = load <8 x i64>, ptr %idx1, align 16 + %val2 = load <8 x i64>, ptr %idx2, align 16 + %val3 = load <8 x i64>, ptr %idx3, align 16 + call void asm sideeffect "; use $0", "v"(<8 x i64> %val0) + call void asm sideeffect "; use $0", "v"(<8 x i64> %val1) + call void asm sideeffect "; use $0", "v"(<8 x i64> %val2) + call void asm sideeffect "; use $0", "v"(<8 x i64> %val3) + br label %end + +end: + call void asm sideeffect "; use $0", "v"(ptr %idx0) + call void asm sideeffect "; use $0", "v"(ptr %idx1) + call void asm sideeffect "; use $0", "v"(ptr %idx2) + call void asm sideeffect "; use $0", "v"(ptr %idx3) + ret void +} diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/reorder-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/reorder-gep.ll new file mode 100644 index 00000000000000..a15f11a634db5d --- /dev/null +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/reorder-gep.ll @@ -0,0 +1,188 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -S -passes=separate-const-offset-from-gep < %s | FileCheck %s + +define void @illegal_addr_mode(ptr %in.ptr, i64 %in.idx0, i64 %in.idx1) { +; CHECK-LABEL: define void @illegal_addr_mode( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BASE:%.*]] = getelementptr i64, ptr [[IN_PTR]], i64 [[IN_IDX0]] +; CHECK-NEXT: [[IDX0:%.*]] = getelementptr i64, ptr [[BASE]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[CONST1:%.*]] = getelementptr i64, ptr [[BASE]], i64 256 +; CHECK-NEXT: 
[[IDX1:%.*]] = getelementptr i64, ptr [[CONST1]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[CONST2:%.*]] = getelementptr i64, ptr [[BASE]], i64 512 +; CHECK-NEXT: [[IDX2:%.*]] = getelementptr i64, ptr [[CONST2]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[CONST3:%.*]] = getelementptr i64, ptr [[BASE]], i64 768 +; CHECK-NEXT: [[IDX3:%.*]] = getelementptr i64, ptr [[CONST3]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[CMP0:%.*]] = icmp eq i64 [[IN_IDX0]], 0 +; CHECK-NEXT: br i1 [[CMP0]], label [[BB_1:%.*]], label [[END:%.*]] +; CHECK: bb.1: +; CHECK-NEXT: [[VAL0:%.*]] = load <8 x i64>, ptr [[IDX0]], align 16 +; CHECK-NEXT: [[VAL1:%.*]] = load <8 x i64>, ptr [[IDX1]], align 16 +; CHECK-NEXT: [[VAL2:%.*]] = load <8 x i64>, ptr [[IDX2]], align 16 +; CHECK-NEXT: [[VAL3:%.*]] = load <8 x i64>, ptr [[IDX3]], align 16 +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: ret void +; +entry: + %base = getelementptr i64, ptr %in.ptr, i64 %in.idx0 + %idx0 = getelementptr i64, ptr %base, i64 %in.idx1 + %const1 = getelementptr i64, ptr %base, i64 256 + %idx1 = getelementptr i64, ptr %const1, i64 %in.idx1 + %const2 = getelementptr i64, ptr %base, i64 512 + %idx2 = getelementptr i64, ptr %const2, i64 %in.idx1 + %const3 = getelementptr i64, ptr %base, i64 768 + %idx3 = getelementptr i64, ptr %const3, i64 %in.idx1 + %cmp0 = icmp eq i64 %in.idx0, 0 + br i1 %cmp0, label %bb.1, label %end + +bb.1: + %val0 = load <8 x i64>, ptr %idx0, align 16 + %val1 = load <8 x i64>, ptr %idx1, align 16 + %val2 = load <8 x i64>, ptr %idx2, align 16 + %val3 = load <8 x i64>, ptr %idx3, align 16 + call void asm sideeffect "; use $0", "v"(<8 x i64> %val0) + call void asm sideeffect "; use $0", 
"v"(<8 x i64> %val1) + call void asm sideeffect "; use $0", "v"(<8 x i64> %val2) + call void asm sideeffect "; use $0", "v"(<8 x i64> %val3) + br label %end + +end: + call void asm sideeffect "; use $0", "v"(ptr %idx0) + call void asm sideeffect "; use $0", "v"(ptr %idx1) + call void asm sideeffect "; use $0", "v"(ptr %idx2) + call void asm sideeffect "; use $0", "v"(ptr %idx3) + ret void +} + + +define void @multi_index_reorder(ptr %in.ptr, i64 %in.idx0, i64 %in.idx1) { +; CHECK-LABEL: define void @multi_index_reorder( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IDX0:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 0, i64 [[IN_IDX1]] +; CHECK-NEXT: [[CONST1:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 0, i64 256 +; CHECK-NEXT: [[IDX1:%.*]] = getelementptr i64, ptr [[CONST1]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[CONST2:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 0, i64 512 +; CHECK-NEXT: [[IDX2:%.*]] = getelementptr i64, ptr [[CONST2]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[CONST3:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 0, i64 768 +; CHECK-NEXT: [[IDX3:%.*]] = getelementptr i64, ptr [[CONST3]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[CMP0:%.*]] = icmp eq i64 [[IN_IDX0]], 0 +; CHECK-NEXT: br i1 [[CMP0]], label [[BB_1:%.*]], label [[END:%.*]] +; CHECK: bb.1: +; CHECK-NEXT: [[VAL0:%.*]] = load <8 x i64>, ptr [[IDX0]], align 16 +; CHECK-NEXT: [[VAL1:%.*]] = load <8 x i64>, ptr [[IDX1]], align 16 +; CHECK-NEXT: [[VAL2:%.*]] = load <8 x i64>, ptr [[IDX2]], align 16 +; CHECK-NEXT: [[VAL3:%.*]] = load <8 x i64>, ptr [[IDX3]], align 16 +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " 
+; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: ret void +; +entry: + %idx0 = getelementptr [8192 x i64], ptr %in.ptr, i64 0, i64 %in.idx1 + %const1 = getelementptr [8192 x i64], ptr %in.ptr, i64 0, i64 256 + %idx1 = getelementptr i64, ptr %const1, i64 %in.idx1 + %const2 = getelementptr [8192 x i64], ptr %in.ptr, i64 0, i64 512 + %idx2 = getelementptr i64, ptr %const2, i64 %in.idx1 + %const3 = getelementptr [8192 x i64], ptr %in.ptr, i64 0, i64 768 + %idx3 = getelementptr i64, ptr %const3, i64 %in.idx1 + %cmp0 = icmp eq i64 %in.idx0, 0 + br i1 %cmp0, label %bb.1, label %end + +bb.1: + %val0 = load <8 x i64>, ptr %idx0, align 16 + %val1 = load <8 x i64>, ptr %idx1, align 16 + %val2 = load <8 x i64>, ptr %idx2, align 16 + %val3 = load <8 x i64>, ptr %idx3, align 16 + call void asm sideeffect "; use $0", "v"(<8 x i64> %val0) + call void asm sideeffect "; use $0", "v"(<8 x i64> %val1) + call void asm sideeffect "; use $0", "v"(<8 x i64> %val2) + call void asm sideeffect "; use $0", "v"(<8 x i64> %val3) + br label %end + +end: + call void asm sideeffect "; use $0", "v"(ptr %idx0) + call void asm sideeffect "; use $0", "v"(ptr %idx1) + call void asm sideeffect "; use $0", "v"(ptr %idx2) + call void asm sideeffect "; use $0", "v"(ptr %idx3) + ret void +} + + +define void @different_type_reorder(ptr %in.ptr, i64 %in.idx0, i64 %in.idx1) { +; CHECK-LABEL: define void @different_type_reorder( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BASE:%.*]] = getelementptr i64, ptr [[IN_PTR]], i64 [[IN_IDX0]] +; CHECK-NEXT: [[IDX0:%.*]] = getelementptr i64, ptr [[BASE]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[CONST1:%.*]] = getelementptr i8, ptr [[BASE]], i64 256 +; CHECK-NEXT: [[IDX1:%.*]] = getelementptr i64, ptr [[CONST1]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[CONST2:%.*]] = getelementptr i8, ptr [[BASE]], i64 512 +; CHECK-NEXT: [[IDX2:%.*]] = getelementptr i64, ptr [[CONST2]], i64 [[IN_IDX1]] +; CHECK-NEXT: 
[[CONST3:%.*]] = getelementptr i8, ptr [[BASE]], i64 768 +; CHECK-NEXT: [[IDX3:%.*]] = getelementptr i64, ptr [[CONST3]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[CMP0:%.*]] = icmp eq i64 [[IN_IDX0]], 0 +; CHECK-NEXT: br i1 [[CMP0]], label [[BB_1:%.*]], label [[END:%.*]] +; CHECK: bb.1: +; CHECK-NEXT: [[VAL0:%.*]] = load <8 x i64>, ptr [[IDX0]], align 16 +; CHECK-NEXT: [[VAL1:%.*]] = load <8 x i64>, ptr [[IDX1]], align 16 +; CHECK-NEXT: [[VAL2:%.*]] = load <8 x i64>, ptr [[IDX2]], align 16 +; CHECK-NEXT: [[VAL3:%.*]] = load <8 x i64>, ptr [[IDX3]], align 16 +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: ret void +; +entry: + %base = getelementptr i64, ptr %in.ptr, i64 %in.idx0 + %idx0 = getelementptr i64, ptr %base, i64 %in.idx1 + %const1 = getelementptr i8, ptr %base, i64 256 + %idx1 = getelementptr i64, ptr %const1, i64 %in.idx1 + %const2 = getelementptr i8, ptr %base, i64 512 + %idx2 = getelementptr i64, ptr %const2, i64 %in.idx1 + %const3 = getelementptr i8, ptr %base, i64 768 + %idx3 = getelementptr i64, ptr %const3, i64 %in.idx1 + %cmp0 = icmp eq i64 %in.idx0, 0 + br i1 %cmp0, label %bb.1, label %end + +bb.1: + %val0 = load <8 x i64>, ptr %idx0, align 16 + %val1 = load <8 x i64>, ptr %idx1, align 16 + %val2 = load <8 x i64>, ptr %idx2, align 16 + %val3 = load <8 x i64>, ptr %idx3, align 16 + call void asm sideeffect "; use $0", "v"(<8 x i64> %val0) + call void asm sideeffect "; use $0", "v"(<8 x i64> %val1) + call void asm sideeffect "; use $0", "v"(<8 x i64> %val2) + call void asm sideeffect "; use $0", "v"(<8 x i64> %val3) + br label %end + +end: + call void asm sideeffect "; use $0", "v"(ptr %idx0) + call void asm 
sideeffect "; use $0", "v"(ptr %idx1) + call void asm sideeffect "; use $0", "v"(ptr %idx2) + call void asm sideeffect "; use $0", "v"(ptr %idx3) + ret void +} From 3a48630a4b25d50abefd945742c247f17bd61156 Mon Sep 17 00:00:00 2001 From: Krystian Stasiowski Date: Tue, 13 Feb 2024 14:25:56 -0500 Subject: [PATCH 047/240] [Clang][Sema] Diagnose friend declarations with enum elaborated-type-specifier in all language modes (#80171) According to [dcl.type.elab] p4: > If an _elaborated-type-specifier_ appears with the `friend` specifier as an entire _member-declaration_, the _member-declaration_ shall have one of the following forms: > `friend` _class-key_ _nested-name-specifier_(opt) _identifier_ `;` > `friend` _class-key_ _simple-template-id_ `;` > `friend` _class-key_ _nested-name-specifier_ `template`(opt) _simple-template-id_ `;` Notably absent from this list is the `enum` form of an _elaborated-type-specifier_ "`enum` _nested-name-specifier_(opt) _identifier_", which appears to be intentional per the resolution of CWG2363. Most major implementations accept these declarations, so the diagnostic is a pedantic warning across all C++ versions. 
In addition to the trivial cases previously diagnosed in C++98, we now diagnose cases where the _elaborated-type-specifier_ has a dependent _nested-name-specifier_: ``` template<typename T> struct A { enum class E; }; struct B { template<typename T> friend enum A<T>::E; // pedantic warning: elaborated enumeration type cannot be a friend }; template<typename T> struct C { friend enum T::E; // pedantic warning: elaborated enumeration type cannot be a friend }; ``` --- clang/docs/ReleaseNotes.rst | 2 + .../clang/Basic/DiagnosticSemaKinds.td | 8 +- clang/include/clang/Sema/DeclSpec.h | 13 +- clang/include/clang/Sema/Sema.h | 3 - clang/lib/Parse/ParseTentative.cpp | 6 +- clang/lib/Sema/DeclSpec.cpp | 9 +- clang/lib/Sema/SemaDecl.cpp | 20 +++ clang/lib/Sema/SemaDeclCXX.cpp | 137 +++++------------- .../lib/Sema/SemaTemplateInstantiateDecl.cpp | 7 +- .../dcl.spec/dcl.type/dcl.type.elab/p3.cpp | 8 +- .../dcl.spec/dcl.type/dcl.type.elab/p4.cpp | 40 +++++ clang/test/CXX/drs/dr16xx.cpp | 8 +- clang/test/CXX/drs/dr23xx.cpp | 38 ++++- .../temp.class/temp.mem.enum/p1.cpp | 8 +- clang/test/FixIt/fixit-c++11.cpp | 6 +- clang/test/Parser/cxx-decl.cpp | 3 - clang/test/Parser/cxx0x-decl.cpp | 2 +- clang/test/SemaCXX/cxx98-compat.cpp | 3 +- clang/test/SemaCXX/enum-scoped.cpp | 10 ++ 19 files changed, 185 insertions(+), 146 deletions(-) create mode 100644 clang/test/CXX/dcl.dcl/dcl.spec/dcl.type/dcl.type.elab/p4.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index dc2fb3b25e3a54..e12a802e2e9ede 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -159,6 +159,8 @@ Improvements to Clang's diagnostics - The ``-Wshorten-64-to-32`` diagnostic is now grouped under ``-Wimplicit-int-conversion`` instead of ``-Wconversion``. Fixes `#69444 `_. +- Clang now diagnoses friend declarations with an ``enum`` elaborated-type-specifier in language modes after C++98.
+ Improvements to Clang's time-trace ---------------------------------- diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 754733a6c5fffd..40b47c3ca92e7d 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -1637,10 +1637,10 @@ def err_inline_namespace_std : Error< def err_unexpected_friend : Error< "friends can only be classes or functions">; def ext_enum_friend : ExtWarn< - "befriending enumeration type %0 is a C++11 extension">, InGroup; -def warn_cxx98_compat_enum_friend : Warning< - "befriending enumeration type %0 is incompatible with C++98">, - InGroup, DefaultIgnore; + "elaborated enum specifier cannot be declared as a friend">, + InGroup>; +def note_enum_friend : Note< + "remove 'enum%select{| struct| class}0' to befriend an enum">; def ext_nonclass_type_friend : ExtWarn< "non-class friend type %0 is a C++11 extension">, InGroup; def warn_cxx98_compat_nonclass_type_friend : Warning< diff --git a/clang/include/clang/Sema/DeclSpec.h b/clang/include/clang/Sema/DeclSpec.h index d161147527dc34..316e8071169a3a 100644 --- a/clang/include/clang/Sema/DeclSpec.h +++ b/clang/include/clang/Sema/DeclSpec.h @@ -346,10 +346,7 @@ class DeclSpec { // FIXME: Attributes should be included here. 
}; - enum FriendSpecified : bool { - No, - Yes, - }; + enum FriendSpecified : bool { No, Yes }; private: // storage-class-specifier @@ -400,7 +397,7 @@ class DeclSpec { // friend-specifier LLVM_PREFERRED_TYPE(bool) - unsigned Friend_specified : 1; + unsigned FriendSpecifiedFirst : 1; // constexpr-specifier LLVM_PREFERRED_TYPE(ConstexprSpecKind) @@ -491,7 +488,7 @@ class DeclSpec { TypeSpecPipe(false), TypeSpecSat(false), ConstrainedAuto(false), TypeQualifiers(TQ_unspecified), FS_inline_specified(false), FS_forceinline_specified(false), FS_virtual_specified(false), - FS_noreturn_specified(false), Friend_specified(false), + FS_noreturn_specified(false), FriendSpecifiedFirst(false), ConstexprSpecifier( static_cast(ConstexprSpecKind::Unspecified)), Attrs(attrFactory), writtenBS(), ObjCQualifiers(nullptr) {} @@ -818,9 +815,11 @@ class DeclSpec { const char *&PrevSpec, unsigned &DiagID); FriendSpecified isFriendSpecified() const { - return static_cast(Friend_specified); + return static_cast(FriendLoc.isValid()); } + bool isFriendSpecifiedFirst() const { return FriendSpecifiedFirst; } + SourceLocation getFriendSpecLoc() const { return FriendLoc; } bool isModulePrivateSpecified() const { return ModulePrivateLoc.isValid(); } diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index ed933f27f8df6b..978949a9803ac8 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -8039,9 +8039,6 @@ class Sema final { SourceLocation RParenLoc, bool Failed); void DiagnoseStaticAssertDetails(const Expr *E); - FriendDecl *CheckFriendTypeDecl(SourceLocation LocStart, - SourceLocation FriendLoc, - TypeSourceInfo *TSInfo); Decl *ActOnFriendTypeDecl(Scope *S, const DeclSpec &DS, MultiTemplateParamsArg TemplateParams); NamedDecl *ActOnFriendFunctionDecl(Scope *S, Declarator &D, diff --git a/clang/lib/Parse/ParseTentative.cpp b/clang/lib/Parse/ParseTentative.cpp index f1737cb8447677..47c85030f4f6c5 100644 --- 
a/clang/lib/Parse/ParseTentative.cpp +++ b/clang/lib/Parse/ParseTentative.cpp @@ -79,9 +79,9 @@ bool Parser::isCXXDeclarationStatement( getCurScope(), *II, Tok.getLocation(), SS, /*Template=*/nullptr); if (Actions.isCurrentClassName(*II, getCurScope(), &SS) || isDeductionGuide) { - if (isConstructorDeclarator(/*Unqualified=*/SS.isEmpty(), - isDeductionGuide, - DeclSpec::FriendSpecified::No)) + if (isConstructorDeclarator( + /*Unqualified=*/SS.isEmpty(), isDeductionGuide, + /*IsFriend=*/DeclSpec::FriendSpecified::No)) return true; } else if (SS.isNotEmpty()) { // If the scope is not empty, it could alternatively be something like diff --git a/clang/lib/Sema/DeclSpec.cpp b/clang/lib/Sema/DeclSpec.cpp index 313f073445e8f2..aede602f1de84c 100644 --- a/clang/lib/Sema/DeclSpec.cpp +++ b/clang/lib/Sema/DeclSpec.cpp @@ -1102,18 +1102,13 @@ bool DeclSpec::setFunctionSpecNoreturn(SourceLocation Loc, bool DeclSpec::SetFriendSpec(SourceLocation Loc, const char *&PrevSpec, unsigned &DiagID) { - if (Friend_specified) { + if (isFriendSpecified()) { PrevSpec = "friend"; - // Keep the later location, so that we can later diagnose ill-formed - // declarations like 'friend class X friend;'. Per [class.friend]p3, - // 'friend' must be the first token in a friend declaration that is - // not a function declaration. 
- FriendLoc = Loc; DiagID = diag::warn_duplicate_declspec; return true; } - Friend_specified = true; + FriendSpecifiedFirst = isEmpty(); FriendLoc = Loc; return false; } diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index be23c0fffe0576..375f92e4ac573c 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -17264,6 +17264,26 @@ Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK, SourceLocation KWLoc, return true; } + if (TUK == TUK_Friend && Kind == TagTypeKind::Enum) { + // C++23 [dcl.type.elab]p4: + // If an elaborated-type-specifier appears with the friend specifier as + // an entire member-declaration, the member-declaration shall have one + // of the following forms: + // friend class-key nested-name-specifier(opt) identifier ; + // friend class-key simple-template-id ; + // friend class-key nested-name-specifier template(opt) + // simple-template-id ; + // + // Since enum is not a class-key, declarations like "friend enum E;" + // are ill-formed. Although CWG2363 reaffirms that such declarations are + // invalid, most implementations accept them so we issue a pedantic warning. + Diag(KWLoc, diag::ext_enum_friend) << FixItHint::CreateRemoval( + ScopedEnum ? SourceRange(KWLoc, ScopedEnumKWLoc) : KWLoc); + assert(ScopedEnum || !ScopedEnumUsesClassTag); + Diag(KWLoc, diag::note_enum_friend) + << (ScopedEnum + ScopedEnumUsesClassTag); + } + // Figure out the underlying type if this a enum declaration. We need to do // this early, because it's needed to detect if this is an incompatible // redeclaration. diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index ba233c9e2f35d6..79263bc3ff671d 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -17545,79 +17545,6 @@ Decl *Sema::BuildStaticAssertDeclaration(SourceLocation StaticAssertLoc, return Decl; } -/// Perform semantic analysis of the given friend type declaration.
-/// -/// \returns A friend declaration that. -FriendDecl *Sema::CheckFriendTypeDecl(SourceLocation LocStart, - SourceLocation FriendLoc, - TypeSourceInfo *TSInfo) { - assert(TSInfo && "NULL TypeSourceInfo for friend type declaration"); - - QualType T = TSInfo->getType(); - SourceRange TypeRange = TSInfo->getTypeLoc().getSourceRange(); - - // C++03 [class.friend]p2: - // An elaborated-type-specifier shall be used in a friend declaration - // for a class.* - // - // * The class-key of the elaborated-type-specifier is required. - if (!CodeSynthesisContexts.empty()) { - // Do not complain about the form of friend template types during any kind - // of code synthesis. For template instantiation, we will have complained - // when the template was defined. - } else { - if (!T->isElaboratedTypeSpecifier()) { - // If we evaluated the type to a record type, suggest putting - // a tag in front. - if (const RecordType *RT = T->getAs()) { - RecordDecl *RD = RT->getDecl(); - - SmallString<16> InsertionText(" "); - InsertionText += RD->getKindName(); - - Diag(TypeRange.getBegin(), - getLangOpts().CPlusPlus11 ? - diag::warn_cxx98_compat_unelaborated_friend_type : - diag::ext_unelaborated_friend_type) - << (unsigned) RD->getTagKind() - << T - << FixItHint::CreateInsertion(getLocForEndOfToken(FriendLoc), - InsertionText); - } else { - Diag(FriendLoc, - getLangOpts().CPlusPlus11 ? - diag::warn_cxx98_compat_nonclass_type_friend : - diag::ext_nonclass_type_friend) - << T - << TypeRange; - } - } else if (T->getAs()) { - Diag(FriendLoc, - getLangOpts().CPlusPlus11 ? 
- diag::warn_cxx98_compat_enum_friend : - diag::ext_enum_friend) - << T - << TypeRange; - } - - // C++11 [class.friend]p3: - // A friend declaration that does not declare a function shall have one - // of the following forms: - // friend elaborated-type-specifier ; - // friend simple-type-specifier ; - // friend typename-specifier ; - if (getLangOpts().CPlusPlus11 && LocStart != FriendLoc) - Diag(FriendLoc, diag::err_friend_not_first_in_declaration) << T; - } - - // If the type specifier in a friend declaration designates a (possibly - // cv-qualified) class type, that class is declared as a friend; otherwise, - // the friend declaration is ignored. - return FriendDecl::Create(Context, CurContext, - TSInfo->getTypeLoc().getBeginLoc(), TSInfo, - FriendLoc); -} - /// Handle a friend tag declaration where the scope specifier was /// templated. DeclResult Sema::ActOnTemplatedFriendTag( @@ -17755,6 +17682,7 @@ DeclResult Sema::ActOnTemplatedFriendTag( Decl *Sema::ActOnFriendTypeDecl(Scope *S, const DeclSpec &DS, MultiTemplateParamsArg TempParams) { SourceLocation Loc = DS.getBeginLoc(); + SourceLocation FriendLoc = DS.getFriendSpecLoc(); assert(DS.isFriendSpecified()); assert(DS.getStorageClassSpec() == DeclSpec::SCS_unspecified); @@ -17766,9 +17694,10 @@ Decl *Sema::ActOnFriendTypeDecl(Scope *S, const DeclSpec &DS, // friend simple-type-specifier ; // friend typename-specifier ; // - // Any declaration with a type qualifier does not have that form. (It's - // legal to specify a qualified type as a friend, you just can't write the - // keywords.) + // If the friend keyword isn't first, or if the declaration has any type + // qualifiers, then the declaration doesn't have that form.
+ if (getLangOpts().CPlusPlus11 && !DS.isFriendSpecifiedFirst()) + Diag(FriendLoc, diag::err_friend_not_first_in_declaration); if (DS.getTypeQualifiers()) { if (DS.getTypeQualifiers() & DeclSpec::TQ_const) Diag(DS.getConstSpecLoc(), diag::err_friend_decl_spec) << "const"; @@ -17795,24 +17724,35 @@ Decl *Sema::ActOnFriendTypeDecl(Scope *S, const DeclSpec &DS, if (DiagnoseUnexpandedParameterPack(Loc, TSI, UPPC_FriendDeclaration)) return nullptr; - // This is definitely an error in C++98. It's probably meant to - // be forbidden in C++0x, too, but the specification is just - // poorly written. - // - // The problem is with declarations like the following: - // template friend A::foo; - // where deciding whether a class C is a friend or not now hinges - // on whether there exists an instantiation of A that causes - // 'foo' to equal C. There are restrictions on class-heads - // (which we declare (by fiat) elaborated friend declarations to - // be) that makes this tractable. - // - // FIXME: handle "template <> friend class A;", which - // is possibly well-formed? Who even knows? - if (TempParams.size() && !T->isElaboratedTypeSpecifier()) { - Diag(Loc, diag::err_tagless_friend_type_template) - << DS.getSourceRange(); - return nullptr; + if (!T->isElaboratedTypeSpecifier()) { + if (TempParams.size()) { + // C++23 [dcl.pre]p5: + // In a simple-declaration, the optional init-declarator-list can be + // omitted only when declaring a class or enumeration, that is, when + // the decl-specifier-seq contains either a class-specifier, an + // elaborated-type-specifier with a class-key, or an enum-specifier. + // + // The declaration of a template-declaration or explicit-specialization + // is never a member-declaration, so this must be a simple-declaration + // with no init-declarator-list. Therefore, this is ill-formed. 
+ Diag(Loc, diag::err_tagless_friend_type_template) << DS.getSourceRange(); + return nullptr; + } else if (const RecordDecl *RD = T->getAsRecordDecl()) { + SmallString<16> InsertionText(" "); + InsertionText += RD->getKindName(); + + Diag(Loc, getLangOpts().CPlusPlus11 + ? diag::warn_cxx98_compat_unelaborated_friend_type + : diag::ext_unelaborated_friend_type) + << (unsigned)RD->getTagKind() << T + << FixItHint::CreateInsertion(getLocForEndOfToken(FriendLoc), + InsertionText); + } else { + Diag(FriendLoc, getLangOpts().CPlusPlus11 + ? diag::warn_cxx98_compat_nonclass_type_friend + : diag::ext_nonclass_type_friend) + << T << DS.getSourceRange(); + } } // C++98 [class.friend]p1: A friend of a class is a function @@ -17828,12 +17768,11 @@ Decl *Sema::ActOnFriendTypeDecl(Scope *S, const DeclSpec &DS, Decl *D; if (!TempParams.empty()) - D = FriendTemplateDecl::Create(Context, CurContext, Loc, - TempParams, - TSI, - DS.getFriendSpecLoc()); + D = FriendTemplateDecl::Create(Context, CurContext, Loc, TempParams, TSI, + FriendLoc); else - D = CheckFriendTypeDecl(Loc, DS.getFriendSpecLoc(), TSI); + D = FriendDecl::Create(Context, CurContext, TSI->getTypeLoc().getBeginLoc(), + TSI, FriendLoc); if (!D) return nullptr; diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index d67b21b4449e01..9c696e072ba4a7 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -1407,11 +1407,8 @@ Decl *TemplateDeclInstantiator::VisitFriendDecl(FriendDecl *D) { if (!InstTy) return nullptr; - FriendDecl *FD = SemaRef.CheckFriendTypeDecl(D->getBeginLoc(), - D->getFriendLoc(), InstTy); - if (!FD) - return nullptr; - + FriendDecl *FD = FriendDecl::Create( + SemaRef.Context, Owner, D->getLocation(), InstTy, D->getFriendLoc()); FD->setAccess(AS_public); FD->setUnsupportedFriend(D->isUnsupportedFriend()); Owner->addDecl(FD); diff --git 
a/clang/test/CXX/dcl.dcl/dcl.spec/dcl.type/dcl.type.elab/p3.cpp b/clang/test/CXX/dcl.dcl/dcl.spec/dcl.type/dcl.type.elab/p3.cpp index 19406518402ff9..8bdd490531119b 100644 --- a/clang/test/CXX/dcl.dcl/dcl.spec/dcl.type/dcl.type.elab/p3.cpp +++ b/clang/test/CXX/dcl.dcl/dcl.spec/dcl.type/dcl.type.elab/p3.cpp @@ -16,10 +16,10 @@ class A1 { friend union A; // expected-error {{use of 'A' with tag type that does not match previous declaration}} friend enum A; // expected-error {{use of 'A' with tag type that does not match previous declaration}} - friend enum E; -#if __cplusplus <= 199711L // C++03 or earlier modes - // expected-warning@-2 {{befriending enumeration type 'enum E' is a C++11 extension}} -#endif + // expected-warning@-1 {{cannot be declared as a friend}} + // expected-note@-2 {{remove 'enum' to befriend an enum}} + friend enum E; // expected-warning {{cannot be declared as a friend}} + // expected-note@-1 {{remove 'enum' to befriend an enum}} }; template struct B { // expected-note {{previous use is here}} diff --git a/clang/test/CXX/dcl.dcl/dcl.spec/dcl.type/dcl.type.elab/p4.cpp b/clang/test/CXX/dcl.dcl/dcl.spec/dcl.type/dcl.type.elab/p4.cpp new file mode 100644 index 00000000000000..b516b1fe15dacc --- /dev/null +++ b/clang/test/CXX/dcl.dcl/dcl.spec/dcl.type/dcl.type.elab/p4.cpp @@ -0,0 +1,40 @@ +// RUN: %clang_cc1 -verify %s -std=c++11 -pedantic-errors + +enum class E; + +template +struct A { + enum class F; +}; + +struct B { + template + friend enum A::F; // expected-error {{elaborated enum specifier cannot be declared as a friend}} + // expected-note@-1 {{remove 'enum' to befriend an enum}} + + // FIXME: Per [temp.expl.spec]p19, a friend declaration cannot be an explicit specialization + template<> + friend enum A::F; // expected-error {{elaborated enum specifier cannot be declared as a friend}} + // expected-note@-1 {{remove 'enum' to befriend an enum}} + + enum class G; + + friend enum E; // expected-error {{elaborated enum specifier cannot be 
declared as a friend}} + // expected-note@-1 {{remove 'enum' to befriend an enum}} +}; + +template +struct C { + friend enum T::G; // expected-error {{elaborated enum specifier cannot be declared as a friend}} + // expected-note@-1 {{remove 'enum' to befriend an enum}} + friend enum A::G; // expected-error {{elaborated enum specifier cannot be declared as a friend}} + // expected-note@-1 {{remove 'enum' to befriend an enum}} +}; + +struct D { + friend enum B::G; // expected-error {{elaborated enum specifier cannot be declared as a friend}} + // expected-note@-1 {{remove 'enum' to befriend an enum}} + friend enum class B::G; // expected-error {{elaborated enum specifier cannot be declared as a friend}} + // expected-note@-1 {{remove 'enum class' to befriend an enum}} + // expected-error@-2 {{reference to enumeration must use 'enum' not 'enum class'}} +}; diff --git a/clang/test/CXX/drs/dr16xx.cpp b/clang/test/CXX/drs/dr16xx.cpp index 6ce77fbba7ceec..2dd7d1502e59fb 100644 --- a/clang/test/CXX/drs/dr16xx.cpp +++ b/clang/test/CXX/drs/dr16xx.cpp @@ -61,7 +61,7 @@ namespace dr1631 { // dr1631: 3.7 void f(B, int); // TODO: expected- note {{candidate function}} void f(int, A); // #dr1631-f void f(int, A, int = 0); // #dr1631-f-int - + void test() { f({0}, {{1}}); // since-cxx11-error@-1 {{call to 'f' is ambiguous}} @@ -107,6 +107,8 @@ namespace dr1638 { // dr1638: 3.1 struct B { friend enum class A::E; // since-cxx11-error@-1 {{reference to enumeration must use 'enum' not 'enum class'}} + // since-cxx11-error@-2 {{elaborated enum specifier cannot be declared as a friend}} + // since-cxx11-note@-3 {{remove 'enum class' to befriend an enum}} }; #endif } @@ -179,7 +181,7 @@ namespace dr1658 { // dr1658: 5 // In all other cases, we are not so lucky. 
struct E : A { E(); virtual void foo() = 0; }; // #dr1658-E1 E::E() = default; // #dr1658-E1-ctor - // cxx98-error@-1 {{defaulted function definitions are a C++11 extension}} + // cxx98-error@-1 {{defaulted function definitions are a C++11 extension}} // cxx98-error@-2 {{base class 'A' has private default constructor}} // cxx98-note@-3 {{in defaulted default constructor for 'dr1658::DefCtor::E' first required here}} // cxx98-note@#dr1658-A1 {{implicitly declared private here}} @@ -188,7 +190,7 @@ namespace dr1658 { // dr1658: 5 struct F : virtual A { F(); }; // #dr1658-F1 F::F() = default; // #dr1658-F1-ctor // cxx98-error@-1 {{defaulted function definitions are a C++11 extension}} - // cxx98-error@-2 {{inherited virtual base class 'A' has private default constructor}} + // cxx98-error@-2 {{inherited virtual base class 'A' has private default constructor}} // cxx98-note@-3 {{in defaulted default constructor for 'dr1658::DefCtor::F' first required here}} // cxx98-note@#dr1658-A1 {{implicitly declared private here}} // since-cxx11-error@#dr1658-F1-ctor {{defaulting this default constructor would delete it after its first declaration}} diff --git a/clang/test/CXX/drs/dr23xx.cpp b/clang/test/CXX/drs/dr23xx.cpp index c0463730b6a23a..38c6f8a915600d 100644 --- a/clang/test/CXX/drs/dr23xx.cpp +++ b/clang/test/CXX/drs/dr23xx.cpp @@ -261,9 +261,9 @@ namespace dr2396 { // dr2396: no // FIXME: per P1787 "Calling a conversion function" example, all of the // examples below are well-formed, with B resolving to A::B, but currently - // it's been resolved to dr2396::B. + // it's been resolved to dr2396::B. 
- // void f(A a) { a.operator B B::*(); } + // void f(A a) { a.operator B B::*(); } // void g(A a) { a.operator decltype(B()) B::*(); } // void g2(A a) { a.operator B decltype(B())::*(); } } @@ -277,4 +277,38 @@ namespace dr2397 { // dr2397: 17 auto (*c)[5] = &a; } } // namespace dr2397 + +// CWG2363 was closed as NAD, but its resolution does affirm that +// a friend declaration cannot have an opaque-enum-specifier. +namespace dr2363 { // dr2363: yes + +enum class E0; +enum E1 : int; + +struct A { + friend enum class E0; + // since-cxx11-error@-1 {{reference to enumeration must use 'enum' not 'enum class'}} + // expected-error@-2 {{elaborated enum specifier cannot be declared as a friend}} + // expected-note@-3 {{remove 'enum class' to befriend an enum}} + + friend enum E0; + // expected-error@-1 {{elaborated enum specifier cannot be declared as a friend}} + // expected-note@-2 {{remove 'enum' to befriend an enum}} + + friend enum class E1; + // since-cxx11-error@-1 {{reference to enumeration must use 'enum' not 'enum class'}} + // expected-error@-2 {{elaborated enum specifier cannot be declared as a friend}} + // expected-note@-3 {{remove 'enum class' to befriend an enum}} + + friend enum E1; + // expected-error@-1 {{elaborated enum specifier cannot be declared as a friend}} + // expected-note@-2 {{remove 'enum' to befriend an enum}} + + friend enum class E2; + // since-cxx11-error@-1 {{reference to enumeration must use 'enum' not 'enum class'}} + // expected-error@-2 {{elaborated enum specifier cannot be declared as a friend}} + // expected-note@-3 {{remove 'enum class' to befriend an enum}} +}; +} // namespace dr2363 + #endif diff --git a/clang/test/CXX/temp/temp.decls/temp.class/temp.mem.enum/p1.cpp b/clang/test/CXX/temp/temp.decls/temp.class/temp.mem.enum/p1.cpp index 2884be146c7c34..e5807993a7a18f 100644 --- a/clang/test/CXX/temp/temp.decls/temp.class/temp.mem.enum/p1.cpp +++ b/clang/test/CXX/temp/temp.decls/temp.class/temp.mem.enum/p1.cpp @@ -101,10 +101,14 
@@ template<> enum class D::E; struct F { // Per C++11 [class.friend]p3, these friend declarations have no effect. // Only classes and functions can be friends. - template friend enum D::E; - template<> friend enum D::E; + template friend enum D::E; // expected-warning {{elaborated enum specifier cannot be declared as a friend}} + // expected-note@-1 {{remove 'enum' to befriend an enum}} + template<> friend enum D::E; // expected-warning {{elaborated enum specifier cannot be declared as a friend}} + // expected-note@-1 {{remove 'enum' to befriend an enum}} template<> friend enum D::E { e3 }; // expected-error {{cannot define a type in a friend declaration}} + // expected-warning@-1 {{elaborated enum specifier cannot be declared as a friend}} + // expected-note@-2 {{remove 'enum' to befriend an enum}} private: static const int n = 1; // expected-note {{private here}} diff --git a/clang/test/FixIt/fixit-c++11.cpp b/clang/test/FixIt/fixit-c++11.cpp index a5a47b7c937bab..10f4a9d0554ccc 100644 --- a/clang/test/FixIt/fixit-c++11.cpp +++ b/clang/test/FixIt/fixit-c++11.cpp @@ -44,11 +44,13 @@ namespace ScopedEnum { enum class E b = E::a; // expected-error {{must use 'enum' not 'enum class'}} struct S { friend enum class E; // expected-error {{must use 'enum' not 'enum class'}} + // expected-warning@-1 {{elaborated enum specifier cannot be declared as a friend}} + // expected-note@-2 {{remove 'enum class' to befriend an enum}} }; } -struct S2 { - void f(int i); +struct S2 { + void f(int i); void g(int i); }; diff --git a/clang/test/Parser/cxx-decl.cpp b/clang/test/Parser/cxx-decl.cpp index 8a6e6546cd3ed9..4c4bb87b1b9531 100644 --- a/clang/test/Parser/cxx-decl.cpp +++ b/clang/test/Parser/cxx-decl.cpp @@ -252,9 +252,6 @@ namespace DuplicateFriend { struct A { friend void friend f(); // expected-warning {{duplicate 'friend' declaration specifier}} friend struct B friend; // expected-warning {{duplicate 'friend' declaration specifier}} -#if __cplusplus >= 201103L - // 
expected-error@-2 {{'friend' must appear first in a non-function declaration}} -#endif }; } diff --git a/clang/test/Parser/cxx0x-decl.cpp b/clang/test/Parser/cxx0x-decl.cpp index 18095a4d989dda..a0b3266c738ff5 100644 --- a/clang/test/Parser/cxx0x-decl.cpp +++ b/clang/test/Parser/cxx0x-decl.cpp @@ -157,7 +157,7 @@ namespace DuplicateSpecifier { struct A { friend constexpr int constexpr friend f(); // expected-warning {{duplicate 'friend' declaration specifier}} \ // expected-error {{duplicate 'constexpr' declaration specifier}} - friend struct A friend; // expected-warning {{duplicate 'friend'}} expected-error {{'friend' must appear first}} + friend struct A friend; // expected-warning {{duplicate 'friend'}} }; constinit constexpr int n1 = 0; // expected-error {{cannot combine with previous 'constinit'}} diff --git a/clang/test/SemaCXX/cxx98-compat.cpp b/clang/test/SemaCXX/cxx98-compat.cpp index d26e3a1e684d54..b31bee672bbe32 100644 --- a/clang/test/SemaCXX/cxx98-compat.cpp +++ b/clang/test/SemaCXX/cxx98-compat.cpp @@ -220,7 +220,8 @@ struct HasExplicitConversion { struct Struct {}; enum Enum { enum_val = 0 }; struct BadFriends { - friend enum ::Enum; // expected-warning {{befriending enumeration type 'enum ::Enum' is incompatible with C++98}} + friend enum ::Enum; // expected-warning {{elaborated enum specifier cannot be declared as a friend}} + // expected-note@-1 {{remove 'enum' to befriend an enum}} friend int; // expected-warning {{non-class friend type 'int' is incompatible with C++98}} friend Struct; // expected-warning {{befriending 'Struct' without 'struct' keyword is incompatible with C++98}} }; diff --git a/clang/test/SemaCXX/enum-scoped.cpp b/clang/test/SemaCXX/enum-scoped.cpp index a4da0607d74ae5..b1d9a215c437c7 100644 --- a/clang/test/SemaCXX/enum-scoped.cpp +++ b/clang/test/SemaCXX/enum-scoped.cpp @@ -174,11 +174,21 @@ namespace N2764 { struct S { friend enum class E; // expected-error {{reference to enumeration must use 'enum' not 'enum class'}} + // 
expected-warning@-1 {{elaborated enum specifier cannot be declared as a friend}} + // expected-note@-2 {{remove 'enum class' to befriend an enum}} friend enum class F; // expected-error {{reference to enumeration must use 'enum' not 'enum class'}} + // expected-warning@-1 {{elaborated enum specifier cannot be declared as a friend}} + // expected-note@-2 {{remove 'enum class' to befriend an enum}} friend enum G {}; // expected-error {{forward reference}} expected-error {{cannot define a type in a friend declaration}} + // expected-warning@-1 {{elaborated enum specifier cannot be declared as a friend}} + // expected-note@-2 {{remove 'enum' to befriend an enum}} friend enum class H {}; // expected-error {{forward reference}} expected-error {{cannot define a type in a friend declaration}} + // expected-warning@-1 {{elaborated enum specifier cannot be declared as a friend}} + // expected-note@-2 {{remove 'enum' to befriend an enum}} friend enum I : int {}; // expected-error {{forward reference}} expected-error {{cannot define a type in a friend declaration}} + // expected-warning@-1 {{elaborated enum specifier cannot be declared as a friend}} + // expected-note@-2 {{remove 'enum' to befriend an enum}} enum A : int; A a; From 27726920e0366c99f5c940d304ea442515738974 Mon Sep 17 00:00:00 2001 From: Kiran Chandramohan Date: Tue, 13 Feb 2024 19:29:23 +0000 Subject: [PATCH 048/240] [Flang][OpenMP] NFC: Code-format changes --- flang/lib/Lower/OpenMP.cpp | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp index f7f80ca9c62ee0..24f91765cb439b 100644 --- a/flang/lib/Lower/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP.cpp @@ -3353,11 +3353,12 @@ genLoopVars(mlir::Operation *op, Fortran::lower::AbstractConverter &converter, } static llvm::SmallVector -genLoopAndReductionVars(mlir::Operation *op, Fortran::lower::AbstractConverter &converter, - mlir::Location &loc, - const llvm::SmallVector 
&loopArgs, - const llvm::SmallVector &reductionArgs, - llvm::SmallVector &reductionTypes) { +genLoopAndReductionVars( + mlir::Operation *op, Fortran::lower::AbstractConverter &converter, + mlir::Location &loc, + const llvm::SmallVector &loopArgs, + const llvm::SmallVector &reductionArgs, + llvm::SmallVector &reductionTypes) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); llvm::SmallVector blockArgTypes; @@ -3372,7 +3373,7 @@ genLoopAndReductionVars(mlir::Operation *op, Fortran::lower::AbstractConverter & loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size()); mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); std::fill_n(std::back_inserter(blockArgTypes), loopArgs.size(), - loopVarType); + loopVarType); std::fill_n(std::back_inserter(blockArgLocs), loopArgs.size(), loc); } if (reductionArgs.size()) { @@ -3386,12 +3387,12 @@ genLoopAndReductionVars(mlir::Operation *op, Fortran::lower::AbstractConverter & if (loopArgs.size()) { mlir::Operation *storeOp = nullptr; for (auto [argIndex, argSymbol] : llvm::enumerate(loopArgs)) { - mlir::Value indexVal = + mlir::Value indexVal = fir::getBase(op->getRegion(0).front().getArgument(argIndex)); storeOp = createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol); } - firOpBuilder.setInsertionPointAfter(storeOp); + firOpBuilder.setInsertionPointAfter(storeOp); } // Bind the reduction arguments to their block arguments for (auto [arg, prv] : llvm::zip_equal( @@ -3543,14 +3544,15 @@ static void createWsLoop(Fortran::lower::AbstractConverter &converter, [](mlir::Value v) { return v.getType(); }); auto ivCallback = [&](mlir::Operation *op) { - return genLoopAndReductionVars(op, converter, loc, iv, reductionSymbols, reductionTypes); + return genLoopAndReductionVars(op, converter, loc, iv, reductionSymbols, + reductionTypes); }; createBodyOfOp( wsLoopOp, OpWithBodyGenInfo(converter, semaCtx, loc, *nestedEval) .setClauses(&beginClauseList) .setDataSharingProcessor(&dsp) - 
.setReductions(&reductionSymbols, &reductionTypes) + .setReductions(&reductionSymbols, &reductionTypes) .setGenRegionEntryCb(ivCallback)); } From f5cc961240b327cac83041f04ea339859e7d5c9c Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 13 Feb 2024 11:41:31 -0800 Subject: [PATCH 049/240] [mlir] Fix a warning This patch fixes: mlir/lib/Target/LLVMIR/AttrKindDetail.h:65:1: error: unused function 'getAttrNameToKindMapping' [-Werror,-Wunused-function] --- mlir/lib/Target/LLVMIR/AttrKindDetail.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mlir/lib/Target/LLVMIR/AttrKindDetail.h b/mlir/lib/Target/LLVMIR/AttrKindDetail.h index b01858ea814380..ddc6d46b90bb2b 100644 --- a/mlir/lib/Target/LLVMIR/AttrKindDetail.h +++ b/mlir/lib/Target/LLVMIR/AttrKindDetail.h @@ -61,7 +61,8 @@ getAttrKindToNameMapping() { /// Returns a dense map from LLVM attribute name to their kind in LLVM IR /// dialect. -static llvm::DenseMap +[[maybe_unused]] static llvm::DenseMap getAttrNameToKindMapping() { static auto attrNameToKindMapping = []() { llvm::DenseMap nameKindMap; From ec0aa1646e9953d1a8d0d15dc381d3250c854572 Mon Sep 17 00:00:00 2001 From: Jeffrey Byrnes Date: Tue, 13 Feb 2024 11:44:04 -0800 Subject: [PATCH 050/240] [SeparateConstOffsetFromGEP] Fix test after 1b65742f8c71f576381fe85d5e34579b24f2d874 Change-Id: I7ced7774c80997d21969ab7886fc30c0c1e1cc81 --- .../NVPTX/lower-gep-reorder.ll | 21 ++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lower-gep-reorder.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lower-gep-reorder.ll index c46f4e79ba432a..516f395e061180 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lower-gep-reorder.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lower-gep-reorder.ll @@ -6,22 +6,19 @@ define protected amdgpu_kernel void @sink_addr(ptr %in.ptr, i64 %in.idx0, i64 %i ; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 
[[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IDX0:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 [[IN_IDX0]], i64 [[IN_IDX1]] -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 [[IN_IDX0]], i64 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[TMP0]], i64 [[IN_IDX1]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[TMP1]], i64 256 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 [[IN_IDX0]], i64 0 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[TMP3]], i64 [[IN_IDX1]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[TMP4]], i64 512 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 [[IN_IDX0]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP6]], i64 [[IN_IDX1]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i64 768 +; CHECK-NEXT: [[CONST1:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 [[IN_IDX0]], i64 256 +; CHECK-NEXT: [[IDX1:%.*]] = getelementptr i64, ptr [[CONST1]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[CONST2:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 [[IN_IDX0]], i64 512 +; CHECK-NEXT: [[IDX2:%.*]] = getelementptr i64, ptr [[CONST2]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[CONST3:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 [[IN_IDX0]], i64 768 +; CHECK-NEXT: [[IDX3:%.*]] = getelementptr i64, ptr [[CONST3]], i64 [[IN_IDX1]] ; CHECK-NEXT: [[CMP0:%.*]] = icmp eq i64 [[IN_IDX0]], 0 ; CHECK-NEXT: br i1 [[CMP0]], label [[BB_1:%.*]], label [[END:%.*]] ; CHECK: bb.1: ; CHECK-NEXT: [[VAL0:%.*]] = load <8 x i64>, ptr [[IDX0]], align 16 -; CHECK-NEXT: [[VAL1:%.*]] = load <8 x i64>, ptr [[TMP2]], align 16 -; CHECK-NEXT: [[VAL2:%.*]] = load <8 x i64>, ptr [[TMP5]], align 16 -; CHECK-NEXT: [[VAL3:%.*]] = load <8 x i64>, ptr [[TMP8]], align 16 +; CHECK-NEXT: [[VAL1:%.*]] = load <8 x i64>, ptr [[IDX1]], align 16 +; CHECK-NEXT: [[VAL2:%.*]] = load <8 x i64>, ptr [[IDX2]], 
align 16 +; CHECK-NEXT: [[VAL3:%.*]] = load <8 x i64>, ptr [[IDX3]], align 16 ; CHECK-NEXT: call void asm sideeffect " ; CHECK-NEXT: call void asm sideeffect " ; CHECK-NEXT: call void asm sideeffect " From ac97562c99c3ae97f063048ccaf08ebdae60ac30 Mon Sep 17 00:00:00 2001 From: Xing Xue Date: Tue, 13 Feb 2024 15:11:24 -0500 Subject: [PATCH 051/240] [OpenMP][AIX]Define struct kmp_base_tas_lock with the order of two members swapped for big-endian (#79188) The direct lock data structure has bit `0` (the least significant bit) of the first 32-bit word set to `1` to indicate it is a direct lock. On the other hand, the first word (in 32-bit mode) or first two words (in 64-bit mode) of an indirect lock are the address of the entry allocated from the indirect lock table. The runtime checks bit `0` of the first 32-bit word to tell if this is a direct or an indirect lock. This works fine for 32-bit and 64-bit little-endian because its memory layout of a 64-bit address is (`low word`, `high word`). However, this causes problems for big-endian where the memory layout of a 64-bit address is (`high word`, `low word`). If an address of the indirect lock table entry is something like `0x110035300`, i.e., (`0x1`, `0x10035300`), it is treated as a direct lock. This patch defines `struct kmp_base_tas_lock` with the ordering of the two 32-bit members flipped for big-endian PPC64 so that when checking/setting tags in member `poll`, the second word (the low word) is used. This patch also changes places where `poll` is not already explicitly specified for checking/setting tags. 
--- openmp/runtime/src/kmp_csupport.cpp | 5 +++-- openmp/runtime/src/kmp_gsupport.cpp | 2 +- openmp/runtime/src/kmp_lock.cpp | 6 +++--- openmp/runtime/src/kmp_lock.h | 17 +++++++++++++---- 4 files changed, 20 insertions(+), 10 deletions(-) diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp index 9eeaeb88fb9ec7..878e78b5c7ad2d 100644 --- a/openmp/runtime/src/kmp_csupport.cpp +++ b/openmp/runtime/src/kmp_csupport.cpp @@ -1533,8 +1533,9 @@ void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint); if (*lk == 0) { if (KMP_IS_D_LOCK(lockseq)) { - KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, - KMP_GET_D_TAG(lockseq)); + KMP_COMPARE_AND_STORE_ACQ32( + (volatile kmp_int32 *)&((kmp_base_tas_lock_t *)crit)->poll, 0, + KMP_GET_D_TAG(lockseq)); } else { __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq)); } diff --git a/openmp/runtime/src/kmp_gsupport.cpp b/openmp/runtime/src/kmp_gsupport.cpp index 88189659a23416..4dc8a90f83b4ea 100644 --- a/openmp/runtime/src/kmp_gsupport.cpp +++ b/openmp/runtime/src/kmp_gsupport.cpp @@ -144,7 +144,7 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_BARRIER)(void) { // Mutual exclusion -// The symbol that icc/ifort generates for unnamed for unnamed critical sections +// The symbol that icc/ifort generates for unnamed critical sections // - .gomp_critical_user_ - is defined using .comm in any objects reference it. // We can't reference it directly here in C code, as the symbol contains a ".". // diff --git a/openmp/runtime/src/kmp_lock.cpp b/openmp/runtime/src/kmp_lock.cpp index 85c54f4cdc7e96..0ad14f862bcb9b 100644 --- a/openmp/runtime/src/kmp_lock.cpp +++ b/openmp/runtime/src/kmp_lock.cpp @@ -2689,7 +2689,7 @@ void __kmp_spin_backoff(kmp_backoff_t *boff) { // lock word. 
static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq) { - TCW_4(*lck, KMP_GET_D_TAG(seq)); + TCW_4(((kmp_base_tas_lock_t *)lck)->poll, KMP_GET_D_TAG(seq)); KA_TRACE( 20, ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq)); @@ -3180,8 +3180,8 @@ kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock, lck->type = tag; if (OMP_LOCK_T_SIZE < sizeof(void *)) { - *((kmp_lock_index_t *)user_lock) = idx - << 1; // indirect lock word must be even + *(kmp_lock_index_t *)&(((kmp_base_tas_lock_t *)user_lock)->poll) = + idx << 1; // indirect lock word must be even } else { *((kmp_indirect_lock_t **)user_lock) = lck; } diff --git a/openmp/runtime/src/kmp_lock.h b/openmp/runtime/src/kmp_lock.h index f21179b4eb68a1..e2a0cda01a9718 100644 --- a/openmp/runtime/src/kmp_lock.h +++ b/openmp/runtime/src/kmp_lock.h @@ -50,7 +50,7 @@ typedef struct ident ident_t; // recent versions), but we are bounded by the pointer-sized chunks that // the Intel compiler allocates. -#if KMP_OS_LINUX && defined(KMP_GOMP_COMPAT) +#if (KMP_OS_LINUX || KMP_OS_AIX) && defined(KMP_GOMP_COMPAT) #define OMP_LOCK_T_SIZE sizeof(int) #define OMP_NEST_LOCK_T_SIZE sizeof(void *) #else @@ -120,8 +120,15 @@ extern void __kmp_validate_locks(void); struct kmp_base_tas_lock { // KMP_LOCK_FREE(tas) => unlocked; locked: (gtid+1) of owning thread +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __LP64__ + // Flip the ordering of the high and low 32-bit member to be consistent + // with the memory layout of the address in 64-bit big-endian. 
+ kmp_int32 depth_locked; // depth locked, for nested locks only + std::atomic poll; +#else std::atomic poll; kmp_int32 depth_locked; // depth locked, for nested locks only +#endif }; typedef struct kmp_base_tas_lock kmp_base_tas_lock_t; @@ -1138,11 +1145,13 @@ extern int (**__kmp_indirect_test)(kmp_user_lock_p, kmp_int32); // Extracts direct lock tag from a user lock pointer #define KMP_EXTRACT_D_TAG(l) \ - (*((kmp_dyna_lock_t *)(l)) & ((1 << KMP_LOCK_SHIFT) - 1) & \ - -(*((kmp_dyna_lock_t *)(l)) & 1)) + ((kmp_dyna_lock_t)((kmp_base_tas_lock_t *)(l))->poll & \ + ((1 << KMP_LOCK_SHIFT) - 1) & \ + -((kmp_dyna_lock_t)((kmp_tas_lock_t *)(l))->lk.poll & 1)) // Extracts indirect lock index from a user lock pointer -#define KMP_EXTRACT_I_INDEX(l) (*(kmp_lock_index_t *)(l) >> 1) +#define KMP_EXTRACT_I_INDEX(l) \ + ((kmp_lock_index_t)((kmp_base_tas_lock_t *)(l))->poll >> 1) // Returns function pointer to the direct lock function with l (kmp_dyna_lock_t // *) and op (operation type). From c1a99b2c77499176362f24f09a39850867122ea4 Mon Sep 17 00:00:00 2001 From: James Y Knight Date: Tue, 13 Feb 2024 15:40:51 -0500 Subject: [PATCH 052/240] [Sparc] limit MaxAtomicSizeInBitsSupported to 32 for 32-bit Sparc. (#81655) When in 32-bit mode, the backend doesn't currently implement 64-bit atomics, even though the hardware is capable if you have specified a V9 CPU. Thus, limit the width to 32-bit, for now, leaving behind a TODO. This fixes a regression triggered by PR #73176. 
--- llvm/lib/Target/Sparc/SparcISelLowering.cpp | 11 +- llvm/test/CodeGen/SPARC/64atomics.ll | 54 ++++---- .../CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll | 120 +++++++++--------- 3 files changed, 96 insertions(+), 89 deletions(-) diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp index bdefb0841a124b..13184a1eb0b101 100644 --- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp +++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp @@ -1764,9 +1764,14 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM, // Atomics are supported on SparcV9. 32-bit atomics are also // supported by some Leon SparcV8 variants. Otherwise, atomics // are unsupported. - if (Subtarget->isV9()) - setMaxAtomicSizeInBitsSupported(64); - else if (Subtarget->hasLeonCasa()) + if (Subtarget->isV9()) { + // TODO: we _ought_ to be able to support 64-bit atomics on 32-bit sparcv9, + // but it hasn't been implemented in the backend yet. + if (Subtarget->is64Bit()) + setMaxAtomicSizeInBitsSupported(64); + else + setMaxAtomicSizeInBitsSupported(32); + } else if (Subtarget->hasLeonCasa()) setMaxAtomicSizeInBitsSupported(32); else setMaxAtomicSizeInBitsSupported(0); diff --git a/llvm/test/CodeGen/SPARC/64atomics.ll b/llvm/test/CodeGen/SPARC/64atomics.ll index 2c00f955f497b1..feb37fdae52b03 100644 --- a/llvm/test/CodeGen/SPARC/64atomics.ll +++ b/llvm/test/CodeGen/SPARC/64atomics.ll @@ -1,12 +1,14 @@ -; RUN: llc < %s -march=sparcv9 -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=sparc -mcpu=v9 -verify-machineinstrs | FileCheck %s --check-prefixes=SPARC,SPARC32 +; RUN: llc < %s -march=sparcv9 -verify-machineinstrs | FileCheck %s --check-prefixes=SPARC,SPARC64 -; CHECK-LABEL: test_atomic_i64 -; CHECK: ldx [%o0] -; CHECK: membar -; CHECK: ldx [%o1] -; CHECK: membar -; CHECK: membar -; CHECK: stx {{.+}}, [%o2] +; SPARC-LABEL: test_atomic_i64 +; SPARC32: __atomic_load_8 +; SPARC64: ldx [%o0] +; SPARC64: membar +; SPARC64: ldx [%o1] +; SPARC64: 
membar +; SPARC64: membar +; SPARC64: stx {{.+}}, [%o2] define i64 @test_atomic_i64(ptr %ptr1, ptr %ptr2, ptr %ptr3) { entry: %0 = load atomic i64, ptr %ptr1 acquire, align 8 @@ -16,9 +18,10 @@ entry: ret i64 %2 } -; CHECK-LABEL: test_cmpxchg_i64 -; CHECK: mov 123, [[R:%[gilo][0-7]]] -; CHECK: casx [%o1], %o0, [[R]] +; SPARC-LABEL: test_cmpxchg_i64 +; SPARC32: __atomic_compare_exchange_8 +; SPARC64: mov 123, [[R:%[gilo][0-7]]] +; SPARC64: casx [%o1], %o0, [[R]] define i64 @test_cmpxchg_i64(i64 %a, ptr %ptr) { entry: @@ -27,8 +30,9 @@ entry: ret i64 %b } -; CHECK-LABEL: test_swap_i64 -; CHECK: casx [%o1], +; SPARC-LABEL: test_swap_i64 +; SPARC32: __atomic_exchange_8 +; SPARC64: casx [%o1], define i64 @test_swap_i64(i64 %a, ptr %ptr) { entry: @@ -36,23 +40,25 @@ entry: ret i64 %b } -; CHECK-LABEL: test_load_sub_64 -; CHECK: membar -; CHECK: sub -; CHECK: casx [%o0] -; CHECK: membar +; SPARC-LABEL: test_load_sub_64 +; SPARC32: __atomic_fetch_sub_8 +; SPARC64: membar +; SPARC64: sub +; SPARC64: casx [%o0] +; SPARC64: membar define zeroext i64 @test_load_sub_64(ptr %p, i64 zeroext %v) { entry: %0 = atomicrmw sub ptr %p, i64 %v seq_cst ret i64 %0 } -; CHECK-LABEL: test_load_max_64 -; CHECK: membar -; CHECK: cmp -; CHECK: movg %xcc -; CHECK: casx [%o0] -; CHECK: membar +; SPARC-LABEL: test_load_max_64 +; SPARC32: __atomic_compare_exchange_8 +; SPARC64: membar +; SPARC64: cmp +; SPARC64: movg %xcc +; SPARC64: casx [%o0] +; SPARC64: membar define zeroext i64 @test_load_max_64(ptr %p, i64 zeroext %v) { entry: %0 = atomicrmw max ptr %p, i64 %v seq_cst diff --git a/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll index 9b49035c460407..0f9feeb17716af 100644 --- a/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll +++ b/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll @@ -117,43 +117,41 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { ; CHECK-LABEL: atomicrmw_uinc_wrap_i64: ; CHECK: .cfi_startproc ; 
CHECK-NEXT: ! %bb.0: -; CHECK-NEXT: save %sp, -96, %sp +; CHECK-NEXT: save %sp, -104, %sp ; CHECK-NEXT: .cfi_def_cfa_register %fp ; CHECK-NEXT: .cfi_window_save ; CHECK-NEXT: .cfi_register %o7, %i7 -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore -; CHECK-NEXT: ldd [%i0], %i4 +; CHECK-NEXT: ldd [%i0], %g2 +; CHECK-NEXT: add %fp, -8, %i3 +; CHECK-NEXT: mov 5, %i4 ; CHECK-NEXT: .LBB3_1: ! %atomicrmw.start ; CHECK-NEXT: ! =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: mov %g0, %i3 -; CHECK-NEXT: mov %g0, %g2 -; CHECK-NEXT: addcc %i5, 1, %o4 -; CHECK-NEXT: addxcc %i4, 0, %o3 -; CHECK-NEXT: cmp %i4, %i1 -; CHECK-NEXT: movcc %icc, 1, %i3 -; CHECK-NEXT: cmp %i5, %i2 -; CHECK-NEXT: movcc %icc, 1, %g2 -; CHECK-NEXT: cmp %i4, %i1 -; CHECK-NEXT: move %icc, %g2, %i3 -; CHECK-NEXT: cmp %i3, 0 +; CHECK-NEXT: mov %g0, %i5 +; CHECK-NEXT: mov %g0, %g4 +; CHECK-NEXT: addcc %g3, 1, %o3 +; CHECK-NEXT: addxcc %g2, 0, %o2 +; CHECK-NEXT: cmp %g2, %i1 +; CHECK-NEXT: movcc %icc, 1, %i5 +; CHECK-NEXT: cmp %g3, %i2 +; CHECK-NEXT: movcc %icc, 1, %g4 +; CHECK-NEXT: cmp %g2, %i1 +; CHECK-NEXT: move %icc, %g4, %i5 +; CHECK-NEXT: cmp %i5, 0 +; CHECK-NEXT: movne %icc, 0, %o2 ; CHECK-NEXT: movne %icc, 0, %o3 -; CHECK-NEXT: movne %icc, 0, %o4 +; CHECK-NEXT: std %g2, [%fp+-8] ; CHECK-NEXT: mov %i0, %o0 -; CHECK-NEXT: mov %i4, %o1 -; CHECK-NEXT: call __sync_val_compare_and_swap_8 -; CHECK-NEXT: mov %i5, %o2 -; CHECK-NEXT: xor %o0, %i4, %i3 -; CHECK-NEXT: xor %o1, %i5, %i4 -; CHECK-NEXT: or %i4, %i3, %i3 -; CHECK-NEXT: mov %o1, %i5 -; CHECK-NEXT: cmp %i3, 0 -; CHECK-NEXT: bne %icc, .LBB3_1 -; CHECK-NEXT: mov %o0, %i4 +; CHECK-NEXT: mov %i3, %o1 +; CHECK-NEXT: mov %i4, %o4 +; CHECK-NEXT: call __atomic_compare_exchange_8 +; CHECK-NEXT: mov %i4, %o5 +; CHECK-NEXT: cmp %o0, 0 +; CHECK-NEXT: be %icc, .LBB3_1 +; CHECK-NEXT: ldd [%fp+-8], %g2 ; CHECK-NEXT: ! %bb.2: ! 
%atomicrmw.end -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore -; CHECK-NEXT: mov %i4, %i0 +; CHECK-NEXT: mov %g2, %i0 ; CHECK-NEXT: ret -; CHECK-NEXT: restore %g0, %i5, %o1 +; CHECK-NEXT: restore %g0, %g3, %o1 %result = atomicrmw uinc_wrap ptr %ptr, i64 %val seq_cst ret i64 %result } @@ -280,48 +278,46 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { ; CHECK-LABEL: atomicrmw_udec_wrap_i64: ; CHECK: .cfi_startproc ; CHECK-NEXT: ! %bb.0: -; CHECK-NEXT: save %sp, -96, %sp +; CHECK-NEXT: save %sp, -104, %sp ; CHECK-NEXT: .cfi_def_cfa_register %fp ; CHECK-NEXT: .cfi_window_save ; CHECK-NEXT: .cfi_register %o7, %i7 -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore -; CHECK-NEXT: ldd [%i0], %i4 +; CHECK-NEXT: ldd [%i0], %g2 +; CHECK-NEXT: add %fp, -8, %i3 +; CHECK-NEXT: mov 5, %i4 ; CHECK-NEXT: .LBB7_1: ! %atomicrmw.start ; CHECK-NEXT: ! =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: mov %g0, %i3 -; CHECK-NEXT: mov %g0, %g2 -; CHECK-NEXT: mov %g0, %g3 -; CHECK-NEXT: addcc %i5, -1, %o4 -; CHECK-NEXT: addxcc %i4, -1, %o3 -; CHECK-NEXT: or %i5, %i4, %g4 -; CHECK-NEXT: cmp %g4, 0 -; CHECK-NEXT: move %icc, 1, %i3 -; CHECK-NEXT: cmp %i4, %i1 -; CHECK-NEXT: movgu %icc, 1, %g2 -; CHECK-NEXT: cmp %i5, %i2 -; CHECK-NEXT: movgu %icc, 1, %g3 -; CHECK-NEXT: cmp %i4, %i1 -; CHECK-NEXT: move %icc, %g3, %g2 -; CHECK-NEXT: or %i3, %g2, %i3 -; CHECK-NEXT: cmp %i3, 0 -; CHECK-NEXT: movne %icc, %i1, %o3 -; CHECK-NEXT: movne %icc, %i2, %o4 +; CHECK-NEXT: mov %g0, %i5 +; CHECK-NEXT: mov %g0, %g4 +; CHECK-NEXT: mov %g0, %l0 +; CHECK-NEXT: addcc %g3, -1, %o3 +; CHECK-NEXT: addxcc %g2, -1, %o2 +; CHECK-NEXT: or %g3, %g2, %l1 +; CHECK-NEXT: cmp %l1, 0 +; CHECK-NEXT: move %icc, 1, %i5 +; CHECK-NEXT: cmp %g2, %i1 +; CHECK-NEXT: movgu %icc, 1, %g4 +; CHECK-NEXT: cmp %g3, %i2 +; CHECK-NEXT: movgu %icc, 1, %l0 +; CHECK-NEXT: cmp %g2, %i1 +; CHECK-NEXT: move %icc, %l0, %g4 +; CHECK-NEXT: or %i5, %g4, %i5 +; CHECK-NEXT: cmp %i5, 0 +; 
CHECK-NEXT: movne %icc, %i1, %o2 +; CHECK-NEXT: movne %icc, %i2, %o3 +; CHECK-NEXT: std %g2, [%fp+-8] ; CHECK-NEXT: mov %i0, %o0 -; CHECK-NEXT: mov %i4, %o1 -; CHECK-NEXT: call __sync_val_compare_and_swap_8 -; CHECK-NEXT: mov %i5, %o2 -; CHECK-NEXT: xor %o0, %i4, %i3 -; CHECK-NEXT: xor %o1, %i5, %i4 -; CHECK-NEXT: or %i4, %i3, %i3 -; CHECK-NEXT: mov %o1, %i5 -; CHECK-NEXT: cmp %i3, 0 -; CHECK-NEXT: bne %icc, .LBB7_1 -; CHECK-NEXT: mov %o0, %i4 +; CHECK-NEXT: mov %i3, %o1 +; CHECK-NEXT: mov %i4, %o4 +; CHECK-NEXT: call __atomic_compare_exchange_8 +; CHECK-NEXT: mov %i4, %o5 +; CHECK-NEXT: cmp %o0, 0 +; CHECK-NEXT: be %icc, .LBB7_1 +; CHECK-NEXT: ldd [%fp+-8], %g2 ; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore -; CHECK-NEXT: mov %i4, %i0 +; CHECK-NEXT: mov %g2, %i0 ; CHECK-NEXT: ret -; CHECK-NEXT: restore %g0, %i5, %o1 +; CHECK-NEXT: restore %g0, %g3, %o1 %result = atomicrmw udec_wrap ptr %ptr, i64 %val seq_cst ret i64 %result } From d0a1bf8b306afa565951c65b662713882a0d2481 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 13 Feb 2024 12:49:15 -0800 Subject: [PATCH 053/240] [TypePromotion] Remove an unreachable 'return false'. NFC The if and the else above this both return so this is unreachable. Delete it and remove the else after return. 
--- llvm/lib/CodeGen/TypePromotion.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp index 7a3bc6c2043f4c..48ad8de778010e 100644 --- a/llvm/lib/CodeGen/TypePromotion.cpp +++ b/llvm/lib/CodeGen/TypePromotion.cpp @@ -359,22 +359,21 @@ bool TypePromotionImpl::isSafeWrap(Instruction *I) { if (!OverflowConst.isNonPositive()) return false; + SafeWrap.insert(I); + // Using C1 = OverflowConst and C2 = ICmpConst, we can either prove that: // zext(x) + sext(C1) s C2 // zext(x) + sext(C1) Date: Tue, 13 Feb 2024 16:04:21 -0500 Subject: [PATCH 054/240] [libc] Allow BigInt class to use base word types other than uint64_t. (#81634) This will allow DyadicFloat class to replace NormalFloat class. --- libc/src/__support/FPUtil/dyadic_float.h | 6 +- libc/src/__support/UInt.h | 761 ++++++++++++----------- libc/src/__support/float_to_string.h | 22 +- libc/src/__support/integer_utils.h | 46 +- libc/test/src/__support/uint_test.cpp | 48 +- 5 files changed, 493 insertions(+), 390 deletions(-) diff --git a/libc/src/__support/FPUtil/dyadic_float.h b/libc/src/__support/FPUtil/dyadic_float.h index 888d7ffec241ea..a8b3ad7a16d3bb 100644 --- a/libc/src/__support/FPUtil/dyadic_float.h +++ b/libc/src/__support/FPUtil/dyadic_float.h @@ -216,7 +216,7 @@ constexpr DyadicFloat quick_add(DyadicFloat a, if (result.mantissa.add(b.mantissa)) { // Mantissa addition overflow. result.shift_right(1); - result.mantissa.val[DyadicFloat::MantissaType::WORDCOUNT - 1] |= + result.mantissa.val[DyadicFloat::MantissaType::WORD_COUNT - 1] |= (uint64_t(1) << 63); } // Result is already normalized. @@ -243,7 +243,7 @@ constexpr DyadicFloat quick_add(DyadicFloat a, // result.mantissa = quick_mul_hi(a.mantissa + b.mantissa) // ~ (full product a.mantissa * b.mantissa) >> Bits. // The errors compared to the mathematical product is bounded by: -// 2 * errors of quick_mul_hi = 2 * (UInt::WORDCOUNT - 1) in ULPs. 
+// 2 * errors of quick_mul_hi = 2 * (UInt::WORD_COUNT - 1) in ULPs. // Assume inputs are normalized (by constructors or other functions) so that we // don't need to normalize the inputs again in this function. If the inputs are // not normalized, the results might lose precision significantly. @@ -258,7 +258,7 @@ constexpr DyadicFloat quick_mul(DyadicFloat a, result.mantissa = a.mantissa.quick_mul_hi(b.mantissa); // Check the leading bit directly, should be faster than using clz in // normalize(). - if (result.mantissa.val[DyadicFloat::MantissaType::WORDCOUNT - 1] >> + if (result.mantissa.val[DyadicFloat::MantissaType::WORD_COUNT - 1] >> 63 == 0) result.shift_left(1); diff --git a/libc/src/__support/UInt.h b/libc/src/__support/UInt.h index 7726b6d88f0d21..0828a34ba1a934 100644 --- a/libc/src/__support/UInt.h +++ b/libc/src/__support/UInt.h @@ -25,88 +25,104 @@ namespace LIBC_NAMESPACE::cpp { -template struct BigInt { +namespace internal { +template struct half_width; + +template <> struct half_width : type_identity {}; +template <> struct half_width : type_identity {}; +template <> struct half_width : type_identity {}; +#ifdef __SIZEOF_INT128__ +template <> struct half_width<__uint128_t> : type_identity {}; +#endif // __SIZEOF_INT128__ - // This being hardcoded as 64 is okay because we're using uint64_t as our - // internal type which will always be 64 bits. - using word_type = uint64_t; - LIBC_INLINE_VAR static constexpr size_t WORD_SIZE = - sizeof(word_type) * CHAR_BIT; +template using half_width_t = typename half_width::type; +} // namespace internal - // TODO: Replace references to 64 with WORD_SIZE, and uint64_t with word_type. 
- static_assert(Bits > 0 && Bits % 64 == 0, - "Number of bits in BigInt should be a multiple of 64."); - LIBC_INLINE_VAR static constexpr size_t WORDCOUNT = Bits / 64; - cpp::array val{}; +template +struct BigInt { + static_assert(is_integral_v && is_unsigned_v, + "WordType must be unsigned integer."); - LIBC_INLINE_VAR static constexpr uint64_t MASK32 = 0xFFFFFFFFu; + LIBC_INLINE_VAR + static constexpr size_t WORD_SIZE = sizeof(WordType) * CHAR_BIT; - LIBC_INLINE static constexpr uint64_t low(uint64_t v) { return v & MASK32; } - LIBC_INLINE static constexpr uint64_t high(uint64_t v) { - return (v >> 32) & MASK32; - } + static_assert(Bits > 0 && Bits % WORD_SIZE == 0, + "Number of bits in BigInt should be a multiple of WORD_SIZE."); + + LIBC_INLINE_VAR static constexpr size_t WORD_COUNT = Bits / WORD_SIZE; + cpp::array val{}; LIBC_INLINE constexpr BigInt() = default; - LIBC_INLINE constexpr BigInt(const BigInt &other) = default; + LIBC_INLINE constexpr BigInt(const BigInt &other) = + default; template - LIBC_INLINE constexpr BigInt(const BigInt &other) { + LIBC_INLINE constexpr BigInt( + const BigInt &other) { if (OtherBits >= Bits) { - for (size_t i = 0; i < WORDCOUNT; ++i) + for (size_t i = 0; i < WORD_COUNT; ++i) val[i] = other[i]; } else { size_t i = 0; for (; i < OtherBits / 64; ++i) val[i] = other[i]; - uint64_t sign = 0; + WordType sign = 0; if constexpr (Signed && OtherSigned) { - sign = static_cast( - -static_cast(other[OtherBits / 64 - 1] >> 63)); + sign = static_cast(-static_cast>( + other[OtherBits / WORD_SIZE - 1] >> (WORD_SIZE - 1))); } - for (; i < WORDCOUNT; ++i) + for (; i < WORD_COUNT; ++i) val[i] = sign; } } // Construct a BigInt from a C array. - template = 0> - LIBC_INLINE constexpr BigInt(const uint64_t (&nums)[N]) { - size_t min_wordcount = N < WORDCOUNT ? N : WORDCOUNT; + template = 0> + LIBC_INLINE constexpr BigInt(const WordType (&nums)[N]) { + size_t min_wordcount = N < WORD_COUNT ? 
N : WORD_COUNT; size_t i = 0; for (; i < min_wordcount; ++i) val[i] = nums[i]; // If nums doesn't completely fill val, then fill the rest with zeroes. - for (; i < WORDCOUNT; ++i) + for (; i < WORD_COUNT; ++i) val[i] = 0; } // Initialize the first word to |v| and the rest to 0. - template && sizeof(T) <= 16>> + template >> LIBC_INLINE constexpr BigInt(T v) { - val[0] = static_cast(v); + val[0] = static_cast(v); - if constexpr (Bits == 64) + if constexpr (WORD_COUNT == 1) return; - // Bits is at least 128. - size_t i = 1; - if constexpr (sizeof(T) == 16) { - val[1] = static_cast(v >> 64); - i = 2; + if constexpr (Bits < sizeof(T) * CHAR_BIT) { + for (int i = 1; i < WORD_COUNT; ++i) { + v >>= WORD_SIZE; + val[i] = static_cast(v); + } + return; } - uint64_t sign = (Signed && (v < 0)) ? 0xffff'ffff'ffff'ffff : 0; - for (; i < WORDCOUNT; ++i) { + size_t i = 1; + + if constexpr (WORD_SIZE < sizeof(T) * CHAR_BIT) + for (; i < sizeof(T) * CHAR_BIT / WORD_SIZE; ++i) { + v >>= WORD_SIZE; + val[i] = static_cast(v); + } + + WordType sign = (Signed && (v < 0)) ? ~WordType(0) : WordType(0); + for (; i < WORD_COUNT; ++i) { val[i] = sign; } } LIBC_INLINE constexpr explicit BigInt( - const cpp::array &words) { - for (size_t i = 0; i < WORDCOUNT; ++i) + const cpp::array &words) { + for (size_t i = 0; i < WORD_COUNT; ++i) val[i] = words[i]; } @@ -116,36 +132,37 @@ template struct BigInt { template LIBC_INLINE constexpr cpp::enable_if_t< - cpp::is_integral_v && sizeof(T) <= 8 && !cpp::is_same_v, T> - to() const { - return static_cast(val[0]); - } - template - LIBC_INLINE constexpr cpp::enable_if_t< - cpp::is_integral_v && sizeof(T) == 16, T> + cpp::is_integral_v && !cpp::is_same_v, T> to() const { - // T is 128-bit. T lo = static_cast(val[0]); - if constexpr (Bits == 64) { - if constexpr (Signed) { - // Extend sign for negative numbers. - return (val[0] >> 63) ? 
((T(-1) << 64) + lo) : lo; - } else { - return lo; - } - } else { - return static_cast((static_cast(val[1]) << 64) + lo); + constexpr size_t T_BITS = sizeof(T) * CHAR_BIT; + + if constexpr (T_BITS <= WORD_SIZE) + return lo; + + constexpr size_t MAX_COUNT = + T_BITS > Bits ? WORD_COUNT : T_BITS / WORD_SIZE; + for (size_t i = 1; i < MAX_COUNT; ++i) + lo += static_cast(val[i]) << (WORD_SIZE * i); + + if constexpr (Signed && (T_BITS > Bits)) { + // Extend sign for negative numbers. + constexpr T MASK = (~T(0) << Bits); + if (val[WORD_COUNT - 1] >> (WORD_SIZE - 1)) + lo |= MASK; } + + return lo; } LIBC_INLINE constexpr explicit operator bool() const { return !is_zero(); } - LIBC_INLINE BigInt & - operator=(const BigInt &other) = default; + LIBC_INLINE BigInt & + operator=(const BigInt &other) = default; LIBC_INLINE constexpr bool is_zero() const { - for (size_t i = 0; i < WORDCOUNT; ++i) { + for (size_t i = 0; i < WORD_COUNT; ++i) { if (val[i] != 0) return false; } @@ -154,20 +171,20 @@ template struct BigInt { // Add x to this number and store the result in this number. // Returns the carry value produced by the addition operation. 
- LIBC_INLINE constexpr uint64_t add(const BigInt &x) { - SumCarry s{0, 0}; - for (size_t i = 0; i < WORDCOUNT; ++i) { + LIBC_INLINE constexpr WordType add(const BigInt &x) { + SumCarry s{0, 0}; + for (size_t i = 0; i < WORD_COUNT; ++i) { s = add_with_carry_const(val[i], x.val[i], s.carry); val[i] = s.sum; } return s.carry; } - LIBC_INLINE constexpr BigInt - operator+(const BigInt &other) const { - BigInt result; - SumCarry s{0, 0}; - for (size_t i = 0; i < WORDCOUNT; ++i) { + LIBC_INLINE constexpr BigInt + operator+(const BigInt &other) const { + BigInt result; + SumCarry s{0, 0}; + for (size_t i = 0; i < WORD_COUNT; ++i) { s = add_with_carry(val[i], other.val[i], s.carry); result.val[i] = s.sum; } @@ -176,58 +193,58 @@ template struct BigInt { // This will only apply when initializing a variable from constant values, so // it will always use the constexpr version of add_with_carry. - LIBC_INLINE constexpr BigInt - operator+(BigInt &&other) const { - BigInt result; - SumCarry s{0, 0}; - for (size_t i = 0; i < WORDCOUNT; ++i) { + LIBC_INLINE constexpr BigInt + operator+(BigInt &&other) const { + BigInt result; + SumCarry s{0, 0}; + for (size_t i = 0; i < WORD_COUNT; ++i) { s = add_with_carry_const(val[i], other.val[i], s.carry); result.val[i] = s.sum; } return result; } - LIBC_INLINE constexpr BigInt & - operator+=(const BigInt &other) { + LIBC_INLINE constexpr BigInt & + operator+=(const BigInt &other) { add(other); // Returned carry value is ignored. return *this; } // Subtract x to this number and store the result in this number. // Returns the carry value produced by the subtraction operation. 
- LIBC_INLINE constexpr uint64_t sub(const BigInt &x) { - DiffBorrow d{0, 0}; - for (size_t i = 0; i < WORDCOUNT; ++i) { + LIBC_INLINE constexpr WordType sub(const BigInt &x) { + DiffBorrow d{0, 0}; + for (size_t i = 0; i < WORD_COUNT; ++i) { d = sub_with_borrow_const(val[i], x.val[i], d.borrow); val[i] = d.diff; } return d.borrow; } - LIBC_INLINE constexpr BigInt - operator-(const BigInt &other) const { - BigInt result; - DiffBorrow d{0, 0}; - for (size_t i = 0; i < WORDCOUNT; ++i) { + LIBC_INLINE constexpr BigInt + operator-(const BigInt &other) const { + BigInt result; + DiffBorrow d{0, 0}; + for (size_t i = 0; i < WORD_COUNT; ++i) { d = sub_with_borrow(val[i], other.val[i], d.borrow); result.val[i] = d.diff; } return result; } - LIBC_INLINE constexpr BigInt - operator-(BigInt &&other) const { - BigInt result; - DiffBorrow d{0, 0}; - for (size_t i = 0; i < WORDCOUNT; ++i) { + LIBC_INLINE constexpr BigInt + operator-(BigInt &&other) const { + BigInt result; + DiffBorrow d{0, 0}; + for (size_t i = 0; i < WORD_COUNT; ++i) { d = sub_with_borrow_const(val[i], other.val[i], d.borrow); result.val[i] = d.diff; } return result; } - LIBC_INLINE constexpr BigInt & - operator-=(const BigInt &other) { + LIBC_INLINE constexpr BigInt & + operator-=(const BigInt &other) { // TODO(lntue): Set overflow flag / errno when carry is true. sub(other); return *this; @@ -239,12 +256,12 @@ template struct BigInt { // the operations using 64-bit numbers. This ensures that we don't lose the // carry bits. // Returns the carry value produced by the multiplication operation. 
- LIBC_INLINE constexpr uint64_t mul(uint64_t x) { - BigInt<128, Signed> partial_sum(0); - uint64_t carry = 0; - for (size_t i = 0; i < WORDCOUNT; ++i) { - NumberPair prod = full_mul(val[i], x); - BigInt<128, Signed> tmp({prod.lo, prod.hi}); + LIBC_INLINE constexpr WordType mul(WordType x) { + BigInt<2 * WORD_SIZE, Signed, WordType> partial_sum(0); + WordType carry = 0; + for (size_t i = 0; i < WORD_COUNT; ++i) { + NumberPair prod = full_mul(val[i], x); + BigInt<2 * WORD_SIZE, Signed, WordType> tmp({prod.lo, prod.hi}); carry += partial_sum.add(tmp); val[i] = partial_sum.val[0]; partial_sum.val[0] = partial_sum.val[1]; @@ -254,33 +271,33 @@ template struct BigInt { return partial_sum.val[1]; } - LIBC_INLINE constexpr BigInt - operator*(const BigInt &other) const { + LIBC_INLINE constexpr BigInt + operator*(const BigInt &other) const { if constexpr (Signed) { - BigInt a(*this); - BigInt b(other); - bool a_neg = (a.val[WORDCOUNT - 1] >> 63); - bool b_neg = (b.val[WORDCOUNT - 1] >> 63); + BigInt a(*this); + BigInt b(other); + bool a_neg = (a.val[WORD_COUNT - 1] >> (WORD_SIZE - 1)); + bool b_neg = (b.val[WORD_COUNT - 1] >> (WORD_SIZE - 1)); if (a_neg) a = -a; if (b_neg) b = -b; - BigInt prod = a * b; + BigInt prod = a * b; if (a_neg != b_neg) prod = -prod; - return static_cast>(prod); + return static_cast>(prod); } else { - if constexpr (WORDCOUNT == 1) { + if constexpr (WORD_COUNT == 1) { return {val[0] * other.val[0]}; } else { - BigInt result(0); - BigInt<128, Signed> partial_sum(0); - uint64_t carry = 0; - for (size_t i = 0; i < WORDCOUNT; ++i) { + BigInt result(0); + BigInt<2 * WORD_SIZE, Signed, WordType> partial_sum(0); + WordType carry = 0; + for (size_t i = 0; i < WORD_COUNT; ++i) { for (size_t j = 0; j <= i; j++) { - NumberPair prod = full_mul(val[j], other.val[i - j]); - BigInt<128, Signed> tmp({prod.lo, prod.hi}); + NumberPair prod = full_mul(val[j], other.val[i - j]); + BigInt<2 * WORD_SIZE, Signed, WordType> tmp({prod.lo, prod.hi}); carry += 
partial_sum.add(tmp); } result.val[i] = partial_sum.val[0]; @@ -295,19 +312,20 @@ template struct BigInt { // Return the full product, only unsigned for now. template - LIBC_INLINE constexpr BigInt - ful_mul(const BigInt &other) const { - BigInt result(0); - BigInt<128, Signed> partial_sum(0); - uint64_t carry = 0; - constexpr size_t OTHER_WORDCOUNT = BigInt::WORDCOUNT; - for (size_t i = 0; i <= WORDCOUNT + OTHER_WORDCOUNT - 2; ++i) { + LIBC_INLINE constexpr BigInt + ful_mul(const BigInt &other) const { + BigInt result(0); + BigInt<2 * WORD_SIZE, Signed, WordType> partial_sum(0); + WordType carry = 0; + constexpr size_t OTHER_WORDCOUNT = + BigInt::WORD_COUNT; + for (size_t i = 0; i <= WORD_COUNT + OTHER_WORDCOUNT - 2; ++i) { const size_t lower_idx = i < OTHER_WORDCOUNT ? 0 : i - OTHER_WORDCOUNT + 1; - const size_t upper_idx = i < WORDCOUNT ? i : WORDCOUNT - 1; + const size_t upper_idx = i < WORD_COUNT ? i : WORD_COUNT - 1; for (size_t j = lower_idx; j <= upper_idx; ++j) { - NumberPair prod = full_mul(val[j], other.val[i - j]); - BigInt<128, Signed> tmp({prod.lo, prod.hi}); + NumberPair prod = full_mul(val[j], other.val[i - j]); + BigInt<2 * WORD_SIZE, Signed, WordType> tmp({prod.lo, prod.hi}); carry += partial_sum.add(tmp); } result.val[i] = partial_sum.val[0]; @@ -315,7 +333,7 @@ template struct BigInt { partial_sum.val[1] = carry; carry = 0; } - result.val[WORDCOUNT + OTHER_WORDCOUNT - 1] = partial_sum.val[0]; + result.val[WORD_COUNT + OTHER_WORDCOUNT - 1] = partial_sum.val[0]; return result; } @@ -323,7 +341,7 @@ template struct BigInt { // `Bits` least significant bits of the full product, while this function will // approximate `Bits` most significant bits of the full product with errors // bounded by: - // 0 <= (a.full_mul(b) >> Bits) - a.quick_mul_hi(b)) <= WORDCOUNT - 1. + // 0 <= (a.full_mul(b) >> Bits) - a.quick_mul_hi(b)) <= WORD_COUNT - 1. 
// // An example usage of this is to quickly (but less accurately) compute the // product of (normalized) mantissas of floating point numbers: @@ -335,44 +353,44 @@ template struct BigInt { // // Performance summary: // Number of 64-bit x 64-bit -> 128-bit multiplications performed. - // Bits WORDCOUNT ful_mul quick_mul_hi Error bound + // Bits WORD_COUNT ful_mul quick_mul_hi Error bound // 128 2 4 3 1 // 196 3 9 6 2 // 256 4 16 10 3 // 512 8 64 36 7 - LIBC_INLINE constexpr BigInt - quick_mul_hi(const BigInt &other) const { - BigInt result(0); - BigInt<128, Signed> partial_sum(0); - uint64_t carry = 0; - // First round of accumulation for those at WORDCOUNT - 1 in the full + LIBC_INLINE constexpr BigInt + quick_mul_hi(const BigInt &other) const { + BigInt result(0); + BigInt<2 * WORD_SIZE, Signed, WordType> partial_sum(0); + WordType carry = 0; + // First round of accumulation for those at WORD_COUNT - 1 in the full // product. - for (size_t i = 0; i < WORDCOUNT; ++i) { - NumberPair prod = - full_mul(val[i], other.val[WORDCOUNT - 1 - i]); - BigInt<128, Signed> tmp({prod.lo, prod.hi}); + for (size_t i = 0; i < WORD_COUNT; ++i) { + NumberPair prod = + full_mul(val[i], other.val[WORD_COUNT - 1 - i]); + BigInt<2 * WORD_SIZE, Signed, WordType> tmp({prod.lo, prod.hi}); carry += partial_sum.add(tmp); } - for (size_t i = WORDCOUNT; i < 2 * WORDCOUNT - 1; ++i) { + for (size_t i = WORD_COUNT; i < 2 * WORD_COUNT - 1; ++i) { partial_sum.val[0] = partial_sum.val[1]; partial_sum.val[1] = carry; carry = 0; - for (size_t j = i - WORDCOUNT + 1; j < WORDCOUNT; ++j) { - NumberPair prod = full_mul(val[j], other.val[i - j]); - BigInt<128, Signed> tmp({prod.lo, prod.hi}); + for (size_t j = i - WORD_COUNT + 1; j < WORD_COUNT; ++j) { + NumberPair prod = full_mul(val[j], other.val[i - j]); + BigInt<2 * WORD_SIZE, Signed, WordType> tmp({prod.lo, prod.hi}); carry += partial_sum.add(tmp); } - result.val[i - WORDCOUNT] = partial_sum.val[0]; + result.val[i - WORD_COUNT] = partial_sum.val[0]; } 
- result.val[WORDCOUNT - 1] = partial_sum.val[1]; + result.val[WORD_COUNT - 1] = partial_sum.val[1]; return result; } // pow takes a power and sets this to its starting value to that power. Zero // to the zeroth power returns 1. LIBC_INLINE constexpr void pow_n(uint64_t power) { - BigInt result = 1; - BigInt cur_power = *this; + BigInt result = 1; + BigInt cur_power = *this; while (power > 0) { if ((power % 2) > 0) { @@ -388,12 +406,12 @@ template struct BigInt { // div takes another BigInt of the same size and divides this by it. The value // of this will be set to the quotient, and the return value is the remainder. - LIBC_INLINE constexpr optional> - div(const BigInt &other) { - BigInt remainder(0); + LIBC_INLINE constexpr optional> + div(const BigInt &other) { + BigInt remainder(0); if (*this < other) { remainder = *this; - *this = BigInt(0); + *this = BigInt(0); return remainder; } if (other == 1) { @@ -403,15 +421,15 @@ template struct BigInt { return nullopt; } - BigInt quotient(0); - BigInt subtractor = other; + BigInt quotient(0); + BigInt subtractor = other; int cur_bit = static_cast(subtractor.clz() - this->clz()); subtractor.shift_left(cur_bit); for (; cur_bit >= 0 && *this > 0; --cur_bit, subtractor.shift_right(1)) { if (*this >= subtractor) { this->sub(subtractor); - quotient = quotient | (BigInt(1) << cur_bit); + quotient = quotient | (BigInt(1) << cur_bit); } } remainder = *this; @@ -419,8 +437,8 @@ template struct BigInt { return remainder; } - // Efficiently perform BigInt / (x * 2^e), where x is a 32-bit unsigned - // integer, and return the remainder. The main idea is as follow: + // Efficiently perform BigInt / (x * 2^e), where x is a half-word-size + // unsigned integer, and return the remainder. The main idea is as follow: // Let q = y / (x * 2^e) be the quotient, and // r = y % (x * 2^e) be the remainder. // First, notice that: @@ -428,102 +446,109 @@ template struct BigInt { // so we just need to focus on all the bits of y that is >= 2^e. 
// To speed up the shift-and-add steps, we only use x as the divisor, and // performing 32-bit shiftings instead of bit-by-bit shiftings. - // Since the remainder of each division step < x < 2^32, the computation of - // each step is now properly contained within uint64_t. + // Since the remainder of each division step < x < 2^(WORD_SIZE / 2), the + // computation of each step is now properly contained within WordType. // And finally we perform some extra alignment steps for the remaining bits. - LIBC_INLINE constexpr optional> - div_uint32_times_pow_2(uint32_t x, size_t e) { - BigInt remainder(0); + LIBC_INLINE constexpr optional> + div_uint_half_times_pow_2(internal::half_width_t x, size_t e) { + BigInt remainder(0); if (x == 0) { return nullopt; } if (e >= Bits) { remainder = *this; - *this = BigInt(0); + *this = BigInt(0); return remainder; } - BigInt quotient(0); - uint64_t x64 = static_cast(x); - // lower64 = smallest multiple of 64 that is >= e. - size_t lower64 = ((e >> 6) + ((e & 63) != 0)) << 6; - // lower_pos is the index of the closest 64-bit chunk >= 2^e. - size_t lower_pos = lower64 / 64; + BigInt quotient(0); + WordType x_word = static_cast(x); + constexpr size_t LOG2_WORD_SIZE = bit_width(WORD_SIZE) - 1; + constexpr size_t HALF_WORD_SIZE = WORD_SIZE >> 1; + constexpr WordType HALF_MASK = ((WordType(1) << HALF_WORD_SIZE) - 1); + // lower = smallest multiple of WORD_SIZE that is >= e. + size_t lower = ((e >> LOG2_WORD_SIZE) + ((e & (WORD_SIZE - 1)) != 0)) + << LOG2_WORD_SIZE; + // lower_pos is the index of the closest WORD_SIZE-bit chunk >= 2^e. + size_t lower_pos = lower / WORD_SIZE; // Keep track of current remainder mod x * 2^(32*i) - uint64_t rem = 0; + WordType rem = 0; // pos is the index of the current 64-bit chunk that we are processing. - size_t pos = WORDCOUNT; + size_t pos = WORD_COUNT; // TODO: look into if constexpr(Bits > 256) skip leading zeroes. 
- for (size_t q_pos = WORDCOUNT - lower_pos; q_pos > 0; --q_pos) { - // q_pos is 1 + the index of the current 64-bit chunk of the quotient - // being processed. - // Performing the division / modulus with divisor: - // x * 2^(64*q_pos - 32), - // i.e. using the upper 32-bit of the current 64-bit chunk. - rem <<= 32; - rem += val[--pos] >> 32; - uint64_t q_tmp = rem / x64; - rem %= x64; + for (size_t q_pos = WORD_COUNT - lower_pos; q_pos > 0; --q_pos) { + // q_pos is 1 + the index of the current WORD_SIZE-bit chunk of the + // quotient being processed. Performing the division / modulus with + // divisor: + // x * 2^(WORD_SIZE*q_pos - WORD_SIZE/2), + // i.e. using the upper (WORD_SIZE/2)-bit of the current WORD_SIZE-bit + // chunk. + rem <<= HALF_WORD_SIZE; + rem += val[--pos] >> HALF_WORD_SIZE; + WordType q_tmp = rem / x_word; + rem %= x_word; // Performing the division / modulus with divisor: - // x * 2^(64*(q_pos - 1)), - // i.e. using the lower 32-bit of the current 64-bit chunk. - rem <<= 32; - rem += val[pos] & MASK32; - quotient.val[q_pos - 1] = (q_tmp << 32) + rem / x64; - rem %= x64; + // x * 2^(WORD_SIZE*(q_pos - 1)), + // i.e. using the lower (WORD_SIZE/2)-bit of the current WORD_SIZE-bit + // chunk. + rem <<= HALF_WORD_SIZE; + rem += val[pos] & HALF_MASK; + quotient.val[q_pos - 1] = (q_tmp << HALF_WORD_SIZE) + rem / x_word; + rem %= x_word; } // So far, what we have is: - // quotient = y / (x * 2^lower64), and - // rem = (y % (x * 2^lower64)) / 2^lower64. - // If (lower64 > e), we will need to perform an extra adjustment of the + // quotient = y / (x * 2^lower), and + // rem = (y % (x * 2^lower)) / 2^lower. 
+ // If (lower > e), we will need to perform an extra adjustment of the // quotient and remainder, namely: - // y / (x * 2^e) = [ y / (x * 2^lower64) ] * 2^(lower64 - e) + - // + (rem * 2^(lower64 - e)) / x - // (y % (x * 2^e)) / 2^e = (rem * 2^(lower64 - e)) % x - size_t last_shift = lower64 - e; + // y / (x * 2^e) = [ y / (x * 2^lower) ] * 2^(lower - e) + + // + (rem * 2^(lower - e)) / x + // (y % (x * 2^e)) / 2^e = (rem * 2^(lower - e)) % x + size_t last_shift = lower - e; if (last_shift > 0) { - // quotient * 2^(lower64 - e) + // quotient * 2^(lower - e) quotient <<= last_shift; - uint64_t q_tmp = 0; - uint64_t d = val[--pos]; - if (last_shift >= 32) { - // The shifting (rem * 2^(lower64 - e)) might overflow uint64_t, so we - // perform a 32-bit shift first. - rem <<= 32; - rem += d >> 32; - d &= MASK32; - q_tmp = rem / x64; - rem %= x64; - last_shift -= 32; + WordType q_tmp = 0; + WordType d = val[--pos]; + if (last_shift >= HALF_WORD_SIZE) { + // The shifting (rem * 2^(lower - e)) might overflow WordTyoe, so we + // perform a HALF_WORD_SIZE-bit shift first. + rem <<= HALF_WORD_SIZE; + rem += d >> HALF_WORD_SIZE; + d &= HALF_MASK; + q_tmp = rem / x_word; + rem %= x_word; + last_shift -= HALF_WORD_SIZE; } else { - // Only use the upper 32-bit of the current 64-bit chunk. - d >>= 32; + // Only use the upper HALF_WORD_SIZE-bit of the current WORD_SIZE-bit + // chunk. + d >>= HALF_WORD_SIZE; } if (last_shift > 0) { - rem <<= 32; + rem <<= HALF_WORD_SIZE; rem += d; q_tmp <<= last_shift; - x64 <<= 32 - last_shift; - q_tmp += rem / x64; - rem %= x64; + x_word <<= HALF_WORD_SIZE - last_shift; + q_tmp += rem / x_word; + rem %= x_word; } quotient.val[0] += q_tmp; - if (lower64 - e <= 32) { - // The remainder rem * 2^(lower64 - e) might overflow to the higher - // 64-bit chunk. - if (pos < WORDCOUNT - 1) { - remainder[pos + 1] = rem >> 32; + if (lower - e <= HALF_WORD_SIZE) { + // The remainder rem * 2^(lower - e) might overflow to the higher + // WORD_SIZE-bit chunk. 
+ if (pos < WORD_COUNT - 1) { + remainder[pos + 1] = rem >> HALF_WORD_SIZE; } - remainder[pos] = (rem << 32) + (val[pos] & MASK32); + remainder[pos] = (rem << HALF_WORD_SIZE) + (val[pos] & HALF_MASK); } else { remainder[pos] = rem; } @@ -541,36 +566,36 @@ template struct BigInt { return remainder; } - LIBC_INLINE constexpr BigInt - operator/(const BigInt &other) const { - BigInt result(*this); + LIBC_INLINE constexpr BigInt + operator/(const BigInt &other) const { + BigInt result(*this); result.div(other); return result; } - LIBC_INLINE constexpr BigInt & - operator/=(const BigInt &other) { + LIBC_INLINE constexpr BigInt & + operator/=(const BigInt &other) { div(other); return *this; } - LIBC_INLINE constexpr BigInt - operator%(const BigInt &other) const { - BigInt result(*this); + LIBC_INLINE constexpr BigInt + operator%(const BigInt &other) const { + BigInt result(*this); return *result.div(other); } - LIBC_INLINE constexpr BigInt & - operator*=(const BigInt &other) { + LIBC_INLINE constexpr BigInt & + operator*=(const BigInt &other) { *this = *this * other; return *this; } LIBC_INLINE constexpr uint64_t clz() { uint64_t leading_zeroes = 0; - for (size_t i = WORDCOUNT; i > 0; --i) { + for (size_t i = WORD_COUNT; i > 0; --i) { if (val[i - 1] == 0) { - leading_zeroes += sizeof(uint64_t) * 8; + leading_zeroes += WORD_SIZE; } else { leading_zeroes += countl_zero(val[i - 1]); break; @@ -580,8 +605,30 @@ template struct BigInt { } LIBC_INLINE constexpr void shift_left(size_t s) { + if constexpr (Bits == WORD_SIZE) { + // Use native types if possible. 
+ if (s >= WORD_SIZE) { + val[0] = 0; + return; + } + val[0] <<= s; + return; + } + if constexpr ((Bits == 64) && (WORD_SIZE == 32)) { + // Use builtin 64 bits for 32-bit base type if available; + if (s >= 64) { + val[0] = 0; + val[1] = 0; + return; + } + uint64_t tmp = uint64__t(val[0]) + (uint64_t(val[1]) << 62); + tmp <<= s; + val[0] = uint32_t(tmp); + val[1] = uint32_t(tmp >> 32); + return; + } #ifdef __SIZEOF_INT128__ - if constexpr (Bits == 128) { + if constexpr ((Bits == 128) && (WORD_SIZE == 64)) { // Use builtin 128 bits if available; if (s >= 128) { val[0] = 0; @@ -598,19 +645,19 @@ template struct BigInt { if (LIBC_UNLIKELY(s == 0)) return; - const size_t drop = s / 64; // Number of words to drop - const size_t shift = s % 64; // Bits to shift in the remaining words. - size_t i = WORDCOUNT; + const size_t drop = s / WORD_SIZE; // Number of words to drop + const size_t shift = s % WORD_SIZE; // Bits to shift in the remaining words. + size_t i = WORD_COUNT; - if (drop < WORDCOUNT) { - i = WORDCOUNT - 1; + if (drop < WORD_COUNT) { + i = WORD_COUNT - 1; if (shift > 0) { - for (size_t j = WORDCOUNT - 1 - drop; j > 0; --i, --j) { - val[i] = (val[j] << shift) | (val[j - 1] >> (64 - shift)); + for (size_t j = WORD_COUNT - 1 - drop; j > 0; --i, --j) { + val[i] = (val[j] << shift) | (val[j - 1] >> (WORD_SIZE - shift)); } val[i] = val[0] << shift; } else { - for (size_t j = WORDCOUNT - 1 - drop; j > 0; --i, --j) { + for (size_t j = WORD_COUNT - 1 - drop; j > 0; --i, --j) { val[i] = val[j]; } val[i] = val[0]; @@ -622,20 +669,38 @@ template struct BigInt { } } - LIBC_INLINE constexpr BigInt operator<<(size_t s) const { - BigInt result(*this); + LIBC_INLINE constexpr BigInt + operator<<(size_t s) const { + BigInt result(*this); result.shift_left(s); return result; } - LIBC_INLINE constexpr BigInt &operator<<=(size_t s) { + LIBC_INLINE constexpr BigInt &operator<<=(size_t s) { shift_left(s); return *this; } LIBC_INLINE constexpr void shift_right(size_t s) { + if 
constexpr ((Bits == 64) && (WORD_SIZE == 32)) { + // Use builtin 64 bits if available; + if (s >= 64) { + val[0] = 0; + val[1] = 0; + return; + } + uint64_t tmp = uint64_t(val[0]) + (uint64_t(val[1]) << 32); + if constexpr (Signed) { + tmp = static_cast(static_cast(tmp) >> s); + } else { + tmp >>= s; + } + val[0] = uint32_t(tmp); + val[1] = uint32_t(tmp >> 32); + return; + } #ifdef __SIZEOF_INT128__ - if constexpr (Bits == 128) { + if constexpr ((Bits == 128) && (WORD_SIZE == 64)) { // Use builtin 128 bits if available; if (s >= 128) { val[0] = 0; @@ -656,108 +721,110 @@ template struct BigInt { if (LIBC_UNLIKELY(s == 0)) return; - const size_t drop = s / 64; // Number of words to drop - const size_t shift = s % 64; // Bit shift in the remaining words. + const size_t drop = s / WORD_SIZE; // Number of words to drop + const size_t shift = s % WORD_SIZE; // Bit shift in the remaining words. size_t i = 0; - uint64_t sign = Signed ? (val[WORDCOUNT - 1] >> 63) : 0; + WordType sign = Signed ? 
(val[WORD_COUNT - 1] >> (WORD_SIZE - 1)) : 0; - if (drop < WORDCOUNT) { + if (drop < WORD_COUNT) { if (shift > 0) { - for (size_t j = drop; j < WORDCOUNT - 1; ++i, ++j) { - val[i] = (val[j] >> shift) | (val[j + 1] << (64 - shift)); + for (size_t j = drop; j < WORD_COUNT - 1; ++i, ++j) { + val[i] = (val[j] >> shift) | (val[j + 1] << (WORD_SIZE - shift)); } if constexpr (Signed) { - val[i] = static_cast( - static_cast(val[WORDCOUNT - 1]) >> shift); + val[i] = static_cast( + static_cast>(val[WORD_COUNT - 1]) >> + shift); } else { - val[i] = val[WORDCOUNT - 1] >> shift; + val[i] = val[WORD_COUNT - 1] >> shift; } ++i; } else { - for (size_t j = drop; j < WORDCOUNT; ++i, ++j) { + for (size_t j = drop; j < WORD_COUNT; ++i, ++j) { val[i] = val[j]; } } } - for (; i < WORDCOUNT; ++i) { + for (; i < WORD_COUNT; ++i) { val[i] = sign; } } - LIBC_INLINE constexpr BigInt operator>>(size_t s) const { - BigInt result(*this); + LIBC_INLINE constexpr BigInt + operator>>(size_t s) const { + BigInt result(*this); result.shift_right(s); return result; } - LIBC_INLINE constexpr BigInt &operator>>=(size_t s) { + LIBC_INLINE constexpr BigInt &operator>>=(size_t s) { shift_right(s); return *this; } - LIBC_INLINE constexpr BigInt - operator&(const BigInt &other) const { - BigInt result; - for (size_t i = 0; i < WORDCOUNT; ++i) + LIBC_INLINE constexpr BigInt + operator&(const BigInt &other) const { + BigInt result; + for (size_t i = 0; i < WORD_COUNT; ++i) result.val[i] = val[i] & other.val[i]; return result; } - LIBC_INLINE constexpr BigInt & - operator&=(const BigInt &other) { - for (size_t i = 0; i < WORDCOUNT; ++i) + LIBC_INLINE constexpr BigInt & + operator&=(const BigInt &other) { + for (size_t i = 0; i < WORD_COUNT; ++i) val[i] &= other.val[i]; return *this; } - LIBC_INLINE constexpr BigInt - operator|(const BigInt &other) const { - BigInt result; - for (size_t i = 0; i < WORDCOUNT; ++i) + LIBC_INLINE constexpr BigInt + operator|(const BigInt &other) const { + BigInt result; + for 
(size_t i = 0; i < WORD_COUNT; ++i) result.val[i] = val[i] | other.val[i]; return result; } - LIBC_INLINE constexpr BigInt & - operator|=(const BigInt &other) { - for (size_t i = 0; i < WORDCOUNT; ++i) + LIBC_INLINE constexpr BigInt & + operator|=(const BigInt &other) { + for (size_t i = 0; i < WORD_COUNT; ++i) val[i] |= other.val[i]; return *this; } - LIBC_INLINE constexpr BigInt - operator^(const BigInt &other) const { - BigInt result; - for (size_t i = 0; i < WORDCOUNT; ++i) + LIBC_INLINE constexpr BigInt + operator^(const BigInt &other) const { + BigInt result; + for (size_t i = 0; i < WORD_COUNT; ++i) result.val[i] = val[i] ^ other.val[i]; return result; } - LIBC_INLINE constexpr BigInt & - operator^=(const BigInt &other) { - for (size_t i = 0; i < WORDCOUNT; ++i) + LIBC_INLINE constexpr BigInt & + operator^=(const BigInt &other) { + for (size_t i = 0; i < WORD_COUNT; ++i) val[i] ^= other.val[i]; return *this; } - LIBC_INLINE constexpr BigInt operator~() const { - BigInt result; - for (size_t i = 0; i < WORDCOUNT; ++i) + LIBC_INLINE constexpr BigInt operator~() const { + BigInt result; + for (size_t i = 0; i < WORD_COUNT; ++i) result.val[i] = ~val[i]; return result; } - LIBC_INLINE constexpr BigInt operator-() const { - BigInt result = ~(*this); - result.add(BigInt(1)); + LIBC_INLINE constexpr BigInt operator-() const { + BigInt result = ~(*this); + result.add(BigInt(1)); return result; } LIBC_INLINE constexpr bool - operator==(const BigInt &other) const { - for (size_t i = 0; i < WORDCOUNT; ++i) { + operator==(const BigInt &other) const { + for (size_t i = 0; i < WORD_COUNT; ++i) { if (val[i] != other.val[i]) return false; } @@ -765,8 +832,8 @@ template struct BigInt { } LIBC_INLINE constexpr bool - operator!=(const BigInt &other) const { - for (size_t i = 0; i < WORDCOUNT; ++i) { + operator!=(const BigInt &other) const { + for (size_t i = 0; i < WORD_COUNT; ++i) { if (val[i] != other.val[i]) return true; } @@ -774,18 +841,18 @@ template struct BigInt { } 
LIBC_INLINE constexpr bool - operator>(const BigInt &other) const { + operator>(const BigInt &other) const { if constexpr (Signed) { // Check for different signs; - bool a_sign = val[WORDCOUNT - 1] >> 63; - bool b_sign = other.val[WORDCOUNT - 1] >> 63; + bool a_sign = val[WORD_COUNT - 1] >> (WORD_SIZE - 1); + bool b_sign = other.val[WORD_COUNT - 1] >> (WORD_SIZE - 1); if (a_sign != b_sign) { return b_sign; } } - for (size_t i = WORDCOUNT; i > 0; --i) { - uint64_t word = val[i - 1]; - uint64_t other_word = other.val[i - 1]; + for (size_t i = WORD_COUNT; i > 0; --i) { + WordType word = val[i - 1]; + WordType other_word = other.val[i - 1]; if (word > other_word) return true; else if (word < other_word) @@ -796,18 +863,18 @@ template struct BigInt { } LIBC_INLINE constexpr bool - operator>=(const BigInt &other) const { + operator>=(const BigInt &other) const { if constexpr (Signed) { // Check for different signs; - bool a_sign = val[WORDCOUNT - 1] >> 63; - bool b_sign = other.val[WORDCOUNT - 1] >> 63; + bool a_sign = val[WORD_COUNT - 1] >> (WORD_SIZE - 1); + bool b_sign = other.val[WORD_COUNT - 1] >> (WORD_SIZE - 1); if (a_sign != b_sign) { return b_sign; } } - for (size_t i = WORDCOUNT; i > 0; --i) { - uint64_t word = val[i - 1]; - uint64_t other_word = other.val[i - 1]; + for (size_t i = WORD_COUNT; i > 0; --i) { + WordType word = val[i - 1]; + WordType other_word = other.val[i - 1]; if (word > other_word) return true; else if (word < other_word) @@ -818,19 +885,19 @@ template struct BigInt { } LIBC_INLINE constexpr bool - operator<(const BigInt &other) const { + operator<(const BigInt &other) const { if constexpr (Signed) { // Check for different signs; - bool a_sign = val[WORDCOUNT - 1] >> 63; - bool b_sign = other.val[WORDCOUNT - 1] >> 63; + bool a_sign = val[WORD_COUNT - 1] >> (WORD_SIZE - 1); + bool b_sign = other.val[WORD_COUNT - 1] >> (WORD_SIZE - 1); if (a_sign != b_sign) { return a_sign; } } - for (size_t i = WORDCOUNT; i > 0; --i) { - uint64_t word = val[i 
- 1]; - uint64_t other_word = other.val[i - 1]; + for (size_t i = WORD_COUNT; i > 0; --i) { + WordType word = val[i - 1]; + WordType other_word = other.val[i - 1]; if (word > other_word) return false; else if (word < other_word) @@ -841,18 +908,18 @@ template struct BigInt { } LIBC_INLINE constexpr bool - operator<=(const BigInt &other) const { + operator<=(const BigInt &other) const { if constexpr (Signed) { // Check for different signs; - bool a_sign = val[WORDCOUNT - 1] >> 63; - bool b_sign = other.val[WORDCOUNT - 1] >> 63; + bool a_sign = val[WORD_COUNT - 1] >> (WORD_SIZE - 1); + bool b_sign = other.val[WORD_COUNT - 1] >> (WORD_SIZE - 1); if (a_sign != b_sign) { return a_sign; } } - for (size_t i = WORDCOUNT; i > 0; --i) { - uint64_t word = val[i - 1]; - uint64_t other_word = other.val[i - 1]; + for (size_t i = WORD_COUNT; i > 0; --i) { + WordType word = val[i - 1]; + WordType other_word = other.val[i - 1]; if (word > other_word) return false; else if (word < other_word) @@ -862,48 +929,53 @@ template struct BigInt { return true; } - LIBC_INLINE constexpr BigInt &operator++() { - BigInt one(1); + LIBC_INLINE constexpr BigInt &operator++() { + BigInt one(1); add(one); return *this; } - LIBC_INLINE constexpr BigInt operator++(int) { - BigInt oldval(*this); - BigInt one(1); + LIBC_INLINE constexpr BigInt operator++(int) { + BigInt oldval(*this); + BigInt one(1); add(one); return oldval; } - LIBC_INLINE constexpr BigInt &operator--() { - BigInt one(1); + LIBC_INLINE constexpr BigInt &operator--() { + BigInt one(1); sub(one); return *this; } - LIBC_INLINE constexpr BigInt operator--(int) { - BigInt oldval(*this); - BigInt one(1); + LIBC_INLINE constexpr BigInt operator--(int) { + BigInt oldval(*this); + BigInt one(1); sub(one); return oldval; } // Return the i-th 64-bit word of the number. 
- LIBC_INLINE constexpr const uint64_t &operator[](size_t i) const { + LIBC_INLINE constexpr const WordType &operator[](size_t i) const { return val[i]; } // Return the i-th 64-bit word of the number. - LIBC_INLINE constexpr uint64_t &operator[](size_t i) { return val[i]; } + LIBC_INLINE constexpr WordType &operator[](size_t i) { return val[i]; } - LIBC_INLINE uint64_t *data() { return val; } + LIBC_INLINE WordType *data() { return val; } - LIBC_INLINE const uint64_t *data() const { return val; } + LIBC_INLINE const WordType *data() const { return val; } }; -template using UInt = BigInt; +template +using UInt = + typename cpp::conditional_t, + BigInt>; -template using Int = BigInt; +template +using Int = typename cpp::conditional_t, + BigInt>; // Provides limits of U/Int<128>. template <> class numeric_limits> { @@ -927,45 +999,26 @@ template <> class numeric_limits> { }; // Provides is_integral of U/Int<128>, U/Int<192>, U/Int<256>. -template -struct is_integral> : cpp::true_type { - static_assert(Bits > 0 && Bits % 64 == 0, - "Number of bits in BigInt should be a multiple of 64."); -}; +template +struct is_integral> : cpp::true_type {}; // Provides is_unsigned of UInt<128>, UInt<192>, UInt<256>. 
-template struct is_unsigned> : public cpp::true_type { - static_assert(Bits > 0 && Bits % 64 == 0, - "Number of bits in UInt should be a multiple of 64."); -}; - -template -struct make_unsigned> : type_identity> { - static_assert(Bits > 0 && Bits % 64 == 0, - "Number of bits in Int should be a multiple of 64."); -}; +template +struct is_unsigned> : cpp::bool_constant {}; -template -struct make_unsigned> : type_identity> { - static_assert(Bits > 0 && Bits % 64 == 0, - "Number of bits in Int should be a multiple of 64."); -}; - -template -struct make_signed> : type_identity> { - static_assert(Bits > 0 && Bits % 64 == 0, - "Number of bits in Int should be a multiple of 64."); -}; +template +struct make_unsigned> + : type_identity> {}; -template -struct make_signed> : type_identity> { - static_assert(Bits > 0 && Bits % 64 == 0, - "Number of bits in Int should be a multiple of 64."); -}; +template +struct make_signed> + : type_identity> {}; namespace internal { template struct is_custom_uint : cpp::false_type {}; -template struct is_custom_uint> : cpp::true_type {}; + +template +struct is_custom_uint> : cpp::true_type {}; } // namespace internal // bit_cast to UInt diff --git a/libc/src/__support/float_to_string.h b/libc/src/__support/float_to_string.h index f30110d47b2192..83b68c936b27a9 100644 --- a/libc/src/__support/float_to_string.h +++ b/libc/src/__support/float_to_string.h @@ -208,7 +208,7 @@ LIBC_INLINE constexpr cpp::UInt get_table_positive(int exponent, num = num + 1; if (num > MOD_SIZE) { - auto rem = num.div_uint32_times_pow_2( + auto rem = num.div_uint_half_times_pow_2( EXP10_9, CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0)) .value(); num = rem; @@ -255,8 +255,8 @@ LIBC_INLINE cpp::UInt get_table_positive_df(int exponent, if (int_num > MOD_SIZE) { auto rem = int_num - .div_uint32_times_pow_2(EXP10_9, CALC_SHIFT_CONST + - (IDX_SIZE > 1 ? IDX_SIZE : 0)) + .div_uint_half_times_pow_2( + EXP10_9, CALC_SHIFT_CONST + (IDX_SIZE > 1 ? 
IDX_SIZE : 0)) .value(); int_num = rem; } @@ -318,7 +318,7 @@ LIBC_INLINE cpp::UInt get_table_negative(int exponent, size_t i) { num = num >> (-shift_amount); } if (num > MOD_SIZE) { - auto rem = num.div_uint32_times_pow_2( + auto rem = num.div_uint_half_times_pow_2( EXP10_9, CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0)) .value(); num = rem; @@ -360,8 +360,8 @@ LIBC_INLINE cpp::UInt get_table_negative_df(int exponent, if (int_num > MOD_SIZE) { auto rem = int_num - .div_uint32_times_pow_2(EXP10_9, CALC_SHIFT_CONST + - (IDX_SIZE > 1 ? IDX_SIZE : 0)) + .div_uint_half_times_pow_2( + EXP10_9, CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0)) .value(); int_num = rem; } @@ -389,7 +389,8 @@ LIBC_INLINE uint32_t mul_shift_mod_1e9(const FPBits::StorageType mantissa, const int32_t shift_amount) { cpp::UInt val(large); val = (val * mantissa) >> shift_amount; - return static_cast(val.div_uint32_times_pow_2(EXP10_9, 0).value()); + return static_cast( + val.div_uint_half_times_pow_2(static_cast(EXP10_9), 0).value()); } } // namespace internal @@ -658,7 +659,7 @@ template <> class FloatToString { template LIBC_INLINE static constexpr BlockInt grab_digits(cpp::UInt &int_num) { - auto wide_result = int_num.div_uint32_times_pow_2(EXP5_9, 9); + auto wide_result = int_num.div_uint_half_times_pow_2(EXP5_9, 9); // the optional only comes into effect when dividing by 0, which will // never happen here. Thus, we just assert that it has value. 
LIBC_ASSERT(wide_result.has_value()); @@ -695,7 +696,8 @@ template <> class FloatToString { while (float_as_int > 0) { LIBC_ASSERT(int_block_index < static_cast(BLOCK_BUFFER_LEN)); - block_buffer[int_block_index] = grab_digits(float_as_int); + block_buffer[int_block_index] = + grab_digits(float_as_int); ++int_block_index; } block_buffer_valid = int_block_index; @@ -718,7 +720,7 @@ template <> class FloatToString { size_t positive_int_block_index = 0; while (above_decimal_point > 0) { block_buffer[positive_int_block_index] = - grab_digits(above_decimal_point); + grab_digits(above_decimal_point); ++positive_int_block_index; } block_buffer_valid = positive_int_block_index; diff --git a/libc/src/__support/integer_utils.h b/libc/src/__support/integer_utils.h index 1d9a134934cc55..dd407f9b2ef9a6 100644 --- a/libc/src/__support/integer_utils.h +++ b/libc/src/__support/integer_utils.h @@ -19,7 +19,28 @@ namespace LIBC_NAMESPACE { -template NumberPair full_mul(T a, T b); +template NumberPair full_mul(T a, T b) { + NumberPair pa = split(a); + NumberPair pb = split(b); + NumberPair prod; + + prod.lo = pa.lo * pb.lo; // exact + prod.hi = pa.hi * pb.hi; // exact + NumberPair lo_hi = split(pa.lo * pb.hi); // exact + NumberPair hi_lo = split(pa.hi * pb.lo); // exact + + constexpr size_t HALF_BIT_WIDTH = sizeof(T) * CHAR_BIT / 2; + + auto r1 = add_with_carry(prod.lo, lo_hi.lo << HALF_BIT_WIDTH, T(0)); + prod.lo = r1.sum; + prod.hi = add_with_carry(prod.hi, lo_hi.hi, r1.carry).sum; + + auto r2 = add_with_carry(prod.lo, hi_lo.lo << HALF_BIT_WIDTH, T(0)); + prod.lo = r2.sum; + prod.hi = add_with_carry(prod.hi, hi_lo.hi, r2.carry).sum; + + return prod; +} template <> LIBC_INLINE NumberPair full_mul(uint32_t a, uint32_t b) { @@ -30,35 +51,16 @@ LIBC_INLINE NumberPair full_mul(uint32_t a, uint32_t b) { return result; } +#ifdef __SIZEOF_INT128__ template <> LIBC_INLINE NumberPair full_mul(uint64_t a, uint64_t b) { -#ifdef __SIZEOF_INT128__ __uint128_t prod = __uint128_t(a) * 
__uint128_t(b); NumberPair result; result.lo = uint64_t(prod); result.hi = uint64_t(prod >> 64); return result; -#else - NumberPair pa = split(a); - NumberPair pb = split(b); - NumberPair prod; - - prod.lo = pa.lo * pb.lo; // exact - prod.hi = pa.hi * pb.hi; // exact - NumberPair lo_hi = split(pa.lo * pb.hi); // exact - NumberPair hi_lo = split(pa.hi * pb.lo); // exact - - auto r1 = add_with_carry(prod.lo, lo_hi.lo << 32, uint64_t(0)); - prod.lo = r1.sum; - prod.hi = add_with_carry(prod.hi, lo_hi.hi, r1.carry).sum; - - auto r2 = add_with_carry(prod.lo, hi_lo.lo << 32, uint64_t(0)); - prod.lo = r2.sum; - prod.hi = add_with_carry(prod.hi, hi_lo.hi, r2.carry).sum; - - return prod; -#endif // __SIZEOF_INT128__ } +#endif // __SIZEOF_INT128__ } // namespace LIBC_NAMESPACE diff --git a/libc/test/src/__support/uint_test.cpp b/libc/test/src/__support/uint_test.cpp index 0ad72c35645c4b..1a1171b46781e8 100644 --- a/libc/test/src/__support/uint_test.cpp +++ b/libc/test/src/__support/uint_test.cpp @@ -588,7 +588,7 @@ TEST(LlvmLibcUIntClassTest, ConstexprInitTests) { d <<= e; \ LL_UInt320 q1 = y / d; \ LL_UInt320 r1 = y % d; \ - LL_UInt320 r2 = *y.div_uint32_times_pow_2(x, e); \ + LL_UInt320 r2 = *y.div_uint_half_times_pow_2(x, e); \ EXPECT_EQ(q1, y); \ EXPECT_EQ(r1, r2); \ } while (0) @@ -676,6 +676,52 @@ TEST(LlvmLibcUIntClassTest, ConstructorFromUInt128Tests) { ASSERT_EQ(LL_UInt192(e + f), LL_UInt192(a + b)); } +TEST(LlvmLibcUIntClassTest, WordTypeUInt128Tests) { + using LL_UInt256_128 = cpp::BigInt<256, false, __uint128_t>; + using LL_UInt128_128 = cpp::BigInt<128, false, __uint128_t>; + + LL_UInt256_128 a(1); + + ASSERT_EQ(static_cast(a), 1); + a = (a << 128) + 2; + ASSERT_EQ(static_cast(a), 2); + ASSERT_EQ(static_cast(a), uint64_t(2)); + a = (a << 32) + 3; + ASSERT_EQ(static_cast(a), 3); + ASSERT_EQ(static_cast(a), uint64_t(0x2'0000'0003)); + ASSERT_EQ(static_cast(a >> 32), 2); + ASSERT_EQ(static_cast(a >> (128 + 32)), 1); + + LL_UInt128_128 b(__uint128_t(1) << 127); + 
LL_UInt128_128 c(b); + a = b.ful_mul(c); + + ASSERT_EQ(static_cast(a >> 254), 1); + + LL_UInt256_128 d = LL_UInt256_128(123) << 4; + ASSERT_EQ(static_cast(d), 123 << 4); + LL_UInt256_128 e = a / d; + LL_UInt256_128 f = a % d; + LL_UInt256_128 r = *a.div_uint_half_times_pow_2(123, 4); + EXPECT_TRUE(e == a); + EXPECT_TRUE(f == r); +} + #endif // __SIZEOF_INT128__ +TEST(LlvmLibcUIntClassTest, OtherWordTypeTests) { + using LL_UInt96 = cpp::BigInt<96, false, uint32_t>; + + LL_UInt96 a(1); + + ASSERT_EQ(static_cast(a), 1); + a = (a << 32) + 2; + ASSERT_EQ(static_cast(a), 2); + ASSERT_EQ(static_cast(a), uint64_t(0x1'0000'0002)); + a = (a << 32) + 3; + ASSERT_EQ(static_cast(a), 3); + ASSERT_EQ(static_cast(a >> 32), 2); + ASSERT_EQ(static_cast(a >> 64), 1); +} + } // namespace LIBC_NAMESPACE From f0b271e4ebd2180f497694838ec9db0b2fd8ab4b Mon Sep 17 00:00:00 2001 From: Jim Ingham Date: Tue, 13 Feb 2024 13:15:28 -0800 Subject: [PATCH 055/240] Temporarily disable the TestAddParsedCommand.py while I figure out why it's crashing on the x86_64 Debian Linux worker. --- .../API/commands/command/script/add/TestAddParsedCommand.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lldb/test/API/commands/command/script/add/TestAddParsedCommand.py b/lldb/test/API/commands/command/script/add/TestAddParsedCommand.py index 7dba9c6937f211..c044e2bf8c8d28 100644 --- a/lldb/test/API/commands/command/script/add/TestAddParsedCommand.py +++ b/lldb/test/API/commands/command/script/add/TestAddParsedCommand.py @@ -13,6 +13,9 @@ class ParsedCommandTestCase(TestBase): NO_DEBUG_INFO_TESTCASE = True + # This crashes on the x86_64 Debian bot, but the failure is not helpful. + # Disable the test while I try to find a way to reproduce. 
+ @skipIfLinux def test(self): self.pycmd_tests() From 2400f704af18fd4b58ded158c3debe3b295accc6 Mon Sep 17 00:00:00 2001 From: Aart Bik <39774503+aartbik@users.noreply.github.com> Date: Tue, 13 Feb 2024 13:20:01 -0800 Subject: [PATCH 056/240] [mlir][sparse] add assemble test for Batched-CSR and CSR-Dense (#81660) These are formats supported by PyTorch sparse, so good to make sure that our assemble instructions work on these. --- .../SparseTensor/CPU/sparse_pack_d.mlir | 95 +++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100755 mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack_d.mlir diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack_d.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack_d.mlir new file mode 100755 index 00000000000000..55585a7c997430 --- /dev/null +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack_d.mlir @@ -0,0 +1,95 @@ +//-------------------------------------------------------------------------------------------------- +// WHEN CREATING A NEW TEST, PLEASE JUST COPY & PASTE WITHOUT EDITS. +// +// Set-up that's shared across all tests in this directory. In principle, this +// config could be moved to lit.local.cfg. However, there are downstream users that +// do not use these LIT config files. Hence why this is kept inline. 
+// +// DEFINE: %{sparsifier_opts} = enable-runtime-library=true +// DEFINE: %{sparsifier_opts_sve} = enable-arm-sve=true %{sparsifier_opts} +// DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" +// DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" +// DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils +// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} +// DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} +// +// DEFINE: %{env} = +//-------------------------------------------------------------------------------------------------- + +// RUN: %{compile} | %{run} | FileCheck %s +// +// Do the same run, but now with direct IR generation. +// REDEFINE: %{sparsifier_opts} = enable-runtime-library=false +// RUN: %{compile} | %{run} | FileCheck %s + +#BatchedCSR = #sparse_tensor.encoding<{ + map = (d0, d1, d2) -> (d0 : dense, d1 : dense, d2 : compressed), + posWidth = 64, + crdWidth = 32 +}> + +#CSRDense = #sparse_tensor.encoding<{ + map = (d0, d1, d2) -> (d0 : dense, d1 : compressed, d2 : dense), + posWidth = 64, + crdWidth = 32 +}> + +// Test with batched-CSR and CSR-dense. +module { + // + // Main driver. + // + func.func @entry() { + %c0 = arith.constant 0 : index + %f0 = arith.constant 0.0 : f32 + + // + // Setup BatchedCSR. + // + + %data1 = arith.constant dense< + [ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, + 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0 ]> : tensor<16xf32> + %pos1 = arith.constant dense< + [ 0, 2, 3, 4, 6, 6, 7, 9, 11, 13, 14, 15, 16 ]> : tensor<13xi64> + %crd1 = arith.constant dense< + [ 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1]> : tensor<16xi32> + + %s1 = sparse_tensor.assemble %data1, %pos1, %crd1 : tensor<16xf32>, tensor<13xi64>, tensor<16xi32> to tensor<4x3x2xf32, #BatchedCSR> + + // + // Setup CSRDense. 
+ // + + %data2 = arith.constant dense< + [ 1.0, 2.0, 0.0, 3.0, 4.0, 0.0, 5.0, 6.0, 0.0, 7.0, 8.0, + 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 0.0, 0.0, 15.0, 0.0, 16.0 ]> : tensor<22xf32> + %pos2 = arith.constant dense< + [ 0, 3, 5, 8, 11 ]> : tensor<5xi64> + %crd2 = arith.constant dense< + [ 0, 1, 2, 0, 2, 0, 1, 2, 0, 1, 2 ]> : tensor<11xi32> + + %s2 = sparse_tensor.assemble %data2, %pos2, %crd2 : tensor<22xf32>, tensor<5xi64>, tensor<11xi32> to tensor<4x3x2xf32, #CSRDense> + + // + // Verify. + // + // CHECK: ( ( ( 1, 2 ), ( 0, 3 ), ( 4, 0 ) ), ( ( 5, 6 ), ( 0, 0 ), ( 0, 7 ) ), ( ( 8, 9 ), ( 10, 11 ), ( 12, 13 ) ), ( ( 14, 0 ), ( 0, 15 ), ( 0, 16 ) ) ) + // CHECK: ( ( ( 1, 2 ), ( 0, 3 ), ( 4, 0 ) ), ( ( 5, 6 ), ( 0, 0 ), ( 0, 7 ) ), ( ( 8, 9 ), ( 10, 11 ), ( 12, 13 ) ), ( ( 14, 0 ), ( 0, 15 ), ( 0, 16 ) ) ) + // + + %d1 = sparse_tensor.convert %s1 : tensor<4x3x2xf32, #BatchedCSR> to tensor<4x3x2xf32> + %v1 = vector.transfer_read %d1[%c0, %c0, %c0], %f0 : tensor<4x3x2xf32>, vector<4x3x2xf32> + vector.print %v1 : vector<4x3x2xf32> + + %d2 = sparse_tensor.convert %s2 : tensor<4x3x2xf32, #CSRDense> to tensor<4x3x2xf32> + %v2 = vector.transfer_read %d1[%c0, %c0, %c0], %f0 : tensor<4x3x2xf32>, vector<4x3x2xf32> + vector.print %v2 : vector<4x3x2xf32> + + // FIXME: doing this explicitly crashes runtime + // bufferization.dealloc_tensor %s1 : tensor<4x3x2xf32, #BatchedCSR> + // bufferization.dealloc_tensor %s2 : tensor<4x3x2xf32, #CSRDense> + return + } +} From 52961491ca347e7c8766dc7c45841bacac6a4470 Mon Sep 17 00:00:00 2001 From: Felipe de Azevedo Piovezan Date: Tue, 13 Feb 2024 13:20:25 -0800 Subject: [PATCH 057/240] [DWARFDump] Make --verify handle all sections by default (#81559) The current behavior of --verify is that it only verifies debug_info, debug_abbrev and debug_names. This seems fairly arbitrary and might have been unintentional, as originally the absence of any section flags implied "all". 
This patch changes the behavior so that the verifier now verifies everything by default. It revealed two tests that had potentially invalid DWARF: 1. dwarfdump-str-offsets.s is adding padding between two debug_str_offset contributions. The standard does not explicitly allow this behavior. See issue https://github.com/llvm/llvm-project/issues/81558 2. dwarf5-macro.test uses a checked-in binary that has invalid debug_str_offsets. One of its entries points to the _middle_ of the string section: error: .debug_str_offsets: contribution 0x0: index 0x4: invalid string offset *0x18 == 0x455D, is neither zero nor immediately following a null character If we look at the closest offset to 0x455D in debug_str: ``` 0x0000454e: "__SLONG32_TYPE int" ``` 0x455D points to "int". --- llvm/test/DebugInfo/X86/dwarfdump-str-offsets.s | 5 ++++- llvm/test/DebugInfo/X86/skeleton-unit-verify.s | 2 ++ .../tools/llvm-dwarfdump/X86/verify_file_encoding.yaml | 2 ++ llvm/test/tools/llvm-dwarfutil/ELF/X86/dwarf5-macro.test | 7 +++++-- llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp | 2 +- 5 files changed, 14 insertions(+), 4 deletions(-) diff --git a/llvm/test/DebugInfo/X86/dwarfdump-str-offsets.s b/llvm/test/DebugInfo/X86/dwarfdump-str-offsets.s index 1725813aac7707..66dfb5f83acb3e 100644 --- a/llvm/test/DebugInfo/X86/dwarfdump-str-offsets.s +++ b/llvm/test/DebugInfo/X86/dwarfdump-str-offsets.s @@ -1,6 +1,9 @@ # RUN: llvm-mc -triple x86_64-unknown-linux %s -filetype=obj -o %t.o # RUN: llvm-dwarfdump -v %t.o 2> %t.err | FileCheck --check-prefixes=COMMON,SPLIT,OFFSETS %s -# RUN: llvm-dwarfdump -verify %t.o | FileCheck --check-prefix=VERIFY %s + +# FIXME: the verifier does not accept padding between debug-str-offset +# sections, which this test uses. +# RUN: llvm-dwarfdump -verify --debug-info %t.o | FileCheck --check-prefix=VERIFY %s # RUN: llvm-dwarfdump -debug-str-offsets %t.o | FileCheck --check-prefix=OFFSETS %s # # Check that we don't report an error on a non-existent range list table. 
diff --git a/llvm/test/DebugInfo/X86/skeleton-unit-verify.s b/llvm/test/DebugInfo/X86/skeleton-unit-verify.s index 92a3df486da39d..6aaac18169b604 100644 --- a/llvm/test/DebugInfo/X86/skeleton-unit-verify.s +++ b/llvm/test/DebugInfo/X86/skeleton-unit-verify.s @@ -11,6 +11,8 @@ # CHECK-NEXT: DW_TAG_skeleton_unit # CHECK-NEXT: error: Skeleton compilation unit has children. # CHECK-NEXT: Verifying dwo Units... +# CHECK-NEXT: Verifying .debug_line... +# CHECK-NEXT: Verifying .debug_str_offsets... # CHECK-NEXT: Errors detected. .section .debug_abbrev,"",@progbits diff --git a/llvm/test/tools/llvm-dwarfdump/X86/verify_file_encoding.yaml b/llvm/test/tools/llvm-dwarfdump/X86/verify_file_encoding.yaml index fe31436e9f6e35..4afb7758214414 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/verify_file_encoding.yaml +++ b/llvm/test/tools/llvm-dwarfdump/X86/verify_file_encoding.yaml @@ -51,6 +51,8 @@ # CHECK-NEXT: DW_AT_call_file [DW_FORM_sdata] (4) # CHECK-NEXT: DW_AT_call_line [DW_FORM_sdata] (5){{[[:space:]]}} # CHECK-NEXT: Verifying dwo Units... +# CHECK-NEXT: Verifying .debug_line... +# CHECK-NEXT: Verifying .debug_str_offsets... # CHECK-NEXT: error: Aggregated error counts: # CHECK-NEXT: error: Invalid encoding in DW_AT_decl_file occurred 4 time(s). # CHECK-NEXT: error: Invalid file index in DW_AT_call_line occurred 1 time(s). diff --git a/llvm/test/tools/llvm-dwarfutil/ELF/X86/dwarf5-macro.test b/llvm/test/tools/llvm-dwarfutil/ELF/X86/dwarf5-macro.test index 518244a01ab5fa..0c5cbe46b7c6c8 100644 --- a/llvm/test/tools/llvm-dwarfutil/ELF/X86/dwarf5-macro.test +++ b/llvm/test/tools/llvm-dwarfutil/ELF/X86/dwarf5-macro.test @@ -45,12 +45,15 @@ ## Check that macro table preserved during simple copying. # +# FIXME: the input of this test is itself invalid w.r.t. debug_str_offsets, +# which also causes the next two calls to --verify to fail, so we only verify +# debug_info on those. 
#RUN: llvm-dwarfutil --no-garbage-collection %p/Inputs/dwarf5-macro.out %t1 -#RUN: llvm-dwarfdump -verify %t1 | FileCheck %s +#RUN: llvm-dwarfdump -verify --debug-info %t1 | FileCheck %s #RUN: llvm-dwarfdump -a %t1 | FileCheck %s --check-prefix=MACRO #RUN: llvm-dwarfutil --linker parallel --no-garbage-collection %p/Inputs/dwarf5-macro.out %t1 -#RUN: llvm-dwarfdump -verify %t1 | FileCheck %s +#RUN: llvm-dwarfdump -verify %t1 --debug-info | FileCheck %s #RUN: llvm-dwarfdump -a %t1 | FileCheck %s --check-prefix=MACRO ## Check that macro table preserved during updating accelerator tables. diff --git a/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp index 8cdd84bcc867cb..2b438a8b134613 100644 --- a/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp +++ b/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp @@ -863,7 +863,7 @@ int main(int argc, char **argv) { if (DumpAll) DumpType = DIDT_All; if (DumpType == DIDT_Null) { - if (Verbose) + if (Verbose || Verify) DumpType = DIDT_All; else DumpType = DIDT_DebugInfo; From 91f4a84a1504e718e4f4d4eef5db7713dc30a030 Mon Sep 17 00:00:00 2001 From: Felipe de Azevedo Piovezan Date: Tue, 13 Feb 2024 13:20:49 -0800 Subject: [PATCH 058/240] [lldb][DWARFIndex] Use IDX_parent to implement GetFullyQualifiedType query (#79932) This commit changes DebugNamesDWARFIndex so that it now overrides `GetFullyQualifiedType` and attempts to use DW_IDX_parent, when available, to speed up such queries. When this type of information is not available, the base-class implementation is used. With this commit, we now achieve the 4x speedups reported in [1]. 
[1]: https://discourse.llvm.org/t/rfc-improve-dwarf-5-debug-names-type-lookup-parsing-speed/74151/38 --- .../SymbolFile/DWARF/DWARFDeclContext.h | 4 + .../SymbolFile/DWARF/DebugNamesDWARFIndex.cpp | 103 +++++++++ .../SymbolFile/DWARF/DebugNamesDWARFIndex.h | 9 + .../SymbolFile/DWARF/SymbolFileDWARF.h | 3 + .../unittests/SymbolFile/DWARF/CMakeLists.txt | 1 + .../DWARF/DWARFDebugNamesIndexTest.cpp | 208 ++++++++++++++++++ 6 files changed, 328 insertions(+) create mode 100644 lldb/unittests/SymbolFile/DWARF/DWARFDebugNamesIndexTest.cpp diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.h index a20a862d340296..7e6c5f51f4beb5 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.h @@ -47,6 +47,10 @@ class DWARFDeclContext { DWARFDeclContext() : m_entries() {} + DWARFDeclContext(llvm::ArrayRef entries) { + llvm::append_range(m_entries, entries); + } + void AppendDeclContext(dw_tag_t tag, const char *name) { m_entries.push_back(Entry(tag, name)); } diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp index b718f98340a70b..4da0d56fdcacb4 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp @@ -13,6 +13,7 @@ #include "lldb/Core/Module.h" #include "lldb/Utility/RegularExpression.h" #include "lldb/Utility/Stream.h" +#include "llvm/ADT/Sequence.h" #include using namespace lldb_private; @@ -218,6 +219,108 @@ void DebugNamesDWARFIndex::GetCompleteObjCClass( m_fallback.GetCompleteObjCClass(class_name, must_be_implementation, callback); } +namespace { +using Entry = llvm::DWARFDebugNames::Entry; + +/// If `entry` and all of its parents have an `IDX_parent`, use that information +/// to build and return a list of at most `max_parents` parent Entries. 
+/// `entry` itself is not included in the list. +/// If any parent does not have an `IDX_parent`, or the Entry data is corrupted, +/// nullopt is returned. +std::optional> +getParentChain(Entry entry, uint32_t max_parents) { + llvm::SmallVector parent_entries; + + do { + if (!entry.hasParentInformation()) + return std::nullopt; + + llvm::Expected> parent = entry.getParentDIEEntry(); + if (!parent) { + // Bad data. + LLDB_LOG_ERROR( + GetLog(DWARFLog::Lookups), parent.takeError(), + "Failed to extract parent entry from a non-empty IDX_parent"); + return std::nullopt; + } + + // Last parent in the chain. + if (!parent->has_value()) + break; + + parent_entries.push_back(**parent); + entry = **parent; + } while (parent_entries.size() < max_parents); + + return parent_entries; +} +} // namespace + +void DebugNamesDWARFIndex::GetFullyQualifiedType( + const DWARFDeclContext &context, + llvm::function_ref callback) { + if (context.GetSize() == 0) + return; + + llvm::StringRef leaf_name = context[0].name; + llvm::SmallVector parent_names; + for (auto idx : llvm::seq(1, context.GetSize())) + parent_names.emplace_back(context[idx].name); + + // For each entry, grab its parent chain and check if we have a match. + for (const DebugNames::Entry &entry : + m_debug_names_up->equal_range(leaf_name)) { + if (!isType(entry.tag())) + continue; + + // Grab at most one extra parent, subsequent parents are not necessary to + // test equality. + std::optional> parent_chain = + getParentChain(entry, parent_names.size() + 1); + + if (!parent_chain) { + // Fallback: use the base class implementation. 
+ if (!ProcessEntry(entry, [&](DWARFDIE die) { + return GetFullyQualifiedTypeImpl(context, die, callback); + })) + return; + continue; + } + + if (SameParentChain(parent_names, *parent_chain) && + !ProcessEntry(entry, callback)) + return; + } +} + +bool DebugNamesDWARFIndex::SameParentChain( + llvm::ArrayRef parent_names, + llvm::ArrayRef parent_entries) const { + + if (parent_entries.size() != parent_names.size()) + return false; + + auto SameAsEntryATName = [this](llvm::StringRef name, + const DebugNames::Entry &entry) { + // Peek at the AT_name of `entry` and test equality to `name`. + auto maybe_dieoffset = entry.getDIEUnitOffset(); + if (!maybe_dieoffset) + return false; + auto die_ref = ToDIERef(entry); + if (!die_ref) + return false; + return name == m_debug_info.PeekDIEName(*die_ref); + }; + + // If the AT_name of any parent fails to match the expected name, we don't + // have a match. + for (auto [parent_name, parent_entry] : + llvm::zip_equal(parent_names, parent_entries)) + if (!SameAsEntryATName(parent_name, parent_entry)) + return false; + return true; +} + void DebugNamesDWARFIndex::GetTypes( ConstString name, llvm::function_ref callback) { for (const DebugNames::Entry &entry : diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h index cca0913c4124c9..b54dd1162d20ab 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h @@ -42,6 +42,11 @@ class DebugNamesDWARFIndex : public DWARFIndex { void GetCompleteObjCClass( ConstString class_name, bool must_be_implementation, llvm::function_ref callback) override; + + /// Uses DWARF5's IDX_parent fields, when available, to speed up this query. 
+ void GetFullyQualifiedType( + const DWARFDeclContext &context, + llvm::function_ref callback) override; void GetTypes(ConstString name, llvm::function_ref callback) override; void GetTypes(const DWARFDeclContext &context, @@ -83,6 +88,10 @@ class DebugNamesDWARFIndex : public DWARFIndex { bool ProcessEntry(const DebugNames::Entry &entry, llvm::function_ref callback); + /// Returns true if `parent_entries` have identical names to `parent_names`. + bool SameParentChain(llvm::ArrayRef parent_names, + llvm::ArrayRef parent_entries) const; + static void MaybeLogLookupError(llvm::Error error, const DebugNames::NameIndex &ni, llvm::StringRef name); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h index 60baf694b463ec..01518b26ca669e 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h @@ -373,6 +373,9 @@ class SymbolFileDWARF : public SymbolFileCommon { Type *ResolveTypeUID(const DIERef &die_ref); + /// Returns the DWARFIndex for this symbol, if it exists. 
+ DWARFIndex *getIndex() { return m_index.get(); } + protected: SymbolFileDWARF(const SymbolFileDWARF &) = delete; const SymbolFileDWARF &operator=(const SymbolFileDWARF &) = delete; diff --git a/lldb/unittests/SymbolFile/DWARF/CMakeLists.txt b/lldb/unittests/SymbolFile/DWARF/CMakeLists.txt index 4a37ece1242915..d5b0be7ea2a28c 100644 --- a/lldb/unittests/SymbolFile/DWARF/CMakeLists.txt +++ b/lldb/unittests/SymbolFile/DWARF/CMakeLists.txt @@ -1,5 +1,6 @@ add_lldb_unittest(SymbolFileDWARFTests DWARFASTParserClangTests.cpp + DWARFDebugNamesIndexTest.cpp DWARFDIETest.cpp DWARFIndexCachingTest.cpp DWARFUnitTest.cpp diff --git a/lldb/unittests/SymbolFile/DWARF/DWARFDebugNamesIndexTest.cpp b/lldb/unittests/SymbolFile/DWARF/DWARFDebugNamesIndexTest.cpp new file mode 100644 index 00000000000000..e56e628d68e8cf --- /dev/null +++ b/lldb/unittests/SymbolFile/DWARF/DWARFDebugNamesIndexTest.cpp @@ -0,0 +1,208 @@ +//===-- DWARFDebugNamesIndexTest.cpp --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Plugins/SymbolFile/DWARF/DWARFDIE.h" +#include "Plugins/SymbolFile/DWARF/DWARFDebugInfo.h" +#include "Plugins/SymbolFile/DWARF/DWARFDeclContext.h" +#include "Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h" +#include "TestingSupport/Symbol/YAMLModuleTester.h" +#include "llvm/ADT/STLExtras.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +using namespace lldb; +using namespace lldb_private; +using namespace lldb_private::plugin::dwarf; +using StringRef = llvm::StringRef; + +static void +check_num_matches(DebugNamesDWARFIndex &index, int expected_num_matches, + llvm::ArrayRef<DWARFDeclContext::Entry> ctx_entries) { + DWARFDeclContext ctx(ctx_entries); + int num_matches = 0; + + index.GetFullyQualifiedType(ctx, [&](DWARFDIE die) { + num_matches++; + return true; + }); + ASSERT_EQ(num_matches, expected_num_matches); +} + +static DWARFDeclContext::Entry make_entry(const char *c) { + return DWARFDeclContext::Entry(dwarf::DW_TAG_class_type, c); +} + +TEST(DWARFDebugNamesIndexTest, FullyQualifiedQueryWithIDXParent) { + const char *yamldata = R"( +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_386 +DWARF: + debug_str: + - '1' + - '2' + - '3' + debug_abbrev: + - Table: + # We intentionally don't nest types in debug_info: if the nesting is not + # inferred from debug_names, we want the test to fail.
+ - Code: 0x1 + Tag: DW_TAG_compile_unit + Children: DW_CHILDREN_yes + - Code: 0x2 + Tag: DW_TAG_class_type + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + debug_info: + - Version: 4 + AddrSize: 8 + Entries: + - AbbrCode: 0x1 + - AbbrCode: 0x2 + Values: + - Value: 0x0 # Name "1" + - AbbrCode: 0x2 + Values: + - Value: 0x2 # Name "2" + - AbbrCode: 0x2 + Values: + - Value: 0x4 # Name "3" + - AbbrCode: 0x0 + debug_names: + Abbreviations: + - Code: 0x11 + Tag: DW_TAG_class_type + Indices: + - Idx: DW_IDX_parent + Form: DW_FORM_flag_present + - Idx: DW_IDX_die_offset + Form: DW_FORM_ref4 + - Code: 0x22 + Tag: DW_TAG_class_type + Indices: + - Idx: DW_IDX_parent + Form: DW_FORM_ref4 + - Idx: DW_IDX_die_offset + Form: DW_FORM_ref4 + Entries: + - Name: 0x0 # strp to Name1 + Code: 0x11 + Values: + - 0xc # Die offset to entry named "1" + - Name: 0x2 # strp to Name2 + Code: 0x22 + Values: + - 0x0 # Parent = First entry ("1") + - 0x11 # Die offset to entry named "1::2" + - Name: 0x4 # strp to Name3 + Code: 0x22 + Values: + - 0x6 # Parent = Second entry ("1::2") + - 0x16 # Die offset to entry named "1::2::3" + - Name: 0x4 # strp to Name3 + Code: 0x11 + Values: + - 0x16 # Die offset to entry named "3" +)"; + + YAMLModuleTester t(yamldata); + auto *symbol_file = + llvm::cast<SymbolFileDWARF>(t.GetModule()->GetSymbolFile()); + auto *index = static_cast<DebugNamesDWARFIndex *>(symbol_file->getIndex()); + ASSERT_NE(index, nullptr); + + check_num_matches(*index, 1, {make_entry("1")}); + check_num_matches(*index, 1, {make_entry("2"), make_entry("1")}); + check_num_matches(*index, 1, + {make_entry("3"), make_entry("2"), make_entry("1")}); + check_num_matches(*index, 0, {make_entry("2")}); + check_num_matches(*index, 1, {make_entry("3")}); +} + +TEST(DWARFDebugNamesIndexTest, FullyQualifiedQueryWithoutIDXParent) { + const char *yamldata = R"( +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_386 +DWARF: + debug_str: + - '1' + - '2' +
debug_abbrev: + - Table: + - Code: 0x1 + Tag: DW_TAG_compile_unit + Children: DW_CHILDREN_yes + - Code: 0x2 + Tag: DW_TAG_class_type + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Code: 0x3 + Tag: DW_TAG_class_type + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + debug_info: + - Version: 4 + AddrSize: 8 + Entries: + - AbbrCode: 0x1 + - AbbrCode: 0x2 + Values: + - Value: 0x0 # Name "1" + - AbbrCode: 0x3 + Values: + - Value: 0x2 # Name "2" + - AbbrCode: 0x0 + - AbbrCode: 0x3 + Values: + - Value: 0x2 # Name "2" + - AbbrCode: 0x0 + debug_names: + Abbreviations: + - Code: 0x1 + Tag: DW_TAG_class_type + Indices: + - Idx: DW_IDX_die_offset + Form: DW_FORM_ref4 + Entries: + - Name: 0x0 # strp to Name1 + Code: 0x1 + Values: + - 0xc # Die offset to entry named "1" + - Name: 0x2 # strp to Name2 + Code: 0x1 + Values: + - 0x11 # Die offset to entry named "1::2" + - Name: 0x2 # strp to Name2 + Code: 0x1 + Values: + - 0x17 # Die offset to entry named "2" +)"; + + YAMLModuleTester t(yamldata); + auto *symbol_file = + llvm::cast<SymbolFileDWARF>(t.GetModule()->GetSymbolFile()); + auto *index = static_cast<DebugNamesDWARFIndex *>(symbol_file->getIndex()); + ASSERT_NE(index, nullptr); + + check_num_matches(*index, 1, {make_entry("1")}); + check_num_matches(*index, 1, {make_entry("2"), make_entry("1")}); + check_num_matches(*index, 1, {make_entry("2")}); +} From fa77e1f5468bc6be99da89860f42059df13d3b82 Mon Sep 17 00:00:00 2001 From: Jeremy Morse Date: Tue, 13 Feb 2024 20:49:36 +0000 Subject: [PATCH 059/240] [DebugInfo][RemoveDIs] Convert back to intrinsic form for ThinLTO As explained on discourse [0] (comment 12), to get the non-intrinsic form of debug-info records enabled and testing, we're only using it inside of the pass manager in LLVM right now.
Things like the textual IR writer and bitcode writing _passes_ are instrumented to convert back to intrinsic-form when writing a module out, but it turns out we missed the ThinLTO bitcode writing pass. That causes uh, all variable location debug-info to be dropped in ThinLTO mode (oops). This patch adds that conversion; it should be low risk as it's identical to what happens in all the other passes. However should this commit turn out to cause trouble, please instead revert d759618df76 or whichever is the most recent commit to set UseNewDbgInfoFormat to default to true. That'll revert LLVM back to the definitely-correct behaviour. [0] https://discourse.llvm.org/t/rfc-instruction-api-changes-needed-to-eliminate-debug-intrinsics-from-ir/68939 --- llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp index e5f9fa1dda88e3..dd6062d303d42e 100644 --- a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -580,11 +580,22 @@ PreservedAnalyses llvm::ThinLTOBitcodeWriterPass::run(Module &M, ModuleAnalysisManager &AM) { FunctionAnalysisManager &FAM = AM.getResult(M).getManager(); + + // RemoveDIs: there's no bitcode representation of the DPValue debug-info, + // convert to dbg.values before writing out. + bool IsNewDbgInfoFormat = M.IsNewDbgInfoFormat; + if (IsNewDbgInfoFormat) + M.convertFromNewDbgValues(); + bool Changed = writeThinLTOBitcode( OS, ThinLinkOS, [&FAM](Function &F) -> AAResults & { return FAM.getResult(F); }, M, &AM.getResult(M)); + + if (IsNewDbgInfoFormat) + M.convertToNewDbgValues(); + return Changed ? 
PreservedAnalyses::none() : PreservedAnalyses::all(); } From 99c5a66c62ae6b818fcc62a4d9c936ba9d82bdce Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 13 Feb 2024 13:24:16 -0800 Subject: [PATCH 060/240] Revert "[SeparateConstOffsetFromGEP] Reorder trivial GEP chains to separate constants (#73056)" and follow ups "ninja check-llvm" is failing on tip of tree. This reverts commit ec0aa1646e9953d1a8d0d15dc381d3250c854572. This reverts commit 1b65742f8c71f576381fe85d5e34579b24f2d874. --- .../Scalar/SeparateConstOffsetFromGEP.cpp | 73 +---- ...ne-sink-temporal-divergence-swdev407790.ll | 34 +-- .../AMDGPU/splitkit-getsubrangeformask.ll | 251 ++++++++++-------- llvm/test/CodeGen/PowerPC/licm-remat.ll | 2 +- .../AMDGPU/reorder-gep-inbounds.ll | 51 ---- .../AMDGPU/reorder-gep.ll | 175 ------------ .../NVPTX/lower-gep-reorder.ll | 62 ----- .../SeparateConstOffsetFromGEP/reorder-gep.ll | 188 ------------- 8 files changed, 157 insertions(+), 679 deletions(-) delete mode 100644 llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll delete mode 100644 llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll delete mode 100644 llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lower-gep-reorder.ll delete mode 100644 llvm/test/Transforms/SeparateConstOffsetFromGEP/reorder-gep.ll diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index 5124909696aadb..4481375054ecf1 100644 --- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -391,11 +391,6 @@ class SeparateConstOffsetFromGEP { /// and returns true if the splitting succeeds. bool splitGEP(GetElementPtrInst *GEP); - /// Tries to reorder the given GEP with the GEP that produces the base if - /// doing so results in producing a constant offset as the outermost - /// index. 
- bool reorderGEP(GetElementPtrInst *GEP, TargetTransformInfo &TTI); - /// Lower a GEP with multiple indices into multiple GEPs with a single index. /// Function splitGEP already split the original GEP into a variadic part and /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the @@ -969,66 +964,6 @@ SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic, Variadic->eraseFromParent(); } -bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP, - TargetTransformInfo &TTI) { - Type *GEPType = GEP->getResultElementType(); - // TODO: support reordering for non-trivial GEP chains - if (GEPType->isAggregateType() || GEP->getNumIndices() != 1) - return false; - - auto PtrGEP = dyn_cast(GEP->getPointerOperand()); - if (!PtrGEP) - return false; - Type *PtrGEPType = PtrGEP->getResultElementType(); - // TODO: support reordering for non-trivial GEP chains - if (PtrGEPType->isAggregateType() || PtrGEP->getNumIndices() != 1) - return false; - - // TODO: support reordering for non-trivial GEP chains - if (PtrGEPType != GEPType || - PtrGEP->getSourceElementType() != GEP->getSourceElementType()) - return false; - - bool NestedNeedsExtraction; - int64_t NestedByteOffset = - accumulateByteOffset(PtrGEP, NestedNeedsExtraction); - if (!NestedNeedsExtraction) - return false; - - unsigned AddrSpace = PtrGEP->getPointerAddressSpace(); - if (!TTI.isLegalAddressingMode(GEP->getResultElementType(), - /*BaseGV=*/nullptr, NestedByteOffset, - /*HasBaseReg=*/true, /*Scale=*/0, AddrSpace)) - return false; - - IRBuilder<> Builder(GEP); - Builder.SetCurrentDebugLocation(GEP->getDebugLoc()); - bool GEPInBounds = GEP->isInBounds(); - bool PtrGEPInBounds = PtrGEP->isInBounds(); - bool IsChainInBounds = GEPInBounds && PtrGEPInBounds; - if (IsChainInBounds) { - auto GEPIdx = GEP->indices().begin(); - auto KnownGEPIdx = computeKnownBits(GEPIdx->get(), *DL); - IsChainInBounds &= KnownGEPIdx.isNonNegative(); - if (IsChainInBounds) { - auto PtrGEPIdx = 
GEP->indices().begin(); - auto KnownPtrGEPIdx = computeKnownBits(PtrGEPIdx->get(), *DL); - IsChainInBounds &= KnownPtrGEPIdx.isNonNegative(); - } - } - - // For trivial GEP chains, we can swap the indicies. - auto NewSrc = Builder.CreateGEP(PtrGEPType, PtrGEP->getPointerOperand(), - SmallVector(GEP->indices())); - cast(NewSrc)->setIsInBounds(IsChainInBounds); - auto NewGEP = Builder.CreateGEP(GEPType, NewSrc, - SmallVector(PtrGEP->indices())); - cast(NewGEP)->setIsInBounds(IsChainInBounds); - GEP->replaceAllUsesWith(NewGEP); - RecursivelyDeleteTriviallyDeadInstructions(GEP); - return true; -} - bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { // Skip vector GEPs. if (GEP->getType()->isVectorTy()) @@ -1044,12 +979,10 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { bool NeedsExtraction; int64_t AccumulativeByteOffset = accumulateByteOffset(GEP, NeedsExtraction); - TargetTransformInfo &TTI = GetTTI(*GEP->getFunction()); - - if (!NeedsExtraction) { - Changed |= reorderGEP(GEP, TTI); + if (!NeedsExtraction) return Changed; - } + + TargetTransformInfo &TTI = GetTTI(*GEP->getFunction()); // If LowerGEP is disabled, before really splitting the GEP, check whether the // backend supports the addressing mode we are about to produce. 
If no, this diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll index 0bb5288f43efc8..138a6a86cee984 100644 --- a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll +++ b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll @@ -273,11 +273,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CHECK-NEXT: ds_write_b32 v0, v58 ; CHECK-NEXT: s_branch .LBB0_7 -; CHECK-NEXT: .LBB0_16: ; %Flow45 +; CHECK-NEXT: .LBB0_16: ; %Flow43 ; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s53 ; CHECK-NEXT: v_mov_b32_e32 v57, v0 -; CHECK-NEXT: .LBB0_17: ; %Flow46 +; CHECK-NEXT: .LBB0_17: ; %Flow44 ; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s52 ; CHECK-NEXT: s_mov_b32 s49, exec_lo @@ -323,11 +323,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CHECK-NEXT: ds_write_b32 v0, v57 ; CHECK-NEXT: s_branch .LBB0_19 -; CHECK-NEXT: .LBB0_22: ; %Flow43 +; CHECK-NEXT: .LBB0_22: ; %Flow41 ; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: s_inst_prefetch 0x2 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s52 -; CHECK-NEXT: .LBB0_23: ; %Flow44 +; CHECK-NEXT: .LBB0_23: ; %Flow42 ; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s49 ; CHECK-NEXT: ; %bb.24: ; in Loop: Header=BB0_5 Depth=1 @@ -340,7 +340,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_or_b32 s43, s4, s43 ; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s43 ; CHECK-NEXT: s_cbranch_execnz .LBB0_5 -; CHECK-NEXT: .LBB0_25: ; %Flow51 +; CHECK-NEXT: .LBB0_25: ; %Flow49 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s42 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 ; CHECK-NEXT: 
v_mov_b32_e32 v0, 1 @@ -362,10 +362,12 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: v_cmpx_gt_u32_e64 v47, v41 ; CHECK-NEXT: s_cbranch_execz .LBB0_33 ; CHECK-NEXT: ; %bb.26: -; CHECK-NEXT: s_mov_b32 s42, 0 +; CHECK-NEXT: s_add_u32 s42, s44, 8 +; CHECK-NEXT: s_addc_u32 s43, s45, 0 +; CHECK-NEXT: s_mov_b32 s44, 0 ; CHECK-NEXT: s_branch .LBB0_28 ; CHECK-NEXT: .LBB0_27: ; in Loop: Header=BB0_28 Depth=1 -; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s43 +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s45 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: s_add_u32 s8, s34, 40 @@ -381,12 +383,12 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7] ; CHECK-NEXT: v_add_co_u32 v41, vcc_lo, v0, v41 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc_lo, v47, v41 -; CHECK-NEXT: s_or_b32 s42, vcc_lo, s42 -; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s42 +; CHECK-NEXT: s_or_b32 s44, vcc_lo, s44 +; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s44 ; CHECK-NEXT: s_cbranch_execz .LBB0_33 ; CHECK-NEXT: .LBB0_28: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v41 -; CHECK-NEXT: s_mov_b32 s43, exec_lo +; CHECK-NEXT: s_mov_b32 s45, exec_lo ; CHECK-NEXT: ds_read_b32 v0, v0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: v_lshrrev_b32_e32 v63, 10, v0 @@ -395,15 +397,15 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: v_mul_u32_u24_e32 v1, 0x180, v63 ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 5, v62 ; CHECK-NEXT: v_lshlrev_b32_e32 v4, 5, v72 -; CHECK-NEXT: v_add_co_u32 v2, s4, s44, v1 -; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, s45, 0, s4 +; CHECK-NEXT: v_add_co_u32 v2, s4, s42, v1 +; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, s43, 0, s4 ; CHECK-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 ; CHECK-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v3, vcc_lo ; CHECK-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4 ; 
CHECK-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo ; CHECK-NEXT: s_clause 0x1 -; CHECK-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:8 -; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off offset:8 +; CHECK-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_xor_b32_e32 v46, v9, v5 ; CHECK-NEXT: v_xor_b32_e32 v45, v8, v4 @@ -415,8 +417,8 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_cbranch_execz .LBB0_27 ; CHECK-NEXT: ; %bb.29: ; in Loop: Header=BB0_28 Depth=1 ; CHECK-NEXT: s_clause 0x1 -; CHECK-NEXT: global_load_dwordx2 v[58:59], v[2:3], off offset:24 -; CHECK-NEXT: global_load_dwordx2 v[60:61], v[0:1], off offset:24 +; CHECK-NEXT: global_load_dwordx2 v[58:59], v[2:3], off offset:16 +; CHECK-NEXT: global_load_dwordx2 v[60:61], v[0:1], off offset:16 ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 4, v45 ; CHECK-NEXT: v_alignbit_b32 v1, v46, v45, 12 ; CHECK-NEXT: v_and_b32_e32 v2, 0xf0000, v45 diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll b/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll index b87439a9d6fae7..8c806e76bde6ec 100644 --- a/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll +++ b/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll @@ -31,188 +31,205 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sgpr_32 = COPY $sgpr10 ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sgpr_32 = COPY $sgpr8 ; CHECK-NEXT: undef [[S_LOAD_DWORDX2_IMM:%[0-9]+]].sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 232, 0 :: (invariant load (s64) from %ir.39, addrspace 4) - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %125:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: KILL undef %125:sgpr_128 ; CHECK-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 
[[COPY5]], 4, implicit-def dead $scc ; CHECK-NEXT: [[S_LSHL_B32_1:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY4]], 4, implicit-def dead $scc ; CHECK-NEXT: [[S_LSHL_B32_2:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 4, implicit-def dead $scc ; CHECK-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_]], 31, implicit-def dead $scc ; CHECK-NEXT: [[S_ASHR_I32_1:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_1]], 31, implicit-def dead $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]].sub1:sgpr_128 = S_AND_B32 [[S_LOAD_DWORDX2_IMM]].sub1, 65535, implicit-def dead $scc ; CHECK-NEXT: [[S_ASHR_I32_2:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_2]], 31, implicit-def dead $scc - ; CHECK-NEXT: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM]], 29, implicit-def dead $scc - ; CHECK-NEXT: [[S_SUB_I32_1:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM]], 30, implicit-def dead $scc + ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]].sub1:sgpr_128 = S_AND_B32 [[S_LOAD_DWORDX2_IMM]].sub1, 65535, implicit-def dead $scc ; CHECK-NEXT: undef [[S_ADD_U32_:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_2]], implicit-def $scc ; CHECK-NEXT: [[S_ADD_U32_:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_]], 16, 0 :: (invariant load (s128) from %ir.81, addrspace 4) ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM1:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM undef %74:sreg_64, 0, 0 :: (invariant load (s128) from `ptr addrspace(4) undef`, addrspace 4) + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %132:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: KILL undef %74:sreg_64 + ; CHECK-NEXT: KILL undef %132:sgpr_128 ; CHECK-NEXT: KILL [[S_ADD_U32_]].sub0, [[S_ADD_U32_]].sub1 ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = 
S_BUFFER_LOAD_DWORD_IMM [[S_LOAD_DWORDX4_IMM]], 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; CHECK-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sgpr_128 = S_MOV_B32 0 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET undef %118:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], undef %89:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: KILL undef %89:sgpr_128 - ; CHECK-NEXT: KILL undef %118:sgpr_128 + ; CHECK-NEXT: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM]], 29, implicit-def dead $scc + ; CHECK-NEXT: [[S_SUB_I32_1:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM]], 30, implicit-def dead $scc ; CHECK-NEXT: [[S_SUB_I32_2:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM1]], 31, implicit-def dead $scc - ; CHECK-NEXT: undef [[S_ADD_U32_1:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_1:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_2:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_1]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_2:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_3:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_2]], implicit-def $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM2:%[0-9]+]]:sgpr_128 = 
S_LOAD_DWORDX4_IMM [[S_ADD_U32_1]], 64, 0 :: (invariant load (s128) from %ir.87, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM3:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_2]], 64, 0 :: (invariant load (s128) from %ir.93, addrspace 4) - ; CHECK-NEXT: KILL [[S_ADD_U32_1]].sub0, [[S_ADD_U32_1]].sub1 - ; CHECK-NEXT: KILL [[S_ADD_U32_2]].sub0, [[S_ADD_U32_2]].sub1 - ; CHECK-NEXT: [[S_ADD_U32_3:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_ASHR_I32_3:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 undef %169:sreg_32, 31, implicit-def dead $scc - ; CHECK-NEXT: undef [[S_ADD_U32_4:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], undef %169:sreg_32, implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_4:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_5:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_5:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_6:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_1]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_6:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_7:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, undef %169:sreg_32, implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_7:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_8:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_2]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_8:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef 
[[S_ADD_U32_9:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY8]], [[S_LSHL_B32_]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_9:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %48:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_10:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_1]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_10:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_11:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_2]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_11:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY6]], 64, implicit-def $scc + ; CHECK-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %54:sreg_32, 0, implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_2:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_1]], [[S_LSHL_B32_]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_2:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_3:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_1]], [[S_LSHL_B32_1]], implicit-def $scc + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM2:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_2]], 0, 0 :: (invariant load (s128) from %ir.87, addrspace 4) + ; CHECK-NEXT: [[S_ADD_U32_3:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_4:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_1]], [[S_LSHL_B32_2]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_4:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_ASHR_I32_3:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 undef %171:sreg_32, 31, 
implicit-def dead $scc + ; CHECK-NEXT: undef [[S_ADD_U32_5:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_1]], undef %171:sreg_32, implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_5:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_6:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_6:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_7:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_1]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_7:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_8:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, undef %171:sreg_32, implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_8:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_ADD_U32_9:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY7]].sub0, 224, implicit-def $scc + ; CHECK-NEXT: [[S_ADDC_U32_1:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %51:sreg_32, 0, implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_10:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_9]], [[S_LSHL_B32_]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_10:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_1]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_11:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_9]], [[S_LSHL_B32_1]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_11:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_1]], [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_12:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_9]], [[S_LSHL_B32_2]], implicit-def $scc + ; CHECK-NEXT: 
[[S_ADD_U32_12:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_1]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_ADD_U32_13:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY7]].sub0, 576, implicit-def $scc + ; CHECK-NEXT: [[S_ADDC_U32_2:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %51:sreg_32, 0, implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_14:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_13]], [[S_LSHL_B32_]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_14:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_2]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_15:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_13]], [[S_LSHL_B32_2]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_15:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_2]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_16:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_13]], undef %171:sreg_32, implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_16:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_2]], [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_17:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY8]], [[S_LSHL_B32_]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_17:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %48:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_18:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_1]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_18:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_19:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_2]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_19:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 
[[S_LSHL_B32_]], 16, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_2]], 16, implicit-def dead $scc ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], undef %301:sreg_32, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], undef %312:sreg_32, 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[S_MOV_B32_]], 16, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %356:sgpr_128, undef %357:sreg_32, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %367:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM4:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_3]], 64, 0 :: (invariant load (s128) from %ir.99, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM5:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_4]], 64, 0 :: (invariant load (s128) from %ir.107, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM6:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_6]], 0, 0 :: (invariant load (s128) from %ir.117, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM7:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_7]], 0, 0 :: (invariant load 
(s128) from %ir.124, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET undef %118:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %367:sgpr_128, undef %368:sreg_32, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %378:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM3:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_3]], 0, 0 :: (invariant load (s128) from %ir.92, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM4:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_4]], 0, 0 :: (invariant load (s128) from %ir.97, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM5:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 0, 0 :: (invariant load (s128) from %ir.104, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM6:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_6]], 0, 0 :: (invariant load (s128) from %ir.109, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM7:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_7]], 0, 0 :: (invariant load (s128) from %ir.114, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %351:sgpr_128, [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %362:sgpr_128, [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: 
[[BUFFER_LOAD_FORMAT_X_IDXEN3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM3]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %362:sgpr_128, [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %373:sgpr_128, [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[S_ADD_I32_2:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM]], -98, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_3:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM1]], -114, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_4:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM2]], -130, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_5:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM2]], -178, implicit-def dead $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM8:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 0, 0 :: (invariant load (s128) from %ir.112, addrspace 4) - ; CHECK-NEXT: undef [[S_ADD_U32_12:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY10]], [[S_LSHL_B32_]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_12:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %42:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_13:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_13:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_14:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_1]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_14:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit 
$scc - ; CHECK-NEXT: undef [[S_ADD_U32_15:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_2]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_15:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_20:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY10]], [[S_LSHL_B32_]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_20:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %42:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_21:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_21:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_22:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_1]], implicit-def $scc + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM8:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_8]], 0, 0 :: (invariant load (s128) from %ir.121, addrspace 4) + ; CHECK-NEXT: [[S_ADD_U32_22:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_23:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_2]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_23:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_LSHL_B32_3:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY12]], 4, implicit-def dead $scc - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN4:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM3]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 
; CHECK-NEXT: [[S_ADD_I32_6:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_3]], 16, implicit-def dead $scc - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %383:sgpr_128, [[S_ADD_I32_6]], 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %394:sgpr_128, [[S_ADD_I32_6]], 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN4:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM9:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY7]], 224, 0 :: (invariant load (s128) from %ir.126, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM10:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_10]], 0, 0 :: (invariant load (s128) from %ir.127, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN5:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM9:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 224, 0 :: (invariant load (s128) from %ir.129, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM10:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY7]], 224, 0 :: (invariant load (s128) from %ir.145, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM11:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 576, 0 :: (invariant load (s128) from %ir.150, addrspace 4) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN6:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM12:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM 
[[S_ADD_U32_6]], 224, 0 :: (invariant load (s128) from %ir.134, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM13:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_7]], 576, 0 :: (invariant load (s128) from %ir.162, addrspace 4) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN7:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN8:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM11:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_11]], 0, 0 :: (invariant load (s128) from %ir.132, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM12:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_12]], 0, 0 :: (invariant load (s128) from %ir.137, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN6:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN7:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN8:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_ADD_I32_7:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM4]], -217, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_8:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -233, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_9:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM5]], -249, implicit-def dead 
$scc ; CHECK-NEXT: [[S_ADD_I32_10:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM3]], -297, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_11:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -313, implicit-def dead $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM14:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_8]], 224, 0 :: (invariant load (s128) from %ir.140, addrspace 4) ; CHECK-NEXT: [[S_ADD_I32_12:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -329, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_13:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -345, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_14:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM6]], -441, implicit-def dead $scc - ; CHECK-NEXT: undef [[S_ADD_U32_16:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY2]], [[S_LSHL_B32_2]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_16:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %36:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_ADD_U32_24:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], 160, implicit-def $scc + ; CHECK-NEXT: [[S_ADDC_U32_3:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %36:sreg_32, 0, implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_25:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_24]], [[S_LSHL_B32_2]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_25:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_3]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_LSHL_B32_4:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY13]], 4, implicit-def dead $scc - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN9:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN9:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM10]], 0, 0, 0, 0, implicit $exec 
:: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_ASHR_I32_4:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_4]], 31, implicit-def dead $scc - ; CHECK-NEXT: undef [[S_ADD_U32_17:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY2]], [[S_LSHL_B32_4]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_17:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %36:sreg_32, [[S_ASHR_I32_4]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_26:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_24]], [[S_LSHL_B32_4]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_26:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_3]], [[S_ASHR_I32_4]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_ADD_U32_27:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]].sub0, 168, implicit-def $scc + ; CHECK-NEXT: [[S_ADDC_U32_4:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %57:sreg_32, 0, implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM13:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_14]], 0, 0 :: (invariant load (s128) from %ir.147, addrspace 4) ; CHECK-NEXT: [[S_LSHL_B32_5:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY5]], 3, implicit-def dead $scc - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN10:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN10:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM11]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_ASHR_I32_5:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_5]], 31, implicit-def dead $scc - ; CHECK-NEXT: undef [[S_ADD_U32_18:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_5]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_18:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_5]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: 
[[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_ADD_U32_18]], 168, 0 :: (invariant load (s32) from %ir.273, align 8, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM15:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_8]], 576, 0 :: (invariant load (s128) from %ir.157, addrspace 4) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN11:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN12:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN13:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM11]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: undef [[S_ADD_U32_28:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_27]], [[S_LSHL_B32_5]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_28:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_5]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_ADD_U32_28]], 0, 0 :: (invariant load (s32) from %ir.269, align 8, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM14:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_15]], 0, 0 :: (invariant load (s128) from %ir.154, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN11:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM15:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_16]], 0, 0 :: (invariant load (s128) from %ir.159, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN12:%[0-9]+]]:vgpr_32 
= BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN13:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM13]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]].sub3:sgpr_128 = S_MOV_B32 553734060 ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]].sub2:sgpr_128 = S_MOV_B32 -1 ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]] - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM16:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_9]], 0, 0 :: (invariant load (s128) from %ir.170, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM16:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_17]], 0, 0 :: (invariant load (s128) from %ir.167, addrspace 4) ; CHECK-NEXT: [[COPY15:%[0-9]+]].sub1:sgpr_128 = COPY [[S_MOV_B32_]].sub1 ; CHECK-NEXT: [[COPY15:%[0-9]+]].sub0:sgpr_128 = COPY [[S_LOAD_DWORD_IMM]] ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY15]], 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN14:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM15]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN15:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM13]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM17:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_10]], 0, 0 :: (invariant load (s128) from %ir.178, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM18:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_11]], 0, 0 :: (invariant load (s128) from %ir.183, addrspace 4) + ; CHECK-NEXT: 
[[BUFFER_LOAD_FORMAT_X_IDXEN14:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN15:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM15]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM17:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_18]], 0, 0 :: (invariant load (s128) from %ir.175, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM18:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_19]], 0, 0 :: (invariant load (s128) from %ir.180, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN16:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM16]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_LSHL_B32_6:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY4]], 3, implicit-def dead $scc ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_ASHR_I32_6:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_6]], 31, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_15:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM4]], -467, implicit-def dead $scc - ; CHECK-NEXT: undef [[S_ADD_U32_19:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_6]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_19:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_6]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[S_ADD_U32_19]], 168, 0 :: (invariant load (s64) from %ir.282, addrspace 4) + ; CHECK-NEXT: undef [[S_ADD_U32_29:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_27]], 
[[S_LSHL_B32_6]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_29:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_6]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[S_ADD_U32_29]], 0, 0 :: (invariant load (s64) from %ir.277, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM17]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM18]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM19:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_12]], 0, 0 :: (invariant load (s128) from %ir.205, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM20:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_13]], 0, 0 :: (invariant load (s128) from %ir.211, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM19:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_20]], 0, 0 :: (invariant load (s128) from %ir.202, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM20:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_21]], 0, 0 :: (invariant load (s128) from %ir.208, addrspace 4) ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]] - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM21:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_14]], 0, 0 :: (invariant load (s128) from %ir.216, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM22:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_15]], 0, 0 :: (invariant load (s128) from %ir.221, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM21:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_22]], 0, 0 :: (invariant load (s128) from %ir.213, addrspace 4) ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORDX2_IMM1]].sub1, 65535, implicit-def dead $scc ; 
CHECK-NEXT: [[COPY16:%[0-9]+]].sub0:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM1]].sub0 ; CHECK-NEXT: [[COPY16:%[0-9]+]].sub1:sgpr_128 = COPY [[S_AND_B32_]] ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY16]], 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM22:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_23]], 0, 0 :: (invariant load (s128) from %ir.218, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN17:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM19]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN18:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM20]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN19:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM21]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_LSHL_B32_7:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 3, implicit-def dead $scc - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN20:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN19:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM21]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_ASHR_I32_7:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_7]], 31, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_16:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM5]], -468, implicit-def dead $scc - ; CHECK-NEXT: undef [[S_ADD_U32_20:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_7]], 
implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_20:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_7]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM2:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[S_ADD_U32_20]], 168, 0 :: (invariant load (s64) from %ir.293, addrspace 4) + ; CHECK-NEXT: undef [[S_ADD_U32_30:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_27]], [[S_LSHL_B32_7]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_30:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_7]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN20:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM2:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[S_ADD_U32_30]], 0, 0 :: (invariant load (s64) from %ir.287, addrspace 4) ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]] ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORDX2_IMM2]].sub1, 65535, implicit-def dead $scc ; CHECK-NEXT: [[COPY17:%[0-9]+]].sub0:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM2]].sub0 ; CHECK-NEXT: [[COPY17:%[0-9]+]].sub1:sgpr_128 = COPY [[S_AND_B32_1]] ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY17]], 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_16]], 160, 0 :: (invariant load (s128) from %ir.256, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %469:sreg_64, 0, 0 :: (invariant load (s32) from `ptr addrspace(4) undef`, addrspace 4) - ; CHECK-NEXT: KILL [[S_ADD_U32_16]].sub0, [[S_ADD_U32_16]].sub1 - ; CHECK-NEXT: KILL undef %469:sreg_64 + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_25]], 
0, 0 :: (invariant load (s128) from %ir.253, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %484:sreg_64, 0, 0 :: (invariant load (s32) from `ptr addrspace(4) undef`, addrspace 4) + ; CHECK-NEXT: KILL [[S_ADD_U32_25]].sub0, [[S_ADD_U32_25]].sub1 + ; CHECK-NEXT: KILL undef %484:sreg_64 ; CHECK-NEXT: KILL [[COPY17]].sub0_sub1_sub2, [[COPY17]].sub3 ; CHECK-NEXT: [[S_LSHL_B32_8:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY14]], 3, implicit-def dead $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_17]], 160, 0 :: (invariant load (s128) from %ir.265, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_26]], 0, 0 :: (invariant load (s128) from %ir.261, addrspace 4) ; CHECK-NEXT: [[S_ASHR_I32_8:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_8]], 31, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_17:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM6]], -469, implicit-def dead $scc - ; CHECK-NEXT: undef [[S_ADD_U32_21:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_8]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_21:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_8]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_ADD_U32_21]], 168, 0 :: (invariant load (s32) from %ir.305, align 8, addrspace 4) + ; CHECK-NEXT: undef [[S_ADD_U32_31:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_27]], [[S_LSHL_B32_8]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_31:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_8]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_ADD_U32_31]], 0, 0 :: (invariant load (s32) from %ir.298, align 8, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN21:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN 
[[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM23]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN22:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM24]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM23]] ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM24]] + ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM23]] ; CHECK-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORD_IMM1]], 65535, implicit-def dead $scc ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]] ; CHECK-NEXT: [[COPY18:%[0-9]+]].sub1:sgpr_128 = COPY [[S_AND_B32_2]] @@ -224,22 +241,24 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: [[S_ADD_I32_21:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -507, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_22:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -539, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_23:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM7]], -473, implicit-def dead $scc - ; CHECK-NEXT: undef [[S_ADD_U32_22:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY1]], [[S_LSHL_B32_]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_22:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %33:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM25:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_22]], 96, 0 :: (invariant load (s128) from %ir.323, addrspace 4) - ; CHECK-NEXT: undef [[S_ADD_U32_23:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY1]], [[S_LSHL_B32_1]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_23:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %33:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM26:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_23]], 96, 0 :: (invariant load 
(s128) from %ir.329, addrspace 4) - ; CHECK-NEXT: undef [[S_ADD_U32_24:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY1]], [[S_LSHL_B32_2]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_24:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %33:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM27:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_24]], 96, 0 :: (invariant load (s128) from %ir.335, addrspace 4) + ; CHECK-NEXT: [[S_ADD_U32_32:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], 96, implicit-def $scc + ; CHECK-NEXT: [[S_ADDC_U32_5:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %33:sreg_32, 0, implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_33:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_32]], [[S_LSHL_B32_]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_33:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM25:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_33]], 0, 0 :: (invariant load (s128) from %ir.316, addrspace 4) + ; CHECK-NEXT: undef [[S_ADD_U32_34:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_32]], [[S_LSHL_B32_1]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_34:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM26:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_34]], 0, 0 :: (invariant load (s128) from %ir.321, addrspace 4) + ; CHECK-NEXT: undef [[S_ADD_U32_35:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_32]], [[S_LSHL_B32_2]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_35:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM27:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_35]], 0, 0 :: (invariant load (s128) from %ir.326, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN23:%[0-9]+]]:vgpr_32 = 
BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM25]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN24:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM26]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN25:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM27]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM26]] + ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM25]] ; CHECK-NEXT: KILL [[V_MOV_B32_e32_]] + ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM26]] ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM27]] - ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM25]] ; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -2, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec ; CHECK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -1, [[BUFFER_LOAD_FORMAT_X_IDXEN1]], 0, implicit $exec ; CHECK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -3, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec @@ -351,13 +370,13 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: [[V_OR_B32_e64_64:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_63]], [[V_ADD_U32_e64_28]], implicit $exec ; CHECK-NEXT: [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -593, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec ; CHECK-NEXT: [[V_OR_B32_e64_65:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_64]], [[V_ADD_U32_e64_29]], implicit $exec - ; CHECK-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %542:sreg_64, 0, 0 :: (invariant load (s256) from `ptr addrspace(4) undef`, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %559:sreg_64, 0, 0 :: (invariant load (s256) from `ptr 
addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[V_OR_B32_e64_66:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_65]], [[V_ADD_U32_e64_30]], implicit $exec ; CHECK-NEXT: [[S_ADD_I32_24:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM8]], -594, implicit-def dead $scc ; CHECK-NEXT: [[V_OR_B32_e64_67:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_24]], [[V_OR_B32_e64_66]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 0, [[V_OR_B32_e64_67]], implicit $exec ; CHECK-NEXT: undef [[V_CNDMASK_B32_e64_:%[0-9]+]].sub3:vreg_128 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_EQ_U32_e64_]], implicit $exec - ; CHECK-NEXT: IMAGE_STORE_V4_V2_gfx10 [[V_CNDMASK_B32_e64_]], undef %556:vreg_64, [[S_LOAD_DWORDX8_IMM]], 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 8) + ; CHECK-NEXT: IMAGE_STORE_V4_V2_gfx10 [[V_CNDMASK_B32_e64_]], undef %573:vreg_64, [[S_LOAD_DWORDX8_IMM]], 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 .expVert: %0 = extractelement <31 x i32> %userData, i64 2 diff --git a/llvm/test/CodeGen/PowerPC/licm-remat.ll b/llvm/test/CodeGen/PowerPC/licm-remat.ll index cf3e3ac089a498..ffdaf5d2481e3b 100644 --- a/llvm/test/CodeGen/PowerPC/licm-remat.ll +++ b/llvm/test/CodeGen/PowerPC/licm-remat.ll @@ -21,7 +21,7 @@ define linkonce_odr void @ZN6snappyDecompressor_(ptr %this, ptr %writer) { ; CHECK-LABEL: ZN6snappyDecompressor_: ; CHECK: # %bb.0: # %entry ; CHECK: addis 4, 2, .L__ModuleStringPool@toc@ha -; CHECK: addi 26, 4, .L__ModuleStringPool@toc@l +; CHECK: addi 25, 4, .L__ModuleStringPool@toc@l ; CHECK: .LBB0_2: # %for.cond ; CHECK-NOT: addis {{[0-9]+}}, 2, .L__ModuleStringPool@toc@ha ; CHECK: bctrl diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll deleted file mode 100644 index 
c24bbd5f658f94..00000000000000 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll +++ /dev/null @@ -1,51 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 -; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -S -passes=separate-const-offset-from-gep < %s | FileCheck %s - -define void @inboundsPossiblyNegative(ptr %in.ptr, i32 %in.idx1) { -; CHECK-LABEL: define void @inboundsPossiblyNegative( -; CHECK-SAME: ptr [[IN_PTR:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[IN_IDX1]] to i64 -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr <2 x i8>, ptr [[IN_PTR]], i64 [[IDXPROM]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr <2 x i8>, ptr [[TMP0]], i32 1 -; CHECK-NEXT: ret void -; -entry: - %const1 = getelementptr inbounds <2 x i8>, ptr %in.ptr, i32 1 - %idx1 = getelementptr inbounds <2 x i8>, ptr %const1, i32 %in.idx1 - ret void -} - -define void @inboundsNonNegative(ptr %in.ptr, i32 %in.idx1) { -; CHECK-LABEL: define void @inboundsNonNegative( -; CHECK-SAME: ptr [[IN_PTR:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i32 [[IN_IDX1]], 2147483647 -; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[IN_IDX1_NNEG]] to i64 -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <2 x i8>, ptr [[IN_PTR]], i64 [[IDXPROM]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i8>, ptr [[TMP0]], i32 1 -; CHECK-NEXT: ret void -; -entry: - %in.idx1.nneg = and i32 %in.idx1, 2147483647 - %const1 = getelementptr inbounds <2 x i8>, ptr %in.ptr, i32 1 - %idx1 = getelementptr inbounds <2 x i8>, ptr %const1, i32 %in.idx1.nneg - ret void -} - -define void @inboundsNonchained(ptr %in.ptr, i32 %in.idx1) { -; CHECK-LABEL: define void @inboundsNonchained( -; CHECK-SAME: ptr [[IN_PTR:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i32 
[[IN_IDX1]], 2147483647 -; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[IN_IDX1_NNEG]] to i64 -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr <2 x i8>, ptr [[IN_PTR]], i64 [[IDXPROM]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr <2 x i8>, ptr [[TMP0]], i32 1 -; CHECK-NEXT: ret void -; -entry: - %in.idx1.nneg = and i32 %in.idx1, 2147483647 - %const1 = getelementptr inbounds <2 x i8>, ptr %in.ptr, i32 1 - %idx1 = getelementptr <2 x i8>, ptr %const1, i32 %in.idx1.nneg - ret void -} diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll deleted file mode 100644 index 7137f0fb66fdb9..00000000000000 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll +++ /dev/null @@ -1,175 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a --start-before=separate-const-offset-from-gep < %s | FileCheck %s - -define protected amdgpu_kernel void @sink_addr(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) { -; CHECK-LABEL: sink_addr: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0 -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_lshl_b32 s3, s1, 1 -; CHECK-NEXT: s_add_i32 s0, s0, s3 -; CHECK-NEXT: s_lshl_b32 s2, s2, 1 -; CHECK-NEXT: s_add_i32 s0, s0, s2 -; CHECK-NEXT: s_cmp_lg_u32 s1, 0 -; CHECK-NEXT: s_cbranch_scc1 .LBB0_2 -; CHECK-NEXT: ; %bb.1: ; %bb.1 -; CHECK-NEXT: v_mov_b32_e32 v12, s0 -; CHECK-NEXT: ds_read_b128 v[0:3], v12 -; CHECK-NEXT: ds_read_b128 v[4:7], v12 offset:512 -; CHECK-NEXT: ds_read_b128 v[8:11], v12 offset:1024 -; CHECK-NEXT: ds_read_b128 v[12:15], v12 offset:1536 -; CHECK-NEXT: s_waitcnt lgkmcnt(3) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[0:3] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_waitcnt lgkmcnt(2) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[4:7] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: 
s_waitcnt lgkmcnt(1) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[8:11] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[12:15] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: .LBB0_2: ; %end -; CHECK-NEXT: s_add_i32 s1, s0, 0x200 -; CHECK-NEXT: v_mov_b32_e32 v0, s0 -; CHECK-NEXT: s_add_i32 s2, s0, 0x400 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_mov_b32_e32 v0, s1 -; CHECK-NEXT: s_add_i32 s3, s0, 0x600 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_mov_b32_e32 v0, s2 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_mov_b32_e32 v0, s3 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_endpgm -entry: - %base = getelementptr half, ptr addrspace(3) %in.ptr, i32 %in.idx0 - %idx0 = getelementptr half, ptr addrspace(3) %base, i32 %in.idx1 - %const1 = getelementptr half, ptr addrspace(3) %base, i32 256 - %idx1 = getelementptr half, ptr addrspace(3) %const1, i32 %in.idx1 - %const2 = getelementptr half, ptr addrspace(3) %base, i32 512 - %idx2 = getelementptr half, ptr addrspace(3) %const2, i32 %in.idx1 - %const3 = getelementptr half, ptr addrspace(3) %base, i32 768 - %idx3 = getelementptr half, ptr addrspace(3) %const3, i32 %in.idx1 - %cmp0 = icmp eq i32 %in.idx0, 0 - br i1 %cmp0, label %bb.1, label %end - -bb.1: - %val0 = load <8 x half>, ptr addrspace(3) %idx0, align 16 - %val1 = load <8 x half>, ptr addrspace(3) %idx1, align 16 - %val2 = load <8 x half>, ptr addrspace(3) %idx2, align 16 - %val3 = load <8 x half>, ptr addrspace(3) %idx3, align 16 - call void asm sideeffect "; use $0", "v"(<8 x half> %val0) - call void asm sideeffect "; use $0", "v"(<8 x half> %val1) - call void asm sideeffect "; use $0", "v"(<8 x half> %val2) - call void asm sideeffect "; use $0", "v"(<8 x half> %val3) - br label %end - -end: - call void asm sideeffect 
"; use $0", "v"(ptr addrspace(3) %idx0) - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx1) - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx2) - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx3) - ret void -} - -define protected amdgpu_kernel void @illegal_addr_mode(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) { -; CHECK-LABEL: illegal_addr_mode: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: s_load_dwordx4 s[4:7], s[6:7], 0x0 -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_lshl_b32 s0, s5, 1 -; CHECK-NEXT: s_lshl_b32 s1, s6, 1 -; CHECK-NEXT: s_add_i32 s3, s4, s0 -; CHECK-NEXT: s_add_i32 s3, s3, s1 -; CHECK-NEXT: s_add_i32 s2, s3, 0x12a60 -; CHECK-NEXT: s_add_i32 s1, s3, 0x12c60 -; CHECK-NEXT: s_add_i32 s0, s3, 0x12ed8 -; CHECK-NEXT: s_cmp_lg_u32 s5, 0 -; CHECK-NEXT: s_cbranch_scc1 .LBB1_2 -; CHECK-NEXT: ; %bb.1: ; %bb.1 -; CHECK-NEXT: v_mov_b32_e32 v0, s3 -; CHECK-NEXT: v_mov_b32_e32 v4, s2 -; CHECK-NEXT: v_mov_b32_e32 v8, s1 -; CHECK-NEXT: v_mov_b32_e32 v12, s0 -; CHECK-NEXT: ds_read_b128 v[0:3], v0 -; CHECK-NEXT: ds_read_b128 v[4:7], v4 -; CHECK-NEXT: ds_read_b128 v[8:11], v8 -; CHECK-NEXT: ds_read_b128 v[12:15], v12 -; CHECK-NEXT: s_waitcnt lgkmcnt(3) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[0:3] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_waitcnt lgkmcnt(2) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[4:7] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_waitcnt lgkmcnt(1) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[8:11] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v[12:15] -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: .LBB1_2: ; %end -; CHECK-NEXT: v_mov_b32_e32 v0, s3 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_mov_b32_e32 v0, s2 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_mov_b32_e32 v0, s1 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use 
v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_mov_b32_e32 v0, s0 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ; use v0 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_endpgm -entry: - %base = getelementptr half, ptr addrspace(3) %in.ptr, i32 %in.idx0 - %idx0 = getelementptr half, ptr addrspace(3) %base, i32 %in.idx1 - %const1 = getelementptr half, ptr addrspace(3) %base, i32 38192 - %idx1 = getelementptr half, ptr addrspace(3) %const1, i32 %in.idx1 - %const2 = getelementptr half, ptr addrspace(3) %base, i32 38448 - %idx2 = getelementptr half, ptr addrspace(3) %const2, i32 %in.idx1 - %const3 = getelementptr half, ptr addrspace(3) %base, i32 38764 - %idx3 = getelementptr half, ptr addrspace(3) %const3, i32 %in.idx1 - %cmp0 = icmp eq i32 %in.idx0, 0 - br i1 %cmp0, label %bb.1, label %end - -bb.1: - %val0 = load <8 x half>, ptr addrspace(3) %idx0, align 16 - %val1 = load <8 x half>, ptr addrspace(3) %idx1, align 16 - %val2 = load <8 x half>, ptr addrspace(3) %idx2, align 16 - %val3 = load <8 x half>, ptr addrspace(3) %idx3, align 16 - call void asm sideeffect "; use $0", "v"(<8 x half> %val0) - call void asm sideeffect "; use $0", "v"(<8 x half> %val1) - call void asm sideeffect "; use $0", "v"(<8 x half> %val2) - call void asm sideeffect "; use $0", "v"(<8 x half> %val3) - br label %end - -end: - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx0) - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx1) - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx2) - call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx3) - ret void -} diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lower-gep-reorder.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lower-gep-reorder.ll deleted file mode 100644 index 516f395e061180..00000000000000 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lower-gep-reorder.ll +++ /dev/null @@ -1,62 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --version 3 -; RUN: opt -mtriple=nvptx64-nvidia-cuda -S -passes=separate-const-offset-from-gep < %s | FileCheck %s - -define protected amdgpu_kernel void @sink_addr(ptr %in.ptr, i64 %in.idx0, i64 %in.idx1) { -; CHECK-LABEL: define protected amdgpu_kernel void @sink_addr( -; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[IDX0:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 [[IN_IDX0]], i64 [[IN_IDX1]] -; CHECK-NEXT: [[CONST1:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 [[IN_IDX0]], i64 256 -; CHECK-NEXT: [[IDX1:%.*]] = getelementptr i64, ptr [[CONST1]], i64 [[IN_IDX1]] -; CHECK-NEXT: [[CONST2:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 [[IN_IDX0]], i64 512 -; CHECK-NEXT: [[IDX2:%.*]] = getelementptr i64, ptr [[CONST2]], i64 [[IN_IDX1]] -; CHECK-NEXT: [[CONST3:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 [[IN_IDX0]], i64 768 -; CHECK-NEXT: [[IDX3:%.*]] = getelementptr i64, ptr [[CONST3]], i64 [[IN_IDX1]] -; CHECK-NEXT: [[CMP0:%.*]] = icmp eq i64 [[IN_IDX0]], 0 -; CHECK-NEXT: br i1 [[CMP0]], label [[BB_1:%.*]], label [[END:%.*]] -; CHECK: bb.1: -; CHECK-NEXT: [[VAL0:%.*]] = load <8 x i64>, ptr [[IDX0]], align 16 -; CHECK-NEXT: [[VAL1:%.*]] = load <8 x i64>, ptr [[IDX1]], align 16 -; CHECK-NEXT: [[VAL2:%.*]] = load <8 x i64>, ptr [[IDX2]], align 16 -; CHECK-NEXT: [[VAL3:%.*]] = load <8 x i64>, ptr [[IDX3]], align 16 -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: br label [[END]] -; CHECK: end: -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: ret void -; -entry: - %idx0 = getelementptr [8192 x i64], ptr %in.ptr, i64 %in.idx0, i64 %in.idx1 - %const1 = getelementptr [8192 x i64], ptr 
%in.ptr, i64 %in.idx0, i64 256 - %idx1 = getelementptr i64, ptr %const1, i64 %in.idx1 - %const2 = getelementptr [8192 x i64], ptr %in.ptr, i64 %in.idx0, i64 512 - %idx2 = getelementptr i64, ptr %const2, i64 %in.idx1 - %const3 = getelementptr [8192 x i64], ptr %in.ptr, i64 %in.idx0, i64 768 - %idx3 = getelementptr i64, ptr %const3, i64 %in.idx1 - %cmp0 = icmp eq i64 %in.idx0, 0 - br i1 %cmp0, label %bb.1, label %end - -bb.1: - %val0 = load <8 x i64>, ptr %idx0, align 16 - %val1 = load <8 x i64>, ptr %idx1, align 16 - %val2 = load <8 x i64>, ptr %idx2, align 16 - %val3 = load <8 x i64>, ptr %idx3, align 16 - call void asm sideeffect "; use $0", "v"(<8 x i64> %val0) - call void asm sideeffect "; use $0", "v"(<8 x i64> %val1) - call void asm sideeffect "; use $0", "v"(<8 x i64> %val2) - call void asm sideeffect "; use $0", "v"(<8 x i64> %val3) - br label %end - -end: - call void asm sideeffect "; use $0", "v"(ptr %idx0) - call void asm sideeffect "; use $0", "v"(ptr %idx1) - call void asm sideeffect "; use $0", "v"(ptr %idx2) - call void asm sideeffect "; use $0", "v"(ptr %idx3) - ret void -} diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/reorder-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/reorder-gep.ll deleted file mode 100644 index a15f11a634db5d..00000000000000 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/reorder-gep.ll +++ /dev/null @@ -1,188 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 -; RUN: opt -S -passes=separate-const-offset-from-gep < %s | FileCheck %s - -define void @illegal_addr_mode(ptr %in.ptr, i64 %in.idx0, i64 %in.idx1) { -; CHECK-LABEL: define void @illegal_addr_mode( -; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[BASE:%.*]] = getelementptr i64, ptr [[IN_PTR]], i64 [[IN_IDX0]] -; CHECK-NEXT: [[IDX0:%.*]] = getelementptr i64, ptr [[BASE]], i64 [[IN_IDX1]] -; CHECK-NEXT: [[CONST1:%.*]] = 
getelementptr i64, ptr [[BASE]], i64 256 -; CHECK-NEXT: [[IDX1:%.*]] = getelementptr i64, ptr [[CONST1]], i64 [[IN_IDX1]] -; CHECK-NEXT: [[CONST2:%.*]] = getelementptr i64, ptr [[BASE]], i64 512 -; CHECK-NEXT: [[IDX2:%.*]] = getelementptr i64, ptr [[CONST2]], i64 [[IN_IDX1]] -; CHECK-NEXT: [[CONST3:%.*]] = getelementptr i64, ptr [[BASE]], i64 768 -; CHECK-NEXT: [[IDX3:%.*]] = getelementptr i64, ptr [[CONST3]], i64 [[IN_IDX1]] -; CHECK-NEXT: [[CMP0:%.*]] = icmp eq i64 [[IN_IDX0]], 0 -; CHECK-NEXT: br i1 [[CMP0]], label [[BB_1:%.*]], label [[END:%.*]] -; CHECK: bb.1: -; CHECK-NEXT: [[VAL0:%.*]] = load <8 x i64>, ptr [[IDX0]], align 16 -; CHECK-NEXT: [[VAL1:%.*]] = load <8 x i64>, ptr [[IDX1]], align 16 -; CHECK-NEXT: [[VAL2:%.*]] = load <8 x i64>, ptr [[IDX2]], align 16 -; CHECK-NEXT: [[VAL3:%.*]] = load <8 x i64>, ptr [[IDX3]], align 16 -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: br label [[END]] -; CHECK: end: -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: ret void -; -entry: - %base = getelementptr i64, ptr %in.ptr, i64 %in.idx0 - %idx0 = getelementptr i64, ptr %base, i64 %in.idx1 - %const1 = getelementptr i64, ptr %base, i64 256 - %idx1 = getelementptr i64, ptr %const1, i64 %in.idx1 - %const2 = getelementptr i64, ptr %base, i64 512 - %idx2 = getelementptr i64, ptr %const2, i64 %in.idx1 - %const3 = getelementptr i64, ptr %base, i64 768 - %idx3 = getelementptr i64, ptr %const3, i64 %in.idx1 - %cmp0 = icmp eq i64 %in.idx0, 0 - br i1 %cmp0, label %bb.1, label %end - -bb.1: - %val0 = load <8 x i64>, ptr %idx0, align 16 - %val1 = load <8 x i64>, ptr %idx1, align 16 - %val2 = load <8 x i64>, ptr %idx2, align 16 - %val3 = load <8 x i64>, ptr %idx3, align 16 - call void asm sideeffect "; use $0", 
"v"(<8 x i64> %val0) - call void asm sideeffect "; use $0", "v"(<8 x i64> %val1) - call void asm sideeffect "; use $0", "v"(<8 x i64> %val2) - call void asm sideeffect "; use $0", "v"(<8 x i64> %val3) - br label %end - -end: - call void asm sideeffect "; use $0", "v"(ptr %idx0) - call void asm sideeffect "; use $0", "v"(ptr %idx1) - call void asm sideeffect "; use $0", "v"(ptr %idx2) - call void asm sideeffect "; use $0", "v"(ptr %idx3) - ret void -} - - -define void @multi_index_reorder(ptr %in.ptr, i64 %in.idx0, i64 %in.idx1) { -; CHECK-LABEL: define void @multi_index_reorder( -; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[IDX0:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 0, i64 [[IN_IDX1]] -; CHECK-NEXT: [[CONST1:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 0, i64 256 -; CHECK-NEXT: [[IDX1:%.*]] = getelementptr i64, ptr [[CONST1]], i64 [[IN_IDX1]] -; CHECK-NEXT: [[CONST2:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 0, i64 512 -; CHECK-NEXT: [[IDX2:%.*]] = getelementptr i64, ptr [[CONST2]], i64 [[IN_IDX1]] -; CHECK-NEXT: [[CONST3:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 0, i64 768 -; CHECK-NEXT: [[IDX3:%.*]] = getelementptr i64, ptr [[CONST3]], i64 [[IN_IDX1]] -; CHECK-NEXT: [[CMP0:%.*]] = icmp eq i64 [[IN_IDX0]], 0 -; CHECK-NEXT: br i1 [[CMP0]], label [[BB_1:%.*]], label [[END:%.*]] -; CHECK: bb.1: -; CHECK-NEXT: [[VAL0:%.*]] = load <8 x i64>, ptr [[IDX0]], align 16 -; CHECK-NEXT: [[VAL1:%.*]] = load <8 x i64>, ptr [[IDX1]], align 16 -; CHECK-NEXT: [[VAL2:%.*]] = load <8 x i64>, ptr [[IDX2]], align 16 -; CHECK-NEXT: [[VAL3:%.*]] = load <8 x i64>, ptr [[IDX3]], align 16 -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: br label [[END]] -; CHECK: end: -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: call void 
asm sideeffect " -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: ret void -; -entry: - %idx0 = getelementptr [8192 x i64], ptr %in.ptr, i64 0, i64 %in.idx1 - %const1 = getelementptr [8192 x i64], ptr %in.ptr, i64 0, i64 256 - %idx1 = getelementptr i64, ptr %const1, i64 %in.idx1 - %const2 = getelementptr [8192 x i64], ptr %in.ptr, i64 0, i64 512 - %idx2 = getelementptr i64, ptr %const2, i64 %in.idx1 - %const3 = getelementptr [8192 x i64], ptr %in.ptr, i64 0, i64 768 - %idx3 = getelementptr i64, ptr %const3, i64 %in.idx1 - %cmp0 = icmp eq i64 %in.idx0, 0 - br i1 %cmp0, label %bb.1, label %end - -bb.1: - %val0 = load <8 x i64>, ptr %idx0, align 16 - %val1 = load <8 x i64>, ptr %idx1, align 16 - %val2 = load <8 x i64>, ptr %idx2, align 16 - %val3 = load <8 x i64>, ptr %idx3, align 16 - call void asm sideeffect "; use $0", "v"(<8 x i64> %val0) - call void asm sideeffect "; use $0", "v"(<8 x i64> %val1) - call void asm sideeffect "; use $0", "v"(<8 x i64> %val2) - call void asm sideeffect "; use $0", "v"(<8 x i64> %val3) - br label %end - -end: - call void asm sideeffect "; use $0", "v"(ptr %idx0) - call void asm sideeffect "; use $0", "v"(ptr %idx1) - call void asm sideeffect "; use $0", "v"(ptr %idx2) - call void asm sideeffect "; use $0", "v"(ptr %idx3) - ret void -} - - -define void @different_type_reorder(ptr %in.ptr, i64 %in.idx0, i64 %in.idx1) { -; CHECK-LABEL: define void @different_type_reorder( -; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[BASE:%.*]] = getelementptr i64, ptr [[IN_PTR]], i64 [[IN_IDX0]] -; CHECK-NEXT: [[IDX0:%.*]] = getelementptr i64, ptr [[BASE]], i64 [[IN_IDX1]] -; CHECK-NEXT: [[CONST1:%.*]] = getelementptr i8, ptr [[BASE]], i64 256 -; CHECK-NEXT: [[IDX1:%.*]] = getelementptr i64, ptr [[CONST1]], i64 [[IN_IDX1]] -; CHECK-NEXT: [[CONST2:%.*]] = getelementptr i8, ptr [[BASE]], i64 512 -; CHECK-NEXT: [[IDX2:%.*]] = 
getelementptr i64, ptr [[CONST2]], i64 [[IN_IDX1]] -; CHECK-NEXT: [[CONST3:%.*]] = getelementptr i8, ptr [[BASE]], i64 768 -; CHECK-NEXT: [[IDX3:%.*]] = getelementptr i64, ptr [[CONST3]], i64 [[IN_IDX1]] -; CHECK-NEXT: [[CMP0:%.*]] = icmp eq i64 [[IN_IDX0]], 0 -; CHECK-NEXT: br i1 [[CMP0]], label [[BB_1:%.*]], label [[END:%.*]] -; CHECK: bb.1: -; CHECK-NEXT: [[VAL0:%.*]] = load <8 x i64>, ptr [[IDX0]], align 16 -; CHECK-NEXT: [[VAL1:%.*]] = load <8 x i64>, ptr [[IDX1]], align 16 -; CHECK-NEXT: [[VAL2:%.*]] = load <8 x i64>, ptr [[IDX2]], align 16 -; CHECK-NEXT: [[VAL3:%.*]] = load <8 x i64>, ptr [[IDX3]], align 16 -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: br label [[END]] -; CHECK: end: -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: ret void -; -entry: - %base = getelementptr i64, ptr %in.ptr, i64 %in.idx0 - %idx0 = getelementptr i64, ptr %base, i64 %in.idx1 - %const1 = getelementptr i8, ptr %base, i64 256 - %idx1 = getelementptr i64, ptr %const1, i64 %in.idx1 - %const2 = getelementptr i8, ptr %base, i64 512 - %idx2 = getelementptr i64, ptr %const2, i64 %in.idx1 - %const3 = getelementptr i8, ptr %base, i64 768 - %idx3 = getelementptr i64, ptr %const3, i64 %in.idx1 - %cmp0 = icmp eq i64 %in.idx0, 0 - br i1 %cmp0, label %bb.1, label %end - -bb.1: - %val0 = load <8 x i64>, ptr %idx0, align 16 - %val1 = load <8 x i64>, ptr %idx1, align 16 - %val2 = load <8 x i64>, ptr %idx2, align 16 - %val3 = load <8 x i64>, ptr %idx3, align 16 - call void asm sideeffect "; use $0", "v"(<8 x i64> %val0) - call void asm sideeffect "; use $0", "v"(<8 x i64> %val1) - call void asm sideeffect "; use $0", "v"(<8 x i64> %val2) - call void asm sideeffect "; use $0", "v"(<8 x i64> %val3) - br label %end - -end: - call 
void asm sideeffect "; use $0", "v"(ptr %idx0) - call void asm sideeffect "; use $0", "v"(ptr %idx1) - call void asm sideeffect "; use $0", "v"(ptr %idx2) - call void asm sideeffect "; use $0", "v"(ptr %idx3) - ret void -} From 8c56e78ec531f0e2460213c20fff869b6b7add99 Mon Sep 17 00:00:00 2001 From: Zequan Wu Date: Tue, 13 Feb 2024 16:38:56 -0500 Subject: [PATCH 061/240] [lldb-dap] Add support for data breakpoint. (#81541) This implements functionality to handle `DataBreakpointInfo` request and `SetDataBreakpoints` request. If variablesReference is 0 or not provided, interpret name as ${number of bytes}@${expression} to set data breakpoint at the given expression because the spec https://microsoft.github.io/debug-adapter-protocol/specification#Requests_DataBreakpointInfo doesn't say how the client could specify the number of bytes to watch. This is based on top of https://github.com/llvm/llvm-project/pull/80753. --- .../test/tools/lldb-dap/dap_server.py | 47 +++ .../tools/lldb-dap/databreakpoint/Makefile | 3 + .../TestDAP_setDataBreakpoints.py | 123 +++++++ .../tools/lldb-dap/databreakpoint/main.cpp | 17 + lldb/tools/lldb-dap/CMakeLists.txt | 1 + lldb/tools/lldb-dap/DAPForward.h | 2 + lldb/tools/lldb-dap/Watchpoint.cpp | 48 +++ lldb/tools/lldb-dap/Watchpoint.h | 34 ++ lldb/tools/lldb-dap/lldb-dap.cpp | 307 ++++++++++++++++-- .../gn/secondary/lldb/tools/lldb-dap/BUILD.gn | 1 + 10 files changed, 549 insertions(+), 34 deletions(-) create mode 100644 lldb/test/API/tools/lldb-dap/databreakpoint/Makefile create mode 100644 lldb/test/API/tools/lldb-dap/databreakpoint/TestDAP_setDataBreakpoints.py create mode 100644 lldb/test/API/tools/lldb-dap/databreakpoint/main.cpp create mode 100644 lldb/tools/lldb-dap/Watchpoint.cpp create mode 100644 lldb/tools/lldb-dap/Watchpoint.h diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py index bb863bb8719176..27a76a652f4063 100644 --- 
a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py @@ -501,6 +501,18 @@ def get_local_variable_value(self, name, frameIndex=0, threadId=None): return variable["value"] return None + def get_local_variable_child(self, name, child_name, frameIndex=0, threadId=None): + local = self.get_local_variable(name, frameIndex, threadId) + if local["variablesReference"] == 0: + return None + children = self.request_variables(local["variablesReference"])["body"][ + "variables" + ] + for child in children: + if child["name"] == child_name: + return child + return None + def replay_packets(self, replay_file_path): f = open(replay_file_path, "r") mode = "invalid" @@ -895,6 +907,41 @@ def request_setFunctionBreakpoints(self, names, condition=None, hitCondition=Non } return self.send_recv(command_dict) + def request_dataBreakpointInfo( + self, variablesReference, name, frameIndex=0, threadId=None + ): + stackFrame = self.get_stackFrame(frameIndex=frameIndex, threadId=threadId) + if stackFrame is None: + return [] + args_dict = { + "variablesReference": variablesReference, + "name": name, + "frameId": stackFrame["id"], + } + command_dict = { + "command": "dataBreakpointInfo", + "type": "request", + "arguments": args_dict, + } + return self.send_recv(command_dict) + + def request_setDataBreakpoint(self, dataBreakpoints): + """dataBreakpoints is a list of dictionary with following fields: + { + dataId: (address in hex)/(size in bytes) + accessType: read/write/readWrite + [condition]: string + [hitCondition]: string + } + """ + args_dict = {"breakpoints": dataBreakpoints} + command_dict = { + "command": "setDataBreakpoints", + "type": "request", + "arguments": args_dict, + } + return self.send_recv(command_dict) + def request_compileUnits(self, moduleId): args_dict = {"moduleId": moduleId} command_dict = { diff --git a/lldb/test/API/tools/lldb-dap/databreakpoint/Makefile 
b/lldb/test/API/tools/lldb-dap/databreakpoint/Makefile new file mode 100644 index 00000000000000..99998b20bcb050 --- /dev/null +++ b/lldb/test/API/tools/lldb-dap/databreakpoint/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/tools/lldb-dap/databreakpoint/TestDAP_setDataBreakpoints.py b/lldb/test/API/tools/lldb-dap/databreakpoint/TestDAP_setDataBreakpoints.py new file mode 100644 index 00000000000000..40ca6473649ea9 --- /dev/null +++ b/lldb/test/API/tools/lldb-dap/databreakpoint/TestDAP_setDataBreakpoints.py @@ -0,0 +1,123 @@ +""" +Test lldb-dap dataBreakpointInfo and setDataBreakpoints requests +""" + +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +import lldbdap_testcase + + +class TestDAP_setDataBreakpoints(lldbdap_testcase.DAPTestCaseBase): + def setUp(self): + lldbdap_testcase.DAPTestCaseBase.setUp(self) + self.accessTypes = ["read", "write", "readWrite"] + + @skipIfWindows + @skipIfRemote + def test_expression(self): + """Tests setting data breakpoints on expression.""" + program = self.getBuildArtifact("a.out") + self.build_and_launch(program) + source = "main.cpp" + first_loop_break_line = line_number(source, "// first loop breakpoint") + self.set_source_breakpoints(source, [first_loop_break_line]) + self.continue_to_next_stop() + self.dap_server.get_stackFrame() + # Test setting write watchpoint using expressions: &x, arr+2 + response_x = self.dap_server.request_dataBreakpointInfo(0, "4@&x") + response_arr_2 = self.dap_server.request_dataBreakpointInfo(0, "1@arr+2") + # Test response from dataBreakpointInfo request. 
+ self.assertEquals(response_x["body"]["dataId"].split("/")[1], "4") + self.assertEquals(response_x["body"]["accessTypes"], self.accessTypes) + self.assertEquals(response_arr_2["body"]["dataId"].split("/")[1], "1") + self.assertEquals(response_arr_2["body"]["accessTypes"], self.accessTypes) + dataBreakpoints = [ + {"dataId": response_x["body"]["dataId"], "accessType": "write"}, + {"dataId": response_arr_2["body"]["dataId"], "accessType": "write"}, + ] + self.dap_server.request_setDataBreakpoint(dataBreakpoints) + + self.dap_server.request_continue() + self.dap_server.wait_for_stopped() + x_val = self.dap_server.get_local_variable_value("x") + i_val = self.dap_server.get_local_variable_value("i") + self.assertEquals(x_val, "2") + self.assertEquals(i_val, "1") + + self.dap_server.request_continue() + self.dap_server.wait_for_stopped() + arr_2 = self.dap_server.get_local_variable_child("arr", "[2]") + i_val = self.dap_server.get_local_variable_value("i") + self.assertEquals(arr_2["value"], "'z'") + self.assertEquals(i_val, "2") + + @skipIfWindows + @skipIfRemote + def test_functionality(self): + """Tests setting data breakpoints on variable.""" + program = self.getBuildArtifact("a.out") + self.build_and_launch(program) + source = "main.cpp" + first_loop_break_line = line_number(source, "// first loop breakpoint") + self.set_source_breakpoints(source, [first_loop_break_line]) + self.continue_to_next_stop() + self.dap_server.get_local_variables() + # Test write watchpoints on x, arr[2] + response_x = self.dap_server.request_dataBreakpointInfo(1, "x") + arr = self.dap_server.get_local_variable("arr") + response_arr_2 = self.dap_server.request_dataBreakpointInfo( + arr["variablesReference"], "[2]" + ) + + # Test response from dataBreakpointInfo request. 
+ self.assertEquals(response_x["body"]["dataId"].split("/")[1], "4") + self.assertEquals(response_x["body"]["accessTypes"], self.accessTypes) + self.assertEquals(response_arr_2["body"]["dataId"].split("/")[1], "1") + self.assertEquals(response_arr_2["body"]["accessTypes"], self.accessTypes) + dataBreakpoints = [ + {"dataId": response_x["body"]["dataId"], "accessType": "write"}, + {"dataId": response_arr_2["body"]["dataId"], "accessType": "write"}, + ] + self.dap_server.request_setDataBreakpoint(dataBreakpoints) + + self.continue_to_next_stop() + x_val = self.dap_server.get_local_variable_value("x") + i_val = self.dap_server.get_local_variable_value("i") + self.assertEquals(x_val, "2") + self.assertEquals(i_val, "1") + + self.continue_to_next_stop() + arr_2 = self.dap_server.get_local_variable_child("arr", "[2]") + i_val = self.dap_server.get_local_variable_value("i") + self.assertEquals(arr_2["value"], "'z'") + self.assertEquals(i_val, "2") + self.dap_server.request_setDataBreakpoint([]) + + # Test hit condition + second_loop_break_line = line_number(source, "// second loop breakpoint") + breakpoint_ids = self.set_source_breakpoints(source, [second_loop_break_line]) + self.continue_to_breakpoints(breakpoint_ids) + dataBreakpoints = [ + { + "dataId": response_x["body"]["dataId"], + "accessType": "write", + "hitCondition": "2", + } + ] + self.dap_server.request_setDataBreakpoint(dataBreakpoints) + self.continue_to_next_stop() + x_val = self.dap_server.get_local_variable_value("x") + self.assertEquals(x_val, "3") + + # Test condition + dataBreakpoints = [ + { + "dataId": response_x["body"]["dataId"], + "accessType": "write", + "condition": "x==10", + } + ] + self.dap_server.request_setDataBreakpoint(dataBreakpoints) + self.continue_to_next_stop() + x_val = self.dap_server.get_local_variable_value("x") + self.assertEquals(x_val, "10") diff --git a/lldb/test/API/tools/lldb-dap/databreakpoint/main.cpp b/lldb/test/API/tools/lldb-dap/databreakpoint/main.cpp new file mode 
100644 index 00000000000000..8082fe02f3e534 --- /dev/null +++ b/lldb/test/API/tools/lldb-dap/databreakpoint/main.cpp @@ -0,0 +1,17 @@ +int main(int argc, char const *argv[]) { + // Test for data breakpoint + int x = 0; + char arr[4] = {'a', 'b', 'c', 'd'}; + for (int i = 0; i < 5; ++i) { // first loop breakpoint + if (i == 1) { + x = i + 1; + } else if (i == 2) { + arr[i] = 'z'; + } + } + + x = 1; + for (int i = 0; i < 10; ++i) { // second loop breakpoint + ++x; + } +} diff --git a/lldb/tools/lldb-dap/CMakeLists.txt b/lldb/tools/lldb-dap/CMakeLists.txt index f8c0e4ecf36c2f..f8f0d86453f585 100644 --- a/lldb/tools/lldb-dap/CMakeLists.txt +++ b/lldb/tools/lldb-dap/CMakeLists.txt @@ -37,6 +37,7 @@ add_lldb_tool(lldb-dap RunInTerminal.cpp SourceBreakpoint.cpp DAP.cpp + Watchpoint.cpp LINK_LIBS liblldb diff --git a/lldb/tools/lldb-dap/DAPForward.h b/lldb/tools/lldb-dap/DAPForward.h index fffff1e3f79020..8c79488fae8dbf 100644 --- a/lldb/tools/lldb-dap/DAPForward.h +++ b/lldb/tools/lldb-dap/DAPForward.h @@ -14,6 +14,7 @@ struct BreakpointBase; struct ExceptionBreakpoint; struct FunctionBreakpoint; struct SourceBreakpoint; +struct Watchpoint; } // namespace lldb_dap namespace lldb { @@ -39,6 +40,7 @@ class SBStringList; class SBTarget; class SBThread; class SBValue; +class SBWatchpoint; } // namespace lldb #endif diff --git a/lldb/tools/lldb-dap/Watchpoint.cpp b/lldb/tools/lldb-dap/Watchpoint.cpp new file mode 100644 index 00000000000000..2f176e0da84f15 --- /dev/null +++ b/lldb/tools/lldb-dap/Watchpoint.cpp @@ -0,0 +1,48 @@ +//===-- Watchpoint.cpp ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Watchpoint.h" +#include "DAP.h" +#include "JSONUtils.h" +#include "llvm/ADT/StringExtras.h" + +namespace lldb_dap { +Watchpoint::Watchpoint(const llvm::json::Object &obj) : BreakpointBase(obj) { + llvm::StringRef dataId = GetString(obj, "dataId"); + std::string accessType = GetString(obj, "accessType").str(); + auto [addr_str, size_str] = dataId.split('/'); + lldb::addr_t addr; + size_t size; + llvm::to_integer(addr_str, addr, 16); + llvm::to_integer(size_str, size); + lldb::SBWatchpointOptions options; + options.SetWatchpointTypeRead(accessType != "write"); + if (accessType != "read") + options.SetWatchpointTypeWrite(lldb::eWatchpointWriteTypeOnModify); + wp = g_dap.target.WatchpointCreateByAddress(addr, size, options, error); + SetCondition(); + SetHitCondition(); +} + +void Watchpoint::SetCondition() { wp.SetCondition(condition.c_str()); } + +void Watchpoint::SetHitCondition() { + uint64_t hitCount = 0; + if (llvm::to_integer(hitCondition, hitCount)) + wp.SetIgnoreCount(hitCount - 1); +} + +void Watchpoint::CreateJsonObject(llvm::json::Object &object) { + if (error.Success()) { + object.try_emplace("verified", true); + } else { + object.try_emplace("verified", false); + EmplaceSafeString(object, "message", error.GetCString()); + } +} +} // namespace lldb_dap diff --git a/lldb/tools/lldb-dap/Watchpoint.h b/lldb/tools/lldb-dap/Watchpoint.h new file mode 100644 index 00000000000000..026b07d67241ce --- /dev/null +++ b/lldb/tools/lldb-dap/Watchpoint.h @@ -0,0 +1,34 @@ +//===-- Watchpoint.h --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_TOOLS_LLDB_DAP_WATCHPOINT_H +#define LLDB_TOOLS_LLDB_DAP_WATCHPOINT_H + +#include "BreakpointBase.h" +#include "lldb/API/SBError.h" +#include "lldb/API/SBWatchpoint.h" +#include "lldb/API/SBWatchpointOptions.h" + +namespace lldb_dap { + +struct Watchpoint : public BreakpointBase { + // The LLDB breakpoint associated with this watchpoint. + lldb::SBWatchpoint wp; + lldb::SBError error; + + Watchpoint() = default; + Watchpoint(const llvm::json::Object &obj); + Watchpoint(lldb::SBWatchpoint wp) : wp(wp) {} + + void SetCondition() override; + void SetHitCondition() override; + void CreateJsonObject(llvm::json::Object &object) override; +}; +} // namespace lldb_dap + +#endif diff --git a/lldb/tools/lldb-dap/lldb-dap.cpp b/lldb/tools/lldb-dap/lldb-dap.cpp index 67022347e6d624..6bf2ec28432cd3 100644 --- a/lldb/tools/lldb-dap/lldb-dap.cpp +++ b/lldb/tools/lldb-dap/lldb-dap.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "DAP.h" +#include "Watchpoint.h" #include #include @@ -560,6 +561,46 @@ void EventThreadFunction() { } } +lldb::SBValue FindVariable(uint64_t variablesReference, llvm::StringRef name) { + lldb::SBValue variable; + if (lldb::SBValueList *top_scope = GetTopLevelScope(variablesReference)) { + bool is_duplicated_variable_name = name.contains(" @"); + // variablesReference is one of our scopes, not an actual variable it is + // asking for a variable in locals or globals or registers + int64_t end_idx = top_scope->GetSize(); + // Searching backward so that we choose the variable in closest scope + // among variables of the same name.
+ for (int64_t i = end_idx - 1; i >= 0; --i) { + lldb::SBValue curr_variable = top_scope->GetValueAtIndex(i); + std::string variable_name = CreateUniqueVariableNameForDisplay( + curr_variable, is_duplicated_variable_name); + if (variable_name == name) { + variable = curr_variable; + break; + } + } + } else { + // This is not under the globals or locals scope, so there are no duplicated + // names. + + // We have a named item within an actual variable so we need to find it + // within the container variable by name. + lldb::SBValue container = g_dap.variables.GetVariable(variablesReference); + variable = container.GetChildMemberWithName(name.data()); + if (!variable.IsValid()) { + if (name.starts_with("[")) { + llvm::StringRef index_str(name.drop_front(1)); + uint64_t index = 0; + if (!index_str.consumeInteger(0, index)) { + if (index_str == "]") + variable = container.GetChildAtIndex(index); + } + } + } + } + return variable; +} + // Both attach and launch take a either a sourcePath or sourceMap // argument (or neither), from which we need to set the target.source-map. void SetSourceMapFromArguments(const llvm::json::Object &arguments) { @@ -1647,6 +1688,8 @@ void request_initialize(const llvm::json::Object &request) { body.try_emplace("supportsProgressReporting", true); // The debug adapter supports 'logMessage' in breakpoint. body.try_emplace("supportsLogPoints", true); + // The debug adapter supports data watchpoints.
+ body.try_emplace("supportsDataBreakpoints", true); response.try_emplace("body", std::move(body)); g_dap.SendJSON(llvm::json::Value(std::move(response))); @@ -2591,6 +2634,231 @@ void request_setFunctionBreakpoints(const llvm::json::Object &request) { g_dap.SendJSON(llvm::json::Value(std::move(response))); } +// "DataBreakpointInfoRequest": { +// "allOf": [ { "$ref": "#/definitions/Request" }, { +// "type": "object", +// "description": "Obtains information on a possible data breakpoint that +// could be set on an expression or variable.\nClients should only call this +// request if the corresponding capability `supportsDataBreakpoints` is +// true.", "properties": { +// "command": { +// "type": "string", +// "enum": [ "dataBreakpointInfo" ] +// }, +// "arguments": { +// "$ref": "#/definitions/DataBreakpointInfoArguments" +// } +// }, +// "required": [ "command", "arguments" ] +// }] +// }, +// "DataBreakpointInfoArguments": { +// "type": "object", +// "description": "Arguments for `dataBreakpointInfo` request.", +// "properties": { +// "variablesReference": { +// "type": "integer", +// "description": "Reference to the variable container if the data +// breakpoint is requested for a child of the container. The +// `variablesReference` must have been obtained in the current suspended +// state. See 'Lifetime of Object References' in the Overview section for +// details." +// }, +// "name": { +// "type": "string", +// "description": "The name of the variable's child to obtain data +// breakpoint information for.\nIf `variablesReference` isn't specified, +// this can be an expression." +// }, +// "frameId": { +// "type": "integer", +// "description": "When `name` is an expression, evaluate it in the scope +// of this stack frame. If not specified, the expression is evaluated in +// the global scope. When `variablesReference` is specified, this property +// has no effect." 
+// } +// }, +// "required": [ "name" ] +// }, +// "DataBreakpointInfoResponse": { +// "allOf": [ { "$ref": "#/definitions/Response" }, { +// "type": "object", +// "description": "Response to `dataBreakpointInfo` request.", +// "properties": { +// "body": { +// "type": "object", +// "properties": { +// "dataId": { +// "type": [ "string", "null" ], +// "description": "An identifier for the data on which a data +// breakpoint can be registered with the `setDataBreakpoints` +// request or null if no data breakpoint is available. If a +// `variablesReference` or `frameId` is passed, the `dataId` is +// valid in the current suspended state, otherwise it's valid +// indefinitely. See 'Lifetime of Object References' in the Overview +// section for details. Breakpoints set using the `dataId` in the +// `setDataBreakpoints` request may outlive the lifetime of the +// associated `dataId`." +// }, +// "description": { +// "type": "string", +// "description": "UI string that describes on what data the +// breakpoint is set on or why a data breakpoint is not available." +// }, +// "accessTypes": { +// "type": "array", +// "items": { +// "$ref": "#/definitions/DataBreakpointAccessType" +// }, +// "description": "Attribute lists the available access types for a +// potential data breakpoint. A UI client could surface this +// information." +// }, +// "canPersist": { +// "type": "boolean", +// "description": "Attribute indicates that a potential data +// breakpoint could be persisted across sessions." 
+// } +// }, +// "required": [ "dataId", "description" ] +// } +// }, +// "required": [ "body" ] +// }] +// } +void request_dataBreakpointInfo(const llvm::json::Object &request) { + llvm::json::Object response; + FillResponse(request, response); + llvm::json::Object body; + lldb::SBError error; + llvm::json::Array accessTypes{"read", "write", "readWrite"}; + const auto *arguments = request.getObject("arguments"); + const auto variablesReference = + GetUnsigned(arguments, "variablesReference", 0); + llvm::StringRef name = GetString(arguments, "name"); + lldb::SBFrame frame = g_dap.GetLLDBFrame(*arguments); + lldb::SBValue variable = FindVariable(variablesReference, name); + std::string addr, size; + + if (variable.IsValid()) { + addr = llvm::utohexstr(variable.GetLoadAddress()); + size = llvm::utostr(variable.GetByteSize()); + } else if (variablesReference == 0 && frame.IsValid()) { + // Name might be an expression. In this case we assume that name is composed + // of the number of bytes to watch and expression, separated by '@': + // "${size}@${expression}" + llvm::StringRef expr; + std::tie(size, expr) = name.split('@'); + lldb::SBValue value = frame.EvaluateExpression(expr.data()); + if (value.GetError().Fail()) { + lldb::SBError error = value.GetError(); + const char *error_cstr = error.GetCString(); + body.try_emplace("dataId", nullptr); + body.try_emplace("description", error_cstr && error_cstr[0] + ? 
std::string(error_cstr) + : "evaluation failed"); + } else + addr = llvm::utohexstr(value.GetValueAsUnsigned()); + } else { + auto state = g_dap.target.GetProcess().GetState(); + body.try_emplace("dataId", nullptr); + body.try_emplace("description", + "variable not found: " + llvm::utostr(state)); + } + + if (!body.getObject("dataId")) { + body.try_emplace("dataId", addr + "/" + size); + body.try_emplace("accessTypes", std::move(accessTypes)); + body.try_emplace("description", + size + " bytes at " + addr + " " + name.str()); + } + response.try_emplace("body", std::move(body)); + g_dap.SendJSON(llvm::json::Value(std::move(response))); +} + +// "SetDataBreakpointsRequest": { +// "allOf": [ { "$ref": "#/definitions/Request" }, { +// "type": "object", +// "description": "Replaces all existing data breakpoints with new data +// breakpoints.\nTo clear all data breakpoints, specify an empty +// array.\nWhen a data breakpoint is hit, a `stopped` event (with reason +// `data breakpoint`) is generated.\nClients should only call this request +// if the corresponding capability `supportsDataBreakpoints` is true.", +// "properties": { +// "command": { +// "type": "string", +// "enum": [ "setDataBreakpoints" ] +// }, +// "arguments": { +// "$ref": "#/definitions/SetDataBreakpointsArguments" +// } +// }, +// "required": [ "command", "arguments" ] +// }] +// }, +// "SetDataBreakpointsArguments": { +// "type": "object", +// "description": "Arguments for `setDataBreakpoints` request.", +// "properties": { +// "breakpoints": { +// "type": "array", +// "items": { +// "$ref": "#/definitions/DataBreakpoint" +// }, +// "description": "The contents of this array replaces all existing data +// breakpoints. An empty array clears all data breakpoints." 
+// } +// }, +// "required": [ "breakpoints" ] +// }, +// "SetDataBreakpointsResponse": { +// "allOf": [ { "$ref": "#/definitions/Response" }, { +// "type": "object", +// "description": "Response to `setDataBreakpoints` request.\nReturned is +// information about each breakpoint created by this request.", +// "properties": { +// "body": { +// "type": "object", +// "properties": { +// "breakpoints": { +// "type": "array", +// "items": { +// "$ref": "#/definitions/Breakpoint" +// }, +// "description": "Information about the data breakpoints. The array +// elements correspond to the elements of the input argument +// `breakpoints` array." +// } +// }, +// "required": [ "breakpoints" ] +// } +// }, +// "required": [ "body" ] +// }] +// } +void request_setDataBreakpoints(const llvm::json::Object &request) { + llvm::json::Object response; + lldb::SBError error; + FillResponse(request, response); + const auto *arguments = request.getObject("arguments"); + const auto *breakpoints = arguments->getArray("breakpoints"); + llvm::json::Array response_breakpoints; + g_dap.target.DeleteAllWatchpoints(); + if (breakpoints) { + for (const auto &bp : *breakpoints) { + const auto *bp_obj = bp.getAsObject(); + if (bp_obj) { + Watchpoint wp(*bp_obj); + AppendBreakpoint(&wp, response_breakpoints); + } + } + } + llvm::json::Object body; + body.try_emplace("breakpoints", std::move(response_breakpoints)); + response.try_emplace("body", std::move(body)); + g_dap.SendJSON(llvm::json::Value(std::move(response))); +} + // "SourceRequest": { // "allOf": [ { "$ref": "#/definitions/Request" }, { // "type": "object", @@ -3074,7 +3342,6 @@ void request_setVariable(const llvm::json::Object &request) { const auto variablesReference = GetUnsigned(arguments, "variablesReference", 0); llvm::StringRef name = GetString(arguments, "name"); - bool is_duplicated_variable_name = name.contains(" @"); const auto value = GetString(arguments, "value"); // Set success to false just in case we don't find the 
variable by name @@ -3095,40 +3362,8 @@ void request_setVariable(const llvm::json::Object &request) { const auto id_value = GetUnsigned(arguments, "id", UINT64_MAX); if (id_value != UINT64_MAX) { variable = g_dap.variables.GetVariable(id_value); - } else if (lldb::SBValueList *top_scope = - GetTopLevelScope(variablesReference)) { - // variablesReference is one of our scopes, not an actual variable it is - // asking for a variable in locals or globals or registers - int64_t end_idx = top_scope->GetSize(); - // Searching backward so that we choose the variable in closest scope - // among variables of the same name. - for (int64_t i = end_idx - 1; i >= 0; --i) { - lldb::SBValue curr_variable = top_scope->GetValueAtIndex(i); - std::string variable_name = CreateUniqueVariableNameForDisplay( - curr_variable, is_duplicated_variable_name); - if (variable_name == name) { - variable = curr_variable; - break; - } - } } else { - // This is not under the globals or locals scope, so there are no duplicated - // names. - - // We have a named item within an actual variable so we need to find it - // withing the container variable by name. 
- lldb::SBValue container = g_dap.variables.GetVariable(variablesReference); - variable = container.GetChildMemberWithName(name.data()); - if (!variable.IsValid()) { - if (name.starts_with("[")) { - llvm::StringRef index_str(name.drop_front(1)); - uint64_t index = 0; - if (!index_str.consumeInteger(0, index)) { - if (index_str == "]") - variable = container.GetChildAtIndex(index); - } - } - } + variable = FindVariable(variablesReference, name); } if (variable.IsValid()) { @@ -3611,6 +3846,10 @@ void RegisterRequestCallbacks() { request_setExceptionBreakpoints); g_dap.RegisterRequestCallback("setFunctionBreakpoints", request_setFunctionBreakpoints); + g_dap.RegisterRequestCallback("dataBreakpointInfo", + request_dataBreakpointInfo); + g_dap.RegisterRequestCallback("setDataBreakpoints", + request_setDataBreakpoints); g_dap.RegisterRequestCallback("setVariable", request_setVariable); g_dap.RegisterRequestCallback("source", request_source); g_dap.RegisterRequestCallback("stackTrace", request_stackTrace); diff --git a/llvm/utils/gn/secondary/lldb/tools/lldb-dap/BUILD.gn b/llvm/utils/gn/secondary/lldb/tools/lldb-dap/BUILD.gn index 98c2068f6da291..dc958934485ec1 100644 --- a/llvm/utils/gn/secondary/lldb/tools/lldb-dap/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/tools/lldb-dap/BUILD.gn @@ -52,5 +52,6 @@ executable("lldb-dap") { "RunInTerminal.cpp", "SourceBreakpoint.cpp", "lldb-dap.cpp", + "Watchpoint.cpp" ] } From 473ef10b0fc93eeb2cbb3b2cf2f1b748eac6ddd9 Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Tue, 13 Feb 2024 13:43:21 -0800 Subject: [PATCH 062/240] [WebAssembly] Demote PHIs in catchswitch BB only (#81570) `DemoteCatchSwitchPHIOnly` option in `WinEHPrepare` pass was added in https://github.com/llvm/llvm-project/commit/99d60e0dabcf20f4db683da83cde905b7a1373de, because Wasm EH uses `WinEHPrepare`, but it doesn't need to demote all PHIs. 
PHIs in `catchswitch` BBs have to be removed (= demoted) because `catchswitch`s are removed in ISel and `catchswitch` BBs are removed as well, so they can't have other instructions. But because Wasm EH doesn't use funclets, PHIs in `catchpad` or `cleanuppad` BBs don't need to be demoted. That was the reason `DemoteCatchSwitchPHIOnly` option was added, in order not to demote more instructions unnecessarily. The problem is it should have been set to `true` for Wasm EH. (Its default value is `false` for WinEH) And I mistakenly set it to `false` and wasn't aware of this for more than 5 years. This was not the end of the world; it just means we've been demoting more instructions than we should, possibly hurting code size. In practice I think it would've had hardly any effect in real performance given that the occurrence of PHIs in `catchpad` or `cleanuppad` BBs is not very frequent and many people run other optimizers like Binaryen anyway. --- llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 3 ++- llvm/lib/CodeGen/TargetPassConfig.cpp | 2 +- llvm/test/CodeGen/WebAssembly/wasm-eh-prepare.ll | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 6cf54085915230..4172fbc96d1e5d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -249,7 +249,8 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, "WinEHPrepare failed to remove PHIs from imaginary BBs"); continue; } - if (isa(PadInst)) + if (isa(PadInst) && + Personality != EHPersonality::Wasm_CXX) assert(&*BB.begin() == PadInst && "WinEHPrepare failed to demote PHIs"); } diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index e82f14e878141a..2ed39a5696e205 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++
b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -918,7 +918,7 @@ void TargetPassConfig::addPassesToHandleExceptions() { // on catchpads and cleanuppads because it does not outline them into // funclets. Catchswitch blocks are not lowered in SelectionDAG, so we // should remove PHIs there. - addPass(createWinEHPass(/*DemoteCatchSwitchPHIOnly=*/false)); + addPass(createWinEHPass(/*DemoteCatchSwitchPHIOnly=*/true)); addPass(createWasmEHPass()); break; case ExceptionHandling::None: diff --git a/llvm/test/CodeGen/WebAssembly/wasm-eh-prepare.ll b/llvm/test/CodeGen/WebAssembly/wasm-eh-prepare.ll index bd577e387c72b7..164c138cb7578e 100644 --- a/llvm/test/CodeGen/WebAssembly/wasm-eh-prepare.ll +++ b/llvm/test/CodeGen/WebAssembly/wasm-eh-prepare.ll @@ -2,6 +2,7 @@ ; RUN: opt < %s -win-eh-prepare -demote-catchswitch-only -wasm-eh-prepare -S --mattr=+atomics,+bulk-memory | FileCheck %s ; RUN: opt < %s -passes='win-eh-prepare,wasm-eh-prepare' -S | FileCheck %s ; RUN: opt < %s -passes='win-eh-prepare,wasm-eh-prepare' -S --mattr=+atomics,+bulk-memory | FileCheck %s +; RUN: llc < %s -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling -stop-after=wasm-eh-prepare | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" target triple = "wasm32-unknown-unknown" @@ -245,7 +246,6 @@ bb.true: ; preds = %entry bb.true.0: ; preds = %bb.true br label %merge -; CHECK: bb.false bb.false: ; preds = %entry br label %merge From fd3a0c185f177351207783fc2a604dac086cdaf7 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Tue, 13 Feb 2024 21:41:28 +0000 Subject: [PATCH 063/240] Revert "Reapply "[DebugInfo][RemoveDIs] Turn on non-instrinsic debug-info by default"" This reverts commit d759618df76361a8e490eeae5c5399e0738cbfd0. Causes crashes, see comments in https://github.com/llvm/llvm-project/commit/d759618df76361a8e490eeae5c5399e0738cbfd0. 
--- llvm/lib/IR/BasicBlock.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp index bf02eba9fb448d..fe9d0d08c5fe97 100644 --- a/llvm/lib/IR/BasicBlock.cpp +++ b/llvm/lib/IR/BasicBlock.cpp @@ -34,7 +34,7 @@ cl::opt UseNewDbgInfoFormat("experimental-debuginfo-iterators", cl::desc("Enable communicating debuginfo positions " "through iterators, eliminating intrinsics"), - cl::init(true)); + cl::init(false)); DPMarker *BasicBlock::createMarker(Instruction *I) { assert(IsNewDbgInfoFormat && From 84277fe90f98edfa8fb7b7f06ab430610ef85248 Mon Sep 17 00:00:00 2001 From: lntue <35648136+lntue@users.noreply.github.com> Date: Tue, 13 Feb 2024 16:48:14 -0500 Subject: [PATCH 064/240] [libc][stdfix] Generate stdfix.h header with fixed point precision macros according to ISO/IEC TR 18037:2008 standard, and add fixed point type support detection. (#81255) Fixed point extension standard: https://standards.iso.org/ittf/PubliclyAvailableStandards/c051126_ISO_IEC_TR_18037_2008.zip --- .../cmake/modules/CheckCompilerFeatures.cmake | 12 +- .../compiler_features/check_fixed_point.cpp | 5 + libc/config/linux/api.td | 1 + libc/config/linux/x86_64/headers.txt | 1 + libc/docs/math/index.rst | 6 + libc/docs/math/stdfix.rst | 136 ++++++++ libc/include/CMakeLists.txt | 8 + libc/include/llvm-libc-macros/CMakeLists.txt | 6 + libc/include/llvm-libc-macros/stdfix-macros.h | 330 ++++++++++++++++++ libc/include/stdfix.h.def | 21 ++ libc/spec/stdc_ext.td | 16 + 11 files changed, 539 insertions(+), 3 deletions(-) create mode 100644 libc/cmake/modules/compiler_features/check_fixed_point.cpp create mode 100644 libc/docs/math/stdfix.rst create mode 100644 libc/include/llvm-libc-macros/stdfix-macros.h create mode 100644 libc/include/stdfix.h.def create mode 100644 libc/spec/stdc_ext.td diff --git a/libc/cmake/modules/CheckCompilerFeatures.cmake b/libc/cmake/modules/CheckCompilerFeatures.cmake index 983ce86ab1b253..9789d72f99dc4c 
100644 --- a/libc/cmake/modules/CheckCompilerFeatures.cmake +++ b/libc/cmake/modules/CheckCompilerFeatures.cmake @@ -2,8 +2,7 @@ # Compiler features definition and flags # ------------------------------------------------------------------------------ -# Initialize ALL_COMPILER_FEATURES as empty list. -set(ALL_COMPILER_FEATURES "float128") +set(ALL_COMPILER_FEATURES "float128" "fixed_point") # Making sure ALL_COMPILER_FEATURES is sorted. list(SORT ALL_COMPILER_FEATURES) @@ -42,16 +41,23 @@ set(AVAILABLE_COMPILER_FEATURES "") # Try compile a C file to check if flag is supported. set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) foreach(feature IN LISTS ALL_COMPILER_FEATURES) + set(compile_options ${LIBC_COMPILE_OPTIONS_NATIVE}) + if(${feature} STREQUAL "fixed_point") + list(APPEND compile_options "-ffixed-point") + endif() + try_compile( has_feature ${CMAKE_CURRENT_BINARY_DIR}/compiler_features SOURCES ${LIBC_SOURCE_DIR}/cmake/modules/compiler_features/check_${feature}.cpp - COMPILE_DEFINITIONS -I${LIBC_SOURCE_DIR} ${LIBC_COMPILE_OPTIONS_NATIVE} + COMPILE_DEFINITIONS -I${LIBC_SOURCE_DIR} ${compile_options} ) if(has_feature) list(APPEND AVAILABLE_COMPILER_FEATURES ${feature}) if(${feature} STREQUAL "float128") set(LIBC_COMPILER_HAS_FLOAT128 TRUE) + elseif(${feature} STREQUAL "fixed_point") + set(LIBC_COMPILER_HAS_FIXED_POINT TRUE) endif() endif() endforeach() diff --git a/libc/cmake/modules/compiler_features/check_fixed_point.cpp b/libc/cmake/modules/compiler_features/check_fixed_point.cpp new file mode 100644 index 00000000000000..02932dbf4d722d --- /dev/null +++ b/libc/cmake/modules/compiler_features/check_fixed_point.cpp @@ -0,0 +1,5 @@ +#include "include/llvm-libc-macros/stdfix-macros.h" + +#ifndef LIBC_COMPILER_HAS_FIXED_POINT +#error unsupported +#endif diff --git a/libc/config/linux/api.td b/libc/config/linux/api.td index c1f052e1bfa3ce..5a1d7642f1aebe 100644 --- a/libc/config/linux/api.td +++ b/libc/config/linux/api.td @@ -6,6 +6,7 @@ include "spec/linux.td" 
include "spec/gnu_ext.td" include "spec/bsd_ext.td" include "spec/llvm_libc_ext.td" +include "spec/stdc_ext.td" def AssertMacro : MacroDef<"assert"> { let Defn = [{ diff --git a/libc/config/linux/x86_64/headers.txt b/libc/config/linux/x86_64/headers.txt index 8f37cf9f30f8d0..d0c662c2bc072e 100644 --- a/libc/config/linux/x86_64/headers.txt +++ b/libc/config/linux/x86_64/headers.txt @@ -16,6 +16,7 @@ set(TARGET_PUBLIC_HEADERS libc.include.spawn libc.include.setjmp libc.include.stdbit + libc.include.stdfix libc.include.stdio libc.include.stdlib libc.include.string diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst index 94604491a73ecb..bd2af656d9eecd 100644 --- a/libc/docs/math/index.rst +++ b/libc/docs/math/index.rst @@ -16,6 +16,7 @@ Math Functions :hidden: log.rst + stdfix.rst .. contents:: Table of Contents @@ -611,6 +612,11 @@ Algorithms + Implementation Details * :doc:`log` +Fixed-point Arithmetics +======================= + +* :doc:`stdfix` + References ========== diff --git a/libc/docs/math/stdfix.rst b/libc/docs/math/stdfix.rst new file mode 100644 index 00000000000000..85d721b358f5ec --- /dev/null +++ b/libc/docs/math/stdfix.rst @@ -0,0 +1,136 @@ +================ +StdFix Functions +================ + +.. include:: ../check.rst + +Standards +--------- + +- stdfix.h is specified in the `ISO/IEC TR 18037:2008 `_, + C extensions to support embedded processors . + +- Its `specifications `_. + +--------------- +Source location +--------------- + +- The main source for fixed-point functions is located at: + ``libc/src/stdfix`` with subdirectories for internal implementations. + +--------------------- +Implementation Status +--------------------- + +Requirements +============ + +- In order to build LLVM libc to support fixed-point arithmetics, we need the + compiler to support the basic fixed-point types `_Fract` and `_Accum` in + C++. 
+ +- For the users to be able to use the generated headers, their compiler needs + to support `_Fract` and `_Accum` types in C or C++. + +- This compiler support is checked at the beginning of + `libc/include/llvm-libc-macros/stdfix-macros.h `_. + + + +Predefined Macros +================= + +- We use the macro `LIBC_COMPILER_HAS_FIXED_POINT` to specify whether the + compiler support the fixed-point types. + +- Other predefined precision macros specified in section 7.18a.3 are defined + in `libc/include/llvm-libc-macros/stdfix-macros.h `_ + using the default configuration of `typical desktop processor` in section + A.3. + + +Fixed-point Arithmetics +======================= + ++---------------+------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------+ +| Function Name | _Fract (r) | _Accum (k) | +| +------------------------------+----------------------------+------------------------------+------------------------------+----------------------------+------------------------------+ +| | short (hr) | _ (r) | long (lr) | short (hk) | _ (k) | long (lk) | +| +----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+ +| | unsigned (uhr) | signed (hr) | unsigned (ur) | signed (r) | unsigned (ulr) | signed (lr) | unsigned (uhk) | signed (hk) | unsigned (uk) | signed (k) | unsigned (ulk) | signed (lk) | ++===============+================+=============+===============+============+================+=============+================+=============+===============+============+================+=============+ +| abs | | | | | | | | | | | | | ++---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+ +| 
bits\* | | | | | | | | | | | | | ++---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+ +| \*bits | | | | | | | | | | | | | ++---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+ +| countls | | | | | | | | | | | | | ++---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+ +| divi | | | | | | | | | | | | | ++---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+ +| idivi | | | | | | | | | | | | | ++---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+ +| muli | | | | | | | | | | | | | ++---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+ +| rdivi | | | | | | | | | | | | | ++---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+ +| round | | | | | | | | | | | | | ++---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+ +| sqrt | | | | | | | | | | | | | 
++---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+ + +================== ========= +Type Generic Macro Available +================== ========= +absfx +countlsfx +roundfx +================== ========= + + +Higher math functions +===================== + ++---------------+------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------+ +| Function Name | _Fract (r) | _Accum (k) | +| +------------------------------+----------------------------+------------------------------+------------------------------+----------------------------+------------------------------+ +| | short (hr) | _ (r) | long (lr) | short (hk) | _ (k) | long (lk) | +| +----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+ +| | unsigned (uhr) | signed (hr) | unsigned (ur) | signed (r) | unsigned (ulr) | signed (lr) | unsigned (uhk) | signed (hk) | unsigned (uk) | signed (k) | unsigned (ulk) | signed (lk) | ++===============+================+=============+===============+============+================+=============+================+=============+===============+============+================+=============+ +| cos | | | | | | | | | | | | | ++---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+ +| exp | | | | | | | | | | | | | ++---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+ +| log | | | | | | | | | | | | | 
++---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+ +| sin | | | | | | | | | | | | | ++---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+ +| tan | | | | | | | | | | | | | ++---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+ + + +Conversion Functions +==================== + ++---------------+------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------+ +| Function Name | _Fract (r) | _Accum (k) | +| +------------------------------+----------------------------+------------------------------+------------------------------+----------------------------+------------------------------+ +| | short (hr) | _ (r) | long (lr) | short (hk) | _ (k) | long (lk) | +| +----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+ +| | unsigned (uhr) | signed (hr) | unsigned (ur) | signed (r) | unsigned (ulr) | signed (lr) | unsigned (uhk) | signed (hk) | unsigned (uk) | signed (k) | unsigned (ulk) | signed (lk) | ++===============+================+=============+===============+============+================+=============+================+=============+===============+============+================+=============+ +| fprintf | | | | | | | | | | | | | 
++---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+ +| fscanf | | | | | | | | | | | | | ++---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+ +| strtofx | | | | | | | | | | | | | ++---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+ + + +Warnings +======== + +This is currently a work-in-progress, its headers, macros, and ABI are still unstable, and might be modified. diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt index 332410453b54d1..5882d03593a54e 100644 --- a/libc/include/CMakeLists.txt +++ b/libc/include/CMakeLists.txt @@ -104,6 +104,14 @@ add_gen_header( .llvm-libc-types.float128 ) +add_gen_header( + stdfix + DEF_FILE stdfix.h.def + GEN_HDR stdfix.h + DEPENDS + .llvm-libc-macros.stdfix_macros +) + # TODO: This should be conditional on POSIX networking being included. 
file(MAKE_DIRECTORY ${LIBC_INCLUDE_DIR}/arpa) diff --git a/libc/include/llvm-libc-macros/CMakeLists.txt b/libc/include/llvm-libc-macros/CMakeLists.txt index 562769a5e84cef..225885d3a9b085 100644 --- a/libc/include/llvm-libc-macros/CMakeLists.txt +++ b/libc/include/llvm-libc-macros/CMakeLists.txt @@ -227,3 +227,9 @@ add_macro_header( HDR inttypes-macros.h ) + +add_macro_header( + stdfix_macros + HDR + stdfix-macros.h +) diff --git a/libc/include/llvm-libc-macros/stdfix-macros.h b/libc/include/llvm-libc-macros/stdfix-macros.h new file mode 100644 index 00000000000000..7cb74adc3999fe --- /dev/null +++ b/libc/include/llvm-libc-macros/stdfix-macros.h @@ -0,0 +1,330 @@ +//===-- Definitions from stdfix.h -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __LLVM_LIBC_MACROS_STDFIX_MACROS_H +#define __LLVM_LIBC_MACROS_STDFIX_MACROS_H + +#ifdef __clang__ +#if (!defined(__cplusplus) || (__clang_major__ >= 18)) +// _Fract and _Accum types are available +#define LIBC_COMPILER_HAS_FIXED_POINT +#endif // __cplusplus +#endif // __clang__ + +#ifdef LIBC_COMPILER_HAS_FIXED_POINT + +#define fract _Fract +#define accum _Accum +#define sat _Sat + +// Default values: from ISO/IEC TR 18037:2008 standard - Annex A.3 - Typical +// desktop processor. 
+ +#ifdef __SFRACT_FBIT__ +#define SFRACT_FBIT __SFRACT_FBIT__ +#else +#define SFRACT_FBIT 7 +#endif // SFRACT_FBIT + +#ifdef __SFRACT_MIN__ +#define SFRACT_MIN __SFRACT_MIN__ +#else +#define SFRACT_MIN (-0.5HR - 0.5HR) +#endif // SFRACT_MIN + +#ifdef __SFRACT_MAX__ +#define SFRACT_MAX __SFRACT_MAX__ +#else +#define SFRACT_MAX 0x1.FCp-1HR +#endif // SFRACT_MAX + +#ifdef __SFRACT_EPSILON__ +#define SFRACT_EPSILON __SFRACT_EPSILON__ +#else +#define SFRACT_EPSILON 0x1.0p-7HR +#endif // SFRACT_EPSILON + +#ifdef __USFRACT_FBIT__ +#define USFRACT_FBIT __USFRACT_FBIT__ +#else +#define USFRACT_FBIT 8 +#endif // USFRACT_FBIT + +#define USFRACT_MIN 0.0UHR + +#ifdef __USFRACT_MAX__ +#define USFRACT_MAX __USFRACT_MAX__ +#else +#define USFRACT_MAX 0x1.FEp-1UHR +#endif // USFRACT_MAX + +#ifdef __USFRACT_EPSILON__ +#define USFRACT_EPSILON __USFRACT_EPSILON__ +#else +#define USFRACT_EPSILON 0x1.0p-8UHR +#endif // USFRACT_EPSILON + +#ifdef __FRACT_FBIT__ +#define FRACT_FBIT __FRACT_FBIT__ +#else +#define FRACT_FBIT 15 +#endif // FRACT_FBIT + +#ifdef __FRACT_MIN__ +#define FRACT_MIN __FRACT_MIN__ +#else +#define FRACT_MIN (-0.5R - 0.5R) +#endif // FRACT_MIN + +#ifdef __FRACT_MAX__ +#define FRACT_MAX __FRACT_MAX__ +#else +#define FRACT_MAX 0x1.FFFCp-1R +#endif // FRACT_MAX + +#ifdef __FRACT_EPSILON__ +#define FRACT_EPSILON __FRACT_EPSILON__ +#else +#define FRACT_EPSILON 0x1.0p-15R +#endif // FRACT_EPSILON + +#ifdef __UFRACT_FBIT__ +#define UFRACT_FBIT __UFRACT_FBIT__ +#else +#define UFRACT_FBIT 16 +#endif // UFRACT_FBIT + +#define UFRACT_MIN 0.0UR + +#ifdef __UFRACT_MAX__ +#define UFRACT_MAX __UFRACT_MAX__ +#else +#define UFRACT_MAX 0x1.FFFEp-1UR +#endif // UFRACT_MAX + +#ifdef __UFRACT_EPSILON__ +#define UFRACT_EPSILON __UFRACT_EPSILON__ +#else +#define UFRACT_EPSILON 0x1.0p-16UR +#endif // UFRACT_EPSILON + +#ifdef __LFRACT_FBIT__ +#define LFRACT_FBIT __LFRACT_FBIT__ +#else +#define LFRACT_FBIT 31 +#endif // LFRACT_FBIT + +#ifdef __LFRACT_MIN__ +#define LFRACT_MIN __LFRACT_MIN__ 
+#else +#define LFRACT_MIN (-0.5LR - 0.5LR) +#endif // LFRACT_MIN + +#ifdef __LFRACT_MAX__ +#define LFRACT_MAX __LFRACT_MAX__ +#else +#define LFRACT_MAX 0x1.FFFFFFFCp-1LR +#endif // LFRACT_MAX + +#ifdef __LFRACT_EPSILON__ +#define LFRACT_EPSILON __LFRACT_EPSILON__ +#else +#define LFRACT_EPSILON 0x1.0p-31LR +#endif // LFRACT_EPSILON + +#ifdef __ULFRACT_FBIT__ +#define ULFRACT_FBIT __ULFRACT_FBIT__ +#else +#define ULFRACT_FBIT 32 +#endif // ULFRACT_FBIT + +#define ULFRACT_MIN 0.0ULR + +#ifdef __ULFRACT_MAX__ +#define ULFRACT_MAX __ULFRACT_MAX__ +#else +#define ULFRACT_MAX 0x1.FFFFFFFEp-1ULR +#endif // ULFRACT_MAX + +#ifdef __ULFRACT_EPSILON__ +#define ULFRACT_EPSILON __ULFRACT_EPSILON__ +#else +#define ULFRACT_EPSILON 0x1.0p-32ULR +#endif // ULFRACT_EPSILON + +#ifdef __SACCUM_FBIT__ +#define SACCUM_FBIT __SACCUM_FBIT__ +#else +#define SACCUM_FBIT 7 +#endif // SACCUM_FBIT + +#ifdef __SACCUM_IBIT__ +#define SACCUM_IBIT __SACCUM_IBIT__ +#else +#define SACCUM_IBIT 8 +#endif // SACCUM_IBIT + +#ifdef __SACCUM_MIN__ +#define SACCUM_MIN __SACCUM_MIN__ +#else +#define SACCUM_MIN (-0x1.0p+7HK - 0x1.0p+7HK) +#endif // SACCUM_MIN + +#ifdef __SACCUM_MAX__ +#define SACCUM_MAX __SACCUM_MAX__ +#else +#define SACCUM_MAX 0x1.FFFCp+7HK +#endif // SACCUM_MAX + +#ifdef __SACCUM_EPSILON__ +#define SACCUM_EPSILON __SACCUM_EPSILON__ +#else +#define SACCUM_EPSILON 0x1.0p-7HK +#endif // SACCUM_EPSILON + +#ifdef __USACCUM_FBIT__ +#define USACCUM_FBIT __USACCUM_FBIT__ +#else +#define USACCUM_FBIT 8 +#endif // USACCUM_FBIT + +#ifdef __USACCUM_IBIT__ +#define USACCUM_IBIT __USACCUM_IBIT__ +#else +#define USACCUM_IBIT 8 +#endif // USACCUM_IBIT + +#define USACCUM_MIN 0.0UHK + +#ifdef __USACCUM_MAX__ +#define USACCUM_MAX __USACCUM_MAX__ +#else +#define USACCUM_MAX 0x1.FFFEp+7UHK +#endif // USACCUM_MAX + +#ifdef __USACCUM_EPSILON__ +#define USACCUM_EPSILON __USACCUM_EPSILON__ +#else +#define USACCUM_EPSILON 0x1.0p-8UHK +#endif // USACCUM_EPSILON + +#ifdef __ACCUM_FBIT__ +#define ACCUM_FBIT 
__ACCUM_FBIT__ +#else +#define ACCUM_FBIT 15 +#endif // ACCUM_FBIT + +#ifdef __ACCUM_IBIT__ +#define ACCUM_IBIT __ACCUM_IBIT__ +#else +#define ACCUM_IBIT 16 +#endif // ACCUM_IBIT + +#ifdef __ACCUM_MIN__ +#define ACCUM_MIN __ACCUM_MIN__ +#else +#define ACCUM_MIN (-0x1.0p+15K - 0x1.0p+15K) +#endif // ACCUM_MIN + +#ifdef __ACCUM_MAX__ +#define ACCUM_MAX __ACCUM_MAX__ +#else +#define ACCUM_MAX 0x1.FFFFFFFCp+15K +#endif // ACCUM_MAX + +#ifdef __ACCUM_EPSILON__ +#define ACCUM_EPSILON __ACCUM_EPSILON__ +#else +#define ACCUM_EPSILON 0x1.0p-15K +#endif // ACCUM_EPSILON + +#ifdef __UACCUM_FBIT__ +#define UACCUM_FBIT __UACCUM_FBIT__ +#else +#define UACCUM_FBIT 16 +#endif // UACCUM_FBIT + +#ifdef __UACCUM_IBIT__ +#define UACCUM_IBIT __UACCUM_IBIT__ +#else +#define UACCUM_IBIT 16 +#endif // UACCUM_IBIT + +#define UACCUM_MIN 0.0UK + +#ifdef __UACCUM_MAX__ +#define UACCUM_MAX __UACCUM_MAX__ +#else +#define UACCUM_MAX 0x1.FFFFFFFEp+15UK +#endif // UACCUM_MAX + +#ifdef __UACCUM_EPSILON__ +#define UACCUM_EPSILON __UACCUM_EPSILON__ +#else +#define UACCUM_EPSILON 0x1.0p-16UK +#endif // UACCUM_EPSILON + +#ifdef __LACCUM_FBIT__ +#define LACCUM_FBIT __LACCUM_FBIT__ +#else +#define LACCUM_FBIT 31 +#endif // LACCUM_FBIT + +#ifdef __LACCUM_IBIT__ +#define LACCUM_IBIT __LACCUM_IBIT__ +#else +#define LACCUM_IBIT 32 +#endif // LACCUM_IBIT + +#ifdef __LACCUM_MIN__ +#define LACCUM_MIN __LACCUM_MIN__ +#else +#define LACCUM_MIN (-0x1.0p+31LK - 0x1.0p+31LK) +#endif // LACCUM_MIN + +#ifdef __LACCUM_MAX__ +#define LACCUM_MAX __LACCUM_MAX__ +#else +#define LACCUM_MAX 0x1.FFFFFFFFFFFFFFFCp+31LK +#endif // LACCUM_MAX + +#ifdef __LACCUM_EPSILON__ +#define LACCUM_EPSILON __LACCUM_EPSILON__ +#else +#define LACCUM_EPSILON 0x1.0p-31LK +#endif // LACCUM_EPSILON + +#ifdef __ULACCUM_FBIT__ +#define ULACCUM_FBIT __ULACCUM_FBIT__ +#else +#define ULACCUM_FBIT 32 +#endif // ULACCUM_FBIT + +#ifdef __ULACCUM_IBIT__ +#define ULACCUM_IBIT __ULACCUM_IBIT__ +#else +#define ULACCUM_IBIT 32 +#endif // ULACCUM_IBIT + 
+#define ULACCUM_MIN 0.0ULK + +#ifdef __ULACCUM_MAX__ +#define ULACCUM_MAX __ULACCUM_MAX__ +#else +#define ULACCUM_MAX 0x1.FFFFFFFFFFFFFFFEp+31ULK +#endif // ULACCUM_MAX + +#ifdef __ULACCUM_EPSILON__ +#define ULACCUM_EPSILON __ULACCUM_EPSILON__ +#else +#define ULACCUM_EPSILON 0x1.0p-32ULK +#endif // ULACCUM_EPSILON + +#endif // LIBC_COMPILER_HAS_FIXED_POINT + +#endif // __LLVM_LIBC_MACROS_STDFIX_MACROS_H diff --git a/libc/include/stdfix.h.def b/libc/include/stdfix.h.def new file mode 100644 index 00000000000000..368eeb33f2f0af --- /dev/null +++ b/libc/include/stdfix.h.def @@ -0,0 +1,21 @@ +//===-- C standard library header stdfix.h --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_STDFIX_H +#define LLVM_LIBC_STDFIX_H + +#include <__llvm-libc-common.h> +#include <llvm-libc-macros/stdfix-macros.h> + +// From ISO/IEC TR 18037:2008 standard: +// https://www.iso.org/standard/51126.html +// https://standards.iso.org/ittf/PubliclyAvailableStandards/c051126_ISO_IEC_TR_18037_2008.zip + +%%public_api() + +#endif // LLVM_LIBC_STDFIX_H diff --git a/libc/spec/stdc_ext.td b/libc/spec/stdc_ext.td new file mode 100644 index 00000000000000..4a5b74f7bdc364 --- /dev/null +++ b/libc/spec/stdc_ext.td @@ -0,0 +1,16 @@ +def StdcExt : StandardSpec<"stdc_ext"> { + // From ISO/IEC TR 18037:2008 standard: + // https://standards.iso.org/ittf/PubliclyAvailableStandards/c051126_ISO_IEC_TR_18037_2008.zip + HeaderSpec StdFix = HeaderSpec< + "stdfix.h", + [], // macros + [], // types + [], // enums + [ // functions + ] + >; + + let Headers = [ + StdFix, + ]; +} From 9f87bfe8bf2a3bd9b761e61c3cea3ccfec0553ec Mon Sep 17 00:00:00 2001 From: Jessica Clarke Date: Tue, 13 Feb 2024 22:00:30 +0000 Subject: [PATCH 065/240] [SPIRV] Add to 
LINK_COMPONENTS to fix BUILD_SHARED_LIBS build Fixes: 7b08b4360b488b35428c97132b3f9d2a777bd770 --- llvm/lib/Target/SPIRV/Analysis/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/lib/Target/SPIRV/Analysis/CMakeLists.txt b/llvm/lib/Target/SPIRV/Analysis/CMakeLists.txt index 132d8ff838353a..4d4351132d3ded 100644 --- a/llvm/lib/Target/SPIRV/Analysis/CMakeLists.txt +++ b/llvm/lib/Target/SPIRV/Analysis/CMakeLists.txt @@ -2,8 +2,10 @@ add_llvm_component_library(LLVMSPIRVAnalysis SPIRVConvergenceRegionAnalysis.cpp LINK_COMPONENTS + Analysis Core Support + TransformUtils ADD_TO_COMPONENT SPIRV From c92bf6b689a1b6c662f3fb30318c67257dbca864 Mon Sep 17 00:00:00 2001 From: Jessica Clarke Date: Tue, 13 Feb 2024 22:08:33 +0000 Subject: [PATCH 066/240] [SPIRV] Add to LINK_COMPONENTS to fix BUILD_SHARED_LIBS check Fixes: 7b08b4360b488b35428c97132b3f9d2a777bd770 --- llvm/unittests/Target/SPIRV/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/unittests/Target/SPIRV/CMakeLists.txt b/llvm/unittests/Target/SPIRV/CMakeLists.txt index 326a74b0cbe50e..83ae215c512ca2 100644 --- a/llvm/unittests/Target/SPIRV/CMakeLists.txt +++ b/llvm/unittests/Target/SPIRV/CMakeLists.txt @@ -4,6 +4,7 @@ include_directories( ) set(LLVM_LINK_COMPONENTS + Analysis AsmParser Core SPIRVCodeGen From 5b386158aacac4b41126983a5379d36ed413d0ea Mon Sep 17 00:00:00 2001 From: Jordan Rupprecht Date: Tue, 13 Feb 2024 14:19:41 -0800 Subject: [PATCH 067/240] [lldb][test] Switch LLDB API tests from vendored unittest2 to unittest (#79945) This removes the dependency LLDB API tests have on lldb/third_party/Python/module/unittest2, and instead uses the standard one provided by Python. This does not actually remove the vendored dep yet, nor update the docs. I'll do both those once this sticks. Non-trivial changes to call out: - expected failures (i.e. 
"bugnumber") don't have a reason anymore, so those params were removed - `assertItemsEqual` is now called `assertCountEqual` - When a test is marked xfail, our copy of unittest2 considers failures during teardown to be OK, but modern unittest does not. See TestThreadLocal.py. (Very likely could be a real bug/leak). - Our copy of unittest2 was patched to print all test results, even ones that don't happen, e.g. `(5 passes, 0 failures, 1 errors, 0 skipped, ...)`, but standard unittest prints a terser message that omits test result types that didn't happen, e.g. `OK (skipped=1)`. Our lit integration parses this stderr and needs to be updated w/ that expectation. I tested this w/ `ninja check-lldb-api` on Linux. There's a good chance non-Linux tests have similar quirks, but I'm not able to uncover those. --- .../Python/lldbsuite/test/configuration.py | 4 +- .../Python/lldbsuite/test/decorators.py | 34 +++++++------- lldb/packages/Python/lldbsuite/test/dotest.py | 14 +++--- .../Python/lldbsuite/test/lldbtest.py | 28 ++++-------- .../Python/lldbsuite/test/test_result.py | 18 ++++---- .../API/commands/expression/test/TestExprs.py | 4 +- .../TestThreadPlanUserBreakpoint.py | 2 +- .../jitloader_gdb/TestJITLoaderGDB.py | 4 +- .../thread/state/TestThreadStates.py | 6 +-- .../API/functionalities/tty/TestTerminal.py | 6 +-- .../API/lang/c/shared_lib/TestSharedLib.py | 4 +- .../TestSharedLibStrippedSymbols.py | 4 +- .../lang/cpp/namespace/TestNamespaceLookup.py | 10 ++--- .../TestCppReferenceToOuterClass.py | 4 +- .../lang/cpp/thread_local/TestThreadLocal.py | 5 +++ .../lang/objc/hidden-ivars/TestHiddenIvars.py | 4 +- lldb/test/API/lldbtest.py | 45 +++++++++++++++---- .../API/macosx/universal/TestUniversal.py | 8 ++-- .../TestGdbRemoteLibrariesSvr4Support.py | 2 +- .../test/test_lldbgdbserverutils.py | 4 +- 20 files changed, 115 insertions(+), 95 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/configuration.py b/lldb/packages/Python/lldbsuite/test/configuration.py 
index 2a4b9b3c070c7f..685f491c85fe19 100644 --- a/lldb/packages/Python/lldbsuite/test/configuration.py +++ b/lldb/packages/Python/lldbsuite/test/configuration.py @@ -12,14 +12,14 @@ # Third-party modules -import unittest2 +import unittest # LLDB Modules import lldbsuite # The test suite. -suite = unittest2.TestSuite() +suite = unittest.TestSuite() # The list of categories we said we care about categories_list = None diff --git a/lldb/packages/Python/lldbsuite/test/decorators.py b/lldb/packages/Python/lldbsuite/test/decorators.py index 0fb146913388e0..a5e1fa51cf6e63 100644 --- a/lldb/packages/Python/lldbsuite/test/decorators.py +++ b/lldb/packages/Python/lldbsuite/test/decorators.py @@ -11,7 +11,7 @@ import subprocess # Third-party modules -import unittest2 +import unittest # LLDB modules import lldb @@ -115,11 +115,11 @@ def _compiler_supports( def expectedFailureIf(condition, bugnumber=None): def expectedFailure_impl(func): - if isinstance(func, type) and issubclass(func, unittest2.TestCase): + if isinstance(func, type) and issubclass(func, unittest.TestCase): raise Exception("Decorator can only be used to decorate a test method") if condition: - return unittest2.expectedFailure(func) + return unittest.expectedFailure(func) return func if callable(bugnumber): @@ -130,14 +130,14 @@ def expectedFailure_impl(func): def expectedFailureIfFn(expected_fn, bugnumber=None): def expectedFailure_impl(func): - if isinstance(func, type) and issubclass(func, unittest2.TestCase): + if isinstance(func, type) and issubclass(func, unittest.TestCase): raise Exception("Decorator can only be used to decorate a test method") @wraps(func) def wrapper(*args, **kwargs): xfail_reason = expected_fn(*args, **kwargs) if xfail_reason is not None: - xfail_func = unittest2.expectedFailure(func) + xfail_func = unittest.expectedFailure(func) xfail_func(*args, **kwargs) else: func(*args, **kwargs) @@ -157,11 +157,11 @@ def wrapper(*args, **kwargs): def skipTestIfFn(expected_fn, bugnumber=None): def 
skipTestIfFn_impl(func): - if isinstance(func, type) and issubclass(func, unittest2.TestCase): + if isinstance(func, type) and issubclass(func, unittest.TestCase): reason = expected_fn() # The return value is the reason (or None if we don't skip), so # reason is used for both args. - return unittest2.skipIf(condition=reason, reason=reason)(func) + return unittest.skipIf(condition=reason, reason=reason)(func) @wraps(func) def wrapper(*args, **kwargs): @@ -191,7 +191,7 @@ def wrapper(*args, **kwargs): def _xfailForDebugInfo(expected_fn, bugnumber=None): def expectedFailure_impl(func): - if isinstance(func, type) and issubclass(func, unittest2.TestCase): + if isinstance(func, type) and issubclass(func, unittest.TestCase): raise Exception("Decorator can only be used to decorate a test method") func.__xfail_for_debug_info_cat_fn__ = expected_fn @@ -205,7 +205,7 @@ def expectedFailure_impl(func): def _skipForDebugInfo(expected_fn, bugnumber=None): def skipImpl(func): - if isinstance(func, type) and issubclass(func, unittest2.TestCase): + if isinstance(func, type) and issubclass(func, unittest.TestCase): raise Exception("Decorator can only be used to decorate a test method") func.__skip_for_debug_info_cat_fn__ = expected_fn @@ -434,7 +434,7 @@ def add_test_categories(cat): cat = test_categories.validate(cat, True) def impl(func): - if isinstance(func, type) and issubclass(func, unittest2.TestCase): + if isinstance(func, type) and issubclass(func, unittest.TestCase): raise Exception( "@add_test_categories can only be used to decorate a test method" ) @@ -465,7 +465,7 @@ def should_skip_benchmarks_test(): def no_debug_info_test(func): """Decorate the item as a test what don't use any debug info. 
If this annotation is specified then the test runner won't generate a separate test for each debug info format.""" - if isinstance(func, type) and issubclass(func, unittest2.TestCase): + if isinstance(func, type) and issubclass(func, unittest.TestCase): raise Exception( "@no_debug_info_test can only be used to decorate a test method" ) @@ -631,7 +631,7 @@ def is_out_of_tree_debugserver(): def skipIfRemote(func): """Decorate the item to skip tests if testing remotely.""" - return unittest2.skipIf(lldb.remote_platform, "skip on remote platform")(func) + return unittest.skipIf(lldb.remote_platform, "skip on remote platform")(func) def skipIfNoSBHeaders(func): @@ -768,7 +768,7 @@ def skipUnlessDarwin(func): def skipUnlessTargetAndroid(func): - return unittest2.skipUnless( + return unittest.skipUnless( lldbplatformutil.target_is_android(), "requires target to be Android" )(func) @@ -809,7 +809,7 @@ def skipIfPlatform(oslist): """Decorate the item to skip tests if running on one of the listed platforms.""" # This decorator cannot be ported to `skipIf` yet because it is used on entire # classes, which `skipIf` explicitly forbids. - return unittest2.skipIf( + return unittest.skipIf( lldbplatformutil.getPlatform() in oslist, "skip on %s" % (", ".join(oslist)) ) @@ -818,7 +818,7 @@ def skipUnlessPlatform(oslist): """Decorate the item to skip tests unless running on one of the listed platforms.""" # This decorator cannot be ported to `skipIf` yet because it is used on entire # classes, which `skipIf` explicitly forbids. 
- return unittest2.skipUnless( + return unittest.skipUnless( lldbplatformutil.getPlatform() in oslist, "requires one of %s" % (", ".join(oslist)), ) @@ -1078,7 +1078,7 @@ def _get_bool_config(key, fail_value=True): def _get_bool_config_skip_if_decorator(key): have = _get_bool_config(key) - return unittest2.skipIf(not have, "requires " + key) + return unittest.skipIf(not have, "requires " + key) def skipIfCursesSupportMissing(func): @@ -1110,7 +1110,7 @@ def skipIfLLVMTargetMissing(target): found = True break - return unittest2.skipIf(not found, "requires " + target) + return unittest.skipIf(not found, "requires " + target) # Call sysctl on darwin to see if a specified hardware feature is available on this machine. diff --git a/lldb/packages/Python/lldbsuite/test/dotest.py b/lldb/packages/Python/lldbsuite/test/dotest.py index 4393e0caacaab8..291d7bad5c0897 100644 --- a/lldb/packages/Python/lldbsuite/test/dotest.py +++ b/lldb/packages/Python/lldbsuite/test/dotest.py @@ -33,7 +33,7 @@ import tempfile # Third-party modules -import unittest2 +import unittest # LLDB Modules import lldbsuite @@ -658,7 +658,7 @@ def iter_filters(): for filterspec in iter_filters(): filtered = True print("adding filter spec %s to module %s" % (filterspec, repr(module))) - tests = unittest2.defaultTestLoader.loadTestsFromName(filterspec, module) + tests = unittest.defaultTestLoader.loadTestsFromName(filterspec, module) configuration.suite.addTests(tests) # Forgo this module if the (base, filterspec) combo is invalid @@ -669,9 +669,7 @@ def iter_filters(): # Add the entire file's worth of tests since we're not filtered. # Also the fail-over case when the filterspec branch # (base, filterspec) combo doesn't make sense. - configuration.suite.addTests( - unittest2.defaultTestLoader.loadTestsFromName(base) - ) + configuration.suite.addTests(unittest.defaultTestLoader.loadTestsFromName(base)) def visit(prefix, dir, names): @@ -1032,7 +1030,7 @@ def run_suite(): # # Install the control-c handler. 
- unittest2.signals.installHandler() + unittest.signals.installHandler() # # Invoke the default TextTestRunner to run the test suite @@ -1066,7 +1064,7 @@ def run_suite(): # Invoke the test runner. if configuration.count == 1: - result = unittest2.TextTestRunner( + result = unittest.TextTestRunner( stream=sys.stderr, verbosity=configuration.verbose, resultclass=test_result.LLDBTestResult, @@ -1077,7 +1075,7 @@ def run_suite(): # not enforced. test_result.LLDBTestResult.__ignore_singleton__ = True for i in range(configuration.count): - result = unittest2.TextTestRunner( + result = unittest.TextTestRunner( stream=sys.stderr, verbosity=configuration.verbose, resultclass=test_result.LLDBTestResult, diff --git a/lldb/packages/Python/lldbsuite/test/lldbtest.py b/lldb/packages/Python/lldbsuite/test/lldbtest.py index d944b09cbcc472..018f2a06980a88 100644 --- a/lldb/packages/Python/lldbsuite/test/lldbtest.py +++ b/lldb/packages/Python/lldbsuite/test/lldbtest.py @@ -44,7 +44,7 @@ import traceback # Third-party modules -import unittest2 +import unittest # LLDB modules import lldb @@ -517,7 +517,7 @@ def builder_module(): return lldbplatformutil.builder_module() -class Base(unittest2.TestCase): +class Base(unittest.TestCase): """ Abstract base for performing lldb (see TestBase) or other generic tests (see BenchBase for one example). lldbtest.Base works with the test driver to @@ -1090,17 +1090,14 @@ def markFailure(self): # Once by the Python unittest framework, and a second time by us. print("FAIL", file=sbuf) - def markExpectedFailure(self, err, bugnumber): + def markExpectedFailure(self, err): """Callback invoked when an expected failure/error occurred.""" self.__expected__ = True with recording(self, False) as sbuf: # False because there's no need to write "expected failure" to the # stderr twice. # Once by the Python unittest framework, and a second time by us. 
- if bugnumber is None: - print("expected failure", file=sbuf) - else: - print("expected failure (problem id:" + str(bugnumber) + ")", file=sbuf) + print("expected failure", file=sbuf) def markSkippedTest(self): """Callback invoked when a test is skipped.""" @@ -1111,19 +1108,14 @@ def markSkippedTest(self): # Once by the Python unittest framework, and a second time by us. print("skipped test", file=sbuf) - def markUnexpectedSuccess(self, bugnumber): + def markUnexpectedSuccess(self): """Callback invoked when an unexpected success occurred.""" self.__unexpected__ = True with recording(self, False) as sbuf: # False because there's no need to write "unexpected success" to the # stderr twice. # Once by the Python unittest framework, and a second time by us. - if bugnumber is None: - print("unexpected success", file=sbuf) - else: - print( - "unexpected success (problem id:" + str(bugnumber) + ")", file=sbuf - ) + print("unexpected success", file=sbuf) def getRerunArgs(self): return " -f %s.%s" % (self.__class__.__name__, self._testMethodName) @@ -1704,13 +1696,11 @@ def test_method(self, attrvalue=attrvalue): xfail_reason = xfail_for_debug_info_cat_fn(cat) if xfail_reason: - test_method = unittest2.expectedFailure(xfail_reason)( - test_method - ) + test_method = unittest.expectedFailure(test_method) skip_reason = skip_for_debug_info_cat_fn(cat) if skip_reason: - test_method = unittest2.skip(skip_reason)(test_method) + test_method = unittest.skip(skip_reason)(test_method) newattrs[method_name] = test_method @@ -2226,7 +2216,7 @@ def completions_match(self, command, completions): match_strings = lldb.SBStringList() interp.HandleCompletion(command, len(command), 0, -1, match_strings) # match_strings is a 1-indexed list, so we have to slice... 
- self.assertItemsEqual( + self.assertCountEqual( completions, list(match_strings)[1:], "List of returned completion is wrong" ) diff --git a/lldb/packages/Python/lldbsuite/test/test_result.py b/lldb/packages/Python/lldbsuite/test/test_result.py index cb84c909c41967..20365f53a67541 100644 --- a/lldb/packages/Python/lldbsuite/test/test_result.py +++ b/lldb/packages/Python/lldbsuite/test/test_result.py @@ -12,14 +12,14 @@ import traceback # Third-party modules -import unittest2 +import unittest # LLDB Modules from . import configuration from lldbsuite.test_event import build_exception -class LLDBTestResult(unittest2.TextTestResult): +class LLDBTestResult(unittest.TextTestResult): """ Enforce a singleton pattern to allow introspection of test progress. @@ -243,7 +243,7 @@ def addFailure(self, test, err): if self.checkExclusion( configuration.xfail_tests, test.id() ) or self.checkCategoryExclusion(configuration.xfail_categories, test): - self.addExpectedFailure(test, err, None) + self.addExpectedFailure(test, err) return configuration.sdir_has_content = True @@ -264,12 +264,12 @@ def addFailure(self, test, err): else: configuration.failures_per_category[category] = 1 - def addExpectedFailure(self, test, err, bugnumber): + def addExpectedFailure(self, test, err): configuration.sdir_has_content = True - super(LLDBTestResult, self).addExpectedFailure(test, err, bugnumber) + super(LLDBTestResult, self).addExpectedFailure(test, err) method = getattr(test, "markExpectedFailure", None) if method: - method(err, bugnumber) + method(err) self.stream.write( "XFAIL: LLDB (%s) :: %s\n" % (self._config_string(test), str(test)) ) @@ -285,12 +285,12 @@ def addSkip(self, test, reason): % (self._config_string(test), str(test), reason) ) - def addUnexpectedSuccess(self, test, bugnumber): + def addUnexpectedSuccess(self, test): configuration.sdir_has_content = True - super(LLDBTestResult, self).addUnexpectedSuccess(test, bugnumber) + super(LLDBTestResult, self).addUnexpectedSuccess(test) 
method = getattr(test, "markUnexpectedSuccess", None) if method: - method(bugnumber) + method() self.stream.write( "XPASS: LLDB (%s) :: %s\n" % (self._config_string(test), str(test)) ) diff --git a/lldb/test/API/commands/expression/test/TestExprs.py b/lldb/test/API/commands/expression/test/TestExprs.py index e95c76b7104c2a..0e3d2e6cf41ffb 100644 --- a/lldb/test/API/commands/expression/test/TestExprs.py +++ b/lldb/test/API/commands/expression/test/TestExprs.py @@ -12,7 +12,7 @@ """ -import unittest2 +import unittest import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * @@ -46,7 +46,7 @@ def build_and_run(self): # llvm.org/pr17135 # APFloat::toString does not identify the correct (i.e. least) precision. - @unittest2.expectedFailure + @unittest.expectedFailure def test_floating_point_expr_commands(self): self.build_and_run() diff --git a/lldb/test/API/functionalities/breakpoint/thread_plan_user_breakpoint/TestThreadPlanUserBreakpoint.py b/lldb/test/API/functionalities/breakpoint/thread_plan_user_breakpoint/TestThreadPlanUserBreakpoint.py index d9b7426b148447..ee597ad2b148c5 100644 --- a/lldb/test/API/functionalities/breakpoint/thread_plan_user_breakpoint/TestThreadPlanUserBreakpoint.py +++ b/lldb/test/API/functionalities/breakpoint/thread_plan_user_breakpoint/TestThreadPlanUserBreakpoint.py @@ -8,7 +8,7 @@ """ -import unittest2 +import unittest import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/functionalities/jitloader_gdb/TestJITLoaderGDB.py b/lldb/test/API/functionalities/jitloader_gdb/TestJITLoaderGDB.py index a0742152447874..98c0b149003df6 100644 --- a/lldb/test/API/functionalities/jitloader_gdb/TestJITLoaderGDB.py +++ b/lldb/test/API/functionalities/jitloader_gdb/TestJITLoaderGDB.py @@ -1,7 +1,7 @@ """Test for the JITLoaderGDB interface""" -import unittest2 +import unittest import os import lldb from lldbsuite.test import lldbutil @@ -14,7 +14,7 @@ class 
JITLoaderGDBTestCase(TestBase): lambda: "Skipped because the test crashes the test runner", bugnumber="llvm.org/pr24702", ) - @unittest2.expectedFailure # llvm.org/pr24702 + @unittest.expectedFailure # llvm.org/pr24702 def test_bogus_values(self): """Test that we handle inferior misusing the GDB JIT interface""" self.build() diff --git a/lldb/test/API/functionalities/thread/state/TestThreadStates.py b/lldb/test/API/functionalities/thread/state/TestThreadStates.py index e128ca84977b41..56954c9f34c7e4 100644 --- a/lldb/test/API/functionalities/thread/state/TestThreadStates.py +++ b/lldb/test/API/functionalities/thread/state/TestThreadStates.py @@ -3,7 +3,7 @@ """ -import unittest2 +import unittest import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * @@ -41,14 +41,14 @@ def test_state_after_continue(self): @expectedFailureAll(oslist=["windows"], bugnumber="llvm.org/pr24660") @expectedFailureNetBSD # thread states not properly maintained - @unittest2.expectedFailure # llvm.org/pr16712 + @unittest.expectedFailure # llvm.org/pr16712 def test_state_after_expression(self): """Test thread state after expression.""" self.build() self.thread_state_after_expression_test() # thread states not properly maintained - @unittest2.expectedFailure # llvm.org/pr15824 and + @unittest.expectedFailure # llvm.org/pr15824 and @expectedFailureAll( oslist=["windows"], bugnumber="llvm.org/pr24668: Breakpoints not resolved correctly", diff --git a/lldb/test/API/functionalities/tty/TestTerminal.py b/lldb/test/API/functionalities/tty/TestTerminal.py index 457abd7b4a89d1..750cdb3fc8361c 100644 --- a/lldb/test/API/functionalities/tty/TestTerminal.py +++ b/lldb/test/API/functionalities/tty/TestTerminal.py @@ -2,7 +2,7 @@ Test lldb command aliases. 
""" -import unittest2 +import unittest import os import lldb from lldbsuite.test.decorators import * @@ -17,13 +17,13 @@ class LaunchInTerminalTestCase(TestBase): @skipUnlessDarwin # If the test is being run under sudo, the spawned terminal won't retain that elevated # privilege so it can't open the socket to talk back to the test case - @unittest2.skipIf( + @unittest.skipIf( hasattr(os, "geteuid") and os.geteuid() == 0, "test cannot be run as root" ) # Do we need to disable this test if the testsuite is being run on a remote system? # This env var is only defined when the shell is running in a local mac # terminal window - @unittest2.skipUnless( + @unittest.skipUnless( "TERM_PROGRAM" in os.environ, "test must be run on local system" ) @no_debug_info_test diff --git a/lldb/test/API/lang/c/shared_lib/TestSharedLib.py b/lldb/test/API/lang/c/shared_lib/TestSharedLib.py index 235b9b4ce3442d..e0994aae76169d 100644 --- a/lldb/test/API/lang/c/shared_lib/TestSharedLib.py +++ b/lldb/test/API/lang/c/shared_lib/TestSharedLib.py @@ -1,7 +1,7 @@ """Test that types defined in shared libraries work correctly.""" -import unittest2 +import unittest import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * @@ -35,7 +35,7 @@ def test_expr_no_preload(self): """Test that types work when defined in a shared library and forward-declared in the main executable, but with preloading disabled""" self.common_test_expr(False) - @unittest2.expectedFailure # llvm.org/PR36712 + @unittest.expectedFailure # llvm.org/PR36712 def test_frame_variable(self): """Test that types work when defined in a shared library and forward-declared in the main executable""" self.build() diff --git a/lldb/test/API/lang/c/shared_lib_stripped_symbols/TestSharedLibStrippedSymbols.py b/lldb/test/API/lang/c/shared_lib_stripped_symbols/TestSharedLibStrippedSymbols.py index 070bd88d8db942..6971fc0fbc3fde 100644 --- 
a/lldb/test/API/lang/c/shared_lib_stripped_symbols/TestSharedLibStrippedSymbols.py +++ b/lldb/test/API/lang/c/shared_lib_stripped_symbols/TestSharedLibStrippedSymbols.py @@ -1,7 +1,7 @@ """Test that types defined in shared libraries with stripped symbols work correctly.""" -import unittest2 +import unittest import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * @@ -28,7 +28,7 @@ def test_expr(self): ) @expectedFailureAll(oslist=["windows"]) - @unittest2.expectedFailure # llvm.org/PR36712 + @unittest.expectedFailure # llvm.org/PR36712 def test_frame_variable(self): """Test that types work when defined in a shared library and forward-declared in the main executable""" self.build() diff --git a/lldb/test/API/lang/cpp/namespace/TestNamespaceLookup.py b/lldb/test/API/lang/cpp/namespace/TestNamespaceLookup.py index 44cfbd2df5f346..b5e8115160d209 100644 --- a/lldb/test/API/lang/cpp/namespace/TestNamespaceLookup.py +++ b/lldb/test/API/lang/cpp/namespace/TestNamespaceLookup.py @@ -3,7 +3,7 @@ """ -import unittest2 +import unittest import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * @@ -41,7 +41,7 @@ def runToBkpt(self, command): ) @skipIfWindows # This is flakey on Windows: llvm.org/pr38373 - @unittest2.expectedFailure # CU-local objects incorrectly scoped + @unittest.expectedFailure # CU-local objects incorrectly scoped def test_scope_lookup_with_run_command_globals(self): """Test scope lookup of functions in lldb.""" self.build() @@ -211,7 +211,7 @@ def test_scope_lookup_with_run_command(self): # Evaluate B::func() - should call B::func() self.expect_expr("B::func()", result_type="int", result_value="4") - @unittest2.expectedFailure # lldb scope lookup of functions bugs + @unittest.expectedFailure # lldb scope lookup of functions bugs def test_function_scope_lookup_with_run_command(self): """Test scope lookup of functions in lldb.""" self.build() @@ -272,7 +272,7 @@ def 
test_scope_after_using_directive_lookup_with_run_command(self): # Evaluate func2() - should call A::func2() self.expect_expr("func2()", result_type="int", result_value="3") - @unittest2.expectedFailure # lldb scope lookup after using declaration bugs + @unittest.expectedFailure # lldb scope lookup after using declaration bugs # NOTE: this test may fail on older systems that don't emit import # emtries in DWARF - may need to add checks for compiler versions here. def test_scope_after_using_declaration_lookup_with_run_command(self): @@ -294,7 +294,7 @@ def test_scope_after_using_declaration_lookup_with_run_command(self): # Evaluate func() - should call A::func() self.expect_expr("func()", result_type="int", result_value="3") - @unittest2.expectedFailure # lldb scope lookup ambiguity after using bugs + @unittest.expectedFailure # lldb scope lookup ambiguity after using bugs def test_scope_ambiguity_after_using_lookup_with_run_command(self): """Test scope lookup ambiguity after using in lldb.""" self.build() diff --git a/lldb/test/API/lang/cpp/reference-to-outer-type/TestCppReferenceToOuterClass.py b/lldb/test/API/lang/cpp/reference-to-outer-type/TestCppReferenceToOuterClass.py index 3172b5f2fe384f..a6e419b7fcdfa2 100644 --- a/lldb/test/API/lang/cpp/reference-to-outer-type/TestCppReferenceToOuterClass.py +++ b/lldb/test/API/lang/cpp/reference-to-outer-type/TestCppReferenceToOuterClass.py @@ -1,4 +1,4 @@ -import unittest2 +import unittest import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * @@ -6,7 +6,7 @@ class TestCase(TestBase): - @unittest2.expectedFailure # The fix for this was reverted due to llvm.org/PR52257 + @unittest.expectedFailure # The fix for this was reverted due to llvm.org/PR52257 def test(self): self.build() self.dbg.CreateTarget(self.getBuildArtifact("a.out")) diff --git a/lldb/test/API/lang/cpp/thread_local/TestThreadLocal.py b/lldb/test/API/lang/cpp/thread_local/TestThreadLocal.py index 
9b128ba6097acb..0b63e15e876d67 100644 --- a/lldb/test/API/lang/cpp/thread_local/TestThreadLocal.py +++ b/lldb/test/API/lang/cpp/thread_local/TestThreadLocal.py @@ -39,6 +39,11 @@ def test_thread_local(self): process.Kill() lldbutil.run_to_breakpoint_do_run(self, target, main_bkpt) + # The test fails during tear down because the module isn't cleared. + # Even though this test case is marked as xfail, a failure during + # tear down still counts as an error. + main_module.Clear() + self.expect( "expr tl_local_int", error=True, diff --git a/lldb/test/API/lang/objc/hidden-ivars/TestHiddenIvars.py b/lldb/test/API/lang/objc/hidden-ivars/TestHiddenIvars.py index 479f4379a53023..f5e4eb1404ac8a 100644 --- a/lldb/test/API/lang/objc/hidden-ivars/TestHiddenIvars.py +++ b/lldb/test/API/lang/objc/hidden-ivars/TestHiddenIvars.py @@ -3,7 +3,7 @@ import subprocess -import unittest2 +import unittest import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * @@ -58,7 +58,7 @@ def test_frame_variable(self): self.build() self.frame_var(False) - @unittest2.expectedFailure # rdar://18683637 + @unittest.expectedFailure # rdar://18683637 def test_frame_variable_across_modules(self): if self.getArchitecture() == "i386": self.skipTest("requires modern objc runtime") diff --git a/lldb/test/API/lldbtest.py b/lldb/test/API/lldbtest.py index 2b44bb7f6f9f4b..bae73e71820f73 100644 --- a/lldb/test/API/lldbtest.py +++ b/lldb/test/API/lldbtest.py @@ -1,3 +1,4 @@ +import collections import os import re import operator @@ -86,20 +87,46 @@ def execute(self, test, litConfig): if timeoutInfo: return lit.Test.TIMEOUT, output - # Parse the dotest output from stderr. - result_regex = r"\((\d+) passes, (\d+) failures, (\d+) errors, (\d+) skipped, (\d+) expected failures, (\d+) unexpected successes\)" - results = re.search(result_regex, err) + # Parse the dotest output from stderr. First get the # of total tests, in order to infer the # of passes. 
+ # Example: "Ran 5 tests in 0.042s" + num_ran_regex = r"^Ran (\d+) tests? in " + num_ran_results = re.search(num_ran_regex, err, re.MULTILINE) + + # If parsing fails mark this test as unresolved. + if not num_ran_results: + return lit.Test.UNRESOLVED, output + num_ran = int(num_ran_results.group(1)) + + # Then look for a detailed summary, which is OK or FAILED followed by optional details. + # Example: "OK (skipped=1, expected failures=1)" + # Example: "FAILED (failures=3)" + # Example: "OK" + result_regex = r"^(?:OK|FAILED)(?: \((.*)\))?$" + results = re.search(result_regex, err, re.MULTILINE) # If parsing fails mark this test as unresolved. if not results: return lit.Test.UNRESOLVED, output - passes = int(results.group(1)) - failures = int(results.group(2)) - errors = int(results.group(3)) - skipped = int(results.group(4)) - expected_failures = int(results.group(5)) - unexpected_successes = int(results.group(6)) + details = results.group(1) + parsed_details = collections.defaultdict(int) + if details: + for detail in details.split(", "): + detail_parts = detail.split("=") + if len(detail_parts) != 2: + return lit.Test.UNRESOLVED, output + parsed_details[detail_parts[0]] = int(detail_parts[1]) + + failures = parsed_details["failures"] + errors = parsed_details["errors"] + skipped = parsed_details["skipped"] + expected_failures = parsed_details["expected failures"] + unexpected_successes = parsed_details["unexpected successes"] + + non_pass = ( + failures + errors + skipped + expected_failures + unexpected_successes + ) + passes = num_ran - non_pass if exitCode: # Mark this test as FAIL if at least one test failed. 
diff --git a/lldb/test/API/macosx/universal/TestUniversal.py b/lldb/test/API/macosx/universal/TestUniversal.py index 6e8c112efa096f..d988cc0923b275 100644 --- a/lldb/test/API/macosx/universal/TestUniversal.py +++ b/lldb/test/API/macosx/universal/TestUniversal.py @@ -1,4 +1,4 @@ -import unittest2 +import unittest import os import lldb from lldbsuite.test.decorators import * @@ -24,7 +24,7 @@ def setUp(self): @add_test_categories(["pyapi"]) @skipUnlessDarwin - @unittest2.skipUnless( + @unittest.skipUnless( hasattr(os, "uname") and os.uname()[4] in ["x86_64"], "requires x86_64" ) @skipIfDarwinEmbedded # this test file assumes we're targetting an x86 system @@ -50,7 +50,7 @@ def test_sbdebugger_create_target_with_file_and_target_triple(self): self.assertTrue(process, PROCESS_IS_VALID) @skipUnlessDarwin - @unittest2.skipUnless( + @unittest.skipUnless( hasattr(os, "uname") and os.uname()[4] in ["x86_64"], "requires x86_64" ) @skipIfDarwinEmbedded # this test file assumes we're targetting an x86 system @@ -115,7 +115,7 @@ def test_process_launch_for_universal(self): self.runCmd("continue") @skipUnlessDarwin - @unittest2.skipUnless( + @unittest.skipUnless( hasattr(os, "uname") and os.uname()[4] in ["x86_64"], "requires x86_64" ) @skipIfDarwinEmbedded # this test file assumes we're targetting an x86 system diff --git a/lldb/test/API/tools/lldb-server/libraries-svr4/TestGdbRemoteLibrariesSvr4Support.py b/lldb/test/API/tools/lldb-server/libraries-svr4/TestGdbRemoteLibrariesSvr4Support.py index ac65d2d4c660f8..846adade344020 100644 --- a/lldb/test/API/tools/lldb-server/libraries-svr4/TestGdbRemoteLibrariesSvr4Support.py +++ b/lldb/test/API/tools/lldb-server/libraries-svr4/TestGdbRemoteLibrariesSvr4Support.py @@ -72,7 +72,7 @@ def libraries_svr4_well_formed(self): self.assertEqual(xml_root.tag, "library-list-svr4") for child in xml_root: self.assertEqual(child.tag, "library") - self.assertItemsEqual(child.attrib.keys(), ["name", "lm", "l_addr", "l_ld"]) + 
self.assertCountEqual(child.attrib.keys(), ["name", "lm", "l_addr", "l_ld"]) def libraries_svr4_has_correct_load_addr(self): xml_root = self.get_libraries_svr4_xml() diff --git a/lldb/test/API/tools/lldb-server/test/test_lldbgdbserverutils.py b/lldb/test/API/tools/lldb-server/test/test_lldbgdbserverutils.py index 6a6fd020de4501..9c9a73cb4e64ef 100644 --- a/lldb/test/API/tools/lldb-server/test/test_lldbgdbserverutils.py +++ b/lldb/test/API/tools/lldb-server/test/test_lldbgdbserverutils.py @@ -1,10 +1,10 @@ -import unittest2 +import unittest import re from lldbgdbserverutils import * -class TestLldbGdbServerUtils(unittest2.TestCase): +class TestLldbGdbServerUtils(unittest.TestCase): def test_entry_exact_payload_match(self): entry = GdbRemoteEntry(is_send_to_remote=False, exact_payload="$OK#9a") entry.assert_match(self, "$OK#9a") From 137bd782959523e8168c346bc8801d0b14f684c5 Mon Sep 17 00:00:00 2001 From: Slava Zakharin Date: Tue, 13 Feb 2024 14:22:04 -0800 Subject: [PATCH 068/240] [flang] Register LLVMTranslationDialectInterface for FIR. (#81668) Register the LLVM IR translation interface for FIR to avoid warnings about "Unhandled parameter attribute" after #78228. --- .../flang/Optimizer/Dialect/FIRDialect.h | 3 +++ flang/include/flang/Optimizer/Support/InitFIR.h | 1 + flang/lib/Optimizer/Dialect/FIRDialect.cpp | 17 +++++++++++++++++ 3 files changed, 21 insertions(+) diff --git a/flang/include/flang/Optimizer/Dialect/FIRDialect.h b/flang/include/flang/Optimizer/Dialect/FIRDialect.h index 440fe77059a4a7..238385505dbff7 100644 --- a/flang/include/flang/Optimizer/Dialect/FIRDialect.h +++ b/flang/include/flang/Optimizer/Dialect/FIRDialect.h @@ -70,6 +70,9 @@ bool canLegallyInline(mlir::Operation *, mlir::Operation *, bool); // Register the FIRInlinerInterface to FIROpsDialect void addFIRInlinerExtension(mlir::DialectRegistry &registry); +// Register implementation of LLVMTranslationDialectInterface.
+void addFIRToLLVMIRExtension(mlir::DialectRegistry &registry); + } // namespace fir #endif // FORTRAN_OPTIMIZER_DIALECT_FIRDIALECT_H diff --git a/flang/include/flang/Optimizer/Support/InitFIR.h b/flang/include/flang/Optimizer/Support/InitFIR.h index f376840afd842a..9f4c4ed28a4aec 100644 --- a/flang/include/flang/Optimizer/Support/InitFIR.h +++ b/flang/include/flang/Optimizer/Support/InitFIR.h @@ -58,6 +58,7 @@ inline void addFIRExtensions(mlir::DialectRegistry &registry, bool addFIRInlinerInterface = true) { if (addFIRInlinerInterface) addFIRInlinerExtension(registry); + addFIRToLLVMIRExtension(registry); } inline void loadNonCodegenDialects(mlir::MLIRContext &context) { diff --git a/flang/lib/Optimizer/Dialect/FIRDialect.cpp b/flang/lib/Optimizer/Dialect/FIRDialect.cpp index f4589fda184587..850b6120b2a00e 100644 --- a/flang/lib/Optimizer/Dialect/FIRDialect.cpp +++ b/flang/lib/Optimizer/Dialect/FIRDialect.cpp @@ -15,6 +15,7 @@ #include "flang/Optimizer/Dialect/FIROps.h" #include "flang/Optimizer/Dialect/FIRType.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Target/LLVMIR/ModuleTranslation.h" #include "mlir/Transforms/InliningUtils.h" using namespace fir; @@ -77,6 +78,22 @@ void fir::addFIRInlinerExtension(mlir::DialectRegistry &registry) { }); } +// We do not provide LLVMTranslationDialectInterface implementation +// for FIR dialect, since at the point of translation to LLVM IR +// there should not be any FIR operations (the CodeGen converts +// them to LLVMIR dialect operations). +// Here we register the default implementation of +// LLVMTranslationDialectInterface that will drop all FIR dialect +// attributes - this helps to avoid warnings about unhandled attributes. +// We can provide our own implementation of the interface, +// when more sophisticated translation is required.
+void fir::addFIRToLLVMIRExtension(mlir::DialectRegistry &registry) { + registry.addExtension( + +[](mlir::MLIRContext *ctx, fir::FIROpsDialect *dialect) { + dialect->addInterface<mlir::LLVMTranslationDialectInterface>(); + }); +} + // anchor the class vtable to this compilation unit fir::FIROpsDialect::~FIROpsDialect() { // do nothing From e06f3522cc55cec60084a1278109ab236ef7a3ee Mon Sep 17 00:00:00 2001 From: jkorous-apple <32549412+jkorous-apple@users.noreply.github.com> Date: Tue, 13 Feb 2024 14:31:11 -0800 Subject: [PATCH 069/240] [-Wunsafe-buffer-usage] Emit fixits for array decayed to pointer (#80347) Covers cases where DeclRefExpr referring to a const-size array decays to a pointer and is used "as a pointer" (e.g. passed to a pointer type parameter). Since std::array doesn't implicitly convert to pointer to its element type T* the cast needs to be done explicitly as part of the fixit when we retrofit std::array to code that previously worked with constant size array. std::array::data() method is used for the explicit cast. In terms of the fixit machine this covers the UPC(DRE) case for Array fixit strategy. The emitted fixit inserts call to std::array::data() method similarly to analogous fixit for Span strategy.
--- clang/lib/Analysis/UnsafeBufferUsage.cpp | 2 +- ...afe-buffer-usage-fixits-pointer-access.cpp | 50 ++++++++++++++++++- 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 3c2a6fd81b1d8f..d00c598c4b9de3 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -1876,6 +1876,7 @@ std::optional UPCStandalonePointerGadget::getFixits(const FixitStrategy &S) const { const auto VD = cast(Node->getDecl()); switch (S.lookup(VD)) { + case FixitStrategy::Kind::Array: case FixitStrategy::Kind::Span: { ASTContext &Ctx = VD->getASTContext(); SourceManager &SM = Ctx.getSourceManager(); @@ -1890,7 +1891,6 @@ UPCStandalonePointerGadget::getFixits(const FixitStrategy &S) const { } case FixitStrategy::Kind::Wontfix: case FixitStrategy::Kind::Iterator: - case FixitStrategy::Kind::Array: return std::nullopt; case FixitStrategy::Kind::Vector: llvm_unreachable("unsupported strategies for FixableGadgets"); diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-access.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-access.cpp index ca19702c7ec300..f94072015ff87d 100644 --- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-access.cpp +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-access.cpp @@ -83,12 +83,27 @@ void unsafe_method_invocation_single_param() { } +void unsafe_method_invocation_single_param_array() { + int p[32]; + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:12}:"std::array p" + + int tmp = p[5]; + foo(p); + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:8-[[@LINE-1]]:8}:".data()" +} + void safe_method_invocation_single_param() { int* p = new int[10]; // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]:{{.*}}-[[@LINE-1]]:{{.*}}} foo(p); } +void safe_method_invocation_single_param_array() { + int p[10]; + foo(p); + // CHECK-NOT:
fix-it:"{{.*}}":{[[@LINE-1]]:{{.*}}-[[@LINE-1]]:{{.*}}}:".data()" +} + void unsafe_method_invocation_double_param() { int* p = new int[10]; // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:11}:"std::span p" @@ -111,6 +126,20 @@ void unsafe_method_invocation_double_param() { m1(q, q, 8); } +void unsafe_method_invocation_double_param_array() { + int p[14]; + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:12}:"std::array p" + + int q[40]; + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:12}:"std::array q" + + q[5] = p[5]; + + m1(p, p, 10); + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:7-[[@LINE-1]]:7}:".data()" + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:10-[[@LINE-2]]:10}:".data()" +} + void unsafe_access_in_lamda() { int* p = new int[10]; // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:11}:"std::span p" @@ -177,4 +206,23 @@ void fixits_in_lambda_capture_rename() { }; p[5] = 10; -} +} + +bool ptr_comparison(int* ptr, unsigned idx) { + int arr[10]; + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:14}:"std::array arr" + arr[idx] = idx; + + return arr > ptr; + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:13-[[@LINE-1]]:13}:".data()" +} + +int long long ptr_distance(int* ptr, unsigned idx) { + int arr[10]; + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:14}:"std::array arr" + arr[idx] = idx; + + int long long dist = arr - ptr; + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:27-[[@LINE-1]]:27}:".data()" + return dist; +} From 017675fff116c26bef7f0a389c983c909a3141fd Mon Sep 17 00:00:00 2001 From: Artem Dergachev Date: Tue, 13 Feb 2024 14:57:55 -0800 Subject: [PATCH 070/240] [attributes][analyzer] Generalize [[clang::suppress]] to declarations. (#80371) The attribute is now allowed on an assortment of declarations, to suppress warnings related to declarations themselves, or all warnings in the lexical scope of the declaration. 
I don't necessarily see a reason to have a list at all, but it does look as if some of those more niche items aren't properly supported by the compiler itself so let's maintain a short safe list for now. The initial implementation raised the question of whether the attribute should apply to lexical declaration context vs. "actual" declaration context. I'm using "lexical" here because it results in fewer warnings suppressed, which is the conservative behavior: we can always expand it later if we think this is wrong, without breaking any existing code. I also think that this is the correct behavior that we will probably never want to change, given that the user typically desires to keep the suppressions as localized as possible. --- clang/include/clang/Basic/Attr.td | 7 ++ clang/include/clang/Basic/AttrDocs.td | 23 +++++++ clang/lib/Sema/SemaDecl.cpp | 3 + clang/lib/Sema/SemaDeclAttr.cpp | 5 -- .../Checkers/ObjCUnusedIVarsChecker.cpp | 4 +- .../StaticAnalyzer/Core/BugSuppression.cpp | 18 ++++- .../WebKit/ref-cntbl-base-virtual-dtor.cpp | 10 +++ .../WebKit/uncounted-lambda-captures.cpp | 5 ++ .../Checkers/WebKit/uncounted-local-vars.cpp | 1 + .../Checkers/WebKit/uncounted-members.cpp | 9 +++ clang/test/Analysis/ObjCRetSigs.m | 10 +++ clang/test/Analysis/objc_invalidation.m | 17 ++++- clang/test/Analysis/suppression-attr-doc.cpp | 14 ++++ clang/test/Analysis/suppression-attr.cpp | 68 +++++++++++++++++++ clang/test/Analysis/suppression-attr.m | 60 ++++++++++++---- clang/test/Analysis/unused-ivars.m | 11 ++- clang/test/SemaCXX/attr-suppress.cpp | 10 +-- clang/test/SemaObjC/attr-suppress.m | 19 ++---- 18 files changed, 251 insertions(+), 43 deletions(-) create mode 100644 clang/test/Analysis/suppression-attr.cpp diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 45a29e771f2a21..7e643b89971c17 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -2891,6 +2891,13 @@ def Suppress : DeclOrStmtAttr { let
Spellings = [CXX11<"gsl", "suppress">, Clang<"suppress">]; let Args = [VariadicStringArgument<"DiagnosticIdentifiers">]; let Accessors = [Accessor<"isGSL", [CXX11<"gsl", "suppress">]>]; + // There's no fundamental reason why we can't simply accept all Decls + // but let's make a short list so that to avoid supporting something weird + // by accident. We can always expand the list later. + let Subjects = SubjectList<[ + Stmt, Var, Field, ObjCProperty, Function, ObjCMethod, Record, ObjCInterface, + ObjCImplementation, Namespace, Empty + ], ErrorDiag, "variables, functions, structs, interfaces, and namespaces">; let Documentation = [SuppressDocs]; } diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 8d369091d21590..b96fbddd51154c 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -5321,6 +5321,29 @@ Putting the attribute on a compound statement suppresses all warnings in scope: } } +The attribute can also be placed on entire declarations of functions, classes, +variables, member variables, and so on, to suppress warnings related +to the declarations themselves. When used this way, the attribute additionally +suppresses all warnings in the lexical scope of the declaration: + +.. code-block:: c++ + + class [[clang::suppress]] C { + int foo() { + int *x = nullptr; + ... + return *x; // warnings suppressed in the entire class scope + } + + int bar(); + }; + + int C::bar() { + int *x = nullptr; + ... + return *x; // warning NOT suppressed! - not lexically nested in 'class C{}' + } + Some static analysis warnings are accompanied by one or more notes, and the line of code against which the warning is emitted isn't necessarily the best for suppression purposes. 
In such cases the tools are allowed to implement diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 375f92e4ac573c..e95e675d0507ed 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -2960,6 +2960,9 @@ static bool mergeDeclAttribute(Sema &S, NamedDecl *D, S.mergeHLSLNumThreadsAttr(D, *NT, NT->getX(), NT->getY(), NT->getZ()); else if (const auto *SA = dyn_cast(Attr)) NewAttr = S.mergeHLSLShaderAttr(D, *SA, SA->getType()); + else if (const auto *SupA = dyn_cast(Attr)) + // Do nothing. Each redeclaration should be suppressed separately. + NewAttr = nullptr; else if (Attr->shouldInheritEvenIfAlreadyPresent() || !DeclHasAttr(D, Attr)) NewAttr = cast(Attr->clone(S.Context)); diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index d5526957937bbb..8a204b1d3b88d9 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -5260,11 +5260,6 @@ static void handleSuppressAttr(Sema &S, Decl *D, const ParsedAttr &AL) { // Suppression attribute with GSL spelling requires at least 1 argument. if (!AL.checkAtLeastNumArgs(S, 1)) return; - } else if (!isa(D)) { - // Analyzer suppression applies only to variables and statements. 
- S.Diag(AL.getLoc(), diag::err_attribute_wrong_decl_type_str) - << AL << 0 << "variables and statements"; - return; } std::vector DiagnosticIdentifiers; diff --git a/clang/lib/StaticAnalyzer/Checkers/ObjCUnusedIVarsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ObjCUnusedIVarsChecker.cpp index 1c2d84254d4645..2f2df63468b4b1 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ObjCUnusedIVarsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ObjCUnusedIVarsChecker.cpp @@ -161,8 +161,8 @@ static void checkObjCUnusedIvar(const ObjCImplementationDecl *D, PathDiagnosticLocation L = PathDiagnosticLocation::create(Ivar, BR.getSourceManager()); - BR.EmitBasicReport(D, Checker, "Unused instance variable", "Optimization", - os.str(), L); + BR.EmitBasicReport(ID, Checker, "Unused instance variable", + "Optimization", os.str(), L); } } diff --git a/clang/lib/StaticAnalyzer/Core/BugSuppression.cpp b/clang/lib/StaticAnalyzer/Core/BugSuppression.cpp index fded071567f958..84004b8e5c1cdb 100644 --- a/clang/lib/StaticAnalyzer/Core/BugSuppression.cpp +++ b/clang/lib/StaticAnalyzer/Core/BugSuppression.cpp @@ -82,12 +82,12 @@ class CacheInitializer : public RecursiveASTVisitor { CacheInitializer(ToInit).TraverseDecl(const_cast(D)); } - bool VisitVarDecl(VarDecl *VD) { + bool VisitDecl(Decl *D) { // Bug location could be somewhere in the init value of // a freshly declared variable. Even though it looks like the // user applied attribute to a statement, it will apply to a // variable declaration, and this is where we check for it. - return VisitAttributedNode(VD); + return VisitAttributedNode(D); } bool VisitAttributedStmt(AttributedStmt *AS) { @@ -147,6 +147,20 @@ bool BugSuppression::isSuppressed(const PathDiagnosticLocation &Location, // done as well as perform a lot of work we'll never need. // Gladly, none of our on-by-default checkers currently need it. DeclWithIssue = ACtx.getTranslationUnitDecl(); + } else { + // This is the fast path. 
However, we should still consider the topmost + // declaration that isn't TranslationUnitDecl, because we should respect + // attributes on the entire declaration chain. + while (true) { + // Use the "lexical" parent. Eg., if the attribute is on a class, suppress + // warnings in inline methods but not in out-of-line methods. + const Decl *Parent = + dyn_cast_or_null(DeclWithIssue->getLexicalDeclContext()); + if (Parent == nullptr || isa(Parent)) + break; + + DeclWithIssue = Parent; + } } // While some warnings are attached to AST nodes (mostly path-sensitive diff --git a/clang/test/Analysis/Checkers/WebKit/ref-cntbl-base-virtual-dtor.cpp b/clang/test/Analysis/Checkers/WebKit/ref-cntbl-base-virtual-dtor.cpp index 1fc59c108b0e83..5cf7e7614d06e6 100644 --- a/clang/test/Analysis/Checkers/WebKit/ref-cntbl-base-virtual-dtor.cpp +++ b/clang/test/Analysis/Checkers/WebKit/ref-cntbl-base-virtual-dtor.cpp @@ -13,7 +13,17 @@ struct DerivedWithVirtualDtor : RefCntblBase { virtual ~DerivedWithVirtualDtor() {} }; +// Confirm that the checker respects [[clang::suppress]] +struct [[clang::suppress]] SuppressedDerived : RefCntblBase { }; +struct [[clang::suppress]] SuppressedDerivedWithVirtualDtor : RefCntblBase { + virtual ~SuppressedDerivedWithVirtualDtor() {} +}; +// FIXME: Support attributes on base specifiers? Currently clang +// doesn't support such attributes at all, even though it knows +// how to parse them. 
+// +// struct SuppressedBaseSpecDerived : [[clang::suppress]] RefCntblBase { }; template struct DerivedClassTmpl : T { }; diff --git a/clang/test/Analysis/Checkers/WebKit/uncounted-lambda-captures.cpp b/clang/test/Analysis/Checkers/WebKit/uncounted-lambda-captures.cpp index 30798793ceab14..27e0a74d583cd3 100644 --- a/clang/test/Analysis/Checkers/WebKit/uncounted-lambda-captures.cpp +++ b/clang/test/Analysis/Checkers/WebKit/uncounted-lambda-captures.cpp @@ -15,6 +15,11 @@ void raw_ptr() { // CHECK-NEXT:{{^ | }} ^ auto foo4 = [=](){ (void) ref_countable; }; // CHECK: warning: Implicitly captured raw-pointer 'ref_countable' to uncounted type is unsafe [webkit.UncountedLambdaCapturesChecker] + + // Confirm that the checker respects [[clang::suppress]]. + RefCountable* suppressed_ref_countable = nullptr; + [[clang::suppress]] auto foo5 = [suppressed_ref_countable](){}; + // CHECK-NOT: warning: Captured raw-pointer 'suppressed_ref_countable' to uncounted type is unsafe [webkit.UncountedLambdaCapturesChecker] } void references() { diff --git a/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp b/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp index 8694d5fb85b8b4..0fcd3b21376caf 100644 --- a/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp +++ b/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp @@ -60,6 +60,7 @@ class Foo { // expected-warning@-1{{Local variable 'baz' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} auto *baz2 = this->provide_ref_ctnbl(); // expected-warning@-1{{Local variable 'baz2' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + [[clang::suppress]] auto *baz_suppressed = provide_ref_ctnbl(); // no-warning } }; } // namespace auto_keyword diff --git a/clang/test/Analysis/Checkers/WebKit/uncounted-members.cpp b/clang/test/Analysis/Checkers/WebKit/uncounted-members.cpp index a0ea61e0e2a13b..108d5effdd2e8d 100644 --- 
a/clang/test/Analysis/Checkers/WebKit/uncounted-members.cpp +++ b/clang/test/Analysis/Checkers/WebKit/uncounted-members.cpp @@ -8,6 +8,9 @@ namespace members { RefCountable* a = nullptr; // expected-warning@-1{{Member variable 'a' in 'members::Foo' is a raw pointer to ref-countable type 'RefCountable'}} + [[clang::suppress]] + RefCountable* a_suppressed = nullptr; + protected: RefPtr b; @@ -25,8 +28,14 @@ namespace members { }; void forceTmplToInstantiate(FooTmpl) {} + + struct [[clang::suppress]] FooSuppressed { + private: + RefCountable* a = nullptr; + }; } + namespace ignore_unions { union Foo { RefCountable* a; diff --git a/clang/test/Analysis/ObjCRetSigs.m b/clang/test/Analysis/ObjCRetSigs.m index 97d33f9f5467b1..f92506a8341958 100644 --- a/clang/test/Analysis/ObjCRetSigs.m +++ b/clang/test/Analysis/ObjCRetSigs.m @@ -4,10 +4,12 @@ @interface MyBase -(long long)length; +-(long long)suppressedLength; @end @interface MySub : MyBase{} -(double)length; +-(double)suppressedLength; @end @implementation MyBase @@ -15,6 +17,10 @@ -(long long)length{ printf("Called MyBase -length;\n"); return 3; } +-(long long)suppressedLength{ + printf("Called MyBase -length;\n"); + return 3; +} @end @implementation MySub @@ -22,4 +28,8 @@ -(double)length{ // expected-warning{{types are incompatible}} printf("Called MySub -length;\n"); return 3.3; } +-(double)suppressedLength [[clang::suppress]]{ // no-warning + printf("Called MySub -length;\n"); + return 3.3; +} @end diff --git a/clang/test/Analysis/objc_invalidation.m b/clang/test/Analysis/objc_invalidation.m index 52a79d8f34baa2..e61b0897646a22 100644 --- a/clang/test/Analysis/objc_invalidation.m +++ b/clang/test/Analysis/objc_invalidation.m @@ -257,6 +257,17 @@ @interface MissingInvalidationMethod : Foo @implementation MissingInvalidationMethod @end +@interface SuppressedMissingInvalidationMethod : Foo +@property (assign) [[clang::suppress]] SuppressedMissingInvalidationMethod *foobar16_warn; +// FIXME: Suppression should have 
worked but decl-with-issue is the ivar, not the property. +#if RUN_IVAR_INVALIDATION +// expected-warning@-3 {{Property foobar16_warn needs to be invalidated; no invalidation method is defined in the @implementation for SuppressedMissingInvalidationMethod}} +#endif + +@end +@implementation SuppressedMissingInvalidationMethod +@end + @interface MissingInvalidationMethod2 : Foo { Foo *Ivar1; #if RUN_IVAR_INVALIDATION @@ -290,8 +301,10 @@ @implementation MissingInvalidationMethodDecl2 @end @interface InvalidatedInPartial : SomeInvalidationImplementingObject { - SomeInvalidationImplementingObject *Ivar1; - SomeInvalidationImplementingObject *Ivar2; + SomeInvalidationImplementingObject *Ivar1; + SomeInvalidationImplementingObject *Ivar2; + [[clang::suppress]] + SomeInvalidationImplementingObject *Ivar3; // no-warning } -(void)partialInvalidator __attribute__((annotate("objc_instance_variable_invalidator_partial"))); @end diff --git a/clang/test/Analysis/suppression-attr-doc.cpp b/clang/test/Analysis/suppression-attr-doc.cpp index 1208842799ed9a..ca4e665a082ce4 100644 --- a/clang/test/Analysis/suppression-attr-doc.cpp +++ b/clang/test/Analysis/suppression-attr-doc.cpp @@ -52,3 +52,17 @@ int bar2(bool coin_flip) { __attribute__((suppress)) return *result; // leak warning is suppressed only on this path } + +class [[clang::suppress]] C { + int foo() { + int *x = nullptr; + return *x; // warnings suppressed in the entire class + } + + int bar(); +}; + +int C::bar() { + int *x = nullptr; + return *x; // expected-warning{{Dereference of null pointer (loaded from variable 'x')}} +} diff --git a/clang/test/Analysis/suppression-attr.cpp b/clang/test/Analysis/suppression-attr.cpp new file mode 100644 index 00000000000000..89bc3c47dbd51a --- /dev/null +++ b/clang/test/Analysis/suppression-attr.cpp @@ -0,0 +1,68 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=core -verify %s + +namespace [[clang::suppress]] +suppressed_namespace { + int foo() { + int *x = 0; + return *x; + } + + 
int foo_forward(); +} + +int suppressed_namespace::foo_forward() { + int *x = 0; + return *x; // expected-warning{{Dereference of null pointer (loaded from variable 'x')}} +} + +// Another instance of the same namespace. +namespace suppressed_namespace { + int bar() { + int *x = 0; + return *x; // expected-warning{{Dereference of null pointer (loaded from variable 'x')}} + } +} + +void lambda() { + [[clang::suppress]] { + auto lam = []() { + int *x = 0; + return *x; + }; + } +} + +class [[clang::suppress]] SuppressedClass { + int foo() { + int *x = 0; + return *x; + } + + int bar(); +}; + +int SuppressedClass::bar() { + int *x = 0; + return *x; // expected-warning{{Dereference of null pointer (loaded from variable 'x')}} +} + +class SuppressedMethodClass { + [[clang::suppress]] int foo() { + int *x = 0; + return *x; + } + + [[clang::suppress]] int bar1(); + int bar2(); +}; + +int SuppressedMethodClass::bar1() { + int *x = 0; + return *x; // expected-warning{{Dereference of null pointer (loaded from variable 'x')}} +} + +[[clang::suppress]] +int SuppressedMethodClass::bar2() { + int *x = 0; + return *x; // no-warning +} diff --git a/clang/test/Analysis/suppression-attr.m b/clang/test/Analysis/suppression-attr.m index 8ba8dda722721b..acef4b34fb09f3 100644 --- a/clang/test/Analysis/suppression-attr.m +++ b/clang/test/Analysis/suppression-attr.m @@ -168,17 +168,15 @@ void malloc_leak_suppression_2_1() { *x = 42; } -// TODO: reassess when we decide what to do with declaration annotations -void malloc_leak_suppression_2_2() /* SUPPRESS */ { +void malloc_leak_suppression_2_2() SUPPRESS { int *x = (int *)malloc(sizeof(int)); *x = 42; -} // expected-warning{{Potential leak of memory pointed to by 'x'}} +} // no-warning -// TODO: reassess when we decide what to do with declaration annotations -/* SUPPRESS */ void malloc_leak_suppression_2_3() { +SUPPRESS void malloc_leak_suppression_2_3() { int *x = (int *)malloc(sizeof(int)); *x = 42; -} // expected-warning{{Potential leak 
of memory pointed to by 'x'}} +} // no-warning void malloc_leak_suppression_2_4(int cond) { int *x = (int *)malloc(sizeof(int)); @@ -233,20 +231,15 @@ - (void)methodWhichMayFail:(NSError **)error { @interface TestSuppress : UIResponder { } -// TODO: reassess when we decide what to do with declaration annotations -@property(copy) /* SUPPRESS */ NSMutableString *mutableStr; -// expected-warning@-1 {{Property of mutable type 'NSMutableString' has 'copy' attribute; an immutable object will be stored instead}} +@property(copy) SUPPRESS NSMutableString *mutableStr; // no-warning @end @implementation TestSuppress -// TODO: reassess when we decide what to do with declaration annotations -- (BOOL)resignFirstResponder /* SUPPRESS */ { +- (BOOL)resignFirstResponder SUPPRESS { // no-warning return 0; -} // expected-warning {{The 'resignFirstResponder' instance method in UIResponder subclass 'TestSuppress' is missing a [super resignFirstResponder] call}} +} -// TODO: reassess when we decide what to do with declaration annotations -- (void)methodWhichMayFail:(NSError **)error /* SUPPRESS */ { - // expected-warning@-1 {{Method accepting NSError** should have a non-void return value to indicate whether or not an error occurred}} +- (void)methodWhichMayFail:(NSError **)error SUPPRESS { // no-warning } @end @@ -269,3 +262,40 @@ void ast_checker_suppress_1() { struct ABC *Abc; SUPPRESS { Abc = (struct ABC *)&Ab; } } + +SUPPRESS int suppressed_function() { + int *x = 0; + return *x; // no-warning +} + +SUPPRESS int suppressed_function_forward(); +int suppressed_function_forward() { + int *x = 0; + return *x; // expected-warning{{Dereference of null pointer (loaded from variable 'x')}} +} + +int suppressed_function_backward(); +SUPPRESS int suppressed_function_backward() { + int *x = 0; + return *x; // no-warning +} + +SUPPRESS +@interface SuppressedInterface +-(int)suppressedMethod; +-(int)regularMethod SUPPRESS; +@end + +@implementation SuppressedInterface +-(int)suppressedMethod 
SUPPRESS { + int *x = 0; + return *x; // no-warning +} + +// This one is NOT suppressed by the attribute on the forward declaration, +// and it's also NOT suppressed by the attribute on the entire interface. +-(int)regularMethod { + int *x = 0; + return *x; // expected-warning{{Dereference of null pointer (loaded from variable 'x')}} +} +@end diff --git a/clang/test/Analysis/unused-ivars.m b/clang/test/Analysis/unused-ivars.m index 32e7e80fc42768..8788804bf0c33c 100644 --- a/clang/test/Analysis/unused-ivars.m +++ b/clang/test/Analysis/unused-ivars.m @@ -44,6 +44,15 @@ - (void)setIvar:(id)newValue { } @end +// Confirm that the checker respects [[clang::suppress]]. +@interface TestC { +@private + [[clang::suppress]] int x; // no-warning +} +@end +@implementation TestC @end + + //===----------------------------------------------------------------------===// // Detect that ivar is in use, if used in category in the same file as the // implementation. @@ -125,4 +134,4 @@ @implementation Radar11059352 - (void)useWorkspace { NSString *workspacePathString = _workspacePath.pathString; } -@end \ No newline at end of file +@end diff --git a/clang/test/SemaCXX/attr-suppress.cpp b/clang/test/SemaCXX/attr-suppress.cpp index fb5e2ac7ce2066..e8f6d979880916 100644 --- a/clang/test/SemaCXX/attr-suppress.cpp +++ b/clang/test/SemaCXX/attr-suppress.cpp @@ -23,18 +23,16 @@ union [[gsl::suppress("type.1")]] U { float f; }; +// This doesn't really suppress anything but why not? 
[[clang::suppress]]; -// expected-error@-1 {{'suppress' attribute only applies to variables and statements}} namespace N { [[clang::suppress("in-a-namespace")]]; -// expected-error@-1 {{'suppress' attribute only applies to variables and statements}} } // namespace N [[clang::suppress]] int global = 42; [[clang::suppress]] void foo() { - // expected-error@-1 {{'suppress' attribute only applies to variables and statements}} [[clang::suppress]] int *p; [[clang::suppress]] int a = 0; // no-warning @@ -56,7 +54,11 @@ namespace N { } class [[clang::suppress("type.1")]] V { - // expected-error@-1 {{'suppress' attribute only applies to variables and statements}} int i; float f; }; + +// FIXME: There's no good reason why we shouldn't support this case. +// But it doesn't look like clang generally supports such attributes yet. +class W : [[clang::suppress]] public V { // expected-error{{'suppress' attribute cannot be applied to a base specifier}} +}; diff --git a/clang/test/SemaObjC/attr-suppress.m b/clang/test/SemaObjC/attr-suppress.m index ade8f94ec5895e..c12da097bf8442 100644 --- a/clang/test/SemaObjC/attr-suppress.m +++ b/clang/test/SemaObjC/attr-suppress.m @@ -6,8 +6,7 @@ SUPPRESS1 int global = 42; SUPPRESS1 void foo() { - // expected-error@-1 {{'suppress' attribute only applies to variables and statements}} - SUPPRESS1 int *p; + SUPPRESS1 int *p; // no-warning SUPPRESS1 int a = 0; // no-warning SUPPRESS2() @@ -28,23 +27,19 @@ SUPPRESS1 switch (a) { // no-warning // GNU-style attributes and C++11 attributes apply to different things when // written like this. GNU attribute gets attached to the declaration, while // C++11 attribute ends up on the type. 
- int SUPPRESS2("r") z; - SUPPRESS2(foo) + int SUPPRESS2("r") z; // no-warning + SUPPRESS2(foo) // no-warning float f; // expected-error@-2 {{expected string literal as argument of 'suppress' attribute}} } -union SUPPRESS2("type.1") U { - // expected-error@-1 {{'suppress' attribute only applies to variables and statements}} +union SUPPRESS2("type.1") U { // no-warning int i; float f; }; -SUPPRESS1 @interface Test { - // expected-error@-1 {{'suppress' attribute only applies to variables and statements}} +SUPPRESS1 @interface Test { // no-warning } -@property SUPPRESS2("prop") int *prop; -// expected-error@-1 {{'suppress' attribute only applies to variables and statements}} -- (void)bar:(int)x SUPPRESS1; -// expected-error@-1 {{'suppress' attribute only applies to variables and statements}} +@property SUPPRESS2("prop") int *prop; // no-warning +- (void)bar:(int)x SUPPRESS1; // no-warning @end From 0de2b26942f890a6ec84cd75ac7abe3f6f2b2e37 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 13 Feb 2024 14:59:28 -0800 Subject: [PATCH 071/240] [RISCV] Register fixed stack slots for callee saved registers for -msave-restore/Zcmp (#81392) PEI previously used fake frame indices for these callee saved registers. These fake frame indices are not registered with MachineFrameInfo. This required them to be deleted from CalleeSavedInfo after PEI to avoid breaking later passes. See #79535 Unfortunately, removing the registers from CalleeSavedInfo pessimizes Interprocedural Register Allocation. The RegUsageInfoCollector pass runs after PEI and uses CalleeSavedInfo. This patch replaces #79535 by properly creating fixed stack objects through MachineFrameInfo. This changes the stack size and offsets returned by MachineFrameInfo which requires changes to how RISCVFrameLowering uses that information. In addition to the individual object for each register, I've also created a single large fixed object that covers the entire stack area covered by cm.push or the libcalls.
cm.push must always push a multiple of 16 bytes and the save restore libcall pushes a multiple of stack align. I think this leaves holes in the stack where we could spill other registers, but it matches what we did previously. Maybe we can optimize this in the future. The only test changes are due to stack alignment handling after the callee save registers. Since we now have the fixed objects on the stack, the offset is non-zero when an aligned object is processed so the offset gets rounded up, increasing the stack size. I suspect we might need some more updates for RVV related code. There is very little or maybe even no testing of RVV mixed with Zcmp and save-restore. --- llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 163 +++++++++++------- llvm/lib/Target/RISCV/RISCVFrameLowering.h | 10 +- llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 34 ---- llvm/lib/Target/RISCV/RISCVRegisterInfo.h | 3 - .../test/CodeGen/RISCV/calling-conv-ilp32e.ll | 116 ++++++------- 5 files changed, 163 insertions(+), 163 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index 37672dd047f2d1..7e3dcb3283caba 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -291,9 +291,7 @@ static Register getMaxPushPopReg(const MachineFunction &MF, const std::vector &CSI) { Register MaxPushPopReg = RISCV::NoRegister; for (auto &CS : CSI) { - // RISCVRegisterInfo::hasReservedSpillSlot assigns negative frame indices to - // registers which can be saved by Zcmp Push. - if (CS.getFrameIdx() < 0) + if (llvm::is_contained(AllPopRegs, CS.getReg().id())) MaxPushPopReg = std::max(MaxPushPopReg.id(), CS.getReg().id()); } // if rlist is {rs, s0-s10}, then s11 will also be included @@ -532,8 +530,8 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, // FIXME (note copied from Lanai): This appears to be overallocating. Needs // investigation.
Get the number of bytes to allocate from the FrameInfo. - uint64_t StackSize = getStackSizeWithRVVPadding(MF); - uint64_t RealStackSize = StackSize + RVFI->getReservedSpillsSize(); + uint64_t RealStackSize = getStackSizeWithRVVPadding(MF); + uint64_t StackSize = RealStackSize - RVFI->getReservedSpillsSize(); uint64_t RVVStackSize = RVFI->getRVVStackSize(); // Early exit if there is no need to allocate on the stack @@ -590,20 +588,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, // directives. for (const auto &Entry : CSI) { int FrameIdx = Entry.getFrameIdx(); - int64_t Offset; - // Offsets for objects with fixed locations (IE: those saved by libcall) are - // simply calculated from the frame index. - if (FrameIdx < 0) { - if (RVFI->isPushable(MF)) { - // Callee-saved register stored by Zcmp push is in reverse order. - Offset = -(FrameIdx + RVFI->getRVPushRegs() + 1) * - (int64_t)STI.getXLen() / 8; - } else { - Offset = FrameIdx * (int64_t)STI.getXLen() / 8; - } - } else { - Offset = MFI.getObjectOffset(FrameIdx) - RVFI->getReservedSpillsSize(); - } + int64_t Offset = MFI.getObjectOffset(FrameIdx); Register Reg = Entry.getReg(); unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( nullptr, RI->getDwarfRegNum(Reg, true), Offset)); @@ -746,8 +731,8 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, if (!CSI.empty()) LastFrameDestroy = std::prev(MBBI, CSI.size()); - uint64_t StackSize = getStackSizeWithRVVPadding(MF); - uint64_t RealStackSize = StackSize + RVFI->getReservedSpillsSize(); + uint64_t RealStackSize = getStackSizeWithRVVPadding(MF); + uint64_t StackSize = RealStackSize - RVFI->getReservedSpillsSize(); uint64_t FPOffset = RealStackSize - RVFI->getVarArgsSaveSize(); uint64_t RVVStackSize = RVFI->getRVVStackSize(); @@ -897,8 +882,6 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, if (FrameReg == getFPReg(STI)) { Offset += StackOffset::getFixed(RVFI->getVarArgsSaveSize()); - if (FI >= 0) - 
Offset -= StackOffset::getFixed(RVFI->getReservedSpillsSize()); // When using FP to access scalable vector objects, we need to minus // the frame size. // @@ -965,8 +948,7 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, if (MFI.isFixedObjectIndex(FI)) { assert(!RI->hasStackRealignment(MF) && "Can't index across variable sized realign"); - Offset += StackOffset::get(getStackSizeWithRVVPadding(MF) + - RVFI->getReservedSpillsSize(), + Offset += StackOffset::get(getStackSizeWithRVVPadding(MF), RVFI->getRVVStackSize()); } else { Offset += StackOffset::getFixed(MFI.getStackSize()); @@ -1243,16 +1225,10 @@ void RISCVFrameLowering::processFunctionBeforeFrameFinalized( RVFI->setBranchRelaxationScratchFrameIndex(FI); } - if (MFI.getCalleeSavedInfo().empty() || RVFI->useSaveRestoreLibCalls(MF) || - RVFI->isPushable(MF)) { - RVFI->setCalleeSavedStackSize(0); - return; - } - - unsigned Size = 0; + unsigned Size = RVFI->getReservedSpillsSize(); for (const auto &Info : MFI.getCalleeSavedInfo()) { int FrameIdx = Info.getFrameIdx(); - if (MFI.getStackID(FrameIdx) != TargetStackID::Default) + if (FrameIdx < 0 || MFI.getStackID(FrameIdx) != TargetStackID::Default) continue; Size += MFI.getObjectSize(FrameIdx); @@ -1260,30 +1236,6 @@ void RISCVFrameLowering::processFunctionBeforeFrameFinalized( RVFI->setCalleeSavedStackSize(Size); } -void RISCVFrameLowering::processFunctionBeforeFrameIndicesReplaced( - MachineFunction &MF, RegScavenger *RS) const { - // Remove CalleeSavedInfo for registers saved by Zcmp or save/restore - // libcalls. - MachineFrameInfo &MFI = MF.getFrameInfo(); - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - const auto *RVFI = MF.getInfo(); - if (!RVFI->isPushable(MF) && !RVFI->useSaveRestoreLibCalls(MF)) - return; - const std::vector &CSIs = MFI.getCalleeSavedInfo(); - std::vector NewCSIs; - for (const auto &CSI : CSIs) { - // Skip CSRs that have fake a frame index. 
- int ReservedFI = 0; - if (TRI->hasReservedSpillSlot(MF, CSI.getReg(), ReservedFI)) { - assert(CSI.getFrameIdx() == ReservedFI && - "Reserved CSR spill slot frame index mismatch in CSI"); - continue; - } - NewCSIs.push_back(CSI); - } - MFI.setCalleeSavedInfo(std::move(NewCSIs)); -} - // Not preserve stack space within prologue for outgoing variables when the // function contains variable size objects or there are vector objects accessed // by the frame pointer. @@ -1403,6 +1355,93 @@ RISCVFrameLowering::getFirstSPAdjustAmount(const MachineFunction &MF) const { return 0; } +// Offsets which need to be scale by XLen representing locations of CSRs which +// are given a fixed location by save/restore libcalls or Zcmp Push/Pop. +static const std::pair FixedCSRFIMap[] = { + {/*ra*/ RISCV::X1, -1}, {/*s0*/ RISCV::X8, -2}, + {/*s1*/ RISCV::X9, -3}, {/*s2*/ RISCV::X18, -4}, + {/*s3*/ RISCV::X19, -5}, {/*s4*/ RISCV::X20, -6}, + {/*s5*/ RISCV::X21, -7}, {/*s6*/ RISCV::X22, -8}, + {/*s7*/ RISCV::X23, -9}, {/*s8*/ RISCV::X24, -10}, + {/*s9*/ RISCV::X25, -11}, {/*s10*/ RISCV::X26, -12}, + {/*s11*/ RISCV::X27, -13}}; + +bool RISCVFrameLowering::assignCalleeSavedSpillSlots( + MachineFunction &MF, const TargetRegisterInfo *TRI, + std::vector &CSI, unsigned &MinCSFrameIndex, + unsigned &MaxCSFrameIndex) const { + // Early exit if no callee saved registers are modified! + if (CSI.empty()) + return true; + + auto *RVFI = MF.getInfo(); + + if (RVFI->isPushable(MF)) { + // Determine how many GPRs we need to push and save it to RVFI. + Register MaxReg = getMaxPushPopReg(MF, CSI); + if (MaxReg != RISCV::NoRegister) { + auto [RegEnc, PushedRegNum] = getPushPopEncodingAndNum(MaxReg); + RVFI->setRVPushRegs(PushedRegNum); + RVFI->setRVPushStackSize(alignTo((STI.getXLen() / 8) * PushedRegNum, 16)); + + // Use encoded number to represent registers to spill. 
+ RVFI->setRVPushRlist(RegEnc); + } + } + + MachineFrameInfo &MFI = MF.getFrameInfo(); + const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); + + for (auto &CS : CSI) { + unsigned Reg = CS.getReg(); + const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg); + unsigned Size = RegInfo->getSpillSize(*RC); + + // This might need a fixed stack slot. + if (RVFI->useSaveRestoreLibCalls(MF) || RVFI->isPushable(MF)) { + const auto *FII = llvm::find_if( + FixedCSRFIMap, [&](auto P) { return P.first == CS.getReg(); }); + if (FII != std::end(FixedCSRFIMap)) { + int64_t Offset; + if (RVFI->isPushable(MF)) + Offset = -((FII->second + RVFI->getRVPushRegs() + 1) * (int64_t)Size); + else + Offset = FII->second * (int64_t)Size; + + int FrameIdx = MFI.CreateFixedSpillStackObject(Size, Offset); + assert(FrameIdx < 0); + CS.setFrameIdx(FrameIdx); + continue; + } + } + + // Not a fixed slot. + Align Alignment = RegInfo->getSpillAlign(*RC); + // We may not be able to satisfy the desired alignment specification of + // the TargetRegisterClass if the stack alignment is smaller. Use the + // min. + Alignment = std::min(Alignment, getStackAlign()); + int FrameIdx = MFI.CreateStackObject(Size, Alignment, true); + if ((unsigned)FrameIdx < MinCSFrameIndex) + MinCSFrameIndex = FrameIdx; + if ((unsigned)FrameIdx > MaxCSFrameIndex) + MaxCSFrameIndex = FrameIdx; + CS.setFrameIdx(FrameIdx); + } + + // Allocate a fixed object that covers the full push or libcall size. 
+ if (RVFI->isPushable(MF)) { + if (int64_t PushSize = RVFI->getRVPushStackSize()) + MFI.CreateFixedSpillStackObject(PushSize, -PushSize); + } else if (int LibCallRegs = getLibCallID(MF, CSI) + 1) { + int64_t LibCallFrameSize = + alignTo((STI.getXLen() / 8) * LibCallRegs, getStackAlign()); + MFI.CreateFixedSpillStackObject(LibCallFrameSize, -LibCallFrameSize); + } + + return true; +} + bool RISCVFrameLowering::spillCalleeSavedRegisters( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef CSI, const TargetRegisterInfo *TRI) const { @@ -1418,14 +1457,10 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters( // Emit CM.PUSH with base SPimm & evaluate Push stack RISCVMachineFunctionInfo *RVFI = MF->getInfo(); if (RVFI->isPushable(*MF)) { - Register MaxReg = getMaxPushPopReg(*MF, CSI); - if (MaxReg != RISCV::NoRegister) { - auto [RegEnc, PushedRegNum] = getPushPopEncodingAndNum(MaxReg); - RVFI->setRVPushRegs(PushedRegNum); - RVFI->setRVPushStackSize(alignTo((STI.getXLen() / 8) * PushedRegNum, 16)); - + unsigned PushedRegNum = RVFI->getRVPushRegs(); + if (PushedRegNum > 0) { // Use encoded number to represent registers to spill. 
- RVFI->setRVPushRlist(RegEnc); + int RegEnc = RVFI->getRVPushRlist(); MachineInstrBuilder PushBuilder = BuildMI(MBB, MI, DL, TII.get(RISCV::CM_PUSH)) .setMIFlag(MachineInstr::FrameSetup); diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h index a784479f111b11..210f8c1064724a 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h @@ -37,10 +37,6 @@ class RISCVFrameLowering : public TargetFrameLowering { void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const override; - void - processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF, - RegScavenger *RS) const override; - bool hasFP(const MachineFunction &MF) const override; bool hasBP(const MachineFunction &MF) const; @@ -49,6 +45,12 @@ class RISCVFrameLowering : public TargetFrameLowering { MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override; + + bool assignCalleeSavedSpillSlots(MachineFunction &MF, + const TargetRegisterInfo *TRI, + std::vector &CSI, + unsigned &MinCSFrameIndex, + unsigned &MaxCSFrameIndex) const override; bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef CSI, diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index 30457f528853b8..ca519dbc4c0359 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -156,40 +156,6 @@ const uint32_t *RISCVRegisterInfo::getNoPreservedMask() const { return CSR_NoRegs_RegMask; } -// Frame indexes representing locations of CSRs which are given a fixed location -// by save/restore libcalls or Zcmp Push/Pop. 
-static const std::pair FixedCSRFIMap[] = { - {/*ra*/ RISCV::X1, -1}, - {/*s0*/ RISCV::X8, -2}, - {/*s1*/ RISCV::X9, -3}, - {/*s2*/ RISCV::X18, -4}, - {/*s3*/ RISCV::X19, -5}, - {/*s4*/ RISCV::X20, -6}, - {/*s5*/ RISCV::X21, -7}, - {/*s6*/ RISCV::X22, -8}, - {/*s7*/ RISCV::X23, -9}, - {/*s8*/ RISCV::X24, -10}, - {/*s9*/ RISCV::X25, -11}, - {/*s10*/ RISCV::X26, -12}, - {/*s11*/ RISCV::X27, -13} -}; - -bool RISCVRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, - Register Reg, - int &FrameIdx) const { - const auto *RVFI = MF.getInfo(); - if (!RVFI->useSaveRestoreLibCalls(MF) && !RVFI->isPushable(MF)) - return false; - - const auto *FII = - llvm::find_if(FixedCSRFIMap, [&](auto P) { return P.first == Reg; }); - if (FII == std::end(FixedCSRFIMap)) - return false; - - FrameIdx = FII->second; - return true; -} - void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator II, const DebugLoc &DL, Register DestReg, diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h index 8b729caa5f713f..431ea23b3e2d04 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h @@ -35,9 +35,6 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo { const uint32_t *getNoPreservedMask() const override; - bool hasReservedSpillSlot(const MachineFunction &MF, Register Reg, - int &FrameIdx) const override; - // Update DestReg to have the value SrcReg plus an offset. 
This is // used during frame layout, and we may need to ensure that if we // split the offset internally that the DestReg is always aligned, diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll index 5c55113fc97497..d08cf577b1bdd3 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll @@ -731,11 +731,11 @@ define void @caller_aligned_stack() { ; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_aligned_stack: ; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -48 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 56 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -56 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 64 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi s0, sp, 56 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi s0, sp, 64 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: andi sp, sp, -16 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 18 @@ -776,18 +776,18 @@ define void @caller_aligned_stack() { ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 13 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a6, 32(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_aligned_stack -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, s0, -56 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 48 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, s0, -64 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 56 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_1 ; ; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_aligned_stack: ; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, -48 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 56 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi 
sp, sp, -56 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 64 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 56 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 64 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: andi sp, sp, -16 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 18 @@ -828,8 +828,8 @@ define void @caller_aligned_stack() { ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 13 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a6, 32(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_aligned_stack -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, s0, -56 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 48 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, s0, -64 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 56 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 %1 = call i32 @callee_aligned_stack(i32 1, i32 11, fp128 0xLEB851EB851EB851F400091EB851EB851, i32 12, i32 13, @@ -1431,11 +1431,11 @@ define i32 @caller_large_scalars() { ; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_large_scalars: ; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -32 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 40 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -40 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 48 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi s0, sp, 40 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi s0, sp, 48 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: andi sp, sp, -16 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 524272 @@ -1443,26 +1443,26 @@ define i32 @caller_large_scalars() { ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 8(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 4(sp) ; 
ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 0(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 36(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 32(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 28(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 24(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 20(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 1 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, sp, 16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, sp, 24 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv a1, sp -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a2, 16(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a2, 24(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_large_scalars -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, s0, -40 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 32 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, s0, -48 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 40 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_1 ; ; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_large_scalars: ; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, -32 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 40 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, -40 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 48 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 40 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 48 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: andi sp, sp, -16 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 524272 @@ -1470,16 +1470,16 @@ define i32 @caller_large_scalars() { ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 8(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 4(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 0(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 36(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 32(sp) ; 
ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 28(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 24(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 20(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 1 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, sp, 16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, sp, 24 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv a1, sp -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a2, 16(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a2, 24(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_large_scalars -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, s0, -40 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 32 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, s0, -48 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 40 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 %1 = call i32 @callee_large_scalars(i128 1, fp128 0xL00000000000000007FFF000000000000) ret i32 %1 @@ -1688,18 +1688,18 @@ define i32 @caller_large_scalars_exhausted_regs() { ; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_large_scalars_exhausted_regs: ; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -48 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 56 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -56 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 64 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi s0, sp, 56 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi s0, sp, 64 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: andi sp, sp, -16 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, sp, 16 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 12(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 9 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 8(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, sp, 32 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, sp, 40 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 
4(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 7 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 0(sp) @@ -1708,37 +1708,37 @@ define i32 @caller_large_scalars_exhausted_regs() { ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 24(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 20(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 16(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 44(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 40(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 52(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 48(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 8 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 32(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 40(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 2 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 3 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 4 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 5 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a5, 6 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 36(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 44(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_large_scalars_exhausted_regs -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, s0, -56 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 48 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, s0, -64 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 56 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_1 ; ; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_large_scalars_exhausted_regs: ; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, -48 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 56 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, -56 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 64 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 56 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 64 
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: andi sp, sp, -16 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, sp, 16 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 12(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 9 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 8(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, sp, 32 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, sp, 40 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 4(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 7 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 0(sp) @@ -1747,20 +1747,20 @@ define i32 @caller_large_scalars_exhausted_regs() { ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 24(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 20(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 16(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 44(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 40(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 52(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 48(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 8 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 32(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 40(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 2 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 3 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 4 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 5 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a5, 6 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 36(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 44(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_large_scalars_exhausted_regs -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, s0, -56 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 48 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, s0, -64 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 56 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 %1 = call i32 @callee_large_scalars_exhausted_regs( i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i128 8, i32 9, @@ -2346,33 +2346,33 @@ define void 
@caller_large_scalar_ret() { ; ILP32E-FPELIM-SAVE-RESTORE-LABEL: caller_large_scalar_ret: ; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -16 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 24 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -24 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 32 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi s0, sp, 24 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi s0, sp, 32 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: andi sp, sp, -16 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv a0, sp ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_large_scalar_ret -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, s0, -24 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, s0, -32 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 24 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: tail __riscv_restore_1 ; ; ILP32E-WITHFP-SAVE-RESTORE-LABEL: caller_large_scalar_ret: ; ILP32E-WITHFP-SAVE-RESTORE: # %bb.0: ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call t0, __riscv_save_1 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, -16 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 24 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, -24 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 32 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset ra, -4 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 24 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 32 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: andi sp, sp, -16 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv a0, sp ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_large_scalar_ret -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, s0, -24 -; 
ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, s0, -32 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 24 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 %1 = call fp128 @callee_large_scalar_ret() ret void From cb1a9f70ecb22d48df1919bd54daf64bfaa08864 Mon Sep 17 00:00:00 2001 From: Danila Malyutin Date: Wed, 14 Feb 2024 03:16:32 +0400 Subject: [PATCH 072/240] [InstSimplify] Add trivial simplifications for gc.relocate intrinsic (#81639) Fold gc.relocate of undef and null to undef and null respectively. Similar transform is currently done by instcombine, but there is no reason to not include it here as well. --- llvm/lib/Analysis/InstructionSimplify.cpp | 22 +++++++++++++++ .../Transforms/InstSimplify/gc_relocate.ll | 27 ++++++++++++++++++- 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 51e258d69e9e2e..333b38f221cfc4 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -39,6 +39,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Statepoint.h" #include "llvm/Support/KnownBits.h" #include #include @@ -6847,6 +6848,27 @@ static Value *simplifyIntrinsic(CallBase *Call, Value *Callee, } case Intrinsic::experimental_constrained_ldexp: return simplifyLdexp(Args[0], Args[1], Q, true); + case Intrinsic::experimental_gc_relocate: { + GCRelocateInst &GCR = *cast(Call); + Value *DerivedPtr = GCR.getDerivedPtr(); + Value *BasePtr = GCR.getBasePtr(); + + // Undef is undef, even after relocation. + if (isa(DerivedPtr) || isa(BasePtr)) { + return UndefValue::get(GCR.getType()); + } + + if (auto *PT = dyn_cast(GCR.getType())) { + // For now, the assumption is that the relocation of null will be null + // for most any collector. 
If this ever changes, a corresponding hook + // should be added to GCStrategy and this code should check it first. + if (isa(DerivedPtr)) { + // Use null-pointer of gc_relocate's type to replace it. + return ConstantPointerNull::get(PT); + } + } + return nullptr; + } default: return nullptr; } diff --git a/llvm/test/Transforms/InstSimplify/gc_relocate.ll b/llvm/test/Transforms/InstSimplify/gc_relocate.ll index 3f6de8b3845a23..894e5ed76584aa 100644 --- a/llvm/test/Transforms/InstSimplify/gc_relocate.ll +++ b/llvm/test/Transforms/InstSimplify/gc_relocate.ll @@ -11,9 +11,34 @@ define void @dead_relocate(ptr addrspace(1) %in) gc "statepoint-example" { ; entry: %safepoint_token = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void ()) @func, i32 0, i32 0, i32 0, i32 0) ["gc-live"(ptr addrspace(1) %in)] - %a = call ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token %safepoint_token, i32 0, i32 0) + %a = call ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token %safepoint_token, i32 0, i32 0) ret void } +define ptr addrspace(1) @relocate_undef() gc "statepoint-example" { +; CHECK-LABEL: @relocate_undef( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SAFEPOINT_TOKEN:%.*]] = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void ()) @func, i32 0, i32 0, i32 0, i32 0) [ "gc-live"(ptr addrspace(1) undef) ] +; CHECK-NEXT: ret ptr addrspace(1) undef +; +entry: + %safepoint_token = call token (i64, i32, ptr, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void ()) @func, i32 0, i32 0, i32 0, i32 0) ["gc-live"(ptr addrspace(1) undef)] + %a = call ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token %safepoint_token, i32 0, i32 0) + ret ptr addrspace(1) %a +} + +define ptr addrspace(1) @relocate_null() gc "statepoint-example" { +; CHECK-LABEL: @relocate_null( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SAFEPOINT_TOKEN:%.*]] = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void ()) @func, i32 0, i32 0, i32 0, i32 0) [ "gc-live"(ptr addrspace(1) null) ] +; CHECK-NEXT: ret ptr addrspace(1) null +; +entry: + %safepoint_token = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void ()) @func, i32 0, i32 0, i32 0, i32 0) ["gc-live"(ptr addrspace(1) null)] + %a = call ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token %safepoint_token, i32 0, i32 0) + ret ptr addrspace(1) %a +} + + declare token @llvm.experimental.gc.statepoint.p0(i64, i32, ptr, i32, i32, ...) declare ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token, i32, i32) From 4bc2a4f64ff71dae42c80faf34fa8aa7885f6b3d Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Tue, 13 Feb 2024 18:21:27 -0500 Subject: [PATCH 073/240] [gn] fix typo in 8c56e78ec531 The missing trailing comma confuses the sync script. 
--- llvm/utils/gn/secondary/lldb/tools/lldb-dap/BUILD.gn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/lldb/tools/lldb-dap/BUILD.gn b/llvm/utils/gn/secondary/lldb/tools/lldb-dap/BUILD.gn index dc958934485ec1..a752b61bdbaabf 100644 --- a/llvm/utils/gn/secondary/lldb/tools/lldb-dap/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/tools/lldb-dap/BUILD.gn @@ -52,6 +52,6 @@ executable("lldb-dap") { "RunInTerminal.cpp", "SourceBreakpoint.cpp", "lldb-dap.cpp", - "Watchpoint.cpp" + "Watchpoint.cpp", ] } From bf3d5dbe2fc7e558b4627637de53ea2bcf6bb8eb Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Tue, 13 Feb 2024 18:31:32 -0500 Subject: [PATCH 074/240] [lld/ELF] fix typos to cycle bots --- lld/ELF/Arch/LoongArch.cpp | 6 +++--- lld/ELF/InputFiles.cpp | 6 +++--- lld/ELF/Thunks.cpp | 2 +- lld/test/ELF/arm-thumb-thunk-v6m-xo.s | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index 3e9d6e0e742008..49fd979bd0a55c 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -75,7 +75,7 @@ enum Reg { // // Here a "page" is in fact just another way to refer to the 12-bit range // allowed by the immediate field of the addi/ld/st instructions, and not -// related to the system or the kernel's actual page size. The sematics happens +// related to the system or the kernel's actual page size. The semantics happen // to match the AArch64 `adrp`, so the concept of "page" is borrowed here. static uint64_t getLoongArchPage(uint64_t p) { return p & ~static_cast(0xfff); @@ -86,7 +86,7 @@ static uint32_t lo12(uint32_t val) { return val & 0xfff; } // Calculate the adjusted page delta between dest and PC. 
uint64_t elf::getLoongArchPageDelta(uint64_t dest, uint64_t pc, RelType type) { // Note that if the sequence being relocated is `pcalau12i + addi.d + lu32i.d - // + lu52i.d`, they must be adjancent so that we can infer the PC of + // + lu52i.d`, they must be adjacent so that we can infer the PC of // `pcalau12i` when calculating the page delta for the other two instructions // (lu32i.d and lu52i.d). Compensate all the sign-extensions is a bit // complicated. Just use psABI recommended algorithm. @@ -539,7 +539,7 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel, return; case R_LARCH_CALL36: { - // This relocation is designed for adjancent pcaddu18i+jirl pairs that + // This relocation is designed for adjacent pcaddu18i+jirl pairs that // are patched in one time. Because of sign extension of these insns' // immediate fields, the relocation range is [-128G - 0x20000, +128G - // 0x20000) (of course must be 4-byte aligned). diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index 6c7ef27cbd4942..00aebb47640e84 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -41,8 +41,8 @@ using namespace llvm::support::endian; using namespace lld; using namespace lld::elf; -// This function is explicity instantiated in ARM.cpp, don't do it here to avoid -// warnings with MSVC. +// This function is explicitly instantiated in ARM.cpp, don't do it here to +// avoid warnings with MSVC. extern template void ObjFile::importCmseSymbols(); extern template void ObjFile::importCmseSymbols(); extern template void ObjFile::importCmseSymbols(); @@ -323,7 +323,7 @@ template static void doParseFile(InputFile *file) { // Add symbols in File to the symbol table. void elf::parseFile(InputFile *file) { invokeELFT(doParseFile, file); } -// This function is explicity instantiated in ARM.cpp. Mark it extern here, +// This function is explicitly instantiated in ARM.cpp. Mark it extern here, // to avoid warnings when building with MSVC. 
extern template void ObjFile::importCmseSymbols(); extern template void ObjFile::importCmseSymbols(); diff --git a/lld/ELF/Thunks.cpp b/lld/ELF/Thunks.cpp index 5f543ffdcfaa32..f912f61e372943 100644 --- a/lld/ELF/Thunks.cpp +++ b/lld/ELF/Thunks.cpp @@ -1338,7 +1338,7 @@ static Thunk *addThunkV6M(const InputSection &isec, RelType reloc, Symbol &s, return make(s, a); fatal("relocation " + toString(reloc) + " to " + toString(s) + - " not supported for Armv6-M targets for position independant" + " not supported for Armv6-M targets for position independent" " and execute only code"); } if (isPureCode) diff --git a/lld/test/ELF/arm-thumb-thunk-v6m-xo.s b/lld/test/ELF/arm-thumb-thunk-v6m-xo.s index 10f1e73f0b60a1..f1b6c0c194b38a 100644 --- a/lld/test/ELF/arm-thumb-thunk-v6m-xo.s +++ b/lld/test/ELF/arm-thumb-thunk-v6m-xo.s @@ -54,4 +54,4 @@ far: // CHECK-NEXT: : // CHECK-NEXT: 12345678: bx lr -// CHECK-PI: error: relocation R_ARM_THM_CALL to far not supported for Armv6-M targets for position independant and execute only code +// CHECK-PI: error: relocation R_ARM_THM_CALL to far not supported for Armv6-M targets for position independent and execute only code From a6b846ae1e58e11160185e427e20a995f6656859 Mon Sep 17 00:00:00 2001 From: ZijunZhaoCCK <88353225+ZijunZhaoCCK@users.noreply.github.com> Date: Tue, 13 Feb 2024 15:42:37 -0800 Subject: [PATCH 075/240] [libc++][ranges] Implement ranges::contains_subrange (#66963) --- libcxx/docs/Status/RangesAlgorithms.csv | 6 +- libcxx/include/CMakeLists.txt | 1 + .../__algorithm/ranges_contains_subrange.h | 99 ++++++ libcxx/include/algorithm | 14 + libcxx/include/libcxx.imp | 1 + libcxx/include/module.modulemap.in | 1 + libcxx/modules/std/algorithm.inc | 2 - ...obust_against_copying_projections.pass.cpp | 4 + ...nges.nodiscard_extensions.compile.pass.cpp | 2 + .../ranges.nodiscard_extensions.verify.cpp | 4 + .../ranges.contains_subrange.pass.cpp | 320 ++++++++++++++++++ .../niebloid.compile.pass.cpp | 1 + 12 files changed, 450 
insertions(+), 5 deletions(-) create mode 100644 libcxx/include/__algorithm/ranges_contains_subrange.h create mode 100644 libcxx/test/std/algorithms/alg.nonmodifying/alg.contains/ranges.contains_subrange.pass.cpp diff --git a/libcxx/docs/Status/RangesAlgorithms.csv b/libcxx/docs/Status/RangesAlgorithms.csv index 2fe530bf75fd94..f7a51f732c4b14 100644 --- a/libcxx/docs/Status/RangesAlgorithms.csv +++ b/libcxx/docs/Status/RangesAlgorithms.csv @@ -3,13 +3,13 @@ C++20,all C++20 algorithms,N/A,N/A,✅ C++23,`find_last `_,Unassigned,No patch yet,Not started C++23,`find_last_if `_,Unassigned,No patch yet,Not started C++23,`find_last_if_not `_,Unassigned,No patch yet,Not started -C++23,`starts_with `_,Zijun Zhao,`D150735 `_,✅ -C++23,`ends_with `_,Zijun Zhao,No patch yet,In Progress +C++23,`starts_with `_,Zijun Zhao,`D150735 `_,Complete +C++23,`ends_with `_,Zijun Zhao, `D150831 `_,Complete C++23,`shift_left `_,Unassigned,No patch yet,Not started C++23,`shift_right `_,Unassigned,No patch yet,Not started C++23,`iota (algorithm) `_,Unassigned,No patch yet,Not started C++23,`fold `_,Unassigned,No patch yet,Not started -C++23,`contains `_,Zijun Zhao,No patch yet,In Progress +C++23,`contains `_,Zijun Zhao, `#65148 `_,Complete C++23,`fold_left_with_iter `_,Christopher Di Bella,N/A,Complete C++23,`fold_left `_,Christopher Di Bella,N/A,Complete C++23,`fold_left_first_with_iter `_,Christopher Di Bella,N/A,In progress diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index d55dc66a91bc7d..b44068357e7089 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -110,6 +110,7 @@ set(files __algorithm/ranges_binary_search.h __algorithm/ranges_clamp.h __algorithm/ranges_contains.h + __algorithm/ranges_contains_subrange.h __algorithm/ranges_copy.h __algorithm/ranges_copy_backward.h __algorithm/ranges_copy_if.h diff --git a/libcxx/include/__algorithm/ranges_contains_subrange.h b/libcxx/include/__algorithm/ranges_contains_subrange.h new file mode 
100644 index 00000000000000..4cd03cbb537060 --- /dev/null +++ b/libcxx/include/__algorithm/ranges_contains_subrange.h @@ -0,0 +1,99 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_CONTAINS_SUBRANGE_H +#define _LIBCPP___ALGORITHM_RANGES_CONTAINS_SUBRANGE_H + +#include <__algorithm/ranges_search.h> +#include <__config> +#include <__functional/identity.h> +#include <__functional/ranges_operations.h> +#include <__functional/reference_wrapper.h> +#include <__iterator/concepts.h> +#include <__iterator/distance.h> +#include <__iterator/indirectly_comparable.h> +#include <__iterator/projected.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/subrange.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +#if _LIBCPP_STD_VER >= 23 + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { +namespace __contains_subrange { +struct __fn { + template _Sent1, + forward_iterator _Iter2, + sentinel_for<_Iter2> _Sent2, + class _Pred = ranges::equal_to, + class _Proj1 = identity, + class _Proj2 = identity> + requires indirectly_comparable<_Iter1, _Iter2, _Pred, _Proj1, _Proj2> + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool static operator()( + _Iter1 __first1, + _Sent1 __last1, + _Iter2 __first2, + _Sent2 __last2, + _Pred __pred = {}, + _Proj1 __proj1 = {}, + _Proj2 __proj2 = {}) { + auto __n2 = ranges::distance(__first2, __last2); + if (__n2 == 0) + return true; + + auto __ret = ranges::search( + std::move(__first1), __last1, std::move(__first2), __last2, 
__pred, std::ref(__proj1), std::ref(__proj2)); + return __ret.empty() == false; + } + + template + requires indirectly_comparable, iterator_t<_Range2>, _Pred, _Proj1, _Proj2> + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool static + operator()(_Range1&& __range1, _Range2&& __range2, _Pred __pred = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) { + auto __n2 = 0; + if constexpr (sized_range<_Range2>) { + __n2 = ranges::size(__range2); + } else { + __n2 = std::distance(cbegin(__range2), cend(__range2)); + } + if (__n2 == 0) + return true; + + auto __ret = ranges::search(__range1, __range2, __pred, std::ref(__proj1), std::ref(__proj2)); + return __ret.empty() == false; + } +}; +} // namespace __contains_subrange + +inline namespace __cpo { +inline constexpr auto contains_subrange = __contains_subrange::__fn{}; +} // namespace __cpo +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER >= 23 + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___ALGORITHM_RANGES_CONTAINS_SUBRANGE_H diff --git a/libcxx/include/algorithm b/libcxx/include/algorithm index 1176602a2b6951..70e30bc87e8128 100644 --- a/libcxx/include/algorithm +++ b/libcxx/include/algorithm @@ -217,6 +217,19 @@ namespace ranges { constexpr ranges::minmax_element_result> minmax_element(R&& r, Comp comp = {}, Proj proj = {}); // since C++20 + template S1, + forward_iterator I2, sentinel_for S2, + class Pred = ranges::equal_to, class Proj1 = identity, class Proj2 = identity> + requires indirectly_comparable + constexpr bool contains_subrange(I1 first1, S1 last1, I2 first2, S2 last2, + Pred pred = {}, Proj1 proj1 = {}, Proj2 proj2 = {}); // since C++23 + + template + requires indirectly_comparable, iterator_t, Pred, Proj1, Proj2> + constexpr bool contains_subrange(R1&& r1, R2&& r2, Pred pred = {}, + Proj1 proj1 = {}, Proj2 proj2 = {}); // since C++23 + template using copy_result = in_out_result; // since C++20 @@ -1875,6 +1888,7 @@ template #include <__algorithm/ranges_binary_search.h> 
#include <__algorithm/ranges_clamp.h> #include <__algorithm/ranges_contains.h> +#include <__algorithm/ranges_contains_subrange.h> #include <__algorithm/ranges_copy.h> #include <__algorithm/ranges_copy_backward.h> #include <__algorithm/ranges_copy_if.h> diff --git a/libcxx/include/libcxx.imp b/libcxx/include/libcxx.imp index 69de4705f37886..3f056d418f47cf 100644 --- a/libcxx/include/libcxx.imp +++ b/libcxx/include/libcxx.imp @@ -110,6 +110,7 @@ { include: [ "<__algorithm/ranges_binary_search.h>", "private", "", "public" ] }, { include: [ "<__algorithm/ranges_clamp.h>", "private", "", "public" ] }, { include: [ "<__algorithm/ranges_contains.h>", "private", "", "public" ] }, + { include: [ "<__algorithm/ranges_contains_subrange.h>", "private", "", "public" ] }, { include: [ "<__algorithm/ranges_copy.h>", "private", "", "public" ] }, { include: [ "<__algorithm/ranges_copy_backward.h>", "private", "", "public" ] }, { include: [ "<__algorithm/ranges_copy_if.h>", "private", "", "public" ] }, diff --git a/libcxx/include/module.modulemap.in b/libcxx/include/module.modulemap.in index f3246f808b9d02..63af3a90d88b9b 100644 --- a/libcxx/include/module.modulemap.in +++ b/libcxx/include/module.modulemap.in @@ -778,6 +778,7 @@ module std_private_algorithm_ranges_clamp [system export std_private_functional_ranges_operations } module std_private_algorithm_ranges_contains [system] { header "__algorithm/ranges_contains.h" } +module std_private_algorithm_ranges_contains_subrange [system] { header "__algorithm/ranges_contains_subrange.h" } module std_private_algorithm_ranges_copy [system] { header "__algorithm/ranges_copy.h" export std_private_algorithm_in_out_result diff --git a/libcxx/modules/std/algorithm.inc b/libcxx/modules/std/algorithm.inc index 75e8a3af78dea2..e7796bfa26af81 100644 --- a/libcxx/modules/std/algorithm.inc +++ b/libcxx/modules/std/algorithm.inc @@ -46,9 +46,7 @@ export namespace std { // [alg.contains], contains namespace ranges { using std::ranges::contains; -#if 
0 using std::ranges::contains_subrange; -#endif } // namespace ranges #endif // _LIBCPP_STD_VER >= 23 diff --git a/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_projections.pass.cpp b/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_projections.pass.cpp index e96a57f4005e04..71823d9afc1a4b 100644 --- a/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_projections.pass.cpp +++ b/libcxx/test/libcxx/algorithms/ranges_robust_against_copying_projections.pass.cpp @@ -86,6 +86,10 @@ constexpr bool all_the_algorithms() assert(copies == 0); (void)std::ranges::contains(a, value, Proj(&copies)); assert(copies == 0); + (void)std::ranges::contains_subrange(first, last, first2, last2, Equal(), Proj(&copies), Proj(&copies)); + assert(copies == 0); + (void)std::ranges::contains_subrange(a, b, Equal(), Proj(&copies), Proj(&copies)); + assert(copies == 0); #endif (void)std::ranges::count(first, last, value, Proj(&copies)); assert(copies == 0); (void)std::ranges::count(a, value, Proj(&copies)); assert(copies == 0); diff --git a/libcxx/test/libcxx/diagnostics/ranges.nodiscard_extensions.compile.pass.cpp b/libcxx/test/libcxx/diagnostics/ranges.nodiscard_extensions.compile.pass.cpp index 19e07b83079c78..12f6b271583693 100644 --- a/libcxx/test/libcxx/diagnostics/ranges.nodiscard_extensions.compile.pass.cpp +++ b/libcxx/test/libcxx/diagnostics/ranges.nodiscard_extensions.compile.pass.cpp @@ -33,6 +33,8 @@ void test() { #if TEST_STD_VER >= 23 std::ranges::contains(range, 1); std::ranges::contains(iter, iter, 1); + std::ranges::contains_subrange(range, range); + std::ranges::contains_subrange(iter, iter, iter, iter); #endif std::ranges::count_if(range, pred); std::ranges::count_if(iter, iter, pred); diff --git a/libcxx/test/libcxx/diagnostics/ranges.nodiscard_extensions.verify.cpp b/libcxx/test/libcxx/diagnostics/ranges.nodiscard_extensions.verify.cpp index 5e45ad086cbd08..57ce6ab1e6b11c 100644 --- 
a/libcxx/test/libcxx/diagnostics/ranges.nodiscard_extensions.verify.cpp +++ b/libcxx/test/libcxx/diagnostics/ranges.nodiscard_extensions.verify.cpp @@ -95,6 +95,10 @@ void test() { // expected-warning@-1{{ignoring return value of function declared with 'nodiscard' attribute}} std::ranges::contains(iter, iter, 1); // expected-warning@-1{{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::contains_subrange(range, range); + // expected-warning@-1 {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::contains_subrange(iter, iter, iter, iter); + // expected-warning@-1 {{ignoring return value of function declared with 'nodiscard' attribute}} std::ranges::fold_left(range, 0, std::plus()); // expected-warning@-1{{ignoring return value of function declared with 'nodiscard' attribute}} std::ranges::fold_left(iter, iter, 0, std::plus()); diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.contains/ranges.contains_subrange.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.contains/ranges.contains_subrange.pass.cpp new file mode 100644 index 00000000000000..d48ee9e4e7e02e --- /dev/null +++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.contains/ranges.contains_subrange.pass.cpp @@ -0,0 +1,320 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 +// ADDITIONAL_COMPILE_FLAGS(has-fconstexpr-steps): -fconstexpr-steps=2000000 + +// template S1, +// forward_iterator I2, sentinel_for S2, class Proj = identity> +// requires indirectly_comparable +// constexpr bool ranges::contains_subrange(I1 first1, S1 last1, I2 first2, S2 last2, +// Pred pred = {}, Proj1 proj1 = {}, Proj2 proj2 = {}); // since C++23 + +// template +// requires indirectly_comparable, iterator_t, Pred, Proj1, Proj2> +// constexpr bool ranges::contains_subrange(R1&& r1, R2&& r2, Pred pred = {}, +// Proj1 proj1 = {}, Proj2 proj2 = {}); // since C++23 + +#include +#include +#include +#include +#include + +#include "almost_satisfies_types.h" +#include "test_iterators.h" + +struct NotEqualityComparable {}; + +template +concept HasContainsSubrangeIt = requires(Iter1 first1, Sent1 last1, Iter2 first2, Sent2 last2) { + std::ranges::contains_subrange(first1, last1, first2, last2); +}; + +static_assert(HasContainsSubrangeIt); +static_assert(!HasContainsSubrangeIt); +static_assert(!HasContainsSubrangeIt); +static_assert(!HasContainsSubrangeIt); +static_assert(!HasContainsSubrangeIt); // not indirectly comparable +static_assert(!HasContainsSubrangeIt); +static_assert(!HasContainsSubrangeIt); +static_assert(!HasContainsSubrangeIt); +static_assert(!HasContainsSubrangeIt); +static_assert(!HasContainsSubrangeIt); + +template > +concept HasContainsSubrangeR = requires(Range1&& range1, Range2&& range2) { + std::ranges::contains_subrange(std::forward(range1), std::forward(range2)); +}; + +static_assert(HasContainsSubrangeR>); +static_assert(!HasContainsSubrangeR); +static_assert(!HasContainsSubrangeR); +static_assert(!HasContainsSubrangeR); +static_assert(!HasContainsSubrangeR); +static_assert(!HasContainsSubrangeR, UncheckedRange>); // not indirectly comparable 
+static_assert(!HasContainsSubrangeR, ForwardRangeNotDerivedFrom>); +static_assert(!HasContainsSubrangeR, ForwardRangeNotIncrementable>); +static_assert(!HasContainsSubrangeR, ForwardRangeNotSentinelSemiregular>); +static_assert(!HasContainsSubrangeR, ForwardRangeNotSentinelEqualityComparableWith>); + +template +constexpr void test_iterators() { + { // simple tests + int a[] = {1, 2, 3, 4, 5, 6}; + int p[] = {3, 4, 5}; + auto whole = std::ranges::subrange(Iter1(a), Sent1(Iter1(std::end(a)))); + auto subrange = std::ranges::subrange(Iter2(p), Sent2(Iter2(std::end(p)))); + { + std::same_as decltype(auto) ret = + std::ranges::contains_subrange(whole.begin(), whole.end(), subrange.begin(), subrange.end()); + assert(ret); + } + { + std::same_as decltype(auto) ret = std::ranges::contains_subrange(whole, subrange); + assert(ret); + } + } + + { // no match + int a[] = {1, 2, 3, 4, 5, 6}; + int p[] = {3, 4, 2}; + auto whole = std::ranges::subrange(Iter1(a), Sent1(Iter1(std::end(a)))); + auto subrange = std::ranges::subrange(Iter2(p), Sent2(Iter2(std::end(p)))); + { + bool ret = std::ranges::contains_subrange(whole.begin(), whole.end(), subrange.begin(), subrange.end()); + assert(!ret); + } + { + bool ret = std::ranges::contains_subrange(whole, subrange); + assert(!ret); + } + } + + { // range consists of just one element + int a[] = {3}; + int p[] = {3, 4, 2}; + auto whole = std::ranges::subrange(Iter1(a), Sent1(Iter1(std::end(a)))); + auto subrange = std::ranges::subrange(Iter2(p), Sent2(Iter2(std::end(p)))); + { + bool ret = std::ranges::contains_subrange(whole.begin(), whole.end(), subrange.begin(), subrange.end()); + assert(!ret); + } + { + bool ret = std::ranges::contains_subrange(whole, subrange); + assert(!ret); + } + } + + { // subrange consists of just one element + int a[] = {23, 1, 20, 3, 54, 2}; + int p[] = {3}; + auto whole = std::ranges::subrange(Iter1(a), Sent1(Iter1(std::end(a)))); + auto subrange = std::ranges::subrange(Iter2(p), Sent2(Iter2(std::end(p)))); 
+ { + bool ret = std::ranges::contains_subrange(whole.begin(), whole.end(), subrange.begin(), subrange.end()); + assert(ret); + } + { + bool ret = std::ranges::contains_subrange(whole, subrange); + assert(ret); + } + } + + { // range has zero length + int a[] = {}; + int p[] = {3, 4, 2}; + auto whole = std::ranges::subrange(Iter1(a), Sent1(Iter1(a))); + auto subrange = std::ranges::subrange(Iter2(p), Sent2(Iter2(std::end(p)))); + { + bool ret = std::ranges::contains_subrange(whole.begin(), whole.end(), subrange.begin(), subrange.end()); + assert(!ret); + } + { + bool ret = std::ranges::contains_subrange(whole, subrange); + assert(!ret); + } + } + + { // subrange has zero length + int a[] = {3, 4, 2}; + int p[] = {}; + auto whole = std::ranges::subrange(Iter1(a), Sent1(Iter1(std::end(a)))); + auto subrange = std::ranges::subrange(Iter2(p), Sent2(Iter2(p))); + { + bool ret = std::ranges::contains_subrange(whole.begin(), whole.end(), subrange.begin(), subrange.end()); + assert(ret); + } + { + bool ret = std::ranges::contains_subrange(whole, subrange); + assert(ret); + } + } + + { // range and subrange both have zero length + int a[] = {}; + int p[] = {}; + auto whole = std::ranges::subrange(Iter1(a), Sent1(Iter1(a))); + auto subrange = std::ranges::subrange(Iter2(p), Sent2(Iter2(p))); + { + bool ret = std::ranges::contains_subrange(whole.begin(), whole.end(), subrange.begin(), subrange.end()); + assert(ret); + } + { + bool ret = std::ranges::contains_subrange(whole, subrange); + assert(ret); + } + } + + { // range and subrange are identical + int a[] = {3, 4, 11, 32, 54, 2}; + int p[] = {3, 4, 11, 32, 54, 2}; + auto whole = std::ranges::subrange(Iter1(a), Sent1(Iter1(std::end(a)))); + auto subrange = std::ranges::subrange(Iter2(p), Sent2(Iter2(std::end(p)))); + { + bool ret = std::ranges::contains_subrange(whole.begin(), whole.end(), subrange.begin(), subrange.end()); + assert(ret); + } + { + bool ret = std::ranges::contains_subrange(whole, subrange); + assert(ret); + 
} + } + + { // subrange is longer than range + int a[] = {3, 4, 2}; + int p[] = {23, 3, 4, 2, 11, 32, 54, 2}; + auto whole = std::ranges::subrange(Iter1(a), Sent1(Iter1(std::end(a)))); + auto subrange = std::ranges::subrange(Iter2(p), Sent2(Iter2(std::end(p)))); + { + bool ret = std::ranges::contains_subrange(whole.begin(), whole.end(), subrange.begin(), subrange.end()); + assert(!ret); + } + { + bool ret = std::ranges::contains_subrange(whole, subrange); + assert(!ret); + } + } + + { // subrange is the prefix + int a[] = {3, 43, 5, 100, 433, 278, 6457, 900}; + int p[] = {3, 43, 5}; + auto whole = std::ranges::subrange(Iter1(a), Sent1(Iter1(std::end(a)))); + auto subrange = std::ranges::subrange(Iter2(p), Sent2(Iter2(std::end(p)))); + { + bool ret = std::ranges::contains_subrange(whole.begin(), whole.end(), subrange.begin(), subrange.end()); + assert(ret); + } + { + bool ret = std::ranges::contains_subrange(whole, subrange); + assert(ret); + } + } + + { // subrange is the suffix + int a[] = {3, 43, 5, 7, 68, 100, 433, 900}; + int p[] = {100, 433, 900}; + auto whole = std::ranges::subrange(Iter1(a), Sent1(Iter1(std::end(a)))); + auto subrange = std::ranges::subrange(Iter2(p), Sent2(Iter2(std::end(p)))); + { + bool ret = std::ranges::contains_subrange(whole.begin(), whole.end(), subrange.begin(), subrange.end()); + assert(ret); + } + { + bool ret = std::ranges::contains_subrange(whole, subrange); + assert(ret); + } + } + + { // subrange is a subsequence + int a[] = {23, 1, 0, 54, 2}; + int p[] = {1, 0, 2}; + auto whole = std::ranges::subrange(Iter1(a), Sent1(Iter1(std::end(a)))); + auto subrange = std::ranges::subrange(Iter2(p), Sent2(Iter2(std::end(p)))); + { + bool ret = std::ranges::contains_subrange(whole.begin(), whole.end(), subrange.begin(), subrange.end()); + assert(!ret); + } + { + bool ret = std::ranges::contains_subrange(whole, subrange); + assert(!ret); + } + } + + { // repeated subrange + int a[] = {23, 1, 0, 2, 54, 1, 0, 2, 23, 33}; + int p[] = {1, 0, 
2}; + auto whole = std::ranges::subrange(Iter1(a), Sent1(Iter1(std::end(a)))); + auto subrange = std::ranges::subrange(Iter2(p), Sent2(Iter2(std::end(p)))); + { + bool ret = std::ranges::contains_subrange(whole.begin(), whole.end(), subrange.begin(), subrange.end()); + assert(ret); + } + { + bool ret = std::ranges::contains_subrange(whole, subrange); + assert(ret); + } + } + + { // check that the predicate is used + int a[] = {23, 81, 61, 0, 42, 25, 1, 2, 1, 29, 2}; + int p[] = {-1, -2, -1}; + auto pred = [](int l, int r) { return l * -1 == r; }; + auto whole = std::ranges::subrange(Iter1(a), Sent1(Iter1(std::end(a)))); + auto subrange = std::ranges::subrange(Iter2(p), Sent2(Iter2(std::end(p)))); + { + bool ret = std::ranges::contains_subrange(whole.begin(), whole.end(), subrange.begin(), subrange.end(), pred); + assert(ret); + } + { + bool ret = std::ranges::contains_subrange(whole, subrange, pred); + assert(ret); + } + } + + { // check that the projections are used + int a[] = {1, 3, 15, 1, 2, 1, 8}; + int p[] = {2, 1, 2}; + auto whole = std::ranges::subrange(Iter1(a), Sent1(Iter1(std::end(a)))); + auto subrange = std::ranges::subrange(Iter2(p), Sent2(Iter2(std::end(p)))); + auto proj1 = [](int i) { return i - 3; }; + auto proj2 = [](int i) { return i * -1; }; + { + bool ret = std::ranges::contains_subrange( + whole.begin(), whole.end(), subrange.begin(), subrange.end(), {}, proj1, proj2); + assert(ret); + } + { + bool ret = std::ranges::contains_subrange(whole, subrange, {}, proj1, proj2); + assert(ret); + } + } +} + +constexpr bool test() { + types::for_each(types::forward_iterator_list{}, [] { + types::for_each(types::forward_iterator_list{}, [] { + test_iterators(); + test_iterators>(); + test_iterators, Iter2, Iter2>(); + test_iterators, Iter2, sized_sentinel>(); + }); + }); + + return true; +} + +int main(int, char**) { + test(); + static_assert(test()); + + return 0; +} diff --git 
a/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp b/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp index 494e9fd19c3545..9506ca1c768bd7 100644 --- a/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp +++ b/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp @@ -67,6 +67,7 @@ static_assert(test(std::ranges::binary_search, a, 42)); static_assert(test(std::ranges::clamp, 42, 42, 42)); #if TEST_STD_VER >= 23 static_assert(test(std::ranges::contains, a, 42)); +static_assert(test(std::ranges::contains_subrange, a, a)); #endif static_assert(test(std::ranges::copy, a, a)); static_assert(test(std::ranges::copy_backward, a, a)); From 9168a2119cf6b5cc7105d201683bb0d1a84b441b Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Tue, 13 Feb 2024 23:42:41 +0000 Subject: [PATCH 076/240] [gn build] Port a6b846ae1e58 --- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 45125ce25085c4..b7c7de9f991576 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -188,6 +188,7 @@ if (current_toolchain == default_toolchain) { "__algorithm/ranges_binary_search.h", "__algorithm/ranges_clamp.h", "__algorithm/ranges_contains.h", + "__algorithm/ranges_contains_subrange.h", "__algorithm/ranges_copy.h", "__algorithm/ranges_copy_backward.h", "__algorithm/ranges_copy_if.h", From 3122969e8e2404c1eb0b9c660bd979e1001c42fd Mon Sep 17 00:00:00 2001 From: Aart Bik <39774503+aartbik@users.noreply.github.com> Date: Tue, 13 Feb 2024 15:55:42 -0800 Subject: [PATCH 077/240] [mlir][sparse] add doubly compressed test case to assembly op (#81687) Removes audit TODO --- 
.../ExecutionEngine/SparseTensor/Storage.h | 8 ---- .../SparseTensor/CPU/sparse_pack_d.mlir | 41 ++++++++++++++++++- 2 files changed, 40 insertions(+), 9 deletions(-) diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h index 14182172f4f622..eff1aca42d3e8f 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h @@ -465,9 +465,6 @@ class SparseTensorStorage final : public SparseTensorStorageBase { /// Computes the assembled-size associated with the `l`-th level, /// given the assembled-size associated with the `(l-1)`-th level. - /// "Assembled-sizes" correspond to the (nominal) sizes of overhead - /// storage, as opposed to "level-sizes" which are the cardinality - /// of possible coordinates for that level. uint64_t assembledSize(uint64_t parentSz, uint64_t l) const { if (isCompressedLvl(l)) return positions[l][parentSz]; @@ -764,11 +761,6 @@ SparseTensorStorage::SparseTensorStorage( // Note that none of the buffers can be reused because ownership // of the memory passed from clients is not necessarily transferred. // Therefore, all data is copied over into a new SparseTensorStorage. - // - // TODO: this needs to be generalized to all formats AND - // we need a proper audit of e.g. 
double compressed - // levels where some are not filled - // uint64_t trailCOOLen = 0, parentSz = 1, bufIdx = 0; for (uint64_t l = 0; l < lvlRank; l++) { if (!isUniqueLvl(l) && (isCompressedLvl(l) || isLooseCompressedLvl(l))) { diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack_d.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack_d.mlir index 55585a7c997430..c818c23bfa0f46 100755 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack_d.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack_d.mlir @@ -23,6 +23,12 @@ // REDEFINE: %{sparsifier_opts} = enable-runtime-library=false // RUN: %{compile} | %{run} | FileCheck %s +#CCC = #sparse_tensor.encoding<{ + map = (d0, d1, d2) -> (d0 : compressed, d1 : compressed, d2 : compressed), + posWidth = 64, + crdWidth = 32 +}> + #BatchedCSR = #sparse_tensor.encoding<{ map = (d0, d1, d2) -> (d0 : dense, d1 : dense, d2 : compressed), posWidth = 64, @@ -35,7 +41,9 @@ crdWidth = 32 }> -// Test with batched-CSR and CSR-dense. +// +// Test assembly operation with CCC, batched-CSR and CSR-dense. +// module { // // Main driver. @@ -44,6 +52,31 @@ module { %c0 = arith.constant 0 : index %f0 = arith.constant 0.0 : f32 + // + // Setup CCC. 
+ // + + %data0 = arith.constant dense< + [ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0 ]> : tensor<8xf32> + %pos00 = arith.constant dense< + [ 0, 3 ]> : tensor<2xi64> + %crd00 = arith.constant dense< + [ 0, 2, 3 ]> : tensor<3xi32> + %pos01 = arith.constant dense< + [ 0, 2, 4, 5 ]> : tensor<4xi64> + %crd01 = arith.constant dense< + [ 0, 1, 1, 2, 1 ]> : tensor<5xi32> + %pos02 = arith.constant dense< + [ 0, 2, 4, 5, 7, 8 ]> : tensor<6xi64> + %crd02 = arith.constant dense< + [ 0, 1, 0, 1, 0, 0, 1, 0 ]> : tensor<8xi32> + + %s0 = sparse_tensor.assemble %data0, %pos00, %crd00, %pos01, %crd01, %pos02, %crd02 : + tensor<8xf32>, + tensor<2xi64>, tensor<3xi32>, + tensor<4xi64>, tensor<5xi32>, + tensor<6xi64>, tensor<8xi32> to tensor<4x3x2xf32, #CCC> + // // Setup BatchedCSR. // @@ -75,10 +108,15 @@ module { // // Verify. // + // CHECK: ( ( ( 1, 2 ), ( 3, 4 ), ( 0, 0 ) ), ( ( 0, 0 ), ( 0, 0 ), ( 0, 0 ) ), ( ( 0, 0 ), ( 5, 0 ), ( 6, 7 ) ), ( ( 0, 0 ), ( 8, 0 ), ( 0, 0 ) ) ) // CHECK: ( ( ( 1, 2 ), ( 0, 3 ), ( 4, 0 ) ), ( ( 5, 6 ), ( 0, 0 ), ( 0, 7 ) ), ( ( 8, 9 ), ( 10, 11 ), ( 12, 13 ) ), ( ( 14, 0 ), ( 0, 15 ), ( 0, 16 ) ) ) // CHECK: ( ( ( 1, 2 ), ( 0, 3 ), ( 4, 0 ) ), ( ( 5, 6 ), ( 0, 0 ), ( 0, 7 ) ), ( ( 8, 9 ), ( 10, 11 ), ( 12, 13 ) ), ( ( 14, 0 ), ( 0, 15 ), ( 0, 16 ) ) ) // + %d0 = sparse_tensor.convert %s0 : tensor<4x3x2xf32, #CCC> to tensor<4x3x2xf32> + %v0 = vector.transfer_read %d0[%c0, %c0, %c0], %f0 : tensor<4x3x2xf32>, vector<4x3x2xf32> + vector.print %v0 : vector<4x3x2xf32> + %d1 = sparse_tensor.convert %s1 : tensor<4x3x2xf32, #BatchedCSR> to tensor<4x3x2xf32> %v1 = vector.transfer_read %d1[%c0, %c0, %c0], %f0 : tensor<4x3x2xf32>, vector<4x3x2xf32> vector.print %v1 : vector<4x3x2xf32> @@ -88,6 +126,7 @@ module { vector.print %v2 : vector<4x3x2xf32> // FIXME: doing this explicitly crashes runtime + // bufferization.dealloc_tensor %s0 : tensor<4x3x2xf32, #CCC> // bufferization.dealloc_tensor %s1 : tensor<4x3x2xf32, #BatchedCSR> // bufferization.dealloc_tensor %s2 : 
tensor<4x3x2xf32, #CSRDense> return From 3647ff159a2f2445c45d9cbb4f8791b5f30da16b Mon Sep 17 00:00:00 2001 From: Jim Ingham Date: Tue, 13 Feb 2024 16:09:35 -0800 Subject: [PATCH 078/240] Used std::vector::reserve when I meant std::vector::resize. The Linux std has more asserts enabled by default, so it complained, even though this worked on Darwin... --- lldb/source/Commands/CommandObjectCommands.cpp | 6 +++--- .../API/commands/command/script/add/TestAddParsedCommand.py | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/lldb/source/Commands/CommandObjectCommands.cpp b/lldb/source/Commands/CommandObjectCommands.cpp index 3dfd452b92509d..b7cd65059b2214 100644 --- a/lldb/source/Commands/CommandObjectCommands.cpp +++ b/lldb/source/Commands/CommandObjectCommands.cpp @@ -1419,9 +1419,9 @@ class CommandObjectScriptingObjectParsed : public CommandObjectParsed { m_options_definition_up.reset(new OptionDefinition[m_num_options]); // We need to hand out pointers to contents of these vectors; we reserve // as much as we'll need up front so they don't get freed on resize... - m_usage_container.reserve(m_num_options); - m_enum_storage.reserve(m_num_options); - m_enum_vector.reserve(m_num_options); + m_usage_container.resize(m_num_options); + m_enum_storage.resize(m_num_options); + m_enum_vector.resize(m_num_options); size_t counter = 0; size_t short_opt_counter = 0; diff --git a/lldb/test/API/commands/command/script/add/TestAddParsedCommand.py b/lldb/test/API/commands/command/script/add/TestAddParsedCommand.py index c044e2bf8c8d28..bbf330500568b5 100644 --- a/lldb/test/API/commands/command/script/add/TestAddParsedCommand.py +++ b/lldb/test/API/commands/command/script/add/TestAddParsedCommand.py @@ -15,7 +15,6 @@ class ParsedCommandTestCase(TestBase): # This crashes on the x86_64 Debian bot, but the failure is not helpful. # Disable the test while I try to find a way to reproduce. 
- @skipIfLinux def test(self): self.pycmd_tests() From f45b9d987dfc5904d4129aa006ab20614b3174e3 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 13 Feb 2024 16:17:50 -0800 Subject: [PATCH 079/240] [RISCV] Add canonical ISA string as Module metadata in IR. (#80760) In an LTO build, we don't set the ELF attributes to indicate what extensions were compiled with. The target CPU/Attrs in RISCVTargetMachine do not get set for an LTO build. Each function gets a target-cpu/feature attribute, but this isn't usable to set ELF attributes since we wouldn't know what function to use. We can't just pick one since it might have been compiled with an attribute like target_version. This patch adds the ISA as Module metadata so we can retrieve it in the backend. Individual translation units can still be compiled with different strings so we need to collect the unique set when Modules are merged. The backend will need to combine the unique ISA strings to produce a single value for the ELF attributes. This will be done in a separate patch.
--- clang/lib/CodeGen/CodeGenModule.cpp | 14 + .../RISCV/ntlh-intrinsics/riscv32-zihintntl.c | 350 +++++++++--------- .../test/CodeGen/RISCV/riscv-metadata-arch.c | 20 + 3 files changed, 209 insertions(+), 175 deletions(-) create mode 100644 clang/test/CodeGen/RISCV/riscv-metadata-arch.c diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 2f923d5457f9cf..c984260b082cd1 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -68,6 +68,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/RISCVISAInfo.h" #include "llvm/Support/TimeProfiler.h" #include "llvm/Support/xxhash.h" #include "llvm/TargetParser/Triple.h" @@ -1057,6 +1058,19 @@ void CodeGenModule::Release() { llvm::LLVMContext &Ctx = TheModule.getContext(); getModule().addModuleFlag(llvm::Module::Error, "target-abi", llvm::MDString::get(Ctx, ABIStr)); + + // Add the canonical ISA string as metadata so the backend can set the ELF + // attributes correctly. We use AppendUnique so LTO will keep all of the + // unique ISA strings that were linked together. + const std::vector &Features = + getTarget().getTargetOpts().Features; + auto ParseResult = + llvm::RISCVISAInfo::parseFeatures(T.isRISCV64() ? 
64 : 32, Features); + if (!errorToBool(ParseResult.takeError())) + getModule().addModuleFlag( + llvm::Module::AppendUnique, "riscv-isa", + llvm::MDNode::get( + Ctx, llvm::MDString::get(Ctx, (*ParseResult)->toString()))); } if (CodeGenOpts.SanitizeCfiCrossDso) { diff --git a/clang/test/CodeGen/RISCV/ntlh-intrinsics/riscv32-zihintntl.c b/clang/test/CodeGen/RISCV/ntlh-intrinsics/riscv32-zihintntl.c index 897edbc6450af6..b11c2ca010e7ce 100644 --- a/clang/test/CodeGen/RISCV/ntlh-intrinsics/riscv32-zihintntl.c +++ b/clang/test/CodeGen/RISCV/ntlh-intrinsics/riscv32-zihintntl.c @@ -28,190 +28,190 @@ vint8m1_t *scvc1, *scvc2; // clang-format off void ntl_all_sizes() { // CHECK-LABEL: ntl_all_sizes - uc = __riscv_ntl_load(&sc, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: load i8{{.*}}align 1, !nontemporal !4, !riscv-nontemporal-domain !5 - sc = __riscv_ntl_load(&uc, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: load i8{{.*}}align 1, !nontemporal !4, !riscv-nontemporal-domain !5 - us = __riscv_ntl_load(&ss, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: load i16{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !5 - ss = __riscv_ntl_load(&us, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: load i16{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !5 - ui = __riscv_ntl_load(&si, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: load i32{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !5 - si = __riscv_ntl_load(&ui, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: load i32{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !5 - ull = __riscv_ntl_load(&sll, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: load i64{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !5 - sll = __riscv_ntl_load(&ull, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: load i64{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !5 - h1 = __riscv_ntl_load(&h2, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: load half{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !5 - f1 = __riscv_ntl_load(&f2, 
__RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: load float{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !5 - d1 = __riscv_ntl_load(&d2, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: load double{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !5 - v4si1 = __riscv_ntl_load(&v4si2, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: load <4 x i32>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !5 - v8ss1 = __riscv_ntl_load(&v8ss2, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: load <8 x i16>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !5 - v16sc1 = __riscv_ntl_load(&v16sc2, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: load <16 x i8>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !5 - *scvi1 = __riscv_ntl_load(scvi2, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: load {{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !5 - *scvs1 = __riscv_ntl_load(scvs2, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: load {{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !5 - *scvc1 = __riscv_ntl_load(scvc2, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: load {{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !5 + uc = __riscv_ntl_load(&sc, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: load i8{{.*}}align 1, !nontemporal !6, !riscv-nontemporal-domain !7 + sc = __riscv_ntl_load(&uc, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: load i8{{.*}}align 1, !nontemporal !6, !riscv-nontemporal-domain !7 + us = __riscv_ntl_load(&ss, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: load i16{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !7 + ss = __riscv_ntl_load(&us, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: load i16{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !7 + ui = __riscv_ntl_load(&si, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: load i32{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !7 + si = __riscv_ntl_load(&ui, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: load i32{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !7 + 
ull = __riscv_ntl_load(&sll, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: load i64{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !7 + sll = __riscv_ntl_load(&ull, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: load i64{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !7 + h1 = __riscv_ntl_load(&h2, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: load half{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !7 + f1 = __riscv_ntl_load(&f2, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: load float{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !7 + d1 = __riscv_ntl_load(&d2, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: load double{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !7 + v4si1 = __riscv_ntl_load(&v4si2, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: load <4 x i32>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !7 + v8ss1 = __riscv_ntl_load(&v8ss2, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: load <8 x i16>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !7 + v16sc1 = __riscv_ntl_load(&v16sc2, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: load <16 x i8>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !7 + *scvi1 = __riscv_ntl_load(scvi2, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: load {{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !7 + *scvs1 = __riscv_ntl_load(scvs2, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: load {{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !7 + *scvc1 = __riscv_ntl_load(scvc2, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: load {{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !7 - uc = __riscv_ntl_load(&sc, __RISCV_NTLH_ALL_PRIVATE); // CHECK: load i8{{.*}}align 1, !nontemporal !4, !riscv-nontemporal-domain !6 - sc = __riscv_ntl_load(&uc, __RISCV_NTLH_ALL_PRIVATE); // CHECK: load i8{{.*}}align 1, !nontemporal !4, !riscv-nontemporal-domain !6 - us = __riscv_ntl_load(&ss, __RISCV_NTLH_ALL_PRIVATE); // CHECK: load i16{{.*}}align 2, !nontemporal !4, 
!riscv-nontemporal-domain !6 - ss = __riscv_ntl_load(&us, __RISCV_NTLH_ALL_PRIVATE); // CHECK: load i16{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !6 - ui = __riscv_ntl_load(&si, __RISCV_NTLH_ALL_PRIVATE); // CHECK: load i32{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !6 - si = __riscv_ntl_load(&ui, __RISCV_NTLH_ALL_PRIVATE); // CHECK: load i32{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !6 - ull = __riscv_ntl_load(&sll, __RISCV_NTLH_ALL_PRIVATE); // CHECK: load i64{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !6 - sll = __riscv_ntl_load(&ull, __RISCV_NTLH_ALL_PRIVATE); // CHECK: load i64{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !6 - h1 = __riscv_ntl_load(&h2, __RISCV_NTLH_ALL_PRIVATE); // CHECK: load half{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !6 - f1 = __riscv_ntl_load(&f2, __RISCV_NTLH_ALL_PRIVATE); // CHECK: load float{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !6 - d1 = __riscv_ntl_load(&d2, __RISCV_NTLH_ALL_PRIVATE); // CHECK: load double{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !6 - v4si1 = __riscv_ntl_load(&v4si2, __RISCV_NTLH_ALL_PRIVATE); // CHECK: load <4 x i32>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !6 - v8ss1 = __riscv_ntl_load(&v8ss2, __RISCV_NTLH_ALL_PRIVATE); // CHECK: load <8 x i16>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !6 - v16sc1 = __riscv_ntl_load(&v16sc2, __RISCV_NTLH_ALL_PRIVATE); // CHECK: load <16 x i8>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !6 - *scvi1 = __riscv_ntl_load(scvi2, __RISCV_NTLH_ALL_PRIVATE); // CHECK: load {{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !6 - *scvs1 = __riscv_ntl_load(scvs2, __RISCV_NTLH_ALL_PRIVATE); // CHECK: load {{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !6 - *scvc1 = __riscv_ntl_load(scvc2, __RISCV_NTLH_ALL_PRIVATE); // CHECK: load {{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !6 + uc = 
__riscv_ntl_load(&sc, __RISCV_NTLH_ALL_PRIVATE); // CHECK: load i8{{.*}}align 1, !nontemporal !6, !riscv-nontemporal-domain !8 + sc = __riscv_ntl_load(&uc, __RISCV_NTLH_ALL_PRIVATE); // CHECK: load i8{{.*}}align 1, !nontemporal !6, !riscv-nontemporal-domain !8 + us = __riscv_ntl_load(&ss, __RISCV_NTLH_ALL_PRIVATE); // CHECK: load i16{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !8 + ss = __riscv_ntl_load(&us, __RISCV_NTLH_ALL_PRIVATE); // CHECK: load i16{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !8 + ui = __riscv_ntl_load(&si, __RISCV_NTLH_ALL_PRIVATE); // CHECK: load i32{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !8 + si = __riscv_ntl_load(&ui, __RISCV_NTLH_ALL_PRIVATE); // CHECK: load i32{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !8 + ull = __riscv_ntl_load(&sll, __RISCV_NTLH_ALL_PRIVATE); // CHECK: load i64{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !8 + sll = __riscv_ntl_load(&ull, __RISCV_NTLH_ALL_PRIVATE); // CHECK: load i64{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !8 + h1 = __riscv_ntl_load(&h2, __RISCV_NTLH_ALL_PRIVATE); // CHECK: load half{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !8 + f1 = __riscv_ntl_load(&f2, __RISCV_NTLH_ALL_PRIVATE); // CHECK: load float{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !8 + d1 = __riscv_ntl_load(&d2, __RISCV_NTLH_ALL_PRIVATE); // CHECK: load double{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !8 + v4si1 = __riscv_ntl_load(&v4si2, __RISCV_NTLH_ALL_PRIVATE); // CHECK: load <4 x i32>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !8 + v8ss1 = __riscv_ntl_load(&v8ss2, __RISCV_NTLH_ALL_PRIVATE); // CHECK: load <8 x i16>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !8 + v16sc1 = __riscv_ntl_load(&v16sc2, __RISCV_NTLH_ALL_PRIVATE); // CHECK: load <16 x i8>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !8 + *scvi1 = __riscv_ntl_load(scvi2, __RISCV_NTLH_ALL_PRIVATE); // 
CHECK: load {{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !8 + *scvs1 = __riscv_ntl_load(scvs2, __RISCV_NTLH_ALL_PRIVATE); // CHECK: load {{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !8 + *scvc1 = __riscv_ntl_load(scvc2, __RISCV_NTLH_ALL_PRIVATE); // CHECK: load {{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !8 - uc = __riscv_ntl_load(&sc, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: load i8{{.*}}align 1, !nontemporal !4, !riscv-nontemporal-domain !7 - sc = __riscv_ntl_load(&uc, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: load i8{{.*}}align 1, !nontemporal !4, !riscv-nontemporal-domain !7 - us = __riscv_ntl_load(&ss, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: load i16{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !7 - ss = __riscv_ntl_load(&us, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: load i16{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !7 - ui = __riscv_ntl_load(&si, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: load i32{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !7 - si = __riscv_ntl_load(&ui, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: load i32{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !7 - ull = __riscv_ntl_load(&sll, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: load i64{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !7 - sll = __riscv_ntl_load(&ull, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: load i64{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !7 - h1 = __riscv_ntl_load(&h2, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: load half{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !7 - f1 = __riscv_ntl_load(&f2, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: load float{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !7 - d1 = __riscv_ntl_load(&d2, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: load double{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !7 - v4si1 = __riscv_ntl_load(&v4si2, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: load <4 x i32>{{.*}}align 
16, !nontemporal !4, !riscv-nontemporal-domain !7 - v8ss1 = __riscv_ntl_load(&v8ss2, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: load <8 x i16>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !7 - v16sc1 = __riscv_ntl_load(&v16sc2, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: load <16 x i8>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !7 - *scvi1 = __riscv_ntl_load(scvi2, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: load {{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !7 - *scvs1 = __riscv_ntl_load(scvs2, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: load {{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !7 - *scvc1 = __riscv_ntl_load(scvc2, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: load {{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !7 + uc = __riscv_ntl_load(&sc, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: load i8{{.*}}align 1, !nontemporal !6, !riscv-nontemporal-domain !9 + sc = __riscv_ntl_load(&uc, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: load i8{{.*}}align 1, !nontemporal !6, !riscv-nontemporal-domain !9 + us = __riscv_ntl_load(&ss, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: load i16{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !9 + ss = __riscv_ntl_load(&us, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: load i16{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !9 + ui = __riscv_ntl_load(&si, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: load i32{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !9 + si = __riscv_ntl_load(&ui, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: load i32{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !9 + ull = __riscv_ntl_load(&sll, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: load i64{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !9 + sll = __riscv_ntl_load(&ull, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: load i64{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !9 + h1 = __riscv_ntl_load(&h2, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: load 
half{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !9 + f1 = __riscv_ntl_load(&f2, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: load float{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !9 + d1 = __riscv_ntl_load(&d2, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: load double{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !9 + v4si1 = __riscv_ntl_load(&v4si2, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: load <4 x i32>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !9 + v8ss1 = __riscv_ntl_load(&v8ss2, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: load <8 x i16>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !9 + v16sc1 = __riscv_ntl_load(&v16sc2, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: load <16 x i8>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !9 + *scvi1 = __riscv_ntl_load(scvi2, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: load {{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !9 + *scvs1 = __riscv_ntl_load(scvs2, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: load {{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !9 + *scvc1 = __riscv_ntl_load(scvc2, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: load {{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !9 - uc = __riscv_ntl_load(&sc, __RISCV_NTLH_ALL); // CHECK: load i8{{.*}}align 1, !nontemporal !4, !riscv-nontemporal-domain !8 - sc = __riscv_ntl_load(&uc, __RISCV_NTLH_ALL); // CHECK: load i8{{.*}}align 1, !nontemporal !4, !riscv-nontemporal-domain !8 - us = __riscv_ntl_load(&ss, __RISCV_NTLH_ALL); // CHECK: load i16{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !8 - ss = __riscv_ntl_load(&us, __RISCV_NTLH_ALL); // CHECK: load i16{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !8 - ui = __riscv_ntl_load(&si, __RISCV_NTLH_ALL); // CHECK: load i32{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !8 - si = __riscv_ntl_load(&ui, __RISCV_NTLH_ALL); // CHECK: load i32{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !8 
- ull = __riscv_ntl_load(&sll, __RISCV_NTLH_ALL); // CHECK: load i64{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8 - sll = __riscv_ntl_load(&ull, __RISCV_NTLH_ALL); // CHECK: load i64{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8 - h1 = __riscv_ntl_load(&h2, __RISCV_NTLH_ALL); // CHECK: load half{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !8 - f1 = __riscv_ntl_load(&f2, __RISCV_NTLH_ALL); // CHECK: load float{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !8 - d1 = __riscv_ntl_load(&d2, __RISCV_NTLH_ALL); // CHECK: load double{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8 - v4si1 = __riscv_ntl_load(&v4si2, __RISCV_NTLH_ALL); // CHECK: load <4 x i32>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !8 - v8ss1 = __riscv_ntl_load(&v8ss2, __RISCV_NTLH_ALL); // CHECK: load <8 x i16>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !8 - v16sc1 = __riscv_ntl_load(&v16sc2, __RISCV_NTLH_ALL); // CHECK: load <16 x i8>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !8 - *scvi1 = __riscv_ntl_load(scvi2, __RISCV_NTLH_ALL); // CHECK: load {{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8 - *scvs1 = __riscv_ntl_load(scvs2, __RISCV_NTLH_ALL); // CHECK: load {{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8 - *scvc1 = __riscv_ntl_load(scvc2, __RISCV_NTLH_ALL); // CHECK: load {{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8 + uc = __riscv_ntl_load(&sc, __RISCV_NTLH_ALL); // CHECK: load i8{{.*}}align 1, !nontemporal !6, !riscv-nontemporal-domain !10 + sc = __riscv_ntl_load(&uc, __RISCV_NTLH_ALL); // CHECK: load i8{{.*}}align 1, !nontemporal !6, !riscv-nontemporal-domain !10 + us = __riscv_ntl_load(&ss, __RISCV_NTLH_ALL); // CHECK: load i16{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !10 + ss = __riscv_ntl_load(&us, __RISCV_NTLH_ALL); // CHECK: load i16{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !10 + ui = __riscv_ntl_load(&si, 
__RISCV_NTLH_ALL); // CHECK: load i32{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !10 + si = __riscv_ntl_load(&ui, __RISCV_NTLH_ALL); // CHECK: load i32{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !10 + ull = __riscv_ntl_load(&sll, __RISCV_NTLH_ALL); // CHECK: load i64{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10 + sll = __riscv_ntl_load(&ull, __RISCV_NTLH_ALL); // CHECK: load i64{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10 + h1 = __riscv_ntl_load(&h2, __RISCV_NTLH_ALL); // CHECK: load half{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !10 + f1 = __riscv_ntl_load(&f2, __RISCV_NTLH_ALL); // CHECK: load float{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !10 + d1 = __riscv_ntl_load(&d2, __RISCV_NTLH_ALL); // CHECK: load double{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10 + v4si1 = __riscv_ntl_load(&v4si2, __RISCV_NTLH_ALL); // CHECK: load <4 x i32>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !10 + v8ss1 = __riscv_ntl_load(&v8ss2, __RISCV_NTLH_ALL); // CHECK: load <8 x i16>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !10 + v16sc1 = __riscv_ntl_load(&v16sc2, __RISCV_NTLH_ALL); // CHECK: load <16 x i8>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !10 + *scvi1 = __riscv_ntl_load(scvi2, __RISCV_NTLH_ALL); // CHECK: load {{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10 + *scvs1 = __riscv_ntl_load(scvs2, __RISCV_NTLH_ALL); // CHECK: load {{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10 + *scvc1 = __riscv_ntl_load(scvc2, __RISCV_NTLH_ALL); // CHECK: load {{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10 - uc = __riscv_ntl_load(&sc); // CHECK: load i8{{.*}}align 1, !nontemporal !4, !riscv-nontemporal-domain !8 - sc = __riscv_ntl_load(&uc); // CHECK: load i8{{.*}}align 1, !nontemporal !4, !riscv-nontemporal-domain !8 - us = __riscv_ntl_load(&ss); // CHECK: load i16{{.*}}align 2, !nontemporal !4, 
!riscv-nontemporal-domain !8 - ss = __riscv_ntl_load(&us); // CHECK: load i16{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !8 - ui = __riscv_ntl_load(&si); // CHECK: load i32{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !8 - si = __riscv_ntl_load(&ui); // CHECK: load i32{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !8 - ull = __riscv_ntl_load(&sll); // CHECK: load i64{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8 - sll = __riscv_ntl_load(&ull); // CHECK: load i64{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8 - h1 = __riscv_ntl_load(&h2); // CHECK: load half{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !8 - f1 = __riscv_ntl_load(&f2); // CHECK: load float{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !8 - d1 = __riscv_ntl_load(&d2); // CHECK: load double{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8 - v4si1 = __riscv_ntl_load(&v4si2); // CHECK: load <4 x i32>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !8 - v8ss1 = __riscv_ntl_load(&v8ss2); // CHECK: load <8 x i16>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !8 - v16sc1 = __riscv_ntl_load(&v16sc2); // CHECK: load <16 x i8>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !8 - *scvi1 = __riscv_ntl_load(scvi2); // CHECK: load {{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8 - *scvs1 = __riscv_ntl_load(scvs2); // CHECK: load {{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8 - *scvc1 = __riscv_ntl_load(scvc2); // CHECK: load {{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8 + uc = __riscv_ntl_load(&sc); // CHECK: load i8{{.*}}align 1, !nontemporal !6, !riscv-nontemporal-domain !10 + sc = __riscv_ntl_load(&uc); // CHECK: load i8{{.*}}align 1, !nontemporal !6, !riscv-nontemporal-domain !10 + us = __riscv_ntl_load(&ss); // CHECK: load i16{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !10 + ss = __riscv_ntl_load(&us); // CHECK: load 
i16{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !10 + ui = __riscv_ntl_load(&si); // CHECK: load i32{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !10 + si = __riscv_ntl_load(&ui); // CHECK: load i32{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !10 + ull = __riscv_ntl_load(&sll); // CHECK: load i64{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10 + sll = __riscv_ntl_load(&ull); // CHECK: load i64{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10 + h1 = __riscv_ntl_load(&h2); // CHECK: load half{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !10 + f1 = __riscv_ntl_load(&f2); // CHECK: load float{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !10 + d1 = __riscv_ntl_load(&d2); // CHECK: load double{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10 + v4si1 = __riscv_ntl_load(&v4si2); // CHECK: load <4 x i32>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !10 + v8ss1 = __riscv_ntl_load(&v8ss2); // CHECK: load <8 x i16>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !10 + v16sc1 = __riscv_ntl_load(&v16sc2); // CHECK: load <16 x i8>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !10 + *scvi1 = __riscv_ntl_load(scvi2); // CHECK: load {{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10 + *scvs1 = __riscv_ntl_load(scvs2); // CHECK: load {{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10 + *scvc1 = __riscv_ntl_load(scvc2); // CHECK: load {{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10 - __riscv_ntl_store(&uc, 1, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: store i8{{.*}}align 1, !nontemporal !4, !riscv-nontemporal-domain !5 - __riscv_ntl_store(&sc, 1, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: store i8{{.*}}align 1, !nontemporal !4, !riscv-nontemporal-domain !5 - __riscv_ntl_store(&us, 1, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: store i16{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !5 - 
__riscv_ntl_store(&ss, 1, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: store i16{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !5 - __riscv_ntl_store(&ui, 1, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: store i32{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !5 - __riscv_ntl_store(&si, 1, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: store i32{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !5 - __riscv_ntl_store(&ull, 1, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: store i64{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !5 - __riscv_ntl_store(&sll, 1, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: store i64{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !5 - __riscv_ntl_store(&h1, 1.0, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: store half{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !5 - __riscv_ntl_store(&f1, 1.0, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: store float{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !5 - __riscv_ntl_store(&d1, 1.0, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: store double{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !5 - __riscv_ntl_store(&v4si1, v4si2, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: store <4 x i32>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !5 - __riscv_ntl_store(&v8ss1, v8ss2, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: store <8 x i16>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !5 - __riscv_ntl_store(&v16sc1, v16sc2, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: store <16 x i8>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !5 - __riscv_ntl_store(scvi2, *scvi1, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: store {{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !5 - __riscv_ntl_store(scvs2, *scvs1, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: store {{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !5 - __riscv_ntl_store(scvc2, *scvc1, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: store {{.*}}align 8, 
!nontemporal !4, !riscv-nontemporal-domain !5 + __riscv_ntl_store(&uc, 1, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: store i8{{.*}}align 1, !nontemporal !6, !riscv-nontemporal-domain !7 + __riscv_ntl_store(&sc, 1, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: store i8{{.*}}align 1, !nontemporal !6, !riscv-nontemporal-domain !7 + __riscv_ntl_store(&us, 1, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: store i16{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !7 + __riscv_ntl_store(&ss, 1, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: store i16{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !7 + __riscv_ntl_store(&ui, 1, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: store i32{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !7 + __riscv_ntl_store(&si, 1, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: store i32{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !7 + __riscv_ntl_store(&ull, 1, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: store i64{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !7 + __riscv_ntl_store(&sll, 1, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: store i64{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !7 + __riscv_ntl_store(&h1, 1.0, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: store half{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !7 + __riscv_ntl_store(&f1, 1.0, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: store float{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !7 + __riscv_ntl_store(&d1, 1.0, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: store double{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !7 + __riscv_ntl_store(&v4si1, v4si2, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: store <4 x i32>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !7 + __riscv_ntl_store(&v8ss1, v8ss2, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: store <8 x i16>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !7 + __riscv_ntl_store(&v16sc1, v16sc2, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: 
store <16 x i8>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !7 + __riscv_ntl_store(scvi2, *scvi1, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: store {{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !7 + __riscv_ntl_store(scvs2, *scvs1, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: store {{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !7 + __riscv_ntl_store(scvc2, *scvc1, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: store {{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !7 - __riscv_ntl_store(&uc, 1, __RISCV_NTLH_ALL_PRIVATE); // CHECK: store i8{{.*}}align 1, !nontemporal !4, !riscv-nontemporal-domain !6 - __riscv_ntl_store(&sc, 1, __RISCV_NTLH_ALL_PRIVATE); // CHECK: store i8{{.*}}align 1, !nontemporal !4, !riscv-nontemporal-domain !6 - __riscv_ntl_store(&us, 1, __RISCV_NTLH_ALL_PRIVATE); // CHECK: store i16{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !6 - __riscv_ntl_store(&ss, 1, __RISCV_NTLH_ALL_PRIVATE); // CHECK: store i16{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !6 - __riscv_ntl_store(&ui, 1, __RISCV_NTLH_ALL_PRIVATE); // CHECK: store i32{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !6 - __riscv_ntl_store(&si, 1, __RISCV_NTLH_ALL_PRIVATE); // CHECK: store i32{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !6 - __riscv_ntl_store(&ull, 1, __RISCV_NTLH_ALL_PRIVATE); // CHECK: store i64{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !6 - __riscv_ntl_store(&sll, 1, __RISCV_NTLH_ALL_PRIVATE); // CHECK: store i64{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !6 - __riscv_ntl_store(&h1, 1.0, __RISCV_NTLH_ALL_PRIVATE); // CHECK: store half{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !6 - __riscv_ntl_store(&f1, 1.0, __RISCV_NTLH_ALL_PRIVATE); // CHECK: store float{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !6 - __riscv_ntl_store(&d1, 1.0, __RISCV_NTLH_ALL_PRIVATE); // CHECK: store double{{.*}}align 8, !nontemporal !4, 
!riscv-nontemporal-domain !6 - __riscv_ntl_store(&v4si1, v4si2, __RISCV_NTLH_ALL_PRIVATE); // CHECK: store <4 x i32>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !6 - __riscv_ntl_store(&v8ss1, v8ss2, __RISCV_NTLH_ALL_PRIVATE); // CHECK: store <8 x i16>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !6 - __riscv_ntl_store(&v16sc1, v16sc2, __RISCV_NTLH_ALL_PRIVATE); // CHECK: store <16 x i8>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !6 - __riscv_ntl_store(scvi2, *scvi1, __RISCV_NTLH_ALL_PRIVATE); // CHECK: store {{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !6 - __riscv_ntl_store(scvs2, *scvs1, __RISCV_NTLH_ALL_PRIVATE); // CHECK: store {{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !6 - __riscv_ntl_store(scvc2, *scvc1, __RISCV_NTLH_ALL_PRIVATE); // CHECK: store {{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !6 + __riscv_ntl_store(&uc, 1, __RISCV_NTLH_ALL_PRIVATE); // CHECK: store i8{{.*}}align 1, !nontemporal !6, !riscv-nontemporal-domain !8 + __riscv_ntl_store(&sc, 1, __RISCV_NTLH_ALL_PRIVATE); // CHECK: store i8{{.*}}align 1, !nontemporal !6, !riscv-nontemporal-domain !8 + __riscv_ntl_store(&us, 1, __RISCV_NTLH_ALL_PRIVATE); // CHECK: store i16{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !8 + __riscv_ntl_store(&ss, 1, __RISCV_NTLH_ALL_PRIVATE); // CHECK: store i16{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !8 + __riscv_ntl_store(&ui, 1, __RISCV_NTLH_ALL_PRIVATE); // CHECK: store i32{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !8 + __riscv_ntl_store(&si, 1, __RISCV_NTLH_ALL_PRIVATE); // CHECK: store i32{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !8 + __riscv_ntl_store(&ull, 1, __RISCV_NTLH_ALL_PRIVATE); // CHECK: store i64{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !8 + __riscv_ntl_store(&sll, 1, __RISCV_NTLH_ALL_PRIVATE); // CHECK: store i64{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !8 + __riscv_ntl_store(&h1, 
1.0, __RISCV_NTLH_ALL_PRIVATE); // CHECK: store half{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !8 + __riscv_ntl_store(&f1, 1.0, __RISCV_NTLH_ALL_PRIVATE); // CHECK: store float{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !8 + __riscv_ntl_store(&d1, 1.0, __RISCV_NTLH_ALL_PRIVATE); // CHECK: store double{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !8 + __riscv_ntl_store(&v4si1, v4si2, __RISCV_NTLH_ALL_PRIVATE); // CHECK: store <4 x i32>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !8 + __riscv_ntl_store(&v8ss1, v8ss2, __RISCV_NTLH_ALL_PRIVATE); // CHECK: store <8 x i16>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !8 + __riscv_ntl_store(&v16sc1, v16sc2, __RISCV_NTLH_ALL_PRIVATE); // CHECK: store <16 x i8>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !8 + __riscv_ntl_store(scvi2, *scvi1, __RISCV_NTLH_ALL_PRIVATE); // CHECK: store {{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !8 + __riscv_ntl_store(scvs2, *scvs1, __RISCV_NTLH_ALL_PRIVATE); // CHECK: store {{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !8 + __riscv_ntl_store(scvc2, *scvc1, __RISCV_NTLH_ALL_PRIVATE); // CHECK: store {{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !8 - __riscv_ntl_store(&uc, 1, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: store i8{{.*}}align 1, !nontemporal !4, !riscv-nontemporal-domain !7 - __riscv_ntl_store(&sc, 1, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: store i8{{.*}}align 1, !nontemporal !4, !riscv-nontemporal-domain !7 - __riscv_ntl_store(&us, 1, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: store i16{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !7 - __riscv_ntl_store(&ss, 1, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: store i16{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !7 - __riscv_ntl_store(&ui, 1, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: store i32{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !7 - __riscv_ntl_store(&si, 1, 
__RISCV_NTLH_INNERMOST_SHARED); // CHECK: store i32{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !7 - __riscv_ntl_store(&ull, 1, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: store i64{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !7 - __riscv_ntl_store(&sll, 1, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: store i64{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !7 - __riscv_ntl_store(&h1, 1.0, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: store half{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !7 - __riscv_ntl_store(&f1, 1.0, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: store float{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !7 - __riscv_ntl_store(&d1, 1.0, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: store double{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !7 - __riscv_ntl_store(&v4si1, v4si2, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: store <4 x i32>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !7 - __riscv_ntl_store(&v8ss1, v8ss2, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: store <8 x i16>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !7 - __riscv_ntl_store(&v16sc1, v16sc2, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: store <16 x i8>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !7 - __riscv_ntl_store(scvi2, *scvi1, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: store {{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !7 - __riscv_ntl_store(scvs2, *scvs1, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: store {{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !7 - __riscv_ntl_store(scvc2, *scvc1, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: store {{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !7 + __riscv_ntl_store(&uc, 1, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: store i8{{.*}}align 1, !nontemporal !6, !riscv-nontemporal-domain !9 + __riscv_ntl_store(&sc, 1, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: store i8{{.*}}align 1, !nontemporal !6, !riscv-nontemporal-domain !9 
+ __riscv_ntl_store(&us, 1, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: store i16{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !9 + __riscv_ntl_store(&ss, 1, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: store i16{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !9 + __riscv_ntl_store(&ui, 1, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: store i32{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !9 + __riscv_ntl_store(&si, 1, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: store i32{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !9 + __riscv_ntl_store(&ull, 1, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: store i64{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !9 + __riscv_ntl_store(&sll, 1, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: store i64{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !9 + __riscv_ntl_store(&h1, 1.0, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: store half{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !9 + __riscv_ntl_store(&f1, 1.0, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: store float{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !9 + __riscv_ntl_store(&d1, 1.0, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: store double{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !9 + __riscv_ntl_store(&v4si1, v4si2, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: store <4 x i32>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !9 + __riscv_ntl_store(&v8ss1, v8ss2, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: store <8 x i16>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !9 + __riscv_ntl_store(&v16sc1, v16sc2, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: store <16 x i8>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !9 + __riscv_ntl_store(scvi2, *scvi1, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: store {{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !9 + __riscv_ntl_store(scvs2, *scvs1, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: store {{.*}}align 8, !nontemporal !6, 
!riscv-nontemporal-domain !9 + __riscv_ntl_store(scvc2, *scvc1, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: store {{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !9 - __riscv_ntl_store(&uc, 1, __RISCV_NTLH_ALL); // CHECK: store i8{{.*}}align 1, !nontemporal !4, !riscv-nontemporal-domain !8 - __riscv_ntl_store(&sc, 1, __RISCV_NTLH_ALL); // CHECK: store i8{{.*}}align 1, !nontemporal !4, !riscv-nontemporal-domain !8 - __riscv_ntl_store(&us, 1, __RISCV_NTLH_ALL); // CHECK: store i16{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !8 - __riscv_ntl_store(&ss, 1, __RISCV_NTLH_ALL); // CHECK: store i16{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !8 - __riscv_ntl_store(&ui, 1, __RISCV_NTLH_ALL); // CHECK: store i32{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !8 - __riscv_ntl_store(&si, 1, __RISCV_NTLH_ALL); // CHECK: store i32{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !8 - __riscv_ntl_store(&ull, 1, __RISCV_NTLH_ALL); // CHECK: store i64{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8 - __riscv_ntl_store(&sll, 1, __RISCV_NTLH_ALL); // CHECK: store i64{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8 - __riscv_ntl_store(&h1, 1.0, __RISCV_NTLH_ALL); // CHECK: store half{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !8 - __riscv_ntl_store(&f1, 1.0, __RISCV_NTLH_ALL); // CHECK: store float{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !8 - __riscv_ntl_store(&d1, 1.0, __RISCV_NTLH_ALL); // CHECK: store double{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8 - __riscv_ntl_store(&v4si1, v4si2, __RISCV_NTLH_ALL); // CHECK: store <4 x i32>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !8 - __riscv_ntl_store(&v8ss1, v8ss2, __RISCV_NTLH_ALL); // CHECK: store <8 x i16>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !8 - __riscv_ntl_store(&v16sc1, v16sc2, __RISCV_NTLH_ALL); // CHECK: store <16 x i8>{{.*}}align 16, !nontemporal !4, 
!riscv-nontemporal-domain !8 - __riscv_ntl_store(scvi2, *scvi1, __RISCV_NTLH_ALL); // CHECK: store {{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8 - __riscv_ntl_store(scvs2, *scvs1, __RISCV_NTLH_ALL); // CHECK: store {{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8 - __riscv_ntl_store(scvc2, *scvc1, __RISCV_NTLH_ALL); // CHECK: store {{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8 + __riscv_ntl_store(&uc, 1, __RISCV_NTLH_ALL); // CHECK: store i8{{.*}}align 1, !nontemporal !6, !riscv-nontemporal-domain !10 + __riscv_ntl_store(&sc, 1, __RISCV_NTLH_ALL); // CHECK: store i8{{.*}}align 1, !nontemporal !6, !riscv-nontemporal-domain !10 + __riscv_ntl_store(&us, 1, __RISCV_NTLH_ALL); // CHECK: store i16{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !10 + __riscv_ntl_store(&ss, 1, __RISCV_NTLH_ALL); // CHECK: store i16{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !10 + __riscv_ntl_store(&ui, 1, __RISCV_NTLH_ALL); // CHECK: store i32{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !10 + __riscv_ntl_store(&si, 1, __RISCV_NTLH_ALL); // CHECK: store i32{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !10 + __riscv_ntl_store(&ull, 1, __RISCV_NTLH_ALL); // CHECK: store i64{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10 + __riscv_ntl_store(&sll, 1, __RISCV_NTLH_ALL); // CHECK: store i64{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10 + __riscv_ntl_store(&h1, 1.0, __RISCV_NTLH_ALL); // CHECK: store half{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !10 + __riscv_ntl_store(&f1, 1.0, __RISCV_NTLH_ALL); // CHECK: store float{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !10 + __riscv_ntl_store(&d1, 1.0, __RISCV_NTLH_ALL); // CHECK: store double{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10 + __riscv_ntl_store(&v4si1, v4si2, __RISCV_NTLH_ALL); // CHECK: store <4 x i32>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !10 + 
__riscv_ntl_store(&v8ss1, v8ss2, __RISCV_NTLH_ALL); // CHECK: store <8 x i16>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !10 + __riscv_ntl_store(&v16sc1, v16sc2, __RISCV_NTLH_ALL); // CHECK: store <16 x i8>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !10 + __riscv_ntl_store(scvi2, *scvi1, __RISCV_NTLH_ALL); // CHECK: store {{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10 + __riscv_ntl_store(scvs2, *scvs1, __RISCV_NTLH_ALL); // CHECK: store {{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10 + __riscv_ntl_store(scvc2, *scvc1, __RISCV_NTLH_ALL); // CHECK: store {{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10 - __riscv_ntl_store(&uc, 1); // CHECK: store i8{{.*}}align 1, !nontemporal !4, !riscv-nontemporal-domain !8 - __riscv_ntl_store(&sc, 1); // CHECK: store i8{{.*}}align 1, !nontemporal !4, !riscv-nontemporal-domain !8 - __riscv_ntl_store(&us, 1); // CHECK: store i16{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !8 - __riscv_ntl_store(&ss, 1); // CHECK: store i16{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !8 - __riscv_ntl_store(&ui, 1); // CHECK: store i32{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !8 - __riscv_ntl_store(&si, 1); // CHECK: store i32{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !8 - __riscv_ntl_store(&ull, 1); // CHECK: store i64{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8 - __riscv_ntl_store(&sll, 1); // CHECK: store i64{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8 - __riscv_ntl_store(&h1, 1.0); // CHECK: store half{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !8 - __riscv_ntl_store(&f1, 1.0); // CHECK: store float{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !8 - __riscv_ntl_store(&d1, 1.0); // CHECK: store double{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8 - __riscv_ntl_store(&v4si1, v4si2); // CHECK: store <4 x i32>{{.*}}align 16, !nontemporal !4, 
!riscv-nontemporal-domain !8 - __riscv_ntl_store(&v8ss1, v8ss2); // CHECK: store <8 x i16>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !8 - __riscv_ntl_store(&v16sc1, v16sc2); // CHECK: store <16 x i8>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !8 - __riscv_ntl_store(scvi2, *scvi1); // CHECK: store {{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8 - __riscv_ntl_store(scvs2, *scvs1); // CHECK: store {{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8 - __riscv_ntl_store(scvc2, *scvc1); // CHECK: store {{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8 + __riscv_ntl_store(&uc, 1); // CHECK: store i8{{.*}}align 1, !nontemporal !6, !riscv-nontemporal-domain !10 + __riscv_ntl_store(&sc, 1); // CHECK: store i8{{.*}}align 1, !nontemporal !6, !riscv-nontemporal-domain !10 + __riscv_ntl_store(&us, 1); // CHECK: store i16{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !10 + __riscv_ntl_store(&ss, 1); // CHECK: store i16{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !10 + __riscv_ntl_store(&ui, 1); // CHECK: store i32{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !10 + __riscv_ntl_store(&si, 1); // CHECK: store i32{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !10 + __riscv_ntl_store(&ull, 1); // CHECK: store i64{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10 + __riscv_ntl_store(&sll, 1); // CHECK: store i64{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10 + __riscv_ntl_store(&h1, 1.0); // CHECK: store half{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !10 + __riscv_ntl_store(&f1, 1.0); // CHECK: store float{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !10 + __riscv_ntl_store(&d1, 1.0); // CHECK: store double{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10 + __riscv_ntl_store(&v4si1, v4si2); // CHECK: store <4 x i32>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !10 + __riscv_ntl_store(&v8ss1, v8ss2); 
// CHECK: store <8 x i16>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !10 + __riscv_ntl_store(&v16sc1, v16sc2); // CHECK: store <16 x i8>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !10 + __riscv_ntl_store(scvi2, *scvi1); // CHECK: store {{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10 + __riscv_ntl_store(scvs2, *scvs1); // CHECK: store {{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10 + __riscv_ntl_store(scvc2, *scvc1); // CHECK: store {{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10 } // clang-format on -// CHECK: !4 = !{i32 1} -// CHECK: !5 = !{i32 2} -// CHECK: !6 = !{i32 3} -// CHECK: !7 = !{i32 4} -// CHECK: !8 = !{i32 5} +// CHECK: !6 = !{i32 1} +// CHECK: !7 = !{i32 2} +// CHECK: !8 = !{i32 3} +// CHECK: !9 = !{i32 4} +// CHECK: !10 = !{i32 5} diff --git a/clang/test/CodeGen/RISCV/riscv-metadata-arch.c b/clang/test/CodeGen/RISCV/riscv-metadata-arch.c new file mode 100644 index 00000000000000..060eda108b54e9 --- /dev/null +++ b/clang/test/CodeGen/RISCV/riscv-metadata-arch.c @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 -triple riscv32 -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefix=RV32I %s +// RUN: %clang_cc1 -triple riscv32 -target-feature +v -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefix=RV32IV %s +// RUN: %clang_cc1 -triple riscv64 -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefix=RV64I %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefix=RV64IV %s + +// RV32I:!{{[0-9]+}} = !{i32 6, !"riscv-isa", ![[ID:[0-9]+]]} +// RV32I:![[ID]] = !{!"rv32i2p1"} + +// RV32IV:!{{[0-9]+}} = !{i32 6, !"riscv-isa", ![[ID:[0-9]+]]} +// RV32IV:![[ID]] = !{!"rv32i2p1_f2p2_d2p2_v1p0_zicsr2p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"} + +// RV64I:!{{[0-9]+}} = !{i32 6, !"riscv-isa", ![[ID:[0-9]+]]} +// RV64I:![[ID]] = !{!"rv64i2p1"} + +// RV64IV:!{{[0-9]+}} = !{i32 6, !"riscv-isa", ![[ID:[0-9]+]]} +// 
RV64IV:![[ID]] = !{!"rv64i2p1_f2p2_d2p2_v1p0_zicsr2p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"} From 21630efb5a8f411cdf39b15f21a104888145b03f Mon Sep 17 00:00:00 2001 From: Pranav Kant Date: Tue, 13 Feb 2024 16:39:59 -0800 Subject: [PATCH 080/240] [X86][CodeGen] Restrict F128 lowering to GNU environment (#81664) Otherwise it breaks some environment like X64 Android that doesn't have f128 functions available in its libc. Followup to #79611. --- llvm/lib/CodeGen/TargetLoweringBase.cpp | 2 +- .../test/CodeGen/X86/fp128-libcalls-strict.ll | 702 ++++++++++++++---- llvm/test/CodeGen/X86/fp128-libcalls.ll | 406 +++++++--- 3 files changed, 879 insertions(+), 231 deletions(-) diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index dc766928e5dc94..646c0c345e54e0 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -123,7 +123,7 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) { setLibcallCallingConv((RTLIB::Libcall)LC, CallingConv::C); // Use the f128 variants of math functions on x86_64 - if (TT.getArch() == Triple::ArchType::x86_64) { + if (TT.getArch() == Triple::ArchType::x86_64 && TT.isGNUEnvironment()) { setLibcallName(RTLIB::REM_F128, "fmodf128"); setLibcallName(RTLIB::FMA_F128, "fmaf128"); setLibcallName(RTLIB::SQRT_F128, "sqrtf128"); diff --git a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll index 47234c365cad5f..f1d473f81a9fa1 100644 --- a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll +++ b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -O2 -mtriple=x86_64-linux-android \ ; RUN: -enable-legalize-types-checking \ -; RUN: | FileCheck %s +; RUN: | FileCheck %s --check-prefix=ANDROID ; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu \ ; RUN: 
-enable-legalize-types-checking \ -; RUN: | FileCheck %s +; RUN: | FileCheck %s --check-prefix=GNU ; RUN: llc < %s -O2 -mtriple=i686-linux-gnu -mattr=+sse2 \ ; RUN: -enable-legalize-types-checking \ ; RUN: | FileCheck %s --check-prefix=X86 @@ -19,6 +19,20 @@ define fp128 @add(fp128 %x, fp128 %y) nounwind strictfp { ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq ; +; ANDROID-LABEL: add: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq __addtf3@PLT +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: add: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq __addtf3@PLT +; GNU-NEXT: popq %rax +; GNU-NEXT: retq +; ; X86-LABEL: add: ; X86: # %bb.0: # %entry ; X86-NEXT: pushl %esi @@ -56,6 +70,20 @@ define fp128 @sub(fp128 %x, fp128 %y) nounwind strictfp { ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq ; +; ANDROID-LABEL: sub: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq __subtf3@PLT +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: sub: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq __subtf3@PLT +; GNU-NEXT: popq %rax +; GNU-NEXT: retq +; ; X86-LABEL: sub: ; X86: # %bb.0: # %entry ; X86-NEXT: pushl %esi @@ -93,6 +121,20 @@ define fp128 @mul(fp128 %x, fp128 %y) nounwind strictfp { ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq ; +; ANDROID-LABEL: mul: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq __multf3@PLT +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: mul: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq __multf3@PLT +; GNU-NEXT: popq %rax +; GNU-NEXT: retq +; ; X86-LABEL: mul: ; X86: # %bb.0: # %entry ; X86-NEXT: pushl %esi @@ -130,6 +172,20 @@ define fp128 @div(fp128 %x, fp128 %y) nounwind strictfp { ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq ; +; ANDROID-LABEL: div: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq __divtf3@PLT +; 
ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: div: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq __divtf3@PLT +; GNU-NEXT: popq %rax +; GNU-NEXT: retq +; ; X86-LABEL: div: ; X86: # %bb.0: # %entry ; X86-NEXT: pushl %esi @@ -160,12 +216,19 @@ entry: } define fp128 @fma(fp128 %x, fp128 %y, fp128 %z) nounwind strictfp { -; CHECK-LABEL: fma: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq fmaf128 -; CHECK-NEXT: popq %rax -; CHECK-NEXT: retq +; ANDROID-LABEL: fma: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq fmal@PLT +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: fma: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq fmaf128@PLT +; GNU-NEXT: popq %rax +; GNU-NEXT: retq ; ; X86-LABEL: fma: ; X86: # %bb.0: # %entry @@ -201,12 +264,19 @@ entry: } define fp128 @frem(fp128 %x, fp128 %y) nounwind strictfp { -; CHECK-LABEL: frem: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq fmodf128 -; CHECK-NEXT: popq %rax -; CHECK-NEXT: retq +; ANDROID-LABEL: frem: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq fmodl@PLT +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: frem: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq fmodf128@PLT +; GNU-NEXT: popq %rax +; GNU-NEXT: retq ; ; X86-LABEL: frem: ; X86: # %bb.0: # %entry @@ -238,12 +308,19 @@ entry: } define fp128 @ceil(fp128 %x) nounwind strictfp { -; CHECK-LABEL: ceil: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq ceilf128 -; CHECK-NEXT: popq %rax -; CHECK-NEXT: retq +; ANDROID-LABEL: ceil: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq ceill@PLT +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: ceil: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq ceilf128@PLT +; GNU-NEXT: popq %rax +; GNU-NEXT: retq ; 
; X86-LABEL: ceil: ; X86: # %bb.0: # %entry @@ -271,12 +348,19 @@ entry: } define fp128 @cos(fp128 %x) nounwind strictfp { -; CHECK-LABEL: cos: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq cosf128 -; CHECK-NEXT: popq %rax -; CHECK-NEXT: retq +; ANDROID-LABEL: cos: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq cosl@PLT +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: cos: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq cosf128@PLT +; GNU-NEXT: popq %rax +; GNU-NEXT: retq ; ; X86-LABEL: cos: ; X86: # %bb.0: # %entry @@ -304,12 +388,19 @@ entry: } define fp128 @exp(fp128 %x) nounwind strictfp { -; CHECK-LABEL: exp: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq expf128 -; CHECK-NEXT: popq %rax -; CHECK-NEXT: retq +; ANDROID-LABEL: exp: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq expl@PLT +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: exp: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq expf128@PLT +; GNU-NEXT: popq %rax +; GNU-NEXT: retq ; ; X86-LABEL: exp: ; X86: # %bb.0: # %entry @@ -337,12 +428,19 @@ entry: } define fp128 @exp2(fp128 %x) nounwind strictfp { -; CHECK-LABEL: exp2: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq exp2f128 -; CHECK-NEXT: popq %rax -; CHECK-NEXT: retq +; ANDROID-LABEL: exp2: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq exp2l@PLT +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: exp2: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq exp2f128@PLT +; GNU-NEXT: popq %rax +; GNU-NEXT: retq ; ; X86-LABEL: exp2: ; X86: # %bb.0: # %entry @@ -370,12 +468,19 @@ entry: } define fp128 @floor(fp128 %x) nounwind strictfp { -; CHECK-LABEL: floor: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq floorf128 -; CHECK-NEXT: popq 
%rax -; CHECK-NEXT: retq +; ANDROID-LABEL: floor: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq floorl@PLT +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: floor: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq floorf128@PLT +; GNU-NEXT: popq %rax +; GNU-NEXT: retq ; ; X86-LABEL: floor: ; X86: # %bb.0: # %entry @@ -403,12 +508,19 @@ entry: } define fp128 @log(fp128 %x) nounwind strictfp { -; CHECK-LABEL: log: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq logf128 -; CHECK-NEXT: popq %rax -; CHECK-NEXT: retq +; ANDROID-LABEL: log: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq logl@PLT +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: log: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq logf128@PLT +; GNU-NEXT: popq %rax +; GNU-NEXT: retq ; ; X86-LABEL: log: ; X86: # %bb.0: # %entry @@ -436,12 +548,19 @@ entry: } define fp128 @log10(fp128 %x) nounwind strictfp { -; CHECK-LABEL: log10: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq log10f128 -; CHECK-NEXT: popq %rax -; CHECK-NEXT: retq +; ANDROID-LABEL: log10: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq log10l@PLT +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: log10: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq log10f128@PLT +; GNU-NEXT: popq %rax +; GNU-NEXT: retq ; ; X86-LABEL: log10: ; X86: # %bb.0: # %entry @@ -469,12 +588,19 @@ entry: } define fp128 @log2(fp128 %x) nounwind strictfp { -; CHECK-LABEL: log2: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq log2f128 -; CHECK-NEXT: popq %rax -; CHECK-NEXT: retq +; ANDROID-LABEL: log2: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq log2l@PLT +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: log2: +; GNU: # %bb.0: # 
%entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq log2f128@PLT +; GNU-NEXT: popq %rax +; GNU-NEXT: retq ; ; X86-LABEL: log2: ; X86: # %bb.0: # %entry @@ -502,12 +628,19 @@ entry: } define fp128 @maxnum(fp128 %x, fp128 %y) nounwind strictfp { -; CHECK-LABEL: maxnum: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq fmaxf128 -; CHECK-NEXT: popq %rax -; CHECK-NEXT: retq +; ANDROID-LABEL: maxnum: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq fmaxl@PLT +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: maxnum: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq fmaxf128@PLT +; GNU-NEXT: popq %rax +; GNU-NEXT: retq ; ; X86-LABEL: maxnum: ; X86: # %bb.0: # %entry @@ -539,12 +672,19 @@ entry: } define fp128 @minnum(fp128 %x, fp128 %y) nounwind strictfp { -; CHECK-LABEL: minnum: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq fminf128 -; CHECK-NEXT: popq %rax -; CHECK-NEXT: retq +; ANDROID-LABEL: minnum: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq fminl@PLT +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: minnum: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq fminf128@PLT +; GNU-NEXT: popq %rax +; GNU-NEXT: retq ; ; X86-LABEL: minnum: ; X86: # %bb.0: # %entry @@ -576,12 +716,19 @@ entry: } define fp128 @nearbyint(fp128 %x) nounwind strictfp { -; CHECK-LABEL: nearbyint: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq nearbyintf128 -; CHECK-NEXT: popq %rax -; CHECK-NEXT: retq +; ANDROID-LABEL: nearbyint: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq nearbyintl@PLT +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: nearbyint: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq nearbyintf128@PLT +; GNU-NEXT: popq %rax +; GNU-NEXT: retq ; ; X86-LABEL: nearbyint: ; X86: # %bb.0: # %entry @@ 
-609,12 +756,19 @@ entry: } define fp128 @pow(fp128 %x, fp128 %y) nounwind strictfp { -; CHECK-LABEL: pow: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq powf128 -; CHECK-NEXT: popq %rax -; CHECK-NEXT: retq +; ANDROID-LABEL: pow: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq powl@PLT +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: pow: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq powf128@PLT +; GNU-NEXT: popq %rax +; GNU-NEXT: retq ; ; X86-LABEL: pow: ; X86: # %bb.0: # %entry @@ -653,6 +807,20 @@ define fp128 @powi(fp128 %x, i32 %y) nounwind strictfp { ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq ; +; ANDROID-LABEL: powi: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq __powitf2@PLT +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: powi: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq __powitf2@PLT +; GNU-NEXT: popq %rax +; GNU-NEXT: retq +; ; X86-LABEL: powi: ; X86: # %bb.0: # %entry ; X86-NEXT: pushl %esi @@ -680,12 +848,19 @@ entry: } define fp128 @rint(fp128 %x) nounwind strictfp { -; CHECK-LABEL: rint: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq rintf128 -; CHECK-NEXT: popq %rax -; CHECK-NEXT: retq +; ANDROID-LABEL: rint: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq rintl@PLT +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: rint: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq rintf128@PLT +; GNU-NEXT: popq %rax +; GNU-NEXT: retq ; ; X86-LABEL: rint: ; X86: # %bb.0: # %entry @@ -713,12 +888,19 @@ entry: } define fp128 @round(fp128 %x) nounwind strictfp { -; CHECK-LABEL: round: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq roundf128 -; CHECK-NEXT: popq %rax -; CHECK-NEXT: retq +; ANDROID-LABEL: round: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax 
+; ANDROID-NEXT: callq roundl@PLT +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: round: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq roundf128@PLT +; GNU-NEXT: popq %rax +; GNU-NEXT: retq ; ; X86-LABEL: round: ; X86: # %bb.0: # %entry @@ -746,12 +928,19 @@ entry: } define fp128 @roundeven(fp128 %x) nounwind strictfp { -; CHECK-LABEL: roundeven: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq roundevenf128 -; CHECK-NEXT: popq %rax -; CHECK-NEXT: retq +; ANDROID-LABEL: roundeven: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq roundevenl@PLT +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: roundeven: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq roundevenf128@PLT +; GNU-NEXT: popq %rax +; GNU-NEXT: retq ; ; X86-LABEL: roundeven: ; X86: # %bb.0: # %entry @@ -779,12 +968,19 @@ entry: } define fp128 @sin(fp128 %x) nounwind strictfp { -; CHECK-LABEL: sin: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq sinf128 -; CHECK-NEXT: popq %rax -; CHECK-NEXT: retq +; ANDROID-LABEL: sin: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq sinl@PLT +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: sin: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq sinf128@PLT +; GNU-NEXT: popq %rax +; GNU-NEXT: retq ; ; X86-LABEL: sin: ; X86: # %bb.0: # %entry @@ -812,12 +1008,19 @@ entry: } define fp128 @sqrt(fp128 %x) nounwind strictfp { -; CHECK-LABEL: sqrt: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq sqrtf128 -; CHECK-NEXT: popq %rax -; CHECK-NEXT: retq +; ANDROID-LABEL: sqrt: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq sqrtl@PLT +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: sqrt: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq sqrtf128@PLT +; GNU-NEXT: 
popq %rax +; GNU-NEXT: retq ; ; X86-LABEL: sqrt: ; X86: # %bb.0: # %entry @@ -845,12 +1048,19 @@ entry: } define fp128 @trunc(fp128 %x) nounwind strictfp { -; CHECK-LABEL: trunc: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq truncf128 -; CHECK-NEXT: popq %rax -; CHECK-NEXT: retq +; ANDROID-LABEL: trunc: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq truncl@PLT +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: trunc: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq truncf128@PLT +; GNU-NEXT: popq %rax +; GNU-NEXT: retq ; ; X86-LABEL: trunc: ; X86: # %bb.0: # %entry @@ -878,12 +1088,19 @@ entry: } define i32 @lrint(fp128 %x) nounwind strictfp { -; CHECK-LABEL: lrint: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq lrintf128 -; CHECK-NEXT: popq %rcx -; CHECK-NEXT: retq +; ANDROID-LABEL: lrint: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq lrintl@PLT +; ANDROID-NEXT: popq %rcx +; ANDROID-NEXT: retq +; +; GNU-LABEL: lrint: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq lrintf128@PLT +; GNU-NEXT: popq %rcx +; GNU-NEXT: retq ; ; X86-LABEL: lrint: ; X86: # %bb.0: # %entry @@ -901,12 +1118,19 @@ entry: } define i64 @llrint(fp128 %x) nounwind strictfp { -; CHECK-LABEL: llrint: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq llrintf128 -; CHECK-NEXT: popq %rcx -; CHECK-NEXT: retq +; ANDROID-LABEL: llrint: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq llrintl@PLT +; ANDROID-NEXT: popq %rcx +; ANDROID-NEXT: retq +; +; GNU-LABEL: llrint: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq llrintf128@PLT +; GNU-NEXT: popq %rcx +; GNU-NEXT: retq ; ; X86-LABEL: llrint: ; X86: # %bb.0: # %entry @@ -924,12 +1148,19 @@ entry: } define i32 @lround(fp128 %x) nounwind strictfp { -; CHECK-LABEL: lround: -; CHECK: # %bb.0: # %entry 
-; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq lroundf128 -; CHECK-NEXT: popq %rcx -; CHECK-NEXT: retq +; ANDROID-LABEL: lround: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq lroundl@PLT +; ANDROID-NEXT: popq %rcx +; ANDROID-NEXT: retq +; +; GNU-LABEL: lround: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq lroundf128@PLT +; GNU-NEXT: popq %rcx +; GNU-NEXT: retq ; ; X86-LABEL: lround: ; X86: # %bb.0: # %entry @@ -947,12 +1178,19 @@ entry: } define i64 @llround(fp128 %x) nounwind strictfp { -; CHECK-LABEL: llround: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq llroundf128 -; CHECK-NEXT: popq %rcx -; CHECK-NEXT: retq +; ANDROID-LABEL: llround: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq llroundl@PLT +; ANDROID-NEXT: popq %rcx +; ANDROID-NEXT: retq +; +; GNU-LABEL: llround: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq llroundf128@PLT +; GNU-NEXT: popq %rcx +; GNU-NEXT: retq ; ; X86-LABEL: llround: ; X86: # %bb.0: # %entry @@ -986,6 +1224,38 @@ define i64 @cmp(i64 %a, i64 %b, fp128 %x, fp128 %y) #0 { ; CHECK-NEXT: popq %r14 ; CHECK-NEXT: retq ; +; ANDROID-LABEL: cmp: +; ANDROID: # %bb.0: +; ANDROID-NEXT: pushq %r14 +; ANDROID-NEXT: pushq %rbx +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: movq %rsi, %rbx +; ANDROID-NEXT: movq %rdi, %r14 +; ANDROID-NEXT: callq __eqtf2@PLT +; ANDROID-NEXT: testl %eax, %eax +; ANDROID-NEXT: cmovneq %rbx, %r14 +; ANDROID-NEXT: movq %r14, %rax +; ANDROID-NEXT: addq $8, %rsp +; ANDROID-NEXT: popq %rbx +; ANDROID-NEXT: popq %r14 +; ANDROID-NEXT: retq +; +; GNU-LABEL: cmp: +; GNU: # %bb.0: +; GNU-NEXT: pushq %r14 +; GNU-NEXT: pushq %rbx +; GNU-NEXT: pushq %rax +; GNU-NEXT: movq %rsi, %rbx +; GNU-NEXT: movq %rdi, %r14 +; GNU-NEXT: callq __eqtf2@PLT +; GNU-NEXT: testl %eax, %eax +; GNU-NEXT: cmovneq %rbx, %r14 +; GNU-NEXT: movq %r14, %rax +; GNU-NEXT: addq $8, %rsp +; GNU-NEXT: popq %rbx +; GNU-NEXT: 
popq %r14 +; GNU-NEXT: retq +; ; X86-LABEL: cmp: ; X86: # %bb.0: ; X86-NEXT: subl $12, %esp @@ -1032,6 +1302,38 @@ define i64 @cmps(i64 %a, i64 %b, fp128 %x, fp128 %y) #0 { ; CHECK-NEXT: popq %r14 ; CHECK-NEXT: retq ; +; ANDROID-LABEL: cmps: +; ANDROID: # %bb.0: +; ANDROID-NEXT: pushq %r14 +; ANDROID-NEXT: pushq %rbx +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: movq %rsi, %rbx +; ANDROID-NEXT: movq %rdi, %r14 +; ANDROID-NEXT: callq __eqtf2@PLT +; ANDROID-NEXT: testl %eax, %eax +; ANDROID-NEXT: cmovneq %rbx, %r14 +; ANDROID-NEXT: movq %r14, %rax +; ANDROID-NEXT: addq $8, %rsp +; ANDROID-NEXT: popq %rbx +; ANDROID-NEXT: popq %r14 +; ANDROID-NEXT: retq +; +; GNU-LABEL: cmps: +; GNU: # %bb.0: +; GNU-NEXT: pushq %r14 +; GNU-NEXT: pushq %rbx +; GNU-NEXT: pushq %rax +; GNU-NEXT: movq %rsi, %rbx +; GNU-NEXT: movq %rdi, %r14 +; GNU-NEXT: callq __eqtf2@PLT +; GNU-NEXT: testl %eax, %eax +; GNU-NEXT: cmovneq %rbx, %r14 +; GNU-NEXT: movq %r14, %rax +; GNU-NEXT: addq $8, %rsp +; GNU-NEXT: popq %rbx +; GNU-NEXT: popq %r14 +; GNU-NEXT: retq +; ; X86-LABEL: cmps: ; X86: # %bb.0: ; X86-NEXT: subl $12, %esp @@ -1089,6 +1391,60 @@ define i64 @cmp_ueq_q(i64 %a, i64 %b, fp128 %x, fp128 %y) #0 { ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: retq ; +; ANDROID-LABEL: cmp_ueq_q: +; ANDROID: # %bb.0: +; ANDROID-NEXT: pushq %rbp +; ANDROID-NEXT: pushq %r14 +; ANDROID-NEXT: pushq %rbx +; ANDROID-NEXT: subq $32, %rsp +; ANDROID-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; ANDROID-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; ANDROID-NEXT: movq %rsi, %rbx +; ANDROID-NEXT: movq %rdi, %r14 +; ANDROID-NEXT: callq __eqtf2@PLT +; ANDROID-NEXT: testl %eax, %eax +; ANDROID-NEXT: sete %bpl +; ANDROID-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; ANDROID-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; ANDROID-NEXT: callq __unordtf2@PLT +; ANDROID-NEXT: testl %eax, %eax +; ANDROID-NEXT: setne %al +; ANDROID-NEXT: orb %bpl, %al +; ANDROID-NEXT: cmoveq %rbx, %r14 +; 
ANDROID-NEXT: movq %r14, %rax +; ANDROID-NEXT: addq $32, %rsp +; ANDROID-NEXT: popq %rbx +; ANDROID-NEXT: popq %r14 +; ANDROID-NEXT: popq %rbp +; ANDROID-NEXT: retq +; +; GNU-LABEL: cmp_ueq_q: +; GNU: # %bb.0: +; GNU-NEXT: pushq %rbp +; GNU-NEXT: pushq %r14 +; GNU-NEXT: pushq %rbx +; GNU-NEXT: subq $32, %rsp +; GNU-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; GNU-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; GNU-NEXT: movq %rsi, %rbx +; GNU-NEXT: movq %rdi, %r14 +; GNU-NEXT: callq __eqtf2@PLT +; GNU-NEXT: testl %eax, %eax +; GNU-NEXT: sete %bpl +; GNU-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; GNU-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; GNU-NEXT: callq __unordtf2@PLT +; GNU-NEXT: testl %eax, %eax +; GNU-NEXT: setne %al +; GNU-NEXT: orb %bpl, %al +; GNU-NEXT: cmoveq %rbx, %r14 +; GNU-NEXT: movq %r14, %rax +; GNU-NEXT: addq $32, %rsp +; GNU-NEXT: popq %rbx +; GNU-NEXT: popq %r14 +; GNU-NEXT: popq %rbp +; GNU-NEXT: retq +; ; X86-LABEL: cmp_ueq_q: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp @@ -1172,6 +1528,60 @@ define i64 @cmp_one_q(i64 %a, i64 %b, fp128 %x, fp128 %y) #0 { ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: retq ; +; ANDROID-LABEL: cmp_one_q: +; ANDROID: # %bb.0: +; ANDROID-NEXT: pushq %rbp +; ANDROID-NEXT: pushq %r14 +; ANDROID-NEXT: pushq %rbx +; ANDROID-NEXT: subq $32, %rsp +; ANDROID-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; ANDROID-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; ANDROID-NEXT: movq %rsi, %rbx +; ANDROID-NEXT: movq %rdi, %r14 +; ANDROID-NEXT: callq __eqtf2@PLT +; ANDROID-NEXT: testl %eax, %eax +; ANDROID-NEXT: setne %bpl +; ANDROID-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; ANDROID-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; ANDROID-NEXT: callq __unordtf2@PLT +; ANDROID-NEXT: testl %eax, %eax +; ANDROID-NEXT: sete %al +; ANDROID-NEXT: testb %bpl, %al +; ANDROID-NEXT: cmoveq %rbx, %r14 +; ANDROID-NEXT: movq %r14, %rax +; ANDROID-NEXT: addq $32, %rsp +; 
ANDROID-NEXT: popq %rbx +; ANDROID-NEXT: popq %r14 +; ANDROID-NEXT: popq %rbp +; ANDROID-NEXT: retq +; +; GNU-LABEL: cmp_one_q: +; GNU: # %bb.0: +; GNU-NEXT: pushq %rbp +; GNU-NEXT: pushq %r14 +; GNU-NEXT: pushq %rbx +; GNU-NEXT: subq $32, %rsp +; GNU-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; GNU-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; GNU-NEXT: movq %rsi, %rbx +; GNU-NEXT: movq %rdi, %r14 +; GNU-NEXT: callq __eqtf2@PLT +; GNU-NEXT: testl %eax, %eax +; GNU-NEXT: setne %bpl +; GNU-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; GNU-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; GNU-NEXT: callq __unordtf2@PLT +; GNU-NEXT: testl %eax, %eax +; GNU-NEXT: sete %al +; GNU-NEXT: testb %bpl, %al +; GNU-NEXT: cmoveq %rbx, %r14 +; GNU-NEXT: movq %r14, %rax +; GNU-NEXT: addq $32, %rsp +; GNU-NEXT: popq %rbx +; GNU-NEXT: popq %r14 +; GNU-NEXT: popq %rbp +; GNU-NEXT: retq +; ; X86-LABEL: cmp_one_q: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp diff --git a/llvm/test/CodeGen/X86/fp128-libcalls.ll b/llvm/test/CodeGen/X86/fp128-libcalls.ll index 6946ca2d575b7f..bb75ec10851197 100644 --- a/llvm/test/CodeGen/X86/fp128-libcalls.ll +++ b/llvm/test/CodeGen/X86/fp128-libcalls.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -O2 -mtriple=x86_64-linux-android \ -; RUN: -enable-legalize-types-checking | FileCheck %s +; RUN: -enable-legalize-types-checking | FileCheck %s --check-prefix=ANDROID ; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu \ -; RUN: -enable-legalize-types-checking | FileCheck %s +; RUN: -enable-legalize-types-checking | FileCheck %s --check-prefix=GNU ; RUN: llc < %s -O2 -mtriple=i686-linux-gnu -mattr=sse2 \ ; RUN: -enable-legalize-types-checking | FileCheck %s --check-prefix=X86 @@ -20,6 +20,22 @@ define dso_local void @Test128Add(fp128 %d1, fp128 %d2) nounwind { ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq ; +; ANDROID-LABEL: Test128Add: +; ANDROID: # %bb.0: # %entry +; 
ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq __addtf3@PLT +; ANDROID-NEXT: movaps %xmm0, vf128(%rip) +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: Test128Add: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq __addtf3@PLT +; GNU-NEXT: movaps %xmm0, vf128(%rip) +; GNU-NEXT: popq %rax +; GNU-NEXT: retq +; ; X86-LABEL: Test128Add: ; X86: # %bb.0: # %entry ; X86-NEXT: subl $40, %esp @@ -56,6 +72,26 @@ define dso_local void @Test128_1Add(fp128 %d1) nounwind { ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq ; +; ANDROID-LABEL: Test128_1Add: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: movaps %xmm0, %xmm1 +; ANDROID-NEXT: movaps vf128(%rip), %xmm0 +; ANDROID-NEXT: callq __addtf3@PLT +; ANDROID-NEXT: movaps %xmm0, vf128(%rip) +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: Test128_1Add: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: movaps %xmm0, %xmm1 +; GNU-NEXT: movaps vf128(%rip), %xmm0 +; GNU-NEXT: callq __addtf3@PLT +; GNU-NEXT: movaps %xmm0, vf128(%rip) +; GNU-NEXT: popq %rax +; GNU-NEXT: retq +; ; X86-LABEL: Test128_1Add: ; X86: # %bb.0: # %entry ; X86-NEXT: subl $40, %esp @@ -91,6 +127,22 @@ define dso_local void @Test128Sub(fp128 %d1, fp128 %d2) nounwind { ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq ; +; ANDROID-LABEL: Test128Sub: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq __subtf3@PLT +; ANDROID-NEXT: movaps %xmm0, vf128(%rip) +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: Test128Sub: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq __subtf3@PLT +; GNU-NEXT: movaps %xmm0, vf128(%rip) +; GNU-NEXT: popq %rax +; GNU-NEXT: retq +; ; X86-LABEL: Test128Sub: ; X86: # %bb.0: # %entry ; X86-NEXT: subl $40, %esp @@ -127,6 +179,26 @@ define dso_local void @Test128_1Sub(fp128 %d1) nounwind { ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq ; +; ANDROID-LABEL: Test128_1Sub: +; ANDROID: # %bb.0: # %entry +; 
ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: movaps %xmm0, %xmm1 +; ANDROID-NEXT: movaps vf128(%rip), %xmm0 +; ANDROID-NEXT: callq __subtf3@PLT +; ANDROID-NEXT: movaps %xmm0, vf128(%rip) +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: Test128_1Sub: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: movaps %xmm0, %xmm1 +; GNU-NEXT: movaps vf128(%rip), %xmm0 +; GNU-NEXT: callq __subtf3@PLT +; GNU-NEXT: movaps %xmm0, vf128(%rip) +; GNU-NEXT: popq %rax +; GNU-NEXT: retq +; ; X86-LABEL: Test128_1Sub: ; X86: # %bb.0: # %entry ; X86-NEXT: subl $40, %esp @@ -162,6 +234,22 @@ define dso_local void @Test128Mul(fp128 %d1, fp128 %d2) nounwind { ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq ; +; ANDROID-LABEL: Test128Mul: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq __multf3@PLT +; ANDROID-NEXT: movaps %xmm0, vf128(%rip) +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: Test128Mul: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq __multf3@PLT +; GNU-NEXT: movaps %xmm0, vf128(%rip) +; GNU-NEXT: popq %rax +; GNU-NEXT: retq +; ; X86-LABEL: Test128Mul: ; X86: # %bb.0: # %entry ; X86-NEXT: subl $40, %esp @@ -198,6 +286,26 @@ define dso_local void @Test128_1Mul(fp128 %d1) nounwind { ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq ; +; ANDROID-LABEL: Test128_1Mul: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: movaps %xmm0, %xmm1 +; ANDROID-NEXT: movaps vf128(%rip), %xmm0 +; ANDROID-NEXT: callq __multf3@PLT +; ANDROID-NEXT: movaps %xmm0, vf128(%rip) +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: Test128_1Mul: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: movaps %xmm0, %xmm1 +; GNU-NEXT: movaps vf128(%rip), %xmm0 +; GNU-NEXT: callq __multf3@PLT +; GNU-NEXT: movaps %xmm0, vf128(%rip) +; GNU-NEXT: popq %rax +; GNU-NEXT: retq +; ; X86-LABEL: Test128_1Mul: ; X86: # %bb.0: # %entry ; X86-NEXT: subl $40, %esp @@ -233,6 +341,22 @@ define 
dso_local void @Test128Div(fp128 %d1, fp128 %d2) nounwind { ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq ; +; ANDROID-LABEL: Test128Div: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq __divtf3@PLT +; ANDROID-NEXT: movaps %xmm0, vf128(%rip) +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: Test128Div: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq __divtf3@PLT +; GNU-NEXT: movaps %xmm0, vf128(%rip) +; GNU-NEXT: popq %rax +; GNU-NEXT: retq +; ; X86-LABEL: Test128Div: ; X86: # %bb.0: # %entry ; X86-NEXT: subl $40, %esp @@ -269,6 +393,26 @@ define dso_local void @Test128_1Div(fp128 %d1) nounwind { ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq ; +; ANDROID-LABEL: Test128_1Div: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: movaps %xmm0, %xmm1 +; ANDROID-NEXT: movaps vf128(%rip), %xmm0 +; ANDROID-NEXT: callq __divtf3@PLT +; ANDROID-NEXT: movaps %xmm0, vf128(%rip) +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: Test128_1Div: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: movaps %xmm0, %xmm1 +; GNU-NEXT: movaps vf128(%rip), %xmm0 +; GNU-NEXT: callq __divtf3@PLT +; GNU-NEXT: movaps %xmm0, vf128(%rip) +; GNU-NEXT: popq %rax +; GNU-NEXT: retq +; ; X86-LABEL: Test128_1Div: ; X86: # %bb.0: # %entry ; X86-NEXT: subl $40, %esp @@ -296,13 +440,21 @@ entry: } define dso_local void @Test128Rem(fp128 %d1, fp128 %d2) nounwind { -; CHECK-LABEL: Test128Rem: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq fmodf128 -; CHECK-NEXT: movaps %xmm0, vf128(%rip) -; CHECK-NEXT: popq %rax -; CHECK-NEXT: retq +; ANDROID-LABEL: Test128Rem: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq fmodl@PLT +; ANDROID-NEXT: movaps %xmm0, vf128(%rip) +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: Test128Rem: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq fmodf128@PLT +; GNU-NEXT: 
movaps %xmm0, vf128(%rip) +; GNU-NEXT: popq %rax +; GNU-NEXT: retq ; ; X86-LABEL: Test128Rem: ; X86: # %bb.0: # %entry @@ -330,15 +482,25 @@ entry: } define dso_local void @Test128_1Rem(fp128 %d1) nounwind { -; CHECK-LABEL: Test128_1Rem: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: movaps %xmm0, %xmm1 -; CHECK-NEXT: movaps vf128(%rip), %xmm0 -; CHECK-NEXT: callq fmodf128 -; CHECK-NEXT: movaps %xmm0, vf128(%rip) -; CHECK-NEXT: popq %rax -; CHECK-NEXT: retq +; ANDROID-LABEL: Test128_1Rem: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: movaps %xmm0, %xmm1 +; ANDROID-NEXT: movaps vf128(%rip), %xmm0 +; ANDROID-NEXT: callq fmodl@PLT +; ANDROID-NEXT: movaps %xmm0, vf128(%rip) +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: Test128_1Rem: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: movaps %xmm0, %xmm1 +; GNU-NEXT: movaps vf128(%rip), %xmm0 +; GNU-NEXT: callq fmodf128@PLT +; GNU-NEXT: movaps %xmm0, vf128(%rip) +; GNU-NEXT: popq %rax +; GNU-NEXT: retq ; ; X86-LABEL: Test128_1Rem: ; X86: # %bb.0: # %entry @@ -367,13 +529,21 @@ entry: } define dso_local void @Test128Sqrt(fp128 %d1) nounwind { -; CHECK-LABEL: Test128Sqrt: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq sqrtf128 -; CHECK-NEXT: movaps %xmm0, vf128(%rip) -; CHECK-NEXT: popq %rax -; CHECK-NEXT: retq +; ANDROID-LABEL: Test128Sqrt: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq sqrtl@PLT +; ANDROID-NEXT: movaps %xmm0, vf128(%rip) +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: Test128Sqrt: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq sqrtf128@PLT +; GNU-NEXT: movaps %xmm0, vf128(%rip) +; GNU-NEXT: popq %rax +; GNU-NEXT: retq ; ; X86-LABEL: Test128Sqrt: ; X86: # %bb.0: # %entry @@ -398,13 +568,21 @@ entry: declare fp128 @llvm.sqrt.f128(fp128) define dso_local void @Test128Sin(fp128 %d1) nounwind { -; CHECK-LABEL: Test128Sin: -; 
CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq sinf128 -; CHECK-NEXT: movaps %xmm0, vf128(%rip) -; CHECK-NEXT: popq %rax -; CHECK-NEXT: retq +; ANDROID-LABEL: Test128Sin: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq sinl@PLT +; ANDROID-NEXT: movaps %xmm0, vf128(%rip) +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: Test128Sin: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq sinf128@PLT +; GNU-NEXT: movaps %xmm0, vf128(%rip) +; GNU-NEXT: popq %rax +; GNU-NEXT: retq ; ; X86-LABEL: Test128Sin: ; X86: # %bb.0: # %entry @@ -429,13 +607,21 @@ entry: declare fp128 @llvm.sin.f128(fp128) define dso_local void @Test128Cos(fp128 %d1) nounwind { -; CHECK-LABEL: Test128Cos: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq cosf128 -; CHECK-NEXT: movaps %xmm0, vf128(%rip) -; CHECK-NEXT: popq %rax -; CHECK-NEXT: retq +; ANDROID-LABEL: Test128Cos: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq cosl@PLT +; ANDROID-NEXT: movaps %xmm0, vf128(%rip) +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: Test128Cos: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq cosf128@PLT +; GNU-NEXT: movaps %xmm0, vf128(%rip) +; GNU-NEXT: popq %rax +; GNU-NEXT: retq ; ; X86-LABEL: Test128Cos: ; X86: # %bb.0: # %entry @@ -460,13 +646,21 @@ entry: declare fp128 @llvm.cos.f128(fp128) define dso_local void @Test128Ceil(fp128 %d1) nounwind { -; CHECK-LABEL: Test128Ceil: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq ceilf128 -; CHECK-NEXT: movaps %xmm0, vf128(%rip) -; CHECK-NEXT: popq %rax -; CHECK-NEXT: retq +; ANDROID-LABEL: Test128Ceil: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq ceill@PLT +; ANDROID-NEXT: movaps %xmm0, vf128(%rip) +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: Test128Ceil: +; GNU: # %bb.0: # %entry +; GNU-NEXT: 
pushq %rax +; GNU-NEXT: callq ceilf128@PLT +; GNU-NEXT: movaps %xmm0, vf128(%rip) +; GNU-NEXT: popq %rax +; GNU-NEXT: retq ; ; X86-LABEL: Test128Ceil: ; X86: # %bb.0: # %entry @@ -491,13 +685,21 @@ entry: declare fp128 @llvm.ceil.f128(fp128) define dso_local void @Test128Floor(fp128 %d1) nounwind { -; CHECK-LABEL: Test128Floor: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq floorf128 -; CHECK-NEXT: movaps %xmm0, vf128(%rip) -; CHECK-NEXT: popq %rax -; CHECK-NEXT: retq +; ANDROID-LABEL: Test128Floor: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq floorl@PLT +; ANDROID-NEXT: movaps %xmm0, vf128(%rip) +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: Test128Floor: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq floorf128@PLT +; GNU-NEXT: movaps %xmm0, vf128(%rip) +; GNU-NEXT: popq %rax +; GNU-NEXT: retq ; ; X86-LABEL: Test128Floor: ; X86: # %bb.0: # %entry @@ -522,13 +724,21 @@ entry: declare fp128 @llvm.floor.f128(fp128) define dso_local void @Test128Trunc(fp128 %d1) nounwind { -; CHECK-LABEL: Test128Trunc: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq truncf128 -; CHECK-NEXT: movaps %xmm0, vf128(%rip) -; CHECK-NEXT: popq %rax -; CHECK-NEXT: retq +; ANDROID-LABEL: Test128Trunc: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq truncl@PLT +; ANDROID-NEXT: movaps %xmm0, vf128(%rip) +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: Test128Trunc: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq truncf128@PLT +; GNU-NEXT: movaps %xmm0, vf128(%rip) +; GNU-NEXT: popq %rax +; GNU-NEXT: retq ; ; X86-LABEL: Test128Trunc: ; X86: # %bb.0: # %entry @@ -553,13 +763,21 @@ entry: declare fp128 @llvm.trunc.f128(fp128) define dso_local void @Test128Nearbyint(fp128 %d1) nounwind { -; CHECK-LABEL: Test128Nearbyint: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq 
nearbyintf128 -; CHECK-NEXT: movaps %xmm0, vf128(%rip) -; CHECK-NEXT: popq %rax -; CHECK-NEXT: retq +; ANDROID-LABEL: Test128Nearbyint: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq nearbyintl@PLT +; ANDROID-NEXT: movaps %xmm0, vf128(%rip) +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: Test128Nearbyint: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq nearbyintf128@PLT +; GNU-NEXT: movaps %xmm0, vf128(%rip) +; GNU-NEXT: popq %rax +; GNU-NEXT: retq ; ; X86-LABEL: Test128Nearbyint: ; X86: # %bb.0: # %entry @@ -584,13 +802,21 @@ entry: declare fp128 @llvm.nearbyint.f128(fp128) define dso_local void @Test128Rint(fp128 %d1) nounwind { -; CHECK-LABEL: Test128Rint: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq rintf128 -; CHECK-NEXT: movaps %xmm0, vf128(%rip) -; CHECK-NEXT: popq %rax -; CHECK-NEXT: retq +; ANDROID-LABEL: Test128Rint: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq rintl@PLT +; ANDROID-NEXT: movaps %xmm0, vf128(%rip) +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: Test128Rint: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq rintf128@PLT +; GNU-NEXT: movaps %xmm0, vf128(%rip) +; GNU-NEXT: popq %rax +; GNU-NEXT: retq ; ; X86-LABEL: Test128Rint: ; X86: # %bb.0: # %entry @@ -615,13 +841,21 @@ entry: declare fp128 @llvm.rint.f128(fp128) define dso_local void @Test128Round(fp128 %d1) nounwind { -; CHECK-LABEL: Test128Round: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq roundf128 -; CHECK-NEXT: movaps %xmm0, vf128(%rip) -; CHECK-NEXT: popq %rax -; CHECK-NEXT: retq +; ANDROID-LABEL: Test128Round: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq roundl@PLT +; ANDROID-NEXT: movaps %xmm0, vf128(%rip) +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: Test128Round: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; 
GNU-NEXT: callq roundf128@PLT +; GNU-NEXT: movaps %xmm0, vf128(%rip) +; GNU-NEXT: popq %rax +; GNU-NEXT: retq ; ; X86-LABEL: Test128Round: ; X86: # %bb.0: # %entry @@ -646,9 +880,13 @@ entry: declare fp128 @llvm.round.f128(fp128) define fp128 @Test128FMA(fp128 %a, fp128 %b, fp128 %c) nounwind { -; CHECK-LABEL: Test128FMA: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: jmp fmaf128@PLT # TAILCALL +; ANDROID-LABEL: Test128FMA: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: jmp fmal@PLT # TAILCALL +; +; GNU-LABEL: Test128FMA: +; GNU: # %bb.0: # %entry +; GNU-NEXT: jmp fmaf128@PLT # TAILCALL ; ; X86-LABEL: Test128FMA: ; X86: # %bb.0: # %entry From 429919e32823ad735a19ab385f37e313512cedde Mon Sep 17 00:00:00 2001 From: Peiming Liu <36770114+PeimingLiu@users.noreply.github.com> Date: Tue, 13 Feb 2024 18:45:22 -0600 Subject: [PATCH 081/240] =?UTF-8?q?[mlir][sparse][pybind][CAPI]=20remove?= =?UTF-8?q?=20LevelType=20enum=20from=20CAPI,=20constru=E2=80=A6=20(#81682?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …ct LevelType from LevelFormat and properties instead. **Rationale** We used to explicitly declare every possible combination between `LevelFormat` and `LevelProperties`, and it now becomes difficult to scale as more properties/level formats are going to be introduced. 
--- mlir/include/mlir-c/Dialect/SparseTensor.h | 24 +++--- .../mlir/Dialect/SparseTensor/IR/Enums.h | 78 +++++-------------- .../Bindings/Python/DialectSparseTensor.cpp | 49 +++++------- mlir/lib/CAPI/Dialect/SparseTensor.cpp | 61 ++++++++------- .../Dialect/SparseTensor/python/test_SDDMM.py | 13 ++-- .../Dialect/SparseTensor/python/test_SpMM.py | 13 ++-- .../SparseTensor/python/test_output.py | 19 +++-- .../SparseTensor/python/test_stress.py | 6 +- .../python/dialects/sparse_tensor/dialect.py | 11 ++- 9 files changed, 123 insertions(+), 151 deletions(-) diff --git a/mlir/include/mlir-c/Dialect/SparseTensor.h b/mlir/include/mlir-c/Dialect/SparseTensor.h index d549f5dddc1318..898d2f12779e39 100644 --- a/mlir/include/mlir-c/Dialect/SparseTensor.h +++ b/mlir/include/mlir-c/Dialect/SparseTensor.h @@ -27,23 +27,19 @@ MLIR_DECLARE_CAPI_DIALECT_REGISTRATION(SparseTensor, sparse_tensor); /// file. typedef uint64_t MlirSparseTensorLevelType; -enum MlirBaseSparseTensorLevelType { +enum MlirSparseTensorLevelFormat { MLIR_SPARSE_TENSOR_LEVEL_DENSE = 0x000000010000, MLIR_SPARSE_TENSOR_LEVEL_COMPRESSED = 0x000000020000, - MLIR_SPARSE_TENSOR_LEVEL_COMPRESSED_NU = 0x000000020001, - MLIR_SPARSE_TENSOR_LEVEL_COMPRESSED_NO = 0x000000020002, - MLIR_SPARSE_TENSOR_LEVEL_COMPRESSED_NU_NO = 0x000000020003, MLIR_SPARSE_TENSOR_LEVEL_SINGLETON = 0x000000040000, - MLIR_SPARSE_TENSOR_LEVEL_SINGLETON_NU = 0x000000040001, - MLIR_SPARSE_TENSOR_LEVEL_SINGLETON_NO = 0x000000040002, - MLIR_SPARSE_TENSOR_LEVEL_SINGLETON_NU_NO = 0x000000040003, MLIR_SPARSE_TENSOR_LEVEL_LOOSE_COMPRESSED = 0x000000080000, - MLIR_SPARSE_TENSOR_LEVEL_LOOSE_COMPRESSED_NU = 0x000000080001, - MLIR_SPARSE_TENSOR_LEVEL_LOOSE_COMPRESSED_NO = 0x000000080002, - MLIR_SPARSE_TENSOR_LEVEL_LOOSE_COMPRESSED_NU_NO = 0x000000080003, MLIR_SPARSE_TENSOR_LEVEL_N_OUT_OF_M = 0x000000100000, }; +enum MlirSparseTensorLevelPropertyNondefault { + MLIR_SPARSE_PROPERTY_NON_UNIQUE = 0x0001, + MLIR_SPARSE_PROPERTY_NON_ORDERED = 0x0002, +}; + 
//===----------------------------------------------------------------------===// // SparseTensorEncodingAttr //===----------------------------------------------------------------------===// @@ -66,6 +62,10 @@ mlirSparseTensorEncodingGetLvlRank(MlirAttribute attr); MLIR_CAPI_EXPORTED MlirSparseTensorLevelType mlirSparseTensorEncodingAttrGetLvlType(MlirAttribute attr, intptr_t lvl); +/// Returns a specified level-format of the `sparse_tensor.encoding` attribute. +MLIR_CAPI_EXPORTED enum MlirSparseTensorLevelFormat +mlirSparseTensorEncodingAttrGetLvlFmt(MlirAttribute attr, intptr_t lvl); + /// Returns the dimension-to-level mapping of the `sparse_tensor.encoding` /// attribute. MLIR_CAPI_EXPORTED MlirAffineMap @@ -92,7 +92,9 @@ mlirSparseTensorEncodingAttrGetStructuredM(MlirSparseTensorLevelType lvlType); MLIR_CAPI_EXPORTED MlirSparseTensorLevelType mlirSparseTensorEncodingAttrBuildLvlType( - enum MlirBaseSparseTensorLevelType lvlType, unsigned n, unsigned m); + enum MlirSparseTensorLevelFormat lvlFmt, + const enum MlirSparseTensorLevelPropertyNondefault *properties, + unsigned propSize, unsigned n, unsigned m); #ifdef __cplusplus } diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h index e940d203be9ed5..74cc0dee554a17 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h @@ -35,6 +35,7 @@ #include #include #include +#include namespace mlir { namespace sparse_tensor { @@ -343,17 +344,31 @@ constexpr std::optional getLevelFormat(LevelType lt) { /// Convert a LevelFormat to its corresponding LevelType with the given /// properties. Returns std::nullopt when the properties are not applicable /// for the input level format. 
-constexpr std::optional buildLevelType(LevelFormat lf, bool ordered, - bool unique, uint64_t n = 0, - uint64_t m = 0) { +inline std::optional +buildLevelType(LevelFormat lf, + const std::vector &properties, + uint64_t n = 0, uint64_t m = 0) { uint64_t newN = n << 32; uint64_t newM = m << 40; - auto lt = - static_cast(static_cast(lf) | (ordered ? 0 : 2) | - (unique ? 0 : 1) | newN | newM); + uint64_t ltInt = static_cast(lf) | newN | newM; + for (auto p : properties) { + ltInt |= static_cast(p); + } + auto lt = static_cast(ltInt); return isValidLT(lt) ? std::optional(lt) : std::nullopt; } +inline std::optional buildLevelType(LevelFormat lf, bool ordered, + bool unique, uint64_t n = 0, + uint64_t m = 0) { + std::vector properties; + if (!ordered) + properties.push_back(LevelPropertyNondefault::Nonordered); + if (!unique) + properties.push_back(LevelPropertyNondefault::Nonunique); + return buildLevelType(lf, properties, n, m); +} + // // Ensure the above methods work as intended. // @@ -380,57 +395,6 @@ static_assert( *getLevelFormat(LevelType::NOutOfM) == LevelFormat::NOutOfM), "getLevelFormat conversion is broken"); -static_assert( - (buildLevelType(LevelFormat::Dense, false, true) == std::nullopt && - buildLevelType(LevelFormat::Dense, true, false) == std::nullopt && - buildLevelType(LevelFormat::Dense, false, false) == std::nullopt && - *buildLevelType(LevelFormat::Dense, true, true) == LevelType::Dense && - *buildLevelType(LevelFormat::Compressed, true, true) == - LevelType::Compressed && - *buildLevelType(LevelFormat::Compressed, true, false) == - LevelType::CompressedNu && - *buildLevelType(LevelFormat::Compressed, false, true) == - LevelType::CompressedNo && - *buildLevelType(LevelFormat::Compressed, false, false) == - LevelType::CompressedNuNo && - *buildLevelType(LevelFormat::Singleton, true, true) == - LevelType::Singleton && - *buildLevelType(LevelFormat::Singleton, true, false) == - LevelType::SingletonNu && - *buildLevelType(LevelFormat::Singleton, 
false, true) == - LevelType::SingletonNo && - *buildLevelType(LevelFormat::Singleton, false, false) == - LevelType::SingletonNuNo && - *buildLevelType(LevelFormat::LooseCompressed, true, true) == - LevelType::LooseCompressed && - *buildLevelType(LevelFormat::LooseCompressed, true, false) == - LevelType::LooseCompressedNu && - *buildLevelType(LevelFormat::LooseCompressed, false, true) == - LevelType::LooseCompressedNo && - *buildLevelType(LevelFormat::LooseCompressed, false, false) == - LevelType::LooseCompressedNuNo && - buildLevelType(LevelFormat::NOutOfM, false, true) == std::nullopt && - buildLevelType(LevelFormat::NOutOfM, true, false) == std::nullopt && - buildLevelType(LevelFormat::NOutOfM, false, false) == std::nullopt && - *buildLevelType(LevelFormat::NOutOfM, true, true) == LevelType::NOutOfM), - "buildLevelType conversion is broken"); - -static_assert( - (getN(*buildLevelType(LevelFormat::NOutOfM, true, true, 2, 4)) == 2 && - getM(*buildLevelType(LevelFormat::NOutOfM, true, true, 2, 4)) == 4 && - getN(*buildLevelType(LevelFormat::NOutOfM, true, true, 8, 10)) == 8 && - getM(*buildLevelType(LevelFormat::NOutOfM, true, true, 8, 10)) == 10), - "getN/M conversion is broken"); - -static_assert( - (isValidNOutOfMLT(*buildLevelType(LevelFormat::NOutOfM, true, true, 2, 4), - 2, 4) && - isValidNOutOfMLT(*buildLevelType(LevelFormat::NOutOfM, true, true, 8, 10), - 8, 10) && - !isValidNOutOfMLT(*buildLevelType(LevelFormat::NOutOfM, true, true, 3, 4), - 2, 4)), - "isValidNOutOfMLT definition is broken"); - static_assert( (isValidLT(LevelType::Undef) && isValidLT(LevelType::Dense) && isValidLT(LevelType::Compressed) && isValidLT(LevelType::CompressedNu) && diff --git a/mlir/lib/Bindings/Python/DialectSparseTensor.cpp b/mlir/lib/Bindings/Python/DialectSparseTensor.cpp index 74f4d2413a6993..171faf9e008746 100644 --- a/mlir/lib/Bindings/Python/DialectSparseTensor.cpp +++ b/mlir/lib/Bindings/Python/DialectSparseTensor.cpp @@ -23,24 +23,17 @@ using namespace mlir; using 
namespace mlir::python::adaptors; static void populateDialectSparseTensorSubmodule(const py::module &m) { - py::enum_(m, "LevelType", py::module_local()) + py::enum_(m, "LevelFormat", py::module_local()) .value("dense", MLIR_SPARSE_TENSOR_LEVEL_DENSE) .value("n_out_of_m", MLIR_SPARSE_TENSOR_LEVEL_N_OUT_OF_M) .value("compressed", MLIR_SPARSE_TENSOR_LEVEL_COMPRESSED) - .value("compressed_nu", MLIR_SPARSE_TENSOR_LEVEL_COMPRESSED_NU) - .value("compressed_no", MLIR_SPARSE_TENSOR_LEVEL_COMPRESSED_NO) - .value("compressed_nu_no", MLIR_SPARSE_TENSOR_LEVEL_COMPRESSED_NU_NO) .value("singleton", MLIR_SPARSE_TENSOR_LEVEL_SINGLETON) - .value("singleton_nu", MLIR_SPARSE_TENSOR_LEVEL_SINGLETON_NU) - .value("singleton_no", MLIR_SPARSE_TENSOR_LEVEL_SINGLETON_NO) - .value("singleton_nu_no", MLIR_SPARSE_TENSOR_LEVEL_SINGLETON_NU_NO) - .value("loose_compressed", MLIR_SPARSE_TENSOR_LEVEL_LOOSE_COMPRESSED) - .value("loose_compressed_nu", - MLIR_SPARSE_TENSOR_LEVEL_LOOSE_COMPRESSED_NU) - .value("loose_compressed_no", - MLIR_SPARSE_TENSOR_LEVEL_LOOSE_COMPRESSED_NO) - .value("loose_compressed_nu_no", - MLIR_SPARSE_TENSOR_LEVEL_LOOSE_COMPRESSED_NU_NO); + .value("loose_compressed", MLIR_SPARSE_TENSOR_LEVEL_LOOSE_COMPRESSED); + + py::enum_(m, "LevelProperty", + py::module_local()) + .value("non_ordered", MLIR_SPARSE_PROPERTY_NON_ORDERED) + .value("non_unique", MLIR_SPARSE_PROPERTY_NON_UNIQUE); mlir_attribute_subclass(m, "EncodingAttr", mlirAttributeIsASparseTensorEncodingAttr) @@ -62,12 +55,17 @@ static void populateDialectSparseTensorSubmodule(const py::module &m) { "Gets a sparse_tensor.encoding from parameters.") .def_classmethod( "build_level_type", - [](py::object cls, MlirBaseSparseTensorLevelType lvlType, unsigned n, - unsigned m) { - return mlirSparseTensorEncodingAttrBuildLvlType(lvlType, n, m); + [](py::object cls, MlirSparseTensorLevelFormat lvlFmt, + const std::vector + &properties, + unsigned n, unsigned m) { + return mlirSparseTensorEncodingAttrBuildLvlType( + lvlFmt, 
properties.data(), properties.size(), n, m); }, - py::arg("cls"), py::arg("lvl_type"), py::arg("n") = 0, - py::arg("m") = 0, + py::arg("cls"), py::arg("lvl_fmt"), + py::arg("properties") = + std::vector(), + py::arg("n") = 0, py::arg("m") = 0, "Builds a sparse_tensor.encoding.level_type from parameters.") .def_property_readonly( "lvl_types", @@ -113,17 +111,12 @@ static void populateDialectSparseTensorSubmodule(const py::module &m) { return mlirSparseTensorEncodingAttrGetStructuredM( mlirSparseTensorEncodingAttrGetLvlType(self, lvlRank - 1)); }) - .def_property_readonly("lvl_types_enum", [](MlirAttribute self) { + .def_property_readonly("lvl_formats_enum", [](MlirAttribute self) { const int lvlRank = mlirSparseTensorEncodingGetLvlRank(self); - std::vector ret; + std::vector ret; ret.reserve(lvlRank); - for (int l = 0; l < lvlRank; l++) { - // Convert level type to 32 bits to ignore n and m for n_out_of_m - // format. - ret.push_back( - static_cast(static_cast( - mlirSparseTensorEncodingAttrGetLvlType(self, l)))); - } + for (int l = 0; l < lvlRank; l++) + ret.push_back(mlirSparseTensorEncodingAttrGetLvlFmt(self, l)); return ret; }); } diff --git a/mlir/lib/CAPI/Dialect/SparseTensor.cpp b/mlir/lib/CAPI/Dialect/SparseTensor.cpp index 4e1bd45863fdac..55af8becbba20e 100644 --- a/mlir/lib/CAPI/Dialect/SparseTensor.cpp +++ b/mlir/lib/CAPI/Dialect/SparseTensor.cpp @@ -22,34 +22,23 @@ MLIR_DEFINE_CAPI_DIALECT_REGISTRATION(SparseTensor, sparse_tensor, // Ensure the C-API enums are int-castable to C++ equivalents. 
static_assert( static_cast(MLIR_SPARSE_TENSOR_LEVEL_DENSE) == - static_cast(LevelType::Dense) && + static_cast(LevelFormat::Dense) && static_cast(MLIR_SPARSE_TENSOR_LEVEL_COMPRESSED) == - static_cast(LevelType::Compressed) && - static_cast(MLIR_SPARSE_TENSOR_LEVEL_COMPRESSED_NU) == - static_cast(LevelType::CompressedNu) && - static_cast(MLIR_SPARSE_TENSOR_LEVEL_COMPRESSED_NO) == - static_cast(LevelType::CompressedNo) && - static_cast(MLIR_SPARSE_TENSOR_LEVEL_COMPRESSED_NU_NO) == - static_cast(LevelType::CompressedNuNo) && + static_cast(LevelFormat::Compressed) && static_cast(MLIR_SPARSE_TENSOR_LEVEL_SINGLETON) == - static_cast(LevelType::Singleton) && - static_cast(MLIR_SPARSE_TENSOR_LEVEL_SINGLETON_NU) == - static_cast(LevelType::SingletonNu) && - static_cast(MLIR_SPARSE_TENSOR_LEVEL_SINGLETON_NO) == - static_cast(LevelType::SingletonNo) && - static_cast(MLIR_SPARSE_TENSOR_LEVEL_SINGLETON_NU_NO) == - static_cast(LevelType::SingletonNuNo) && + static_cast(LevelFormat::Singleton) && static_cast(MLIR_SPARSE_TENSOR_LEVEL_LOOSE_COMPRESSED) == - static_cast(LevelType::LooseCompressed) && - static_cast(MLIR_SPARSE_TENSOR_LEVEL_LOOSE_COMPRESSED_NU) == - static_cast(LevelType::LooseCompressedNu) && - static_cast(MLIR_SPARSE_TENSOR_LEVEL_LOOSE_COMPRESSED_NO) == - static_cast(LevelType::LooseCompressedNo) && - static_cast(MLIR_SPARSE_TENSOR_LEVEL_LOOSE_COMPRESSED_NU_NO) == - static_cast(LevelType::LooseCompressedNuNo) && + static_cast(LevelFormat::LooseCompressed) && static_cast(MLIR_SPARSE_TENSOR_LEVEL_N_OUT_OF_M) == - static_cast(LevelType::NOutOfM), - "MlirSparseTensorLevelType (C-API) and LevelType (C++) mismatch"); + static_cast(LevelFormat::NOutOfM), + "MlirSparseTensorLevelFormat (C-API) and LevelFormat (C++) mismatch"); + +static_assert(static_cast(MLIR_SPARSE_PROPERTY_NON_ORDERED) == + static_cast(LevelPropertyNondefault::Nonordered) && + static_cast(MLIR_SPARSE_PROPERTY_NON_UNIQUE) == + static_cast(LevelPropertyNondefault::Nonunique), + 
"MlirSparseTensorLevelProperty (C-API) and " + "LevelPropertyNondefault (C++) mismatch"); bool mlirAttributeIsASparseTensorEncodingAttr(MlirAttribute attr) { return isa(unwrap(attr)); @@ -87,6 +76,13 @@ mlirSparseTensorEncodingAttrGetLvlType(MlirAttribute attr, intptr_t lvl) { cast(unwrap(attr)).getLvlType(lvl)); } +enum MlirSparseTensorLevelFormat +mlirSparseTensorEncodingAttrGetLvlFmt(MlirAttribute attr, intptr_t lvl) { + LevelType lt = + static_cast(mlirSparseTensorEncodingAttrGetLvlType(attr, lvl)); + return static_cast(*getLevelFormat(lt)); +} + int mlirSparseTensorEncodingAttrGetPosWidth(MlirAttribute attr) { return cast(unwrap(attr)).getPosWidth(); } @@ -95,12 +91,17 @@ int mlirSparseTensorEncodingAttrGetCrdWidth(MlirAttribute attr) { return cast(unwrap(attr)).getCrdWidth(); } -MlirSparseTensorLevelType -mlirSparseTensorEncodingAttrBuildLvlType(MlirBaseSparseTensorLevelType lvlType, - unsigned n, unsigned m) { - LevelType lt = static_cast(lvlType); - return static_cast(*buildLevelType( - *getLevelFormat(lt), isOrderedLT(lt), isUniqueLT(lt), n, m)); +MlirSparseTensorLevelType mlirSparseTensorEncodingAttrBuildLvlType( + enum MlirSparseTensorLevelFormat lvlFmt, + const enum MlirSparseTensorLevelPropertyNondefault *properties, + unsigned size, unsigned n, unsigned m) { + + std::vector props; + for (unsigned i = 0; i < size; i++) + props.push_back(static_cast(properties[i])); + + return static_cast( + *buildLevelType(static_cast(lvlFmt), props, n, m)); } unsigned diff --git a/mlir/test/Integration/Dialect/SparseTensor/python/test_SDDMM.py b/mlir/test/Integration/Dialect/SparseTensor/python/test_SDDMM.py index 199777c79ef838..e2050b98728f21 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/python/test_SDDMM.py +++ b/mlir/test/Integration/Dialect/SparseTensor/python/test_SDDMM.py @@ -139,12 +139,15 @@ def main(): # search the full state space to reduce runtime of the test. It is # straightforward to adapt the code below to explore more combinations. 
# For these simple orderings, dim2lvl and lvl2dim are the same. + builder = st.EncodingAttr.build_level_type + fmt = st.LevelFormat + prop = st.LevelProperty levels = [ - [st.LevelType.compressed_nu, st.LevelType.singleton], - [st.LevelType.dense, st.LevelType.dense], - [st.LevelType.dense, st.LevelType.compressed], - [st.LevelType.compressed, st.LevelType.dense], - [st.LevelType.compressed, st.LevelType.compressed], + [builder(fmt.compressed, [prop.non_unique]), builder(fmt.singleton)], + [builder(fmt.dense), builder(fmt.dense)], + [builder(fmt.dense), builder(fmt.compressed)], + [builder(fmt.compressed), builder(fmt.dense)], + [builder(fmt.compressed), builder(fmt.compressed)], ] orderings = [ ir.AffineMap.get_permutation([0, 1]), diff --git a/mlir/test/Integration/Dialect/SparseTensor/python/test_SpMM.py b/mlir/test/Integration/Dialect/SparseTensor/python/test_SpMM.py index 0aa4f92a7bf4ef..e7354c24d619e0 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/python/test_SpMM.py +++ b/mlir/test/Integration/Dialect/SparseTensor/python/test_SpMM.py @@ -125,12 +125,15 @@ def main(): vl = 1 e = False opt = f"parallelization-strategy=none" + builder = st.EncodingAttr.build_level_type + fmt = st.LevelFormat + prop = st.LevelProperty levels = [ - [st.LevelType.compressed_nu, st.LevelType.singleton], - [st.LevelType.dense, st.LevelType.dense], - [st.LevelType.dense, st.LevelType.compressed], - [st.LevelType.compressed, st.LevelType.dense], - [st.LevelType.compressed, st.LevelType.compressed], + [builder(fmt.compressed, [prop.non_unique]), builder(fmt.singleton)], + [builder(fmt.dense), builder(fmt.dense)], + [builder(fmt.dense), builder(fmt.compressed)], + [builder(fmt.compressed), builder(fmt.dense)], + [builder(fmt.compressed), builder(fmt.compressed)], ] orderings = [ ir.AffineMap.get_permutation([0, 1]), diff --git a/mlir/test/Integration/Dialect/SparseTensor/python/test_output.py b/mlir/test/Integration/Dialect/SparseTensor/python/test_output.py index 
d994e8d0a8a19d..7da05303c7e1e1 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/python/test_output.py +++ b/mlir/test/Integration/Dialect/SparseTensor/python/test_output.py @@ -124,11 +124,14 @@ def main(): # Loop over various sparse types (COO, CSR, DCSR, CSC, DCSC) with # regular and loose compression and various metadata bitwidths. # For these simple orderings, dim2lvl and lvl2dim are the same. + builder = st.EncodingAttr.build_level_type + fmt = st.LevelFormat + prop = st.LevelProperty levels = [ - [st.LevelType.compressed_nu, st.LevelType.singleton], - [st.LevelType.dense, st.LevelType.compressed], - [st.LevelType.dense, st.LevelType.loose_compressed], - [st.LevelType.compressed, st.LevelType.compressed], + [builder(fmt.compressed, [prop.non_unique]), builder(fmt.singleton)], + [builder(fmt.dense), builder(fmt.compressed)], + [builder(fmt.dense), builder(fmt.loose_compressed)], + [builder(fmt.compressed), builder(fmt.compressed)], ] orderings = [ (ir.AffineMap.get_permutation([0, 1]), 0), @@ -149,10 +152,10 @@ def main(): # Now do the same for BSR. level = [ - st.LevelType.dense, - st.LevelType.compressed, - st.LevelType.dense, - st.LevelType.dense, + builder(fmt.dense), + builder(fmt.compressed), + builder(fmt.dense), + builder(fmt.dense), ] d0 = ir.AffineDimExpr.get(0) d1 = ir.AffineDimExpr.get(1) diff --git a/mlir/test/Integration/Dialect/SparseTensor/python/test_stress.py b/mlir/test/Integration/Dialect/SparseTensor/python/test_stress.py index 2b79c1416562dc..ce3516e2edaf03 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/python/test_stress.py +++ b/mlir/test/Integration/Dialect/SparseTensor/python/test_stress.py @@ -203,10 +203,10 @@ def main(): shape = range(2, 3) rank = len(shape) # All combinations. 
+ dense_lvl = st.EncodingAttr.build_level_type(st.LevelFormat.dense) + sparse_lvl = st.EncodingAttr.build_level_type(st.LevelFormat.compressed) levels = list( - itertools.product( - *itertools.repeat([st.LevelType.dense, st.LevelType.compressed], rank) - ) + itertools.product(*itertools.repeat([dense_lvl, sparse_lvl], rank)) ) # All permutations. orderings = list( diff --git a/mlir/test/python/dialects/sparse_tensor/dialect.py b/mlir/test/python/dialects/sparse_tensor/dialect.py index 1fa7030ca1be91..2c0603216ef2c2 100644 --- a/mlir/test/python/dialects/sparse_tensor/dialect.py +++ b/mlir/test/python/dialects/sparse_tensor/dialect.py @@ -73,8 +73,8 @@ def testEncodingAttrStructure(): # CHECK: lvl_types: [65536, 65536, 4406637494272] print(f"lvl_types: {casted.lvl_types}") - # CHECK: lvl_types_enum: [, , ] - print(f"lvl_types_enum: {casted.lvl_types_enum}") + # CHECK: lvl_formats_enum: [, , ] + print(f"lvl_formats_enum: {casted.lvl_formats_enum}") # CHECK: structured_n: 2 print(f"structured_n: {casted.structured_n}") # CHECK: structured_m: 4 @@ -96,7 +96,10 @@ def testEncodingAttrStructure(): # CHECK: created_equal: False print(f"created_equal: {created == casted}") - built_2_4 = st.EncodingAttr.build_level_type(st.LevelType.n_out_of_m, 2, 4) + built_2_4 = st.EncodingAttr.build_level_type( + st.LevelFormat.n_out_of_m, [], 2, 4 + ) + built_dense = st.EncodingAttr.build_level_type(st.LevelFormat.dense) dim_to_lvl = AffineMap.get( 2, 0, @@ -118,7 +121,7 @@ def testEncodingAttrStructure(): ], ) built = st.EncodingAttr.get( - [st.LevelType.dense, st.LevelType.dense, built_2_4], + [built_dense, built_dense, built_2_4], dim_to_lvl, lvl_to_dim, 0, From 1ec81976e4937ea9a09cefd41f25b3c5d1394f2c Mon Sep 17 00:00:00 2001 From: Jim Ingham Date: Tue, 13 Feb 2024 16:48:42 -0800 Subject: [PATCH 082/240] Temporarily skip this test for Python 3.9. 
When the parsed command python code is run on 3.9, I get: File ".../lib/python3.9/site-packages/lldb/plugins/parsed_cmd.py", line 124, in translate_value return cls.translators[value_type](value) TypeError: 'staticmethod' object is not callable But this works correctly in Python 3.10 on macOS and Linux. I'm guessing something changed between those versions, and I'll have to do something to work around the difference. But I'm going to skip the test on 3.9 while I figure that out. --- .../test/API/commands/command/script/add/TestAddParsedCommand.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lldb/test/API/commands/command/script/add/TestAddParsedCommand.py b/lldb/test/API/commands/command/script/add/TestAddParsedCommand.py index bbf330500568b5..6cbe888af99dc6 100644 --- a/lldb/test/API/commands/command/script/add/TestAddParsedCommand.py +++ b/lldb/test/API/commands/command/script/add/TestAddParsedCommand.py @@ -15,6 +15,7 @@ class ParsedCommandTestCase(TestBase): # This crashes on the x86_64 Debian bot, but the failure is not helpful. # Disable the test while I try to find a way to reproduce. 
+ @skipIf(py_version=("<=", (3, 9))) def test(self): self.pycmd_tests() From 7180c23cf695dcfaa62fad4141afc7d7bdc707c1 Mon Sep 17 00:00:00 2001 From: Jeffrey Byrnes Date: Tue, 13 Feb 2024 17:10:23 -0800 Subject: [PATCH 083/240] [SeparateConstOffsetFromGEP] Reland: Reorder trivial GEP chains to separate constants (#81671) Actually update tests w.r.t https://github.com/llvm/llvm-project/commit/9e5a77f252badfc932d1e28ee998746072ddc33f and reland https://github.com/llvm/llvm-project/pull/73056 --- .../Scalar/SeparateConstOffsetFromGEP.cpp | 73 ++++- ...ne-sink-temporal-divergence-swdev407790.ll | 34 ++- .../AMDGPU/splitkit-getsubrangeformask.ll | 251 ++++++++---------- llvm/test/CodeGen/PowerPC/licm-remat.ll | 2 +- .../AMDGPU/reorder-gep-inbounds.ll | 51 ++++ .../AMDGPU/reorder-gep.ll | 175 ++++++++++++ .../NVPTX/lower-gep-reorder.ll | 65 +++++ .../SeparateConstOffsetFromGEP/reorder-gep.ll | 188 +++++++++++++ 8 files changed, 682 insertions(+), 157 deletions(-) create mode 100644 llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll create mode 100644 llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll create mode 100644 llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lower-gep-reorder.ll create mode 100644 llvm/test/Transforms/SeparateConstOffsetFromGEP/reorder-gep.ll diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index 4481375054ecf1..5124909696aadb 100644 --- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -391,6 +391,11 @@ class SeparateConstOffsetFromGEP { /// and returns true if the splitting succeeds. bool splitGEP(GetElementPtrInst *GEP); + /// Tries to reorder the given GEP with the GEP that produces the base if + /// doing so results in producing a constant offset as the outermost + /// index. 
+ bool reorderGEP(GetElementPtrInst *GEP, TargetTransformInfo &TTI); + /// Lower a GEP with multiple indices into multiple GEPs with a single index. /// Function splitGEP already split the original GEP into a variadic part and /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the @@ -964,6 +969,66 @@ SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic, Variadic->eraseFromParent(); } +bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP, + TargetTransformInfo &TTI) { + Type *GEPType = GEP->getResultElementType(); + // TODO: support reordering for non-trivial GEP chains + if (GEPType->isAggregateType() || GEP->getNumIndices() != 1) + return false; + + auto PtrGEP = dyn_cast<GetElementPtrInst>(GEP->getPointerOperand()); + if (!PtrGEP) + return false; + Type *PtrGEPType = PtrGEP->getResultElementType(); + // TODO: support reordering for non-trivial GEP chains + if (PtrGEPType->isAggregateType() || PtrGEP->getNumIndices() != 1) + return false; + + // TODO: support reordering for non-trivial GEP chains + if (PtrGEPType != GEPType || + PtrGEP->getSourceElementType() != GEP->getSourceElementType()) + return false; + + bool NestedNeedsExtraction; + int64_t NestedByteOffset = + accumulateByteOffset(PtrGEP, NestedNeedsExtraction); + if (!NestedNeedsExtraction) + return false; + + unsigned AddrSpace = PtrGEP->getPointerAddressSpace(); + if (!TTI.isLegalAddressingMode(GEP->getResultElementType(), + /*BaseGV=*/nullptr, NestedByteOffset, + /*HasBaseReg=*/true, /*Scale=*/0, AddrSpace)) + return false; + + IRBuilder<> Builder(GEP); + Builder.SetCurrentDebugLocation(GEP->getDebugLoc()); + bool GEPInBounds = GEP->isInBounds(); + bool PtrGEPInBounds = PtrGEP->isInBounds(); + bool IsChainInBounds = GEPInBounds && PtrGEPInBounds; + if (IsChainInBounds) { + auto GEPIdx = GEP->indices().begin(); + auto KnownGEPIdx = computeKnownBits(GEPIdx->get(), *DL); + IsChainInBounds &= KnownGEPIdx.isNonNegative(); + if (IsChainInBounds) { + auto PtrGEPIdx =
PtrGEP->indices().begin(); + auto KnownPtrGEPIdx = computeKnownBits(PtrGEPIdx->get(), *DL); + IsChainInBounds &= KnownPtrGEPIdx.isNonNegative(); + } + } + + // For trivial GEP chains, we can swap the indices. + auto NewSrc = Builder.CreateGEP(PtrGEPType, PtrGEP->getPointerOperand(), + SmallVector<Value *, 4>(GEP->indices())); + cast<GetElementPtrInst>(NewSrc)->setIsInBounds(IsChainInBounds); + auto NewGEP = Builder.CreateGEP(GEPType, NewSrc, + SmallVector<Value *, 4>(PtrGEP->indices())); + cast<GetElementPtrInst>(NewGEP)->setIsInBounds(IsChainInBounds); + GEP->replaceAllUsesWith(NewGEP); + RecursivelyDeleteTriviallyDeadInstructions(GEP); + return true; +} + bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { // Skip vector GEPs. if (GEP->getType()->isVectorTy()) @@ -979,11 +1044,13 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { bool NeedsExtraction; int64_t AccumulativeByteOffset = accumulateByteOffset(GEP, NeedsExtraction); - if (!NeedsExtraction) - return Changed; - TargetTransformInfo &TTI = GetTTI(*GEP->getFunction()); + if (!NeedsExtraction) { + Changed |= reorderGEP(GEP, TTI); + return Changed; + } + // If LowerGEP is disabled, before really splitting the GEP, check whether the // backend supports the addressing mode we are about to produce. If no, this // splitting probably won't be beneficial. 
diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll index 138a6a86cee984..0bb5288f43efc8 100644 --- a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll +++ b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll @@ -273,11 +273,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CHECK-NEXT: ds_write_b32 v0, v58 ; CHECK-NEXT: s_branch .LBB0_7 -; CHECK-NEXT: .LBB0_16: ; %Flow43 +; CHECK-NEXT: .LBB0_16: ; %Flow45 ; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s53 ; CHECK-NEXT: v_mov_b32_e32 v57, v0 -; CHECK-NEXT: .LBB0_17: ; %Flow44 +; CHECK-NEXT: .LBB0_17: ; %Flow46 ; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s52 ; CHECK-NEXT: s_mov_b32 s49, exec_lo @@ -323,11 +323,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CHECK-NEXT: ds_write_b32 v0, v57 ; CHECK-NEXT: s_branch .LBB0_19 -; CHECK-NEXT: .LBB0_22: ; %Flow41 +; CHECK-NEXT: .LBB0_22: ; %Flow43 ; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: s_inst_prefetch 0x2 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s52 -; CHECK-NEXT: .LBB0_23: ; %Flow42 +; CHECK-NEXT: .LBB0_23: ; %Flow44 ; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s49 ; CHECK-NEXT: ; %bb.24: ; in Loop: Header=BB0_5 Depth=1 @@ -340,7 +340,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_or_b32 s43, s4, s43 ; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s43 ; CHECK-NEXT: s_cbranch_execnz .LBB0_5 -; CHECK-NEXT: .LBB0_25: ; %Flow49 +; CHECK-NEXT: .LBB0_25: ; %Flow51 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s42 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 ; CHECK-NEXT: v_mov_b32_e32 
v0, 1 @@ -362,12 +362,10 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: v_cmpx_gt_u32_e64 v47, v41 ; CHECK-NEXT: s_cbranch_execz .LBB0_33 ; CHECK-NEXT: ; %bb.26: -; CHECK-NEXT: s_add_u32 s42, s44, 8 -; CHECK-NEXT: s_addc_u32 s43, s45, 0 -; CHECK-NEXT: s_mov_b32 s44, 0 +; CHECK-NEXT: s_mov_b32 s42, 0 ; CHECK-NEXT: s_branch .LBB0_28 ; CHECK-NEXT: .LBB0_27: ; in Loop: Header=BB0_28 Depth=1 -; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s45 +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s43 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: s_add_u32 s8, s34, 40 @@ -383,12 +381,12 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7] ; CHECK-NEXT: v_add_co_u32 v41, vcc_lo, v0, v41 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc_lo, v47, v41 -; CHECK-NEXT: s_or_b32 s44, vcc_lo, s44 -; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s44 +; CHECK-NEXT: s_or_b32 s42, vcc_lo, s42 +; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s42 ; CHECK-NEXT: s_cbranch_execz .LBB0_33 ; CHECK-NEXT: .LBB0_28: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v41 -; CHECK-NEXT: s_mov_b32 s45, exec_lo +; CHECK-NEXT: s_mov_b32 s43, exec_lo ; CHECK-NEXT: ds_read_b32 v0, v0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: v_lshrrev_b32_e32 v63, 10, v0 @@ -397,15 +395,15 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: v_mul_u32_u24_e32 v1, 0x180, v63 ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 5, v62 ; CHECK-NEXT: v_lshlrev_b32_e32 v4, 5, v72 -; CHECK-NEXT: v_add_co_u32 v2, s4, s42, v1 -; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, s43, 0, s4 +; CHECK-NEXT: v_add_co_u32 v2, s4, s44, v1 +; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, s45, 0, s4 ; CHECK-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 ; CHECK-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v3, vcc_lo ; CHECK-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4 ; CHECK-NEXT: 
v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo ; CHECK-NEXT: s_clause 0x1 -; CHECK-NEXT: global_load_dwordx4 v[4:7], v[0:1], off -; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off +; CHECK-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:8 +; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off offset:8 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_xor_b32_e32 v46, v9, v5 ; CHECK-NEXT: v_xor_b32_e32 v45, v8, v4 @@ -417,8 +415,8 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_cbranch_execz .LBB0_27 ; CHECK-NEXT: ; %bb.29: ; in Loop: Header=BB0_28 Depth=1 ; CHECK-NEXT: s_clause 0x1 -; CHECK-NEXT: global_load_dwordx2 v[58:59], v[2:3], off offset:16 -; CHECK-NEXT: global_load_dwordx2 v[60:61], v[0:1], off offset:16 +; CHECK-NEXT: global_load_dwordx2 v[58:59], v[2:3], off offset:24 +; CHECK-NEXT: global_load_dwordx2 v[60:61], v[0:1], off offset:24 ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 4, v45 ; CHECK-NEXT: v_alignbit_b32 v1, v46, v45, 12 ; CHECK-NEXT: v_and_b32_e32 v2, 0xf0000, v45 diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll b/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll index 8c806e76bde6ec..b87439a9d6fae7 100644 --- a/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll +++ b/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll @@ -31,205 +31,188 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sgpr_32 = COPY $sgpr10 ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sgpr_32 = COPY $sgpr8 ; CHECK-NEXT: undef [[S_LOAD_DWORDX2_IMM:%[0-9]+]].sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 232, 0 :: (invariant load (s64) from %ir.39, addrspace 4) + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %125:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: KILL undef %125:sgpr_128 ; CHECK-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY5]], 4, 
implicit-def dead $scc ; CHECK-NEXT: [[S_LSHL_B32_1:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY4]], 4, implicit-def dead $scc ; CHECK-NEXT: [[S_LSHL_B32_2:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 4, implicit-def dead $scc ; CHECK-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_]], 31, implicit-def dead $scc ; CHECK-NEXT: [[S_ASHR_I32_1:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_1]], 31, implicit-def dead $scc - ; CHECK-NEXT: [[S_ASHR_I32_2:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_2]], 31, implicit-def dead $scc ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]].sub1:sgpr_128 = S_AND_B32 [[S_LOAD_DWORDX2_IMM]].sub1, 65535, implicit-def dead $scc + ; CHECK-NEXT: [[S_ASHR_I32_2:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_2]], 31, implicit-def dead $scc + ; CHECK-NEXT: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM]], 29, implicit-def dead $scc + ; CHECK-NEXT: [[S_SUB_I32_1:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM]], 30, implicit-def dead $scc ; CHECK-NEXT: undef [[S_ADD_U32_:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_2]], implicit-def $scc ; CHECK-NEXT: [[S_ADD_U32_:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_]], 16, 0 :: (invariant load (s128) from %ir.81, addrspace 4) ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM1:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM undef %74:sreg_64, 0, 0 :: (invariant load (s128) from `ptr addrspace(4) undef`, addrspace 4) - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %132:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: KILL undef %74:sreg_64 - ; CHECK-NEXT: KILL undef %132:sgpr_128 ; CHECK-NEXT: KILL [[S_ADD_U32_]].sub0, [[S_ADD_U32_]].sub1 ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM 
[[S_LOAD_DWORDX4_IMM]], 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; CHECK-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sgpr_128 = S_MOV_B32 0 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET undef %118:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], undef %89:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: KILL undef %89:sgpr_128 - ; CHECK-NEXT: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM]], 29, implicit-def dead $scc - ; CHECK-NEXT: [[S_SUB_I32_1:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM]], 30, implicit-def dead $scc + ; CHECK-NEXT: KILL undef %118:sgpr_128 ; CHECK-NEXT: [[S_SUB_I32_2:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM1]], 31, implicit-def dead $scc - ; CHECK-NEXT: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY6]], 64, implicit-def $scc - ; CHECK-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %54:sreg_32, 0, implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_2:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_1]], [[S_LSHL_B32_]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_2:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_3:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_1]], [[S_LSHL_B32_1]], implicit-def $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM2:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_2]], 0, 0 :: (invariant load (s128) from 
%ir.87, addrspace 4) - ; CHECK-NEXT: [[S_ADD_U32_3:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_4:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_1]], [[S_LSHL_B32_2]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_4:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_ASHR_I32_3:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 undef %171:sreg_32, 31, implicit-def dead $scc - ; CHECK-NEXT: undef [[S_ADD_U32_5:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_1]], undef %171:sreg_32, implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_5:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_6:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_6:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_7:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_1]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_7:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_8:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, undef %171:sreg_32, implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_8:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_ADD_U32_9:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY7]].sub0, 224, implicit-def $scc - ; CHECK-NEXT: [[S_ADDC_U32_1:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %51:sreg_32, 0, implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_10:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_9]], [[S_LSHL_B32_]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_10:%[0-9]+]].sub1:sreg_64 = 
S_ADDC_U32 [[S_ADDC_U32_1]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_11:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_9]], [[S_LSHL_B32_1]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_11:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_1]], [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_12:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_9]], [[S_LSHL_B32_2]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_12:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_1]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_ADD_U32_13:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY7]].sub0, 576, implicit-def $scc - ; CHECK-NEXT: [[S_ADDC_U32_2:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %51:sreg_32, 0, implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_14:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_13]], [[S_LSHL_B32_]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_14:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_2]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_15:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_13]], [[S_LSHL_B32_2]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_15:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_2]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_16:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_13]], undef %171:sreg_32, implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_16:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_2]], [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_17:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY8]], [[S_LSHL_B32_]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_17:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %48:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_18:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_1]], 
implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_18:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_19:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_2]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_19:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_1:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_1:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_2:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_1]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_2:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_3:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_2]], implicit-def $scc + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM2:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_1]], 64, 0 :: (invariant load (s128) from %ir.87, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM3:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_2]], 64, 0 :: (invariant load (s128) from %ir.93, addrspace 4) + ; CHECK-NEXT: KILL [[S_ADD_U32_1]].sub0, [[S_ADD_U32_1]].sub1 + ; CHECK-NEXT: KILL [[S_ADD_U32_2]].sub0, [[S_ADD_U32_2]].sub1 + ; CHECK-NEXT: [[S_ADD_U32_3:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_ASHR_I32_3:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 undef %169:sreg_32, 31, implicit-def dead $scc + ; CHECK-NEXT: undef [[S_ADD_U32_4:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], undef %169:sreg_32, implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_4:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_3]], implicit-def dead $scc, implicit 
$scc + ; CHECK-NEXT: undef [[S_ADD_U32_5:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_5:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_6:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_1]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_6:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_7:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, undef %169:sreg_32, implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_7:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_8:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_2]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_8:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_9:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY8]], [[S_LSHL_B32_]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_9:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %48:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_10:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_1]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_10:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_11:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_2]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_11:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_]], 16, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 
[[S_LSHL_B32_2]], 16, implicit-def dead $scc ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], undef %312:sreg_32, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], undef %301:sreg_32, 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[S_MOV_B32_]], 16, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET undef %118:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %367:sgpr_128, undef %368:sreg_32, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %378:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM3:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_3]], 0, 0 :: (invariant load (s128) from %ir.92, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM4:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_4]], 0, 0 :: (invariant load (s128) from %ir.97, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM5:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 0, 0 :: (invariant load (s128) from %ir.104, addrspace 4) - ; CHECK-NEXT: 
[[S_LOAD_DWORDX4_IMM6:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_6]], 0, 0 :: (invariant load (s128) from %ir.109, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM7:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_7]], 0, 0 :: (invariant load (s128) from %ir.114, addrspace 4) + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %356:sgpr_128, undef %357:sreg_32, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %367:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM4:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_3]], 64, 0 :: (invariant load (s128) from %ir.99, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM5:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_4]], 64, 0 :: (invariant load (s128) from %ir.107, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM6:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_6]], 0, 0 :: (invariant load (s128) from %ir.117, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM7:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_7]], 0, 0 :: (invariant load (s128) from %ir.124, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %362:sgpr_128, [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %373:sgpr_128, [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %351:sgpr_128, 
[[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %362:sgpr_128, [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM3]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_ADD_I32_2:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM]], -98, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_3:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM1]], -114, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_4:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM2]], -130, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_5:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM2]], -178, implicit-def dead $scc - ; CHECK-NEXT: undef [[S_ADD_U32_20:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY10]], [[S_LSHL_B32_]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_20:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %42:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_21:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_21:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_22:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_1]], implicit-def $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM8:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_8]], 0, 0 :: (invariant load (s128) from %ir.121, addrspace 4) - ; CHECK-NEXT: [[S_ADD_U32_22:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_23:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_2]], 
implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_23:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM8:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 0, 0 :: (invariant load (s128) from %ir.112, addrspace 4) + ; CHECK-NEXT: undef [[S_ADD_U32_12:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY10]], [[S_LSHL_B32_]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_12:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %42:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_13:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_13:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_14:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_1]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_14:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_15:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_2]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_15:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_LSHL_B32_3:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY12]], 4, implicit-def dead $scc - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM3]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_ADD_I32_6:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_3]], 16, implicit-def dead $scc - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %394:sgpr_128, [[S_ADD_I32_6]], 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: 
[[BUFFER_LOAD_FORMAT_X_IDXEN4:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM9:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY7]], 224, 0 :: (invariant load (s128) from %ir.126, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM10:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_10]], 0, 0 :: (invariant load (s128) from %ir.127, addrspace 4) + ; CHECK-NEXT: [[S_ADD_I32_6:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_3]], 16, implicit-def dead $scc + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %383:sgpr_128, [[S_ADD_I32_6]], 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN5:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM11:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_11]], 0, 0 :: (invariant load (s128) from %ir.132, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM12:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_12]], 0, 0 :: (invariant load (s128) from %ir.137, addrspace 4) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN6:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN7:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN8:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; 
CHECK-NEXT: [[S_LOAD_DWORDX4_IMM9:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 224, 0 :: (invariant load (s128) from %ir.129, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM10:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY7]], 224, 0 :: (invariant load (s128) from %ir.145, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM11:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 576, 0 :: (invariant load (s128) from %ir.150, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN6:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM12:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_6]], 224, 0 :: (invariant load (s128) from %ir.134, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM13:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_7]], 576, 0 :: (invariant load (s128) from %ir.162, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN7:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN8:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_ADD_I32_7:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM4]], -217, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_8:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -233, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_9:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM5]], -249, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_10:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM3]], -297, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_11:%[0-9]+]]:sreg_32 = S_ADD_I32 
[[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -313, implicit-def dead $scc + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM14:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_8]], 224, 0 :: (invariant load (s128) from %ir.140, addrspace 4) ; CHECK-NEXT: [[S_ADD_I32_12:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -329, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_13:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -345, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_14:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM6]], -441, implicit-def dead $scc - ; CHECK-NEXT: [[S_ADD_U32_24:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], 160, implicit-def $scc - ; CHECK-NEXT: [[S_ADDC_U32_3:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %36:sreg_32, 0, implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_25:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_24]], [[S_LSHL_B32_2]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_25:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_3]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: undef [[S_ADD_U32_16:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY2]], [[S_LSHL_B32_2]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_16:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %36:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_LSHL_B32_4:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY13]], 4, implicit-def dead $scc - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN9:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN9:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_ASHR_I32_4:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_4]], 31, implicit-def dead $scc - ; CHECK-NEXT: 
undef [[S_ADD_U32_26:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_24]], [[S_LSHL_B32_4]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_26:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_3]], [[S_ASHR_I32_4]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_ADD_U32_27:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]].sub0, 168, implicit-def $scc - ; CHECK-NEXT: [[S_ADDC_U32_4:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %57:sreg_32, 0, implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM13:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_14]], 0, 0 :: (invariant load (s128) from %ir.147, addrspace 4) + ; CHECK-NEXT: undef [[S_ADD_U32_17:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY2]], [[S_LSHL_B32_4]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_17:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %36:sreg_32, [[S_ASHR_I32_4]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_LSHL_B32_5:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY5]], 3, implicit-def dead $scc - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN10:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM11]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN10:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_ASHR_I32_5:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_5]], 31, implicit-def dead $scc - ; CHECK-NEXT: undef [[S_ADD_U32_28:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_27]], [[S_LSHL_B32_5]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_28:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_5]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_ADD_U32_28]], 0, 0 :: (invariant load (s32) from %ir.269, align 8, addrspace 4) - ; CHECK-NEXT: 
[[S_LOAD_DWORDX4_IMM14:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_15]], 0, 0 :: (invariant load (s128) from %ir.154, addrspace 4) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN11:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM15:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_16]], 0, 0 :: (invariant load (s128) from %ir.159, addrspace 4) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN12:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN13:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM13]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: undef [[S_ADD_U32_18:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_5]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_18:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_5]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_ADD_U32_18]], 168, 0 :: (invariant load (s32) from %ir.273, align 8, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM15:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_8]], 576, 0 :: (invariant load (s128) from %ir.157, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN11:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN12:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; 
CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN13:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM11]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]].sub3:sgpr_128 = S_MOV_B32 553734060 ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]].sub2:sgpr_128 = S_MOV_B32 -1 ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]] - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM16:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_17]], 0, 0 :: (invariant load (s128) from %ir.167, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM16:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_9]], 0, 0 :: (invariant load (s128) from %ir.170, addrspace 4) ; CHECK-NEXT: [[COPY15:%[0-9]+]].sub1:sgpr_128 = COPY [[S_MOV_B32_]].sub1 ; CHECK-NEXT: [[COPY15:%[0-9]+]].sub0:sgpr_128 = COPY [[S_LOAD_DWORD_IMM]] ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY15]], 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN14:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN15:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM15]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM17:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_18]], 0, 0 :: (invariant load (s128) from %ir.175, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM18:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_19]], 0, 0 :: (invariant load (s128) from %ir.180, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN14:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM15]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 
(s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN15:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM13]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM17:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_10]], 0, 0 :: (invariant load (s128) from %ir.178, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM18:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_11]], 0, 0 :: (invariant load (s128) from %ir.183, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN16:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM16]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_LSHL_B32_6:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY4]], 3, implicit-def dead $scc ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_ASHR_I32_6:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_6]], 31, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_15:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM4]], -467, implicit-def dead $scc - ; CHECK-NEXT: undef [[S_ADD_U32_29:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_27]], [[S_LSHL_B32_6]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_29:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_6]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[S_ADD_U32_29]], 0, 0 :: (invariant load (s64) from %ir.277, addrspace 4) + ; CHECK-NEXT: undef [[S_ADD_U32_19:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_6]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_19:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_6]], implicit-def dead $scc, implicit $scc + ; 
CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[S_ADD_U32_19]], 168, 0 :: (invariant load (s64) from %ir.282, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM17]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM18]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM19:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_20]], 0, 0 :: (invariant load (s128) from %ir.202, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM20:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_21]], 0, 0 :: (invariant load (s128) from %ir.208, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM19:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_12]], 0, 0 :: (invariant load (s128) from %ir.205, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM20:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_13]], 0, 0 :: (invariant load (s128) from %ir.211, addrspace 4) ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]] - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM21:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_22]], 0, 0 :: (invariant load (s128) from %ir.213, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM21:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_14]], 0, 0 :: (invariant load (s128) from %ir.216, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM22:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_15]], 0, 0 :: (invariant load (s128) from %ir.221, addrspace 4) ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORDX2_IMM1]].sub1, 65535, implicit-def dead $scc ; CHECK-NEXT: [[COPY16:%[0-9]+]].sub0:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM1]].sub0 ; CHECK-NEXT: [[COPY16:%[0-9]+]].sub1:sgpr_128 = COPY [[S_AND_B32_]] ; CHECK-NEXT: 
[[S_BUFFER_LOAD_DWORD_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY16]], 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM22:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_23]], 0, 0 :: (invariant load (s128) from %ir.218, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN17:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM19]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN18:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM20]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_LSHL_B32_7:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 3, implicit-def dead $scc ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN19:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM21]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[S_LSHL_B32_7:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 3, implicit-def dead $scc + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN20:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_ASHR_I32_7:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_7]], 31, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_16:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM5]], -468, implicit-def dead $scc - ; CHECK-NEXT: undef [[S_ADD_U32_30:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_27]], [[S_LSHL_B32_7]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_30:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_7]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN20:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], 
[[S_LOAD_DWORDX4_IMM22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM2:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[S_ADD_U32_30]], 0, 0 :: (invariant load (s64) from %ir.287, addrspace 4) + ; CHECK-NEXT: undef [[S_ADD_U32_20:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_7]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_20:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_7]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM2:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[S_ADD_U32_20]], 168, 0 :: (invariant load (s64) from %ir.293, addrspace 4) ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]] ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORDX2_IMM2]].sub1, 65535, implicit-def dead $scc ; CHECK-NEXT: [[COPY17:%[0-9]+]].sub0:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM2]].sub0 ; CHECK-NEXT: [[COPY17:%[0-9]+]].sub1:sgpr_128 = COPY [[S_AND_B32_1]] ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY17]], 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_25]], 0, 0 :: (invariant load (s128) from %ir.253, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %484:sreg_64, 0, 0 :: (invariant load (s32) from `ptr addrspace(4) undef`, addrspace 4) - ; CHECK-NEXT: KILL [[S_ADD_U32_25]].sub0, [[S_ADD_U32_25]].sub1 - ; CHECK-NEXT: KILL undef %484:sreg_64 + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_16]], 160, 0 :: (invariant load (s128) from %ir.256, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %469:sreg_64, 0, 0 :: (invariant load (s32) from `ptr addrspace(4) undef`, addrspace 4) + ; CHECK-NEXT: KILL 
[[S_ADD_U32_16]].sub0, [[S_ADD_U32_16]].sub1 + ; CHECK-NEXT: KILL undef %469:sreg_64 ; CHECK-NEXT: KILL [[COPY17]].sub0_sub1_sub2, [[COPY17]].sub3 ; CHECK-NEXT: [[S_LSHL_B32_8:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY14]], 3, implicit-def dead $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_26]], 0, 0 :: (invariant load (s128) from %ir.261, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_17]], 160, 0 :: (invariant load (s128) from %ir.265, addrspace 4) ; CHECK-NEXT: [[S_ASHR_I32_8:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_8]], 31, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_17:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM6]], -469, implicit-def dead $scc - ; CHECK-NEXT: undef [[S_ADD_U32_31:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_27]], [[S_LSHL_B32_8]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_31:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_8]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_ADD_U32_31]], 0, 0 :: (invariant load (s32) from %ir.298, align 8, addrspace 4) + ; CHECK-NEXT: undef [[S_ADD_U32_21:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_8]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_21:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_8]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_ADD_U32_21]], 168, 0 :: (invariant load (s32) from %ir.305, align 8, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN21:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM23]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN22:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], 
[[S_LOAD_DWORDX4_IMM24]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM24]] ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM23]] + ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM24]] ; CHECK-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORD_IMM1]], 65535, implicit-def dead $scc ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]] ; CHECK-NEXT: [[COPY18:%[0-9]+]].sub1:sgpr_128 = COPY [[S_AND_B32_2]] @@ -241,24 +224,22 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: [[S_ADD_I32_21:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -507, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_22:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -539, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_23:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM7]], -473, implicit-def dead $scc - ; CHECK-NEXT: [[S_ADD_U32_32:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], 96, implicit-def $scc - ; CHECK-NEXT: [[S_ADDC_U32_5:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %33:sreg_32, 0, implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_33:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_32]], [[S_LSHL_B32_]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_33:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM25:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_33]], 0, 0 :: (invariant load (s128) from %ir.316, addrspace 4) - ; CHECK-NEXT: undef [[S_ADD_U32_34:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_32]], [[S_LSHL_B32_1]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_34:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM26:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_34]], 0, 0 :: (invariant load (s128) from 
%ir.321, addrspace 4) - ; CHECK-NEXT: undef [[S_ADD_U32_35:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_32]], [[S_LSHL_B32_2]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_U32_35:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM27:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_35]], 0, 0 :: (invariant load (s128) from %ir.326, addrspace 4) + ; CHECK-NEXT: undef [[S_ADD_U32_22:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY1]], [[S_LSHL_B32_]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_22:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %33:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM25:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_22]], 96, 0 :: (invariant load (s128) from %ir.323, addrspace 4) + ; CHECK-NEXT: undef [[S_ADD_U32_23:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY1]], [[S_LSHL_B32_1]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_23:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %33:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM26:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_23]], 96, 0 :: (invariant load (s128) from %ir.329, addrspace 4) + ; CHECK-NEXT: undef [[S_ADD_U32_24:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY1]], [[S_LSHL_B32_2]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_24:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %33:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM27:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_24]], 96, 0 :: (invariant load (s128) from %ir.335, addrspace 4) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN23:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM25]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN24:%[0-9]+]]:vgpr_32 = 
BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM26]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN25:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM27]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM25]] - ; CHECK-NEXT: KILL [[V_MOV_B32_e32_]] ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM26]] + ; CHECK-NEXT: KILL [[V_MOV_B32_e32_]] ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM27]] + ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM25]] ; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -2, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec ; CHECK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -1, [[BUFFER_LOAD_FORMAT_X_IDXEN1]], 0, implicit $exec ; CHECK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -3, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec @@ -370,13 +351,13 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: [[V_OR_B32_e64_64:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_63]], [[V_ADD_U32_e64_28]], implicit $exec ; CHECK-NEXT: [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -593, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec ; CHECK-NEXT: [[V_OR_B32_e64_65:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_64]], [[V_ADD_U32_e64_29]], implicit $exec - ; CHECK-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %559:sreg_64, 0, 0 :: (invariant load (s256) from `ptr addrspace(4) undef`, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %542:sreg_64, 0, 0 :: (invariant load (s256) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[V_OR_B32_e64_66:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_65]], [[V_ADD_U32_e64_30]], implicit $exec ; CHECK-NEXT: [[S_ADD_I32_24:%[0-9]+]]:sreg_32 = S_ADD_I32 
[[S_BUFFER_LOAD_DWORD_IMM8]], -594, implicit-def dead $scc ; CHECK-NEXT: [[V_OR_B32_e64_67:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_24]], [[V_OR_B32_e64_66]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 0, [[V_OR_B32_e64_67]], implicit $exec ; CHECK-NEXT: undef [[V_CNDMASK_B32_e64_:%[0-9]+]].sub3:vreg_128 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_EQ_U32_e64_]], implicit $exec - ; CHECK-NEXT: IMAGE_STORE_V4_V2_gfx10 [[V_CNDMASK_B32_e64_]], undef %573:vreg_64, [[S_LOAD_DWORDX8_IMM]], 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 8) + ; CHECK-NEXT: IMAGE_STORE_V4_V2_gfx10 [[V_CNDMASK_B32_e64_]], undef %556:vreg_64, [[S_LOAD_DWORDX8_IMM]], 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 .expVert: %0 = extractelement <31 x i32> %userData, i64 2 diff --git a/llvm/test/CodeGen/PowerPC/licm-remat.ll b/llvm/test/CodeGen/PowerPC/licm-remat.ll index ffdaf5d2481e3b..cf3e3ac089a498 100644 --- a/llvm/test/CodeGen/PowerPC/licm-remat.ll +++ b/llvm/test/CodeGen/PowerPC/licm-remat.ll @@ -21,7 +21,7 @@ define linkonce_odr void @ZN6snappyDecompressor_(ptr %this, ptr %writer) { ; CHECK-LABEL: ZN6snappyDecompressor_: ; CHECK: # %bb.0: # %entry ; CHECK: addis 4, 2, .L__ModuleStringPool@toc@ha -; CHECK: addi 25, 4, .L__ModuleStringPool@toc@l +; CHECK: addi 26, 4, .L__ModuleStringPool@toc@l ; CHECK: .LBB0_2: # %for.cond ; CHECK-NOT: addis {{[0-9]+}}, 2, .L__ModuleStringPool@toc@ha ; CHECK: bctrl diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll new file mode 100644 index 00000000000000..c24bbd5f658f94 --- /dev/null +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 
3 +; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -S -passes=separate-const-offset-from-gep < %s | FileCheck %s + +define void @inboundsPossiblyNegative(ptr %in.ptr, i32 %in.idx1) { +; CHECK-LABEL: define void @inboundsPossiblyNegative( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[IN_IDX1]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr <2 x i8>, ptr [[IN_PTR]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr <2 x i8>, ptr [[TMP0]], i32 1 +; CHECK-NEXT: ret void +; +entry: + %const1 = getelementptr inbounds <2 x i8>, ptr %in.ptr, i32 1 + %idx1 = getelementptr inbounds <2 x i8>, ptr %const1, i32 %in.idx1 + ret void +} + +define void @inboundsNonNegative(ptr %in.ptr, i32 %in.idx1) { +; CHECK-LABEL: define void @inboundsNonNegative( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i32 [[IN_IDX1]], 2147483647 +; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[IN_IDX1_NNEG]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <2 x i8>, ptr [[IN_PTR]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i8>, ptr [[TMP0]], i32 1 +; CHECK-NEXT: ret void +; +entry: + %in.idx1.nneg = and i32 %in.idx1, 2147483647 + %const1 = getelementptr inbounds <2 x i8>, ptr %in.ptr, i32 1 + %idx1 = getelementptr inbounds <2 x i8>, ptr %const1, i32 %in.idx1.nneg + ret void +} + +define void @inboundsNonchained(ptr %in.ptr, i32 %in.idx1) { +; CHECK-LABEL: define void @inboundsNonchained( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i32 [[IN_IDX1]], 2147483647 +; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[IN_IDX1_NNEG]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr <2 x i8>, ptr [[IN_PTR]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr <2 x i8>, ptr [[TMP0]], i32 1 +; 
CHECK-NEXT: ret void +; +entry: + %in.idx1.nneg = and i32 %in.idx1, 2147483647 + %const1 = getelementptr inbounds <2 x i8>, ptr %in.ptr, i32 1 + %idx1 = getelementptr <2 x i8>, ptr %const1, i32 %in.idx1.nneg + ret void +} diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll new file mode 100644 index 00000000000000..7137f0fb66fdb9 --- /dev/null +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll @@ -0,0 +1,175 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a --start-before=separate-const-offset-from-gep < %s | FileCheck %s + +define protected amdgpu_kernel void @sink_addr(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) { +; CHECK-LABEL: sink_addr: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_lshl_b32 s3, s1, 1 +; CHECK-NEXT: s_add_i32 s0, s0, s3 +; CHECK-NEXT: s_lshl_b32 s2, s2, 1 +; CHECK-NEXT: s_add_i32 s0, s0, s2 +; CHECK-NEXT: s_cmp_lg_u32 s1, 0 +; CHECK-NEXT: s_cbranch_scc1 .LBB0_2 +; CHECK-NEXT: ; %bb.1: ; %bb.1 +; CHECK-NEXT: v_mov_b32_e32 v12, s0 +; CHECK-NEXT: ds_read_b128 v[0:3], v12 +; CHECK-NEXT: ds_read_b128 v[4:7], v12 offset:512 +; CHECK-NEXT: ds_read_b128 v[8:11], v12 offset:1024 +; CHECK-NEXT: ds_read_b128 v[12:15], v12 offset:1536 +; CHECK-NEXT: s_waitcnt lgkmcnt(3) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[0:3] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_waitcnt lgkmcnt(2) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[4:7] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_waitcnt lgkmcnt(1) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[8:11] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[12:15] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: .LBB0_2: ; %end +; 
CHECK-NEXT: s_add_i32 s1, s0, 0x200 +; CHECK-NEXT: v_mov_b32_e32 v0, s0 +; CHECK-NEXT: s_add_i32 s2, s0, 0x400 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_mov_b32_e32 v0, s1 +; CHECK-NEXT: s_add_i32 s3, s0, 0x600 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_mov_b32_e32 v0, s2 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_mov_b32_e32 v0, s3 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_endpgm +entry: + %base = getelementptr half, ptr addrspace(3) %in.ptr, i32 %in.idx0 + %idx0 = getelementptr half, ptr addrspace(3) %base, i32 %in.idx1 + %const1 = getelementptr half, ptr addrspace(3) %base, i32 256 + %idx1 = getelementptr half, ptr addrspace(3) %const1, i32 %in.idx1 + %const2 = getelementptr half, ptr addrspace(3) %base, i32 512 + %idx2 = getelementptr half, ptr addrspace(3) %const2, i32 %in.idx1 + %const3 = getelementptr half, ptr addrspace(3) %base, i32 768 + %idx3 = getelementptr half, ptr addrspace(3) %const3, i32 %in.idx1 + %cmp0 = icmp eq i32 %in.idx0, 0 + br i1 %cmp0, label %bb.1, label %end + +bb.1: + %val0 = load <8 x half>, ptr addrspace(3) %idx0, align 16 + %val1 = load <8 x half>, ptr addrspace(3) %idx1, align 16 + %val2 = load <8 x half>, ptr addrspace(3) %idx2, align 16 + %val3 = load <8 x half>, ptr addrspace(3) %idx3, align 16 + call void asm sideeffect "; use $0", "v"(<8 x half> %val0) + call void asm sideeffect "; use $0", "v"(<8 x half> %val1) + call void asm sideeffect "; use $0", "v"(<8 x half> %val2) + call void asm sideeffect "; use $0", "v"(<8 x half> %val3) + br label %end + +end: + call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx0) + call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx1) + call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx2) + call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx3) + ret void 
+} + +define protected amdgpu_kernel void @illegal_addr_mode(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) { +; CHECK-LABEL: illegal_addr_mode: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_load_dwordx4 s[4:7], s[6:7], 0x0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_lshl_b32 s0, s5, 1 +; CHECK-NEXT: s_lshl_b32 s1, s6, 1 +; CHECK-NEXT: s_add_i32 s3, s4, s0 +; CHECK-NEXT: s_add_i32 s3, s3, s1 +; CHECK-NEXT: s_add_i32 s2, s3, 0x12a60 +; CHECK-NEXT: s_add_i32 s1, s3, 0x12c60 +; CHECK-NEXT: s_add_i32 s0, s3, 0x12ed8 +; CHECK-NEXT: s_cmp_lg_u32 s5, 0 +; CHECK-NEXT: s_cbranch_scc1 .LBB1_2 +; CHECK-NEXT: ; %bb.1: ; %bb.1 +; CHECK-NEXT: v_mov_b32_e32 v0, s3 +; CHECK-NEXT: v_mov_b32_e32 v4, s2 +; CHECK-NEXT: v_mov_b32_e32 v8, s1 +; CHECK-NEXT: v_mov_b32_e32 v12, s0 +; CHECK-NEXT: ds_read_b128 v[0:3], v0 +; CHECK-NEXT: ds_read_b128 v[4:7], v4 +; CHECK-NEXT: ds_read_b128 v[8:11], v8 +; CHECK-NEXT: ds_read_b128 v[12:15], v12 +; CHECK-NEXT: s_waitcnt lgkmcnt(3) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[0:3] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_waitcnt lgkmcnt(2) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[4:7] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_waitcnt lgkmcnt(1) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[8:11] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v[12:15] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: .LBB1_2: ; %end +; CHECK-NEXT: v_mov_b32_e32 v0, s3 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_mov_b32_e32 v0, s2 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_mov_b32_e32 v0, s1 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_mov_b32_e32 v0, s0 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use v0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_endpgm +entry: + %base = getelementptr half, ptr addrspace(3) %in.ptr, i32 %in.idx0 + %idx0 = 
getelementptr half, ptr addrspace(3) %base, i32 %in.idx1 + %const1 = getelementptr half, ptr addrspace(3) %base, i32 38192 + %idx1 = getelementptr half, ptr addrspace(3) %const1, i32 %in.idx1 + %const2 = getelementptr half, ptr addrspace(3) %base, i32 38448 + %idx2 = getelementptr half, ptr addrspace(3) %const2, i32 %in.idx1 + %const3 = getelementptr half, ptr addrspace(3) %base, i32 38764 + %idx3 = getelementptr half, ptr addrspace(3) %const3, i32 %in.idx1 + %cmp0 = icmp eq i32 %in.idx0, 0 + br i1 %cmp0, label %bb.1, label %end + +bb.1: + %val0 = load <8 x half>, ptr addrspace(3) %idx0, align 16 + %val1 = load <8 x half>, ptr addrspace(3) %idx1, align 16 + %val2 = load <8 x half>, ptr addrspace(3) %idx2, align 16 + %val3 = load <8 x half>, ptr addrspace(3) %idx3, align 16 + call void asm sideeffect "; use $0", "v"(<8 x half> %val0) + call void asm sideeffect "; use $0", "v"(<8 x half> %val1) + call void asm sideeffect "; use $0", "v"(<8 x half> %val2) + call void asm sideeffect "; use $0", "v"(<8 x half> %val3) + br label %end + +end: + call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx0) + call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx1) + call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx2) + call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx3) + ret void +} diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lower-gep-reorder.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lower-gep-reorder.ll new file mode 100644 index 00000000000000..a91c8172177f9d --- /dev/null +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lower-gep-reorder.ll @@ -0,0 +1,65 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -mtriple=nvptx64-nvidia-cuda -S -passes=separate-const-offset-from-gep < %s | FileCheck %s + +define protected amdgpu_kernel void @sink_addr(ptr %in.ptr, i64 %in.idx0, i64 %in.idx1) { +; CHECK-LABEL: define protected 
amdgpu_kernel void @sink_addr( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IDX0:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 [[IN_IDX0]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 [[IN_IDX0]], i64 0 +; CHECK-NEXT: [[CONST11:%.*]] = getelementptr i8, ptr [[TMP0]], i64 2048 +; CHECK-NEXT: [[IDX1:%.*]] = getelementptr i64, ptr [[CONST11]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 [[IN_IDX0]], i64 0 +; CHECK-NEXT: [[CONST22:%.*]] = getelementptr i8, ptr [[TMP1]], i64 4096 +; CHECK-NEXT: [[IDX2:%.*]] = getelementptr i64, ptr [[CONST22]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 [[IN_IDX0]], i64 0 +; CHECK-NEXT: [[CONST33:%.*]] = getelementptr i8, ptr [[TMP2]], i64 6144 +; CHECK-NEXT: [[IDX3:%.*]] = getelementptr i64, ptr [[CONST33]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[CMP0:%.*]] = icmp eq i64 [[IN_IDX0]], 0 +; CHECK-NEXT: br i1 [[CMP0]], label [[BB_1:%.*]], label [[END:%.*]] +; CHECK: bb.1: +; CHECK-NEXT: [[VAL0:%.*]] = load <8 x i64>, ptr [[IDX0]], align 16 +; CHECK-NEXT: [[VAL1:%.*]] = load <8 x i64>, ptr [[IDX1]], align 16 +; CHECK-NEXT: [[VAL2:%.*]] = load <8 x i64>, ptr [[IDX2]], align 16 +; CHECK-NEXT: [[VAL3:%.*]] = load <8 x i64>, ptr [[IDX3]], align 16 +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: ret void +; +entry: + %idx0 = getelementptr [8192 x i64], ptr %in.ptr, i64 %in.idx0, i64 %in.idx1 + %const1 = getelementptr [8192 x i64], ptr %in.ptr, i64 %in.idx0, i64 256 + %idx1 = 
getelementptr i64, ptr %const1, i64 %in.idx1 + %const2 = getelementptr [8192 x i64], ptr %in.ptr, i64 %in.idx0, i64 512 + %idx2 = getelementptr i64, ptr %const2, i64 %in.idx1 + %const3 = getelementptr [8192 x i64], ptr %in.ptr, i64 %in.idx0, i64 768 + %idx3 = getelementptr i64, ptr %const3, i64 %in.idx1 + %cmp0 = icmp eq i64 %in.idx0, 0 + br i1 %cmp0, label %bb.1, label %end + +bb.1: + %val0 = load <8 x i64>, ptr %idx0, align 16 + %val1 = load <8 x i64>, ptr %idx1, align 16 + %val2 = load <8 x i64>, ptr %idx2, align 16 + %val3 = load <8 x i64>, ptr %idx3, align 16 + call void asm sideeffect "; use $0", "v"(<8 x i64> %val0) + call void asm sideeffect "; use $0", "v"(<8 x i64> %val1) + call void asm sideeffect "; use $0", "v"(<8 x i64> %val2) + call void asm sideeffect "; use $0", "v"(<8 x i64> %val3) + br label %end + +end: + call void asm sideeffect "; use $0", "v"(ptr %idx0) + call void asm sideeffect "; use $0", "v"(ptr %idx1) + call void asm sideeffect "; use $0", "v"(ptr %idx2) + call void asm sideeffect "; use $0", "v"(ptr %idx3) + ret void +} diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/reorder-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/reorder-gep.ll new file mode 100644 index 00000000000000..a15f11a634db5d --- /dev/null +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/reorder-gep.ll @@ -0,0 +1,188 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -S -passes=separate-const-offset-from-gep < %s | FileCheck %s + +define void @illegal_addr_mode(ptr %in.ptr, i64 %in.idx0, i64 %in.idx1) { +; CHECK-LABEL: define void @illegal_addr_mode( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BASE:%.*]] = getelementptr i64, ptr [[IN_PTR]], i64 [[IN_IDX0]] +; CHECK-NEXT: [[IDX0:%.*]] = getelementptr i64, ptr [[BASE]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[CONST1:%.*]] = getelementptr i64, ptr [[BASE]], i64 256 +; 
CHECK-NEXT: [[IDX1:%.*]] = getelementptr i64, ptr [[CONST1]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[CONST2:%.*]] = getelementptr i64, ptr [[BASE]], i64 512 +; CHECK-NEXT: [[IDX2:%.*]] = getelementptr i64, ptr [[CONST2]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[CONST3:%.*]] = getelementptr i64, ptr [[BASE]], i64 768 +; CHECK-NEXT: [[IDX3:%.*]] = getelementptr i64, ptr [[CONST3]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[CMP0:%.*]] = icmp eq i64 [[IN_IDX0]], 0 +; CHECK-NEXT: br i1 [[CMP0]], label [[BB_1:%.*]], label [[END:%.*]] +; CHECK: bb.1: +; CHECK-NEXT: [[VAL0:%.*]] = load <8 x i64>, ptr [[IDX0]], align 16 +; CHECK-NEXT: [[VAL1:%.*]] = load <8 x i64>, ptr [[IDX1]], align 16 +; CHECK-NEXT: [[VAL2:%.*]] = load <8 x i64>, ptr [[IDX2]], align 16 +; CHECK-NEXT: [[VAL3:%.*]] = load <8 x i64>, ptr [[IDX3]], align 16 +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: ret void +; +entry: + %base = getelementptr i64, ptr %in.ptr, i64 %in.idx0 + %idx0 = getelementptr i64, ptr %base, i64 %in.idx1 + %const1 = getelementptr i64, ptr %base, i64 256 + %idx1 = getelementptr i64, ptr %const1, i64 %in.idx1 + %const2 = getelementptr i64, ptr %base, i64 512 + %idx2 = getelementptr i64, ptr %const2, i64 %in.idx1 + %const3 = getelementptr i64, ptr %base, i64 768 + %idx3 = getelementptr i64, ptr %const3, i64 %in.idx1 + %cmp0 = icmp eq i64 %in.idx0, 0 + br i1 %cmp0, label %bb.1, label %end + +bb.1: + %val0 = load <8 x i64>, ptr %idx0, align 16 + %val1 = load <8 x i64>, ptr %idx1, align 16 + %val2 = load <8 x i64>, ptr %idx2, align 16 + %val3 = load <8 x i64>, ptr %idx3, align 16 + call void asm sideeffect "; use $0", "v"(<8 x i64> %val0) + call void asm sideeffect 
"; use $0", "v"(<8 x i64> %val1) + call void asm sideeffect "; use $0", "v"(<8 x i64> %val2) + call void asm sideeffect "; use $0", "v"(<8 x i64> %val3) + br label %end + +end: + call void asm sideeffect "; use $0", "v"(ptr %idx0) + call void asm sideeffect "; use $0", "v"(ptr %idx1) + call void asm sideeffect "; use $0", "v"(ptr %idx2) + call void asm sideeffect "; use $0", "v"(ptr %idx3) + ret void +} + + +define void @multi_index_reorder(ptr %in.ptr, i64 %in.idx0, i64 %in.idx1) { +; CHECK-LABEL: define void @multi_index_reorder( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IDX0:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 0, i64 [[IN_IDX1]] +; CHECK-NEXT: [[CONST1:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 0, i64 256 +; CHECK-NEXT: [[IDX1:%.*]] = getelementptr i64, ptr [[CONST1]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[CONST2:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 0, i64 512 +; CHECK-NEXT: [[IDX2:%.*]] = getelementptr i64, ptr [[CONST2]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[CONST3:%.*]] = getelementptr [8192 x i64], ptr [[IN_PTR]], i64 0, i64 768 +; CHECK-NEXT: [[IDX3:%.*]] = getelementptr i64, ptr [[CONST3]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[CMP0:%.*]] = icmp eq i64 [[IN_IDX0]], 0 +; CHECK-NEXT: br i1 [[CMP0]], label [[BB_1:%.*]], label [[END:%.*]] +; CHECK: bb.1: +; CHECK-NEXT: [[VAL0:%.*]] = load <8 x i64>, ptr [[IDX0]], align 16 +; CHECK-NEXT: [[VAL1:%.*]] = load <8 x i64>, ptr [[IDX1]], align 16 +; CHECK-NEXT: [[VAL2:%.*]] = load <8 x i64>, ptr [[IDX2]], align 16 +; CHECK-NEXT: [[VAL3:%.*]] = load <8 x i64>, ptr [[IDX3]], align 16 +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm 
sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: ret void +; +entry: + %idx0 = getelementptr [8192 x i64], ptr %in.ptr, i64 0, i64 %in.idx1 + %const1 = getelementptr [8192 x i64], ptr %in.ptr, i64 0, i64 256 + %idx1 = getelementptr i64, ptr %const1, i64 %in.idx1 + %const2 = getelementptr [8192 x i64], ptr %in.ptr, i64 0, i64 512 + %idx2 = getelementptr i64, ptr %const2, i64 %in.idx1 + %const3 = getelementptr [8192 x i64], ptr %in.ptr, i64 0, i64 768 + %idx3 = getelementptr i64, ptr %const3, i64 %in.idx1 + %cmp0 = icmp eq i64 %in.idx0, 0 + br i1 %cmp0, label %bb.1, label %end + +bb.1: + %val0 = load <8 x i64>, ptr %idx0, align 16 + %val1 = load <8 x i64>, ptr %idx1, align 16 + %val2 = load <8 x i64>, ptr %idx2, align 16 + %val3 = load <8 x i64>, ptr %idx3, align 16 + call void asm sideeffect "; use $0", "v"(<8 x i64> %val0) + call void asm sideeffect "; use $0", "v"(<8 x i64> %val1) + call void asm sideeffect "; use $0", "v"(<8 x i64> %val2) + call void asm sideeffect "; use $0", "v"(<8 x i64> %val3) + br label %end + +end: + call void asm sideeffect "; use $0", "v"(ptr %idx0) + call void asm sideeffect "; use $0", "v"(ptr %idx1) + call void asm sideeffect "; use $0", "v"(ptr %idx2) + call void asm sideeffect "; use $0", "v"(ptr %idx3) + ret void +} + + +define void @different_type_reorder(ptr %in.ptr, i64 %in.idx0, i64 %in.idx1) { +; CHECK-LABEL: define void @different_type_reorder( +; CHECK-SAME: ptr [[IN_PTR:%.*]], i64 [[IN_IDX0:%.*]], i64 [[IN_IDX1:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BASE:%.*]] = getelementptr i64, ptr [[IN_PTR]], i64 [[IN_IDX0]] +; CHECK-NEXT: [[IDX0:%.*]] = getelementptr i64, ptr [[BASE]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[CONST1:%.*]] = getelementptr i8, ptr [[BASE]], i64 256 +; CHECK-NEXT: [[IDX1:%.*]] = getelementptr i64, ptr [[CONST1]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[CONST2:%.*]] = getelementptr i8, ptr [[BASE]], i64 512 +; CHECK-NEXT: [[IDX2:%.*]] = getelementptr i64, ptr [[CONST2]], i64 [[IN_IDX1]] +; 
CHECK-NEXT: [[CONST3:%.*]] = getelementptr i8, ptr [[BASE]], i64 768 +; CHECK-NEXT: [[IDX3:%.*]] = getelementptr i64, ptr [[CONST3]], i64 [[IN_IDX1]] +; CHECK-NEXT: [[CMP0:%.*]] = icmp eq i64 [[IN_IDX0]], 0 +; CHECK-NEXT: br i1 [[CMP0]], label [[BB_1:%.*]], label [[END:%.*]] +; CHECK: bb.1: +; CHECK-NEXT: [[VAL0:%.*]] = load <8 x i64>, ptr [[IDX0]], align 16 +; CHECK-NEXT: [[VAL1:%.*]] = load <8 x i64>, ptr [[IDX1]], align 16 +; CHECK-NEXT: [[VAL2:%.*]] = load <8 x i64>, ptr [[IDX2]], align 16 +; CHECK-NEXT: [[VAL3:%.*]] = load <8 x i64>, ptr [[IDX3]], align 16 +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: ret void +; +entry: + %base = getelementptr i64, ptr %in.ptr, i64 %in.idx0 + %idx0 = getelementptr i64, ptr %base, i64 %in.idx1 + %const1 = getelementptr i8, ptr %base, i64 256 + %idx1 = getelementptr i64, ptr %const1, i64 %in.idx1 + %const2 = getelementptr i8, ptr %base, i64 512 + %idx2 = getelementptr i64, ptr %const2, i64 %in.idx1 + %const3 = getelementptr i8, ptr %base, i64 768 + %idx3 = getelementptr i64, ptr %const3, i64 %in.idx1 + %cmp0 = icmp eq i64 %in.idx0, 0 + br i1 %cmp0, label %bb.1, label %end + +bb.1: + %val0 = load <8 x i64>, ptr %idx0, align 16 + %val1 = load <8 x i64>, ptr %idx1, align 16 + %val2 = load <8 x i64>, ptr %idx2, align 16 + %val3 = load <8 x i64>, ptr %idx3, align 16 + call void asm sideeffect "; use $0", "v"(<8 x i64> %val0) + call void asm sideeffect "; use $0", "v"(<8 x i64> %val1) + call void asm sideeffect "; use $0", "v"(<8 x i64> %val2) + call void asm sideeffect "; use $0", "v"(<8 x i64> %val3) + br label %end + +end: + call void asm sideeffect "; use $0", "v"(ptr %idx0) + call 
void asm sideeffect "; use $0", "v"(ptr %idx1) + call void asm sideeffect "; use $0", "v"(ptr %idx2) + call void asm sideeffect "; use $0", "v"(ptr %idx3) + ret void +} From 29d1aca05caa06e30d7a76ee15fd166fa32e1043 Mon Sep 17 00:00:00 2001 From: erman-gurses <99776114+erman-gurses@users.noreply.github.com> Date: Tue, 13 Feb 2024 20:42:04 -0500 Subject: [PATCH 084/240] [AMDGPU][MLIR]Add shmem-optimization as an op using transform dialect (#81550) This PR adds functionality to use shared memory optimization as an op using transform dialect. --- .../mlir/Dialect/AMDGPU/CMakeLists.txt | 1 + .../AMDGPU/TransformOps/AMDGPUTransformOps.h | 48 ++++++++++++++ .../AMDGPU/TransformOps/AMDGPUTransformOps.td | 45 +++++++++++++ .../AMDGPU/TransformOps/CMakeLists.txt | 4 ++ .../Dialect/AMDGPU/Transforms/Transforms.h | 3 + mlir/include/mlir/InitAllExtensions.h | 2 + mlir/lib/Dialect/AMDGPU/CMakeLists.txt | 3 +- .../TransformOps/AMDGPUTransformOps.cpp | 66 +++++++++++++++++++ .../AMDGPU/TransformOps/CMakeLists.txt | 25 +++++++ .../Transforms/OptimizeSharedMemory.cpp | 48 +++++++++----- ...transform_optimize_shmem_reads_writes.mlir | 64 ++++++++++++++++++ .../llvm-project-overlay/mlir/BUILD.bazel | 66 +++++++++++++++++++ 12 files changed, 356 insertions(+), 19 deletions(-) create mode 100644 mlir/include/mlir/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.h create mode 100644 mlir/include/mlir/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.td create mode 100644 mlir/include/mlir/Dialect/AMDGPU/TransformOps/CMakeLists.txt create mode 100644 mlir/lib/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.cpp create mode 100644 mlir/lib/Dialect/AMDGPU/TransformOps/CMakeLists.txt create mode 100644 mlir/test/Dialect/AMDGPU/transform_optimize_shmem_reads_writes.mlir diff --git a/mlir/include/mlir/Dialect/AMDGPU/CMakeLists.txt b/mlir/include/mlir/Dialect/AMDGPU/CMakeLists.txt index 9f57627c321fb0..660deb21479d29 100644 --- a/mlir/include/mlir/Dialect/AMDGPU/CMakeLists.txt +++ 
b/mlir/include/mlir/Dialect/AMDGPU/CMakeLists.txt @@ -1,2 +1,3 @@ add_subdirectory(IR) +add_subdirectory(TransformOps) add_subdirectory(Transforms) diff --git a/mlir/include/mlir/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.h b/mlir/include/mlir/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.h new file mode 100644 index 00000000000000..4fb4ab08a0da34 --- /dev/null +++ b/mlir/include/mlir/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.h @@ -0,0 +1,48 @@ +//===- AMDGPUTransformOps.h - AMDGPU transform ops ---------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_AMDGPU_TRANSFORMOPS_AMDGPUTRANSFORMOPS_H +#define MLIR_DIALECT_AMDGPU_TRANSFORMOPS_AMDGPUTRANSFORMOPS_H + +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/Transform/IR/TransformAttrs.h" +#include "mlir/Dialect/Transform/IR/TransformDialect.h" +#include "mlir/Dialect/Transform/IR/TransformInterfaces.h" +#include "mlir/IR/OpImplementation.h" +#include "mlir/IR/RegionKindInterface.h" + +namespace mlir { +namespace transform { +class TransformHandleTypeInterface; +} // namespace transform +} // namespace mlir + +namespace mlir { +class DialectRegistry; + +namespace linalg { +class LinalgOp; +} // namespace linalg + +namespace scf { +class ForOp; +} // namespace scf + +namespace amdgpu { +void registerTransformDialectExtension(DialectRegistry &registry); +} // namespace amdgpu +} // namespace mlir + +//===----------------------------------------------------------------------===// +// AMDGPU Transform Operations +//===----------------------------------------------------------------------===// + +#define GET_OP_CLASSES +#include "mlir/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.h.inc" + +#endif //
MLIR_DIALECT_AMDGPU_TRANSFORMOPS_AMDGPUTRANSFORMOPS_H diff --git a/mlir/include/mlir/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.td b/mlir/include/mlir/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.td new file mode 100644 index 00000000000000..23873d86b495c6 --- /dev/null +++ b/mlir/include/mlir/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.td @@ -0,0 +1,45 @@ +//===- AMDGPUTransformOps.td - AMDGPU transform ops --------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef AMDGPU_TRANSFORM_OPS +#define AMDGPU_TRANSFORM_OPS + +include "mlir/Dialect/Transform/IR/TransformAttrs.td" +include "mlir/Dialect/Transform/IR/TransformDialect.td" +include "mlir/Dialect/Transform/IR/TransformInterfaces.td" +include "mlir/Dialect/Transform/IR/TransformTypes.td" +include "mlir/Interfaces/SideEffectInterfaces.td" + +//===----------------------------------------------------------------------===// +// ApplyOptimizeSharedMemoryReadsAndWritesOp +//===----------------------------------------------------------------------===// + +def ApplyOptimizeSharedMemoryReadsAndWritesOp : + Op, + TransformOpInterface, TransformEachOpTrait]> { + let summary = "Reduce shared memory bank conflicts"; + let description = [{ This op attempts to optimize GPU Shared memory + reads/writes with the goal of avoiding bank conflicts. 
+ }]; + + let arguments = (ins TransformHandleTypeInterface:$target); + let results = (outs); + + let assemblyFormat = "$target attr-dict `:` functional-type(operands, results)"; + + let extraClassDeclaration = [{ + ::mlir::DiagnosedSilenceableFailure applyToOne( + ::mlir::transform::TransformRewriter &rewriter, + ::mlir::func::FuncOp funcOp, + ::mlir::transform::ApplyToEachResultList &results, + ::mlir::transform::TransformState &state); + }]; +} + +#endif // AMDGPU_TRANSFORM_OPS diff --git a/mlir/include/mlir/Dialect/AMDGPU/TransformOps/CMakeLists.txt b/mlir/include/mlir/Dialect/AMDGPU/TransformOps/CMakeLists.txt new file mode 100644 index 00000000000000..07bfebc9f96d2e --- /dev/null +++ b/mlir/include/mlir/Dialect/AMDGPU/TransformOps/CMakeLists.txt @@ -0,0 +1,4 @@ +set(LLVM_TARGET_DEFINITIONS AMDGPUTransformOps.td) +mlir_tablegen(AMDGPUTransformOps.h.inc -gen-op-decls) +mlir_tablegen(AMDGPUTransformOps.cpp.inc -gen-op-defs) +add_public_tablegen_target(MLIRAMDGPUTransformOpsIncGen) diff --git a/mlir/include/mlir/Dialect/AMDGPU/Transforms/Transforms.h b/mlir/include/mlir/Dialect/AMDGPU/Transforms/Transforms.h index 140bc12deed690..b4e9ad27003db1 100644 --- a/mlir/include/mlir/Dialect/AMDGPU/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/AMDGPU/Transforms/Transforms.h @@ -14,6 +14,7 @@ #ifndef MLIR_DIALECT_AMDGPU_TRANSFORMS_TRANSFORMS_H_ #define MLIR_DIALECT_AMDGPU_TRANSFORMS_TRANSFORMS_H_ +#include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/IR/Operation.h" #include "mlir/Support/LogicalResult.h" @@ -48,6 +49,8 @@ namespace amdgpu { mlir::LogicalResult optimizeSharedMemoryReadsAndWrites(Operation *parentOp, Value memrefValue); +void optimizeSharedMemoryReadsAndWritesOp(mlir::func::FuncOp funcOp); + } // namespace amdgpu } // namespace mlir diff --git a/mlir/include/mlir/InitAllExtensions.h b/mlir/include/mlir/InitAllExtensions.h index 7708ca5571de3b..b31fb26f00f8f4 100644 --- a/mlir/include/mlir/InitAllExtensions.h +++ 
b/mlir/include/mlir/InitAllExtensions.h @@ -23,6 +23,7 @@ #include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h" #include "mlir/Conversion/NVVMToLLVM/NVVMToLLVM.h" #include "mlir/Conversion/UBToLLVM/UBToLLVM.h" +#include "mlir/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.h" #include "mlir/Dialect/Affine/TransformOps/AffineTransformOps.h" #include "mlir/Dialect/Bufferization/TransformOps/BufferizationTransformOps.h" #include "mlir/Dialect/Func/Extensions/AllExtensions.h" @@ -66,6 +67,7 @@ inline void registerAllExtensions(DialectRegistry &registry) { ub::registerConvertUBToLLVMInterface(registry); // Register all transform dialect extensions. + amdgpu::registerTransformDialectExtension(registry); affine::registerTransformDialectExtension(registry); bufferization::registerTransformDialectExtension(registry); func::registerTransformDialectExtension(registry); diff --git a/mlir/lib/Dialect/AMDGPU/CMakeLists.txt b/mlir/lib/Dialect/AMDGPU/CMakeLists.txt index 31167e6af908b9..63b4d8b99f53fd 100644 --- a/mlir/lib/Dialect/AMDGPU/CMakeLists.txt +++ b/mlir/lib/Dialect/AMDGPU/CMakeLists.txt @@ -1,3 +1,4 @@ add_subdirectory(IR) -add_subdirectory(Transforms) add_subdirectory(Utils) +add_subdirectory(TransformOps) +add_subdirectory(Transforms) diff --git a/mlir/lib/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.cpp b/mlir/lib/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.cpp new file mode 100644 index 00000000000000..ff29f9f6938535 --- /dev/null +++ b/mlir/lib/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.cpp @@ -0,0 +1,66 @@ +//===- AMDGPUTransformOps.cpp - Implementation of AMDGPU transform ops-----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.h" + +#include "mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h" +#include "mlir/Dialect/AMDGPU/Transforms/Transforms.h" +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Vector/IR/VectorOps.h" + +using namespace mlir; +using namespace mlir::amdgpu; +using namespace mlir::transform; +using namespace mlir::func; + +#define DEBUG_TYPE "amdgpu-transforms" +#define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE "]: ") +#define DBGSNL() (llvm::dbgs() << "\n") +#define LDBG(X) LLVM_DEBUG(DBGS() << (X) << "\n") + +DiagnosedSilenceableFailure +ApplyOptimizeSharedMemoryReadsAndWritesOp::applyToOne( + TransformRewriter &rewriter, FuncOp funcOp, ApplyToEachResultList &results, + TransformState &state) { + optimizeSharedMemoryReadsAndWritesOp(funcOp); + return DiagnosedSilenceableFailure::success(); +} + +void ApplyOptimizeSharedMemoryReadsAndWritesOp::getEffects( + SmallVectorImpl &effects) { + onlyReadsHandle(getTarget(), effects); + modifiesPayload(effects); +} + +//===----------------------------------------------------------------------===// +// Transform op registration +//===----------------------------------------------------------------------===// + +namespace { +class AMDGPUTransformDialectExtension + : public TransformDialectExtension { +public: + AMDGPUTransformDialectExtension() { + declareGeneratedDialect(); + declareGeneratedDialect(); + declareGeneratedDialect(); + declareGeneratedDialect(); + registerTransformOps< +#define GET_OP_LIST +#include "mlir/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.cpp.inc" + >(); + } +}; +} // namespace + +#define GET_OP_CLASSES +#include "mlir/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.cpp.inc" + +void amdgpu::registerTransformDialectExtension(DialectRegistry &registry) { + registry.addExtensions(); +} diff --git
a/mlir/lib/Dialect/AMDGPU/TransformOps/CMakeLists.txt b/mlir/lib/Dialect/AMDGPU/TransformOps/CMakeLists.txt new file mode 100644 index 00000000000000..c39a3b55eabca4 --- /dev/null +++ b/mlir/lib/Dialect/AMDGPU/TransformOps/CMakeLists.txt @@ -0,0 +1,25 @@ +add_mlir_dialect_library(MLIRAMDGPUTransformOps + AMDGPUTransformOps.cpp + + ADDITIONAL_HEADER_DIRS + ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/AMDGPU/TransformOps + + DEPENDS + MLIRAMDGPUTransformOpsIncGen + + LINK_LIBS PUBLIC + MLIRAffineDialect + MLIRArithDialect + MLIRIR + MLIRLinalgDialect + MLIRAMDGPUDialect + MLIRAMDGPUTransforms + MLIRParser + MLIRSideEffectInterfaces + MLIRSCFDialect + MLIRSCFTransforms + MLIRTransformDialect + MLIRTransformDialectUtils + MLIRVectorTransforms + + ) diff --git a/mlir/lib/Dialect/AMDGPU/Transforms/OptimizeSharedMemory.cpp b/mlir/lib/Dialect/AMDGPU/Transforms/OptimizeSharedMemory.cpp index c7001fc6d57d5f..7c50a876e78f45 100644 --- a/mlir/lib/Dialect/AMDGPU/Transforms/OptimizeSharedMemory.cpp +++ b/mlir/lib/Dialect/AMDGPU/Transforms/OptimizeSharedMemory.cpp @@ -24,8 +24,6 @@ #include "mlir/Dialect/Vector/IR/VectorOps.h" #include "mlir/Interfaces/SideEffectInterfaces.h" #include "mlir/Support/LogicalResult.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Support/MathExtras.h" namespace mlir { namespace amdgpu { @@ -52,12 +50,12 @@ constexpr int64_t kDefaultVectorSizeBits = 64; static Value permuteVectorOffset(OpBuilder &b, Location loc, ArrayRef indices, MemRefType memrefTy, int64_t srcDim, int64_t tgtDim) { - // Adjust the src index to change how often the permutation changes - // if necessary. + /// Adjust the src index to change how often the permutation changes + /// if necessary. Value src = indices[srcDim]; - // We only want to permute every N iterations of the target dim where N is - // ceil(sharedMemoryLineSizeBytes / dimSizeBytes(tgtDim)). 
+ /// We only want to permute every N iterations of the target dim where N is + /// ceil(sharedMemoryLineSizeBytes / dimSizeBytes(tgtDim)). const int64_t permuteEveryN = std::max( 1, kSharedMemoryLineSizeBytes / ((memrefTy.getDimSize(tgtDim) * memrefTy.getElementTypeBitWidth()) / @@ -83,8 +81,8 @@ static Value permuteVectorOffset(OpBuilder &b, Location loc, Value srcBits = b.create(loc, mask); srcBits = b.create(loc, src, srcBits); - // Use the src bits to permute the target bits b[N:M] containing the - // vector offset. + /// Use the src bits to permute the target bits b[N:M] containing the + /// vector offset. if (permuteEveryN > 1) { int64_t shlBits = n - llvm::Log2_64(permuteEveryN); if (shlBits > 0) { @@ -133,8 +131,8 @@ getShmReadAndWriteOps(Operation *parentOp, Value shmMemRef, writeOps.push_back(op); }); - // Restrict to a supported set of ops. We also require at least 2D access, - // although this could be relaxed. + /// Restrict to a supported set of ops. We also require at least 2D access, + /// although this could be relaxed. if (llvm::any_of(readOps, [](Operation *op) { return !isa( op) || @@ -159,15 +157,15 @@ mlir::amdgpu::optimizeSharedMemoryReadsAndWrites(Operation *parentOp, !amdgpu::AMDGPUDialect::hasSharedMemoryAddressSpace(memRefType)) return failure(); - // Abort if the given value has any sub-views; we do not do any alias - // analysis. + /// Abort if the given value has any sub-views; we do not do any alias + /// analysis. bool hasSubView = false; parentOp->walk([&](memref::SubViewOp subView) { hasSubView = true; }); if (hasSubView) return failure(); - // Check if this is necessary given the assumption of 128b accesses: - // If dim[rank-1] is small enough to fit 8 rows in a 128B line. + /// Check if this is necessary given the assumption of 128b accesses: + /// If dim[rank-1] is small enough to fit 8 rows in a 128B line. 
const int64_t rowSize = memRefType.getDimSize(memRefType.getRank() - 1); const int64_t rowsPerLine = (8 * kSharedMemoryLineSizeBytes / memRefType.getElementTypeBitWidth()) / @@ -177,8 +175,8 @@ mlir::amdgpu::optimizeSharedMemoryReadsAndWrites(Operation *parentOp, if (rowsPerLine >= threadGroupSize) return failure(); - // Get sets of operations within the function that read/write to shared - // memory. + /// Get sets of operations within the function that read/write to shared + /// memory. SmallVector shmReadOps; SmallVector shmWriteOps; if (failed(getShmReadAndWriteOps(parentOp, memrefValue, shmReadOps, @@ -193,7 +191,7 @@ mlir::amdgpu::optimizeSharedMemoryReadsAndWrites(Operation *parentOp, int64_t tgtDim = memRefType.getRank() - 1; int64_t srcDim = memRefType.getRank() - 2; - // Transform indices for the ops writing to shared memory. + /// Transform indices for the ops writing to shared memory. while (!shmWriteOps.empty()) { Operation *shmWriteOp = shmWriteOps.pop_back_val(); builder.setInsertionPoint(shmWriteOp); @@ -205,7 +203,7 @@ mlir::amdgpu::optimizeSharedMemoryReadsAndWrites(Operation *parentOp, amdgpu::setIndices(shmWriteOp, transformedIndices); } - // Transform indices for the ops reading from shared memory. + /// Transform indices for the ops reading from shared memory. 
while (!shmReadOps.empty()) { Operation *shmReadOp = shmReadOps.pop_back_val(); builder.setInsertionPoint(shmReadOp); @@ -220,6 +218,20 @@ mlir::amdgpu::optimizeSharedMemoryReadsAndWrites(Operation *parentOp, return success(); } +void amdgpu::optimizeSharedMemoryReadsAndWritesOp(func::FuncOp funcOp) { + SmallVector shmAllocOps; + funcOp.walk([&](memref::AllocOp allocOp) { + if (!amdgpu::AMDGPUDialect::hasSharedMemoryAddressSpace(allocOp.getType())) + return; + shmAllocOps.push_back(allocOp); + }); + for (auto allocOp : shmAllocOps) { + if (failed(amdgpu::optimizeSharedMemoryReadsAndWrites(funcOp, + allocOp.getMemref()))) + return; + } +} + struct OptimizeSharedMemoryPass : public amdgpu::impl::OptimizeSharedMemoryBase { public: diff --git a/mlir/test/Dialect/AMDGPU/transform_optimize_shmem_reads_writes.mlir b/mlir/test/Dialect/AMDGPU/transform_optimize_shmem_reads_writes.mlir new file mode 100644 index 00000000000000..dfdd1b17e244e3 --- /dev/null +++ b/mlir/test/Dialect/AMDGPU/transform_optimize_shmem_reads_writes.mlir @@ -0,0 +1,64 @@ +// RUN: mlir-opt %s -transform-interpreter | FileCheck %s + + // CHECK: @optimize_shmem([[arg0:%.+]]: memref<{{.*}}>, [[readRow:%.+]]: index, [[readCol:%.+]]: index, [[writeRow:%.+]]: index, [[writeCol:%.+]]: index, [[fragRow:%.+]]: index, [[fragCol:%.+]]: index, [[fragColPerm:%.+]]: index, [[stRow:%.+]]: index, [[stCol:%.+]]: index) + func.func @optimize_shmem(%arg0: memref<4096x4096xf16>, + %readRow: index, %readCol: index, + %writeRow: index, %writeCol: index, + %fragRow: index, %fragCol: index, + %fragColPerm: index, + %stRow: index, %stCol: index) { + // CHECK: %[[cst:.+]] = arith.constant 0.000000e+00 : f16 + %cst = arith.constant 0.000000e+00 : f16 + + // CHECK: [[shmA:%.+]] = memref.alloc + // CHECK: [[shmB:%.+]] = memref.alloc + %shmA = memref.alloc() {alignment = 64 : i64} : memref<128x32xf16, 3> + %shmB = memref.alloc() {alignment = 64 : i64} : memref<256x32xf16, 3> + + // CHECK: %[[D0:.+]] = vector.transfer_read 
[[arg0:%.+]][[[readRow:%.+]], [[readCol:%.+]]], [[cst:.+]] {in_bounds = [true, true]} : memref<4096x4096xf16>, vector<1x8xf16> + %0 = vector.transfer_read %arg0[%readRow, %readCol], %cst {in_bounds = [true, true]} : memref<4096x4096xf16>, vector<1x8xf16> + // CHECK: [[c7:%.+]] = arith.constant 7 : index + // CHECK: [[srcBits:%.+]] = arith.andi [[stRow:%.+]], [[c7]] + // CHECK: [[c2:%.+]] = arith.constant 2 : index + // CHECK: [[xorBits:%.+]] = arith.shli [[srcBits]], [[c2]] + // CHECK: [[stColPerm:%.+]] = arith.xori [[stCol:%.+]], [[xorBits]] + // CHECK: vector.transfer_write %[[D0:.+]], [[shmB]][[[writeRow:%.+]], [[writeCol:%.+]]] {in_bounds = [true, true]} : vector<1x8xf16>, memref<256x32xf16, 3> + vector.transfer_write %0, %shmB[%writeRow, %writeCol] {in_bounds = [true, true]} : vector<1x8xf16>, memref<256x32xf16, 3> + gpu.barrier + gpu.barrier + // CHECK: [[c7:%.+]] = arith.constant 7 : index + // CHECK: [[srcBits:%.+]] = arith.andi [[fragRow]], [[c7]] + // CHECK: [[c2:%.+]] = arith.constant 2 : index + // CHECK: [[xorBits:%.+]] = arith.shli [[srcBits]], [[c2]] + // CHECK: [[fragColPerm:%.+]] = arith.xori [[fragCol:%.+]], [[xorBits]] + // CHECK: vector.load [[shmB:%.+]][[[fragRow:%.+]], [[fragColPerm]]] : memref<256x32xf16, 3>, vector<8xf16> + %1 = vector.load %shmB[%fragRow, %fragColPerm] : memref<256x32xf16, 3>, vector<8xf16> + + // CHECK: %[[D2:.+]] = vector.transfer_read [[arg0:%.+]][[[readRow:%.+]], [[readCol:%.+]]], [[cst:.+]] {in_bounds = [true, true]} : memref<4096x4096xf16>, vector<1x8xf16> + %2 = vector.transfer_read %arg0[%readRow, %readCol], %cst {in_bounds = [true, true]} : memref<4096x4096xf16>, vector<1x8xf16> + // CHECK: [[c7:%.+]] = arith.constant 7 : index + // CHECK: [[srcBits:%.+]] = arith.andi [[stRow:%.+]], [[c7]] + // CHECK: [[c2:%.+]] = arith.constant 2 : index + // CHECK: [[xorBits:%.+]] = arith.shli [[srcBits]], [[c2]] + // CHECK: [[stColPerm:%.+]] = arith.xori [[stCol:%.+]], [[xorBits]] + // CHECK: vector.transfer_write %[[D2:.+]], 
[[shmA:%.+]][[[writeRow:%.+]], [[writeCol:%.+]]] {in_bounds = [true, true]} : vector<1x8xf16>, memref<128x32xf16, 3> + vector.transfer_write %2, %shmA[%writeRow, %writeCol] {in_bounds = [true, true]} : vector<1x8xf16>, memref<128x32xf16, 3> + gpu.barrier + gpu.barrier + // CHECK: [[c7:%.+]] = arith.constant 7 : index + // CHECK: [[srcBits:%.+]] = arith.andi [[fragRow]], [[c7]] + // CHECK: [[c2:%.+]] = arith.constant 2 : index + // CHECK: [[xorBits:%.+]] = arith.shli [[srcBits]], [[c2]] + // CHECK: [[fragColPerm:%.+]] = arith.xori [[fragCol:%.+]], [[xorBits]] + // CHECK: vector.load [[shmA:%.+]][[[fragRow:%.+]], [[fragColPerm]]] : memref<128x32xf16, 3>, vector<8xf16> + %3 = vector.load %shmA[%fragRow, %fragColPerm] : memref<128x32xf16, 3>, vector<8xf16> + return + } + +module attributes { transform.with_named_sequence } { + transform.named_sequence @__transform_main(%root: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["func.func"]} in %root : (!transform.any_op) -> !transform.any_op + transform.amdgpu.optimize_shared_memory_reads_and_writes %0 : (!transform.any_op) -> () + transform.yield + } // @__transform_main +} // module diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 821481ee272a56..2c534c7614b10d 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -1511,6 +1511,70 @@ cc_library( ], ) +cc_library( + name = "AMDGPUTransformOps", + srcs = glob([ + "lib/Dialect/AMDGPU/TransformOps/*.cpp", + ]), + hdrs = glob([ + "include/mlir/Dialect/AMDGPU/TransformOps/*.h", + ]), + includes = ["include"], + deps = [ + ":AMDGPUDialect", + ":AMDGPUTransformOpsIncGen", + ":AMDGPUTransforms", + ":AffineDialect", + ":Analysis", + ":ArithDialect", + ":ArithUtils", + ":DialectUtils", + ":GPUCommonTransforms", + ":GPUCompilationAttrInterfacesIncGen", + ":GPUDialect", + ":IR", + ":LLVMCommonConversion", + 
":LinalgDialect", + ":MemRefDialect", + ":SCFDialect", + ":SCFTransforms", + ":Support", + ":TransformDialect", + ":VectorDialect", + "//llvm:Support", + ], +) + +td_library( + name = "AMDGPUTransformOpsTdFiles", + srcs = glob([ + "include/mlir/Dialect/AMDGPU/TransformOps/*.td", + ]), + includes = ["include"], + deps = [ + ":TransformDialectTdFiles", + ], +) + +gentbl_cc_library( + name = "AMDGPUTransformOpsIncGen", + tbl_outs = [ + ( + ["-gen-op-decls"], + "include/mlir/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.h.inc", + ), + ( + ["-gen-op-defs"], + "include/mlir/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.cpp.inc", + ), + ], + tblgen = ":mlir-tblgen", + td_file = "include/mlir/Dialect/AMDGPU/TransformOps/AMDGPUTransformOps.td", + deps = [ + ":AMDGPUTransformOpsTdFiles", + ], +) + gentbl_cc_library( name = "AMDGPUPassIncGen", tbl_outs = [ @@ -4614,6 +4678,7 @@ cc_library( name = "AllExtensions", hdrs = ["include/mlir/InitAllExtensions.h"], deps = [ + ":AMDGPUTransformOps", ":AffineTransformOps", ":ArithToLLVM", ":BufferizationTransformOps", @@ -8961,6 +9026,7 @@ cc_library( deps = [ ":AMDGPUDialect", ":AMDGPUToROCDL", + ":AMDGPUTransformOps", ":AMDGPUTransforms", ":AMXDialect", ":AMXTransforms", From 22d2f3aa3097feb9a91c6d7b8ef611a1cde6d0d5 Mon Sep 17 00:00:00 2001 From: Jim Ingham Date: Tue, 13 Feb 2024 18:06:05 -0800 Subject: [PATCH 085/240] Move the parsed_cmd conversion def's to module level functions. Python3.9 does not allow you to put a reference to a class staticmethod in a table and call it from there. Python3.10 and following do allow this, but we still support 3.9. staticmethod was slightly cleaner, but this will do. 
--- lldb/examples/python/templates/parsed_cmd.py | 123 +++++++++--------- .../script/add/TestAddParsedCommand.py | 3 - 2 files changed, 60 insertions(+), 66 deletions(-) diff --git a/lldb/examples/python/templates/parsed_cmd.py b/lldb/examples/python/templates/parsed_cmd.py index 61ea57c275aae4..06124adf43420a 100644 --- a/lldb/examples/python/templates/parsed_cmd.py +++ b/lldb/examples/python/templates/parsed_cmd.py @@ -52,6 +52,65 @@ def __call__(self, debugger, args_list, exe_ctx, result): import sys from abc import abstractmethod +# Some methods to translate common value types. Should return a +# tuple of the value and an error value (True => error) if the +# type can't be converted. These are called internally when the +# command line is parsed into the 'dest' properties, you should +# not need to call them directly. +# FIXME: Need a way to push the conversion error string back to lldb. +def to_bool(in_value): + error = True + value = False + if type(in_value) != str or len(in_value) == 0: + return (value, error) + + low_in = in_value.lower() + if low_in in ["y", "yes", "t", "true", "1"]: + value = True + error = False + + if not value and low_in in ["n", "no", "f", "false", "0"]: + value = False + error = False + + return (value, error) + +def to_int(in_value): + #FIXME: Not doing errors yet... + return (int(in_value), False) + +def to_unsigned(in_value): + # FIXME: find an unsigned converter... + # And handle errors. 
+ return (int(in_value), False) + +translators = { + lldb.eArgTypeBoolean : to_bool, + lldb.eArgTypeBreakpointID : to_unsigned, + lldb.eArgTypeByteSize : to_unsigned, + lldb.eArgTypeCount : to_unsigned, + lldb.eArgTypeFrameIndex : to_unsigned, + lldb.eArgTypeIndex : to_unsigned, + lldb.eArgTypeLineNum : to_unsigned, + lldb.eArgTypeNumLines : to_unsigned, + lldb.eArgTypeNumberPerLine : to_unsigned, + lldb.eArgTypeOffset : to_int, + lldb.eArgTypeThreadIndex : to_unsigned, + lldb.eArgTypeUnsignedInteger : to_unsigned, + lldb.eArgTypeWatchpointID : to_unsigned, + lldb.eArgTypeColumnNum : to_unsigned, + lldb.eArgTypeRecognizerID : to_unsigned, + lldb.eArgTypeTargetID : to_unsigned, + lldb.eArgTypeStopHookID : to_unsigned +} + +def translate_value(value_type, value): + try: + return translators[value_type](value) + except KeyError: + # If we don't have a translator, return the string value. + return (value, False) + class LLDBOptionValueParser: """ This class holds the option definitions for the command, and when @@ -63,68 +122,6 @@ def __init__(self): self.options_dict = {} self.args_array = [] - # Some methods to translate common value types. Should return a - # tuple of the value and an error value (True => error) if the - # type can't be converted. These are called internally when the - # command line is parsed into the 'dest' properties, you should - # not need to call them directly. - # FIXME: Need a way to push the conversion error string back to lldb. - @staticmethod - def to_bool(in_value): - error = True - value = False - if type(in_value) != str or len(in_value) == 0: - return (value, error) - - low_in = in_value.lower() - if low_in in ["y", "yes", "t", "true", "1"]: - value = True - error = False - - if not value and low_in in ["n", "no", "f", "false", "0"]: - value = False - error = False - - return (value, error) - - @staticmethod - def to_int(in_value): - #FIXME: Not doing errors yet... 
- return (int(in_value), False) - - @staticmethod - def to_unsigned(in_value): - # FIXME: find an unsigned converter... - # And handle errors. - return (int(in_value), False) - - translators = { - lldb.eArgTypeBoolean : to_bool, - lldb.eArgTypeBreakpointID : to_unsigned, - lldb.eArgTypeByteSize : to_unsigned, - lldb.eArgTypeCount : to_unsigned, - lldb.eArgTypeFrameIndex : to_unsigned, - lldb.eArgTypeIndex : to_unsigned, - lldb.eArgTypeLineNum : to_unsigned, - lldb.eArgTypeNumLines : to_unsigned, - lldb.eArgTypeNumberPerLine : to_unsigned, - lldb.eArgTypeOffset : to_int, - lldb.eArgTypeThreadIndex : to_unsigned, - lldb.eArgTypeUnsignedInteger : to_unsigned, - lldb.eArgTypeWatchpointID : to_unsigned, - lldb.eArgTypeColumnNum : to_unsigned, - lldb.eArgTypeRecognizerID : to_unsigned, - lldb.eArgTypeTargetID : to_unsigned, - lldb.eArgTypeStopHookID : to_unsigned - } - - @classmethod - def translate_value(cls, value_type, value): - try: - return cls.translators[value_type](value) - except KeyError: - # If we don't have a translator, return the string value. - return (value, False) # FIXME: would this be better done on the C++ side? # The common completers are missing some useful ones. 
@@ -219,7 +216,7 @@ def set_option_value(self, exe_ctx, opt_name, opt_value): if "enum_values" in elem: (value, error) = self.set_enum_value(elem["enum_values"], opt_value) else: - (value, error) = __class__.translate_value(elem["value_type"], opt_value) + (value, error) = translate_value(elem["value_type"], opt_value) if error: return False diff --git a/lldb/test/API/commands/command/script/add/TestAddParsedCommand.py b/lldb/test/API/commands/command/script/add/TestAddParsedCommand.py index 6cbe888af99dc6..7dba9c6937f211 100644 --- a/lldb/test/API/commands/command/script/add/TestAddParsedCommand.py +++ b/lldb/test/API/commands/command/script/add/TestAddParsedCommand.py @@ -13,9 +13,6 @@ class ParsedCommandTestCase(TestBase): NO_DEBUG_INFO_TESTCASE = True - # This crashes on the x86_64 Debian bot, but the failure is not helpful. - # Disable the test while I try to find a way to reproduce. - @skipIf(py_version=("<=", (3, 9))) def test(self): self.pycmd_tests() From edff3ff4d37a9e051e33146393b064ce987af252 Mon Sep 17 00:00:00 2001 From: Michael Spencer Date: Tue, 13 Feb 2024 18:35:39 -0800 Subject: [PATCH 086/240] [llvm][Support] Add ExponentialBackoff helper (#81206) This provides a simple way to implement exponential backoff using a do while loop. Usage example (also see the change to LockFileManager.cpp): ``` ExponentialBackoff Backoff(10s); do { if (tryToDoSomething()) return ItWorked; } while (Backoff.waitForNextAttempt()); return Timeout; ``` Abstracting this out of `LockFileManager` as the module build daemon will need it. 
--- .../include/llvm/Support/ExponentialBackoff.h | 65 +++++++++++++++++++ llvm/lib/Support/CMakeLists.txt | 1 + llvm/lib/Support/ExponentialBackoff.cpp | 29 +++++++++ llvm/lib/Support/LockFileManager.cpp | 38 ++--------- llvm/unittests/Support/CMakeLists.txt | 1 + .../Support/ExponentialBackoffTest.cpp | 31 +++++++++ 6 files changed, 134 insertions(+), 31 deletions(-) create mode 100644 llvm/include/llvm/Support/ExponentialBackoff.h create mode 100644 llvm/lib/Support/ExponentialBackoff.cpp create mode 100644 llvm/unittests/Support/ExponentialBackoffTest.cpp diff --git a/llvm/include/llvm/Support/ExponentialBackoff.h b/llvm/include/llvm/Support/ExponentialBackoff.h new file mode 100644 index 00000000000000..8208a748eac2a6 --- /dev/null +++ b/llvm/include/llvm/Support/ExponentialBackoff.h @@ -0,0 +1,65 @@ +//===- llvm/Support/ExponentialBackoff.h ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a helper class for implementing exponential backoff. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_EXPONENTIALBACKOFF_H +#define LLVM_EXPONENTIALBACKOFF_H + +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Error.h" +#include +#include + +namespace llvm { + +/// A class to help implement exponential backoff. 
+/// +/// Example usage: +/// \code +/// ExponentialBackoff Backoff(10s); +/// do { +/// if (tryToDoSomething()) +/// return ItWorked; +/// } while (Backoff.waitForNextAttempt()); +/// return Timeout; +/// \endcode +class ExponentialBackoff { +public: + using duration = std::chrono::steady_clock::duration; + using time_point = std::chrono::steady_clock::time_point; + + /// \param Timeout the maximum wall time this should run for starting when + /// this object is constructed. + /// \param MinWait the minimum amount of time `waitForNextAttempt` will sleep + /// for. + /// \param MaxWait the maximum amount of time `waitForNextAttempt` will sleep + /// for. + ExponentialBackoff(duration Timeout, + duration MinWait = std::chrono::milliseconds(10), + duration MaxWait = std::chrono::milliseconds(500)) + : MinWait(MinWait), MaxWait(MaxWait), + EndTime(std::chrono::steady_clock::now() + Timeout) {} + + /// Blocks while waiting for the next attempt. + /// \returns true if you should try again, false if the timeout has been + /// reached. 
+ bool waitForNextAttempt(); + +private: + duration MinWait; + duration MaxWait; + time_point EndTime; + std::random_device RandDev; + int64_t CurrentMultiplier = 1; +}; + +} // end namespace llvm + +#endif // LLVM_EXPONENTIALBACKOFF_H diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt index e19223fdef4f17..1f2d82427552f7 100644 --- a/llvm/lib/Support/CMakeLists.txt +++ b/llvm/lib/Support/CMakeLists.txt @@ -176,6 +176,7 @@ add_llvm_component_library(LLVMSupport ELFAttributes.cpp Error.cpp ErrorHandling.cpp + ExponentialBackoff.cpp ExtensibleRTTI.cpp FileCollector.cpp FileUtilities.cpp diff --git a/llvm/lib/Support/ExponentialBackoff.cpp b/llvm/lib/Support/ExponentialBackoff.cpp new file mode 100644 index 00000000000000..7e68cf67ad3857 --- /dev/null +++ b/llvm/lib/Support/ExponentialBackoff.cpp @@ -0,0 +1,29 @@ +//===- llvm/Support/ExponentialBackoff.h ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/ExponentialBackoff.h" +#include + +using namespace llvm; + +bool ExponentialBackoff::waitForNextAttempt() { + auto Now = std::chrono::steady_clock::now(); + if (Now >= EndTime) + return false; + + duration CurMaxWait = std::min(MinWait * CurrentMultiplier, MaxWait); + std::uniform_int_distribution Dist(MinWait.count(), + CurMaxWait.count()); + // Use random_device directly instead of a PRNG as uniform_int_distribution + // often only takes a few samples anyway. 
+ duration WaitDuration = std::min(duration(Dist(RandDev)), EndTime - Now); + if (CurMaxWait < MaxWait) + CurrentMultiplier *= 2; + std::this_thread::sleep_for(WaitDuration); + return true; +} diff --git a/llvm/lib/Support/LockFileManager.cpp b/llvm/lib/Support/LockFileManager.cpp index a2b0fe8ca8f2ef..34c7a16b24be41 100644 --- a/llvm/lib/Support/LockFileManager.cpp +++ b/llvm/lib/Support/LockFileManager.cpp @@ -11,6 +11,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Support/Errc.h" #include "llvm/Support/ErrorOr.h" +#include "llvm/Support/ExponentialBackoff.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Process.h" @@ -20,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -295,29 +295,15 @@ LockFileManager::waitForUnlock(const unsigned MaxSeconds) { return Res_Success; // Since we don't yet have an event-based method to wait for the lock file, - // implement randomized exponential backoff, similar to Ethernet collision + // use randomized exponential backoff, similar to Ethernet collision // algorithm. This improves performance on machines with high core counts // when the file lock is heavily contended by multiple clang processes - const unsigned long MinWaitDurationMS = 10; - const unsigned long MaxWaitMultiplier = 50; // 500ms max wait - unsigned long WaitMultiplier = 1; - unsigned long ElapsedTimeSeconds = 0; + using namespace std::chrono_literals; + ExponentialBackoff Backoff(std::chrono::seconds(MaxSeconds), 10ms, 500ms); - std::random_device Device; - std::default_random_engine Engine(Device()); - - auto StartTime = std::chrono::steady_clock::now(); - - do { + // Wait first as this is only called when the lock is known to be held. + while (Backoff.waitForNextAttempt()) { // FIXME: implement event-based waiting - - // Sleep for the designated interval, to allow the owning process time to - // finish up and remove the lock file. 
- std::uniform_int_distribution Distribution(1, - WaitMultiplier); - unsigned long WaitDurationMS = MinWaitDurationMS * Distribution(Engine); - std::this_thread::sleep_for(std::chrono::milliseconds(WaitDurationMS)); - if (sys::fs::access(LockFileName.c_str(), sys::fs::AccessMode::Exist) == errc::no_such_file_or_directory) { // If the original file wasn't created, somone thought the lock was dead. @@ -329,17 +315,7 @@ LockFileManager::waitForUnlock(const unsigned MaxSeconds) { // If the process owning the lock died without cleaning up, just bail out. if (!processStillExecuting((*Owner).first, (*Owner).second)) return Res_OwnerDied; - - WaitMultiplier *= 2; - if (WaitMultiplier > MaxWaitMultiplier) { - WaitMultiplier = MaxWaitMultiplier; - } - - ElapsedTimeSeconds = std::chrono::duration_cast( - std::chrono::steady_clock::now() - StartTime) - .count(); - - } while (ElapsedTimeSeconds < MaxSeconds); + } // Give up. return Res_Timeout; diff --git a/llvm/unittests/Support/CMakeLists.txt b/llvm/unittests/Support/CMakeLists.txt index df35a7b7f3626a..15a126279125c5 100644 --- a/llvm/unittests/Support/CMakeLists.txt +++ b/llvm/unittests/Support/CMakeLists.txt @@ -38,6 +38,7 @@ add_llvm_unittest(SupportTests ErrnoTest.cpp ErrorOrTest.cpp ErrorTest.cpp + ExponentialBackoffTest.cpp ExtensibleRTTITest.cpp FileCollectorTest.cpp FileOutputBufferTest.cpp diff --git a/llvm/unittests/Support/ExponentialBackoffTest.cpp b/llvm/unittests/Support/ExponentialBackoffTest.cpp new file mode 100644 index 00000000000000..257604908bfe43 --- /dev/null +++ b/llvm/unittests/Support/ExponentialBackoffTest.cpp @@ -0,0 +1,31 @@ +//===- unittests/ExponentialBackoffTest.cpp -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/ExponentialBackoff.h" +#include "gtest/gtest.h" +#include + +using namespace llvm; +using namespace std::chrono_literals; + +namespace { + +TEST(ExponentialBackoffTest, Timeout) { + auto Start = std::chrono::steady_clock::now(); + // Use short enough times that this test runs quickly. + ExponentialBackoff Backoff(100ms, 1ms, 10ms); + do { + } while (Backoff.waitForNextAttempt()); + auto Duration = std::chrono::steady_clock::now() - Start; + EXPECT_GE(Duration, 100ms); +} + +// Testing individual wait duration is omitted as those tests would be +// non-deterministic. + +} // end anonymous namespace From 14b0d0de1fb3e55d16fac70a69980a003106d0aa Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 14 Feb 2024 02:35:48 +0000 Subject: [PATCH 087/240] [gn build] Port edff3ff4d37a --- llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn | 1 + llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn | 1 + 2 files changed, 2 insertions(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn index 472c3e43033252..6caad81edec53f 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn @@ -77,6 +77,7 @@ static_library("Support") { "ELFAttributes.cpp", "Error.cpp", "ErrorHandling.cpp", + "ExponentialBackoff.cpp", "ExtensibleRTTI.cpp", "FileCollector.cpp", "FileOutputBuffer.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn index c6d40aeebbbec3..7a152fdcc05982 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn @@ -41,6 +41,7 @@ unittest("SupportTests") { "ErrnoTest.cpp", "ErrorOrTest.cpp", "ErrorTest.cpp", + 
"ExponentialBackoffTest.cpp", "ExtensibleRTTITest.cpp", "FSUniqueIDTest.cpp", "FileCollectorTest.cpp", From 09e98950bfcff7ad376922932efb2b56e4db9898 Mon Sep 17 00:00:00 2001 From: Cyndy Ishida Date: Tue, 13 Feb 2024 18:52:11 -0800 Subject: [PATCH 088/240] [clang][InstallAPI] Introduce basic driver to write out tbd files (#81571) This introduces a basic outline of installapi as a clang driver option. It captures relevant information as cc1 args, which are common arguments already passed to the linker to encode into TBD file outputs. This is effectively an upstream for what already exists as `tapi installapi` in Xcode toolchains, but directly in Clang. This patch does not handle any AST traversing on input yet. InstallAPI is broadly an operation that takes a series of header files that represent a single dynamic library and generates a TBD file out of it which represents all the linkable symbols and necessary attributes for statically linking in clients. It is the linkable object in all Apple SDKs and when building dylibs in Xcode. `clang -installapi` also will support verification where it compares all the information recorded for the TBD files against the already built binary, to catch possible mismatches like when a declaration is missing a definition for an exported symbol. 
--- .../clang/Basic/DiagnosticDriverKinds.td | 3 + clang/include/clang/Driver/Action.h | 12 ++++ clang/include/clang/Driver/Options.td | 12 +++- clang/include/clang/Driver/Types.def | 1 + .../include/clang/Frontend/CompilerInstance.h | 7 ++ .../clang/Frontend/CompilerInvocation.h | 9 ++- .../include/clang/Frontend/FrontendActions.h | 10 +++ .../include/clang/Frontend/FrontendOptions.h | 3 + .../clang/Frontend/InstallAPIOptions.h | 28 ++++++++ clang/include/clang/InstallAPI/Context.h | 65 +++++++++++++++++++ clang/lib/CMakeLists.txt | 1 + clang/lib/Driver/Action.cpp | 7 ++ clang/lib/Driver/Driver.cpp | 16 ++++- clang/lib/Driver/ToolChain.cpp | 1 + clang/lib/Driver/ToolChains/Clang.cpp | 11 ++++ clang/lib/Frontend/CMakeLists.txt | 3 + clang/lib/Frontend/CompilerInvocation.cpp | 41 +++++++++++- clang/lib/Frontend/InstallAPIConsumer.cpp | 43 ++++++++++++ .../ExecuteCompilerInvocation.cpp | 2 + clang/lib/InstallAPI/CMakeLists.txt | 11 ++++ clang/lib/InstallAPI/Context.cpp | 27 ++++++++ clang/test/CMakeLists.txt | 1 + clang/test/Driver/installapi.h | 13 ++++ clang/test/InstallAPI/installapi-basic.test | 34 ++++++++++ clang/test/lit.cfg.py | 1 + 25 files changed, 357 insertions(+), 5 deletions(-) create mode 100644 clang/include/clang/Frontend/InstallAPIOptions.h create mode 100644 clang/include/clang/InstallAPI/Context.h create mode 100644 clang/lib/Frontend/InstallAPIConsumer.cpp create mode 100644 clang/lib/InstallAPI/CMakeLists.txt create mode 100644 clang/lib/InstallAPI/Context.cpp create mode 100644 clang/test/Driver/installapi.h create mode 100644 clang/test/InstallAPI/installapi-basic.test diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index b13181f6e70894..0807d8877591a6 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -804,4 +804,7 @@ def warn_android_unversioned_fallback : Warning< def err_drv_triple_version_invalid : 
Error< "version '%0' in target triple '%1' is invalid">; + +def err_drv_installapi_unsupported : Error< + "InstallAPI is not supported for '%0'">; } diff --git a/clang/include/clang/Driver/Action.h b/clang/include/clang/Driver/Action.h index 04fa8b01b418f8..2768e2f5df1a9e 100644 --- a/clang/include/clang/Driver/Action.h +++ b/clang/include/clang/Driver/Action.h @@ -59,6 +59,7 @@ class Action { PreprocessJobClass, PrecompileJobClass, ExtractAPIJobClass, + InstallAPIJobClass, AnalyzeJobClass, MigrateJobClass, CompileJobClass, @@ -448,6 +449,17 @@ class ExtractAPIJobAction : public JobAction { void addHeaderInput(Action *Input) { getInputs().push_back(Input); } }; +class InstallAPIJobAction : public JobAction { + void anchor() override; + +public: + InstallAPIJobAction(Action *Input, types::ID OutputType); + + static bool classof(const Action *A) { + return A->getKind() == InstallAPIJobClass; + } +}; + class AnalyzeJobAction : public JobAction { void anchor() override; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index c625d0dd1c0c72..95b464e7d61834 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -309,6 +309,8 @@ class AnalyzerOpts : KeyPathAndMacro<"AnalyzerOpts->", base, "ANALYZER_"> {} class MigratorOpts : KeyPathAndMacro<"MigratorOpts.", base, "MIGRATOR_"> {} +class InstallAPIOpts + : KeyPathAndMacro<"InstallAPIOpts.", base, "INSTALLAPI_"> {} // A boolean option which is opt-in in CC1. The positive option exists in CC1 and // Args.hasArg(OPT_ffoo) can be used to check that the flag is enabled. 
@@ -1114,7 +1116,8 @@ def config_user_dir_EQ : Joined<["--"], "config-user-dir=">, def coverage : Flag<["-", "--"], "coverage">, Group, Visibility<[ClangOption, CLOption]>; def cpp_precomp : Flag<["-"], "cpp-precomp">, Group; -def current__version : JoinedOrSeparate<["-"], "current_version">; +def current__version : JoinedOrSeparate<["-"], "current_version">, + Visibility<[ClangOption, CC1Option]>; def cxx_isystem : JoinedOrSeparate<["-"], "cxx-isystem">, Group, HelpText<"Add directory to the C++ SYSTEM include search path">, Visibility<[ClangOption, CC1Option]>, @@ -1529,6 +1532,9 @@ def static_libsan : Flag<["-"], "static-libsan">, HelpText<"Statically link the sanitizer runtime (Not supported for ASan, TSan or UBSan on darwin)">; def : Flag<["-"], "shared-libasan">, Alias; def fasm : Flag<["-"], "fasm">, Group; +def installapi : Flag<["-"], "installapi">, + Visibility<[ClangOption, CC1Option]>, Group, + HelpText<"Create a text-based stub file by scanning header files">; defm assume_unique_vtables : BoolFOption<"assume-unique-vtables", CodeGenOpts<"AssumeUniqueVTables">, DefaultTrue, @@ -4291,7 +4297,9 @@ def verify_pch : Flag<["-"], "verify-pch">, Group, Visibility<[ClangOption, CC1Option]>, HelpText<"Load and verify that a pre-compiled header file is not stale">; def init : Separate<["-"], "init">; -def install__name : Separate<["-"], "install_name">; +def install__name : Separate<["-"], "install_name">, + Visibility<[ClangOption, CC1Option]>, + MarshallingInfoString>; def iprefix : JoinedOrSeparate<["-"], "iprefix">, Group, Visibility<[ClangOption, CC1Option]>, HelpText<"Set the -iwithprefix/-iwithprefixbefore prefix">, MetaVarName<"">; diff --git a/clang/include/clang/Driver/Types.def b/clang/include/clang/Driver/Types.def index f72c27e1ee7019..570a53441d1c74 100644 --- a/clang/include/clang/Driver/Types.def +++ b/clang/include/clang/Driver/Types.def @@ -94,6 +94,7 @@ TYPE("lto-bc", LTO_BC, INVALID, "o", phases TYPE("ast", AST, INVALID, "ast", 
phases::Compile, phases::Backend, phases::Assemble, phases::Link) TYPE("ifs", IFS, INVALID, "ifs", phases::IfsMerge) TYPE("ifs-cpp", IFS_CPP, INVALID, "ifs", phases::Compile, phases::IfsMerge) +TYPE("tbd", TextAPI, INVALID, "tbd", phases::Precompile) TYPE("pcm", ModuleFile, INVALID, "pcm", phases::Compile, phases::Backend, phases::Assemble, phases::Link) TYPE("header-unit", HeaderUnit, INVALID, "pcm", phases::Compile, phases::Backend, phases::Assemble, phases::Link) TYPE("plist", Plist, INVALID, "plist", phases::Compile, phases::Backend, phases::Assemble, phases::Link) diff --git a/clang/include/clang/Frontend/CompilerInstance.h b/clang/include/clang/Frontend/CompilerInstance.h index ac2f940769fbe9..6eb7972f86ca5b 100644 --- a/clang/include/clang/Frontend/CompilerInstance.h +++ b/clang/include/clang/Frontend/CompilerInstance.h @@ -294,6 +294,13 @@ class CompilerInstance : public ModuleLoader { return Invocation->getFrontendOpts(); } + InstallAPIOptions &getInstallAPIOpts() { + return Invocation->getInstallAPIOpts(); + } + const InstallAPIOptions &getInstallAPIOpts() const { + return Invocation->getInstallAPIOpts(); + } + HeaderSearchOptions &getHeaderSearchOpts() { return Invocation->getHeaderSearchOpts(); } diff --git a/clang/include/clang/Frontend/CompilerInvocation.h b/clang/include/clang/Frontend/CompilerInvocation.h index c6528779bde7b2..a01d9695dce203 100644 --- a/clang/include/clang/Frontend/CompilerInvocation.h +++ b/clang/include/clang/Frontend/CompilerInvocation.h @@ -18,11 +18,12 @@ #include "clang/Basic/LangStandard.h" #include "clang/Frontend/DependencyOutputOptions.h" #include "clang/Frontend/FrontendOptions.h" +#include "clang/Frontend/InstallAPIOptions.h" #include "clang/Frontend/MigratorOptions.h" #include "clang/Frontend/PreprocessorOutputOptions.h" #include "clang/StaticAnalyzer/Core/AnalyzerOptions.h" -#include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/IntrusiveRefCntPtr.h" #include #include @@ -111,6 
+112,9 @@ class CompilerInvocationBase { /// Options controlling preprocessed output. std::shared_ptr PreprocessorOutputOpts; + /// Options controlling InstallAPI operations and output. + std::shared_ptr InstallAPIOpts; + /// Dummy tag type whose instance can be passed into the constructor to /// prevent creation of the reference-counted option objects. struct EmptyConstructor {}; @@ -145,6 +149,7 @@ class CompilerInvocationBase { const PreprocessorOutputOptions &getPreprocessorOutputOpts() const { return *PreprocessorOutputOpts; } + const InstallAPIOptions &getInstallAPIOpts() const { return *InstallAPIOpts; } /// @} /// Command line generation. @@ -237,6 +242,7 @@ class CompilerInvocation : public CompilerInvocationBase { using CompilerInvocationBase::getFrontendOpts; using CompilerInvocationBase::getDependencyOutputOpts; using CompilerInvocationBase::getPreprocessorOutputOpts; + using CompilerInvocationBase::getInstallAPIOpts; /// @} /// Mutable getters. @@ -258,6 +264,7 @@ class CompilerInvocation : public CompilerInvocationBase { PreprocessorOutputOptions &getPreprocessorOutputOpts() { return *PreprocessorOutputOpts; } + InstallAPIOptions &getInstallAPIOpts() { return *InstallAPIOpts; } /// @} /// Base class internals. 
diff --git a/clang/include/clang/Frontend/FrontendActions.h b/clang/include/clang/Frontend/FrontendActions.h index fcce31ac0590ff..b8229252f5ed22 100644 --- a/clang/include/clang/Frontend/FrontendActions.h +++ b/clang/include/clang/Frontend/FrontendActions.h @@ -130,6 +130,16 @@ class GenerateModuleAction : public ASTFrontendAction { bool shouldEraseOutputFiles() override; }; +class InstallAPIAction : public ASTFrontendAction { +protected: + std::unique_ptr CreateASTConsumer(CompilerInstance &CI, + StringRef InFile) override; + +public: + static std::unique_ptr + CreateOutputFile(CompilerInstance &CI, StringRef InFile); +}; + class GenerateInterfaceStubsAction : public ASTFrontendAction { protected: std::unique_ptr CreateASTConsumer(CompilerInstance &CI, diff --git a/clang/include/clang/Frontend/FrontendOptions.h b/clang/include/clang/Frontend/FrontendOptions.h index 53a8681cfdbba0..62d16ba542ea4d 100644 --- a/clang/include/clang/Frontend/FrontendOptions.h +++ b/clang/include/clang/Frontend/FrontendOptions.h @@ -100,6 +100,9 @@ enum ActionKind { /// Only execute frontend initialization. InitOnly, + // Create TextAPI stub. + InstallAPI, + /// Dump information about a module file. ModuleFileInfo, diff --git a/clang/include/clang/Frontend/InstallAPIOptions.h b/clang/include/clang/Frontend/InstallAPIOptions.h new file mode 100644 index 00000000000000..cf65a3350c6de6 --- /dev/null +++ b/clang/include/clang/Frontend/InstallAPIOptions.h @@ -0,0 +1,28 @@ +//===--- InstallAPIOptions.h ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_FRONTEND_INSTALLAPIOPTIONS_H +#define LLVM_CLANG_FRONTEND_INSTALLAPIOPTIONS_H + +#include "llvm/TextAPI/PackedVersion.h" + +namespace clang { + +/// InstallAPIOptions - Options for controlling InstallAPI verification and +/// TextAPI output. +class InstallAPIOptions { +public: + /// The install name which is a part of the library's ID. + std::string InstallName; + + /// The current version which is a part of the library's ID. + llvm::MachO::PackedVersion CurrentVersion; +}; +} // namespace clang + +#endif diff --git a/clang/include/clang/InstallAPI/Context.h b/clang/include/clang/InstallAPI/Context.h new file mode 100644 index 00000000000000..a1ff7c12a2f835 --- /dev/null +++ b/clang/include/clang/InstallAPI/Context.h @@ -0,0 +1,65 @@ +//===- InstallAPI/Context.h -------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Top level types for interacting with the generic clang driver and frontend +// for InstallAPI operations. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_INSTALLAPI_CONTEXT_H +#define LLVM_CLANG_INSTALLAPI_CONTEXT_H + +#include "clang/AST/ASTConsumer.h" +#include "clang/Basic/Diagnostic.h" +#include "llvm/ADT/IntrusiveRefCntPtr.h" +#include "llvm/TextAPI/InterfaceFile.h" +#include "llvm/TextAPI/RecordVisitor.h" +#include "llvm/TextAPI/RecordsSlice.h" + +namespace clang { +namespace installapi { + +/// Struct used for generating and validating InstallAPI.
+/// The attributes captured represent all necessary information +/// to generate TextAPI output. +struct InstallAPIContext { + + /// Library attributes that are typically passed as linker inputs. + llvm::MachO::RecordsSlice::BinaryAttrs BA; + + /// Active target triple to parse. + llvm::Triple TargetTriple{}; + + /// Output stream to write TextAPI file to. + std::unique_ptr OS = nullptr; + + /// DiagnosticsEngine to report errors. + llvm::IntrusiveRefCntPtr Diags = nullptr; + + /// File Path of output location. + StringRef OutputLoc{}; + + /// What encoding to write output as. + llvm::MachO::FileType FT = llvm::MachO::FileType::TBD_V5; +}; + +class InstallAPIConsumer : public ASTConsumer { +public: + InstallAPIConsumer(InstallAPIContext InstallAPICtx) + : Ctx(std::move(InstallAPICtx)) {} + + void HandleTranslationUnit(ASTContext &ASTContext) override; + +private: + InstallAPIContext Ctx; +}; + +} // namespace installapi +} // namespace clang + +#endif // LLVM_CLANG_INSTALLAPI_CONTEXT_H diff --git a/clang/lib/CMakeLists.txt b/clang/lib/CMakeLists.txt index 1526d65795f8ad..0cac86451f39e4 100644 --- a/clang/lib/CMakeLists.txt +++ b/clang/lib/CMakeLists.txt @@ -23,6 +23,7 @@ add_subdirectory(Tooling) add_subdirectory(DirectoryWatcher) add_subdirectory(Index) add_subdirectory(IndexSerialization) +add_subdirectory(InstallAPI) add_subdirectory(StaticAnalyzer) add_subdirectory(Format) if(CLANG_INCLUDE_TESTS) diff --git a/clang/lib/Driver/Action.cpp b/clang/lib/Driver/Action.cpp index 849bf6035ebd2e..7b1a1bb0228c41 100644 --- a/clang/lib/Driver/Action.cpp +++ b/clang/lib/Driver/Action.cpp @@ -32,6 +32,8 @@ const char *Action::getClassName(ActionClass AC) { case CompileJobClass: return "compiler"; case BackendJobClass: return "backend"; case AssembleJobClass: return "assembler"; + case InstallAPIJobClass: + return "installapi"; case IfsMergeJobClass: return "interface-stub-merger"; case LinkJobClass: return "linker"; case LipoJobClass: return "lipo"; @@ -362,6 +364,11 @@ 
void ExtractAPIJobAction::anchor() {} ExtractAPIJobAction::ExtractAPIJobAction(Action *Inputs, types::ID OutputType) : JobAction(ExtractAPIJobClass, Inputs, OutputType) {} +void InstallAPIJobAction::anchor() {} + +InstallAPIJobAction::InstallAPIJobAction(Action *Inputs, types::ID OutputType) + : JobAction(InstallAPIJobClass, Inputs, OutputType) {} + void AnalyzeJobAction::anchor() {} AnalyzeJobAction::AnalyzeJobAction(Action *Input, types::ID OutputType) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 00e14071a4afec..cf84ef21dfa8ce 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4189,6 +4189,11 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, break; } + if (auto *IAA = dyn_cast(Current)) { + Current = nullptr; + break; + } + // FIXME: Should we include any prior module file outputs as inputs of // later actions in the same command line? @@ -4319,6 +4324,13 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, if (!MergerInputs.empty()) Actions.push_back( C.MakeAction(MergerInputs, types::TY_Image)); + } else if (Args.hasArg(options::OPT_installapi)) { + // TODO: Lift restriction once operation can handle multiple inputs. 
+ assert(Inputs.size() == 1 && "InstallAPI action can only handle 1 input"); + const auto [InputType, InputArg] = Inputs.front(); + Action *Current = C.MakeAction(*InputArg, InputType); + Actions.push_back( + C.MakeAction(Current, types::TY_TextAPI)); } for (auto Opt : {options::OPT_print_supported_cpus, @@ -4762,6 +4774,8 @@ Action *Driver::ConstructPhaseAction( return C.MakeAction(Input, types::TY_Nothing); if (Args.hasArg(options::OPT_extract_api)) return C.MakeAction(Input, types::TY_API_INFO); + if (Args.hasArg(options::OPT_installapi)) + return C.MakeAction(Input, types::TY_TextAPI); return C.MakeAction(Input, types::TY_LLVM_BC); } case phases::Backend: { @@ -6441,7 +6455,7 @@ bool Driver::ShouldUseClangCompiler(const JobAction &JA) const { // And say "no" if this is not a kind of action clang understands. if (!isa(JA) && !isa(JA) && !isa(JA) && !isa(JA) && - !isa(JA)) + !isa(JA) && !isa(JA)) return false; return true; diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 388030592b4836..657577cea6c7d8 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -532,6 +532,7 @@ Tool *ToolChain::getTool(Action::ActionClass AC) const { case Action::PrecompileJobClass: case Action::PreprocessJobClass: case Action::ExtractAPIJobClass: + case Action::InstallAPIJobClass: case Action::AnalyzeJobClass: case Action::MigrateJobClass: case Action::VerifyPCHJobClass: diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 4459d86e77d5d9..47305f798c5fee 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -4939,6 +4939,17 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (Arg *ExtractAPIIgnoresFileArg = Args.getLastArg(options::OPT_extract_api_ignores_EQ)) ExtractAPIIgnoresFileArg->render(Args, CmdArgs); + } else if (isa(JA)) { + if (!Triple.isOSDarwin()) + D.Diag(diag::err_drv_installapi_unsupported) << Triple.str(); 
+ + CmdArgs.push_back("-installapi"); + // Add necessary library arguments for InstallAPI. + if (const Arg *A = Args.getLastArg(options::OPT_install__name)) + A->render(Args, CmdArgs); + if (const Arg *A = Args.getLastArg(options::OPT_current__version)) + A->render(Args, CmdArgs); + } else { assert((isa(JA) || isa(JA)) && "Invalid action for clang tool."); diff --git a/clang/lib/Frontend/CMakeLists.txt b/clang/lib/Frontend/CMakeLists.txt index a9166672088459..f443d88b5d30cb 100644 --- a/clang/lib/Frontend/CMakeLists.txt +++ b/clang/lib/Frontend/CMakeLists.txt @@ -7,6 +7,7 @@ set(LLVM_LINK_COMPONENTS ProfileData Support TargetParser + TextAPI ) add_clang_library(clangFrontend @@ -27,6 +28,7 @@ add_clang_library(clangFrontend HeaderIncludeGen.cpp InitPreprocessor.cpp LayoutOverrideSource.cpp + InstallAPIConsumer.cpp LogDiagnosticPrinter.cpp ModuleDependencyCollector.cpp MultiplexConsumer.cpp @@ -53,6 +55,7 @@ add_clang_library(clangFrontend clangBasic clangDriver clangEdit + clangInstallAPI clangLex clangParse clangSema diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 8d7b75b56d6129..bcb31243056b7e 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -149,7 +149,8 @@ CompilerInvocationBase::CompilerInvocationBase() FSOpts(std::make_shared()), FrontendOpts(std::make_shared()), DependencyOutputOpts(std::make_shared()), - PreprocessorOutputOpts(std::make_shared()) {} + PreprocessorOutputOpts(std::make_shared()), + InstallAPIOpts(std::make_shared()) {} CompilerInvocationBase & CompilerInvocationBase::deep_copy_assign(const CompilerInvocationBase &X) { @@ -167,6 +168,7 @@ CompilerInvocationBase::deep_copy_assign(const CompilerInvocationBase &X) { FrontendOpts = make_shared_copy(X.getFrontendOpts()); DependencyOutputOpts = make_shared_copy(X.getDependencyOutputOpts()); PreprocessorOutputOpts = make_shared_copy(X.getPreprocessorOutputOpts()); + InstallAPIOpts = 
make_shared_copy(X.getInstallAPIOpts()); } return *this; } @@ -187,6 +189,7 @@ CompilerInvocationBase::shallow_copy_assign(const CompilerInvocationBase &X) { FrontendOpts = X.FrontendOpts; DependencyOutputOpts = X.DependencyOutputOpts; PreprocessorOutputOpts = X.PreprocessorOutputOpts; + InstallAPIOpts = X.InstallAPIOpts; } return *this; } @@ -2158,6 +2161,34 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, return Diags.getNumErrors() == NumErrorsBefore; } +static bool ParseInstallAPIArgs(InstallAPIOptions &Opts, ArgList &Args, + DiagnosticsEngine &Diags, + frontend::ActionKind Action) { + unsigned NumErrorsBefore = Diags.getNumErrors(); + + InstallAPIOptions &InstallAPIOpts = Opts; +#define INSTALLAPI_OPTION_WITH_MARSHALLING(...) \ + PARSE_OPTION_WITH_MARSHALLING(Args, Diags, __VA_ARGS__) +#include "clang/Driver/Options.inc" +#undef INSTALLAPI_OPTION_WITH_MARSHALLING + if (Arg *A = Args.getLastArg(options::OPT_current__version)) + Opts.CurrentVersion.parse64(A->getValue()); + + return Diags.getNumErrors() == NumErrorsBefore; +} + +static void GenerateInstallAPIArgs(const InstallAPIOptions &Opts, + ArgumentConsumer Consumer) { + const InstallAPIOptions &InstallAPIOpts = Opts; +#define INSTALLAPI_OPTION_WITH_MARSHALLING(...) 
\ + GENERATE_OPTION_WITH_MARSHALLING(Consumer, __VA_ARGS__) +#include "clang/Driver/Options.inc" +#undef INSTALLAPI_OPTION_WITH_MARSHALLING + if (!Opts.CurrentVersion.empty()) + GenerateArg(Consumer, OPT_current__version, + std::string(Opts.CurrentVersion)); +} + static void GenerateDependencyOutputArgs(const DependencyOutputOptions &Opts, ArgumentConsumer Consumer) { const DependencyOutputOptions &DependencyOutputOpts = Opts; @@ -2557,6 +2588,7 @@ static const auto &getFrontendActionTable() { {frontend::GeneratePCH, OPT_emit_pch}, {frontend::GenerateInterfaceStubs, OPT_emit_interface_stubs}, {frontend::InitOnly, OPT_init_only}, + {frontend::InstallAPI, OPT_installapi}, {frontend::ParseSyntaxOnly, OPT_fsyntax_only}, {frontend::ModuleFileInfo, OPT_module_file_info}, {frontend::VerifyPCH, OPT_verify_pch}, @@ -4280,6 +4312,7 @@ static bool isStrictlyPreprocessorAction(frontend::ActionKind Action) { case frontend::GenerateHeaderUnit: case frontend::GeneratePCH: case frontend::GenerateInterfaceStubs: + case frontend::InstallAPI: case frontend::ParseSyntaxOnly: case frontend::ModuleFileInfo: case frontend::VerifyPCH: @@ -4654,6 +4687,11 @@ bool CompilerInvocation::CreateFromArgsImpl( Res.getDependencyOutputOpts().Targets.empty()) Diags.Report(diag::err_fe_dependency_file_requires_MT); + if (Args.hasArg(OPT_installapi)) { + ParseInstallAPIArgs(Res.getInstallAPIOpts(), Args, Diags, + Res.getFrontendOpts().ProgramAction); + } + // If sanitizer is enabled, disable OPT_ffine_grained_bitfield_accesses. 
if (Res.getCodeGenOpts().FineGrainedBitfieldAccesses && !Res.getLangOpts().Sanitize.empty()) { @@ -4844,6 +4882,7 @@ void CompilerInvocationBase::generateCC1CommandLine( GeneratePreprocessorOutputArgs(getPreprocessorOutputOpts(), Consumer, getFrontendOpts().ProgramAction); GenerateDependencyOutputArgs(getDependencyOutputOpts(), Consumer); + GenerateInstallAPIArgs(getInstallAPIOpts(), Consumer); } std::vector CompilerInvocationBase::getCC1CommandLine() const { diff --git a/clang/lib/Frontend/InstallAPIConsumer.cpp b/clang/lib/Frontend/InstallAPIConsumer.cpp new file mode 100644 index 00000000000000..c0f22c1a589f38 --- /dev/null +++ b/clang/lib/Frontend/InstallAPIConsumer.cpp @@ -0,0 +1,43 @@ +//===--- InstallAPIConsumer.cpp -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Frontend/CompilerInstance.h" +#include "clang/Frontend/FrontendActions.h" +#include "clang/InstallAPI/Context.h" + +using namespace clang; +using namespace clang::installapi; + +std::unique_ptr +InstallAPIAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { + const InstallAPIOptions &Opts = CI.getInstallAPIOpts(); + InstallAPIContext Ctx; + Ctx.BA.InstallName = Opts.InstallName; + Ctx.BA.AppExtensionSafe = CI.getLangOpts().AppExt; + Ctx.BA.CurrentVersion = Opts.CurrentVersion; + // InstallAPI requires two level namespacing. 
+ Ctx.BA.TwoLevelNamespace = true; + Ctx.TargetTriple = CI.getTarget().getTriple(); + + Ctx.Diags = &CI.getDiagnostics(); + Ctx.OutputLoc = CI.getFrontendOpts().OutputFile; + Ctx.OS = CreateOutputFile(CI, InFile); + if (!Ctx.OS) + return nullptr; + return std::make_unique(std::move(Ctx)); +} + +std::unique_ptr +InstallAPIAction::CreateOutputFile(CompilerInstance &CI, StringRef InFile) { + std::unique_ptr OS = + CI.createDefaultOutputFile(/*Binary=*/false, InFile, /*Extension=*/"tbd", + /*RemoveFileOnSignal=*/false); + if (!OS) + return nullptr; + return OS; +} diff --git a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp index 925879a68cbd09..a47c474e520a01 100644 --- a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp +++ b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp @@ -71,6 +71,8 @@ CreateFrontendBaseAction(CompilerInstance &CI) { case GenerateInterfaceStubs: return std::make_unique(); case InitOnly: return std::make_unique(); + case InstallAPI: + return std::make_unique(); case ParseSyntaxOnly: return std::make_unique(); case ModuleFileInfo: return std::make_unique(); case VerifyPCH: return std::make_unique(); diff --git a/clang/lib/InstallAPI/CMakeLists.txt b/clang/lib/InstallAPI/CMakeLists.txt new file mode 100644 index 00000000000000..b68d8fbbec1d37 --- /dev/null +++ b/clang/lib/InstallAPI/CMakeLists.txt @@ -0,0 +1,11 @@ +set(LLVM_LINK_COMPONENTS + Support + TextAPI + ) + +add_clang_library(clangInstallAPI + Context.cpp + + LINK_LIBS + clangAST + ) diff --git a/clang/lib/InstallAPI/Context.cpp b/clang/lib/InstallAPI/Context.cpp new file mode 100644 index 00000000000000..d4df52f66560c1 --- /dev/null +++ b/clang/lib/InstallAPI/Context.cpp @@ -0,0 +1,27 @@ +//===--- InstallAPI/Context.cpp -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/InstallAPI/Context.h" +#include "clang/AST/ASTContext.h" +#include "llvm/TextAPI/TextAPIWriter.h" + +using namespace clang; +using namespace clang::installapi; +using namespace llvm::MachO; + +void InstallAPIConsumer::HandleTranslationUnit(ASTContext &Context) { + if (Context.getDiagnostics().hasErrorOccurred()) + return; + InterfaceFile IF; + // Set library attributes captured through cc1 args. + Target T(Ctx.TargetTriple); + IF.addTarget(T); + IF.setFromBinaryAttrs(Ctx.BA, T); + if (auto Err = TextAPIWriter::writeToStream(*Ctx.OS, IF, Ctx.FT)) + Ctx.Diags->Report(diag::err_cannot_open_file) << Ctx.OutputLoc; +} diff --git a/clang/test/CMakeLists.txt b/clang/test/CMakeLists.txt index f17ded42a019c2..841317cef880a2 100644 --- a/clang/test/CMakeLists.txt +++ b/clang/test/CMakeLists.txt @@ -131,6 +131,7 @@ if( NOT CLANG_BUILT_STANDALONE ) llvm-rc llvm-readelf llvm-readobj + llvm-readtapi llvm-strip llvm-symbolizer llvm-windres diff --git a/clang/test/Driver/installapi.h b/clang/test/Driver/installapi.h new file mode 100644 index 00000000000000..99379b44d1379c --- /dev/null +++ b/clang/test/Driver/installapi.h @@ -0,0 +1,13 @@ +// Check non-darwin triple is rejected. +// RUN: not %clang -target x86_64-unknown-unknown -installapi %s 2> %t +// RUN: FileCheck --check-prefix INVALID_INSTALLAPI -input-file %t %s + +// INVALID_INSTALLAPI: error: InstallAPI is not supported for 'x86_64-unknown-unknown' + +// Check installapi phases. 
+// RUN: %clang -target x86_64-apple-macos11 -ccc-print-phases -installapi %s 2> %t +// RUN: FileCheck --check-prefix INSTALLAPI_PHASES -input-file %t %s + +// INSTALLAPI_PHASES: 0: input, +// INSTALLAPI_PHASES: installapi, +// INSTALLAPI_PHASES-SAME: tbd diff --git a/clang/test/InstallAPI/installapi-basic.test b/clang/test/InstallAPI/installapi-basic.test new file mode 100644 index 00000000000000..8035166d076dab --- /dev/null +++ b/clang/test/InstallAPI/installapi-basic.test @@ -0,0 +1,34 @@ +// RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: %clang_cc1 -x objective-c -triple arm64-apple-ios13.0.0 -installapi \ +// RUN: -fapplication-extension -current_version 1 -install_name /usr/lib/basic.dylib \ +// RUN: %t/basic_inputs.json -o %t/basic.tbd 2>&1 | FileCheck %s --allow-empty +// RUN: llvm-readtapi -compare %t/basic.tbd %t/expected.tbd 2>&1 | FileCheck %s --allow-empty + +// CHECK-NOT: error: +// CHECK-NOT: warning: + +//--- basic_inputs.json + +//--- expected.tbd +{ + "main_library": { + "compatibility_versions": [ + { + "version": "0" + } + ], + "install_names": [ + { + "name": "/usr/lib/basic.dylib" + } + ], + "target_info": [ + { + "min_deployment": "13.0.0", + "target": "arm64-ios" + } + ] + }, + "tapi_tbd_version": 5 +} diff --git a/clang/test/lit.cfg.py b/clang/test/lit.cfg.py index 271372b928ac55..f93b5d9c945886 100644 --- a/clang/test/lit.cfg.py +++ b/clang/test/lit.cfg.py @@ -97,6 +97,7 @@ "llvm-lto", "llvm-lto2", "llvm-profdata", + "llvm-readtapi", ToolSubst( "%clang_extdef_map", command=FindTool("clang-extdef-mapping"), From a3f61c8bfd277e0e6d65695f739f4432fb1fd721 Mon Sep 17 00:00:00 2001 From: Micah Weston Date: Tue, 13 Feb 2024 21:53:05 -0500 Subject: [PATCH 089/240] [SHT_LLVM_BB_ADDR_MAP][obj2yaml] Implements PGOAnalysisMap for elf2yaml and tests. (#80924) Adds support to obj2yaml for PGO Analysis Map. Adds a test to both obj2yaml and yaml2obj. 
--- .../ELF/bb-addr-map-pgo-analysis-map.yaml | 232 ++++++++++++++++++ .../ELF/bb-addr-map-pgo-analysis-map.yaml | 83 +++++++ llvm/tools/obj2yaml/elf2yaml.cpp | 37 +++ 3 files changed, 352 insertions(+) create mode 100644 llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml create mode 100644 llvm/test/tools/yaml2obj/ELF/bb-addr-map-pgo-analysis-map.yaml diff --git a/llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml b/llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml new file mode 100644 index 00000000000000..299bf463cf4bc9 --- /dev/null +++ b/llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml @@ -0,0 +1,232 @@ +## Check how obj2yaml produces YAML PGO Analysis Map in .llvm_bb_addr_map. + +## Check that obj2yaml uses the "Entries" tag to describe an .llvm_bb_addr_map section. + +# RUN: yaml2obj --docnum=1 %s -o %t1 +# RUN: obj2yaml %t1 | FileCheck %s --check-prefix=VALID + +# VALID: --- !ELF +# VALID-NEXT: FileHeader: +# VALID-NEXT: Class: ELFCLASS64 +# VALID-NEXT: Data: ELFDATA2LSB +# VALID-NEXT: Type: ET_EXEC +# VALID-NEXT: Sections: +# VALID-NEXT: - Name: .llvm_bb_addr_map +# VALID-NEXT: Type: SHT_LLVM_BB_ADDR_MAP +# VALID-NEXT: Entries: +# VALID-NEXT: - Version: 2 +# VALID-NEXT: Feature: 0x7 +## The 'BaseAddress' field is omitted when it's zero. 
+# VALID-NEXT: BBRanges: +# VALID-NEXT: - BBEntries: +# VALID-NEXT: - ID: 0 +# VALID-NEXT: AddressOffset: 0x1 +# VALID-NEXT: Size: 0x2 +# VALID-NEXT: Metadata: 0x3 +# VALID-NEXT: - ID: 2 +# VALID-NEXT: AddressOffset: 0x4 +# VALID-NEXT: Size: 0x5 +# VALID-NEXT: Metadata: 0x6 +# VALID-NEXT: - ID: 4 +# VALID-NEXT: AddressOffset: 0xFFFFFFFFFFFFFFF7 +# VALID-NEXT: Size: 0xFFFFFFFFFFFFFFF8 +# VALID-NEXT: Metadata: 0xFFFFFFFFFFFFFFF9 +# VALID-NEXT: - Version: 2 +# VALID-NEXT: Feature: 0xA +# VALID-NEXT: BBRanges: +# VALID-NEXT: - BaseAddress: 0xFFFFFFFFFFFFFF20 +# VALID-NEXT: BBEntries: +# VALID-NEXT: - ID: 6 +# VALID-NEXT: AddressOffset: 0xA +# VALID-NEXT: Size: 0xB +# VALID-NEXT: Metadata: 0xC +# VALID-NEXT: PGOAnalyses: +# VALID-NEXT: - FuncEntryCount: 100 +# VALID-NEXT: PGOBBEntries: +# VALID-NEXT: - BBFreq: 100 +# VALID-NEXT: Successors: +# VALID-NEXT: - ID: 2 +# VALID-NEXT: BrProb: 0x80000000 +# VALID-NEXT: - ID: 4 +# VALID-NEXT: BrProb: 0x80000000 +# VALID-NEXT: - BBFreq: 50 +# VALID-NEXT: Successors: +# VALID-NEXT: - ID: 4 +# VALID-NEXT: BrProb: 0xFFFFFFFF +# VALID-NEXT: - BBFreq: 100 +# VALID-NEXT: Successors: [] +# VALID-NEXT: PGOBBEntries: +# VALID-NEXT: - BBFreq: 20 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC +Sections: + - Name: .llvm_bb_addr_map + Type: SHT_LLVM_BB_ADDR_MAP + ShSize: [[SIZE=]] + Entries: + - Version: 2 + Feature: 0x7 + BBRanges: + - BaseAddress: 0x0 + BBEntries: + - ID: 0 + AddressOffset: 0x1 + Size: 0x2 + Metadata: 0x3 + - ID: 2 + AddressOffset: 0x4 + Size: 0x5 + Metadata: 0x6 + - ID: 4 + AddressOffset: 0xFFFFFFFFFFFFFFF7 + Size: 0xFFFFFFFFFFFFFFF8 + Metadata: 0xFFFFFFFFFFFFFFF9 + - Version: 2 + Feature: 0xA + BBRanges: + - BaseAddress: 0xFFFFFFFFFFFFFF20 + BBEntries: + - ID: 6 + AddressOffset: 0xA + Size: 0xB + Metadata: 0xC + PGOAnalyses: + - FuncEntryCount: 100 + PGOBBEntries: + - BBFreq: 100 + Successors: + - ID: 2 + BrProb: 0x80000000 + - ID: 4 + BrProb: 0x80000000 + - BBFreq: 50 + Successors: + - 
ID: 4 + BrProb: 0xFFFFFFFF + - BBFreq: 100 + Successors: [] + - PGOBBEntries: + - BBFreq: 20 + +## Check obj2yaml can dump multiple .llvm_bb_addr_map sections. + +# RUN: yaml2obj --docnum=2 %s -o %t2 +# RUN: obj2yaml %t2 | FileCheck %s --check-prefix=MULTI + +# MULTI: --- !ELF +# MULTI-NEXT: FileHeader: +# MULTI-NEXT: Class: ELFCLASS64 +# MULTI-NEXT: Data: ELFDATA2LSB +# MULTI-NEXT: Type: ET_EXEC +# MULTI-NEXT: Sections: +# MULTI-NEXT: - Name: .llvm_bb_addr_map +# MULTI-NEXT: Type: SHT_LLVM_BB_ADDR_MAP +# MULTI-NEXT: Entries: +## Field 'BaseAddress' is omitted when it is zero. +# MULTI-NEXT: - Version: 0 +# MULTI-NEXT: Feature: 0x3 +# MULTI-NEXT: BBRanges: +# MULTI-NEXT: - BBEntries: +# MULTI-NEXT: - ID: 0 +# MULTI-NEXT: AddressOffset: 0x1 +# MULTI-NEXT: Size: 0x2 +# MULTI-NEXT: Metadata: 0x3 +# MULTI-NEXT: PGOAnalyses: +# MULTI-NEXT: - FuncEntryCount: 0 +# MULTI-NEXT: PGOBBEntries: +# MULTI-NEXT: - BBFreq: 0 +# MULTI-NEXT: - Name: '.llvm_bb_addr_map (1)' +# MULTI-NEXT: Type: SHT_LLVM_BB_ADDR_MAP +# MULTI-NEXT: Entries: +# MULTI-NEXT: - Version: 0 +# MULTI-NEXT: Feature: 0x1 +# MULTI-NEXT: BBRanges: +# MULTI-NEXT: - BaseAddress: 0x20 +# MULTI-NEXT: BBEntries: [] +# MULTI-NEXT: PGOAnalyses: +# MULTI-NEXT: - FuncEntryCount: 0 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC +Sections: + - Name: .llvm_bb_addr_map + Type: SHT_LLVM_BB_ADDR_MAP + Entries: +## Check that obj2yaml does not emit the 'BaseAddress' and 'Feature' fields when +## they are zero.
+ - Version: 0 + Feature: 0x3 + BBRanges: + - BaseAddress: 0x0 + BBEntries: + - AddressOffset: 0x1 + Size: 0x2 + Metadata: 0x3 + PGOAnalyses: + - FuncEntryCount: 0 + PGOBBEntries: + - BBFreq: 0 + - Name: '.llvm_bb_addr_map (1)' + Type: SHT_LLVM_BB_ADDR_MAP + Entries: + - Version: 0 + Feature: 0x1 + BBRanges: + - BaseAddress: 0x20 + PGOAnalyses: + - FuncEntryCount: 0 + +## Check that obj2yaml uses the "Content" tag to describe an .llvm_bb_addr_map section +## when it can't extract the entries, for example, when the section is truncated. + +# RUN: yaml2obj --docnum=1 -DSIZE=0x1D %s -o %t3 +# RUN: obj2yaml %t3 | FileCheck %s --check-prefixes=TRUNCATED,INVALID + +# INVALID: --- !ELF +# INVALID-NEXT: FileHeader: +# INVALID-NEXT: Class: ELFCLASS64 +# INVALID-NEXT: Data: ELFDATA2LSB +# INVALID-NEXT: Type: ET_EXEC +# INVALID-NEXT: Sections: +# INVALID-NEXT: - Name: .llvm_bb_addr_map +# INVALID-NEXT: Type: SHT_LLVM_BB_ADDR_MAP +# TRUNCATED-NEXT: Content: {{([[:xdigit:]]{58})}}{{$}} +# TRUNCATED-NEXT: Content: {{([[:xdigit:]]{58})}}{{$}} + +## Check that obj2yaml uses the "Content" tag when original YAML does not +## provide a PGO field that was enabled in the feature byte + +# RUN: yaml2obj --docnum=3 %s -o %t4 +# RUN: obj2yaml %t4 | FileCheck %s --check-prefix=MISSING-FEC + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC +Sections: + - Name: '.llvm_bb_addr_map' + Type: SHT_LLVM_BB_ADDR_MAP + Entries: + - Version: 0 + Feature: 0x1 + BBRanges: + - BaseAddress: 0x20 + +# MISSING-FEC: --- !ELF +# MISSING-FEC-NEXT: FileHeader: +# MISSING-FEC-NEXT: Class: ELFCLASS64 +# MISSING-FEC-NEXT: Data: ELFDATA2LSB +# MISSING-FEC-NEXT: Type: ET_EXEC +# MISSING-FEC-NEXT: Sections: +# MISSING-FEC-NEXT: - Name: .llvm_bb_addr_map +# MISSING-FEC-NEXT: Type: SHT_LLVM_BB_ADDR_MAP +# MISSING-FEC-NEXT: Content: '{{([[:xdigit:]]+)}}'{{$}} + diff --git a/llvm/test/tools/yaml2obj/ELF/bb-addr-map-pgo-analysis-map.yaml 
b/llvm/test/tools/yaml2obj/ELF/bb-addr-map-pgo-analysis-map.yaml new file mode 100644 index 00000000000000..4dfaf60be3c0ed --- /dev/null +++ b/llvm/test/tools/yaml2obj/ELF/bb-addr-map-pgo-analysis-map.yaml @@ -0,0 +1,83 @@ +## Check how yaml2obj produces PGO Analysis Map in .llvm_bb_addr_map section. + +# RUN: yaml2obj --docnum=1 %s -o %t1 +# RUN: llvm-readobj --sections --section-data %t1 | FileCheck %s + +# Case 4: Specify Entries. +# CHECK: Name: .llvm_bb_addr_map (1) +# CHECK: SectionData ( +# CHECK-NEXT: 0000: 02072000 00000000 0000010B 010203E8 +# CHECK-NEXT: 0010: 07E80702 0CEEDDBB F70E0D91 A2C48801 +# CHECK-NEXT: ) + +# Case 7: Not including a field which is enabled in feature doesn't emit value +# CHECK: Name: .llvm_bb_addr_map (1) +# CHECK: SectionData ( +# CHECK-NEXT: 0000: 02012000 00000000 0000020D 010203 | +# CHECK-NEXT: ) + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC +Sections: + +## Test the following cases: + +## 1) We can produce an .llvm_bb_addr_map section from a description with +## Entries and PGO Analysis data. + - Name: '.llvm_bb_addr_map (1)' + Type: SHT_LLVM_BB_ADDR_MAP + Entries: + - Version: 2 + Feature: 0x7 + BBRanges: + - BaseAddress: 0x0000000000000020 + BBEntries: + - ID: 11 + AddressOffset: 0x00000001 + Size: 0x00000002 + Metadata: 0x00000003 + PGOAnalyses: + - FuncEntryCount: 1000 + PGOBBEntries: + - BBFreq: 1000 + Successors: + - ID: 12 + BrProb: 0xeeeeeeee + - ID: 13 + BrProb: 0x11111111 + +## 2) According to feature we have FuncEntryCount but none is provided in yaml + - Name: '.llvm_bb_addr_map (2)' + Type: SHT_LLVM_BB_ADDR_MAP + Entries: + - Version: 2 + Feature: 0x1 + BBRanges: + - BaseAddress: 0x0000000000000020 + NumBlocks: 2 + BBEntries: + - ID: 13 + AddressOffset: 0x00000001 + Size: 0x00000002 + Metadata: 0x00000003 + +## Check that yaml2obj generates a warning when we use unsupported feature. 
+# RUN: yaml2obj --docnum=2 %s 2>&1 | FileCheck %s --check-prefix=INVALID-FEATURE +# INVALID-FEATURE: warning: invalid encoding for BBAddrMap::Features: 0xff + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC +Sections: + - Name: '.llvm_bb_addr_map' + Type: SHT_LLVM_BB_ADDR_MAP + Entries: + - Version: 2 +## Specify unsupported feature + Feature: 0xFF + diff --git a/llvm/tools/obj2yaml/elf2yaml.cpp b/llvm/tools/obj2yaml/elf2yaml.cpp index 38a69f77ff8362..6b9af906736c35 100644 --- a/llvm/tools/obj2yaml/elf2yaml.cpp +++ b/llvm/tools/obj2yaml/elf2yaml.cpp @@ -889,6 +889,8 @@ ELFDumper::dumpBBAddrMapSection(const Elf_Shdr *Shdr) { DataExtractor Data(Content, Obj.isLE(), ELFT::Is64Bits ? 8 : 4); std::vector Entries; + bool HasAnyPGOAnalysisMapEntry = false; + std::vector PGOAnalyses; DataExtractor::Cursor Cur(0); uint8_t Version = 0; uint8_t Feature = 0; @@ -905,6 +907,7 @@ ELFDumper::dumpBBAddrMapSection(const Elf_Shdr *Shdr) { } uint64_t NumBBRanges = 1; uint64_t NumBlocks = 0; + uint32_t TotalNumBlocks = 0; auto FeatureOrErr = llvm::object::BBAddrMap::Features::decode(Feature); if (!FeatureOrErr) return FeatureOrErr.takeError(); @@ -934,10 +937,42 @@ ELFDumper::dumpBBAddrMapSection(const Elf_Shdr *Shdr) { uint64_t Metadata = Data.getULEB128(Cur); BBEntries.push_back({ID, Offset, Size, Metadata}); } + TotalNumBlocks += BBEntries.size(); BBRanges.push_back({BaseAddress, /*NumBlocks=*/{}, BBEntries}); } Entries.push_back( {Version, Feature, /*NumBBRanges=*/{}, std::move(BBRanges)}); + + ELFYAML::PGOAnalysisMapEntry &PGOAnalysis = PGOAnalyses.emplace_back(); + if (FeatureOrErr->hasPGOAnalysis()) { + HasAnyPGOAnalysisMapEntry = true; + + if (FeatureOrErr->FuncEntryCount) + PGOAnalysis.FuncEntryCount = Data.getULEB128(Cur); + + if (FeatureOrErr->hasPGOAnalysisBBData()) { + auto &PGOBBEntries = PGOAnalysis.PGOBBEntries.emplace(); + for (uint64_t BlockIndex = 0; Cur && BlockIndex < TotalNumBlocks; + ++BlockIndex) { + auto &PGOBBEntry = 
PGOBBEntries.emplace_back(); + if (FeatureOrErr->BBFreq) { + PGOBBEntry.BBFreq = Data.getULEB128(Cur); + if (!Cur) + break; + } + + if (FeatureOrErr->BrProb) { + auto &SuccEntries = PGOBBEntry.Successors.emplace(); + uint64_t SuccCount = Data.getULEB128(Cur); + for (uint64_t SuccIdx = 0; Cur && SuccIdx < SuccCount; ++SuccIdx) { + uint32_t ID = Data.getULEB128(Cur); + uint32_t BrProb = Data.getULEB128(Cur); + SuccEntries.push_back({ID, BrProb}); + } + } + } + } + } } if (!Cur) { @@ -946,6 +981,8 @@ ELFDumper::dumpBBAddrMapSection(const Elf_Shdr *Shdr) { S->Content = yaml::BinaryRef(Content); } else { S->Entries = std::move(Entries); + if (HasAnyPGOAnalysisMapEntry) + S->PGOAnalyses = std::move(PGOAnalyses); } return S.release(); From ec5f4a4bc6f27b044bc73668414ecefe9690d283 Mon Sep 17 00:00:00 2001 From: Cyndy Ishida Date: Tue, 13 Feb 2024 18:58:53 -0800 Subject: [PATCH 090/240] [InstallAPI] Add missing link to clangBasic Fixes CI. --- clang/lib/InstallAPI/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/lib/InstallAPI/CMakeLists.txt b/clang/lib/InstallAPI/CMakeLists.txt index b68d8fbbec1d37..1476b737c5e61c 100644 --- a/clang/lib/InstallAPI/CMakeLists.txt +++ b/clang/lib/InstallAPI/CMakeLists.txt @@ -8,4 +8,5 @@ add_clang_library(clangInstallAPI LINK_LIBS clangAST + clangBasic ) From 3f738a4c55dfb6476c47cab30e878cf5f0643351 Mon Sep 17 00:00:00 2001 From: Mariusz Borsa Date: Tue, 13 Feb 2024 20:14:46 -0800 Subject: [PATCH 091/240] [Sanitizers][ABI] Remove too strong assert in asan_abi_shim (#81696) Recently we enabled building the shim for arm64_32 arch. On this arch, sizeof(uptr) == sizeof(unsigned long) == 4 - so this assert will fail in runtime. 
Need to just remove this assert rdar://122927166 Co-authored-by: Mariusz Borsa --- compiler-rt/lib/asan_abi/asan_abi_shim.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler-rt/lib/asan_abi/asan_abi_shim.cpp b/compiler-rt/lib/asan_abi/asan_abi_shim.cpp index 35c45dff96f6d2..2512abc6412502 100644 --- a/compiler-rt/lib/asan_abi/asan_abi_shim.cpp +++ b/compiler-rt/lib/asan_abi/asan_abi_shim.cpp @@ -54,7 +54,7 @@ void *__asan_memmove(void *dest, const void *src, uptr n) { // Functions concerning RTL startup and initialization void __asan_init(void) { - static_assert(sizeof(uptr) == 8); + static_assert(sizeof(uptr) == 8 || sizeof(uptr) == 4); static_assert(sizeof(u64) == 8); static_assert(sizeof(u32) == 4); From bc08cc2ac8b0fc0898d191e36db08d136d659f7d Mon Sep 17 00:00:00 2001 From: Han-Chung Wang Date: Tue, 13 Feb 2024 20:20:24 -0800 Subject: [PATCH 092/240] [mlir][tensor] Add support for tensor.pack static shapes inference. (#80848) Fixes https://github.com/openxla/iree/issues/16317 --- mlir/lib/Dialect/Tensor/IR/TensorOps.cpp | 60 ++++++++++++++++++++++ mlir/test/Dialect/Tensor/canonicalize.mlir | 39 ++++++++++++++ 2 files changed, 99 insertions(+) diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp index 8298cf102e28a3..bb72cba96ad935 100644 --- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp +++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp @@ -3983,6 +3983,41 @@ static bool paddingIsNotNeeded(PackOp op) { op.getMixedTiles()); } +/// Returns true if the `srcShape` or `destShape` is different from the one in +/// `packOp` and populates each with the inferred static shape. 
+static bool inferStaticShape(PackOp packOp, SmallVectorImpl &srcShape, + SmallVectorImpl &destShape) { + bool changeNeeded = false; + srcShape.assign(packOp.getSourceType().getShape().begin(), + packOp.getSourceType().getShape().end()); + destShape.assign(packOp.getDestType().getShape().begin(), + packOp.getDestType().getShape().end()); + llvm::SmallSetVector innerDims; + innerDims.insert(packOp.getInnerDimsPos().begin(), + packOp.getInnerDimsPos().end()); + auto outerDimsPerm = packOp.getOuterDimsPerm(); + int srcRank = packOp.getSourceRank(); + for (auto i : llvm::seq(0, srcRank)) { + if (innerDims.contains(i)) + continue; + int64_t srcPos = i; + int64_t destPos = i; + if (!outerDimsPerm.empty()) + destPos = outerDimsPerm[srcPos]; + if (ShapedType::isDynamic(srcShape[srcPos]) == + ShapedType::isDynamic(destShape[destPos])) { + continue; + } + int64_t size = srcShape[srcPos]; + if (ShapedType::isDynamic(size)) + size = destShape[destPos]; + srcShape[srcPos] = size; + destShape[destPos] = size; + changeNeeded = true; + } + return changeNeeded; +} + LogicalResult PackOp::canonicalize(PackOp packOp, PatternRewriter &rewriter) { // Fold an unpack(pack(x)) to x. if (auto unPackOp = packOp.getSource().getDefiningOp()) { @@ -4003,6 +4038,31 @@ LogicalResult PackOp::canonicalize(PackOp packOp, PatternRewriter &rewriter) { rewriter.finalizeOpModification(packOp); return success(); } + + // Insert tensor.cast ops if static shape inference is available. 
+ SmallVector srcShape, destShape; + if (inferStaticShape(packOp, srcShape, destShape)) { + Location loc = packOp.getLoc(); + Value source = packOp.getSource(); + if (srcShape != packOp.getSourceType().getShape()) { + auto newSrcType = packOp.getSourceType().clone(srcShape); + source = + rewriter.create(loc, newSrcType, packOp.getSource()); + } + Value dest = packOp.getDest(); + if (destShape != packOp.getDestType().getShape()) { + auto newDestType = packOp.getDestType().clone(destShape); + dest = + rewriter.create(loc, newDestType, packOp.getDest()); + } + Value newOp = rewriter.create( + loc, source, dest, packOp.getInnerDimsPos(), packOp.getMixedTiles(), + packOp.getPaddingValue(), packOp.getOuterDimsPerm()); + rewriter.replaceOpWithNewOp( + packOp, packOp.getResult().getType(), newOp); + return success(); + } + return failure(); } diff --git a/mlir/test/Dialect/Tensor/canonicalize.mlir b/mlir/test/Dialect/Tensor/canonicalize.mlir index 90c715bf2eb2da..3b6cd799a6f348 100644 --- a/mlir/test/Dialect/Tensor/canonicalize.mlir +++ b/mlir/test/Dialect/Tensor/canonicalize.mlir @@ -809,6 +809,45 @@ func.func @fold_padding_value_pack(%arg0: tensor<1200x500000xf32>) -> tensor<312 // ----- +func.func @infer_src_shape_pack(%src: tensor, %dest: tensor<10x20x30x40x16xf32>) -> tensor<10x20x30x40x16xf32> { + %cst = arith.constant 0.000000e+00 : f32 + %pack = tensor.pack %src + padding_value(%cst : f32) + outer_dims_perm = [2, 1, 3, 0] + inner_dims_pos = [2] + inner_tiles = [16] + into %dest : tensor -> tensor<10x20x30x40x16xf32> + return %pack : tensor<10x20x30x40x16xf32> +} +// CHECK-LABEL: func.func @infer_src_shape_pack +// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]] +// CHECK-SAME: %[[DEST:[0-9a-zA-Z]+]] +// CHECK: %[[CAST_SRC:.+]] = tensor.cast %[[SRC]] : tensor to tensor<30x20x?x10xf32> +// CHECK: %[[PACK:.+]] = tensor.pack %[[CAST_SRC]] {{.+}} into %[[DEST]] +// CHECK: return %[[PACK]] + +// ----- + +func.func @infer_dest_shape_pack(%src: tensor<30x20x?x10xf32>, %dest: tensor) -> 
tensor { + %cst = arith.constant 0.000000e+00 : f32 + %pack = tensor.pack %src + padding_value(%cst : f32) + outer_dims_perm = [2, 1, 3, 0] + inner_dims_pos = [2] + inner_tiles = [16] + into %dest : tensor<30x20x?x10xf32> -> tensor + return %pack : tensor +} +// CHECK-LABEL: func.func @infer_dest_shape_pack +// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]] +// CHECK-SAME: %[[DEST:[0-9a-zA-Z]+]] +// CHECK: %[[CAST_DEST:.+]] = tensor.cast %[[DEST]] : tensor to tensor<10x20x30x?x16xf32> +// CHECK: %[[PACK:.+]] = tensor.pack %[[SRC]] {{.+}} into %[[CAST_DEST]] +// CHECK: %[[CAST_PACK:.+]] = tensor.cast %[[PACK]] : tensor<10x20x30x?x16xf32> to tensor +// CHECK: return %[[CAST_PACK]] + +// ----- + func.func @fold_padding_value_pack_negative1(%arg0: tensor<1200x499999xf32>) -> tensor<31250x1200x16x1xf32> { %cst = arith.constant 0.000000e+00 : f32 %0 = tensor.empty() : tensor<31250x1200x16x1xf32> From b9567bc78f42a6850ddf3b3cf3f9a0d2cc08ffff Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 14 Feb 2024 12:30:29 +0800 Subject: [PATCH 093/240] [RISCV] Use SelectionDAG::getVScale in lowerVPReverseExperimental. NFCI (#81694) Use a slightly more idiomatic way of getting vscale. getVScale performs additional constant folding, but I presume computeKnownBits also catches these cases too. --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 73492c28815b1d..4caadef694d725 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -11106,8 +11106,8 @@ RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op, // Slide off any elements from past EVL that were reversed into the low // elements. 
unsigned MinElts = GatherVT.getVectorMinNumElements(); - SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT, - DAG.getConstant(MinElts, DL, XLenVT)); + SDValue VLMax = + DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts)); SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL); Result = getVSlidedown(DAG, Subtarget, DL, GatherVT, From 69bcb69bbac05c52129ffcae4f8995c2914ff3fc Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Fri, 20 Oct 2023 08:05:30 -0700 Subject: [PATCH 094/240] Apply clang-tidy fixes for llvm-qualified-auto in TensorOps.cpp (NFC) --- mlir/lib/Dialect/Tensor/IR/TensorOps.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp index bb72cba96ad935..945e8eb4e0f92b 100644 --- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp +++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp @@ -817,7 +817,7 @@ struct DimOfDestStyleOp : public OpRewritePattern { return failure(); auto resultIndex = source.cast().getResultNumber(); - auto initOperand = destOp.getDpsInitOperand(resultIndex); + auto *initOperand = destOp.getDpsInitOperand(resultIndex); rewriter.modifyOpInPlace( dimOp, [&]() { dimOp.getSourceMutable().assign(initOperand->get()); }); From a854982aa10725571409c367ab59da7c809b59f4 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Fri, 20 Oct 2023 08:07:18 -0700 Subject: [PATCH 095/240] Apply clang-tidy fixes for readability-simplify-boolean-expr in TensorOps.cpp (NFC) --- mlir/lib/Dialect/Tensor/IR/TensorOps.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp index 945e8eb4e0f92b..303b38c70d747f 100644 --- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp +++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp @@ -2797,7 +2797,7 @@ RankedTensorType PadOp::inferResultType(RankedTensorType sourceType, return RankedTensorType(); if (staticHigh.size() != rank) return 
RankedTensorType(); - if (!(resultShape.empty() || resultShape.size() == rank)) + if (!resultShape.empty() && resultShape.size() != rank) return RankedTensorType(); SmallVector inferredShape; From d2f067693becb25e4479712c8290d321bf83d4bd Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Fri, 20 Oct 2023 08:20:53 -0700 Subject: [PATCH 096/240] Apply clang-tidy fixes for readability-identifier-naming in TosaOps.cpp (NFC) --- mlir/lib/Dialect/Tosa/IR/TosaOps.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp index 729116da45e47d..950ee597b891b5 100644 --- a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp +++ b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp @@ -108,7 +108,7 @@ struct TosaDialectBytecodeInterface : public BytecodeDialectInterface { } LogicalResult upgradeFromVersion(Operation *topLevelOp, - const DialectVersion &version_) const final { + const DialectVersion &version) const final { return success(); } }; @@ -454,12 +454,12 @@ static void buildAvgPool2dOpWithQuantInfo(OpBuilder &builder, OperationState &result, Type outputType, Value input, DenseArrayAttr kernel, DenseArrayAttr stride, - DenseArrayAttr pad, TypeAttr acc_type) { + DenseArrayAttr pad, TypeAttr accType) { result.addOperands(input); result.addAttribute("kernel", kernel); result.addAttribute("stride", stride); result.addAttribute("pad", pad); - result.addAttribute("acc_type", acc_type); + result.addAttribute("acc_type", accType); auto quantAttr = buildUnaryOpQuantizationAttr(builder, input, outputType); if (quantAttr) result.addAttribute("quantization_info", quantAttr); From 153661db5c7b2b49c4bde1d91410952933e6c584 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Fri, 20 Oct 2023 08:32:38 -0700 Subject: [PATCH 097/240] Apply clang-tidy fixes for llvm-qualified-auto in TransformOps.cpp (NFC) --- mlir/lib/Dialect/Transform/IR/TransformOps.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git 
a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp index f2a57383cc5bf9..7afa856fb12ce6 100644 --- a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp +++ b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp @@ -260,7 +260,7 @@ transform::AnnotateOp::apply(transform::TransformRewriter &rewriter, } attr = params[0]; } - for (auto target : targets) + for (auto *target : targets) target->setAttr(getName(), attr); return DiagnosedSilenceableFailure::success(); } @@ -330,7 +330,7 @@ DiagnosedSilenceableFailure transform::ApplyDeadCodeEliminationOp::applyToOne( auto eraseOp = [&](Operation *op) { // Remove op and nested ops from the worklist. op->walk([&](Operation *op) { - auto it = llvm::find(worklist, op); + const auto *it = llvm::find(worklist, op); if (it != worklist.end()) worklist.erase(it); }); @@ -666,7 +666,7 @@ void transform::ApplyToLLVMConversionPatternsOp::populatePatterns( TypeConverter &typeConverter, RewritePatternSet &patterns) { Dialect *dialect = getContext()->getLoadedDialect(getDialectName()); assert(dialect && "expected that dialect is loaded"); - auto iface = cast(dialect); + auto *iface = cast(dialect); // ConversionTarget is currently ignored because the enclosing // apply_conversion_patterns op sets up its own ConversionTarget. 
ConversionTarget target(*getContext()); @@ -686,7 +686,7 @@ LogicalResult transform::ApplyToLLVMConversionPatternsOp::verify() { if (!dialect) return emitOpError("unknown dialect or dialect not loaded: ") << getDialectName(); - auto iface = dyn_cast(dialect); + auto *iface = dyn_cast(dialect); if (!iface) return emitOpError( "dialect does not implement ConvertToLLVMPatternInterface or " From 70ebc78efb5df33395603329f4622b51f2c259ce Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Fri, 20 Oct 2023 08:33:59 -0700 Subject: [PATCH 098/240] Apply clang-tidy fixes for performance-unnecessary-value-param in TransformOps.cpp (NFC) --- mlir/lib/Dialect/Transform/IR/TransformOps.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp index 7afa856fb12ce6..efb724006674f4 100644 --- a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp +++ b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp @@ -1754,7 +1754,7 @@ DiagnosedSilenceableFailure transform::MatchParamCmpIOp::apply(transform::TransformRewriter &rewriter, transform::TransformResults &results, transform::TransformState &state) { - auto signedAPIntAsString = [&](APInt value) { + auto signedAPIntAsString = [&](const APInt &value) { std::string str; llvm::raw_string_ostream os(str); value.print(os, /*isSigned=*/true); From 61c83e9491b2be71a54b255cdb11f65365245953 Mon Sep 17 00:00:00 2001 From: Owen Pan Date: Tue, 13 Feb 2024 20:40:19 -0800 Subject: [PATCH 099/240] Revert "[clang-format][NFC] Make LangOpts global in namespace Format" This reverts commit 32e65b0b8a743678974c7ca7913c1d6c41bb0772. It seems to break some PowerPC bots. See https://github.com/llvm/llvm-project/pull/81390#issuecomment-1941964803. 
--- clang/lib/Format/FormatTokenLexer.cpp | 12 ++++++++---- clang/lib/Format/FormatTokenLexer.h | 6 ++++++ clang/lib/Format/IntegerLiteralSeparatorFixer.cpp | 2 +- clang/lib/Format/TokenAnalyzer.cpp | 6 +----- clang/lib/Format/TokenAnalyzer.h | 2 -- 5 files changed, 16 insertions(+), 12 deletions(-) diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index 492e7e96dd22e6..036f7e6a4efc1e 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -13,7 +13,11 @@ //===----------------------------------------------------------------------===// #include "FormatTokenLexer.h" -#include "TokenAnalyzer.h" +#include "FormatToken.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Format/Format.h" +#include "llvm/Support/Regex.h" namespace clang { namespace format { @@ -24,12 +28,12 @@ FormatTokenLexer::FormatTokenLexer( llvm::SpecificBumpPtrAllocator &Allocator, IdentifierTable &IdentTable) : FormatTok(nullptr), IsFirstToken(true), StateStack({LexerState::NORMAL}), - Column(Column), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID), + Column(Column), TrailingWhitespace(0), + LangOpts(getFormattingLangOpts(Style)), SourceMgr(SourceMgr), ID(ID), Style(Style), IdentTable(IdentTable), Keywords(IdentTable), Encoding(Encoding), Allocator(Allocator), FirstInLineIndex(0), FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin), MacroBlockEndRegex(Style.MacroBlockEnd) { - assert(LangOpts.CPlusPlus); Lex.reset(new Lexer(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts)); Lex->SetKeepWhitespaceMode(true); @@ -1438,7 +1442,7 @@ void FormatTokenLexer::readRawToken(FormatToken &Tok) { void FormatTokenLexer::resetLexer(unsigned Offset) { StringRef Buffer = SourceMgr.getBufferData(ID); - assert(LangOpts.CPlusPlus); + LangOpts = getFormattingLangOpts(Style); Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID), LangOpts, Buffer.begin(), 
Buffer.begin() + Offset, Buffer.end())); Lex->SetKeepWhitespaceMode(true); diff --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h index ca91c5b7d20d4e..65dd733bd53352 100644 --- a/clang/lib/Format/FormatTokenLexer.h +++ b/clang/lib/Format/FormatTokenLexer.h @@ -17,9 +17,14 @@ #include "Encoding.h" #include "FormatToken.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Format/Format.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringSet.h" +#include "llvm/Support/Regex.h" #include @@ -115,6 +120,7 @@ class FormatTokenLexer { unsigned Column; unsigned TrailingWhitespace; std::unique_ptr Lex; + LangOptions LangOpts; const SourceManager &SourceMgr; FileID ID; const FormatStyle &Style; diff --git a/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp b/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp index 3c2ceddd5599cf..87823ae32b1138 100644 --- a/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp +++ b/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp @@ -79,7 +79,7 @@ IntegerLiteralSeparatorFixer::process(const Environment &Env, AffectedRangeManager AffectedRangeMgr(SourceMgr, Env.getCharRanges()); const auto ID = Env.getFileID(); - assert(LangOpts.CPlusPlus); + const auto LangOpts = getFormattingLangOpts(Style); Lexer Lex(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts); Lex.SetCommentRetentionState(true); diff --git a/clang/lib/Format/TokenAnalyzer.cpp b/clang/lib/Format/TokenAnalyzer.cpp index f9d1fdb86f1ae9..bd648c430f9b0a 100644 --- a/clang/lib/Format/TokenAnalyzer.cpp +++ b/clang/lib/Format/TokenAnalyzer.cpp @@ -35,8 +35,6 @@ namespace clang { namespace format { -LangOptions LangOpts; - // FIXME: Instead of printing the diagnostic we should store it and have a // better way to return errors through the format APIs. 
class FatalDiagnosticConsumer : public DiagnosticConsumer { @@ -101,11 +99,9 @@ TokenAnalyzer::TokenAnalyzer(const Environment &Env, const FormatStyle &Style) std::pair TokenAnalyzer::process(bool SkipAnnotation) { - LangOpts = getFormattingLangOpts(Style); - tooling::Replacements Result; llvm::SpecificBumpPtrAllocator Allocator; - IdentifierTable IdentTable(LangOpts); + IdentifierTable IdentTable(getFormattingLangOpts(Style)); FormatTokenLexer Lex(Env.getSourceManager(), Env.getFileID(), Env.getFirstStartColumn(), Style, Encoding, Allocator, IdentTable); diff --git a/clang/lib/Format/TokenAnalyzer.h b/clang/lib/Format/TokenAnalyzer.h index 18c1431eb37612..4086dab1c94c3a 100644 --- a/clang/lib/Format/TokenAnalyzer.h +++ b/clang/lib/Format/TokenAnalyzer.h @@ -34,8 +34,6 @@ namespace clang { namespace format { -extern LangOptions LangOpts; - class Environment { public: // This sets up an virtual file system with file \p FileName containing the From eafe98f937c790632452d1ab741477e4fb7e5531 Mon Sep 17 00:00:00 2001 From: Owen Pan Date: Tue, 13 Feb 2024 22:35:52 -0800 Subject: [PATCH 100/240] [clang-format] Always add a space after #if and #elif (#81578) Fixes #81569. 
--- clang/lib/Format/TokenAnnotator.cpp | 2 ++ clang/unittests/Format/FormatTest.cpp | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index b9a000faae7cf7..08a49bc17f13f3 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -1856,6 +1856,8 @@ class AnnotatingParser { case tok::pp_elif: Contexts.back().IsExpression = true; next(); + if (CurrentToken) + CurrentToken->SpacesRequiredBefore = true; parseLine(); break; default: diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 13937a15fdaee2..79cd521b6a9998 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -16491,6 +16491,10 @@ TEST_F(FormatTest, ConfigurableSpaceBeforeParens) { verifyFormat("T A::operator()();", NoSpace); verifyFormat("X A::operator++(T);", NoSpace); verifyFormat("auto lambda = []() { return 0; };", NoSpace); + verifyFormat("#if (foo || bar) && baz\n" + "#elif ((a || b) && c) || d\n" + "#endif", + NoSpace); FormatStyle Space = getLLVMStyle(); Space.SpaceBeforeParens = FormatStyle::SBPO_Always; From 3537ccccfe4e01ce79b3e9b6bb6ed378d8752fd1 Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Wed, 14 Feb 2024 07:16:26 +0000 Subject: [PATCH 101/240] [mlir][Bazel] Add missing dependency after 29d1aca05caa06e30d7a76ee15fd166fa32e1043 --- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 2c534c7614b10d..c825d99b60a6b9 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -1529,6 +1529,7 @@ cc_library( ":ArithDialect", ":ArithUtils", ":DialectUtils", + ":FuncDialect", ":GPUCommonTransforms", ":GPUCompilationAttrInterfacesIncGen", ":GPUDialect", From 
e6253102a7c63d73594699d93b1b412f0795ec11 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 13 Feb 2024 23:26:08 -0800 Subject: [PATCH 102/240] [DAGCombiner] Remove unnecessary commonAlignment from CombineExtLoad. (#81705) The getAlign function for a load returns the commonAlignment of the "base align" and the offset stored in the MachinePointerInfo. We're splitting a load here, so we should take the base alignment from the original load without any offset that may already exist in the original load. The new load can then maintain its own alignment using just the base alignment and its own offset. Noticed by inspection. --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 52011e593f2e0a..f35466fb607360 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -12971,12 +12971,12 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) { SDValue BasePtr = LN0->getBasePtr(); for (unsigned Idx = 0; Idx < NumSplits; Idx++) { const unsigned Offset = Idx * Stride; - const Align Align = commonAlignment(LN0->getAlign(), Offset); - SDValue SplitLoad = DAG.getExtLoad( - ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr, - LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align, - LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); + SDValue SplitLoad = + DAG.getExtLoad(ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), + BasePtr, LN0->getPointerInfo().getWithOffset(Offset), + SplitSrcVT, LN0->getOriginalAlign(), + LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::getFixed(Stride), DL); From 86ce491f308768137df627be9f32f6c191d809b2 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 13 Feb 2024 23:26:25 -0800 Subject: [PATCH 103/240] [DAGCombiner] Remove unneeded commonAlignment 
from reduceLoadWidth. (#81707) We already have the PtrOff factored into MachinePointerInfo. Any calls to getAlign on the new load will do commonAlignment with the MachinePointerInfo offset and the base alignment. --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 8 ++++---- llvm/test/CodeGen/X86/fold-sext-trunc.ll | 2 +- .../update_llc_test_checks/Inputs/lanai_isel.ll.expected | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index f35466fb607360..bdd2336fa42379 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -14382,7 +14382,6 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) { DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt; uint64_t PtrOff = PtrAdjustmentInBits / 8; - Align NewAlign = commonAlignment(LN0->getAlign(), PtrOff); SDLoc DL(LN0); // The original load itself didn't wrap, so an offset within it doesn't. SDNodeFlags Flags; @@ -14394,13 +14393,14 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) { SDValue Load; if (ExtType == ISD::NON_EXTLOAD) Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr, - LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign, + LN0->getPointerInfo().getWithOffset(PtrOff), + LN0->getOriginalAlign(), LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); else Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT, - NewAlign, LN0->getMemOperand()->getFlags(), - LN0->getAAInfo()); + LN0->getOriginalAlign(), + LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); // Replace the old load's chain with the new load's chain.
WorklistRemover DeadNodes(*this); diff --git a/llvm/test/CodeGen/X86/fold-sext-trunc.ll b/llvm/test/CodeGen/X86/fold-sext-trunc.ll index 9963bb7b030dc6..26a4ed0a0cd4bf 100644 --- a/llvm/test/CodeGen/X86/fold-sext-trunc.ll +++ b/llvm/test/CodeGen/X86/fold-sext-trunc.ll @@ -18,7 +18,7 @@ define void @int322(i32 %foo) !dbg !5 { entry: %val = load i64, ptr @g_10, !dbg !16 %0 = load i32, ptr getelementptr inbounds (%struct.S1, ptr @g_10, i32 0, i32 1), align 4, !dbg !17 -; MIR: renamable {{\$r[a-z]+}} = MOVSX64rm32 {{.*}}, @g_10 + 4,{{.*}} debug-location !17 :: (dereferenceable load (s32) from @g_10 + 4) +; MIR: renamable {{\$r[a-z]+}} = MOVSX64rm32 {{.*}}, @g_10 + 4,{{.*}} debug-location !17 :: (dereferenceable load (s32) from @g_10 + 4, basealign 8) %1 = sext i32 %0 to i64, !dbg !18 %tmp4.i = lshr i64 %val, 32, !dbg !19 %tmp5.i = trunc i64 %tmp4.i to i32, !dbg !20 diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/lanai_isel.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/lanai_isel.ll.expected index 7d152d9d3ec84c..80145c5e098e05 100644 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/lanai_isel.ll.expected +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/lanai_isel.ll.expected @@ -53,7 +53,7 @@ define i64 @i16_test(i16 %i) nounwind readnone { ; CHECK-NEXT: t14: ch,glue = CopyToReg t0, Register:i32 $rv, t33 ; CHECK-NEXT: t1: i32 = ADD_I_LO TargetFrameIndex:i32<-1>, TargetConstant:i32<0> ; CHECK-NEXT: t21: i32 = OR_I_LO t1, TargetConstant:i32<2> -; CHECK-NEXT: t23: i32,ch = LDHz_RI t21, TargetConstant:i32<0>, TargetConstant:i32<0>, t0 +; CHECK-NEXT: t23: i32,ch = LDHz_RI t21, TargetConstant:i32<0>, TargetConstant:i32<0>, t0 ; CHECK-NEXT: t22: i32,ch = LDHz_RI TargetFrameIndex:i32<0>, TargetConstant:i32<0>, TargetConstant:i32<0>, t0 ; CHECK-NEXT: t24: i32 = ADD_R t23, t22, TargetConstant:i32<0> ; CHECK-NEXT: t27: i32 = AND_I_HI t24, TargetConstant:i32<0> @@ -76,7 +76,7 @@ 
define i64 @i8_test(i8 %i) nounwind readnone { ; CHECK-NEXT: t14: ch,glue = CopyToReg t0, Register:i32 $rv, t33 ; CHECK-NEXT: t1: i32 = ADD_I_LO TargetFrameIndex:i32<-1>, TargetConstant:i32<0> ; CHECK-NEXT: t21: i32 = OR_I_LO t1, TargetConstant:i32<3> -; CHECK-NEXT: t23: i32,ch = LDBz_RI t21, TargetConstant:i32<0>, TargetConstant:i32<0>, t0 +; CHECK-NEXT: t23: i32,ch = LDBz_RI t21, TargetConstant:i32<0>, TargetConstant:i32<0>, t0 ; CHECK-NEXT: t22: i32,ch = LDBz_RI TargetFrameIndex:i32<0>, TargetConstant:i32<0>, TargetConstant:i32<0>, t0 ; CHECK-NEXT: t24: i32 = ADD_R t23, t22, TargetConstant:i32<0> ; CHECK-NEXT: t26: i32 = SLI TargetConstant:i32<255> From b5d694ba14524e0161421b13c875747d5fa917de Mon Sep 17 00:00:00 2001 From: Guray Ozen Date: Wed, 14 Feb 2024 08:28:45 +0100 Subject: [PATCH 104/240] [mlir][nvvm] Introduce `nvvm.barrier` OP (#81487) This PR introduces the `nvvm.barrier` OP to the NVVM dialect. Currently, NVVM only supports the `nvvm.barrier0`, which synchronizes all threads using barrier resource 0. The new `nvvm.barrier` has two essential arguments: the barrier resource and the number of threads. This added flexibility allows for selective synchronization of threads within a CTA, aligning with the capabilities provided by LLVM intrinsics or the PTX model. I think we can deprecate `nvvm.barrier0` in favor of the more generic `nvvm.barrier`. ``` // Equivalent to nvvm.barrier0 (or __syncthreads() in CUDA) nvvm.barrier // Synchronize all threads using the 3rd barrier resource. nvvm.barrier id = 3 // Synchronize %numberOfThreads threads using the 3rd barrier resource.
nvvm.barrier id = 3 number_of_threads = %numberOfThreads ``` --- mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 19 +++++++++++++++++++ mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp | 7 +++++++ mlir/test/Dialect/LLVMIR/nvvm.mlir | 12 ++++++++++++ mlir/test/Target/LLVMIR/nvvmir.mlir | 19 +++++++++++++++++++ 4 files changed, 57 insertions(+) diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 5a75944af0a4f1..8ec8e16f75c94b 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -390,6 +390,25 @@ def NVVM_Barrier0Op : NVVM_Op<"barrier0"> { let assemblyFormat = "attr-dict"; } +def NVVM_BarrierOp : NVVM_Op<"barrier", [AttrSizedOperandSegments]> { + let arguments = (ins + Optional:$barrierId, + Optional:$numberOfThreads); + string llvmBuilder = [{ + if ($numberOfThreads && $barrierId) { + createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier, + {$barrierId, $numberOfThreads}); + } else if($barrierId) { + createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_n, + {$barrierId}); + } else { + createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier0); + } + }]; + let hasVerifier = 1; + let assemblyFormat = "(`id` `=` $barrierId^)? (`number_of_threads` `=` $numberOfThreads^)? 
attr-dict"; +} + def NVVM_ClusterArriveOp : NVVM_Op<"cluster.arrive"> { let arguments = (ins OptionalAttr:$aligned); diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp index 9b1ef084ee7f16..4780ec09b81b9b 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp @@ -1022,6 +1022,13 @@ LogicalResult NVVM::SetMaxRegisterOp::verify() { return success(); } +LogicalResult NVVM::BarrierOp::verify() { + if (getNumberOfThreads() && !getBarrierId()) + return emitOpError( + "barrier id is missing, it should be set between 0 to 15"); + return success(); +} + //===----------------------------------------------------------------------===// // NVVMDialect initialization, type parsing, and registration. //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/LLVMIR/nvvm.mlir b/mlir/test/Dialect/LLVMIR/nvvm.mlir index 0369f45ca6a015..f35393c5e95748 100644 --- a/mlir/test/Dialect/LLVMIR/nvvm.mlir +++ b/mlir/test/Dialect/LLVMIR/nvvm.mlir @@ -43,6 +43,18 @@ func.func @llvm_nvvm_barrier0() { llvm.return } +// CHECK-LABEL: @llvm_nvvm_barrier +// CHECK-SAME: (%[[barId:.*]]: i32, %[[numberOfThreads:.*]]: i32) +llvm.func @llvm_nvvm_barrier(%barId : i32, %numberOfThreads : i32) { + // CHECK: nvvm.barrier + nvvm.barrier + // CHECK: nvvm.barrier id = %[[barId]] + nvvm.barrier id = %barId + // CHECK: nvvm.barrier id = %[[barId]] number_of_threads = %[[numberOfThreads]] + nvvm.barrier id = %barId number_of_threads = %numberOfThreads + llvm.return +} + // CHECK-LABEL: @llvm_nvvm_cluster_arrive func.func @llvm_nvvm_cluster_arrive() { // CHECK: nvvm.cluster.arrive diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir index 3a6a4544e20dc1..a8ae4d97888c90 100644 --- a/mlir/test/Target/LLVMIR/nvvmir.mlir +++ b/mlir/test/Target/LLVMIR/nvvmir.mlir @@ -80,6 +80,18 @@ llvm.func @llvm_nvvm_barrier0() { llvm.return } +// 
CHECK-LABEL: @llvm_nvvm_barrier( +// CHECK-SAME: i32 %[[barId:.*]], i32 %[[numThreads:.*]]) +llvm.func @llvm_nvvm_barrier(%barID : i32, %numberOfThreads : i32) { + // CHECK: call void @llvm.nvvm.barrier0() + nvvm.barrier + // CHECK: call void @llvm.nvvm.barrier.n(i32 %[[barId]]) + nvvm.barrier id = %barID + // CHECK: call void @llvm.nvvm.barrier(i32 %[[barId]], i32 %[[numThreads]]) + nvvm.barrier id = %barID number_of_threads = %numberOfThreads + llvm.return +} + // CHECK-LABEL: @llvm_nvvm_cluster_arrive llvm.func @llvm_nvvm_cluster_arrive() { // CHECK: call void @llvm.nvvm.barrier.cluster.arrive() @@ -512,6 +524,13 @@ llvm.func @kernel_func() attributes {nvvm.kernel, nvvm.maxntid = array Date: Wed, 14 Feb 2024 15:33:08 +0800 Subject: [PATCH 105/240] [ValueTracking] Move the `isSignBitCheck` helper into ValueTracking. NFC. (#81704) This patch moves the `isSignBitCheck` helper into ValueTracking to reuse the logic in ValueTracking/InstSimplify. Addresses the comment https://github.com/llvm/llvm-project/pull/80740#discussion_r1488440050. --- llvm/include/llvm/Analysis/ValueTracking.h | 6 +++ .../Transforms/InstCombine/InstCombiner.h | 39 ------------------- llvm/lib/Analysis/InstructionSimplify.cpp | 15 ++++--- llvm/lib/Analysis/ValueTracking.cpp | 39 +++++++++++++++++++ .../InstCombine/InstCombineCompares.cpp | 2 +- .../InstCombine/InstCombineSelect.cpp | 7 ++-- 6 files changed, 56 insertions(+), 52 deletions(-) diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index 06f94f58ae5eff..f0d0ee554f12b2 100644 --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -197,6 +197,12 @@ unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, Intrinsic::ID getIntrinsicForCallSite(const CallBase &CB, const TargetLibraryInfo *TLI); +/// Given an exploded icmp instruction, return true if the comparison only +/// checks the sign bit. 
If it only checks the sign bit, set TrueIfSigned if +/// the result of the comparison is true when the input value is signed. +bool isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS, + bool &TrueIfSigned); + /// Returns a pair of values, which if passed to llvm.is.fpclass, returns the /// same result as an fcmp with the given operands. /// diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h index 950cc8c9d1658a..93090431cbb69f 100644 --- a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h +++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h @@ -170,45 +170,6 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner { } } - /// Given an exploded icmp instruction, return true if the comparison only - /// checks the sign bit. If it only checks the sign bit, set TrueIfSigned if - /// the result of the comparison is true when the input value is signed. - static bool isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS, - bool &TrueIfSigned) { - switch (Pred) { - case ICmpInst::ICMP_SLT: // True if LHS s< 0 - TrueIfSigned = true; - return RHS.isZero(); - case ICmpInst::ICMP_SLE: // True if LHS s<= -1 - TrueIfSigned = true; - return RHS.isAllOnes(); - case ICmpInst::ICMP_SGT: // True if LHS s> -1 - TrueIfSigned = false; - return RHS.isAllOnes(); - case ICmpInst::ICMP_SGE: // True if LHS s>= 0 - TrueIfSigned = false; - return RHS.isZero(); - case ICmpInst::ICMP_UGT: - // True if LHS u> RHS and RHS == sign-bit-mask - 1 - TrueIfSigned = true; - return RHS.isMaxSignedValue(); - case ICmpInst::ICMP_UGE: - // True if LHS u>= RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc) - TrueIfSigned = true; - return RHS.isMinSignedValue(); - case ICmpInst::ICMP_ULT: - // True if LHS u< RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc) - TrueIfSigned = false; - return RHS.isMinSignedValue(); - case ICmpInst::ICMP_ULE: - // True if LHS u<= RHS and RHS == sign-bit-mask - 1 - TrueIfSigned = false; - 
return RHS.isMaxSignedValue(); - default: - return false; - } - } - /// Add one to a Constant static Constant *AddOne(Constant *C) { return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1)); diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 333b38f221cfc4..cbe183296d2c90 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -3032,21 +3032,20 @@ static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS, Type *ITy = getCompareTy(RHS); // The return type. Value *X; + const APInt *C; + if (!match(RHS, m_APIntAllowUndef(C))) + return nullptr; + // Sign-bit checks can be optimized to true/false after unsigned // floating-point casts: // icmp slt (bitcast (uitofp X)), 0 --> false // icmp sgt (bitcast (uitofp X)), -1 --> true if (match(LHS, m_ElementWiseBitCast(m_UIToFP(m_Value(X))))) { - if (Pred == ICmpInst::ICMP_SLT && match(RHS, m_Zero())) - return ConstantInt::getFalse(ITy); - if (Pred == ICmpInst::ICMP_SGT && match(RHS, m_AllOnes())) - return ConstantInt::getTrue(ITy); + bool TrueIfSigned; + if (isSignBitCheck(Pred, *C, TrueIfSigned)) + return ConstantInt::getBool(ITy, !TrueIfSigned); } - const APInt *C; - if (!match(RHS, m_APIntAllowUndef(C))) - return nullptr; - // Rule out tautological comparisons (eg., ult 0 or uge 0). ConstantRange RHS_CR = ConstantRange::makeExactICmpRegion(Pred, *C); if (RHS_CR.isEmptySet()) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 92c9162a1f8f0f..6c42facea3b2b3 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -3810,6 +3810,45 @@ void KnownFPClass::propagateCanonicalizingSrc(const KnownFPClass &Src, propagateNaN(Src, /*PreserveSign=*/true); } +/// Given an exploded icmp instruction, return true if the comparison only +/// checks the sign bit. 
If it only checks the sign bit, set TrueIfSigned if +/// the result of the comparison is true when the input value is signed. +bool llvm::isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS, + bool &TrueIfSigned) { + switch (Pred) { + case ICmpInst::ICMP_SLT: // True if LHS s< 0 + TrueIfSigned = true; + return RHS.isZero(); + case ICmpInst::ICMP_SLE: // True if LHS s<= -1 + TrueIfSigned = true; + return RHS.isAllOnes(); + case ICmpInst::ICMP_SGT: // True if LHS s> -1 + TrueIfSigned = false; + return RHS.isAllOnes(); + case ICmpInst::ICMP_SGE: // True if LHS s>= 0 + TrueIfSigned = false; + return RHS.isZero(); + case ICmpInst::ICMP_UGT: + // True if LHS u> RHS and RHS == sign-bit-mask - 1 + TrueIfSigned = true; + return RHS.isMaxSignedValue(); + case ICmpInst::ICMP_UGE: + // True if LHS u>= RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc) + TrueIfSigned = true; + return RHS.isMinSignedValue(); + case ICmpInst::ICMP_ULT: + // True if LHS u< RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc) + TrueIfSigned = false; + return RHS.isMinSignedValue(); + case ICmpInst::ICMP_ULE: + // True if LHS u<= RHS and RHS == sign-bit-mask - 1 + TrueIfSigned = false; + return RHS.isMaxSignedValue(); + default: + return false; + } +} + /// Returns a pair of values, which if passed to llvm.is.fpclass, returns the /// same result as an fcmp with the given operands. std::pair llvm::fcmpToClassTest(FCmpInst::Predicate Pred, diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 280c4d77b6dfca..1104ea84e4bc70 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6021,7 +6021,7 @@ static APInt getDemandedBitsLHSMask(ICmpInst &I, unsigned BitWidth) { // If this is a normal comparison, it demands all bits. If it is a sign bit // comparison, it only demands the sign bit. 
bool UnusedBit; - if (InstCombiner::isSignBitCheck(I.getPredicate(), *RHS, UnusedBit)) + if (isSignBitCheck(I.getPredicate(), *RHS, UnusedBit)) return APInt::getSignMask(BitWidth); switch (I.getPredicate()) { diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 527037881edb19..71fa9b9ba41ebb 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -2381,8 +2381,7 @@ static Instruction *foldSelectToCopysign(SelectInst &Sel, ICmpInst::Predicate Pred; if (!match(Cond, m_OneUse(m_ICmp(Pred, m_ElementWiseBitCast(m_Value(X)), m_APInt(C)))) || - !InstCombiner::isSignBitCheck(Pred, *C, IsTrueIfSignSet) || - X->getType() != SelType) + !isSignBitCheck(Pred, *C, IsTrueIfSignSet) || X->getType() != SelType) return nullptr; // If needed, negate the value that will be the sign argument of the copysign: @@ -2581,7 +2580,7 @@ static Instruction *foldSelectWithSRem(SelectInst &SI, InstCombinerImpl &IC, bool TrueIfSigned = false; if (!(match(CondVal, m_ICmp(Pred, m_Value(RemRes), m_APInt(C))) && - IC.isSignBitCheck(Pred, *C, TrueIfSigned))) + isSignBitCheck(Pred, *C, TrueIfSigned))) return nullptr; // If the sign bit is not set, we have a SGE/SGT comparison, and the operands @@ -2781,7 +2780,7 @@ static Instruction *foldSelectWithFCmpToFabs(SelectInst &SI, bool TrueIfSigned; if (!match(CondVal, m_ICmp(Pred, m_ElementWiseBitCast(m_Specific(X)), m_APInt(C))) || - !IC.isSignBitCheck(Pred, *C, TrueIfSigned)) + !isSignBitCheck(Pred, *C, TrueIfSigned)) continue; if (!match(TrueVal, m_FNeg(m_Specific(X)))) return nullptr; From a2eb2340963c905b21ad3a2eda2688e3cd80192d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20K=C3=A9ri?= Date: Wed, 14 Feb 2024 08:38:24 +0100 Subject: [PATCH 106/240] [clang][analyzer] Reformat code of BoolAssignmentChecker (NFC). (#81461) This is only a code reformatting and rename of variables to the newer format. 
--- .../Checkers/BoolAssignmentChecker.cpp | 55 +++++++++---------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/BoolAssignmentChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/BoolAssignmentChecker.cpp index a09db6d2d0ec5b..837cbbce8f45f3 100644 --- a/clang/lib/StaticAnalyzer/Checkers/BoolAssignmentChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/BoolAssignmentChecker.cpp @@ -23,19 +23,19 @@ using namespace clang; using namespace ento; namespace { - class BoolAssignmentChecker : public Checker< check::Bind > { - const BugType BT{this, "Assignment of a non-Boolean value"}; - void emitReport(ProgramStateRef state, CheckerContext &C, - bool IsTainted = false) const; - - public: - void checkBind(SVal loc, SVal val, const Stmt *S, CheckerContext &C) const; - }; +class BoolAssignmentChecker : public Checker { + const BugType BT{this, "Assignment of a non-Boolean value"}; + void emitReport(ProgramStateRef State, CheckerContext &C, + bool IsTainted = false) const; + +public: + void checkBind(SVal Loc, SVal Val, const Stmt *S, CheckerContext &C) const; +}; } // end anonymous namespace -void BoolAssignmentChecker::emitReport(ProgramStateRef state, CheckerContext &C, +void BoolAssignmentChecker::emitReport(ProgramStateRef State, CheckerContext &C, bool IsTainted) const { - if (ExplodedNode *N = C.generateNonFatalErrorNode(state)) { + if (ExplodedNode *N = C.generateNonFatalErrorNode(State)) { StringRef Msg = IsTainted ? 
"Might assign a tainted non-Boolean value" : "Assignment of a non-Boolean value"; C.emitReport(std::make_unique(BT, Msg, N)); @@ -47,59 +47,58 @@ static bool isBooleanType(QualType Ty) { return true; if (const TypedefType *TT = Ty->getAs()) - return TT->getDecl()->getName() == "BOOL" || // Objective-C - TT->getDecl()->getName() == "_Bool" || // stdbool.h < C99 - TT->getDecl()->getName() == "Boolean"; // MacTypes.h + return TT->getDecl()->getName() == "BOOL" || // Objective-C + TT->getDecl()->getName() == "_Bool" || // stdbool.h < C99 + TT->getDecl()->getName() == "Boolean"; // MacTypes.h return false; } -void BoolAssignmentChecker::checkBind(SVal loc, SVal val, const Stmt *S, +void BoolAssignmentChecker::checkBind(SVal Loc, SVal Val, const Stmt *S, CheckerContext &C) const { // We are only interested in stores into Booleans. const TypedValueRegion *TR = - dyn_cast_or_null(loc.getAsRegion()); + dyn_cast_or_null(Loc.getAsRegion()); if (!TR) return; - QualType valTy = TR->getValueType(); + QualType RegTy = TR->getValueType(); - if (!isBooleanType(valTy)) + if (!isBooleanType(RegTy)) return; // Get the value of the right-hand side. We only care about values // that are defined (UnknownVals and UndefinedVals are handled by other // checkers). - std::optional NV = val.getAs(); + std::optional NV = Val.getAs(); if (!NV) return; // Check if the assigned value meets our criteria for correctness. It must // be a value that is either 0 or 1. One way to check this is to see if // the value is possibly < 0 (for a negative value) or greater than 1. 
- ProgramStateRef state = C.getState(); - SValBuilder &svalBuilder = C.getSValBuilder(); - BasicValueFactory &BVF = svalBuilder.getBasicValueFactory(); + ProgramStateRef State = C.getState(); + BasicValueFactory &BVF = C.getSValBuilder().getBasicValueFactory(); ConstraintManager &CM = C.getConstraintManager(); - llvm::APSInt Zero = BVF.getValue(0, valTy); - llvm::APSInt One = BVF.getValue(1, valTy); + llvm::APSInt Zero = BVF.getValue(0, RegTy); + llvm::APSInt One = BVF.getValue(1, RegTy); ProgramStateRef StIn, StOut; - std::tie(StIn, StOut) = CM.assumeInclusiveRangeDual(state, *NV, Zero, One); + std::tie(StIn, StOut) = CM.assumeInclusiveRangeDual(State, *NV, Zero, One); if (!StIn) emitReport(StOut, C); - if (StIn && StOut && taint::isTainted(state, *NV)) + if (StIn && StOut && taint::isTainted(State, *NV)) emitReport(StOut, C, /*IsTainted=*/true); } -void ento::registerBoolAssignmentChecker(CheckerManager &mgr) { - mgr.registerChecker(); +void ento::registerBoolAssignmentChecker(CheckerManager &Mgr) { + Mgr.registerChecker(); } -bool ento::shouldRegisterBoolAssignmentChecker(const CheckerManager &mgr) { +bool ento::shouldRegisterBoolAssignmentChecker(const CheckerManager &Mgr) { return true; } From 0fee2115bb78a8168fd752ca01f6646cfbf74d07 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 14 Feb 2024 16:12:37 +0800 Subject: [PATCH 107/240] [RISCV] Remove -riscv-v-fixed-length-vector-lmul-max from tests. NFC (#78299) Some fixed vector tests in test/CodeGen/RISCV/rvv have multiple run lines that check various configurations of -riscv-v-fixed-length-vector-lmul-max. From what I understand this flag was introduced in the early days of fixed length vector support, but now that fixed vector codegen has matured I'm not sure if it's as relevant today. This patch proposes to remove the various lmul-max run lines from the tests to make them more readable, and any changes to fixed vector codegen easier to review. 
We have removed them before for the same reason, so this would take care of the remaining test cases: https://reviews.llvm.org/D157973#4593268 (I don't have any strong motivation to remove the actual flag itself, my own personal motivation is just to clean up the tests) --- .../CodeGen/RISCV/rvv/fixed-vectors-abs.ll | 184 +- .../RISCV/rvv/fixed-vectors-bitreverse.ll | 1129 +--- .../CodeGen/RISCV/rvv/fixed-vectors-bswap.ll | 450 +- .../rvv/fixed-vectors-calling-conv-fastcc.ll | 541 +- .../RISCV/rvv/fixed-vectors-calling-conv.ll | 1642 +----- .../CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll | 3424 +++++------- .../CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll | 1177 +--- .../CodeGen/RISCV/rvv/fixed-vectors-cttz.ll | 3300 +++++------ .../rvv/fixed-vectors-extload-truncstore.ll | 1476 +---- .../RISCV/rvv/fixed-vectors-fp-conv.ll | 167 +- .../RISCV/rvv/fixed-vectors-fp-splat.ll | 234 +- .../RISCV/rvv/fixed-vectors-fp-vrgather.ll | 117 +- .../CodeGen/RISCV/rvv/fixed-vectors-fp.ll | 4860 ++++++----------- .../CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll | 972 +--- .../CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll | 906 +-- .../rvv/fixed-vectors-insert-subvector.ll | 258 +- .../RISCV/rvv/fixed-vectors-int-exttrunc.ll | 183 +- .../RISCV/rvv/fixed-vectors-int-splat.ll | 827 +-- .../RISCV/rvv/fixed-vectors-int-vrgather.ll | 121 +- .../CodeGen/RISCV/rvv/fixed-vectors-int.ll | 4198 ++++---------- .../RISCV/rvv/fixed-vectors-mask-buildvec.ll | 468 +- .../rvv/fixed-vectors-mask-load-store.ll | 6 +- .../RISCV/rvv/fixed-vectors-mask-splat.ll | 125 +- .../RISCV/rvv/fixed-vectors-stepvector.ll | 294 +- .../rvv/fixed-vectors-vreductions-mask.ll | 386 +- 25 files changed, 7653 insertions(+), 19792 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll index 60561262d845d1..37d05f08d0ff3d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll @@ -1,8 +1,6 @@ ; NOTE: Assertions 
have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 -; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s define void @abs_v16i8(ptr %x) { ; CHECK-LABEL: abs_v16i8: @@ -87,43 +85,15 @@ define void @abs_v2i64(ptr %x) { declare <2 x i64> @llvm.abs.v2i64(<2 x i64>, i1) define void @abs_v32i8(ptr %x) { -; LMULMAX2-LABEL: abs_v32i8: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a1, 32 -; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-NEXT: vle8.v v8, (a0) -; LMULMAX2-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX2-NEXT: vmax.vv v8, v8, v10 -; LMULMAX2-NEXT: vse8.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: abs_v32i8: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle8.v v8, (a1) -; LMULMAX1-RV32-NEXT: vle8.v v9, (a0) -; LMULMAX1-RV32-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX1-RV32-NEXT: vmax.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vrsub.vi v10, v9, 0 -; LMULMAX1-RV32-NEXT: vmax.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vse8.v v9, (a0) -; LMULMAX1-RV32-NEXT: vse8.v v8, (a1) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: abs_v32i8: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV64-NEXT: addi 
a1, a0, 16 -; LMULMAX1-RV64-NEXT: vle8.v v8, (a1) -; LMULMAX1-RV64-NEXT: vle8.v v9, (a0) -; LMULMAX1-RV64-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX1-RV64-NEXT: vmax.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vrsub.vi v10, v9, 0 -; LMULMAX1-RV64-NEXT: vmax.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vse8.v v9, (a0) -; LMULMAX1-RV64-NEXT: vse8.v v8, (a1) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: abs_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vrsub.vi v10, v8, 0 +; CHECK-NEXT: vmax.vv v8, v8, v10 +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x %b = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %a, i1 false) store <32 x i8> %b, ptr %x @@ -132,42 +102,14 @@ define void @abs_v32i8(ptr %x) { declare <32 x i8> @llvm.abs.v32i8(<32 x i8>, i1) define void @abs_v16i16(ptr %x) { -; LMULMAX2-LABEL: abs_v16i16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-NEXT: vle16.v v8, (a0) -; LMULMAX2-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX2-NEXT: vmax.vv v8, v8, v10 -; LMULMAX2-NEXT: vse16.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: abs_v16i16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle16.v v8, (a1) -; LMULMAX1-RV32-NEXT: vle16.v v9, (a0) -; LMULMAX1-RV32-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX1-RV32-NEXT: vmax.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vrsub.vi v10, v9, 0 -; LMULMAX1-RV32-NEXT: vmax.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vse16.v v9, (a0) -; LMULMAX1-RV32-NEXT: vse16.v v8, (a1) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: abs_v16i16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vle16.v v8, (a1) -; LMULMAX1-RV64-NEXT: vle16.v v9, (a0) -; LMULMAX1-RV64-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX1-RV64-NEXT: vmax.vv 
v8, v8, v10 -; LMULMAX1-RV64-NEXT: vrsub.vi v10, v9, 0 -; LMULMAX1-RV64-NEXT: vmax.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vse16.v v9, (a0) -; LMULMAX1-RV64-NEXT: vse16.v v8, (a1) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: abs_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vrsub.vi v10, v8, 0 +; CHECK-NEXT: vmax.vv v8, v8, v10 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <16 x i16>, ptr %x %b = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %a, i1 false) store <16 x i16> %b, ptr %x @@ -176,42 +118,14 @@ define void @abs_v16i16(ptr %x) { declare <16 x i16> @llvm.abs.v16i16(<16 x i16>, i1) define void @abs_v8i32(ptr %x) { -; LMULMAX2-LABEL: abs_v8i32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX2-NEXT: vmax.vv v8, v8, v10 -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: abs_v8i32: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle32.v v8, (a1) -; LMULMAX1-RV32-NEXT: vle32.v v9, (a0) -; LMULMAX1-RV32-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX1-RV32-NEXT: vmax.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vrsub.vi v10, v9, 0 -; LMULMAX1-RV32-NEXT: vmax.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a0) -; LMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: abs_v8i32: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vle32.v v8, (a1) -; LMULMAX1-RV64-NEXT: vle32.v v9, (a0) -; LMULMAX1-RV64-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX1-RV64-NEXT: vmax.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vrsub.vi v10, v9, 0 -; LMULMAX1-RV64-NEXT: vmax.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vse32.v v9, (a0) -; LMULMAX1-RV64-NEXT: vse32.v v8, 
(a1) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: abs_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vrsub.vi v10, v8, 0 +; CHECK-NEXT: vmax.vv v8, v8, v10 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret %a = load <8 x i32>, ptr %x %b = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %a, i1 false) store <8 x i32> %b, ptr %x @@ -220,42 +134,14 @@ define void @abs_v8i32(ptr %x) { declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1) define void @abs_v4i64(ptr %x) { -; LMULMAX2-LABEL: abs_v4i64: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-NEXT: vle64.v v8, (a0) -; LMULMAX2-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX2-NEXT: vmax.vv v8, v8, v10 -; LMULMAX2-NEXT: vse64.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: abs_v4i64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v8, (a1) -; LMULMAX1-RV32-NEXT: vle64.v v9, (a0) -; LMULMAX1-RV32-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX1-RV32-NEXT: vmax.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vrsub.vi v10, v9, 0 -; LMULMAX1-RV32-NEXT: vmax.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vse64.v v9, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v8, (a1) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: abs_v4i64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v8, (a1) -; LMULMAX1-RV64-NEXT: vle64.v v9, (a0) -; LMULMAX1-RV64-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX1-RV64-NEXT: vmax.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vrsub.vi v10, v9, 0 -; LMULMAX1-RV64-NEXT: vmax.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v8, (a1) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: abs_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vrsub.vi v10, 
v8, 0 +; CHECK-NEXT: vmax.vv v8, v8, v10 +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: ret %a = load <4 x i64>, ptr %x %b = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %a, i1 false) store <4 x i64> %b, ptr %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll index 1957829b1288ab..012f943b35d98e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll @@ -1,73 +1,40 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32,LMULMAX2-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64,LMULMAX2-RV64 -; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32,LMULMAX1-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64,LMULMAX1-RV64 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 ; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB ; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB define void @bitreverse_v8i16(ptr %x, ptr %y) { -; RV32-LABEL: bitreverse_v8i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: vsll.vi v8, 
v8, 8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: lui a1, 1 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: vand.vx v9, v9, a1 -; RV32-NEXT: vand.vx v8, v8, a1 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: lui a1, 3 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: vand.vx v9, v9, a1 -; RV32-NEXT: vand.vx v8, v8, a1 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a1, 5 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vand.vx v9, v9, a1 -; RV32-NEXT: vand.vx v8, v8, a1 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: bitreverse_v8i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: vsll.vi v8, v8, 8 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: lui a1, 1 -; RV64-NEXT: addi a1, a1, -241 -; RV64-NEXT: vand.vx v9, v9, a1 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: lui a1, 3 -; RV64-NEXT: addi a1, a1, 819 -; RV64-NEXT: vand.vx v9, v9, a1 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a1, 5 -; RV64-NEXT: addi a1, a1, 1365 -; RV64-NEXT: vand.vx v9, v9, a1 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vse16.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: bitreverse_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: vsll.vi v8, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: lui a1, 1 +; CHECK-NEXT: addi a1, a1, -241 +; 
CHECK-NEXT: vand.vx v9, v9, a1 +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 2 +; CHECK-NEXT: lui a1, 3 +; CHECK-NEXT: addi a1, a1, 819 +; CHECK-NEXT: vand.vx v9, v9, a1 +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a1, 5 +; CHECK-NEXT: addi a1, a1, 1365 +; CHECK-NEXT: vand.vx v9, v9, a1 +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret ; ; ZVBB-LABEL: bitreverse_v8i16: ; ZVBB: # %bb.0: @@ -85,83 +52,44 @@ define void @bitreverse_v8i16(ptr %x, ptr %y) { declare <8 x i16> @llvm.bitreverse.v8i16(<8 x i16>) define void @bitreverse_v4i32(ptr %x, ptr %y) { -; RV32-LABEL: bitreverse_v4i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: vsrl.vi v9, v8, 8 -; RV32-NEXT: lui a1, 16 -; RV32-NEXT: addi a1, a1, -256 -; RV32-NEXT: vand.vx v9, v9, a1 -; RV32-NEXT: vsrl.vi v10, v8, 24 -; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vand.vx v10, v8, a1 -; RV32-NEXT: vsll.vi v10, v10, 8 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: vand.vx v9, v9, a1 -; RV32-NEXT: vand.vx v8, v8, a1 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: vand.vx v9, v9, a1 -; RV32-NEXT: vand.vx v8, v8, a1 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vand.vx v9, v9, a1 -; RV32-NEXT: vand.vx v8, v8, a1 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, 
v9, v8 -; RV32-NEXT: vse32.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: bitreverse_v4i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: lui a1, 16 -; RV64-NEXT: addi a1, a1, -256 -; RV64-NEXT: vand.vx v9, v9, a1 -; RV64-NEXT: vsrl.vi v10, v8, 24 -; RV64-NEXT: vor.vv v9, v9, v10 -; RV64-NEXT: vand.vx v10, v8, a1 -; RV64-NEXT: vsll.vi v10, v10, 8 -; RV64-NEXT: vsll.vi v8, v8, 24 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: lui a1, 61681 -; RV64-NEXT: addi a1, a1, -241 -; RV64-NEXT: vand.vx v9, v9, a1 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: lui a1, 209715 -; RV64-NEXT: addi a1, a1, 819 -; RV64-NEXT: vand.vx v9, v9, a1 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a1, 349525 -; RV64-NEXT: addi a1, a1, 1365 -; RV64-NEXT: vand.vx v9, v9, a1 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: bitreverse_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: lui a1, 16 +; CHECK-NEXT: addi a1, a1, -256 +; CHECK-NEXT: vand.vx v9, v9, a1 +; CHECK-NEXT: vsrl.vi v10, v8, 24 +; CHECK-NEXT: vor.vv v9, v9, v10 +; CHECK-NEXT: vand.vx v10, v8, a1 +; CHECK-NEXT: vsll.vi v10, v10, 8 +; CHECK-NEXT: vsll.vi v8, v8, 24 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: lui a1, 61681 +; CHECK-NEXT: addi a1, a1, -241 +; CHECK-NEXT: vand.vx v9, v9, a1 +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v9, v8 +; 
CHECK-NEXT: vsrl.vi v9, v8, 2 +; CHECK-NEXT: lui a1, 209715 +; CHECK-NEXT: addi a1, a1, 819 +; CHECK-NEXT: vand.vx v9, v9, a1 +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: lui a1, 349525 +; CHECK-NEXT: addi a1, a1, 1365 +; CHECK-NEXT: vand.vx v9, v9, a1 +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret ; ; ZVBB-LABEL: bitreverse_v4i32: ; ZVBB: # %bb.0: @@ -328,171 +256,36 @@ define void @bitreverse_v2i64(ptr %x, ptr %y) { declare <2 x i64> @llvm.bitreverse.v2i64(<2 x i64>) define void @bitreverse_v16i16(ptr %x, ptr %y) { -; LMULMAX2-RV32-LABEL: bitreverse_v16i16: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 8 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV32-NEXT: lui a1, 1 -; LMULMAX2-RV32-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 4 -; LMULMAX2-RV32-NEXT: vor.vv v8, v10, v8 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX2-RV32-NEXT: lui a1, 3 -; LMULMAX2-RV32-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 2 -; LMULMAX2-RV32-NEXT: vor.vv v8, v10, v8 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV32-NEXT: lui a1, 5 -; LMULMAX2-RV32-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v8 -; LMULMAX2-RV32-NEXT: vor.vv v8, v10, v8 -; LMULMAX2-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX2-RV64-LABEL: bitreverse_v16i16: -; 
LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 8 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV64-NEXT: lui a1, 1 -; LMULMAX2-RV64-NEXT: addi a1, a1, -241 -; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 4 -; LMULMAX2-RV64-NEXT: vor.vv v8, v10, v8 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX2-RV64-NEXT: lui a1, 3 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 2 -; LMULMAX2-RV64-NEXT: vor.vv v8, v10, v8 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64-NEXT: lui a1, 5 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v8 -; LMULMAX2-RV64-NEXT: vor.vv v8, v10, v8 -; LMULMAX2-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret -; -; LMULMAX1-RV32-LABEL: bitreverse_v16i16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle16.v v8, (a1) -; LMULMAX1-RV32-NEXT: vle16.v v9, (a0) -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 8 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV32-NEXT: lui a2, 1 -; LMULMAX1-RV32-NEXT: addi a2, a2, -241 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a2 -; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 4 -; LMULMAX1-RV32-NEXT: vor.vv v8, v10, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX1-RV32-NEXT: lui a3, 3 -; LMULMAX1-RV32-NEXT: addi a3, a3, 819 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a3 -; 
LMULMAX1-RV32-NEXT: vand.vx v8, v8, a3 -; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 2 -; LMULMAX1-RV32-NEXT: vor.vv v8, v10, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV32-NEXT: lui a4, 5 -; LMULMAX1-RV32-NEXT: addi a4, a4, 1365 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a4 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a4 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v8 -; LMULMAX1-RV32-NEXT: vor.vv v8, v10, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 8 -; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 8 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a2 -; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 4 -; LMULMAX1-RV32-NEXT: vor.vv v9, v10, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 2 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a3 -; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 2 -; LMULMAX1-RV32-NEXT: vor.vv v9, v10, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a4 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a4 -; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v9 -; LMULMAX1-RV32-NEXT: vor.vv v9, v10, v9 -; LMULMAX1-RV32-NEXT: vse16.v v9, (a0) -; LMULMAX1-RV32-NEXT: vse16.v v8, (a1) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: bitreverse_v16i16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vle16.v v8, (a1) -; LMULMAX1-RV64-NEXT: vle16.v v9, (a0) -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 8 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV64-NEXT: lui a2, 1 -; LMULMAX1-RV64-NEXT: addi a2, a2, -241 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a2 -; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 4 -; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX1-RV64-NEXT: 
lui a3, 3 -; LMULMAX1-RV64-NEXT: addi a3, a3, 819 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a3 -; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV64-NEXT: lui a4, 5 -; LMULMAX1-RV64-NEXT: addi a4, a4, 1365 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a4 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v8 -; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 8 -; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 8 -; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a2 -; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 4 -; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 2 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a3 -; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 2 -; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a4 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a4 -; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v9 -; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vse16.v v9, (a0) -; LMULMAX1-RV64-NEXT: vse16.v v8, (a1) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: bitreverse_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsrl.vi v10, v8, 8 +; CHECK-NEXT: vsll.vi v8, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vsrl.vi v10, v8, 4 +; CHECK-NEXT: lui a1, 1 +; CHECK-NEXT: addi a1, a1, -241 +; CHECK-NEXT: vand.vx v10, v10, a1 +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: vsrl.vi v10, v8, 2 +; CHECK-NEXT: lui a1, 3 +; CHECK-NEXT: addi a1, a1, 819 +; CHECK-NEXT: vand.vx v10, v10, a1 +; CHECK-NEXT: vand.vx v8, v8, a1 
+; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: vsrl.vi v10, v8, 1 +; CHECK-NEXT: lui a1, 5 +; CHECK-NEXT: addi a1, a1, 1365 +; CHECK-NEXT: vand.vx v10, v10, a1 +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret ; ; ZVBB-LABEL: bitreverse_v16i16: ; ZVBB: # %bb.0: @@ -510,215 +303,44 @@ define void @bitreverse_v16i16(ptr %x, ptr %y) { declare <16 x i16> @llvm.bitreverse.v16i16(<16 x i16>) define void @bitreverse_v8i32(ptr %x, ptr %y) { -; LMULMAX2-RV32-LABEL: bitreverse_v8i32: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX2-RV32-NEXT: lui a1, 16 -; LMULMAX2-RV32-NEXT: addi a1, a1, -256 -; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 24 -; LMULMAX2-RV32-NEXT: vor.vv v10, v10, v12 -; LMULMAX2-RV32-NEXT: vand.vx v12, v8, a1 -; LMULMAX2-RV32-NEXT: vsll.vi v12, v12, 8 -; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 24 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v12 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV32-NEXT: lui a1, 61681 -; LMULMAX2-RV32-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 4 -; LMULMAX2-RV32-NEXT: vor.vv v8, v10, v8 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX2-RV32-NEXT: lui a1, 209715 -; LMULMAX2-RV32-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 2 -; LMULMAX2-RV32-NEXT: vor.vv v8, v10, v8 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV32-NEXT: lui a1, 349525 -; LMULMAX2-RV32-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 -; 
LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v8 -; LMULMAX2-RV32-NEXT: vor.vv v8, v10, v8 -; LMULMAX2-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX2-RV64-LABEL: bitreverse_v8i32: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX2-RV64-NEXT: lui a1, 16 -; LMULMAX2-RV64-NEXT: addi a1, a1, -256 -; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV64-NEXT: vsrl.vi v12, v8, 24 -; LMULMAX2-RV64-NEXT: vor.vv v10, v10, v12 -; LMULMAX2-RV64-NEXT: vand.vx v12, v8, a1 -; LMULMAX2-RV64-NEXT: vsll.vi v12, v12, 8 -; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 24 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v12 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV64-NEXT: lui a1, 61681 -; LMULMAX2-RV64-NEXT: addi a1, a1, -241 -; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 4 -; LMULMAX2-RV64-NEXT: vor.vv v8, v10, v8 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX2-RV64-NEXT: lui a1, 209715 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 2 -; LMULMAX2-RV64-NEXT: vor.vv v8, v10, v8 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64-NEXT: lui a1, 349525 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v8 -; LMULMAX2-RV64-NEXT: vor.vv v8, v10, v8 -; LMULMAX2-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret -; -; LMULMAX1-RV32-LABEL: bitreverse_v8i32: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle32.v v8, (a1) -; LMULMAX1-RV32-NEXT: vle32.v v9, (a0) -; LMULMAX1-RV32-NEXT: vsrl.vi v10, 
v8, 8 -; LMULMAX1-RV32-NEXT: lui a2, 16 -; LMULMAX1-RV32-NEXT: addi a2, a2, -256 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV32-NEXT: vsrl.vi v11, v8, 24 -; LMULMAX1-RV32-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV32-NEXT: vand.vx v11, v8, a2 -; LMULMAX1-RV32-NEXT: vsll.vi v11, v11, 8 -; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 24 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV32-NEXT: lui a3, 61681 -; LMULMAX1-RV32-NEXT: addi a3, a3, -241 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a3 -; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 4 -; LMULMAX1-RV32-NEXT: vor.vv v8, v10, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX1-RV32-NEXT: lui a4, 209715 -; LMULMAX1-RV32-NEXT: addi a4, a4, 819 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a4 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a4 -; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 2 -; LMULMAX1-RV32-NEXT: vor.vv v8, v10, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV32-NEXT: lui a5, 349525 -; LMULMAX1-RV32-NEXT: addi a5, a5, 1365 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a5 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a5 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v8 -; LMULMAX1-RV32-NEXT: vor.vv v8, v10, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 8 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV32-NEXT: vsrl.vi v11, v9, 24 -; LMULMAX1-RV32-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV32-NEXT: vand.vx v11, v9, a2 -; LMULMAX1-RV32-NEXT: vsll.vi v11, v11, 8 -; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 24 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v11 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a3 -; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 4 -; LMULMAX1-RV32-NEXT: vor.vv v9, v10, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 2 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a4 -; LMULMAX1-RV32-NEXT: 
vand.vx v9, v9, a4 -; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 2 -; LMULMAX1-RV32-NEXT: vor.vv v9, v10, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a5 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a5 -; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v9 -; LMULMAX1-RV32-NEXT: vor.vv v9, v10, v9 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a0) -; LMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: bitreverse_v8i32: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vle32.v v8, (a1) -; LMULMAX1-RV64-NEXT: vle32.v v9, (a0) -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX1-RV64-NEXT: lui a2, 16 -; LMULMAX1-RV64-NEXT: addi a2, a2, -256 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV64-NEXT: vsrl.vi v11, v8, 24 -; LMULMAX1-RV64-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV64-NEXT: vand.vx v11, v8, a2 -; LMULMAX1-RV64-NEXT: vsll.vi v11, v11, 8 -; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 24 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV64-NEXT: lui a3, 61681 -; LMULMAX1-RV64-NEXT: addi a3, a3, -241 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a3 -; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 4 -; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX1-RV64-NEXT: lui a4, 209715 -; LMULMAX1-RV64-NEXT: addi a4, a4, 819 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a4 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4 -; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV64-NEXT: lui a5, 349525 -; LMULMAX1-RV64-NEXT: addi a5, a5, 1365 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a5 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a5 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v8 -; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8 
-; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 8 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV64-NEXT: vsrl.vi v11, v9, 24 -; LMULMAX1-RV64-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV64-NEXT: vand.vx v11, v9, a2 -; LMULMAX1-RV64-NEXT: vsll.vi v11, v11, 8 -; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 24 -; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v11 -; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a3 -; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 4 -; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 2 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a4 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a4 -; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 2 -; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a5 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a5 -; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v9 -; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vse32.v v9, (a0) -; LMULMAX1-RV64-NEXT: vse32.v v8, (a1) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: bitreverse_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsrl.vi v10, v8, 8 +; CHECK-NEXT: lui a1, 16 +; CHECK-NEXT: addi a1, a1, -256 +; CHECK-NEXT: vand.vx v10, v10, a1 +; CHECK-NEXT: vsrl.vi v12, v8, 24 +; CHECK-NEXT: vor.vv v10, v10, v12 +; CHECK-NEXT: vand.vx v12, v8, a1 +; CHECK-NEXT: vsll.vi v12, v12, 8 +; CHECK-NEXT: vsll.vi v8, v8, 24 +; CHECK-NEXT: vor.vv v8, v8, v12 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vsrl.vi v10, v8, 4 +; CHECK-NEXT: lui a1, 61681 +; CHECK-NEXT: addi a1, a1, -241 +; CHECK-NEXT: vand.vx v10, v10, a1 +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: vsrl.vi v10, v8, 2 +; CHECK-NEXT: lui a1, 209715 +; CHECK-NEXT: addi a1, a1, 819 +; CHECK-NEXT: vand.vx v10, v10, a1 +; 
CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: vsrl.vi v10, v8, 1 +; CHECK-NEXT: lui a1, 349525 +; CHECK-NEXT: addi a1, a1, 1365 +; CHECK-NEXT: vand.vx v10, v10, a1 +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret ; ; ZVBB-LABEL: bitreverse_v8i32: ; ZVBB: # %bb.0: @@ -736,349 +358,138 @@ define void @bitreverse_v8i32(ptr %x, ptr %y) { declare <8 x i32> @llvm.bitreverse.v8i32(<8 x i32>) define void @bitreverse_v4i64(ptr %x, ptr %y) { -; LMULMAX2-RV32-LABEL: bitreverse_v4i64: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: addi sp, sp, -16 -; LMULMAX2-RV32-NEXT: .cfi_def_cfa_offset 16 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32-NEXT: sw zero, 12(sp) -; LMULMAX2-RV32-NEXT: lui a1, 1044480 -; LMULMAX2-RV32-NEXT: sw a1, 8(sp) -; LMULMAX2-RV32-NEXT: li a1, 56 -; LMULMAX2-RV32-NEXT: vsrl.vx v10, v8, a1 -; LMULMAX2-RV32-NEXT: li a2, 40 -; LMULMAX2-RV32-NEXT: vsrl.vx v12, v8, a2 -; LMULMAX2-RV32-NEXT: lui a3, 16 -; LMULMAX2-RV32-NEXT: addi a3, a3, -256 -; LMULMAX2-RV32-NEXT: vand.vx v12, v12, a3 -; LMULMAX2-RV32-NEXT: vor.vv v10, v12, v10 -; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 24 -; LMULMAX2-RV32-NEXT: addi a4, sp, 8 -; LMULMAX2-RV32-NEXT: vlse64.v v14, (a4), zero -; LMULMAX2-RV32-NEXT: lui a4, 4080 -; LMULMAX2-RV32-NEXT: vand.vx v12, v12, a4 -; LMULMAX2-RV32-NEXT: vsrl.vi v16, v8, 8 -; LMULMAX2-RV32-NEXT: vand.vv v16, v16, v14 -; LMULMAX2-RV32-NEXT: vor.vv v12, v16, v12 -; LMULMAX2-RV32-NEXT: vor.vv v10, v12, v10 -; LMULMAX2-RV32-NEXT: vsll.vx v12, v8, a1 -; LMULMAX2-RV32-NEXT: vand.vx v16, v8, a3 -; LMULMAX2-RV32-NEXT: vsll.vx v16, v16, a2 -; LMULMAX2-RV32-NEXT: vor.vv v12, v12, v16 -; LMULMAX2-RV32-NEXT: vand.vx v16, v8, a4 -; LMULMAX2-RV32-NEXT: vsll.vi v16, v16, 24 -; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v14 -; LMULMAX2-RV32-NEXT: 
vsll.vi v8, v8, 8 -; LMULMAX2-RV32-NEXT: vor.vv v8, v16, v8 -; LMULMAX2-RV32-NEXT: vor.vv v8, v12, v8 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV32-NEXT: lui a1, 61681 -; LMULMAX2-RV32-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v12, a1 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vand.vv v10, v10, v12 -; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v12 -; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 4 -; LMULMAX2-RV32-NEXT: vor.vv v8, v10, v8 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX2-RV32-NEXT: lui a1, 209715 -; LMULMAX2-RV32-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v12, a1 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vand.vv v10, v10, v12 -; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v12 -; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 2 -; LMULMAX2-RV32-NEXT: vor.vv v8, v10, v8 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV32-NEXT: lui a1, 349525 -; LMULMAX2-RV32-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v12, a1 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vand.vv v10, v10, v12 -; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v12 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v8 -; LMULMAX2-RV32-NEXT: vor.vv v8, v10, v8 -; LMULMAX2-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32-NEXT: addi sp, sp, 16 -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX2-RV64-LABEL: bitreverse_v4i64: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64-NEXT: li a1, 56 -; LMULMAX2-RV64-NEXT: vsrl.vx v10, v8, a1 -; LMULMAX2-RV64-NEXT: li a2, 40 -; LMULMAX2-RV64-NEXT: vsrl.vx v12, v8, a2 -; LMULMAX2-RV64-NEXT: lui a3, 16 -; LMULMAX2-RV64-NEXT: addiw a3, a3, -256 -; 
LMULMAX2-RV64-NEXT: vand.vx v12, v12, a3 -; LMULMAX2-RV64-NEXT: vor.vv v10, v12, v10 -; LMULMAX2-RV64-NEXT: vsrl.vi v12, v8, 24 -; LMULMAX2-RV64-NEXT: lui a4, 4080 -; LMULMAX2-RV64-NEXT: vand.vx v12, v12, a4 -; LMULMAX2-RV64-NEXT: vsrl.vi v14, v8, 8 -; LMULMAX2-RV64-NEXT: li a5, 255 -; LMULMAX2-RV64-NEXT: slli a5, a5, 24 -; LMULMAX2-RV64-NEXT: vand.vx v14, v14, a5 -; LMULMAX2-RV64-NEXT: vor.vv v12, v14, v12 -; LMULMAX2-RV64-NEXT: vor.vv v10, v12, v10 -; LMULMAX2-RV64-NEXT: vand.vx v12, v8, a5 -; LMULMAX2-RV64-NEXT: vsll.vi v12, v12, 8 -; LMULMAX2-RV64-NEXT: vand.vx v14, v8, a4 -; LMULMAX2-RV64-NEXT: vsll.vi v14, v14, 24 -; LMULMAX2-RV64-NEXT: vor.vv v12, v14, v12 -; LMULMAX2-RV64-NEXT: vsll.vx v14, v8, a1 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a3 -; LMULMAX2-RV64-NEXT: vsll.vx v8, v8, a2 -; LMULMAX2-RV64-NEXT: vor.vv v8, v14, v8 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v12 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV64-NEXT: lui a1, 61681 -; LMULMAX2-RV64-NEXT: addiw a1, a1, -241 -; LMULMAX2-RV64-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64-NEXT: add a1, a1, a2 -; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 4 -; LMULMAX2-RV64-NEXT: vor.vv v8, v10, v8 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX2-RV64-NEXT: lui a1, 209715 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64-NEXT: add a1, a1, a2 -; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 2 -; LMULMAX2-RV64-NEXT: vor.vv v8, v10, v8 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64-NEXT: lui a1, 349525 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64-NEXT: add a1, a1, a2 -; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v8 -; 
LMULMAX2-RV64-NEXT: vor.vv v8, v10, v8 -; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret -; -; LMULMAX1-RV32-LABEL: bitreverse_v4i64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: addi sp, sp, -16 -; LMULMAX1-RV32-NEXT: .cfi_def_cfa_offset 16 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v9, (a1) -; LMULMAX1-RV32-NEXT: sw zero, 12(sp) -; LMULMAX1-RV32-NEXT: lui a2, 1044480 -; LMULMAX1-RV32-NEXT: sw a2, 8(sp) -; LMULMAX1-RV32-NEXT: li a2, 56 -; LMULMAX1-RV32-NEXT: vsrl.vx v10, v9, a2 -; LMULMAX1-RV32-NEXT: li a3, 40 -; LMULMAX1-RV32-NEXT: vsrl.vx v11, v9, a3 -; LMULMAX1-RV32-NEXT: lui a4, 16 -; LMULMAX1-RV32-NEXT: addi a4, a4, -256 -; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a4 -; LMULMAX1-RV32-NEXT: vor.vv v10, v11, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v11, v9, 24 -; LMULMAX1-RV32-NEXT: addi a5, sp, 8 -; LMULMAX1-RV32-NEXT: vlse64.v v12, (a5), zero -; LMULMAX1-RV32-NEXT: lui a5, 4080 -; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a5 -; LMULMAX1-RV32-NEXT: vsrl.vi v13, v9, 8 -; LMULMAX1-RV32-NEXT: vand.vv v13, v13, v12 -; LMULMAX1-RV32-NEXT: vor.vv v11, v13, v11 -; LMULMAX1-RV32-NEXT: vor.vv v10, v11, v10 -; LMULMAX1-RV32-NEXT: vand.vv v11, v9, v12 -; LMULMAX1-RV32-NEXT: vsll.vi v11, v11, 8 -; LMULMAX1-RV32-NEXT: vand.vx v13, v9, a5 -; LMULMAX1-RV32-NEXT: vsll.vi v13, v13, 24 -; LMULMAX1-RV32-NEXT: vor.vv v11, v13, v11 -; LMULMAX1-RV32-NEXT: vsll.vx v13, v9, a2 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a4 -; LMULMAX1-RV32-NEXT: vsll.vx v9, v9, a3 -; LMULMAX1-RV32-NEXT: vor.vv v9, v13, v9 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v11 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV32-NEXT: lui a6, 61681 -; LMULMAX1-RV32-NEXT: addi a6, a6, -241 -; LMULMAX1-RV32-NEXT: vsetvli a7, zero, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.x v11, a6 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; 
LMULMAX1-RV32-NEXT: vand.vv v10, v10, v11 -; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v11 -; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 4 -; LMULMAX1-RV32-NEXT: vor.vv v9, v10, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 2 -; LMULMAX1-RV32-NEXT: lui a6, 209715 -; LMULMAX1-RV32-NEXT: addi a6, a6, 819 -; LMULMAX1-RV32-NEXT: vsetvli a7, zero, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.x v13, a6 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vand.vv v10, v10, v13 -; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v13 -; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 2 -; LMULMAX1-RV32-NEXT: vor.vv v9, v10, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV32-NEXT: lui a6, 349525 -; LMULMAX1-RV32-NEXT: addi a6, a6, 1365 -; LMULMAX1-RV32-NEXT: vsetvli a7, zero, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.x v14, a6 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vand.vv v10, v10, v14 -; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v14 -; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v9 -; LMULMAX1-RV32-NEXT: vor.vv v9, v10, v9 -; LMULMAX1-RV32-NEXT: vsrl.vx v10, v8, a2 -; LMULMAX1-RV32-NEXT: vsrl.vx v15, v8, a3 -; LMULMAX1-RV32-NEXT: vand.vx v15, v15, a4 -; LMULMAX1-RV32-NEXT: vor.vv v10, v15, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v15, v8, 24 -; LMULMAX1-RV32-NEXT: vand.vx v15, v15, a5 -; LMULMAX1-RV32-NEXT: vsrl.vi v16, v8, 8 -; LMULMAX1-RV32-NEXT: vand.vv v16, v16, v12 -; LMULMAX1-RV32-NEXT: vor.vv v15, v16, v15 -; LMULMAX1-RV32-NEXT: vor.vv v10, v15, v10 -; LMULMAX1-RV32-NEXT: vsll.vx v15, v8, a2 -; LMULMAX1-RV32-NEXT: vand.vx v16, v8, a4 -; LMULMAX1-RV32-NEXT: vsll.vx v16, v16, a3 -; LMULMAX1-RV32-NEXT: vor.vv v15, v15, v16 -; LMULMAX1-RV32-NEXT: vand.vx v16, v8, a5 -; LMULMAX1-RV32-NEXT: vsll.vi v16, v16, 24 -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v12 -; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 8 -; LMULMAX1-RV32-NEXT: vor.vv v8, v16, v8 -; LMULMAX1-RV32-NEXT: vor.vv v8, v15, v8 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi 
v10, v8, 4 -; LMULMAX1-RV32-NEXT: vand.vv v10, v10, v11 -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 4 -; LMULMAX1-RV32-NEXT: vor.vv v8, v10, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX1-RV32-NEXT: vand.vv v10, v10, v13 -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v13 -; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 2 -; LMULMAX1-RV32-NEXT: vor.vv v8, v10, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV32-NEXT: vand.vv v10, v10, v14 -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v14 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v8 -; LMULMAX1-RV32-NEXT: vor.vv v8, v10, v8 -; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v9, (a1) -; LMULMAX1-RV32-NEXT: addi sp, sp, 16 -; LMULMAX1-RV32-NEXT: ret +; RV32-LABEL: bitreverse_v4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: sw zero, 12(sp) +; RV32-NEXT: lui a1, 1044480 +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: li a1, 56 +; RV32-NEXT: vsrl.vx v10, v8, a1 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: vsrl.vx v12, v8, a2 +; RV32-NEXT: lui a3, 16 +; RV32-NEXT: addi a3, a3, -256 +; RV32-NEXT: vand.vx v12, v12, a3 +; RV32-NEXT: vor.vv v10, v12, v10 +; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: addi a4, sp, 8 +; RV32-NEXT: vlse64.v v14, (a4), zero +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v12, v12, a4 +; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: vand.vv v16, v16, v14 +; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vor.vv v10, v12, v10 +; RV32-NEXT: vsll.vx v12, v8, a1 +; RV32-NEXT: vand.vx v16, v8, a3 +; RV32-NEXT: vsll.vx v16, v16, a2 +; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vand.vx v16, v8, a4 +; RV32-NEXT: vsll.vi v16, v16, 24 +; RV32-NEXT: vand.vv v8, v8, v14 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vsrl.vi v10, v8, 4 +; 
RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vand.vv v10, v10, v12 +; RV32-NEXT: vand.vv v8, v8, v12 +; RV32-NEXT: vsll.vi v8, v8, 4 +; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: vsrl.vi v10, v8, 2 +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vand.vv v10, v10, v12 +; RV32-NEXT: vand.vv v8, v8, v12 +; RV32-NEXT: vsll.vi v8, v8, 2 +; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: vsrl.vi v10, v8, 1 +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vand.vv v10, v10, v12 +; RV32-NEXT: vand.vv v8, v8, v12 +; RV32-NEXT: vadd.vv v8, v8, v8 +; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: vse64.v v8, (a0) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret ; -; LMULMAX1-RV64-LABEL: bitreverse_v4i64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v9, (a1) -; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: li a2, 56 -; LMULMAX1-RV64-NEXT: vsrl.vx v10, v9, a2 -; LMULMAX1-RV64-NEXT: li a3, 40 -; LMULMAX1-RV64-NEXT: vsrl.vx v11, v9, a3 -; LMULMAX1-RV64-NEXT: lui a4, 16 -; LMULMAX1-RV64-NEXT: addiw a4, a4, -256 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a4 -; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v11, v9, 24 -; LMULMAX1-RV64-NEXT: lui a5, 4080 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a5 -; LMULMAX1-RV64-NEXT: vsrl.vi v12, v9, 8 -; LMULMAX1-RV64-NEXT: li a6, 255 -; LMULMAX1-RV64-NEXT: slli a6, a6, 24 -; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a6 -; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11 -; 
LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10 -; LMULMAX1-RV64-NEXT: vand.vx v11, v9, a6 -; LMULMAX1-RV64-NEXT: vsll.vi v11, v11, 8 -; LMULMAX1-RV64-NEXT: vand.vx v12, v9, a5 -; LMULMAX1-RV64-NEXT: vsll.vi v12, v12, 24 -; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11 -; LMULMAX1-RV64-NEXT: vsll.vx v12, v9, a2 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a4 -; LMULMAX1-RV64-NEXT: vsll.vx v9, v9, a3 -; LMULMAX1-RV64-NEXT: vor.vv v9, v12, v9 -; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v11 -; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV64-NEXT: lui a7, 61681 -; LMULMAX1-RV64-NEXT: addiw a7, a7, -241 -; LMULMAX1-RV64-NEXT: slli t0, a7, 32 -; LMULMAX1-RV64-NEXT: add a7, a7, t0 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a7 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a7 -; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 4 -; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 2 -; LMULMAX1-RV64-NEXT: lui t0, 209715 -; LMULMAX1-RV64-NEXT: addiw t0, t0, 819 -; LMULMAX1-RV64-NEXT: slli t1, t0, 32 -; LMULMAX1-RV64-NEXT: add t0, t0, t1 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, t0 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, t0 -; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 2 -; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV64-NEXT: lui t1, 349525 -; LMULMAX1-RV64-NEXT: addiw t1, t1, 1365 -; LMULMAX1-RV64-NEXT: slli t2, t1, 32 -; LMULMAX1-RV64-NEXT: add t1, t1, t2 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, t1 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, t1 -; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v9 -; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsrl.vx v10, v8, a2 -; LMULMAX1-RV64-NEXT: vsrl.vx v11, v8, a3 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a4 -; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v11, v8, 24 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a5 -; LMULMAX1-RV64-NEXT: vsrl.vi v12, v8, 8 -; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a6 -; LMULMAX1-RV64-NEXT: vor.vv v11, 
v12, v11 -; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10 -; LMULMAX1-RV64-NEXT: vand.vx v11, v8, a6 -; LMULMAX1-RV64-NEXT: vsll.vi v11, v11, 8 -; LMULMAX1-RV64-NEXT: vand.vx v12, v8, a5 -; LMULMAX1-RV64-NEXT: vsll.vi v12, v12, 24 -; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11 -; LMULMAX1-RV64-NEXT: vsll.vx v12, v8, a2 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4 -; LMULMAX1-RV64-NEXT: vsll.vx v8, v8, a3 -; LMULMAX1-RV64-NEXT: vor.vv v8, v12, v8 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a7 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a7 -; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 4 -; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, t0 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, t0 -; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, t1 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, t1 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v8 -; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v9, (a1) -; LMULMAX1-RV64-NEXT: ret +; RV64-LABEL: bitreverse_v4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: li a1, 56 +; RV64-NEXT: vsrl.vx v10, v8, a1 +; RV64-NEXT: li a2, 40 +; RV64-NEXT: vsrl.vx v12, v8, a2 +; RV64-NEXT: lui a3, 16 +; RV64-NEXT: addiw a3, a3, -256 +; RV64-NEXT: vand.vx v12, v12, a3 +; RV64-NEXT: vor.vv v10, v12, v10 +; RV64-NEXT: vsrl.vi v12, v8, 24 +; RV64-NEXT: lui a4, 4080 +; RV64-NEXT: vand.vx v12, v12, a4 +; RV64-NEXT: vsrl.vi v14, v8, 8 +; RV64-NEXT: li a5, 255 +; RV64-NEXT: slli a5, a5, 24 +; RV64-NEXT: vand.vx v14, v14, a5 +; RV64-NEXT: vor.vv v12, v14, v12 +; RV64-NEXT: vor.vv v10, v12, v10 +; RV64-NEXT: vand.vx v12, v8, a5 +; RV64-NEXT: vsll.vi v12, v12, 8 +; 
RV64-NEXT: vand.vx v14, v8, a4 +; RV64-NEXT: vsll.vi v14, v14, 24 +; RV64-NEXT: vor.vv v12, v14, v12 +; RV64-NEXT: vsll.vx v14, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a3 +; RV64-NEXT: vsll.vx v8, v8, a2 +; RV64-NEXT: vor.vv v8, v14, v8 +; RV64-NEXT: vor.vv v8, v8, v12 +; RV64-NEXT: vor.vv v8, v8, v10 +; RV64-NEXT: vsrl.vi v10, v8, 4 +; RV64-NEXT: lui a1, 61681 +; RV64-NEXT: addiw a1, a1, -241 +; RV64-NEXT: slli a2, a1, 32 +; RV64-NEXT: add a1, a1, a2 +; RV64-NEXT: vand.vx v10, v10, a1 +; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vsll.vi v8, v8, 4 +; RV64-NEXT: vor.vv v8, v10, v8 +; RV64-NEXT: vsrl.vi v10, v8, 2 +; RV64-NEXT: lui a1, 209715 +; RV64-NEXT: addiw a1, a1, 819 +; RV64-NEXT: slli a2, a1, 32 +; RV64-NEXT: add a1, a1, a2 +; RV64-NEXT: vand.vx v10, v10, a1 +; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vsll.vi v8, v8, 2 +; RV64-NEXT: vor.vv v8, v10, v8 +; RV64-NEXT: vsrl.vi v10, v8, 1 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: slli a2, a1, 32 +; RV64-NEXT: add a1, a1, a2 +; RV64-NEXT: vand.vx v10, v10, a1 +; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: vor.vv v8, v10, v8 +; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: ret ; ; ZVBB-LABEL: bitreverse_v4i64: ; ZVBB: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll index 41f74255c31b06..d5338f9b6c6fc0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll @@ -1,8 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX2-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX2-RV64 -; RUN: llc -mtriple=riscv32 -mattr=+v 
-riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX1-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX1-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 ; RUN: llc -mtriple=riscv32 -mattr=+v,+zvkb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVKB ; RUN: llc -mtriple=riscv64 -mattr=+v,+zvkb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVKB @@ -159,57 +157,15 @@ define void @bswap_v2i64(ptr %x, ptr %y) { declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) define void @bswap_v16i16(ptr %x, ptr %y) { -; LMULMAX2-RV32-LABEL: bswap_v16i16: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 8 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX2-RV64-LABEL: bswap_v16i16: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 8 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret -; -; LMULMAX1-RV32-LABEL: bswap_v16i16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle16.v v8, (a1) -; LMULMAX1-RV32-NEXT: vle16.v v9, (a0) -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 8 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 
8 -; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 8 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vse16.v v9, (a0) -; LMULMAX1-RV32-NEXT: vse16.v v8, (a1) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: bswap_v16i16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vle16.v v8, (a1) -; LMULMAX1-RV64-NEXT: vle16.v v9, (a0) -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 8 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 8 -; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 8 -; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vse16.v v9, (a0) -; LMULMAX1-RV64-NEXT: vse16.v v8, (a1) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: bswap_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsrl.vi v10, v8, 8 +; CHECK-NEXT: vsll.vi v8, v8, 8 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret ; ; ZVKB-LABEL: bswap_v16i16: ; ZVKB: # %bb.0: @@ -227,101 +183,23 @@ define void @bswap_v16i16(ptr %x, ptr %y) { declare <16 x i16> @llvm.bswap.v16i16(<16 x i16>) define void @bswap_v8i32(ptr %x, ptr %y) { -; LMULMAX2-RV32-LABEL: bswap_v8i32: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX2-RV32-NEXT: lui a1, 16 -; LMULMAX2-RV32-NEXT: addi a1, a1, -256 -; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 24 -; LMULMAX2-RV32-NEXT: vor.vv v10, v10, v12 -; LMULMAX2-RV32-NEXT: vand.vx v12, v8, a1 -; LMULMAX2-RV32-NEXT: vsll.vi v12, v12, 8 -; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 24 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v12 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX2-RV64-LABEL: 
bswap_v8i32: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX2-RV64-NEXT: lui a1, 16 -; LMULMAX2-RV64-NEXT: addi a1, a1, -256 -; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV64-NEXT: vsrl.vi v12, v8, 24 -; LMULMAX2-RV64-NEXT: vor.vv v10, v10, v12 -; LMULMAX2-RV64-NEXT: vand.vx v12, v8, a1 -; LMULMAX2-RV64-NEXT: vsll.vi v12, v12, 8 -; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 24 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v12 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret -; -; LMULMAX1-RV32-LABEL: bswap_v8i32: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle32.v v8, (a1) -; LMULMAX1-RV32-NEXT: vle32.v v9, (a0) -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX1-RV32-NEXT: lui a2, 16 -; LMULMAX1-RV32-NEXT: addi a2, a2, -256 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV32-NEXT: vsrl.vi v11, v8, 24 -; LMULMAX1-RV32-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV32-NEXT: vand.vx v11, v8, a2 -; LMULMAX1-RV32-NEXT: vsll.vi v11, v11, 8 -; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 24 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 8 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV32-NEXT: vsrl.vi v11, v9, 24 -; LMULMAX1-RV32-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV32-NEXT: vand.vx v11, v9, a2 -; LMULMAX1-RV32-NEXT: vsll.vi v11, v11, 8 -; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 24 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v11 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a0) -; LMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: bswap_v8i32: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; 
LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vle32.v v8, (a1) -; LMULMAX1-RV64-NEXT: vle32.v v9, (a0) -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX1-RV64-NEXT: lui a2, 16 -; LMULMAX1-RV64-NEXT: addi a2, a2, -256 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV64-NEXT: vsrl.vi v11, v8, 24 -; LMULMAX1-RV64-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV64-NEXT: vand.vx v11, v8, a2 -; LMULMAX1-RV64-NEXT: vsll.vi v11, v11, 8 -; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 24 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 8 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV64-NEXT: vsrl.vi v11, v9, 24 -; LMULMAX1-RV64-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV64-NEXT: vand.vx v11, v9, a2 -; LMULMAX1-RV64-NEXT: vsll.vi v11, v11, 8 -; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 24 -; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v11 -; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vse32.v v9, (a0) -; LMULMAX1-RV64-NEXT: vse32.v v8, (a1) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: bswap_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsrl.vi v10, v8, 8 +; CHECK-NEXT: lui a1, 16 +; CHECK-NEXT: addi a1, a1, -256 +; CHECK-NEXT: vand.vx v10, v10, a1 +; CHECK-NEXT: vsrl.vi v12, v8, 24 +; CHECK-NEXT: vor.vv v10, v10, v12 +; CHECK-NEXT: vand.vx v12, v8, a1 +; CHECK-NEXT: vsll.vi v12, v12, 8 +; CHECK-NEXT: vsll.vi v8, v8, 24 +; CHECK-NEXT: vor.vv v8, v8, v12 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret ; ; ZVKB-LABEL: bswap_v8i32: ; ZVKB: # %bb.0: @@ -339,205 +217,81 @@ define void @bswap_v8i32(ptr %x, ptr %y) { declare <8 x i32> @llvm.bswap.v8i32(<8 x i32>) define void @bswap_v4i64(ptr %x, ptr %y) { -; LMULMAX2-RV32-LABEL: bswap_v4i64: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: addi sp, sp, -16 -; LMULMAX2-RV32-NEXT: .cfi_def_cfa_offset 16 -; LMULMAX2-RV32-NEXT: 
vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32-NEXT: sw zero, 12(sp) -; LMULMAX2-RV32-NEXT: lui a1, 1044480 -; LMULMAX2-RV32-NEXT: sw a1, 8(sp) -; LMULMAX2-RV32-NEXT: li a1, 56 -; LMULMAX2-RV32-NEXT: vsrl.vx v10, v8, a1 -; LMULMAX2-RV32-NEXT: li a2, 40 -; LMULMAX2-RV32-NEXT: vsrl.vx v12, v8, a2 -; LMULMAX2-RV32-NEXT: lui a3, 16 -; LMULMAX2-RV32-NEXT: addi a3, a3, -256 -; LMULMAX2-RV32-NEXT: vand.vx v12, v12, a3 -; LMULMAX2-RV32-NEXT: vor.vv v10, v12, v10 -; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 24 -; LMULMAX2-RV32-NEXT: addi a4, sp, 8 -; LMULMAX2-RV32-NEXT: vlse64.v v14, (a4), zero -; LMULMAX2-RV32-NEXT: lui a4, 4080 -; LMULMAX2-RV32-NEXT: vand.vx v12, v12, a4 -; LMULMAX2-RV32-NEXT: vsrl.vi v16, v8, 8 -; LMULMAX2-RV32-NEXT: vand.vv v16, v16, v14 -; LMULMAX2-RV32-NEXT: vor.vv v12, v16, v12 -; LMULMAX2-RV32-NEXT: vor.vv v10, v12, v10 -; LMULMAX2-RV32-NEXT: vsll.vx v12, v8, a1 -; LMULMAX2-RV32-NEXT: vand.vx v16, v8, a3 -; LMULMAX2-RV32-NEXT: vsll.vx v16, v16, a2 -; LMULMAX2-RV32-NEXT: vor.vv v12, v12, v16 -; LMULMAX2-RV32-NEXT: vand.vx v16, v8, a4 -; LMULMAX2-RV32-NEXT: vsll.vi v16, v16, 24 -; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v14 -; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 8 -; LMULMAX2-RV32-NEXT: vor.vv v8, v16, v8 -; LMULMAX2-RV32-NEXT: vor.vv v8, v12, v8 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32-NEXT: addi sp, sp, 16 -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX2-RV64-LABEL: bswap_v4i64: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64-NEXT: li a1, 56 -; LMULMAX2-RV64-NEXT: vsrl.vx v10, v8, a1 -; LMULMAX2-RV64-NEXT: li a2, 40 -; LMULMAX2-RV64-NEXT: vsrl.vx v12, v8, a2 -; LMULMAX2-RV64-NEXT: lui a3, 16 -; LMULMAX2-RV64-NEXT: addiw a3, a3, -256 -; LMULMAX2-RV64-NEXT: vand.vx v12, v12, a3 -; LMULMAX2-RV64-NEXT: vor.vv v10, v12, v10 -; LMULMAX2-RV64-NEXT: vsrl.vi v12, v8, 24 -; 
LMULMAX2-RV64-NEXT: lui a4, 4080 -; LMULMAX2-RV64-NEXT: vand.vx v12, v12, a4 -; LMULMAX2-RV64-NEXT: vsrl.vi v14, v8, 8 -; LMULMAX2-RV64-NEXT: li a5, 255 -; LMULMAX2-RV64-NEXT: slli a5, a5, 24 -; LMULMAX2-RV64-NEXT: vand.vx v14, v14, a5 -; LMULMAX2-RV64-NEXT: vor.vv v12, v14, v12 -; LMULMAX2-RV64-NEXT: vor.vv v10, v12, v10 -; LMULMAX2-RV64-NEXT: vand.vx v12, v8, a5 -; LMULMAX2-RV64-NEXT: vsll.vi v12, v12, 8 -; LMULMAX2-RV64-NEXT: vand.vx v14, v8, a4 -; LMULMAX2-RV64-NEXT: vsll.vi v14, v14, 24 -; LMULMAX2-RV64-NEXT: vor.vv v12, v14, v12 -; LMULMAX2-RV64-NEXT: vsll.vx v14, v8, a1 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a3 -; LMULMAX2-RV64-NEXT: vsll.vx v8, v8, a2 -; LMULMAX2-RV64-NEXT: vor.vv v8, v14, v8 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v12 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret -; -; LMULMAX1-RV32-LABEL: bswap_v4i64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: addi sp, sp, -16 -; LMULMAX1-RV32-NEXT: .cfi_def_cfa_offset 16 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v9, (a1) -; LMULMAX1-RV32-NEXT: sw zero, 12(sp) -; LMULMAX1-RV32-NEXT: lui a2, 1044480 -; LMULMAX1-RV32-NEXT: sw a2, 8(sp) -; LMULMAX1-RV32-NEXT: li a2, 56 -; LMULMAX1-RV32-NEXT: vsrl.vx v10, v9, a2 -; LMULMAX1-RV32-NEXT: li a3, 40 -; LMULMAX1-RV32-NEXT: vsrl.vx v11, v9, a3 -; LMULMAX1-RV32-NEXT: lui a4, 16 -; LMULMAX1-RV32-NEXT: addi a4, a4, -256 -; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a4 -; LMULMAX1-RV32-NEXT: vor.vv v10, v11, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v11, v9, 24 -; LMULMAX1-RV32-NEXT: addi a5, sp, 8 -; LMULMAX1-RV32-NEXT: vlse64.v v12, (a5), zero -; LMULMAX1-RV32-NEXT: lui a5, 4080 -; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a5 -; LMULMAX1-RV32-NEXT: vsrl.vi v13, v9, 8 -; LMULMAX1-RV32-NEXT: vand.vv v13, v13, v12 -; LMULMAX1-RV32-NEXT: vor.vv v11, v13, v11 -; LMULMAX1-RV32-NEXT: vor.vv v10, v11, 
v10 -; LMULMAX1-RV32-NEXT: vand.vv v11, v9, v12 -; LMULMAX1-RV32-NEXT: vsll.vi v11, v11, 8 -; LMULMAX1-RV32-NEXT: vand.vx v13, v9, a5 -; LMULMAX1-RV32-NEXT: vsll.vi v13, v13, 24 -; LMULMAX1-RV32-NEXT: vor.vv v11, v13, v11 -; LMULMAX1-RV32-NEXT: vsll.vx v13, v9, a2 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a4 -; LMULMAX1-RV32-NEXT: vsll.vx v9, v9, a3 -; LMULMAX1-RV32-NEXT: vor.vv v9, v13, v9 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v11 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsrl.vx v10, v8, a2 -; LMULMAX1-RV32-NEXT: vsrl.vx v11, v8, a3 -; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a4 -; LMULMAX1-RV32-NEXT: vor.vv v10, v11, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v11, v8, 24 -; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a5 -; LMULMAX1-RV32-NEXT: vsrl.vi v13, v8, 8 -; LMULMAX1-RV32-NEXT: vand.vv v13, v13, v12 -; LMULMAX1-RV32-NEXT: vor.vv v11, v13, v11 -; LMULMAX1-RV32-NEXT: vor.vv v10, v11, v10 -; LMULMAX1-RV32-NEXT: vsll.vx v11, v8, a2 -; LMULMAX1-RV32-NEXT: vand.vx v13, v8, a4 -; LMULMAX1-RV32-NEXT: vsll.vx v13, v13, a3 -; LMULMAX1-RV32-NEXT: vor.vv v11, v11, v13 -; LMULMAX1-RV32-NEXT: vand.vx v13, v8, a5 -; LMULMAX1-RV32-NEXT: vsll.vi v13, v13, 24 -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v12 -; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 8 -; LMULMAX1-RV32-NEXT: vor.vv v8, v13, v8 -; LMULMAX1-RV32-NEXT: vor.vv v8, v11, v8 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v9, (a1) -; LMULMAX1-RV32-NEXT: addi sp, sp, 16 -; LMULMAX1-RV32-NEXT: ret +; RV32-LABEL: bswap_v4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: sw zero, 12(sp) +; RV32-NEXT: lui a1, 1044480 +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: li a1, 56 +; RV32-NEXT: vsrl.vx v10, v8, a1 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: vsrl.vx v12, v8, a2 +; RV32-NEXT: lui a3, 16 +; RV32-NEXT: addi a3, a3, -256 +; RV32-NEXT: 
vand.vx v12, v12, a3 +; RV32-NEXT: vor.vv v10, v12, v10 +; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: addi a4, sp, 8 +; RV32-NEXT: vlse64.v v14, (a4), zero +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v12, v12, a4 +; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: vand.vv v16, v16, v14 +; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vor.vv v10, v12, v10 +; RV32-NEXT: vsll.vx v12, v8, a1 +; RV32-NEXT: vand.vx v16, v8, a3 +; RV32-NEXT: vsll.vx v16, v16, a2 +; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vand.vx v16, v8, a4 +; RV32-NEXT: vsll.vi v16, v16, 24 +; RV32-NEXT: vand.vv v8, v8, v14 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vse64.v v8, (a0) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret ; -; LMULMAX1-RV64-LABEL: bswap_v4i64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v8, (a1) -; LMULMAX1-RV64-NEXT: vle64.v v9, (a0) -; LMULMAX1-RV64-NEXT: li a2, 56 -; LMULMAX1-RV64-NEXT: vsrl.vx v10, v8, a2 -; LMULMAX1-RV64-NEXT: li a3, 40 -; LMULMAX1-RV64-NEXT: vsrl.vx v11, v8, a3 -; LMULMAX1-RV64-NEXT: lui a4, 16 -; LMULMAX1-RV64-NEXT: addiw a4, a4, -256 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a4 -; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v11, v8, 24 -; LMULMAX1-RV64-NEXT: lui a5, 4080 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a5 -; LMULMAX1-RV64-NEXT: vsrl.vi v12, v8, 8 -; LMULMAX1-RV64-NEXT: li a6, 255 -; LMULMAX1-RV64-NEXT: slli a6, a6, 24 -; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a6 -; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11 -; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10 -; LMULMAX1-RV64-NEXT: vand.vx v11, v8, a6 -; LMULMAX1-RV64-NEXT: vsll.vi v11, v11, 8 -; LMULMAX1-RV64-NEXT: vand.vx v12, v8, a5 -; LMULMAX1-RV64-NEXT: vsll.vi v12, v12, 24 -; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11 -; LMULMAX1-RV64-NEXT: vsll.vx v12, v8, a2 
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4 -; LMULMAX1-RV64-NEXT: vsll.vx v8, v8, a3 -; LMULMAX1-RV64-NEXT: vor.vv v8, v12, v8 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vsrl.vx v10, v9, a2 -; LMULMAX1-RV64-NEXT: vsrl.vx v11, v9, a3 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a4 -; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10 -; LMULMAX1-RV64-NEXT: vsrl.vi v11, v9, 24 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a5 -; LMULMAX1-RV64-NEXT: vsrl.vi v12, v9, 8 -; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a6 -; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11 -; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10 -; LMULMAX1-RV64-NEXT: vand.vx v11, v9, a6 -; LMULMAX1-RV64-NEXT: vsll.vi v11, v11, 8 -; LMULMAX1-RV64-NEXT: vand.vx v12, v9, a5 -; LMULMAX1-RV64-NEXT: vsll.vi v12, v12, 24 -; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11 -; LMULMAX1-RV64-NEXT: vsll.vx v12, v9, a2 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a4 -; LMULMAX1-RV64-NEXT: vsll.vx v9, v9, a3 -; LMULMAX1-RV64-NEXT: vor.vv v9, v12, v9 -; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v11 -; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v8, (a1) -; LMULMAX1-RV64-NEXT: ret +; RV64-LABEL: bswap_v4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: li a1, 56 +; RV64-NEXT: vsrl.vx v10, v8, a1 +; RV64-NEXT: li a2, 40 +; RV64-NEXT: vsrl.vx v12, v8, a2 +; RV64-NEXT: lui a3, 16 +; RV64-NEXT: addiw a3, a3, -256 +; RV64-NEXT: vand.vx v12, v12, a3 +; RV64-NEXT: vor.vv v10, v12, v10 +; RV64-NEXT: vsrl.vi v12, v8, 24 +; RV64-NEXT: lui a4, 4080 +; RV64-NEXT: vand.vx v12, v12, a4 +; RV64-NEXT: vsrl.vi v14, v8, 8 +; RV64-NEXT: li a5, 255 +; RV64-NEXT: slli a5, a5, 24 +; RV64-NEXT: vand.vx v14, v14, a5 +; RV64-NEXT: vor.vv v12, v14, v12 +; RV64-NEXT: vor.vv v10, v12, v10 +; RV64-NEXT: vand.vx v12, v8, a5 +; RV64-NEXT: vsll.vi v12, v12, 8 +; RV64-NEXT: vand.vx v14, v8, a4 +; RV64-NEXT: 
vsll.vi v14, v14, 24 +; RV64-NEXT: vor.vv v12, v14, v12 +; RV64-NEXT: vsll.vx v14, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a3 +; RV64-NEXT: vsll.vx v8, v8, a2 +; RV64-NEXT: vor.vv v8, v14, v8 +; RV64-NEXT: vor.vv v8, v8, v12 +; RV64-NEXT: vor.vv v8, v8, v10 +; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: ret ; ; ZVKB-LABEL: bswap_v4i64: ; ZVKB: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll index 9ec15e5710f72d..63cd42e97ef6f1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s define fastcc <4 x i8> @ret_v4i8(ptr %p) { ; CHECK-LABEL: ret_v4i8: @@ -33,19 +32,11 @@ define fastcc <8 x i32> @ret_v8i32(ptr %p) { } define fastcc <16 x i64> @ret_v16i64(ptr %p) { -; LMULMAX8-LABEL: ret_v16i64: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; LMULMAX8-NEXT: vle64.v v8, (a0) -; LMULMAX8-NEXT: ret -; -; LMULMAX4-LABEL: ret_v16i64: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; LMULMAX4-NEXT: vle64.v v8, (a0) -; LMULMAX4-NEXT: addi a0, a0, 64 -; LMULMAX4-NEXT: vle64.v v12, (a0) -; LMULMAX4-NEXT: ret +; CHECK-LABEL: ret_v16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: ret %v = load <16 x i64>, ptr %p ret <16 x i64> %v } @@ -73,86 +64,39 @@ define fastcc <32 x i1> 
@ret_mask_v32i1(ptr %p) { ; Return the vector via registers v8-v23 define fastcc <64 x i32> @ret_split_v64i32(ptr %x) { -; LMULMAX8-LABEL: ret_split_v64i32: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: li a1, 32 -; LMULMAX8-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; LMULMAX8-NEXT: vle32.v v8, (a0) -; LMULMAX8-NEXT: addi a0, a0, 128 -; LMULMAX8-NEXT: vle32.v v16, (a0) -; LMULMAX8-NEXT: ret -; -; LMULMAX4-LABEL: ret_split_v64i32: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; LMULMAX4-NEXT: vle32.v v8, (a0) -; LMULMAX4-NEXT: addi a1, a0, 64 -; LMULMAX4-NEXT: vle32.v v12, (a1) -; LMULMAX4-NEXT: addi a1, a0, 128 -; LMULMAX4-NEXT: vle32.v v16, (a1) -; LMULMAX4-NEXT: addi a0, a0, 192 -; LMULMAX4-NEXT: vle32.v v20, (a0) -; LMULMAX4-NEXT: ret +; CHECK-LABEL: ret_split_v64i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle32.v v16, (a0) +; CHECK-NEXT: ret %v = load <64 x i32>, ptr %x ret <64 x i32> %v } ; Return the vector fully via the stack define fastcc <128 x i32> @ret_split_v128i32(ptr %x) { -; LMULMAX8-LABEL: ret_split_v128i32: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: addi a2, a1, 128 -; LMULMAX8-NEXT: li a3, 32 -; LMULMAX8-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; LMULMAX8-NEXT: vle32.v v8, (a2) -; LMULMAX8-NEXT: addi a2, a1, 256 -; LMULMAX8-NEXT: vle32.v v16, (a1) -; LMULMAX8-NEXT: addi a1, a1, 384 -; LMULMAX8-NEXT: vle32.v v24, (a1) -; LMULMAX8-NEXT: vle32.v v0, (a2) -; LMULMAX8-NEXT: vse32.v v16, (a0) -; LMULMAX8-NEXT: addi a1, a0, 384 -; LMULMAX8-NEXT: vse32.v v24, (a1) -; LMULMAX8-NEXT: addi a1, a0, 256 -; LMULMAX8-NEXT: vse32.v v0, (a1) -; LMULMAX8-NEXT: addi a0, a0, 128 -; LMULMAX8-NEXT: vse32.v v8, (a0) -; LMULMAX8-NEXT: ret -; -; LMULMAX4-LABEL: ret_split_v128i32: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: addi a2, a1, 64 -; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; LMULMAX4-NEXT: vle32.v v8, (a2) 
-; LMULMAX4-NEXT: addi a2, a1, 128 -; LMULMAX4-NEXT: vle32.v v12, (a2) -; LMULMAX4-NEXT: addi a2, a1, 192 -; LMULMAX4-NEXT: vle32.v v16, (a2) -; LMULMAX4-NEXT: addi a2, a1, 256 -; LMULMAX4-NEXT: vle32.v v20, (a2) -; LMULMAX4-NEXT: addi a2, a1, 320 -; LMULMAX4-NEXT: vle32.v v24, (a2) -; LMULMAX4-NEXT: addi a2, a1, 384 -; LMULMAX4-NEXT: vle32.v v28, (a1) -; LMULMAX4-NEXT: addi a1, a1, 448 -; LMULMAX4-NEXT: vle32.v v0, (a1) -; LMULMAX4-NEXT: vle32.v v4, (a2) -; LMULMAX4-NEXT: vse32.v v28, (a0) -; LMULMAX4-NEXT: addi a1, a0, 448 -; LMULMAX4-NEXT: vse32.v v0, (a1) -; LMULMAX4-NEXT: addi a1, a0, 384 -; LMULMAX4-NEXT: vse32.v v4, (a1) -; LMULMAX4-NEXT: addi a1, a0, 320 -; LMULMAX4-NEXT: vse32.v v24, (a1) -; LMULMAX4-NEXT: addi a1, a0, 256 -; LMULMAX4-NEXT: vse32.v v20, (a1) -; LMULMAX4-NEXT: addi a1, a0, 192 -; LMULMAX4-NEXT: vse32.v v16, (a1) -; LMULMAX4-NEXT: addi a1, a0, 128 -; LMULMAX4-NEXT: vse32.v v12, (a1) -; LMULMAX4-NEXT: addi a0, a0, 64 -; LMULMAX4-NEXT: vse32.v v8, (a0) -; LMULMAX4-NEXT: ret +; CHECK-LABEL: ret_split_v128i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, a1, 128 +; CHECK-NEXT: li a3, 32 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vle32.v v8, (a2) +; CHECK-NEXT: addi a2, a1, 256 +; CHECK-NEXT: vle32.v v16, (a1) +; CHECK-NEXT: addi a1, a1, 384 +; CHECK-NEXT: vle32.v v24, (a1) +; CHECK-NEXT: vle32.v v0, (a2) +; CHECK-NEXT: vse32.v v16, (a0) +; CHECK-NEXT: addi a1, a0, 384 +; CHECK-NEXT: vse32.v v24, (a1) +; CHECK-NEXT: addi a1, a0, 256 +; CHECK-NEXT: vse32.v v0, (a1) +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret %v = load <128 x i32>, ptr %x ret <128 x i32> %v } @@ -209,29 +153,15 @@ define fastcc <32 x i1> @ret_v32i1_param_v32i1_v32i1(<32 x i1> %v, <32 x i1> %w) } define fastcc <32 x i32> @ret_v32i32_param_v32i32_v32i32_v32i32_i32(<32 x i32> %x, <32 x i32> %y, <32 x i32> %z, i32 %w) { -; LMULMAX8-LABEL: ret_v32i32_param_v32i32_v32i32_v32i32_i32: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: li a2, 
32 -; LMULMAX8-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; LMULMAX8-NEXT: vle32.v v24, (a0) -; LMULMAX8-NEXT: vadd.vv v8, v8, v16 -; LMULMAX8-NEXT: vadd.vv v8, v8, v24 -; LMULMAX8-NEXT: vadd.vx v8, v8, a1 -; LMULMAX8-NEXT: ret -; -; LMULMAX4-LABEL: ret_v32i32_param_v32i32_v32i32_v32i32_i32: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; LMULMAX4-NEXT: addi a1, a0, 64 -; LMULMAX4-NEXT: vle32.v v24, (a1) -; LMULMAX4-NEXT: vle32.v v28, (a0) -; LMULMAX4-NEXT: vadd.vv v8, v8, v16 -; LMULMAX4-NEXT: vadd.vv v12, v12, v20 -; LMULMAX4-NEXT: vadd.vv v12, v12, v24 -; LMULMAX4-NEXT: vadd.vv v8, v8, v28 -; LMULMAX4-NEXT: vadd.vx v8, v8, a2 -; LMULMAX4-NEXT: vadd.vx v12, v12, a2 -; LMULMAX4-NEXT: ret +; CHECK-LABEL: ret_v32i32_param_v32i32_v32i32_v32i32_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 32 +; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; CHECK-NEXT: vle32.v v24, (a0) +; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: vadd.vv v8, v8, v24 +; CHECK-NEXT: vadd.vx v8, v8, a1 +; CHECK-NEXT: ret %r = add <32 x i32> %x, %y %s = add <32 x i32> %r, %z %head = insertelement <32 x i32> poison, i32 %w, i32 0 @@ -244,98 +174,50 @@ declare <32 x i32> @ext2(<32 x i32>, <32 x i32>, i32, i32) declare <32 x i32> @ext3(<32 x i32>, <32 x i32>, <32 x i32>, i32, i32) define fastcc <32 x i32> @ret_v32i32_call_v32i32_v32i32_i32(<32 x i32> %x, <32 x i32> %y, i32 %w) { -; LMULMAX8-LABEL: ret_v32i32_call_v32i32_v32i32_i32: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: addi sp, sp, -16 -; LMULMAX8-NEXT: .cfi_def_cfa_offset 16 -; LMULMAX8-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; LMULMAX8-NEXT: .cfi_offset ra, -8 -; LMULMAX8-NEXT: vmv8r.v v24, v8 -; LMULMAX8-NEXT: li a1, 2 -; LMULMAX8-NEXT: vmv8r.v v8, v16 -; LMULMAX8-NEXT: vmv8r.v v16, v24 -; LMULMAX8-NEXT: call ext2 -; LMULMAX8-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; LMULMAX8-NEXT: addi sp, sp, 16 -; LMULMAX8-NEXT: ret -; -; LMULMAX4-LABEL: ret_v32i32_call_v32i32_v32i32_i32: -; LMULMAX4: # %bb.0: -; 
LMULMAX4-NEXT: addi sp, sp, -16 -; LMULMAX4-NEXT: .cfi_def_cfa_offset 16 -; LMULMAX4-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; LMULMAX4-NEXT: .cfi_offset ra, -8 -; LMULMAX4-NEXT: vmv4r.v v24, v12 -; LMULMAX4-NEXT: vmv4r.v v28, v8 -; LMULMAX4-NEXT: li a1, 2 -; LMULMAX4-NEXT: vmv4r.v v8, v16 -; LMULMAX4-NEXT: vmv4r.v v12, v20 -; LMULMAX4-NEXT: vmv4r.v v16, v28 -; LMULMAX4-NEXT: vmv4r.v v20, v24 -; LMULMAX4-NEXT: call ext2 -; LMULMAX4-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; LMULMAX4-NEXT: addi sp, sp, 16 -; LMULMAX4-NEXT: ret +; CHECK-LABEL: ret_v32i32_call_v32i32_v32i32_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset ra, -8 +; CHECK-NEXT: vmv8r.v v24, v8 +; CHECK-NEXT: li a1, 2 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: vmv8r.v v16, v24 +; CHECK-NEXT: call ext2 +; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret %t = call fastcc <32 x i32> @ext2(<32 x i32> %y, <32 x i32> %x, i32 %w, i32 2) ret <32 x i32> %t } define fastcc <32 x i32> @ret_v32i32_call_v32i32_v32i32_v32i32_i32(<32 x i32> %x, <32 x i32> %y, <32 x i32> %z, i32 %w) { -; LMULMAX8-LABEL: ret_v32i32_call_v32i32_v32i32_v32i32_i32: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: addi sp, sp, -256 -; LMULMAX8-NEXT: .cfi_def_cfa_offset 256 -; LMULMAX8-NEXT: sd ra, 248(sp) # 8-byte Folded Spill -; LMULMAX8-NEXT: sd s0, 240(sp) # 8-byte Folded Spill -; LMULMAX8-NEXT: .cfi_offset ra, -8 -; LMULMAX8-NEXT: .cfi_offset s0, -16 -; LMULMAX8-NEXT: addi s0, sp, 256 -; LMULMAX8-NEXT: .cfi_def_cfa s0, 0 -; LMULMAX8-NEXT: andi sp, sp, -128 -; LMULMAX8-NEXT: li a2, 32 -; LMULMAX8-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; LMULMAX8-NEXT: vle32.v v24, (a0) -; LMULMAX8-NEXT: mv a3, sp -; LMULMAX8-NEXT: mv a0, sp -; LMULMAX8-NEXT: li a2, 42 -; LMULMAX8-NEXT: vse32.v v8, (a3) -; LMULMAX8-NEXT: vmv.v.v v8, v24 -; LMULMAX8-NEXT: call ext3 -; LMULMAX8-NEXT: addi 
sp, s0, -256 -; LMULMAX8-NEXT: ld ra, 248(sp) # 8-byte Folded Reload -; LMULMAX8-NEXT: ld s0, 240(sp) # 8-byte Folded Reload -; LMULMAX8-NEXT: addi sp, sp, 256 -; LMULMAX8-NEXT: ret -; -; LMULMAX4-LABEL: ret_v32i32_call_v32i32_v32i32_v32i32_i32: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: addi sp, sp, -256 -; LMULMAX4-NEXT: .cfi_def_cfa_offset 256 -; LMULMAX4-NEXT: sd ra, 248(sp) # 8-byte Folded Spill -; LMULMAX4-NEXT: sd s0, 240(sp) # 8-byte Folded Spill -; LMULMAX4-NEXT: .cfi_offset ra, -8 -; LMULMAX4-NEXT: .cfi_offset s0, -16 -; LMULMAX4-NEXT: addi s0, sp, 256 -; LMULMAX4-NEXT: .cfi_def_cfa s0, 0 -; LMULMAX4-NEXT: andi sp, sp, -128 -; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; LMULMAX4-NEXT: vle32.v v24, (a0) -; LMULMAX4-NEXT: addi a0, a0, 64 -; LMULMAX4-NEXT: vle32.v v28, (a0) -; LMULMAX4-NEXT: addi a0, sp, 64 -; LMULMAX4-NEXT: vse32.v v12, (a0) -; LMULMAX4-NEXT: mv a1, sp -; LMULMAX4-NEXT: mv a0, sp -; LMULMAX4-NEXT: li a3, 42 -; LMULMAX4-NEXT: vse32.v v8, (a1) -; LMULMAX4-NEXT: vmv.v.v v8, v24 -; LMULMAX4-NEXT: vmv.v.v v12, v28 -; LMULMAX4-NEXT: call ext3 -; LMULMAX4-NEXT: addi sp, s0, -256 -; LMULMAX4-NEXT: ld ra, 248(sp) # 8-byte Folded Reload -; LMULMAX4-NEXT: ld s0, 240(sp) # 8-byte Folded Reload -; LMULMAX4-NEXT: addi sp, sp, 256 -; LMULMAX4-NEXT: ret +; CHECK-LABEL: ret_v32i32_call_v32i32_v32i32_v32i32_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -256 +; CHECK-NEXT: .cfi_def_cfa_offset 256 +; CHECK-NEXT: sd ra, 248(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 240(sp) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset ra, -8 +; CHECK-NEXT: .cfi_offset s0, -16 +; CHECK-NEXT: addi s0, sp, 256 +; CHECK-NEXT: .cfi_def_cfa s0, 0 +; CHECK-NEXT: andi sp, sp, -128 +; CHECK-NEXT: li a2, 32 +; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; CHECK-NEXT: vle32.v v24, (a0) +; CHECK-NEXT: mv a3, sp +; CHECK-NEXT: mv a0, sp +; CHECK-NEXT: li a2, 42 +; CHECK-NEXT: vse32.v v8, (a3) +; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: call ext3 +; CHECK-NEXT: addi 
sp, s0, -256 +; CHECK-NEXT: ld ra, 248(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 240(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 256 +; CHECK-NEXT: ret %t = call fastcc <32 x i32> @ext3(<32 x i32> %z, <32 x i32> %y, <32 x i32> %x, i32 %w, i32 42) ret <32 x i32> %t } @@ -344,127 +226,67 @@ define fastcc <32 x i32> @ret_v32i32_call_v32i32_v32i32_v32i32_i32(<32 x i32> %x ; stack, but with fastcc can pass indirectly with the extra GPR registers ; allowed. define fastcc <32 x i32> @vector_arg_indirect_stack(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, <32 x i32> %x, <32 x i32> %y, <32 x i32> %z, i32 %8) { -; LMULMAX8-LABEL: vector_arg_indirect_stack: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: li a0, 32 -; LMULMAX8-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; LMULMAX8-NEXT: vle32.v v16, (t2) -; LMULMAX8-NEXT: vadd.vv v8, v8, v16 -; LMULMAX8-NEXT: ret -; -; LMULMAX4-LABEL: vector_arg_indirect_stack: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: addi a0, t2, 64 -; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; LMULMAX4-NEXT: vle32.v v16, (t2) -; LMULMAX4-NEXT: vle32.v v20, (a0) -; LMULMAX4-NEXT: vadd.vv v8, v8, v16 -; LMULMAX4-NEXT: vadd.vv v12, v12, v20 -; LMULMAX4-NEXT: ret +; CHECK-LABEL: vector_arg_indirect_stack: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vle32.v v16, (t2) +; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: ret %s = add <32 x i32> %x, %z ret <32 x i32> %s } ; Calling the function above. Ensure we pass the arguments correctly. 
define fastcc <32 x i32> @pass_vector_arg_indirect_stack(<32 x i32> %x, <32 x i32> %y, <32 x i32> %z) { -; LMULMAX8-LABEL: pass_vector_arg_indirect_stack: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: addi sp, sp, -256 -; LMULMAX8-NEXT: .cfi_def_cfa_offset 256 -; LMULMAX8-NEXT: sd ra, 248(sp) # 8-byte Folded Spill -; LMULMAX8-NEXT: sd s0, 240(sp) # 8-byte Folded Spill -; LMULMAX8-NEXT: .cfi_offset ra, -8 -; LMULMAX8-NEXT: .cfi_offset s0, -16 -; LMULMAX8-NEXT: addi s0, sp, 256 -; LMULMAX8-NEXT: .cfi_def_cfa s0, 0 -; LMULMAX8-NEXT: andi sp, sp, -128 -; LMULMAX8-NEXT: li a0, 32 -; LMULMAX8-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; LMULMAX8-NEXT: vmv.v.i v8, 0 -; LMULMAX8-NEXT: mv a0, sp -; LMULMAX8-NEXT: li a1, 1 -; LMULMAX8-NEXT: li a2, 2 -; LMULMAX8-NEXT: li a3, 3 -; LMULMAX8-NEXT: li a4, 4 -; LMULMAX8-NEXT: li a5, 5 -; LMULMAX8-NEXT: li a6, 6 -; LMULMAX8-NEXT: li a7, 7 -; LMULMAX8-NEXT: mv t2, sp -; LMULMAX8-NEXT: li t3, 8 -; LMULMAX8-NEXT: vse32.v v8, (a0) -; LMULMAX8-NEXT: li a0, 0 -; LMULMAX8-NEXT: vmv.v.i v16, 0 -; LMULMAX8-NEXT: call vector_arg_indirect_stack -; LMULMAX8-NEXT: addi sp, s0, -256 -; LMULMAX8-NEXT: ld ra, 248(sp) # 8-byte Folded Reload -; LMULMAX8-NEXT: ld s0, 240(sp) # 8-byte Folded Reload -; LMULMAX8-NEXT: addi sp, sp, 256 -; LMULMAX8-NEXT: ret -; -; LMULMAX4-LABEL: pass_vector_arg_indirect_stack: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: addi sp, sp, -256 -; LMULMAX4-NEXT: .cfi_def_cfa_offset 256 -; LMULMAX4-NEXT: sd ra, 248(sp) # 8-byte Folded Spill -; LMULMAX4-NEXT: sd s0, 240(sp) # 8-byte Folded Spill -; LMULMAX4-NEXT: .cfi_offset ra, -8 -; LMULMAX4-NEXT: .cfi_offset s0, -16 -; LMULMAX4-NEXT: addi s0, sp, 256 -; LMULMAX4-NEXT: .cfi_def_cfa s0, 0 -; LMULMAX4-NEXT: andi sp, sp, -128 -; LMULMAX4-NEXT: addi a0, sp, 64 -; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; LMULMAX4-NEXT: vmv.v.i v8, 0 -; LMULMAX4-NEXT: vse32.v v8, (a0) -; LMULMAX4-NEXT: mv a0, sp -; LMULMAX4-NEXT: li a1, 1 -; LMULMAX4-NEXT: li a2, 2 -; LMULMAX4-NEXT: li a3, 3 -; 
LMULMAX4-NEXT: li a4, 4 -; LMULMAX4-NEXT: li a5, 5 -; LMULMAX4-NEXT: li a6, 6 -; LMULMAX4-NEXT: li a7, 7 -; LMULMAX4-NEXT: mv t2, sp -; LMULMAX4-NEXT: li t4, 8 -; LMULMAX4-NEXT: vse32.v v8, (a0) -; LMULMAX4-NEXT: li a0, 0 -; LMULMAX4-NEXT: vmv.v.i v12, 0 -; LMULMAX4-NEXT: vmv.v.i v16, 0 -; LMULMAX4-NEXT: vmv.v.i v20, 0 -; LMULMAX4-NEXT: call vector_arg_indirect_stack -; LMULMAX4-NEXT: addi sp, s0, -256 -; LMULMAX4-NEXT: ld ra, 248(sp) # 8-byte Folded Reload -; LMULMAX4-NEXT: ld s0, 240(sp) # 8-byte Folded Reload -; LMULMAX4-NEXT: addi sp, sp, 256 -; LMULMAX4-NEXT: ret +; CHECK-LABEL: pass_vector_arg_indirect_stack: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -256 +; CHECK-NEXT: .cfi_def_cfa_offset 256 +; CHECK-NEXT: sd ra, 248(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 240(sp) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset ra, -8 +; CHECK-NEXT: .cfi_offset s0, -16 +; CHECK-NEXT: addi s0, sp, 256 +; CHECK-NEXT: .cfi_def_cfa s0, 0 +; CHECK-NEXT: andi sp, sp, -128 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: mv a0, sp +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: li a2, 2 +; CHECK-NEXT: li a3, 3 +; CHECK-NEXT: li a4, 4 +; CHECK-NEXT: li a5, 5 +; CHECK-NEXT: li a6, 6 +; CHECK-NEXT: li a7, 7 +; CHECK-NEXT: mv t2, sp +; CHECK-NEXT: li t3, 8 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: li a0, 0 +; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: call vector_arg_indirect_stack +; CHECK-NEXT: addi sp, s0, -256 +; CHECK-NEXT: ld ra, 248(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 240(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 256 +; CHECK-NEXT: ret %s = call fastcc <32 x i32> @vector_arg_indirect_stack(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, <32 x i32> zeroinitializer, <32 x i32> zeroinitializer, <32 x i32> zeroinitializer, i32 8) ret <32 x i32> %s } ; A pathological test case where even with fastcc we must use the stack for arguments %13 and %z define fastcc <32 x i32> 
@vector_arg_direct_stack(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 %12, i32 %13, <32 x i32> %x, <32 x i32> %y, <32 x i32> %z, i32 %last) { -; LMULMAX8-LABEL: vector_arg_direct_stack: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: li a0, 32 -; LMULMAX8-NEXT: addi a1, sp, 8 -; LMULMAX8-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; LMULMAX8-NEXT: vle32.v v24, (a1) -; LMULMAX8-NEXT: vadd.vv v8, v8, v16 -; LMULMAX8-NEXT: vadd.vv v8, v8, v24 -; LMULMAX8-NEXT: ret -; -; LMULMAX4-LABEL: vector_arg_direct_stack: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; LMULMAX4-NEXT: addi a0, sp, 8 -; LMULMAX4-NEXT: vle32.v v24, (a0) -; LMULMAX4-NEXT: addi a0, sp, 72 -; LMULMAX4-NEXT: vle32.v v28, (a0) -; LMULMAX4-NEXT: vadd.vv v12, v12, v20 -; LMULMAX4-NEXT: vadd.vv v8, v8, v16 -; LMULMAX4-NEXT: vadd.vv v8, v8, v24 -; LMULMAX4-NEXT: vadd.vv v12, v12, v28 -; LMULMAX4-NEXT: ret +; CHECK-LABEL: vector_arg_direct_stack: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: addi a1, sp, 8 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vle32.v v24, (a1) +; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: vadd.vv v8, v8, v24 +; CHECK-NEXT: ret %s = add <32 x i32> %x, %y %t = add <32 x i32> %s, %z ret <32 x i32> %t @@ -472,76 +294,39 @@ define fastcc <32 x i32> @vector_arg_direct_stack(i32 %0, i32 %1, i32 %2, i32 %3 ; Calling the function above. Ensure we pass the arguments correctly. 
define fastcc <32 x i32> @pass_vector_arg_direct_stack(<32 x i32> %x, <32 x i32> %y, <32 x i32> %z) { -; LMULMAX8-LABEL: pass_vector_arg_direct_stack: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: addi sp, sp, -160 -; LMULMAX8-NEXT: .cfi_def_cfa_offset 160 -; LMULMAX8-NEXT: sd ra, 152(sp) # 8-byte Folded Spill -; LMULMAX8-NEXT: .cfi_offset ra, -8 -; LMULMAX8-NEXT: li a0, 32 -; LMULMAX8-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; LMULMAX8-NEXT: vmv.v.i v8, 0 -; LMULMAX8-NEXT: addi a0, sp, 8 -; LMULMAX8-NEXT: vse32.v v8, (a0) -; LMULMAX8-NEXT: li a0, 1 -; LMULMAX8-NEXT: sd a0, 136(sp) -; LMULMAX8-NEXT: li a0, 13 -; LMULMAX8-NEXT: li a1, 1 -; LMULMAX8-NEXT: li a2, 2 -; LMULMAX8-NEXT: li a3, 3 -; LMULMAX8-NEXT: li a4, 4 -; LMULMAX8-NEXT: li a5, 5 -; LMULMAX8-NEXT: li a6, 6 -; LMULMAX8-NEXT: li a7, 7 -; LMULMAX8-NEXT: li t2, 8 -; LMULMAX8-NEXT: li t3, 9 -; LMULMAX8-NEXT: li t4, 10 -; LMULMAX8-NEXT: li t5, 11 -; LMULMAX8-NEXT: li t6, 12 -; LMULMAX8-NEXT: sd a0, 0(sp) -; LMULMAX8-NEXT: li a0, 0 -; LMULMAX8-NEXT: vmv.v.i v16, 0 -; LMULMAX8-NEXT: call vector_arg_direct_stack -; LMULMAX8-NEXT: ld ra, 152(sp) # 8-byte Folded Reload -; LMULMAX8-NEXT: addi sp, sp, 160 -; LMULMAX8-NEXT: ret -; -; LMULMAX4-LABEL: pass_vector_arg_direct_stack: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: addi sp, sp, -160 -; LMULMAX4-NEXT: .cfi_def_cfa_offset 160 -; LMULMAX4-NEXT: sd ra, 152(sp) # 8-byte Folded Spill -; LMULMAX4-NEXT: .cfi_offset ra, -8 -; LMULMAX4-NEXT: li a0, 1 -; LMULMAX4-NEXT: sd a0, 136(sp) -; LMULMAX4-NEXT: li a0, 13 -; LMULMAX4-NEXT: sd a0, 0(sp) -; LMULMAX4-NEXT: addi a0, sp, 72 -; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; LMULMAX4-NEXT: vmv.v.i v8, 0 -; LMULMAX4-NEXT: vse32.v v8, (a0) -; LMULMAX4-NEXT: addi a0, sp, 8 -; LMULMAX4-NEXT: li a1, 1 -; LMULMAX4-NEXT: li a2, 2 -; LMULMAX4-NEXT: li a3, 3 -; LMULMAX4-NEXT: li a4, 4 -; LMULMAX4-NEXT: li a5, 5 -; LMULMAX4-NEXT: li a6, 6 -; LMULMAX4-NEXT: li a7, 7 -; LMULMAX4-NEXT: li t2, 8 -; LMULMAX4-NEXT: li t3, 9 -; LMULMAX4-NEXT: 
li t4, 10 -; LMULMAX4-NEXT: li t5, 11 -; LMULMAX4-NEXT: li t6, 12 -; LMULMAX4-NEXT: vse32.v v8, (a0) -; LMULMAX4-NEXT: li a0, 0 -; LMULMAX4-NEXT: vmv.v.i v12, 0 -; LMULMAX4-NEXT: vmv.v.i v16, 0 -; LMULMAX4-NEXT: vmv.v.i v20, 0 -; LMULMAX4-NEXT: call vector_arg_direct_stack -; LMULMAX4-NEXT: ld ra, 152(sp) # 8-byte Folded Reload -; LMULMAX4-NEXT: addi sp, sp, 160 -; LMULMAX4-NEXT: ret +; CHECK-LABEL: pass_vector_arg_direct_stack: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -160 +; CHECK-NEXT: .cfi_def_cfa_offset 160 +; CHECK-NEXT: sd ra, 152(sp) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset ra, -8 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: li a0, 1 +; CHECK-NEXT: sd a0, 136(sp) +; CHECK-NEXT: li a0, 13 +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: li a2, 2 +; CHECK-NEXT: li a3, 3 +; CHECK-NEXT: li a4, 4 +; CHECK-NEXT: li a5, 5 +; CHECK-NEXT: li a6, 6 +; CHECK-NEXT: li a7, 7 +; CHECK-NEXT: li t2, 8 +; CHECK-NEXT: li t3, 9 +; CHECK-NEXT: li t4, 10 +; CHECK-NEXT: li t5, 11 +; CHECK-NEXT: li t6, 12 +; CHECK-NEXT: sd a0, 0(sp) +; CHECK-NEXT: li a0, 0 +; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: call vector_arg_direct_stack +; CHECK-NEXT: ld ra, 152(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 160 +; CHECK-NEXT: ret %s = call fastcc <32 x i32> @vector_arg_direct_stack(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, <32 x i32> zeroinitializer, <32 x i32> zeroinitializer, <32 x i32> zeroinitializer, i32 1) ret <32 x i32> %s } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll index 8e3a432b8ac834..3286c336a0fd11 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll @@ -1,8 +1,5 @@ ; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s define <4 x i8> @ret_v4i8(ptr %p) { ; CHECK-LABEL: ret_v4i8: @@ -25,81 +22,21 @@ define <4 x i32> @ret_v4i32(ptr %p) { } define <8 x i32> @ret_v8i32(ptr %p) { -; LMULMAX8-LABEL: ret_v8i32: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX8-NEXT: vle32.v v8, (a0) -; LMULMAX8-NEXT: ret -; -; LMULMAX4-LABEL: ret_v8i32: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX4-NEXT: vle32.v v8, (a0) -; LMULMAX4-NEXT: ret -; -; LMULMAX2-LABEL: ret_v8i32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: ret_v8i32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vle32.v v8, (a0) -; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vle32.v v9, (a0) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: ret_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: ret %v = load <8 x i32>, ptr %p ret <8 x i32> %v } define <16 x i64> @ret_v16i64(ptr %p) { -; LMULMAX8-LABEL: ret_v16i64: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: 
vsetivli zero, 16, e64, m8, ta, ma -; LMULMAX8-NEXT: vle64.v v8, (a0) -; LMULMAX8-NEXT: ret -; -; LMULMAX4-LABEL: ret_v16i64: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; LMULMAX4-NEXT: vle64.v v8, (a0) -; LMULMAX4-NEXT: addi a0, a0, 64 -; LMULMAX4-NEXT: vle64.v v12, (a0) -; LMULMAX4-NEXT: ret -; -; LMULMAX2-LABEL: ret_v16i64: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-NEXT: vle64.v v8, (a0) -; LMULMAX2-NEXT: addi a1, a0, 32 -; LMULMAX2-NEXT: vle64.v v10, (a1) -; LMULMAX2-NEXT: addi a1, a0, 64 -; LMULMAX2-NEXT: vle64.v v12, (a1) -; LMULMAX2-NEXT: addi a0, a0, 96 -; LMULMAX2-NEXT: vle64.v v14, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: ret_v16i64: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vle64.v v8, (a0) -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vle64.v v9, (a1) -; LMULMAX1-NEXT: addi a1, a0, 32 -; LMULMAX1-NEXT: vle64.v v10, (a1) -; LMULMAX1-NEXT: addi a1, a0, 48 -; LMULMAX1-NEXT: vle64.v v11, (a1) -; LMULMAX1-NEXT: addi a1, a0, 64 -; LMULMAX1-NEXT: vle64.v v12, (a1) -; LMULMAX1-NEXT: addi a1, a0, 80 -; LMULMAX1-NEXT: vle64.v v13, (a1) -; LMULMAX1-NEXT: addi a1, a0, 96 -; LMULMAX1-NEXT: vle64.v v14, (a1) -; LMULMAX1-NEXT: addi a0, a0, 112 -; LMULMAX1-NEXT: vle64.v v15, (a0) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: ret_v16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: ret %v = load <16 x i64>, ptr %p ret <16 x i64> %v } @@ -115,374 +52,51 @@ define <8 x i1> @ret_mask_v8i1(ptr %p) { } define <32 x i1> @ret_mask_v32i1(ptr %p) { -; LMULMAX8-LABEL: ret_mask_v32i1: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: li a1, 32 -; LMULMAX8-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX8-NEXT: vlm.v v0, (a0) -; LMULMAX8-NEXT: ret -; -; LMULMAX4-LABEL: ret_mask_v32i1: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: li a1, 32 -; LMULMAX4-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; 
LMULMAX4-NEXT: vlm.v v0, (a0) -; LMULMAX4-NEXT: ret -; -; LMULMAX2-LABEL: ret_mask_v32i1: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a1, 32 -; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-NEXT: vlm.v v0, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: ret_mask_v32i1: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vlm.v v0, (a0) -; LMULMAX1-NEXT: addi a0, a0, 2 -; LMULMAX1-NEXT: vlm.v v8, (a0) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: ret_mask_v32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vlm.v v0, (a0) +; CHECK-NEXT: ret %v = load <32 x i1>, ptr %p ret <32 x i1> %v } ; Return the vector via registers v8-v23 define <64 x i32> @ret_split_v64i32(ptr %x) { -; LMULMAX8-LABEL: ret_split_v64i32: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: li a1, 32 -; LMULMAX8-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; LMULMAX8-NEXT: vle32.v v8, (a0) -; LMULMAX8-NEXT: addi a0, a0, 128 -; LMULMAX8-NEXT: vle32.v v16, (a0) -; LMULMAX8-NEXT: ret -; -; LMULMAX4-LABEL: ret_split_v64i32: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; LMULMAX4-NEXT: vle32.v v8, (a0) -; LMULMAX4-NEXT: addi a1, a0, 64 -; LMULMAX4-NEXT: vle32.v v12, (a1) -; LMULMAX4-NEXT: addi a1, a0, 128 -; LMULMAX4-NEXT: vle32.v v16, (a1) -; LMULMAX4-NEXT: addi a0, a0, 192 -; LMULMAX4-NEXT: vle32.v v20, (a0) -; LMULMAX4-NEXT: ret -; -; LMULMAX2-LABEL: ret_split_v64i32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: addi a1, a0, 32 -; LMULMAX2-NEXT: vle32.v v10, (a1) -; LMULMAX2-NEXT: addi a1, a0, 64 -; LMULMAX2-NEXT: vle32.v v12, (a1) -; LMULMAX2-NEXT: addi a1, a0, 96 -; LMULMAX2-NEXT: vle32.v v14, (a1) -; LMULMAX2-NEXT: addi a1, a0, 128 -; LMULMAX2-NEXT: vle32.v v16, (a1) -; LMULMAX2-NEXT: addi a1, a0, 160 -; LMULMAX2-NEXT: vle32.v v18, (a1) -; LMULMAX2-NEXT: addi a1, a0, 192 -; LMULMAX2-NEXT: vle32.v 
v20, (a1) -; LMULMAX2-NEXT: addi a0, a0, 224 -; LMULMAX2-NEXT: vle32.v v22, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: ret_split_v64i32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vle32.v v8, (a0) -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vle32.v v9, (a1) -; LMULMAX1-NEXT: addi a1, a0, 32 -; LMULMAX1-NEXT: vle32.v v10, (a1) -; LMULMAX1-NEXT: addi a1, a0, 48 -; LMULMAX1-NEXT: vle32.v v11, (a1) -; LMULMAX1-NEXT: addi a1, a0, 64 -; LMULMAX1-NEXT: vle32.v v12, (a1) -; LMULMAX1-NEXT: addi a1, a0, 80 -; LMULMAX1-NEXT: vle32.v v13, (a1) -; LMULMAX1-NEXT: addi a1, a0, 96 -; LMULMAX1-NEXT: vle32.v v14, (a1) -; LMULMAX1-NEXT: addi a1, a0, 112 -; LMULMAX1-NEXT: vle32.v v15, (a1) -; LMULMAX1-NEXT: addi a1, a0, 128 -; LMULMAX1-NEXT: vle32.v v16, (a1) -; LMULMAX1-NEXT: addi a1, a0, 144 -; LMULMAX1-NEXT: vle32.v v17, (a1) -; LMULMAX1-NEXT: addi a1, a0, 160 -; LMULMAX1-NEXT: vle32.v v18, (a1) -; LMULMAX1-NEXT: addi a1, a0, 176 -; LMULMAX1-NEXT: vle32.v v19, (a1) -; LMULMAX1-NEXT: addi a1, a0, 192 -; LMULMAX1-NEXT: vle32.v v20, (a1) -; LMULMAX1-NEXT: addi a1, a0, 208 -; LMULMAX1-NEXT: vle32.v v21, (a1) -; LMULMAX1-NEXT: addi a1, a0, 224 -; LMULMAX1-NEXT: vle32.v v22, (a1) -; LMULMAX1-NEXT: addi a0, a0, 240 -; LMULMAX1-NEXT: vle32.v v23, (a0) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: ret_split_v64i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle32.v v16, (a0) +; CHECK-NEXT: ret %v = load <64 x i32>, ptr %x ret <64 x i32> %v } ; Return the vector fully via the stack define <128 x i32> @ret_split_v128i32(ptr %x) { -; LMULMAX8-LABEL: ret_split_v128i32: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: addi a2, a1, 128 -; LMULMAX8-NEXT: li a3, 32 -; LMULMAX8-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; LMULMAX8-NEXT: vle32.v v8, (a2) -; LMULMAX8-NEXT: addi a2, a1, 256 -; LMULMAX8-NEXT: vle32.v v16, (a1) -; 
LMULMAX8-NEXT: addi a1, a1, 384 -; LMULMAX8-NEXT: vle32.v v24, (a1) -; LMULMAX8-NEXT: vle32.v v0, (a2) -; LMULMAX8-NEXT: vse32.v v16, (a0) -; LMULMAX8-NEXT: addi a1, a0, 384 -; LMULMAX8-NEXT: vse32.v v24, (a1) -; LMULMAX8-NEXT: addi a1, a0, 256 -; LMULMAX8-NEXT: vse32.v v0, (a1) -; LMULMAX8-NEXT: addi a0, a0, 128 -; LMULMAX8-NEXT: vse32.v v8, (a0) -; LMULMAX8-NEXT: ret -; -; LMULMAX4-LABEL: ret_split_v128i32: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: addi a2, a1, 64 -; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; LMULMAX4-NEXT: vle32.v v8, (a2) -; LMULMAX4-NEXT: addi a2, a1, 128 -; LMULMAX4-NEXT: vle32.v v12, (a2) -; LMULMAX4-NEXT: addi a2, a1, 192 -; LMULMAX4-NEXT: vle32.v v16, (a2) -; LMULMAX4-NEXT: addi a2, a1, 256 -; LMULMAX4-NEXT: vle32.v v20, (a2) -; LMULMAX4-NEXT: addi a2, a1, 320 -; LMULMAX4-NEXT: vle32.v v24, (a2) -; LMULMAX4-NEXT: addi a2, a1, 384 -; LMULMAX4-NEXT: vle32.v v28, (a1) -; LMULMAX4-NEXT: addi a1, a1, 448 -; LMULMAX4-NEXT: vle32.v v0, (a1) -; LMULMAX4-NEXT: vle32.v v4, (a2) -; LMULMAX4-NEXT: vse32.v v28, (a0) -; LMULMAX4-NEXT: addi a1, a0, 448 -; LMULMAX4-NEXT: vse32.v v0, (a1) -; LMULMAX4-NEXT: addi a1, a0, 384 -; LMULMAX4-NEXT: vse32.v v4, (a1) -; LMULMAX4-NEXT: addi a1, a0, 320 -; LMULMAX4-NEXT: vse32.v v24, (a1) -; LMULMAX4-NEXT: addi a1, a0, 256 -; LMULMAX4-NEXT: vse32.v v20, (a1) -; LMULMAX4-NEXT: addi a1, a0, 192 -; LMULMAX4-NEXT: vse32.v v16, (a1) -; LMULMAX4-NEXT: addi a1, a0, 128 -; LMULMAX4-NEXT: vse32.v v12, (a1) -; LMULMAX4-NEXT: addi a0, a0, 64 -; LMULMAX4-NEXT: vse32.v v8, (a0) -; LMULMAX4-NEXT: ret -; -; LMULMAX2-LABEL: ret_split_v128i32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: addi a2, a1, 32 -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a2) -; LMULMAX2-NEXT: addi a2, a1, 64 -; LMULMAX2-NEXT: vle32.v v10, (a2) -; LMULMAX2-NEXT: addi a2, a1, 96 -; LMULMAX2-NEXT: vle32.v v12, (a2) -; LMULMAX2-NEXT: addi a2, a1, 128 -; LMULMAX2-NEXT: vle32.v v14, (a2) -; LMULMAX2-NEXT: addi a2, a1, 160 
-; LMULMAX2-NEXT: vle32.v v16, (a2) -; LMULMAX2-NEXT: addi a2, a1, 192 -; LMULMAX2-NEXT: vle32.v v18, (a2) -; LMULMAX2-NEXT: addi a2, a1, 224 -; LMULMAX2-NEXT: vle32.v v20, (a2) -; LMULMAX2-NEXT: addi a2, a1, 256 -; LMULMAX2-NEXT: vle32.v v22, (a2) -; LMULMAX2-NEXT: addi a2, a1, 288 -; LMULMAX2-NEXT: vle32.v v24, (a2) -; LMULMAX2-NEXT: addi a2, a1, 320 -; LMULMAX2-NEXT: vle32.v v26, (a2) -; LMULMAX2-NEXT: addi a2, a1, 352 -; LMULMAX2-NEXT: vle32.v v28, (a2) -; LMULMAX2-NEXT: addi a2, a1, 384 -; LMULMAX2-NEXT: vle32.v v30, (a2) -; LMULMAX2-NEXT: addi a2, a1, 416 -; LMULMAX2-NEXT: vle32.v v0, (a2) -; LMULMAX2-NEXT: addi a2, a1, 448 -; LMULMAX2-NEXT: vle32.v v2, (a1) -; LMULMAX2-NEXT: addi a1, a1, 480 -; LMULMAX2-NEXT: vle32.v v4, (a1) -; LMULMAX2-NEXT: vle32.v v6, (a2) -; LMULMAX2-NEXT: vse32.v v2, (a0) -; LMULMAX2-NEXT: addi a1, a0, 480 -; LMULMAX2-NEXT: vse32.v v4, (a1) -; LMULMAX2-NEXT: addi a1, a0, 448 -; LMULMAX2-NEXT: vse32.v v6, (a1) -; LMULMAX2-NEXT: addi a1, a0, 416 -; LMULMAX2-NEXT: vse32.v v0, (a1) -; LMULMAX2-NEXT: addi a1, a0, 384 -; LMULMAX2-NEXT: vse32.v v30, (a1) -; LMULMAX2-NEXT: addi a1, a0, 352 -; LMULMAX2-NEXT: vse32.v v28, (a1) -; LMULMAX2-NEXT: addi a1, a0, 320 -; LMULMAX2-NEXT: vse32.v v26, (a1) -; LMULMAX2-NEXT: addi a1, a0, 288 -; LMULMAX2-NEXT: vse32.v v24, (a1) -; LMULMAX2-NEXT: addi a1, a0, 256 -; LMULMAX2-NEXT: vse32.v v22, (a1) -; LMULMAX2-NEXT: addi a1, a0, 224 -; LMULMAX2-NEXT: vse32.v v20, (a1) -; LMULMAX2-NEXT: addi a1, a0, 192 -; LMULMAX2-NEXT: vse32.v v18, (a1) -; LMULMAX2-NEXT: addi a1, a0, 160 -; LMULMAX2-NEXT: vse32.v v16, (a1) -; LMULMAX2-NEXT: addi a1, a0, 128 -; LMULMAX2-NEXT: vse32.v v14, (a1) -; LMULMAX2-NEXT: addi a1, a0, 96 -; LMULMAX2-NEXT: vse32.v v12, (a1) -; LMULMAX2-NEXT: addi a1, a0, 64 -; LMULMAX2-NEXT: vse32.v v10, (a1) -; LMULMAX2-NEXT: addi a0, a0, 32 -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: ret_split_v128i32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi a2, a1, 16 -; 
LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vle32.v v8, (a2) -; LMULMAX1-NEXT: addi a2, a1, 32 -; LMULMAX1-NEXT: vle32.v v9, (a2) -; LMULMAX1-NEXT: addi a2, a1, 48 -; LMULMAX1-NEXT: vle32.v v10, (a2) -; LMULMAX1-NEXT: addi a2, a1, 64 -; LMULMAX1-NEXT: vle32.v v11, (a2) -; LMULMAX1-NEXT: addi a2, a1, 80 -; LMULMAX1-NEXT: vle32.v v12, (a2) -; LMULMAX1-NEXT: addi a2, a1, 96 -; LMULMAX1-NEXT: vle32.v v13, (a2) -; LMULMAX1-NEXT: addi a2, a1, 112 -; LMULMAX1-NEXT: vle32.v v14, (a2) -; LMULMAX1-NEXT: addi a2, a1, 128 -; LMULMAX1-NEXT: vle32.v v15, (a2) -; LMULMAX1-NEXT: addi a2, a1, 144 -; LMULMAX1-NEXT: vle32.v v16, (a2) -; LMULMAX1-NEXT: addi a2, a1, 160 -; LMULMAX1-NEXT: vle32.v v17, (a2) -; LMULMAX1-NEXT: addi a2, a1, 176 -; LMULMAX1-NEXT: vle32.v v18, (a2) -; LMULMAX1-NEXT: addi a2, a1, 192 -; LMULMAX1-NEXT: vle32.v v19, (a2) -; LMULMAX1-NEXT: addi a2, a1, 208 -; LMULMAX1-NEXT: vle32.v v20, (a2) -; LMULMAX1-NEXT: addi a2, a1, 224 -; LMULMAX1-NEXT: vle32.v v21, (a2) -; LMULMAX1-NEXT: addi a2, a1, 240 -; LMULMAX1-NEXT: vle32.v v22, (a2) -; LMULMAX1-NEXT: addi a2, a1, 256 -; LMULMAX1-NEXT: vle32.v v23, (a2) -; LMULMAX1-NEXT: addi a2, a1, 272 -; LMULMAX1-NEXT: vle32.v v24, (a2) -; LMULMAX1-NEXT: addi a2, a1, 288 -; LMULMAX1-NEXT: vle32.v v25, (a2) -; LMULMAX1-NEXT: addi a2, a1, 304 -; LMULMAX1-NEXT: vle32.v v26, (a2) -; LMULMAX1-NEXT: addi a2, a1, 320 -; LMULMAX1-NEXT: vle32.v v27, (a2) -; LMULMAX1-NEXT: addi a2, a1, 336 -; LMULMAX1-NEXT: vle32.v v28, (a2) -; LMULMAX1-NEXT: addi a2, a1, 352 -; LMULMAX1-NEXT: vle32.v v29, (a2) -; LMULMAX1-NEXT: addi a2, a1, 368 -; LMULMAX1-NEXT: vle32.v v30, (a2) -; LMULMAX1-NEXT: addi a2, a1, 384 -; LMULMAX1-NEXT: vle32.v v31, (a2) -; LMULMAX1-NEXT: addi a2, a1, 400 -; LMULMAX1-NEXT: vle32.v v0, (a2) -; LMULMAX1-NEXT: addi a2, a1, 416 -; LMULMAX1-NEXT: vle32.v v1, (a2) -; LMULMAX1-NEXT: addi a2, a1, 432 -; LMULMAX1-NEXT: vle32.v v2, (a2) -; LMULMAX1-NEXT: addi a2, a1, 448 -; LMULMAX1-NEXT: vle32.v v3, (a2) -; 
LMULMAX1-NEXT: addi a2, a1, 464 -; LMULMAX1-NEXT: vle32.v v4, (a2) -; LMULMAX1-NEXT: addi a2, a1, 480 -; LMULMAX1-NEXT: vle32.v v5, (a1) -; LMULMAX1-NEXT: addi a1, a1, 496 -; LMULMAX1-NEXT: vle32.v v6, (a1) -; LMULMAX1-NEXT: vle32.v v7, (a2) -; LMULMAX1-NEXT: vse32.v v5, (a0) -; LMULMAX1-NEXT: addi a1, a0, 496 -; LMULMAX1-NEXT: vse32.v v6, (a1) -; LMULMAX1-NEXT: addi a1, a0, 480 -; LMULMAX1-NEXT: vse32.v v7, (a1) -; LMULMAX1-NEXT: addi a1, a0, 464 -; LMULMAX1-NEXT: vse32.v v4, (a1) -; LMULMAX1-NEXT: addi a1, a0, 448 -; LMULMAX1-NEXT: vse32.v v3, (a1) -; LMULMAX1-NEXT: addi a1, a0, 432 -; LMULMAX1-NEXT: vse32.v v2, (a1) -; LMULMAX1-NEXT: addi a1, a0, 416 -; LMULMAX1-NEXT: vse32.v v1, (a1) -; LMULMAX1-NEXT: addi a1, a0, 400 -; LMULMAX1-NEXT: vse32.v v0, (a1) -; LMULMAX1-NEXT: addi a1, a0, 384 -; LMULMAX1-NEXT: vse32.v v31, (a1) -; LMULMAX1-NEXT: addi a1, a0, 368 -; LMULMAX1-NEXT: vse32.v v30, (a1) -; LMULMAX1-NEXT: addi a1, a0, 352 -; LMULMAX1-NEXT: vse32.v v29, (a1) -; LMULMAX1-NEXT: addi a1, a0, 336 -; LMULMAX1-NEXT: vse32.v v28, (a1) -; LMULMAX1-NEXT: addi a1, a0, 320 -; LMULMAX1-NEXT: vse32.v v27, (a1) -; LMULMAX1-NEXT: addi a1, a0, 304 -; LMULMAX1-NEXT: vse32.v v26, (a1) -; LMULMAX1-NEXT: addi a1, a0, 288 -; LMULMAX1-NEXT: vse32.v v25, (a1) -; LMULMAX1-NEXT: addi a1, a0, 272 -; LMULMAX1-NEXT: vse32.v v24, (a1) -; LMULMAX1-NEXT: addi a1, a0, 256 -; LMULMAX1-NEXT: vse32.v v23, (a1) -; LMULMAX1-NEXT: addi a1, a0, 240 -; LMULMAX1-NEXT: vse32.v v22, (a1) -; LMULMAX1-NEXT: addi a1, a0, 224 -; LMULMAX1-NEXT: vse32.v v21, (a1) -; LMULMAX1-NEXT: addi a1, a0, 208 -; LMULMAX1-NEXT: vse32.v v20, (a1) -; LMULMAX1-NEXT: addi a1, a0, 192 -; LMULMAX1-NEXT: vse32.v v19, (a1) -; LMULMAX1-NEXT: addi a1, a0, 176 -; LMULMAX1-NEXT: vse32.v v18, (a1) -; LMULMAX1-NEXT: addi a1, a0, 160 -; LMULMAX1-NEXT: vse32.v v17, (a1) -; LMULMAX1-NEXT: addi a1, a0, 144 -; LMULMAX1-NEXT: vse32.v v16, (a1) -; LMULMAX1-NEXT: addi a1, a0, 128 -; LMULMAX1-NEXT: vse32.v v15, (a1) -; LMULMAX1-NEXT: addi 
a1, a0, 112 -; LMULMAX1-NEXT: vse32.v v14, (a1) -; LMULMAX1-NEXT: addi a1, a0, 96 -; LMULMAX1-NEXT: vse32.v v13, (a1) -; LMULMAX1-NEXT: addi a1, a0, 80 -; LMULMAX1-NEXT: vse32.v v12, (a1) -; LMULMAX1-NEXT: addi a1, a0, 64 -; LMULMAX1-NEXT: vse32.v v11, (a1) -; LMULMAX1-NEXT: addi a1, a0, 48 -; LMULMAX1-NEXT: vse32.v v10, (a1) -; LMULMAX1-NEXT: addi a1, a0, 32 -; LMULMAX1-NEXT: vse32.v v9, (a1) -; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: ret_split_v128i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, a1, 128 +; CHECK-NEXT: li a3, 32 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vle32.v v8, (a2) +; CHECK-NEXT: addi a2, a1, 256 +; CHECK-NEXT: vle32.v v16, (a1) +; CHECK-NEXT: addi a1, a1, 384 +; CHECK-NEXT: vle32.v v24, (a1) +; CHECK-NEXT: vle32.v v0, (a2) +; CHECK-NEXT: vse32.v v16, (a0) +; CHECK-NEXT: addi a1, a0, 384 +; CHECK-NEXT: vse32.v v24, (a1) +; CHECK-NEXT: addi a1, a0, 256 +; CHECK-NEXT: vse32.v v0, (a1) +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret %v = load <128 x i32>, ptr %x ret <128 x i32> %v } @@ -508,30 +122,11 @@ define <4 x i8> @ret_v4i8_param_v4i8_v4i8(<4 x i8> %v, <4 x i8> %w) { } define <4 x i64> @ret_v4i64_param_v4i64_v4i64(<4 x i64> %v, <4 x i64> %w) { -; LMULMAX8-LABEL: ret_v4i64_param_v4i64_v4i64: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX8-NEXT: vadd.vv v8, v8, v10 -; LMULMAX8-NEXT: ret -; -; LMULMAX4-LABEL: ret_v4i64_param_v4i64_v4i64: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX4-NEXT: vadd.vv v8, v8, v10 -; LMULMAX4-NEXT: ret -; -; LMULMAX2-LABEL: ret_v4i64_param_v4i64_v4i64: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: ret_v4i64_param_v4i64_v4i64: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; 
LMULMAX1-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-NEXT: vadd.vv v9, v9, v11 -; LMULMAX1-NEXT: ret +; CHECK-LABEL: ret_v4i64_param_v4i64_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret %r = add <4 x i64> %v, %w ret <4 x i64> %r } @@ -547,130 +142,26 @@ define <8 x i1> @ret_v8i1_param_v8i1_v8i1(<8 x i1> %v, <8 x i1> %w) { } define <32 x i1> @ret_v32i1_param_v32i1_v32i1(<32 x i1> %v, <32 x i1> %w) { -; LMULMAX8-LABEL: ret_v32i1_param_v32i1_v32i1: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: li a0, 32 -; LMULMAX8-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; LMULMAX8-NEXT: vmand.mm v0, v0, v8 -; LMULMAX8-NEXT: ret -; -; LMULMAX4-LABEL: ret_v32i1_param_v32i1_v32i1: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: li a0, 32 -; LMULMAX4-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; LMULMAX4-NEXT: vmand.mm v0, v0, v8 -; LMULMAX4-NEXT: ret -; -; LMULMAX2-LABEL: ret_v32i1_param_v32i1_v32i1: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a0, 32 -; LMULMAX2-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; LMULMAX2-NEXT: vmand.mm v0, v0, v8 -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: ret_v32i1_param_v32i1_v32i1: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vmand.mm v0, v0, v9 -; LMULMAX1-NEXT: vmand.mm v8, v8, v10 -; LMULMAX1-NEXT: ret +; CHECK-LABEL: ret_v32i1_param_v32i1_v32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret %r = and <32 x i1> %v, %w ret <32 x i1> %r } define <32 x i32> @ret_v32i32_param_v32i32_v32i32_v32i32_i32(<32 x i32> %x, <32 x i32> %y, <32 x i32> %z, i32 %w) { -; LMULMAX8-LABEL: ret_v32i32_param_v32i32_v32i32_v32i32_i32: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: li a2, 32 -; LMULMAX8-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; LMULMAX8-NEXT: vle32.v v24, (a0) -; LMULMAX8-NEXT: vadd.vv v8, v8, v16 -; LMULMAX8-NEXT: vadd.vv v8, v8, v24 -; LMULMAX8-NEXT: vadd.vx v8, v8, a1 
-; LMULMAX8-NEXT: ret -; -; LMULMAX4-LABEL: ret_v32i32_param_v32i32_v32i32_v32i32_i32: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; LMULMAX4-NEXT: addi a1, a0, 64 -; LMULMAX4-NEXT: vle32.v v24, (a1) -; LMULMAX4-NEXT: vle32.v v28, (a0) -; LMULMAX4-NEXT: vadd.vv v8, v8, v16 -; LMULMAX4-NEXT: vadd.vv v12, v12, v20 -; LMULMAX4-NEXT: vadd.vv v12, v12, v24 -; LMULMAX4-NEXT: vadd.vv v8, v8, v28 -; LMULMAX4-NEXT: vadd.vx v8, v8, a2 -; LMULMAX4-NEXT: vadd.vx v12, v12, a2 -; LMULMAX4-NEXT: ret -; -; LMULMAX2-LABEL: ret_v32i32_param_v32i32_v32i32_v32i32_i32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v24, (a0) -; LMULMAX2-NEXT: addi a1, a0, 32 -; LMULMAX2-NEXT: vle32.v v26, (a1) -; LMULMAX2-NEXT: addi a1, a0, 64 -; LMULMAX2-NEXT: vle32.v v28, (a1) -; LMULMAX2-NEXT: addi a0, a0, 96 -; LMULMAX2-NEXT: vle32.v v30, (a0) -; LMULMAX2-NEXT: vadd.vv v8, v8, v16 -; LMULMAX2-NEXT: vadd.vv v10, v10, v18 -; LMULMAX2-NEXT: vadd.vv v12, v12, v20 -; LMULMAX2-NEXT: vadd.vv v14, v14, v22 -; LMULMAX2-NEXT: vadd.vv v14, v14, v30 -; LMULMAX2-NEXT: vadd.vv v12, v12, v28 -; LMULMAX2-NEXT: vadd.vv v10, v10, v26 -; LMULMAX2-NEXT: vadd.vv v8, v8, v24 -; LMULMAX2-NEXT: vadd.vx v8, v8, a4 -; LMULMAX2-NEXT: vadd.vx v10, v10, a4 -; LMULMAX2-NEXT: vadd.vx v12, v12, a4 -; LMULMAX2-NEXT: vadd.vx v14, v14, a4 -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: ret_v32i32_param_v32i32_v32i32_v32i32_i32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vle32.v v24, (a0) -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vle32.v v25, (a1) -; LMULMAX1-NEXT: addi a1, a0, 32 -; LMULMAX1-NEXT: vle32.v v26, (a1) -; LMULMAX1-NEXT: addi a1, a0, 48 -; LMULMAX1-NEXT: vle32.v v27, (a1) -; LMULMAX1-NEXT: addi a1, a0, 64 -; LMULMAX1-NEXT: vle32.v v28, (a1) -; LMULMAX1-NEXT: addi a1, a0, 80 -; LMULMAX1-NEXT: vle32.v v29, (a1) -; LMULMAX1-NEXT: addi a1, a0, 96 -; LMULMAX1-NEXT: vle32.v v30, (a1) -; 
LMULMAX1-NEXT: addi a0, a0, 112 -; LMULMAX1-NEXT: vle32.v v31, (a0) -; LMULMAX1-NEXT: lw a0, 0(sp) -; LMULMAX1-NEXT: vadd.vv v8, v8, v16 -; LMULMAX1-NEXT: vadd.vv v9, v9, v17 -; LMULMAX1-NEXT: vadd.vv v10, v10, v18 -; LMULMAX1-NEXT: vadd.vv v11, v11, v19 -; LMULMAX1-NEXT: vadd.vv v12, v12, v20 -; LMULMAX1-NEXT: vadd.vv v13, v13, v21 -; LMULMAX1-NEXT: vadd.vv v14, v14, v22 -; LMULMAX1-NEXT: vadd.vv v15, v15, v23 -; LMULMAX1-NEXT: vadd.vv v15, v15, v31 -; LMULMAX1-NEXT: vadd.vv v14, v14, v30 -; LMULMAX1-NEXT: vadd.vv v13, v13, v29 -; LMULMAX1-NEXT: vadd.vv v12, v12, v28 -; LMULMAX1-NEXT: vadd.vv v11, v11, v27 -; LMULMAX1-NEXT: vadd.vv v10, v10, v26 -; LMULMAX1-NEXT: vadd.vv v9, v9, v25 -; LMULMAX1-NEXT: vadd.vv v8, v8, v24 -; LMULMAX1-NEXT: vadd.vx v8, v8, a0 -; LMULMAX1-NEXT: vadd.vx v9, v9, a0 -; LMULMAX1-NEXT: vadd.vx v10, v10, a0 -; LMULMAX1-NEXT: vadd.vx v11, v11, a0 -; LMULMAX1-NEXT: vadd.vx v12, v12, a0 -; LMULMAX1-NEXT: vadd.vx v13, v13, a0 -; LMULMAX1-NEXT: vadd.vx v14, v14, a0 -; LMULMAX1-NEXT: vadd.vx v15, v15, a0 -; LMULMAX1-NEXT: ret +; CHECK-LABEL: ret_v32i32_param_v32i32_v32i32_v32i32_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 32 +; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; CHECK-NEXT: vle32.v v24, (a0) +; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: vadd.vv v8, v8, v24 +; CHECK-NEXT: vadd.vx v8, v8, a1 +; CHECK-NEXT: ret %r = add <32 x i32> %x, %y %s = add <32 x i32> %r, %z %head = insertelement <32 x i32> poison, i32 %w, i32 0 @@ -683,499 +174,105 @@ declare <32 x i32> @ext2(<32 x i32>, <32 x i32>, i32, i32) declare <32 x i32> @ext3(<32 x i32>, <32 x i32>, <32 x i32>, i32, i32) define <32 x i32> @ret_v32i32_call_v32i32_v32i32_i32(<32 x i32> %x, <32 x i32> %y, i32 %w) { -; LMULMAX8-LABEL: ret_v32i32_call_v32i32_v32i32_i32: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: addi sp, sp, -16 -; LMULMAX8-NEXT: .cfi_def_cfa_offset 16 -; LMULMAX8-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; LMULMAX8-NEXT: .cfi_offset ra, -8 -; LMULMAX8-NEXT: vmv8r.v v24, 
v8 -; LMULMAX8-NEXT: li a1, 2 -; LMULMAX8-NEXT: vmv8r.v v8, v16 -; LMULMAX8-NEXT: vmv8r.v v16, v24 -; LMULMAX8-NEXT: call ext2 -; LMULMAX8-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; LMULMAX8-NEXT: addi sp, sp, 16 -; LMULMAX8-NEXT: ret -; -; LMULMAX4-LABEL: ret_v32i32_call_v32i32_v32i32_i32: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: addi sp, sp, -16 -; LMULMAX4-NEXT: .cfi_def_cfa_offset 16 -; LMULMAX4-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; LMULMAX4-NEXT: .cfi_offset ra, -8 -; LMULMAX4-NEXT: vmv4r.v v24, v12 -; LMULMAX4-NEXT: vmv4r.v v28, v8 -; LMULMAX4-NEXT: li a1, 2 -; LMULMAX4-NEXT: vmv4r.v v8, v16 -; LMULMAX4-NEXT: vmv4r.v v12, v20 -; LMULMAX4-NEXT: vmv4r.v v16, v28 -; LMULMAX4-NEXT: vmv4r.v v20, v24 -; LMULMAX4-NEXT: call ext2 -; LMULMAX4-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; LMULMAX4-NEXT: addi sp, sp, 16 -; LMULMAX4-NEXT: ret -; -; LMULMAX2-LABEL: ret_v32i32_call_v32i32_v32i32_i32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: addi sp, sp, -16 -; LMULMAX2-NEXT: .cfi_def_cfa_offset 16 -; LMULMAX2-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; LMULMAX2-NEXT: .cfi_offset ra, -8 -; LMULMAX2-NEXT: vmv2r.v v24, v14 -; LMULMAX2-NEXT: vmv2r.v v26, v12 -; LMULMAX2-NEXT: vmv2r.v v28, v10 -; LMULMAX2-NEXT: vmv2r.v v30, v8 -; LMULMAX2-NEXT: li a1, 2 -; LMULMAX2-NEXT: vmv2r.v v8, v16 -; LMULMAX2-NEXT: vmv2r.v v10, v18 -; LMULMAX2-NEXT: vmv2r.v v12, v20 -; LMULMAX2-NEXT: vmv2r.v v14, v22 -; LMULMAX2-NEXT: vmv2r.v v16, v30 -; LMULMAX2-NEXT: vmv2r.v v18, v28 -; LMULMAX2-NEXT: vmv2r.v v20, v26 -; LMULMAX2-NEXT: vmv2r.v v22, v24 -; LMULMAX2-NEXT: call ext2 -; LMULMAX2-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; LMULMAX2-NEXT: addi sp, sp, 16 -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: ret_v32i32_call_v32i32_v32i32_i32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi sp, sp, -16 -; LMULMAX1-NEXT: .cfi_def_cfa_offset 16 -; LMULMAX1-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; LMULMAX1-NEXT: .cfi_offset ra, -8 -; LMULMAX1-NEXT: vmv1r.v v24, v15 -; LMULMAX1-NEXT: vmv1r.v v25, v14 
-; LMULMAX1-NEXT: vmv1r.v v26, v13 -; LMULMAX1-NEXT: vmv1r.v v27, v12 -; LMULMAX1-NEXT: vmv1r.v v28, v11 -; LMULMAX1-NEXT: vmv1r.v v29, v10 -; LMULMAX1-NEXT: vmv1r.v v30, v9 -; LMULMAX1-NEXT: vmv1r.v v31, v8 -; LMULMAX1-NEXT: li a1, 2 -; LMULMAX1-NEXT: vmv1r.v v8, v16 -; LMULMAX1-NEXT: vmv1r.v v9, v17 -; LMULMAX1-NEXT: vmv1r.v v10, v18 -; LMULMAX1-NEXT: vmv1r.v v11, v19 -; LMULMAX1-NEXT: vmv1r.v v12, v20 -; LMULMAX1-NEXT: vmv1r.v v13, v21 -; LMULMAX1-NEXT: vmv1r.v v14, v22 -; LMULMAX1-NEXT: vmv1r.v v15, v23 -; LMULMAX1-NEXT: vmv1r.v v16, v31 -; LMULMAX1-NEXT: vmv1r.v v17, v30 -; LMULMAX1-NEXT: vmv1r.v v18, v29 -; LMULMAX1-NEXT: vmv1r.v v19, v28 -; LMULMAX1-NEXT: vmv1r.v v20, v27 -; LMULMAX1-NEXT: vmv1r.v v21, v26 -; LMULMAX1-NEXT: vmv1r.v v22, v25 -; LMULMAX1-NEXT: vmv1r.v v23, v24 -; LMULMAX1-NEXT: call ext2 -; LMULMAX1-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; LMULMAX1-NEXT: addi sp, sp, 16 -; LMULMAX1-NEXT: ret +; CHECK-LABEL: ret_v32i32_call_v32i32_v32i32_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset ra, -8 +; CHECK-NEXT: vmv8r.v v24, v8 +; CHECK-NEXT: li a1, 2 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: vmv8r.v v16, v24 +; CHECK-NEXT: call ext2 +; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret %t = call <32 x i32> @ext2(<32 x i32> %y, <32 x i32> %x, i32 %w, i32 2) ret <32 x i32> %t } define <32 x i32> @ret_v32i32_call_v32i32_v32i32_v32i32_i32(<32 x i32> %x, <32 x i32> %y, <32 x i32> %z, i32 %w) { -; LMULMAX8-LABEL: ret_v32i32_call_v32i32_v32i32_v32i32_i32: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: addi sp, sp, -256 -; LMULMAX8-NEXT: .cfi_def_cfa_offset 256 -; LMULMAX8-NEXT: sd ra, 248(sp) # 8-byte Folded Spill -; LMULMAX8-NEXT: sd s0, 240(sp) # 8-byte Folded Spill -; LMULMAX8-NEXT: .cfi_offset ra, -8 -; LMULMAX8-NEXT: .cfi_offset s0, -16 -; LMULMAX8-NEXT: addi s0, sp, 256 -; 
LMULMAX8-NEXT: .cfi_def_cfa s0, 0 -; LMULMAX8-NEXT: andi sp, sp, -128 -; LMULMAX8-NEXT: li a2, 32 -; LMULMAX8-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; LMULMAX8-NEXT: vle32.v v24, (a0) -; LMULMAX8-NEXT: mv a3, sp -; LMULMAX8-NEXT: mv a0, sp -; LMULMAX8-NEXT: li a2, 42 -; LMULMAX8-NEXT: vse32.v v8, (a3) -; LMULMAX8-NEXT: vmv.v.v v8, v24 -; LMULMAX8-NEXT: call ext3 -; LMULMAX8-NEXT: addi sp, s0, -256 -; LMULMAX8-NEXT: ld ra, 248(sp) # 8-byte Folded Reload -; LMULMAX8-NEXT: ld s0, 240(sp) # 8-byte Folded Reload -; LMULMAX8-NEXT: addi sp, sp, 256 -; LMULMAX8-NEXT: ret -; -; LMULMAX4-LABEL: ret_v32i32_call_v32i32_v32i32_v32i32_i32: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: addi sp, sp, -256 -; LMULMAX4-NEXT: .cfi_def_cfa_offset 256 -; LMULMAX4-NEXT: sd ra, 248(sp) # 8-byte Folded Spill -; LMULMAX4-NEXT: sd s0, 240(sp) # 8-byte Folded Spill -; LMULMAX4-NEXT: .cfi_offset ra, -8 -; LMULMAX4-NEXT: .cfi_offset s0, -16 -; LMULMAX4-NEXT: addi s0, sp, 256 -; LMULMAX4-NEXT: .cfi_def_cfa s0, 0 -; LMULMAX4-NEXT: andi sp, sp, -128 -; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; LMULMAX4-NEXT: vle32.v v24, (a0) -; LMULMAX4-NEXT: addi a0, a0, 64 -; LMULMAX4-NEXT: vle32.v v28, (a0) -; LMULMAX4-NEXT: addi a0, sp, 64 -; LMULMAX4-NEXT: vse32.v v12, (a0) -; LMULMAX4-NEXT: mv a1, sp -; LMULMAX4-NEXT: mv a0, sp -; LMULMAX4-NEXT: li a3, 42 -; LMULMAX4-NEXT: vse32.v v8, (a1) -; LMULMAX4-NEXT: vmv.v.v v8, v24 -; LMULMAX4-NEXT: vmv.v.v v12, v28 -; LMULMAX4-NEXT: call ext3 -; LMULMAX4-NEXT: addi sp, s0, -256 -; LMULMAX4-NEXT: ld ra, 248(sp) # 8-byte Folded Reload -; LMULMAX4-NEXT: ld s0, 240(sp) # 8-byte Folded Reload -; LMULMAX4-NEXT: addi sp, sp, 256 -; LMULMAX4-NEXT: ret -; -; LMULMAX2-LABEL: ret_v32i32_call_v32i32_v32i32_v32i32_i32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: addi sp, sp, -256 -; LMULMAX2-NEXT: .cfi_def_cfa_offset 256 -; LMULMAX2-NEXT: sd ra, 248(sp) # 8-byte Folded Spill -; LMULMAX2-NEXT: sd s0, 240(sp) # 8-byte Folded Spill -; LMULMAX2-NEXT: .cfi_offset ra, -8 -; 
LMULMAX2-NEXT: .cfi_offset s0, -16 -; LMULMAX2-NEXT: addi s0, sp, 256 -; LMULMAX2-NEXT: .cfi_def_cfa s0, 0 -; LMULMAX2-NEXT: andi sp, sp, -128 -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v24, (a0) -; LMULMAX2-NEXT: addi a1, a0, 32 -; LMULMAX2-NEXT: vle32.v v26, (a1) -; LMULMAX2-NEXT: addi a1, a0, 64 -; LMULMAX2-NEXT: vle32.v v28, (a1) -; LMULMAX2-NEXT: addi a0, a0, 96 -; LMULMAX2-NEXT: vle32.v v30, (a0) -; LMULMAX2-NEXT: addi a0, sp, 96 -; LMULMAX2-NEXT: vse32.v v14, (a0) -; LMULMAX2-NEXT: addi a0, sp, 64 -; LMULMAX2-NEXT: vse32.v v12, (a0) -; LMULMAX2-NEXT: addi a0, sp, 32 -; LMULMAX2-NEXT: vse32.v v10, (a0) -; LMULMAX2-NEXT: mv a1, sp -; LMULMAX2-NEXT: mv a0, sp -; LMULMAX2-NEXT: li a5, 42 -; LMULMAX2-NEXT: vse32.v v8, (a1) -; LMULMAX2-NEXT: vmv.v.v v8, v24 -; LMULMAX2-NEXT: vmv.v.v v10, v26 -; LMULMAX2-NEXT: vmv.v.v v12, v28 -; LMULMAX2-NEXT: vmv.v.v v14, v30 -; LMULMAX2-NEXT: call ext3 -; LMULMAX2-NEXT: addi sp, s0, -256 -; LMULMAX2-NEXT: ld ra, 248(sp) # 8-byte Folded Reload -; LMULMAX2-NEXT: ld s0, 240(sp) # 8-byte Folded Reload -; LMULMAX2-NEXT: addi sp, sp, 256 -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: ret_v32i32_call_v32i32_v32i32_v32i32_i32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi sp, sp, -256 -; LMULMAX1-NEXT: .cfi_def_cfa_offset 256 -; LMULMAX1-NEXT: sd ra, 248(sp) # 8-byte Folded Spill -; LMULMAX1-NEXT: sd s0, 240(sp) # 8-byte Folded Spill -; LMULMAX1-NEXT: sd s1, 232(sp) # 8-byte Folded Spill -; LMULMAX1-NEXT: .cfi_offset ra, -8 -; LMULMAX1-NEXT: .cfi_offset s0, -16 -; LMULMAX1-NEXT: .cfi_offset s1, -24 -; LMULMAX1-NEXT: addi s0, sp, 256 -; LMULMAX1-NEXT: .cfi_def_cfa s0, 0 -; LMULMAX1-NEXT: andi sp, sp, -128 -; LMULMAX1-NEXT: mv s1, sp -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vle32.v v24, (a0) -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vle32.v v25, (a1) -; LMULMAX1-NEXT: addi a1, a0, 32 -; LMULMAX1-NEXT: vle32.v v26, (a1) -; LMULMAX1-NEXT: addi a1, a0, 48 -; LMULMAX1-NEXT: 
vle32.v v27, (a1) -; LMULMAX1-NEXT: addi a1, a0, 64 -; LMULMAX1-NEXT: vle32.v v28, (a1) -; LMULMAX1-NEXT: addi a1, a0, 80 -; LMULMAX1-NEXT: vle32.v v29, (a1) -; LMULMAX1-NEXT: addi a1, a0, 96 -; LMULMAX1-NEXT: vle32.v v30, (a1) -; LMULMAX1-NEXT: addi a0, a0, 112 -; LMULMAX1-NEXT: vle32.v v31, (a0) -; LMULMAX1-NEXT: ld a1, 0(s0) -; LMULMAX1-NEXT: addi sp, sp, -16 -; LMULMAX1-NEXT: addi a0, s1, 112 -; LMULMAX1-NEXT: vse32.v v15, (a0) -; LMULMAX1-NEXT: addi a0, s1, 96 -; LMULMAX1-NEXT: vse32.v v14, (a0) -; LMULMAX1-NEXT: addi a0, s1, 80 -; LMULMAX1-NEXT: vse32.v v13, (a0) -; LMULMAX1-NEXT: addi a0, s1, 64 -; LMULMAX1-NEXT: vse32.v v12, (a0) -; LMULMAX1-NEXT: addi a0, s1, 48 -; LMULMAX1-NEXT: vse32.v v11, (a0) -; LMULMAX1-NEXT: addi a0, s1, 32 -; LMULMAX1-NEXT: vse32.v v10, (a0) -; LMULMAX1-NEXT: addi a0, s1, 16 -; LMULMAX1-NEXT: vse32.v v9, (a0) -; LMULMAX1-NEXT: mv a0, s1 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: li a0, 42 -; LMULMAX1-NEXT: sd a0, 8(sp) -; LMULMAX1-NEXT: mv a0, s1 -; LMULMAX1-NEXT: sd a1, 0(sp) -; LMULMAX1-NEXT: vmv.v.v v8, v24 -; LMULMAX1-NEXT: vmv.v.v v9, v25 -; LMULMAX1-NEXT: vmv.v.v v10, v26 -; LMULMAX1-NEXT: vmv.v.v v11, v27 -; LMULMAX1-NEXT: vmv.v.v v12, v28 -; LMULMAX1-NEXT: vmv.v.v v13, v29 -; LMULMAX1-NEXT: vmv.v.v v14, v30 -; LMULMAX1-NEXT: vmv.v.v v15, v31 -; LMULMAX1-NEXT: call ext3 -; LMULMAX1-NEXT: addi sp, sp, 16 -; LMULMAX1-NEXT: addi sp, s0, -256 -; LMULMAX1-NEXT: ld ra, 248(sp) # 8-byte Folded Reload -; LMULMAX1-NEXT: ld s0, 240(sp) # 8-byte Folded Reload -; LMULMAX1-NEXT: ld s1, 232(sp) # 8-byte Folded Reload -; LMULMAX1-NEXT: addi sp, sp, 256 -; LMULMAX1-NEXT: ret +; CHECK-LABEL: ret_v32i32_call_v32i32_v32i32_v32i32_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -256 +; CHECK-NEXT: .cfi_def_cfa_offset 256 +; CHECK-NEXT: sd ra, 248(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 240(sp) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset ra, -8 +; CHECK-NEXT: .cfi_offset s0, -16 +; CHECK-NEXT: addi s0, sp, 256 +; 
CHECK-NEXT: .cfi_def_cfa s0, 0 +; CHECK-NEXT: andi sp, sp, -128 +; CHECK-NEXT: li a2, 32 +; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; CHECK-NEXT: vle32.v v24, (a0) +; CHECK-NEXT: mv a3, sp +; CHECK-NEXT: mv a0, sp +; CHECK-NEXT: li a2, 42 +; CHECK-NEXT: vse32.v v8, (a3) +; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: call ext3 +; CHECK-NEXT: addi sp, s0, -256 +; CHECK-NEXT: ld ra, 248(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 240(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 256 +; CHECK-NEXT: ret %t = call <32 x i32> @ext3(<32 x i32> %z, <32 x i32> %y, <32 x i32> %x, i32 %w, i32 42) ret <32 x i32> %t } ; Test various configurations of split vector types where the values are split ; across both registers and the stack. -; LMUL8: Ins: v8,v9,v10,v11,v12, v16m8 y[0:31], a0+0 z[0:31] -; LMUL4: Ins: v8,v9,v10,v11,v12, v16m4 y[0:15], v20m4 y[16:31], a0+0 z[0:15], ; a0+64 z[16:31] -; LMUL2: Ins: v8,v9,v10,v11,v12, v14m2 y[0:7], v16m2 y[8:15], v18m2 y[16:23], ; v20m2 y[24:31], v22m2 z[0:7], a1+0 z[8:15], a1+32 z[16:23], ; a1+64 z[24:31] -; LMUL1: Ins: v8,v9,v10,v11,v12, v13 y[0:3], v14 y[4:7], v15 y[8:11], ; v16 y[12:15], v17 y[16:19], v18 y[20:23], v19 y[24:27], ; v20 y[28:31], v21 z[0:3], v22 z[4:7], v23 z[8:11], ; a1+0 z[12:15], a1+16 z[16:19], a1+32 z[20:23], a1+48 z[24:27], ; a1+64 z[28:31] define <32 x i32> @split_vector_args(<2 x i32>,<2 x i32>,<2 x i32>,<2 x i32>,<2 x i32>, <32 x i32> %y, <32 x i32> %z) { -; LMULMAX8-LABEL: split_vector_args: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: li a1, 32 -; LMULMAX8-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; LMULMAX8-NEXT: vle32.v v8, (a0) -; LMULMAX8-NEXT: vadd.vv v8, v16, v8 -; LMULMAX8-NEXT: ret -; -; LMULMAX4-LABEL: split_vector_args: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: addi a1, a0, 64 -; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; LMULMAX4-NEXT: vle32.v v8, (a0) -; LMULMAX4-NEXT: vle32.v v12, (a1) -; LMULMAX4-NEXT: vadd.vv v8, v16, v8 -; LMULMAX4-NEXT: vadd.vv v12, v20, v12 -; 
LMULMAX4-NEXT: ret -; -; LMULMAX2-LABEL: split_vector_args: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: addi a1, a0, 64 -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v10, (a0) -; LMULMAX2-NEXT: addi a0, a0, 32 -; LMULMAX2-NEXT: vle32.v v12, (a0) -; LMULMAX2-NEXT: vle32.v v24, (a1) -; LMULMAX2-NEXT: vadd.vv v8, v14, v22 -; LMULMAX2-NEXT: vadd.vv v10, v16, v10 -; LMULMAX2-NEXT: vadd.vv v12, v18, v12 -; LMULMAX2-NEXT: vadd.vv v14, v20, v24 -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: split_vector_args: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi a1, a0, 64 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vle32.v v24, (a1) -; LMULMAX1-NEXT: addi a1, a0, 48 -; LMULMAX1-NEXT: vle32.v v25, (a1) -; LMULMAX1-NEXT: addi a1, a0, 32 -; LMULMAX1-NEXT: vle32.v v26, (a1) -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vle32.v v12, (a1) -; LMULMAX1-NEXT: vle32.v v11, (a0) -; LMULMAX1-NEXT: vadd.vv v8, v13, v21 -; LMULMAX1-NEXT: vadd.vv v9, v14, v22 -; LMULMAX1-NEXT: vadd.vv v10, v15, v23 -; LMULMAX1-NEXT: vadd.vv v11, v16, v11 -; LMULMAX1-NEXT: vadd.vv v12, v17, v12 -; LMULMAX1-NEXT: vadd.vv v13, v18, v26 -; LMULMAX1-NEXT: vadd.vv v14, v19, v25 -; LMULMAX1-NEXT: vadd.vv v15, v20, v24 -; LMULMAX1-NEXT: ret +; CHECK-LABEL: split_vector_args: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vadd.vv v8, v16, v8 +; CHECK-NEXT: ret %v0 = add <32 x i32> %y, %z ret <32 x i32> %v0 } define <32 x i32> @call_split_vector_args(ptr %pa, ptr %pb) { -; LMULMAX8-LABEL: call_split_vector_args: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: addi sp, sp, -256 -; LMULMAX8-NEXT: .cfi_def_cfa_offset 256 -; LMULMAX8-NEXT: sd ra, 248(sp) # 8-byte Folded Spill -; LMULMAX8-NEXT: sd s0, 240(sp) # 8-byte Folded Spill -; LMULMAX8-NEXT: .cfi_offset ra, -8 -; LMULMAX8-NEXT: .cfi_offset s0, -16 -; LMULMAX8-NEXT: addi s0, sp, 256 -; LMULMAX8-NEXT: .cfi_def_cfa s0, 0 -; 
LMULMAX8-NEXT: andi sp, sp, -128 -; LMULMAX8-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX8-NEXT: vle32.v v8, (a0) -; LMULMAX8-NEXT: li a0, 32 -; LMULMAX8-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; LMULMAX8-NEXT: vle32.v v16, (a1) -; LMULMAX8-NEXT: mv a1, sp -; LMULMAX8-NEXT: mv a0, sp -; LMULMAX8-NEXT: vse32.v v16, (a1) -; LMULMAX8-NEXT: vmv1r.v v9, v8 -; LMULMAX8-NEXT: vmv1r.v v10, v8 -; LMULMAX8-NEXT: vmv1r.v v11, v8 -; LMULMAX8-NEXT: vmv1r.v v12, v8 -; LMULMAX8-NEXT: call split_vector_args -; LMULMAX8-NEXT: addi sp, s0, -256 -; LMULMAX8-NEXT: ld ra, 248(sp) # 8-byte Folded Reload -; LMULMAX8-NEXT: ld s0, 240(sp) # 8-byte Folded Reload -; LMULMAX8-NEXT: addi sp, sp, 256 -; LMULMAX8-NEXT: ret -; -; LMULMAX4-LABEL: call_split_vector_args: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: addi sp, sp, -256 -; LMULMAX4-NEXT: .cfi_def_cfa_offset 256 -; LMULMAX4-NEXT: sd ra, 248(sp) # 8-byte Folded Spill -; LMULMAX4-NEXT: sd s0, 240(sp) # 8-byte Folded Spill -; LMULMAX4-NEXT: .cfi_offset ra, -8 -; LMULMAX4-NEXT: .cfi_offset s0, -16 -; LMULMAX4-NEXT: addi s0, sp, 256 -; LMULMAX4-NEXT: .cfi_def_cfa s0, 0 -; LMULMAX4-NEXT: andi sp, sp, -128 -; LMULMAX4-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX4-NEXT: vle32.v v8, (a0) -; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; LMULMAX4-NEXT: vle32.v v16, (a1) -; LMULMAX4-NEXT: addi a0, a1, 64 -; LMULMAX4-NEXT: vle32.v v20, (a0) -; LMULMAX4-NEXT: addi a0, sp, 64 -; LMULMAX4-NEXT: vse32.v v20, (a0) -; LMULMAX4-NEXT: mv a1, sp -; LMULMAX4-NEXT: mv a0, sp -; LMULMAX4-NEXT: vse32.v v16, (a1) -; LMULMAX4-NEXT: vmv1r.v v9, v8 -; LMULMAX4-NEXT: vmv1r.v v10, v8 -; LMULMAX4-NEXT: vmv1r.v v11, v8 -; LMULMAX4-NEXT: vmv1r.v v12, v8 -; LMULMAX4-NEXT: call split_vector_args -; LMULMAX4-NEXT: addi sp, s0, -256 -; LMULMAX4-NEXT: ld ra, 248(sp) # 8-byte Folded Reload -; LMULMAX4-NEXT: ld s0, 240(sp) # 8-byte Folded Reload -; LMULMAX4-NEXT: addi sp, sp, 256 -; LMULMAX4-NEXT: ret -; -; LMULMAX2-LABEL: call_split_vector_args: -; LMULMAX2: # 
%bb.0: -; LMULMAX2-NEXT: addi sp, sp, -128 -; LMULMAX2-NEXT: .cfi_def_cfa_offset 128 -; LMULMAX2-NEXT: sd ra, 120(sp) # 8-byte Folded Spill -; LMULMAX2-NEXT: sd s0, 112(sp) # 8-byte Folded Spill -; LMULMAX2-NEXT: .cfi_offset ra, -8 -; LMULMAX2-NEXT: .cfi_offset s0, -16 -; LMULMAX2-NEXT: addi s0, sp, 128 -; LMULMAX2-NEXT: .cfi_def_cfa s0, 0 -; LMULMAX2-NEXT: andi sp, sp, -128 -; LMULMAX2-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v14, (a1) -; LMULMAX2-NEXT: addi a0, a1, 32 -; LMULMAX2-NEXT: vle32.v v16, (a0) -; LMULMAX2-NEXT: addi a0, a1, 64 -; LMULMAX2-NEXT: vle32.v v18, (a0) -; LMULMAX2-NEXT: addi a0, a1, 96 -; LMULMAX2-NEXT: vle32.v v20, (a0) -; LMULMAX2-NEXT: addi a0, sp, 64 -; LMULMAX2-NEXT: vse32.v v20, (a0) -; LMULMAX2-NEXT: addi a0, sp, 32 -; LMULMAX2-NEXT: vse32.v v18, (a0) -; LMULMAX2-NEXT: mv a1, sp -; LMULMAX2-NEXT: mv a0, sp -; LMULMAX2-NEXT: vse32.v v16, (a1) -; LMULMAX2-NEXT: vmv1r.v v9, v8 -; LMULMAX2-NEXT: vmv1r.v v10, v8 -; LMULMAX2-NEXT: vmv1r.v v11, v8 -; LMULMAX2-NEXT: vmv1r.v v12, v8 -; LMULMAX2-NEXT: vmv.v.v v22, v14 -; LMULMAX2-NEXT: call split_vector_args -; LMULMAX2-NEXT: addi sp, s0, -128 -; LMULMAX2-NEXT: ld ra, 120(sp) # 8-byte Folded Reload -; LMULMAX2-NEXT: ld s0, 112(sp) # 8-byte Folded Reload -; LMULMAX2-NEXT: addi sp, sp, 128 -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: call_split_vector_args: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi sp, sp, -128 -; LMULMAX1-NEXT: .cfi_def_cfa_offset 128 -; LMULMAX1-NEXT: sd ra, 120(sp) # 8-byte Folded Spill -; LMULMAX1-NEXT: sd s0, 112(sp) # 8-byte Folded Spill -; LMULMAX1-NEXT: .cfi_offset ra, -8 -; LMULMAX1-NEXT: .cfi_offset s0, -16 -; LMULMAX1-NEXT: addi s0, sp, 128 -; LMULMAX1-NEXT: .cfi_def_cfa s0, 0 -; LMULMAX1-NEXT: andi sp, sp, -128 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vle32.v v8, (a0) -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; 
LMULMAX1-NEXT: vle32.v v13, (a1) -; LMULMAX1-NEXT: addi a0, a1, 32 -; LMULMAX1-NEXT: vle32.v v15, (a0) -; LMULMAX1-NEXT: addi a0, a1, 16 -; LMULMAX1-NEXT: vle32.v v14, (a0) -; LMULMAX1-NEXT: addi a0, a1, 48 -; LMULMAX1-NEXT: vle32.v v16, (a0) -; LMULMAX1-NEXT: addi a0, a1, 64 -; LMULMAX1-NEXT: vle32.v v17, (a0) -; LMULMAX1-NEXT: addi a0, a1, 80 -; LMULMAX1-NEXT: vle32.v v18, (a0) -; LMULMAX1-NEXT: addi a0, a1, 96 -; LMULMAX1-NEXT: vle32.v v19, (a0) -; LMULMAX1-NEXT: addi a0, a1, 112 -; LMULMAX1-NEXT: vle32.v v20, (a0) -; LMULMAX1-NEXT: addi a0, sp, 64 -; LMULMAX1-NEXT: vse32.v v20, (a0) -; LMULMAX1-NEXT: addi a0, sp, 48 -; LMULMAX1-NEXT: vse32.v v19, (a0) -; LMULMAX1-NEXT: addi a0, sp, 32 -; LMULMAX1-NEXT: vse32.v v18, (a0) -; LMULMAX1-NEXT: addi a0, sp, 16 -; LMULMAX1-NEXT: vse32.v v17, (a0) -; LMULMAX1-NEXT: mv a1, sp -; LMULMAX1-NEXT: mv a0, sp -; LMULMAX1-NEXT: vse32.v v16, (a1) -; LMULMAX1-NEXT: vmv1r.v v9, v8 -; LMULMAX1-NEXT: vmv1r.v v10, v8 -; LMULMAX1-NEXT: vmv1r.v v11, v8 -; LMULMAX1-NEXT: vmv1r.v v12, v8 -; LMULMAX1-NEXT: vmv.v.v v21, v13 -; LMULMAX1-NEXT: vmv.v.v v22, v14 -; LMULMAX1-NEXT: vmv.v.v v23, v15 -; LMULMAX1-NEXT: call split_vector_args -; LMULMAX1-NEXT: addi sp, s0, -128 -; LMULMAX1-NEXT: ld ra, 120(sp) # 8-byte Folded Reload -; LMULMAX1-NEXT: ld s0, 112(sp) # 8-byte Folded Reload -; LMULMAX1-NEXT: addi sp, sp, 128 -; LMULMAX1-NEXT: ret +; CHECK-LABEL: call_split_vector_args: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -256 +; CHECK-NEXT: .cfi_def_cfa_offset 256 +; CHECK-NEXT: sd ra, 248(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 240(sp) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset ra, -8 +; CHECK-NEXT: .cfi_offset s0, -16 +; CHECK-NEXT: addi s0, sp, 256 +; CHECK-NEXT: .cfi_def_cfa s0, 0 +; CHECK-NEXT: andi sp, sp, -128 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vle32.v v16, (a1) +; CHECK-NEXT: mv a1, sp 
+; CHECK-NEXT: mv a0, sp +; CHECK-NEXT: vse32.v v16, (a1) +; CHECK-NEXT: vmv1r.v v9, v8 +; CHECK-NEXT: vmv1r.v v10, v8 +; CHECK-NEXT: vmv1r.v v11, v8 +; CHECK-NEXT: vmv1r.v v12, v8 +; CHECK-NEXT: call split_vector_args +; CHECK-NEXT: addi sp, s0, -256 +; CHECK-NEXT: ld ra, 248(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 240(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 256 +; CHECK-NEXT: ret %a = load <2 x i32>, ptr %pa %b = load <32 x i32>, ptr %pb %r = call <32 x i32> @split_vector_args(<2 x i32> %a, <2 x i32> %a, <2 x i32> %a, <2 x i32> %a, <2 x i32> %a, <32 x i32> %b, <32 x i32> %b) @@ -1185,217 +282,44 @@ define <32 x i32> @call_split_vector_args(ptr %pa, ptr %pb) { ; A rather pathological test case in which we exhaust all vector registers and ; all scalar registers, forcing %z and %8 to go through the stack. define <32 x i32> @vector_arg_via_stack(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, <32 x i32> %x, <32 x i32> %y, <32 x i32> %z, i32 %8) { -; LMULMAX8-LABEL: vector_arg_via_stack: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: li a0, 32 -; LMULMAX8-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; LMULMAX8-NEXT: vle32.v v16, (sp) -; LMULMAX8-NEXT: vadd.vv v8, v8, v16 -; LMULMAX8-NEXT: ret -; -; LMULMAX4-LABEL: vector_arg_via_stack: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; LMULMAX4-NEXT: vle32.v v16, (sp) -; LMULMAX4-NEXT: addi a0, sp, 64 -; LMULMAX4-NEXT: vle32.v v20, (a0) -; LMULMAX4-NEXT: vadd.vv v8, v8, v16 -; LMULMAX4-NEXT: vadd.vv v12, v12, v20 -; LMULMAX4-NEXT: ret -; -; LMULMAX2-LABEL: vector_arg_via_stack: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: addi a0, sp, 64 -; LMULMAX2-NEXT: vle32.v v16, (a0) -; LMULMAX2-NEXT: vle32.v v18, (sp) -; LMULMAX2-NEXT: addi a0, sp, 32 -; LMULMAX2-NEXT: vle32.v v20, (a0) -; LMULMAX2-NEXT: addi a0, sp, 96 -; LMULMAX2-NEXT: vle32.v v22, (a0) -; LMULMAX2-NEXT: vadd.vv v8, v8, v18 -; LMULMAX2-NEXT: vadd.vv v10, 
v10, v20 -; LMULMAX2-NEXT: vadd.vv v12, v12, v16 -; LMULMAX2-NEXT: vadd.vv v14, v14, v22 -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: vector_arg_via_stack: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi a0, sp, 112 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vle32.v v16, (a0) -; LMULMAX1-NEXT: addi a0, sp, 96 -; LMULMAX1-NEXT: vle32.v v17, (a0) -; LMULMAX1-NEXT: addi a0, sp, 80 -; LMULMAX1-NEXT: vle32.v v18, (a0) -; LMULMAX1-NEXT: addi a0, sp, 64 -; LMULMAX1-NEXT: vle32.v v19, (a0) -; LMULMAX1-NEXT: addi a0, sp, 32 -; LMULMAX1-NEXT: vle32.v v20, (a0) -; LMULMAX1-NEXT: vle32.v v21, (sp) -; LMULMAX1-NEXT: addi a0, sp, 16 -; LMULMAX1-NEXT: vle32.v v22, (a0) -; LMULMAX1-NEXT: addi a0, sp, 48 -; LMULMAX1-NEXT: vle32.v v23, (a0) -; LMULMAX1-NEXT: vadd.vv v8, v8, v21 -; LMULMAX1-NEXT: vadd.vv v9, v9, v22 -; LMULMAX1-NEXT: vadd.vv v10, v10, v20 -; LMULMAX1-NEXT: vadd.vv v11, v11, v23 -; LMULMAX1-NEXT: vadd.vv v12, v12, v19 -; LMULMAX1-NEXT: vadd.vv v13, v13, v18 -; LMULMAX1-NEXT: vadd.vv v14, v14, v17 -; LMULMAX1-NEXT: vadd.vv v15, v15, v16 -; LMULMAX1-NEXT: ret +; CHECK-LABEL: vector_arg_via_stack: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vle32.v v16, (sp) +; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: ret %s = add <32 x i32> %x, %z ret <32 x i32> %s } ; Calling the function above. Ensure we pass the arguments correctly. 
define <32 x i32> @pass_vector_arg_via_stack(<32 x i32> %x, <32 x i32> %y, <32 x i32> %z) { -; LMULMAX8-LABEL: pass_vector_arg_via_stack: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: addi sp, sp, -144 -; LMULMAX8-NEXT: .cfi_def_cfa_offset 144 -; LMULMAX8-NEXT: sd ra, 136(sp) # 8-byte Folded Spill -; LMULMAX8-NEXT: .cfi_offset ra, -8 -; LMULMAX8-NEXT: li a0, 32 -; LMULMAX8-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; LMULMAX8-NEXT: vmv.v.i v8, 0 -; LMULMAX8-NEXT: vse32.v v8, (sp) -; LMULMAX8-NEXT: li a0, 8 -; LMULMAX8-NEXT: li a1, 1 -; LMULMAX8-NEXT: li a2, 2 -; LMULMAX8-NEXT: li a3, 3 -; LMULMAX8-NEXT: li a4, 4 -; LMULMAX8-NEXT: li a5, 5 -; LMULMAX8-NEXT: li a6, 6 -; LMULMAX8-NEXT: li a7, 7 -; LMULMAX8-NEXT: sd a0, 128(sp) -; LMULMAX8-NEXT: li a0, 0 -; LMULMAX8-NEXT: vmv.v.i v16, 0 -; LMULMAX8-NEXT: call vector_arg_via_stack -; LMULMAX8-NEXT: ld ra, 136(sp) # 8-byte Folded Reload -; LMULMAX8-NEXT: addi sp, sp, 144 -; LMULMAX8-NEXT: ret -; -; LMULMAX4-LABEL: pass_vector_arg_via_stack: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: addi sp, sp, -144 -; LMULMAX4-NEXT: .cfi_def_cfa_offset 144 -; LMULMAX4-NEXT: sd ra, 136(sp) # 8-byte Folded Spill -; LMULMAX4-NEXT: .cfi_offset ra, -8 -; LMULMAX4-NEXT: li a0, 8 -; LMULMAX4-NEXT: sd a0, 128(sp) -; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; LMULMAX4-NEXT: vmv.v.i v8, 0 -; LMULMAX4-NEXT: vse32.v v8, (sp) -; LMULMAX4-NEXT: addi a0, sp, 64 -; LMULMAX4-NEXT: li a1, 1 -; LMULMAX4-NEXT: li a2, 2 -; LMULMAX4-NEXT: li a3, 3 -; LMULMAX4-NEXT: li a4, 4 -; LMULMAX4-NEXT: li a5, 5 -; LMULMAX4-NEXT: li a6, 6 -; LMULMAX4-NEXT: li a7, 7 -; LMULMAX4-NEXT: vse32.v v8, (a0) -; LMULMAX4-NEXT: li a0, 0 -; LMULMAX4-NEXT: vmv.v.i v12, 0 -; LMULMAX4-NEXT: vmv.v.i v16, 0 -; LMULMAX4-NEXT: vmv.v.i v20, 0 -; LMULMAX4-NEXT: call vector_arg_via_stack -; LMULMAX4-NEXT: ld ra, 136(sp) # 8-byte Folded Reload -; LMULMAX4-NEXT: addi sp, sp, 144 -; LMULMAX4-NEXT: ret -; -; LMULMAX2-LABEL: pass_vector_arg_via_stack: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: addi 
sp, sp, -144 -; LMULMAX2-NEXT: .cfi_def_cfa_offset 144 -; LMULMAX2-NEXT: sd ra, 136(sp) # 8-byte Folded Spill -; LMULMAX2-NEXT: .cfi_offset ra, -8 -; LMULMAX2-NEXT: li a0, 8 -; LMULMAX2-NEXT: sd a0, 128(sp) -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vmv.v.i v8, 0 -; LMULMAX2-NEXT: vse32.v v8, (sp) -; LMULMAX2-NEXT: addi a0, sp, 96 -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: addi a0, sp, 64 -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: addi a0, sp, 32 -; LMULMAX2-NEXT: li a1, 1 -; LMULMAX2-NEXT: li a2, 2 -; LMULMAX2-NEXT: li a3, 3 -; LMULMAX2-NEXT: li a4, 4 -; LMULMAX2-NEXT: li a5, 5 -; LMULMAX2-NEXT: li a6, 6 -; LMULMAX2-NEXT: li a7, 7 -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: li a0, 0 -; LMULMAX2-NEXT: vmv.v.i v10, 0 -; LMULMAX2-NEXT: vmv.v.i v12, 0 -; LMULMAX2-NEXT: vmv.v.i v14, 0 -; LMULMAX2-NEXT: vmv.v.i v16, 0 -; LMULMAX2-NEXT: vmv.v.i v18, 0 -; LMULMAX2-NEXT: vmv.v.i v20, 0 -; LMULMAX2-NEXT: vmv.v.i v22, 0 -; LMULMAX2-NEXT: call vector_arg_via_stack -; LMULMAX2-NEXT: ld ra, 136(sp) # 8-byte Folded Reload -; LMULMAX2-NEXT: addi sp, sp, 144 -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: pass_vector_arg_via_stack: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi sp, sp, -144 -; LMULMAX1-NEXT: .cfi_def_cfa_offset 144 -; LMULMAX1-NEXT: sd ra, 136(sp) # 8-byte Folded Spill -; LMULMAX1-NEXT: .cfi_offset ra, -8 -; LMULMAX1-NEXT: li a0, 8 -; LMULMAX1-NEXT: sd a0, 128(sp) -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vse32.v v8, (sp) -; LMULMAX1-NEXT: addi a0, sp, 112 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: addi a0, sp, 96 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: addi a0, sp, 80 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: addi a0, sp, 64 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: addi a0, sp, 48 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: addi a0, sp, 32 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: addi a0, 
sp, 16 -; LMULMAX1-NEXT: li a1, 1 -; LMULMAX1-NEXT: li a2, 2 -; LMULMAX1-NEXT: li a3, 3 -; LMULMAX1-NEXT: li a4, 4 -; LMULMAX1-NEXT: li a5, 5 -; LMULMAX1-NEXT: li a6, 6 -; LMULMAX1-NEXT: li a7, 7 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: li a0, 0 -; LMULMAX1-NEXT: vmv.v.i v9, 0 -; LMULMAX1-NEXT: vmv.v.i v10, 0 -; LMULMAX1-NEXT: vmv.v.i v11, 0 -; LMULMAX1-NEXT: vmv.v.i v12, 0 -; LMULMAX1-NEXT: vmv.v.i v13, 0 -; LMULMAX1-NEXT: vmv.v.i v14, 0 -; LMULMAX1-NEXT: vmv.v.i v15, 0 -; LMULMAX1-NEXT: vmv.v.i v16, 0 -; LMULMAX1-NEXT: vmv.v.i v17, 0 -; LMULMAX1-NEXT: vmv.v.i v18, 0 -; LMULMAX1-NEXT: vmv.v.i v19, 0 -; LMULMAX1-NEXT: vmv.v.i v20, 0 -; LMULMAX1-NEXT: vmv.v.i v21, 0 -; LMULMAX1-NEXT: vmv.v.i v22, 0 -; LMULMAX1-NEXT: vmv.v.i v23, 0 -; LMULMAX1-NEXT: call vector_arg_via_stack -; LMULMAX1-NEXT: ld ra, 136(sp) # 8-byte Folded Reload -; LMULMAX1-NEXT: addi sp, sp, 144 -; LMULMAX1-NEXT: ret +; CHECK-LABEL: pass_vector_arg_via_stack: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -144 +; CHECK-NEXT: .cfi_def_cfa_offset 144 +; CHECK-NEXT: sd ra, 136(sp) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset ra, -8 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vse32.v v8, (sp) +; CHECK-NEXT: li a0, 8 +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: li a2, 2 +; CHECK-NEXT: li a3, 3 +; CHECK-NEXT: li a4, 4 +; CHECK-NEXT: li a5, 5 +; CHECK-NEXT: li a6, 6 +; CHECK-NEXT: li a7, 7 +; CHECK-NEXT: sd a0, 128(sp) +; CHECK-NEXT: li a0, 0 +; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: call vector_arg_via_stack +; CHECK-NEXT: ld ra, 136(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 144 +; CHECK-NEXT: ret %s = call <32 x i32> @vector_arg_via_stack(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, <32 x i32> zeroinitializer, <32 x i32> zeroinitializer, <32 x i32> zeroinitializer, i32 8) ret <32 x i32> %s } @@ -1415,194 +339,42 @@ define <4 x i1> @vector_mask_arg_via_stack(i32 %0, i32 %1, i32 %2, i32 %3, i32 % 
; Calling the function above. Ensure we pass the mask arguments correctly. We ; legalize stores of small masks such that the value is at least byte-sized. define <4 x i1> @pass_vector_mask_arg_via_stack(<4 x i1> %v) { -; LMULMAX8-LABEL: pass_vector_mask_arg_via_stack: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: addi sp, sp, -160 -; LMULMAX8-NEXT: .cfi_def_cfa_offset 160 -; LMULMAX8-NEXT: sd ra, 152(sp) # 8-byte Folded Spill -; LMULMAX8-NEXT: .cfi_offset ra, -8 -; LMULMAX8-NEXT: li a0, 32 -; LMULMAX8-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; LMULMAX8-NEXT: vmv.v.i v8, 0 -; LMULMAX8-NEXT: vse32.v v8, (sp) -; LMULMAX8-NEXT: li a0, 8 -; LMULMAX8-NEXT: sd a0, 128(sp) -; LMULMAX8-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX8-NEXT: vmv.v.i v16, 0 -; LMULMAX8-NEXT: vmerge.vim v16, v16, 1, v0 -; LMULMAX8-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX8-NEXT: vmv.v.i v17, 0 -; LMULMAX8-NEXT: vsetivli zero, 4, e8, mf2, tu, ma -; LMULMAX8-NEXT: vmv.v.v v17, v16 -; LMULMAX8-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX8-NEXT: vmsne.vi v16, v17, 0 -; LMULMAX8-NEXT: addi a0, sp, 136 -; LMULMAX8-NEXT: li a5, 5 -; LMULMAX8-NEXT: li a6, 6 -; LMULMAX8-NEXT: li a7, 7 -; LMULMAX8-NEXT: vsm.v v16, (a0) -; LMULMAX8-NEXT: li a0, 0 -; LMULMAX8-NEXT: li a1, 0 -; LMULMAX8-NEXT: li a2, 0 -; LMULMAX8-NEXT: li a3, 0 -; LMULMAX8-NEXT: li a4, 0 -; LMULMAX8-NEXT: vmv8r.v v16, v8 -; LMULMAX8-NEXT: call vector_mask_arg_via_stack -; LMULMAX8-NEXT: ld ra, 152(sp) # 8-byte Folded Reload -; LMULMAX8-NEXT: addi sp, sp, 160 -; LMULMAX8-NEXT: ret -; -; LMULMAX4-LABEL: pass_vector_mask_arg_via_stack: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: addi sp, sp, -160 -; LMULMAX4-NEXT: .cfi_def_cfa_offset 160 -; LMULMAX4-NEXT: sd ra, 152(sp) # 8-byte Folded Spill -; LMULMAX4-NEXT: .cfi_offset ra, -8 -; LMULMAX4-NEXT: li a0, 8 -; LMULMAX4-NEXT: sd a0, 128(sp) -; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; LMULMAX4-NEXT: vmv.v.i v8, 0 -; LMULMAX4-NEXT: vse32.v v8, (sp) -; LMULMAX4-NEXT: addi a0, sp, 
64 -; LMULMAX4-NEXT: vse32.v v8, (a0) -; LMULMAX4-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX4-NEXT: vmv.v.i v12, 0 -; LMULMAX4-NEXT: vmerge.vim v12, v12, 1, v0 -; LMULMAX4-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX4-NEXT: vmv.v.i v13, 0 -; LMULMAX4-NEXT: vsetivli zero, 4, e8, mf2, tu, ma -; LMULMAX4-NEXT: vmv.v.v v13, v12 -; LMULMAX4-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX4-NEXT: vmsne.vi v12, v13, 0 -; LMULMAX4-NEXT: addi a0, sp, 136 -; LMULMAX4-NEXT: li a5, 5 -; LMULMAX4-NEXT: li a6, 6 -; LMULMAX4-NEXT: li a7, 7 -; LMULMAX4-NEXT: vsm.v v12, (a0) -; LMULMAX4-NEXT: li a0, 0 -; LMULMAX4-NEXT: li a1, 0 -; LMULMAX4-NEXT: li a2, 0 -; LMULMAX4-NEXT: li a3, 0 -; LMULMAX4-NEXT: li a4, 0 -; LMULMAX4-NEXT: vmv4r.v v12, v8 -; LMULMAX4-NEXT: vmv4r.v v16, v8 -; LMULMAX4-NEXT: vmv4r.v v20, v8 -; LMULMAX4-NEXT: call vector_mask_arg_via_stack -; LMULMAX4-NEXT: ld ra, 152(sp) # 8-byte Folded Reload -; LMULMAX4-NEXT: addi sp, sp, 160 -; LMULMAX4-NEXT: ret -; -; LMULMAX2-LABEL: pass_vector_mask_arg_via_stack: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: addi sp, sp, -160 -; LMULMAX2-NEXT: .cfi_def_cfa_offset 160 -; LMULMAX2-NEXT: sd ra, 152(sp) # 8-byte Folded Spill -; LMULMAX2-NEXT: .cfi_offset ra, -8 -; LMULMAX2-NEXT: li a0, 8 -; LMULMAX2-NEXT: sd a0, 128(sp) -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vmv.v.i v8, 0 -; LMULMAX2-NEXT: vse32.v v8, (sp) -; LMULMAX2-NEXT: addi a0, sp, 96 -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: addi a0, sp, 64 -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: addi a0, sp, 32 -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX2-NEXT: vmv.v.i v10, 0 -; LMULMAX2-NEXT: vmerge.vim v10, v10, 1, v0 -; LMULMAX2-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX2-NEXT: vmv.v.i v11, 0 -; LMULMAX2-NEXT: vsetivli zero, 4, e8, mf2, tu, ma -; LMULMAX2-NEXT: vmv.v.v v11, v10 -; LMULMAX2-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX2-NEXT: vmsne.vi v10, 
v11, 0 -; LMULMAX2-NEXT: addi a0, sp, 136 -; LMULMAX2-NEXT: li a5, 5 -; LMULMAX2-NEXT: li a6, 6 -; LMULMAX2-NEXT: li a7, 7 -; LMULMAX2-NEXT: vsm.v v10, (a0) -; LMULMAX2-NEXT: li a0, 0 -; LMULMAX2-NEXT: li a1, 0 -; LMULMAX2-NEXT: li a2, 0 -; LMULMAX2-NEXT: li a3, 0 -; LMULMAX2-NEXT: li a4, 0 -; LMULMAX2-NEXT: vmv2r.v v10, v8 -; LMULMAX2-NEXT: vmv2r.v v12, v8 -; LMULMAX2-NEXT: vmv2r.v v14, v8 -; LMULMAX2-NEXT: vmv2r.v v16, v8 -; LMULMAX2-NEXT: vmv2r.v v18, v8 -; LMULMAX2-NEXT: vmv2r.v v20, v8 -; LMULMAX2-NEXT: vmv2r.v v22, v8 -; LMULMAX2-NEXT: call vector_mask_arg_via_stack -; LMULMAX2-NEXT: ld ra, 152(sp) # 8-byte Folded Reload -; LMULMAX2-NEXT: addi sp, sp, 160 -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: pass_vector_mask_arg_via_stack: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi sp, sp, -160 -; LMULMAX1-NEXT: .cfi_def_cfa_offset 160 -; LMULMAX1-NEXT: sd ra, 152(sp) # 8-byte Folded Spill -; LMULMAX1-NEXT: .cfi_offset ra, -8 -; LMULMAX1-NEXT: li a0, 8 -; LMULMAX1-NEXT: sd a0, 128(sp) -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vse32.v v8, (sp) -; LMULMAX1-NEXT: addi a0, sp, 112 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: addi a0, sp, 96 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: addi a0, sp, 80 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: addi a0, sp, 64 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: addi a0, sp, 48 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: addi a0, sp, 32 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: addi a0, sp, 16 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; LMULMAX1-NEXT: vmv.v.i v9, 0 -; LMULMAX1-NEXT: vmerge.vim v9, v9, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vmv.v.i v10, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, tu, ma -; LMULMAX1-NEXT: vmv.v.v v10, v9 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vmsne.vi v9, v10, 0 -; 
LMULMAX1-NEXT: addi a0, sp, 136 -; LMULMAX1-NEXT: li a5, 5 -; LMULMAX1-NEXT: li a6, 6 -; LMULMAX1-NEXT: li a7, 7 -; LMULMAX1-NEXT: vsm.v v9, (a0) -; LMULMAX1-NEXT: li a0, 0 -; LMULMAX1-NEXT: li a1, 0 -; LMULMAX1-NEXT: li a2, 0 -; LMULMAX1-NEXT: li a3, 0 -; LMULMAX1-NEXT: li a4, 0 -; LMULMAX1-NEXT: vmv1r.v v9, v8 -; LMULMAX1-NEXT: vmv1r.v v10, v8 -; LMULMAX1-NEXT: vmv1r.v v11, v8 -; LMULMAX1-NEXT: vmv1r.v v12, v8 -; LMULMAX1-NEXT: vmv1r.v v13, v8 -; LMULMAX1-NEXT: vmv1r.v v14, v8 -; LMULMAX1-NEXT: vmv1r.v v15, v8 -; LMULMAX1-NEXT: vmv1r.v v16, v8 -; LMULMAX1-NEXT: vmv1r.v v17, v8 -; LMULMAX1-NEXT: vmv1r.v v18, v8 -; LMULMAX1-NEXT: vmv1r.v v19, v8 -; LMULMAX1-NEXT: vmv1r.v v20, v8 -; LMULMAX1-NEXT: vmv1r.v v21, v8 -; LMULMAX1-NEXT: vmv1r.v v22, v8 -; LMULMAX1-NEXT: vmv1r.v v23, v8 -; LMULMAX1-NEXT: call vector_mask_arg_via_stack -; LMULMAX1-NEXT: ld ra, 152(sp) # 8-byte Folded Reload -; LMULMAX1-NEXT: addi sp, sp, 160 -; LMULMAX1-NEXT: ret +; CHECK-LABEL: pass_vector_mask_arg_via_stack: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -160 +; CHECK-NEXT: .cfi_def_cfa_offset 160 +; CHECK-NEXT: sd ra, 152(sp) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset ra, -8 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vse32.v v8, (sp) +; CHECK-NEXT: li a0, 8 +; CHECK-NEXT: sd a0, 128(sp) +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v17, 0 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma +; CHECK-NEXT: vmv.v.v v17, v16 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmsne.vi v16, v17, 0 +; CHECK-NEXT: addi a0, sp, 136 +; CHECK-NEXT: li a5, 5 +; CHECK-NEXT: li a6, 6 +; CHECK-NEXT: li a7, 7 +; CHECK-NEXT: vsm.v v16, (a0) +; CHECK-NEXT: li a0, 0 +; CHECK-NEXT: li a1, 0 +; CHECK-NEXT: li a2, 0 +; CHECK-NEXT: li a3, 0 +; CHECK-NEXT: li a4, 0 +; 
CHECK-NEXT: vmv8r.v v16, v8 +; CHECK-NEXT: call vector_mask_arg_via_stack +; CHECK-NEXT: ld ra, 152(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 160 +; CHECK-NEXT: ret %r = call <4 x i1> @vector_mask_arg_via_stack(i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 6, i32 7, <32 x i32> zeroinitializer, <32 x i32> zeroinitializer, <32 x i32> zeroinitializer, i32 8, <4 x i1> %v, <4 x i1> %v) ret <4 x i1> %r } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll index a4a2cd1333086d..3e5a89b9bce388 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll @@ -1,61 +1,71 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x,+zvl128b -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32,LMULMAX2-RV32I -; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x,+zvl128b -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64,LMULMAX2-RV64I -; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x,+zvl128b -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x,+zvl128b -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64 -; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64f,+zvl128b,+f -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32,LMULMAX2-RV32F -; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64f,+zvl128b,+f -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64,LMULMAX2-RV64F -; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d 
-riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32,LMULMAX2-RV32D -; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64,LMULMAX2-RV64D -; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64 -; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8 -; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8 +; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVI,RV32I +; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVI,RV64I +; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64f,+zvl128b,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVF,RV32F +; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64f,+zvl128b,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVF,RV64F +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVD,RV32D +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVD,RV64D ; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB ; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB define void @ctlz_v16i8(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: 
ctlz_v16i8: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 2 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vnot.v v8, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: li a1, 85 -; CHECK-NEXT: vand.vx v9, v9, a1 -; CHECK-NEXT: vsub.vv v8, v8, v9 -; CHECK-NEXT: li a1, 51 -; CHECK-NEXT: vand.vx v9, v8, a1 -; CHECK-NEXT: vsrl.vi v8, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a1 -; CHECK-NEXT: vadd.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vadd.vv v8, v8, v9 -; CHECK-NEXT: vand.vi v8, v8, 15 -; CHECK-NEXT: vse8.v v8, (a0) -; CHECK-NEXT: ret -; -; LMULMAX8-LABEL: ctlz_v16i8: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX8-NEXT: vle8.v v8, (a0) -; LMULMAX8-NEXT: vzext.vf2 v10, v8 -; LMULMAX8-NEXT: vfwcvt.f.xu.v v12, v10 -; LMULMAX8-NEXT: vnsrl.wi v8, v12, 23 -; LMULMAX8-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; LMULMAX8-NEXT: vnsrl.wi v10, v8, 0 -; LMULMAX8-NEXT: li a1, 134 -; LMULMAX8-NEXT: vrsub.vx v8, v10, a1 -; LMULMAX8-NEXT: li a1, 8 -; LMULMAX8-NEXT: vminu.vx v8, v8, a1 -; LMULMAX8-NEXT: vse8.v v8, (a0) -; LMULMAX8-NEXT: ret +; RVI-LABEL: ctlz_v16i8: +; RVI: # %bb.0: +; RVI-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RVI-NEXT: vle8.v v8, (a0) +; RVI-NEXT: vsrl.vi v9, v8, 1 +; RVI-NEXT: vor.vv v8, v8, v9 +; RVI-NEXT: vsrl.vi v9, v8, 2 +; RVI-NEXT: vor.vv v8, v8, v9 +; RVI-NEXT: vsrl.vi v9, v8, 4 +; RVI-NEXT: vor.vv v8, v8, v9 +; RVI-NEXT: vnot.v v8, v8 +; RVI-NEXT: vsrl.vi v9, v8, 1 +; RVI-NEXT: li a1, 85 +; RVI-NEXT: vand.vx v9, v9, a1 +; RVI-NEXT: vsub.vv v8, v8, v9 +; RVI-NEXT: li a1, 51 +; RVI-NEXT: vand.vx v9, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 2 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: vadd.vv v8, v9, v8 +; RVI-NEXT: vsrl.vi v9, v8, 4 +; RVI-NEXT: vadd.vv v8, v8, v9 +; RVI-NEXT: vand.vi 
v8, v8, 15 +; RVI-NEXT: vse8.v v8, (a0) +; RVI-NEXT: ret +; +; RVF-LABEL: ctlz_v16i8: +; RVF: # %bb.0: +; RVF-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RVF-NEXT: vle8.v v8, (a0) +; RVF-NEXT: vzext.vf2 v10, v8 +; RVF-NEXT: vfwcvt.f.xu.v v12, v10 +; RVF-NEXT: vnsrl.wi v8, v12, 23 +; RVF-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; RVF-NEXT: vnsrl.wi v10, v8, 0 +; RVF-NEXT: li a1, 134 +; RVF-NEXT: vrsub.vx v8, v10, a1 +; RVF-NEXT: li a1, 8 +; RVF-NEXT: vminu.vx v8, v8, a1 +; RVF-NEXT: vse8.v v8, (a0) +; RVF-NEXT: ret +; +; RVD-LABEL: ctlz_v16i8: +; RVD: # %bb.0: +; RVD-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RVD-NEXT: vle8.v v8, (a0) +; RVD-NEXT: vzext.vf2 v10, v8 +; RVD-NEXT: vfwcvt.f.xu.v v12, v10 +; RVD-NEXT: vnsrl.wi v8, v12, 23 +; RVD-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; RVD-NEXT: vnsrl.wi v10, v8, 0 +; RVD-NEXT: li a1, 134 +; RVD-NEXT: vrsub.vx v8, v10, a1 +; RVD-NEXT: li a1, 8 +; RVD-NEXT: vminu.vx v8, v8, a1 +; RVD-NEXT: vse8.v v8, (a0) +; RVD-NEXT: ret ; ; ZVBB-LABEL: ctlz_v16i8: ; ZVBB: # %bb.0: @@ -73,175 +83,66 @@ define void @ctlz_v16i8(ptr %x, ptr %y) nounwind { declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) define void @ctlz_v8i16(ptr %x, ptr %y) nounwind { -; LMULMAX2-RV32I-LABEL: ctlz_v8i16: -; LMULMAX2-RV32I: # %bb.0: -; LMULMAX2-RV32I-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV32I-NEXT: lui a1, 5 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32I-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: lui a1, 3 -; 
LMULMAX2-RV32I-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32I-NEXT: vand.vx v9, v8, a1 -; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v9, v8 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: lui a1, 1 -; LMULMAX2-RV32I-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: li a1, 257 -; LMULMAX2-RV32I-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX2-RV32I-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV32I-NEXT: ret -; -; LMULMAX2-RV64I-LABEL: ctlz_v8i16: -; LMULMAX2-RV64I: # %bb.0: -; LMULMAX2-RV64I-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX2-RV64I-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV64I-NEXT: lui a1, 5 -; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64I-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: lui a1, 3 -; LMULMAX2-RV64I-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a1 -; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v9, v8 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: lui a1, 1 -; LMULMAX2-RV64I-NEXT: addi a1, a1, -241 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: li a1, 257 -; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX2-RV64I-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV64I-NEXT: ret -; -; 
LMULMAX1-LABEL: ctlz_v8i16: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vle16.v v8, (a0) -; LMULMAX1-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX1-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX1-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-NEXT: vnot.v v8, v8 -; LMULMAX1-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-NEXT: lui a1, 5 -; LMULMAX1-NEXT: addi a1, a1, 1365 -; LMULMAX1-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-NEXT: vsub.vv v8, v8, v9 -; LMULMAX1-NEXT: lui a1, 3 -; LMULMAX1-NEXT: addi a1, a1, 819 -; LMULMAX1-NEXT: vand.vx v9, v8, a1 -; LMULMAX1-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-NEXT: vadd.vv v8, v9, v8 -; LMULMAX1-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-NEXT: vadd.vv v8, v8, v9 -; LMULMAX1-NEXT: lui a1, 1 -; LMULMAX1-NEXT: addi a1, a1, -241 -; LMULMAX1-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-NEXT: li a1, 257 -; LMULMAX1-NEXT: vmul.vx v8, v8, a1 -; LMULMAX1-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX1-NEXT: vse16.v v8, (a0) -; LMULMAX1-NEXT: ret -; -; LMULMAX2-RV32F-LABEL: ctlz_v8i16: -; LMULMAX2-RV32F: # %bb.0: -; LMULMAX2-RV32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX2-RV32F-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV32F-NEXT: vfwcvt.f.xu.v v10, v8 -; LMULMAX2-RV32F-NEXT: vnsrl.wi v8, v10, 23 -; LMULMAX2-RV32F-NEXT: li a1, 142 -; LMULMAX2-RV32F-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX2-RV32F-NEXT: li a1, 16 -; LMULMAX2-RV32F-NEXT: vminu.vx v8, v8, a1 -; LMULMAX2-RV32F-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV32F-NEXT: ret -; -; LMULMAX2-RV64F-LABEL: ctlz_v8i16: -; LMULMAX2-RV64F: # %bb.0: -; LMULMAX2-RV64F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX2-RV64F-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV64F-NEXT: vfwcvt.f.xu.v v10, v8 -; LMULMAX2-RV64F-NEXT: vnsrl.wi v8, v10, 23 -; LMULMAX2-RV64F-NEXT: li a1, 142 -; LMULMAX2-RV64F-NEXT: vrsub.vx v8, v8, a1 -; 
LMULMAX2-RV64F-NEXT: li a1, 16 -; LMULMAX2-RV64F-NEXT: vminu.vx v8, v8, a1 -; LMULMAX2-RV64F-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV64F-NEXT: ret -; -; LMULMAX2-RV32D-LABEL: ctlz_v8i16: -; LMULMAX2-RV32D: # %bb.0: -; LMULMAX2-RV32D-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX2-RV32D-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV32D-NEXT: vfwcvt.f.xu.v v10, v8 -; LMULMAX2-RV32D-NEXT: vnsrl.wi v8, v10, 23 -; LMULMAX2-RV32D-NEXT: li a1, 142 -; LMULMAX2-RV32D-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX2-RV32D-NEXT: li a1, 16 -; LMULMAX2-RV32D-NEXT: vminu.vx v8, v8, a1 -; LMULMAX2-RV32D-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV32D-NEXT: ret -; -; LMULMAX2-RV64D-LABEL: ctlz_v8i16: -; LMULMAX2-RV64D: # %bb.0: -; LMULMAX2-RV64D-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX2-RV64D-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV64D-NEXT: vfwcvt.f.xu.v v10, v8 -; LMULMAX2-RV64D-NEXT: vnsrl.wi v8, v10, 23 -; LMULMAX2-RV64D-NEXT: li a1, 142 -; LMULMAX2-RV64D-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX2-RV64D-NEXT: li a1, 16 -; LMULMAX2-RV64D-NEXT: vminu.vx v8, v8, a1 -; LMULMAX2-RV64D-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV64D-NEXT: ret -; -; LMULMAX8-LABEL: ctlz_v8i16: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX8-NEXT: vle16.v v8, (a0) -; LMULMAX8-NEXT: vfwcvt.f.xu.v v10, v8 -; LMULMAX8-NEXT: vnsrl.wi v8, v10, 23 -; LMULMAX8-NEXT: li a1, 142 -; LMULMAX8-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX8-NEXT: li a1, 16 -; LMULMAX8-NEXT: vminu.vx v8, v8, a1 -; LMULMAX8-NEXT: vse16.v v8, (a0) -; LMULMAX8-NEXT: ret +; RVI-LABEL: ctlz_v8i16: +; RVI: # %bb.0: +; RVI-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RVI-NEXT: vle16.v v8, (a0) +; RVI-NEXT: vsrl.vi v9, v8, 1 +; RVI-NEXT: vor.vv v8, v8, v9 +; RVI-NEXT: vsrl.vi v9, v8, 2 +; RVI-NEXT: vor.vv v8, v8, v9 +; RVI-NEXT: vsrl.vi v9, v8, 4 +; RVI-NEXT: vor.vv v8, v8, v9 +; RVI-NEXT: vsrl.vi v9, v8, 8 +; RVI-NEXT: vor.vv v8, v8, v9 +; RVI-NEXT: vnot.v v8, v8 +; RVI-NEXT: vsrl.vi v9, v8, 1 +; RVI-NEXT: lui a1, 5 +; RVI-NEXT: addi a1, a1, 
1365 +; RVI-NEXT: vand.vx v9, v9, a1 +; RVI-NEXT: vsub.vv v8, v8, v9 +; RVI-NEXT: lui a1, 3 +; RVI-NEXT: addi a1, a1, 819 +; RVI-NEXT: vand.vx v9, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 2 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: vadd.vv v8, v9, v8 +; RVI-NEXT: vsrl.vi v9, v8, 4 +; RVI-NEXT: vadd.vv v8, v8, v9 +; RVI-NEXT: lui a1, 1 +; RVI-NEXT: addi a1, a1, -241 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: li a1, 257 +; RVI-NEXT: vmul.vx v8, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 8 +; RVI-NEXT: vse16.v v8, (a0) +; RVI-NEXT: ret +; +; RVF-LABEL: ctlz_v8i16: +; RVF: # %bb.0: +; RVF-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RVF-NEXT: vle16.v v8, (a0) +; RVF-NEXT: vfwcvt.f.xu.v v10, v8 +; RVF-NEXT: vnsrl.wi v8, v10, 23 +; RVF-NEXT: li a1, 142 +; RVF-NEXT: vrsub.vx v8, v8, a1 +; RVF-NEXT: li a1, 16 +; RVF-NEXT: vminu.vx v8, v8, a1 +; RVF-NEXT: vse16.v v8, (a0) +; RVF-NEXT: ret +; +; RVD-LABEL: ctlz_v8i16: +; RVD: # %bb.0: +; RVD-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RVD-NEXT: vle16.v v8, (a0) +; RVD-NEXT: vfwcvt.f.xu.v v10, v8 +; RVD-NEXT: vnsrl.wi v8, v10, 23 +; RVD-NEXT: li a1, 142 +; RVD-NEXT: vrsub.vx v8, v8, a1 +; RVD-NEXT: li a1, 16 +; RVD-NEXT: vminu.vx v8, v8, a1 +; RVD-NEXT: vse16.v v8, (a0) +; RVD-NEXT: ret ; ; ZVBB-LABEL: ctlz_v8i16: ; ZVBB: # %bb.0: @@ -259,153 +160,72 @@ define void @ctlz_v8i16(ptr %x, ptr %y) nounwind { declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) define void @ctlz_v4i32(ptr %x, ptr %y) nounwind { -; LMULMAX2-RV32I-LABEL: ctlz_v4i32: -; LMULMAX2-RV32I: # %bb.0: -; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 16 
-; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV32I-NEXT: lui a1, 349525 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32I-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: lui a1, 209715 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32I-NEXT: vand.vx v9, v8, a1 -; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v9, v8 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: lui a1, 61681 -; LMULMAX2-RV32I-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: lui a1, 4112 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 257 -; LMULMAX2-RV32I-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 24 -; LMULMAX2-RV32I-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV32I-NEXT: ret -; -; LMULMAX2-RV64I-LABEL: ctlz_v4i32: -; LMULMAX2-RV64I: # %bb.0: -; LMULMAX2-RV64I-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV64I-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 16 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV64I-NEXT: lui a1, 349525 -; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64I-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: lui a1, 209715 -; LMULMAX2-RV64I-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a1 -; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 -; 
LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v9, v8 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: lui a1, 61681 -; LMULMAX2-RV64I-NEXT: addi a1, a1, -241 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: lui a1, 4112 -; LMULMAX2-RV64I-NEXT: addi a1, a1, 257 -; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 24 -; LMULMAX2-RV64I-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV64I-NEXT: ret -; -; LMULMAX2-RV32F-LABEL: ctlz_v4i32: -; LMULMAX2-RV32F: # %bb.0: -; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV32F-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32F-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX2-RV32F-NEXT: fsrm a1 -; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v8, 23 -; LMULMAX2-RV32F-NEXT: li a1, 158 -; LMULMAX2-RV32F-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX2-RV32F-NEXT: li a1, 32 -; LMULMAX2-RV32F-NEXT: vminu.vx v8, v8, a1 -; LMULMAX2-RV32F-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV32F-NEXT: ret -; -; LMULMAX2-RV64F-LABEL: ctlz_v4i32: -; LMULMAX2-RV64F: # %bb.0: -; LMULMAX2-RV64F-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV64F-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV64F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64F-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX2-RV64F-NEXT: fsrm a1 -; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v8, 23 -; LMULMAX2-RV64F-NEXT: li a1, 158 -; LMULMAX2-RV64F-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX2-RV64F-NEXT: li a1, 32 -; LMULMAX2-RV64F-NEXT: vminu.vx v8, v8, a1 -; LMULMAX2-RV64F-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV64F-NEXT: ret -; -; LMULMAX2-RV32D-LABEL: ctlz_v4i32: -; LMULMAX2-RV32D: # %bb.0: -; LMULMAX2-RV32D-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV32D-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV32D-NEXT: vfwcvt.f.xu.v v10, v8 -; LMULMAX2-RV32D-NEXT: li a1, 52 -; LMULMAX2-RV32D-NEXT: vnsrl.wx v8, v10, a1 -; LMULMAX2-RV32D-NEXT: li a1, 1054 -; LMULMAX2-RV32D-NEXT: vrsub.vx v8, v8, a1 -; 
LMULMAX2-RV32D-NEXT: li a1, 32 -; LMULMAX2-RV32D-NEXT: vminu.vx v8, v8, a1 -; LMULMAX2-RV32D-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV32D-NEXT: ret -; -; LMULMAX2-RV64D-LABEL: ctlz_v4i32: -; LMULMAX2-RV64D: # %bb.0: -; LMULMAX2-RV64D-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV64D-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV64D-NEXT: vfwcvt.f.xu.v v10, v8 -; LMULMAX2-RV64D-NEXT: li a1, 52 -; LMULMAX2-RV64D-NEXT: vnsrl.wx v8, v10, a1 -; LMULMAX2-RV64D-NEXT: li a1, 1054 -; LMULMAX2-RV64D-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX2-RV64D-NEXT: li a1, 32 -; LMULMAX2-RV64D-NEXT: vminu.vx v8, v8, a1 -; LMULMAX2-RV64D-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV64D-NEXT: ret -; -; LMULMAX8-LABEL: ctlz_v4i32: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX8-NEXT: vle32.v v8, (a0) -; LMULMAX8-NEXT: vfwcvt.f.xu.v v10, v8 -; LMULMAX8-NEXT: li a1, 52 -; LMULMAX8-NEXT: vnsrl.wx v8, v10, a1 -; LMULMAX8-NEXT: li a1, 1054 -; LMULMAX8-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX8-NEXT: li a1, 32 -; LMULMAX8-NEXT: vminu.vx v8, v8, a1 -; LMULMAX8-NEXT: vse32.v v8, (a0) -; LMULMAX8-NEXT: ret +; RVI-LABEL: ctlz_v4i32: +; RVI: # %bb.0: +; RVI-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RVI-NEXT: vle32.v v8, (a0) +; RVI-NEXT: vsrl.vi v9, v8, 1 +; RVI-NEXT: vor.vv v8, v8, v9 +; RVI-NEXT: vsrl.vi v9, v8, 2 +; RVI-NEXT: vor.vv v8, v8, v9 +; RVI-NEXT: vsrl.vi v9, v8, 4 +; RVI-NEXT: vor.vv v8, v8, v9 +; RVI-NEXT: vsrl.vi v9, v8, 8 +; RVI-NEXT: vor.vv v8, v8, v9 +; RVI-NEXT: vsrl.vi v9, v8, 16 +; RVI-NEXT: vor.vv v8, v8, v9 +; RVI-NEXT: vnot.v v8, v8 +; RVI-NEXT: vsrl.vi v9, v8, 1 +; RVI-NEXT: lui a1, 349525 +; RVI-NEXT: addi a1, a1, 1365 +; RVI-NEXT: vand.vx v9, v9, a1 +; RVI-NEXT: vsub.vv v8, v8, v9 +; RVI-NEXT: lui a1, 209715 +; RVI-NEXT: addi a1, a1, 819 +; RVI-NEXT: vand.vx v9, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 2 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: vadd.vv v8, v9, v8 +; RVI-NEXT: vsrl.vi v9, v8, 4 +; RVI-NEXT: vadd.vv v8, v8, v9 +; RVI-NEXT: lui a1, 61681 +; 
RVI-NEXT: addi a1, a1, -241 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: lui a1, 4112 +; RVI-NEXT: addi a1, a1, 257 +; RVI-NEXT: vmul.vx v8, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 24 +; RVI-NEXT: vse32.v v8, (a0) +; RVI-NEXT: ret +; +; RVF-LABEL: ctlz_v4i32: +; RVF: # %bb.0: +; RVF-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RVF-NEXT: vle32.v v8, (a0) +; RVF-NEXT: fsrmi a1, 1 +; RVF-NEXT: vfcvt.f.xu.v v8, v8 +; RVF-NEXT: fsrm a1 +; RVF-NEXT: vsrl.vi v8, v8, 23 +; RVF-NEXT: li a1, 158 +; RVF-NEXT: vrsub.vx v8, v8, a1 +; RVF-NEXT: li a1, 32 +; RVF-NEXT: vminu.vx v8, v8, a1 +; RVF-NEXT: vse32.v v8, (a0) +; RVF-NEXT: ret +; +; RVD-LABEL: ctlz_v4i32: +; RVD: # %bb.0: +; RVD-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RVD-NEXT: vle32.v v8, (a0) +; RVD-NEXT: vfwcvt.f.xu.v v10, v8 +; RVD-NEXT: li a1, 52 +; RVD-NEXT: vnsrl.wx v8, v10, a1 +; RVD-NEXT: li a1, 1054 +; RVD-NEXT: vrsub.vx v8, v8, a1 +; RVD-NEXT: li a1, 32 +; RVD-NEXT: vminu.vx v8, v8, a1 +; RVD-NEXT: vse32.v v8, (a0) +; RVD-NEXT: ret ; ; ZVBB-LABEL: ctlz_v4i32: ; ZVBB: # %bb.0: @@ -423,192 +243,160 @@ define void @ctlz_v4i32(ptr %x, ptr %y) nounwind { declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) define void @ctlz_v2i64(ptr %x, ptr %y) nounwind { -; LMULMAX2-RV32I-LABEL: ctlz_v2i64: -; LMULMAX2-RV32I: # %bb.0: -; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 16 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: li a1, 32 -; LMULMAX2-RV32I-NEXT: vsrl.vx v9, v8, a1 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 
1 -; LMULMAX2-RV32I-NEXT: lui a1, 349525 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vand.vv v9, v9, v10 -; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: lui a1, 209715 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vand.vv v10, v8, v9 -; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: lui a1, 61681 -; LMULMAX2-RV32I-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: lui a1, 4112 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 257 -; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: li a1, 56 -; LMULMAX2-RV32I-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32I-NEXT: ret -; -; LMULMAX2-RV64I-LABEL: ctlz_v2i64: -; LMULMAX2-RV64I: # %bb.0: -; LMULMAX2-RV64I-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV64I-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 8 -; 
LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 16 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: li a1, 32 -; LMULMAX2-RV64I-NEXT: vsrl.vx v9, v8, a1 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV64I-NEXT: lui a1, 349525 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64I-NEXT: add a1, a1, a2 -; LMULMAX2-RV64I-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: lui a1, 209715 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64I-NEXT: add a1, a1, a2 -; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a1 -; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v9, v8 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: lui a1, 61681 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241 -; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64I-NEXT: add a1, a1, a2 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: lui a1, 4112 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 257 -; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64I-NEXT: add a1, a1, a2 -; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: li a1, 56 -; LMULMAX2-RV64I-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV64I-NEXT: ret -; -; LMULMAX2-RV32F-LABEL: ctlz_v2i64: -; LMULMAX2-RV32F: # %bb.0: -; LMULMAX2-RV32F-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32F-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32F-NEXT: li a1, 190 -; LMULMAX2-RV32F-NEXT: vmv.v.x v9, a1 -; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v10, v8 -; LMULMAX2-RV32F-NEXT: fsrm a1 -; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v10, 23 -; 
LMULMAX2-RV32F-NEXT: vwsubu.wv v9, v9, v8 -; LMULMAX2-RV32F-NEXT: li a1, 64 -; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; LMULMAX2-RV32F-NEXT: vminu.vx v8, v9, a1 -; LMULMAX2-RV32F-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32F-NEXT: ret -; -; LMULMAX2-RV64F-LABEL: ctlz_v2i64: -; LMULMAX2-RV64F: # %bb.0: -; LMULMAX2-RV64F-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX2-RV64F-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64F-NEXT: li a1, 190 -; LMULMAX2-RV64F-NEXT: vmv.v.x v9, a1 -; LMULMAX2-RV64F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v10, v8 -; LMULMAX2-RV64F-NEXT: fsrm a1 -; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v10, 23 -; LMULMAX2-RV64F-NEXT: vwsubu.vv v10, v9, v8 -; LMULMAX2-RV64F-NEXT: li a1, 64 -; LMULMAX2-RV64F-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; LMULMAX2-RV64F-NEXT: vminu.vx v8, v10, a1 -; LMULMAX2-RV64F-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV64F-NEXT: ret -; -; LMULMAX2-RV32D-LABEL: ctlz_v2i64: -; LMULMAX2-RV32D: # %bb.0: -; LMULMAX2-RV32D-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32D-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32D-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX2-RV32D-NEXT: fsrm a1 -; LMULMAX2-RV32D-NEXT: li a1, 52 -; LMULMAX2-RV32D-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX2-RV32D-NEXT: li a1, 1086 -; LMULMAX2-RV32D-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX2-RV32D-NEXT: li a1, 64 -; LMULMAX2-RV32D-NEXT: vminu.vx v8, v8, a1 -; LMULMAX2-RV32D-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32D-NEXT: ret -; -; LMULMAX2-RV64D-LABEL: ctlz_v2i64: -; LMULMAX2-RV64D: # %bb.0: -; LMULMAX2-RV64D-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV64D-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64D-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64D-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX2-RV64D-NEXT: fsrm a1 -; LMULMAX2-RV64D-NEXT: li a1, 52 -; LMULMAX2-RV64D-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX2-RV64D-NEXT: li a1, 1086 -; LMULMAX2-RV64D-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX2-RV64D-NEXT: li a1, 64 -; LMULMAX2-RV64D-NEXT: vminu.vx v8, v8, a1 
-; LMULMAX2-RV64D-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV64D-NEXT: ret -; -; LMULMAX8-LABEL: ctlz_v2i64: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX8-NEXT: vle64.v v8, (a0) -; LMULMAX8-NEXT: fsrmi a1, 1 -; LMULMAX8-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX8-NEXT: fsrm a1 -; LMULMAX8-NEXT: li a1, 52 -; LMULMAX8-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX8-NEXT: li a1, 1086 -; LMULMAX8-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX8-NEXT: li a1, 64 -; LMULMAX8-NEXT: vminu.vx v8, v8, a1 -; LMULMAX8-NEXT: vse64.v v8, (a0) -; LMULMAX8-NEXT: ret +; RV32I-LABEL: ctlz_v2i64: +; RV32I: # %bb.0: +; RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32I-NEXT: vle64.v v8, (a0) +; RV32I-NEXT: vsrl.vi v9, v8, 1 +; RV32I-NEXT: vor.vv v8, v8, v9 +; RV32I-NEXT: vsrl.vi v9, v8, 2 +; RV32I-NEXT: vor.vv v8, v8, v9 +; RV32I-NEXT: vsrl.vi v9, v8, 4 +; RV32I-NEXT: vor.vv v8, v8, v9 +; RV32I-NEXT: vsrl.vi v9, v8, 8 +; RV32I-NEXT: vor.vv v8, v8, v9 +; RV32I-NEXT: vsrl.vi v9, v8, 16 +; RV32I-NEXT: vor.vv v8, v8, v9 +; RV32I-NEXT: li a1, 32 +; RV32I-NEXT: vsrl.vx v9, v8, a1 +; RV32I-NEXT: vor.vv v8, v8, v9 +; RV32I-NEXT: vnot.v v8, v8 +; RV32I-NEXT: vsrl.vi v9, v8, 1 +; RV32I-NEXT: lui a1, 349525 +; RV32I-NEXT: addi a1, a1, 1365 +; RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32I-NEXT: vmv.v.x v10, a1 +; RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32I-NEXT: vand.vv v9, v9, v10 +; RV32I-NEXT: vsub.vv v8, v8, v9 +; RV32I-NEXT: lui a1, 209715 +; RV32I-NEXT: addi a1, a1, 819 +; RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32I-NEXT: vmv.v.x v9, a1 +; RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32I-NEXT: vand.vv v10, v8, v9 +; RV32I-NEXT: vsrl.vi v8, v8, 2 +; RV32I-NEXT: vand.vv v8, v8, v9 +; RV32I-NEXT: vadd.vv v8, v10, v8 +; RV32I-NEXT: vsrl.vi v9, v8, 4 +; RV32I-NEXT: vadd.vv v8, v8, v9 +; RV32I-NEXT: lui a1, 61681 +; RV32I-NEXT: addi a1, a1, -241 +; RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32I-NEXT: vmv.v.x v9, a1 +; RV32I-NEXT: vsetivli 
zero, 2, e64, m1, ta, ma +; RV32I-NEXT: vand.vv v8, v8, v9 +; RV32I-NEXT: lui a1, 4112 +; RV32I-NEXT: addi a1, a1, 257 +; RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32I-NEXT: vmv.v.x v9, a1 +; RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32I-NEXT: vmul.vv v8, v8, v9 +; RV32I-NEXT: li a1, 56 +; RV32I-NEXT: vsrl.vx v8, v8, a1 +; RV32I-NEXT: vse64.v v8, (a0) +; RV32I-NEXT: ret +; +; RV64I-LABEL: ctlz_v2i64: +; RV64I: # %bb.0: +; RV64I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64I-NEXT: vle64.v v8, (a0) +; RV64I-NEXT: vsrl.vi v9, v8, 1 +; RV64I-NEXT: vor.vv v8, v8, v9 +; RV64I-NEXT: vsrl.vi v9, v8, 2 +; RV64I-NEXT: vor.vv v8, v8, v9 +; RV64I-NEXT: vsrl.vi v9, v8, 4 +; RV64I-NEXT: vor.vv v8, v8, v9 +; RV64I-NEXT: vsrl.vi v9, v8, 8 +; RV64I-NEXT: vor.vv v8, v8, v9 +; RV64I-NEXT: vsrl.vi v9, v8, 16 +; RV64I-NEXT: vor.vv v8, v8, v9 +; RV64I-NEXT: li a1, 32 +; RV64I-NEXT: vsrl.vx v9, v8, a1 +; RV64I-NEXT: vor.vv v8, v8, v9 +; RV64I-NEXT: vnot.v v8, v8 +; RV64I-NEXT: vsrl.vi v9, v8, 1 +; RV64I-NEXT: lui a1, 349525 +; RV64I-NEXT: addiw a1, a1, 1365 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: vand.vx v9, v9, a1 +; RV64I-NEXT: vsub.vv v8, v8, v9 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: vand.vx v9, v8, a1 +; RV64I-NEXT: vsrl.vi v8, v8, 2 +; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: vadd.vv v8, v9, v8 +; RV64I-NEXT: vsrl.vi v9, v8, 4 +; RV64I-NEXT: vadd.vv v8, v8, v9 +; RV64I-NEXT: lui a1, 61681 +; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: lui a1, 4112 +; RV64I-NEXT: addiw a1, a1, 257 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: vmul.vx v8, v8, a1 +; RV64I-NEXT: li a1, 56 +; RV64I-NEXT: vsrl.vx v8, v8, a1 +; RV64I-NEXT: vse64.v v8, (a0) +; RV64I-NEXT: ret +; +; RV32F-LABEL: ctlz_v2i64: +; RV32F: # %bb.0: 
+; RV32F-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32F-NEXT: vle64.v v8, (a0) +; RV32F-NEXT: li a1, 190 +; RV32F-NEXT: vmv.v.x v9, a1 +; RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV32F-NEXT: fsrmi a1, 1 +; RV32F-NEXT: vfncvt.f.xu.w v10, v8 +; RV32F-NEXT: fsrm a1 +; RV32F-NEXT: vsrl.vi v8, v10, 23 +; RV32F-NEXT: vwsubu.wv v9, v9, v8 +; RV32F-NEXT: li a1, 64 +; RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32F-NEXT: vminu.vx v8, v9, a1 +; RV32F-NEXT: vse64.v v8, (a0) +; RV32F-NEXT: ret +; +; RV64F-LABEL: ctlz_v2i64: +; RV64F: # %bb.0: +; RV64F-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV64F-NEXT: vle64.v v8, (a0) +; RV64F-NEXT: li a1, 190 +; RV64F-NEXT: vmv.v.x v9, a1 +; RV64F-NEXT: fsrmi a1, 1 +; RV64F-NEXT: vfncvt.f.xu.w v10, v8 +; RV64F-NEXT: fsrm a1 +; RV64F-NEXT: vsrl.vi v8, v10, 23 +; RV64F-NEXT: vwsubu.vv v10, v9, v8 +; RV64F-NEXT: li a1, 64 +; RV64F-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64F-NEXT: vminu.vx v8, v10, a1 +; RV64F-NEXT: vse64.v v8, (a0) +; RV64F-NEXT: ret +; +; RVD-LABEL: ctlz_v2i64: +; RVD: # %bb.0: +; RVD-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RVD-NEXT: vle64.v v8, (a0) +; RVD-NEXT: fsrmi a1, 1 +; RVD-NEXT: vfcvt.f.xu.v v8, v8 +; RVD-NEXT: fsrm a1 +; RVD-NEXT: li a1, 52 +; RVD-NEXT: vsrl.vx v8, v8, a1 +; RVD-NEXT: li a1, 1086 +; RVD-NEXT: vrsub.vx v8, v8, a1 +; RVD-NEXT: li a1, 64 +; RVD-NEXT: vminu.vx v8, v8, a1 +; RVD-NEXT: vse64.v v8, (a0) +; RVD-NEXT: ret ; ; ZVBB-LABEL: ctlz_v2i64: ; ZVBB: # %bb.0: @@ -626,95 +414,66 @@ define void @ctlz_v2i64(ptr %x, ptr %y) nounwind { declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) define void @ctlz_v32i8(ptr %x, ptr %y) nounwind { -; LMULMAX2-LABEL: ctlz_v32i8: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a1, 32 -; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-NEXT: vle8.v v8, (a0) -; LMULMAX2-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX2-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-NEXT: vsrl.vi 
v10, v8, 4 -; LMULMAX2-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-NEXT: vnot.v v8, v8 -; LMULMAX2-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-NEXT: li a1, 85 -; LMULMAX2-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-NEXT: li a1, 51 -; LMULMAX2-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-NEXT: vand.vi v8, v8, 15 -; LMULMAX2-NEXT: vse8.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: ctlz_v32i8: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vle8.v v8, (a1) -; LMULMAX1-NEXT: vle8.v v9, (a0) -; LMULMAX1-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX1-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-NEXT: vnot.v v8, v8 -; LMULMAX1-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-NEXT: li a2, 85 -; LMULMAX1-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-NEXT: li a3, 51 -; LMULMAX1-NEXT: vand.vx v10, v8, a3 -; LMULMAX1-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-NEXT: vand.vx v8, v8, a3 -; LMULMAX1-NEXT: vadd.vv v8, v10, v8 -; LMULMAX1-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-NEXT: vand.vi v8, v8, 15 -; LMULMAX1-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-NEXT: vsrl.vi v10, v9, 2 -; LMULMAX1-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-NEXT: vnot.v v9, v9 -; LMULMAX1-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-NEXT: vsub.vv v9, v9, v10 -; LMULMAX1-NEXT: vand.vx v10, v9, a3 -; LMULMAX1-NEXT: vsrl.vi v9, v9, 2 -; LMULMAX1-NEXT: vand.vx v9, v9, a3 -; LMULMAX1-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-NEXT: 
vsrl.vi v10, v9, 4 -; LMULMAX1-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-NEXT: vand.vi v9, v9, 15 -; LMULMAX1-NEXT: vse8.v v9, (a0) -; LMULMAX1-NEXT: vse8.v v8, (a1) -; LMULMAX1-NEXT: ret -; -; LMULMAX8-LABEL: ctlz_v32i8: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: li a1, 32 -; LMULMAX8-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; LMULMAX8-NEXT: vle8.v v8, (a0) -; LMULMAX8-NEXT: vzext.vf2 v12, v8 -; LMULMAX8-NEXT: vfwcvt.f.xu.v v16, v12 -; LMULMAX8-NEXT: vnsrl.wi v8, v16, 23 -; LMULMAX8-NEXT: vsetvli zero, zero, e8, m2, ta, ma -; LMULMAX8-NEXT: vnsrl.wi v12, v8, 0 -; LMULMAX8-NEXT: li a1, 134 -; LMULMAX8-NEXT: vrsub.vx v8, v12, a1 -; LMULMAX8-NEXT: li a1, 8 -; LMULMAX8-NEXT: vminu.vx v8, v8, a1 -; LMULMAX8-NEXT: vse8.v v8, (a0) -; LMULMAX8-NEXT: ret +; RVI-LABEL: ctlz_v32i8: +; RVI: # %bb.0: +; RVI-NEXT: li a1, 32 +; RVI-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; RVI-NEXT: vle8.v v8, (a0) +; RVI-NEXT: vsrl.vi v10, v8, 1 +; RVI-NEXT: vor.vv v8, v8, v10 +; RVI-NEXT: vsrl.vi v10, v8, 2 +; RVI-NEXT: vor.vv v8, v8, v10 +; RVI-NEXT: vsrl.vi v10, v8, 4 +; RVI-NEXT: vor.vv v8, v8, v10 +; RVI-NEXT: vnot.v v8, v8 +; RVI-NEXT: vsrl.vi v10, v8, 1 +; RVI-NEXT: li a1, 85 +; RVI-NEXT: vand.vx v10, v10, a1 +; RVI-NEXT: vsub.vv v8, v8, v10 +; RVI-NEXT: li a1, 51 +; RVI-NEXT: vand.vx v10, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 2 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: vadd.vv v8, v10, v8 +; RVI-NEXT: vsrl.vi v10, v8, 4 +; RVI-NEXT: vadd.vv v8, v8, v10 +; RVI-NEXT: vand.vi v8, v8, 15 +; RVI-NEXT: vse8.v v8, (a0) +; RVI-NEXT: ret +; +; RVF-LABEL: ctlz_v32i8: +; RVF: # %bb.0: +; RVF-NEXT: li a1, 32 +; RVF-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; RVF-NEXT: vle8.v v8, (a0) +; RVF-NEXT: vzext.vf2 v12, v8 +; RVF-NEXT: vfwcvt.f.xu.v v16, v12 +; RVF-NEXT: vnsrl.wi v8, v16, 23 +; RVF-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; RVF-NEXT: vnsrl.wi v12, v8, 0 +; RVF-NEXT: li a1, 134 +; RVF-NEXT: vrsub.vx v8, v12, a1 +; RVF-NEXT: li a1, 8 +; RVF-NEXT: vminu.vx v8, v8, a1 +; RVF-NEXT: vse8.v v8, (a0) +; 
RVF-NEXT: ret +; +; RVD-LABEL: ctlz_v32i8: +; RVD: # %bb.0: +; RVD-NEXT: li a1, 32 +; RVD-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; RVD-NEXT: vle8.v v8, (a0) +; RVD-NEXT: vzext.vf2 v12, v8 +; RVD-NEXT: vfwcvt.f.xu.v v16, v12 +; RVD-NEXT: vnsrl.wi v8, v16, 23 +; RVD-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; RVD-NEXT: vnsrl.wi v12, v8, 0 +; RVD-NEXT: li a1, 134 +; RVD-NEXT: vrsub.vx v8, v12, a1 +; RVD-NEXT: li a1, 8 +; RVD-NEXT: vminu.vx v8, v8, a1 +; RVD-NEXT: vse8.v v8, (a0) +; RVD-NEXT: ret ; ; ZVBB-LABEL: ctlz_v32i8: ; ZVBB: # %bb.0: @@ -733,112 +492,66 @@ define void @ctlz_v32i8(ptr %x, ptr %y) nounwind { declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>, i1) define void @ctlz_v16i16(ptr %x, ptr %y) nounwind { -; LMULMAX2-LABEL: ctlz_v16i16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-NEXT: vle16.v v8, (a0) -; LMULMAX2-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX2-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX2-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-NEXT: vnot.v v8, v8 -; LMULMAX2-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-NEXT: lui a1, 5 -; LMULMAX2-NEXT: addi a1, a1, 1365 -; LMULMAX2-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-NEXT: lui a1, 3 -; LMULMAX2-NEXT: addi a1, a1, 819 -; LMULMAX2-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-NEXT: lui a1, 1 -; LMULMAX2-NEXT: addi a1, a1, -241 -; LMULMAX2-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-NEXT: li a1, 257 -; LMULMAX2-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX2-NEXT: vse16.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: ctlz_v16i16: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli 
zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vle16.v v8, (a1) -; LMULMAX1-NEXT: vle16.v v9, (a0) -; LMULMAX1-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX1-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX1-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-NEXT: vnot.v v8, v8 -; LMULMAX1-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-NEXT: lui a2, 5 -; LMULMAX1-NEXT: addi a2, a2, 1365 -; LMULMAX1-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-NEXT: lui a3, 3 -; LMULMAX1-NEXT: addi a3, a3, 819 -; LMULMAX1-NEXT: vand.vx v10, v8, a3 -; LMULMAX1-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-NEXT: vand.vx v8, v8, a3 -; LMULMAX1-NEXT: vadd.vv v8, v10, v8 -; LMULMAX1-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-NEXT: lui a4, 1 -; LMULMAX1-NEXT: addi a4, a4, -241 -; LMULMAX1-NEXT: vand.vx v8, v8, a4 -; LMULMAX1-NEXT: li a5, 257 -; LMULMAX1-NEXT: vmul.vx v8, v8, a5 -; LMULMAX1-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX1-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-NEXT: vsrl.vi v10, v9, 2 -; LMULMAX1-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-NEXT: vsrl.vi v10, v9, 8 -; LMULMAX1-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-NEXT: vnot.v v9, v9 -; LMULMAX1-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-NEXT: vsub.vv v9, v9, v10 -; LMULMAX1-NEXT: vand.vx v10, v9, a3 -; LMULMAX1-NEXT: vsrl.vi v9, v9, 2 -; LMULMAX1-NEXT: vand.vx v9, v9, a3 -; LMULMAX1-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-NEXT: vand.vx v9, v9, a4 -; LMULMAX1-NEXT: vmul.vx v9, v9, a5 -; LMULMAX1-NEXT: vsrl.vi v9, v9, 8 -; LMULMAX1-NEXT: vse16.v v9, (a0) -; LMULMAX1-NEXT: vse16.v v8, (a1) -; LMULMAX1-NEXT: ret -; -; 
LMULMAX8-LABEL: ctlz_v16i16: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX8-NEXT: vle16.v v8, (a0) -; LMULMAX8-NEXT: vfwcvt.f.xu.v v12, v8 -; LMULMAX8-NEXT: vnsrl.wi v8, v12, 23 -; LMULMAX8-NEXT: li a1, 142 -; LMULMAX8-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX8-NEXT: li a1, 16 -; LMULMAX8-NEXT: vminu.vx v8, v8, a1 -; LMULMAX8-NEXT: vse16.v v8, (a0) -; LMULMAX8-NEXT: ret +; RVI-LABEL: ctlz_v16i16: +; RVI: # %bb.0: +; RVI-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RVI-NEXT: vle16.v v8, (a0) +; RVI-NEXT: vsrl.vi v10, v8, 1 +; RVI-NEXT: vor.vv v8, v8, v10 +; RVI-NEXT: vsrl.vi v10, v8, 2 +; RVI-NEXT: vor.vv v8, v8, v10 +; RVI-NEXT: vsrl.vi v10, v8, 4 +; RVI-NEXT: vor.vv v8, v8, v10 +; RVI-NEXT: vsrl.vi v10, v8, 8 +; RVI-NEXT: vor.vv v8, v8, v10 +; RVI-NEXT: vnot.v v8, v8 +; RVI-NEXT: vsrl.vi v10, v8, 1 +; RVI-NEXT: lui a1, 5 +; RVI-NEXT: addi a1, a1, 1365 +; RVI-NEXT: vand.vx v10, v10, a1 +; RVI-NEXT: vsub.vv v8, v8, v10 +; RVI-NEXT: lui a1, 3 +; RVI-NEXT: addi a1, a1, 819 +; RVI-NEXT: vand.vx v10, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 2 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: vadd.vv v8, v10, v8 +; RVI-NEXT: vsrl.vi v10, v8, 4 +; RVI-NEXT: vadd.vv v8, v8, v10 +; RVI-NEXT: lui a1, 1 +; RVI-NEXT: addi a1, a1, -241 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: li a1, 257 +; RVI-NEXT: vmul.vx v8, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 8 +; RVI-NEXT: vse16.v v8, (a0) +; RVI-NEXT: ret +; +; RVF-LABEL: ctlz_v16i16: +; RVF: # %bb.0: +; RVF-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RVF-NEXT: vle16.v v8, (a0) +; RVF-NEXT: vfwcvt.f.xu.v v12, v8 +; RVF-NEXT: vnsrl.wi v8, v12, 23 +; RVF-NEXT: li a1, 142 +; RVF-NEXT: vrsub.vx v8, v8, a1 +; RVF-NEXT: li a1, 16 +; RVF-NEXT: vminu.vx v8, v8, a1 +; RVF-NEXT: vse16.v v8, (a0) +; RVF-NEXT: ret +; +; RVD-LABEL: ctlz_v16i16: +; RVD: # %bb.0: +; RVD-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RVD-NEXT: vle16.v v8, (a0) +; RVD-NEXT: vfwcvt.f.xu.v v12, v8 +; RVD-NEXT: vnsrl.wi v8, v12, 23 +; RVD-NEXT: li 
a1, 142 +; RVD-NEXT: vrsub.vx v8, v8, a1 +; RVD-NEXT: li a1, 16 +; RVD-NEXT: vminu.vx v8, v8, a1 +; RVD-NEXT: vse16.v v8, (a0) +; RVD-NEXT: ret ; ; ZVBB-LABEL: ctlz_v16i16: ; ZVBB: # %bb.0: @@ -856,155 +569,72 @@ define void @ctlz_v16i16(ptr %x, ptr %y) nounwind { declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>, i1) define void @ctlz_v8i32(ptr %x, ptr %y) nounwind { -; LMULMAX2-RV32I-LABEL: ctlz_v8i32: -; LMULMAX2-RV32I: # %bb.0: -; LMULMAX2-RV32I-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 16 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV32I-NEXT: lui a1, 349525 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32I-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: lui a1, 209715 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32I-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: lui a1, 61681 -; LMULMAX2-RV32I-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: lui a1, 4112 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 257 -; LMULMAX2-RV32I-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 24 -; LMULMAX2-RV32I-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV32I-NEXT: ret -; -; LMULMAX2-RV64I-LABEL: ctlz_v8i32: -; LMULMAX2-RV64I: # %bb.0: -; 
LMULMAX2-RV64I-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV64I-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 16 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64I-NEXT: lui a1, 349525 -; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64I-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: lui a1, 209715 -; LMULMAX2-RV64I-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64I-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: lui a1, 61681 -; LMULMAX2-RV64I-NEXT: addi a1, a1, -241 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: lui a1, 4112 -; LMULMAX2-RV64I-NEXT: addi a1, a1, 257 -; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 24 -; LMULMAX2-RV64I-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV64I-NEXT: ret -; -; LMULMAX2-RV32F-LABEL: ctlz_v8i32: -; LMULMAX2-RV32F: # %bb.0: -; LMULMAX2-RV32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32F-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32F-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX2-RV32F-NEXT: fsrm a1 -; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v8, 23 -; LMULMAX2-RV32F-NEXT: li a1, 158 -; LMULMAX2-RV32F-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX2-RV32F-NEXT: li a1, 32 -; LMULMAX2-RV32F-NEXT: vminu.vx v8, v8, a1 -; LMULMAX2-RV32F-NEXT: vse32.v v8, (a0) -; 
LMULMAX2-RV32F-NEXT: ret -; -; LMULMAX2-RV64F-LABEL: ctlz_v8i32: -; LMULMAX2-RV64F: # %bb.0: -; LMULMAX2-RV64F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV64F-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV64F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64F-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX2-RV64F-NEXT: fsrm a1 -; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v8, 23 -; LMULMAX2-RV64F-NEXT: li a1, 158 -; LMULMAX2-RV64F-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX2-RV64F-NEXT: li a1, 32 -; LMULMAX2-RV64F-NEXT: vminu.vx v8, v8, a1 -; LMULMAX2-RV64F-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV64F-NEXT: ret -; -; LMULMAX2-RV32D-LABEL: ctlz_v8i32: -; LMULMAX2-RV32D: # %bb.0: -; LMULMAX2-RV32D-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32D-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV32D-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX2-RV32D-NEXT: fsrm a1 -; LMULMAX2-RV32D-NEXT: vsrl.vi v8, v8, 23 -; LMULMAX2-RV32D-NEXT: li a1, 158 -; LMULMAX2-RV32D-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX2-RV32D-NEXT: li a1, 32 -; LMULMAX2-RV32D-NEXT: vminu.vx v8, v8, a1 -; LMULMAX2-RV32D-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV32D-NEXT: ret -; -; LMULMAX2-RV64D-LABEL: ctlz_v8i32: -; LMULMAX2-RV64D: # %bb.0: -; LMULMAX2-RV64D-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV64D-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV64D-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64D-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX2-RV64D-NEXT: fsrm a1 -; LMULMAX2-RV64D-NEXT: vsrl.vi v8, v8, 23 -; LMULMAX2-RV64D-NEXT: li a1, 158 -; LMULMAX2-RV64D-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX2-RV64D-NEXT: li a1, 32 -; LMULMAX2-RV64D-NEXT: vminu.vx v8, v8, a1 -; LMULMAX2-RV64D-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV64D-NEXT: ret -; -; LMULMAX8-LABEL: ctlz_v8i32: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX8-NEXT: vle32.v v8, (a0) -; LMULMAX8-NEXT: vfwcvt.f.xu.v v12, v8 -; LMULMAX8-NEXT: li a1, 52 -; LMULMAX8-NEXT: vnsrl.wx v8, v12, a1 -; LMULMAX8-NEXT: li a1, 1054 -; LMULMAX8-NEXT: vrsub.vx v8, v8, a1 -; 
LMULMAX8-NEXT: li a1, 32 -; LMULMAX8-NEXT: vminu.vx v8, v8, a1 -; LMULMAX8-NEXT: vse32.v v8, (a0) -; LMULMAX8-NEXT: ret +; RVI-LABEL: ctlz_v8i32: +; RVI: # %bb.0: +; RVI-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RVI-NEXT: vle32.v v8, (a0) +; RVI-NEXT: vsrl.vi v10, v8, 1 +; RVI-NEXT: vor.vv v8, v8, v10 +; RVI-NEXT: vsrl.vi v10, v8, 2 +; RVI-NEXT: vor.vv v8, v8, v10 +; RVI-NEXT: vsrl.vi v10, v8, 4 +; RVI-NEXT: vor.vv v8, v8, v10 +; RVI-NEXT: vsrl.vi v10, v8, 8 +; RVI-NEXT: vor.vv v8, v8, v10 +; RVI-NEXT: vsrl.vi v10, v8, 16 +; RVI-NEXT: vor.vv v8, v8, v10 +; RVI-NEXT: vnot.v v8, v8 +; RVI-NEXT: vsrl.vi v10, v8, 1 +; RVI-NEXT: lui a1, 349525 +; RVI-NEXT: addi a1, a1, 1365 +; RVI-NEXT: vand.vx v10, v10, a1 +; RVI-NEXT: vsub.vv v8, v8, v10 +; RVI-NEXT: lui a1, 209715 +; RVI-NEXT: addi a1, a1, 819 +; RVI-NEXT: vand.vx v10, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 2 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: vadd.vv v8, v10, v8 +; RVI-NEXT: vsrl.vi v10, v8, 4 +; RVI-NEXT: vadd.vv v8, v8, v10 +; RVI-NEXT: lui a1, 61681 +; RVI-NEXT: addi a1, a1, -241 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: lui a1, 4112 +; RVI-NEXT: addi a1, a1, 257 +; RVI-NEXT: vmul.vx v8, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 24 +; RVI-NEXT: vse32.v v8, (a0) +; RVI-NEXT: ret +; +; RVF-LABEL: ctlz_v8i32: +; RVF: # %bb.0: +; RVF-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RVF-NEXT: vle32.v v8, (a0) +; RVF-NEXT: fsrmi a1, 1 +; RVF-NEXT: vfcvt.f.xu.v v8, v8 +; RVF-NEXT: fsrm a1 +; RVF-NEXT: vsrl.vi v8, v8, 23 +; RVF-NEXT: li a1, 158 +; RVF-NEXT: vrsub.vx v8, v8, a1 +; RVF-NEXT: li a1, 32 +; RVF-NEXT: vminu.vx v8, v8, a1 +; RVF-NEXT: vse32.v v8, (a0) +; RVF-NEXT: ret +; +; RVD-LABEL: ctlz_v8i32: +; RVD: # %bb.0: +; RVD-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RVD-NEXT: vle32.v v8, (a0) +; RVD-NEXT: vfwcvt.f.xu.v v12, v8 +; RVD-NEXT: li a1, 52 +; RVD-NEXT: vnsrl.wx v8, v12, a1 +; RVD-NEXT: li a1, 1054 +; RVD-NEXT: vrsub.vx v8, v8, a1 +; RVD-NEXT: li a1, 32 +; RVD-NEXT: vminu.vx v8, v8, a1 +; RVD-NEXT: vse32.v 
v8, (a0) +; RVD-NEXT: ret ; ; ZVBB-LABEL: ctlz_v8i32: ; ZVBB: # %bb.0: @@ -1022,192 +652,160 @@ define void @ctlz_v8i32(ptr %x, ptr %y) nounwind { declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1) define void @ctlz_v4i64(ptr %x, ptr %y) nounwind { -; LMULMAX2-RV32I-LABEL: ctlz_v4i64: -; LMULMAX2-RV32I: # %bb.0: -; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 16 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: li a1, 32 -; LMULMAX2-RV32I-NEXT: vsrl.vx v10, v8, a1 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV32I-NEXT: lui a1, 349525 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.x v12, a1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vand.vv v10, v10, v12 -; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: lui a1, 209715 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vand.vv v12, v8, v10 -; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v12, v8 -; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: lui a1, 61681 -; LMULMAX2-RV32I-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, 
m2, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: lui a1, 4112 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 257 -; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: li a1, 56 -; LMULMAX2-RV32I-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32I-NEXT: ret -; -; LMULMAX2-RV64I-LABEL: ctlz_v4i64: -; LMULMAX2-RV64I: # %bb.0: -; LMULMAX2-RV64I-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV64I-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 16 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: li a1, 32 -; LMULMAX2-RV64I-NEXT: vsrl.vx v10, v8, a1 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64I-NEXT: lui a1, 349525 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64I-NEXT: add a1, a1, a2 -; LMULMAX2-RV64I-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: lui a1, 209715 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64I-NEXT: add a1, a1, a2 -; LMULMAX2-RV64I-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV64I-NEXT: vsrl.vi v10, 
v8, 4 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: lui a1, 61681 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241 -; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64I-NEXT: add a1, a1, a2 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: lui a1, 4112 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 257 -; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64I-NEXT: add a1, a1, a2 -; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: li a1, 56 -; LMULMAX2-RV64I-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV64I-NEXT: ret -; -; LMULMAX2-RV32F-LABEL: ctlz_v4i64: -; LMULMAX2-RV32F: # %bb.0: -; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32F-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32F-NEXT: li a1, 190 -; LMULMAX2-RV32F-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v12, v8 -; LMULMAX2-RV32F-NEXT: fsrm a1 -; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v12, 23 -; LMULMAX2-RV32F-NEXT: vwsubu.wv v10, v10, v8 -; LMULMAX2-RV32F-NEXT: li a1, 64 -; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; LMULMAX2-RV32F-NEXT: vminu.vx v8, v10, a1 -; LMULMAX2-RV32F-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32F-NEXT: ret -; -; LMULMAX2-RV64F-LABEL: ctlz_v4i64: -; LMULMAX2-RV64F: # %bb.0: -; LMULMAX2-RV64F-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV64F-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64F-NEXT: li a1, 190 -; LMULMAX2-RV64F-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV64F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v11, v8 -; LMULMAX2-RV64F-NEXT: fsrm a1 -; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v11, 23 -; LMULMAX2-RV64F-NEXT: vwsubu.vv v12, v10, v8 -; LMULMAX2-RV64F-NEXT: li a1, 64 -; LMULMAX2-RV64F-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; LMULMAX2-RV64F-NEXT: vminu.vx v8, v12, a1 -; LMULMAX2-RV64F-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV64F-NEXT: ret -; -; LMULMAX2-RV32D-LABEL: 
ctlz_v4i64: -; LMULMAX2-RV32D: # %bb.0: -; LMULMAX2-RV32D-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32D-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32D-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX2-RV32D-NEXT: fsrm a1 -; LMULMAX2-RV32D-NEXT: li a1, 52 -; LMULMAX2-RV32D-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX2-RV32D-NEXT: li a1, 1086 -; LMULMAX2-RV32D-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX2-RV32D-NEXT: li a1, 64 -; LMULMAX2-RV32D-NEXT: vminu.vx v8, v8, a1 -; LMULMAX2-RV32D-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32D-NEXT: ret -; -; LMULMAX2-RV64D-LABEL: ctlz_v4i64: -; LMULMAX2-RV64D: # %bb.0: -; LMULMAX2-RV64D-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV64D-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64D-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64D-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX2-RV64D-NEXT: fsrm a1 -; LMULMAX2-RV64D-NEXT: li a1, 52 -; LMULMAX2-RV64D-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX2-RV64D-NEXT: li a1, 1086 -; LMULMAX2-RV64D-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX2-RV64D-NEXT: li a1, 64 -; LMULMAX2-RV64D-NEXT: vminu.vx v8, v8, a1 -; LMULMAX2-RV64D-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV64D-NEXT: ret -; -; LMULMAX8-LABEL: ctlz_v4i64: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX8-NEXT: vle64.v v8, (a0) -; LMULMAX8-NEXT: fsrmi a1, 1 -; LMULMAX8-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX8-NEXT: fsrm a1 -; LMULMAX8-NEXT: li a1, 52 -; LMULMAX8-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX8-NEXT: li a1, 1086 -; LMULMAX8-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX8-NEXT: li a1, 64 -; LMULMAX8-NEXT: vminu.vx v8, v8, a1 -; LMULMAX8-NEXT: vse64.v v8, (a0) -; LMULMAX8-NEXT: ret +; RV32I-LABEL: ctlz_v4i64: +; RV32I: # %bb.0: +; RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32I-NEXT: vle64.v v8, (a0) +; RV32I-NEXT: vsrl.vi v10, v8, 1 +; RV32I-NEXT: vor.vv v8, v8, v10 +; RV32I-NEXT: vsrl.vi v10, v8, 2 +; RV32I-NEXT: vor.vv v8, v8, v10 +; RV32I-NEXT: vsrl.vi v10, v8, 4 +; RV32I-NEXT: vor.vv v8, v8, v10 +; RV32I-NEXT: vsrl.vi v10, v8, 8 +; 
RV32I-NEXT: vor.vv v8, v8, v10 +; RV32I-NEXT: vsrl.vi v10, v8, 16 +; RV32I-NEXT: vor.vv v8, v8, v10 +; RV32I-NEXT: li a1, 32 +; RV32I-NEXT: vsrl.vx v10, v8, a1 +; RV32I-NEXT: vor.vv v8, v8, v10 +; RV32I-NEXT: vnot.v v8, v8 +; RV32I-NEXT: vsrl.vi v10, v8, 1 +; RV32I-NEXT: lui a1, 349525 +; RV32I-NEXT: addi a1, a1, 1365 +; RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32I-NEXT: vmv.v.x v12, a1 +; RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32I-NEXT: vand.vv v10, v10, v12 +; RV32I-NEXT: vsub.vv v8, v8, v10 +; RV32I-NEXT: lui a1, 209715 +; RV32I-NEXT: addi a1, a1, 819 +; RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32I-NEXT: vmv.v.x v10, a1 +; RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32I-NEXT: vand.vv v12, v8, v10 +; RV32I-NEXT: vsrl.vi v8, v8, 2 +; RV32I-NEXT: vand.vv v8, v8, v10 +; RV32I-NEXT: vadd.vv v8, v12, v8 +; RV32I-NEXT: vsrl.vi v10, v8, 4 +; RV32I-NEXT: vadd.vv v8, v8, v10 +; RV32I-NEXT: lui a1, 61681 +; RV32I-NEXT: addi a1, a1, -241 +; RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32I-NEXT: vmv.v.x v10, a1 +; RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32I-NEXT: vand.vv v8, v8, v10 +; RV32I-NEXT: lui a1, 4112 +; RV32I-NEXT: addi a1, a1, 257 +; RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32I-NEXT: vmv.v.x v10, a1 +; RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32I-NEXT: vmul.vv v8, v8, v10 +; RV32I-NEXT: li a1, 56 +; RV32I-NEXT: vsrl.vx v8, v8, a1 +; RV32I-NEXT: vse64.v v8, (a0) +; RV32I-NEXT: ret +; +; RV64I-LABEL: ctlz_v4i64: +; RV64I: # %bb.0: +; RV64I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64I-NEXT: vle64.v v8, (a0) +; RV64I-NEXT: vsrl.vi v10, v8, 1 +; RV64I-NEXT: vor.vv v8, v8, v10 +; RV64I-NEXT: vsrl.vi v10, v8, 2 +; RV64I-NEXT: vor.vv v8, v8, v10 +; RV64I-NEXT: vsrl.vi v10, v8, 4 +; RV64I-NEXT: vor.vv v8, v8, v10 +; RV64I-NEXT: vsrl.vi v10, v8, 8 +; RV64I-NEXT: vor.vv v8, v8, v10 +; RV64I-NEXT: vsrl.vi v10, v8, 16 +; RV64I-NEXT: vor.vv v8, v8, v10 +; RV64I-NEXT: li a1, 32 +; RV64I-NEXT: vsrl.vx 
v10, v8, a1 +; RV64I-NEXT: vor.vv v8, v8, v10 +; RV64I-NEXT: vnot.v v8, v8 +; RV64I-NEXT: vsrl.vi v10, v8, 1 +; RV64I-NEXT: lui a1, 349525 +; RV64I-NEXT: addiw a1, a1, 1365 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: vand.vx v10, v10, a1 +; RV64I-NEXT: vsub.vv v8, v8, v10 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: vand.vx v10, v8, a1 +; RV64I-NEXT: vsrl.vi v8, v8, 2 +; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: vadd.vv v8, v10, v8 +; RV64I-NEXT: vsrl.vi v10, v8, 4 +; RV64I-NEXT: vadd.vv v8, v8, v10 +; RV64I-NEXT: lui a1, 61681 +; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: lui a1, 4112 +; RV64I-NEXT: addiw a1, a1, 257 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: vmul.vx v8, v8, a1 +; RV64I-NEXT: li a1, 56 +; RV64I-NEXT: vsrl.vx v8, v8, a1 +; RV64I-NEXT: vse64.v v8, (a0) +; RV64I-NEXT: ret +; +; RV32F-LABEL: ctlz_v4i64: +; RV32F: # %bb.0: +; RV32F-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32F-NEXT: vle64.v v8, (a0) +; RV32F-NEXT: li a1, 190 +; RV32F-NEXT: vmv.v.x v10, a1 +; RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV32F-NEXT: fsrmi a1, 1 +; RV32F-NEXT: vfncvt.f.xu.w v12, v8 +; RV32F-NEXT: fsrm a1 +; RV32F-NEXT: vsrl.vi v8, v12, 23 +; RV32F-NEXT: vwsubu.wv v10, v10, v8 +; RV32F-NEXT: li a1, 64 +; RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32F-NEXT: vminu.vx v8, v10, a1 +; RV32F-NEXT: vse64.v v8, (a0) +; RV32F-NEXT: ret +; +; RV64F-LABEL: ctlz_v4i64: +; RV64F: # %bb.0: +; RV64F-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64F-NEXT: vle64.v v8, (a0) +; RV64F-NEXT: li a1, 190 +; RV64F-NEXT: vmv.v.x v10, a1 +; RV64F-NEXT: fsrmi a1, 1 +; RV64F-NEXT: vfncvt.f.xu.w v11, v8 +; RV64F-NEXT: fsrm a1 +; RV64F-NEXT: vsrl.vi v8, v11, 23 +; RV64F-NEXT: vwsubu.vv v12, v10, v8 +; RV64F-NEXT: li a1, 64 +; 
RV64F-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64F-NEXT: vminu.vx v8, v12, a1 +; RV64F-NEXT: vse64.v v8, (a0) +; RV64F-NEXT: ret +; +; RVD-LABEL: ctlz_v4i64: +; RVD: # %bb.0: +; RVD-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RVD-NEXT: vle64.v v8, (a0) +; RVD-NEXT: fsrmi a1, 1 +; RVD-NEXT: vfcvt.f.xu.v v8, v8 +; RVD-NEXT: fsrm a1 +; RVD-NEXT: li a1, 52 +; RVD-NEXT: vsrl.vx v8, v8, a1 +; RVD-NEXT: li a1, 1086 +; RVD-NEXT: vrsub.vx v8, v8, a1 +; RVD-NEXT: li a1, 64 +; RVD-NEXT: vminu.vx v8, v8, a1 +; RVD-NEXT: vse64.v v8, (a0) +; RVD-NEXT: ret ; ; ZVBB-LABEL: ctlz_v4i64: ; ZVBB: # %bb.0: @@ -1225,45 +823,59 @@ define void @ctlz_v4i64(ptr %x, ptr %y) nounwind { declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) define void @ctlz_zero_undef_v16i8(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: ctlz_zero_undef_v16i8: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 2 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vnot.v v8, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: li a1, 85 -; CHECK-NEXT: vand.vx v9, v9, a1 -; CHECK-NEXT: vsub.vv v8, v8, v9 -; CHECK-NEXT: li a1, 51 -; CHECK-NEXT: vand.vx v9, v8, a1 -; CHECK-NEXT: vsrl.vi v8, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a1 -; CHECK-NEXT: vadd.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vadd.vv v8, v8, v9 -; CHECK-NEXT: vand.vi v8, v8, 15 -; CHECK-NEXT: vse8.v v8, (a0) -; CHECK-NEXT: ret -; -; LMULMAX8-LABEL: ctlz_zero_undef_v16i8: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX8-NEXT: vle8.v v8, (a0) -; LMULMAX8-NEXT: vzext.vf2 v10, v8 -; LMULMAX8-NEXT: vfwcvt.f.xu.v v12, v10 -; LMULMAX8-NEXT: vnsrl.wi v8, v12, 23 -; LMULMAX8-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; LMULMAX8-NEXT: vnsrl.wi v10, v8, 0 -; LMULMAX8-NEXT: li a1, 134 -; LMULMAX8-NEXT: 
vrsub.vx v8, v10, a1 -; LMULMAX8-NEXT: vse8.v v8, (a0) -; LMULMAX8-NEXT: ret +; RVI-LABEL: ctlz_zero_undef_v16i8: +; RVI: # %bb.0: +; RVI-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RVI-NEXT: vle8.v v8, (a0) +; RVI-NEXT: vsrl.vi v9, v8, 1 +; RVI-NEXT: vor.vv v8, v8, v9 +; RVI-NEXT: vsrl.vi v9, v8, 2 +; RVI-NEXT: vor.vv v8, v8, v9 +; RVI-NEXT: vsrl.vi v9, v8, 4 +; RVI-NEXT: vor.vv v8, v8, v9 +; RVI-NEXT: vnot.v v8, v8 +; RVI-NEXT: vsrl.vi v9, v8, 1 +; RVI-NEXT: li a1, 85 +; RVI-NEXT: vand.vx v9, v9, a1 +; RVI-NEXT: vsub.vv v8, v8, v9 +; RVI-NEXT: li a1, 51 +; RVI-NEXT: vand.vx v9, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 2 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: vadd.vv v8, v9, v8 +; RVI-NEXT: vsrl.vi v9, v8, 4 +; RVI-NEXT: vadd.vv v8, v8, v9 +; RVI-NEXT: vand.vi v8, v8, 15 +; RVI-NEXT: vse8.v v8, (a0) +; RVI-NEXT: ret +; +; RVF-LABEL: ctlz_zero_undef_v16i8: +; RVF: # %bb.0: +; RVF-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RVF-NEXT: vle8.v v8, (a0) +; RVF-NEXT: vzext.vf2 v10, v8 +; RVF-NEXT: vfwcvt.f.xu.v v12, v10 +; RVF-NEXT: vnsrl.wi v8, v12, 23 +; RVF-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; RVF-NEXT: vnsrl.wi v10, v8, 0 +; RVF-NEXT: li a1, 134 +; RVF-NEXT: vrsub.vx v8, v10, a1 +; RVF-NEXT: vse8.v v8, (a0) +; RVF-NEXT: ret +; +; RVD-LABEL: ctlz_zero_undef_v16i8: +; RVD: # %bb.0: +; RVD-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RVD-NEXT: vle8.v v8, (a0) +; RVD-NEXT: vzext.vf2 v10, v8 +; RVD-NEXT: vfwcvt.f.xu.v v12, v10 +; RVD-NEXT: vnsrl.wi v8, v12, 23 +; RVD-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; RVD-NEXT: vnsrl.wi v10, v8, 0 +; RVD-NEXT: li a1, 134 +; RVD-NEXT: vrsub.vx v8, v10, a1 +; RVD-NEXT: vse8.v v8, (a0) +; RVD-NEXT: ret ; ; ZVBB-LABEL: ctlz_zero_undef_v16i8: ; ZVBB: # %bb.0: @@ -1280,165 +892,62 @@ define void @ctlz_zero_undef_v16i8(ptr %x, ptr %y) nounwind { } define void @ctlz_zero_undef_v8i16(ptr %x, ptr %y) nounwind { -; LMULMAX2-RV32I-LABEL: ctlz_zero_undef_v8i16: -; LMULMAX2-RV32I: # %bb.0: -; LMULMAX2-RV32I-NEXT: vsetivli zero, 8, e16, 
m1, ta, ma -; LMULMAX2-RV32I-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV32I-NEXT: lui a1, 5 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32I-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: lui a1, 3 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32I-NEXT: vand.vx v9, v8, a1 -; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v9, v8 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: lui a1, 1 -; LMULMAX2-RV32I-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: li a1, 257 -; LMULMAX2-RV32I-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX2-RV32I-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV32I-NEXT: ret -; -; LMULMAX2-RV64I-LABEL: ctlz_zero_undef_v8i16: -; LMULMAX2-RV64I: # %bb.0: -; LMULMAX2-RV64I-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX2-RV64I-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV64I-NEXT: lui a1, 5 -; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64I-NEXT: vand.vx v9, v9, a1 -; 
LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: lui a1, 3 -; LMULMAX2-RV64I-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a1 -; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v9, v8 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: lui a1, 1 -; LMULMAX2-RV64I-NEXT: addi a1, a1, -241 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: li a1, 257 -; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX2-RV64I-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV64I-NEXT: ret -; -; LMULMAX1-LABEL: ctlz_zero_undef_v8i16: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vle16.v v8, (a0) -; LMULMAX1-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX1-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX1-NEXT: vor.vv v8, v8, v9 -; LMULMAX1-NEXT: vnot.v v8, v8 -; LMULMAX1-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-NEXT: lui a1, 5 -; LMULMAX1-NEXT: addi a1, a1, 1365 -; LMULMAX1-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-NEXT: vsub.vv v8, v8, v9 -; LMULMAX1-NEXT: lui a1, 3 -; LMULMAX1-NEXT: addi a1, a1, 819 -; LMULMAX1-NEXT: vand.vx v9, v8, a1 -; LMULMAX1-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-NEXT: vadd.vv v8, v9, v8 -; LMULMAX1-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-NEXT: vadd.vv v8, v8, v9 -; LMULMAX1-NEXT: lui a1, 1 -; LMULMAX1-NEXT: addi a1, a1, -241 -; LMULMAX1-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-NEXT: li a1, 257 -; LMULMAX1-NEXT: vmul.vx v8, v8, a1 -; LMULMAX1-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX1-NEXT: vse16.v v8, (a0) -; LMULMAX1-NEXT: ret -; -; LMULMAX2-RV32F-LABEL: ctlz_zero_undef_v8i16: -; LMULMAX2-RV32F: # %bb.0: -; LMULMAX2-RV32F-NEXT: vsetivli zero, 8, e16, m1, 
ta, ma -; LMULMAX2-RV32F-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV32F-NEXT: vfwcvt.f.xu.v v10, v8 -; LMULMAX2-RV32F-NEXT: vnsrl.wi v8, v10, 23 -; LMULMAX2-RV32F-NEXT: li a1, 142 -; LMULMAX2-RV32F-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX2-RV32F-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV32F-NEXT: ret -; -; LMULMAX2-RV64F-LABEL: ctlz_zero_undef_v8i16: -; LMULMAX2-RV64F: # %bb.0: -; LMULMAX2-RV64F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX2-RV64F-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV64F-NEXT: vfwcvt.f.xu.v v10, v8 -; LMULMAX2-RV64F-NEXT: vnsrl.wi v8, v10, 23 -; LMULMAX2-RV64F-NEXT: li a1, 142 -; LMULMAX2-RV64F-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX2-RV64F-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV64F-NEXT: ret -; -; LMULMAX2-RV32D-LABEL: ctlz_zero_undef_v8i16: -; LMULMAX2-RV32D: # %bb.0: -; LMULMAX2-RV32D-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX2-RV32D-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV32D-NEXT: vfwcvt.f.xu.v v10, v8 -; LMULMAX2-RV32D-NEXT: vnsrl.wi v8, v10, 23 -; LMULMAX2-RV32D-NEXT: li a1, 142 -; LMULMAX2-RV32D-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX2-RV32D-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV32D-NEXT: ret -; -; LMULMAX2-RV64D-LABEL: ctlz_zero_undef_v8i16: -; LMULMAX2-RV64D: # %bb.0: -; LMULMAX2-RV64D-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX2-RV64D-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV64D-NEXT: vfwcvt.f.xu.v v10, v8 -; LMULMAX2-RV64D-NEXT: vnsrl.wi v8, v10, 23 -; LMULMAX2-RV64D-NEXT: li a1, 142 -; LMULMAX2-RV64D-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX2-RV64D-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV64D-NEXT: ret -; -; LMULMAX8-LABEL: ctlz_zero_undef_v8i16: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX8-NEXT: vle16.v v8, (a0) -; LMULMAX8-NEXT: vfwcvt.f.xu.v v10, v8 -; LMULMAX8-NEXT: vnsrl.wi v8, v10, 23 -; LMULMAX8-NEXT: li a1, 142 -; LMULMAX8-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX8-NEXT: vse16.v v8, (a0) -; LMULMAX8-NEXT: ret +; RVI-LABEL: ctlz_zero_undef_v8i16: +; RVI: # %bb.0: +; RVI-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; 
RVI-NEXT: vle16.v v8, (a0) +; RVI-NEXT: vsrl.vi v9, v8, 1 +; RVI-NEXT: vor.vv v8, v8, v9 +; RVI-NEXT: vsrl.vi v9, v8, 2 +; RVI-NEXT: vor.vv v8, v8, v9 +; RVI-NEXT: vsrl.vi v9, v8, 4 +; RVI-NEXT: vor.vv v8, v8, v9 +; RVI-NEXT: vsrl.vi v9, v8, 8 +; RVI-NEXT: vor.vv v8, v8, v9 +; RVI-NEXT: vnot.v v8, v8 +; RVI-NEXT: vsrl.vi v9, v8, 1 +; RVI-NEXT: lui a1, 5 +; RVI-NEXT: addi a1, a1, 1365 +; RVI-NEXT: vand.vx v9, v9, a1 +; RVI-NEXT: vsub.vv v8, v8, v9 +; RVI-NEXT: lui a1, 3 +; RVI-NEXT: addi a1, a1, 819 +; RVI-NEXT: vand.vx v9, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 2 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: vadd.vv v8, v9, v8 +; RVI-NEXT: vsrl.vi v9, v8, 4 +; RVI-NEXT: vadd.vv v8, v8, v9 +; RVI-NEXT: lui a1, 1 +; RVI-NEXT: addi a1, a1, -241 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: li a1, 257 +; RVI-NEXT: vmul.vx v8, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 8 +; RVI-NEXT: vse16.v v8, (a0) +; RVI-NEXT: ret +; +; RVF-LABEL: ctlz_zero_undef_v8i16: +; RVF: # %bb.0: +; RVF-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RVF-NEXT: vle16.v v8, (a0) +; RVF-NEXT: vfwcvt.f.xu.v v10, v8 +; RVF-NEXT: vnsrl.wi v8, v10, 23 +; RVF-NEXT: li a1, 142 +; RVF-NEXT: vrsub.vx v8, v8, a1 +; RVF-NEXT: vse16.v v8, (a0) +; RVF-NEXT: ret +; +; RVD-LABEL: ctlz_zero_undef_v8i16: +; RVD: # %bb.0: +; RVD-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RVD-NEXT: vle16.v v8, (a0) +; RVD-NEXT: vfwcvt.f.xu.v v10, v8 +; RVD-NEXT: vnsrl.wi v8, v10, 23 +; RVD-NEXT: li a1, 142 +; RVD-NEXT: vrsub.vx v8, v8, a1 +; RVD-NEXT: vse16.v v8, (a0) +; RVD-NEXT: ret ; ; ZVBB-LABEL: ctlz_zero_undef_v8i16: ; ZVBB: # %bb.0: @@ -1455,143 +964,68 @@ define void @ctlz_zero_undef_v8i16(ptr %x, ptr %y) nounwind { } define void @ctlz_zero_undef_v4i32(ptr %x, ptr %y) nounwind { -; LMULMAX2-RV32I-LABEL: ctlz_zero_undef_v4i32: -; LMULMAX2-RV32I: # %bb.0: -; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9 -; 
LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 16 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV32I-NEXT: lui a1, 349525 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32I-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: lui a1, 209715 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32I-NEXT: vand.vx v9, v8, a1 -; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v9, v8 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: lui a1, 61681 -; LMULMAX2-RV32I-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: lui a1, 4112 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 257 -; LMULMAX2-RV32I-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 24 -; LMULMAX2-RV32I-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV32I-NEXT: ret -; -; LMULMAX2-RV64I-LABEL: ctlz_zero_undef_v4i32: -; LMULMAX2-RV64I: # %bb.0: -; LMULMAX2-RV64I-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV64I-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 16 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV64I-NEXT: lui a1, 349525 -; 
LMULMAX2-RV64I-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64I-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: lui a1, 209715 -; LMULMAX2-RV64I-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a1 -; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v9, v8 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: lui a1, 61681 -; LMULMAX2-RV64I-NEXT: addi a1, a1, -241 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: lui a1, 4112 -; LMULMAX2-RV64I-NEXT: addi a1, a1, 257 -; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 24 -; LMULMAX2-RV64I-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV64I-NEXT: ret -; -; LMULMAX2-RV32F-LABEL: ctlz_zero_undef_v4i32: -; LMULMAX2-RV32F: # %bb.0: -; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV32F-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32F-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX2-RV32F-NEXT: fsrm a1 -; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v8, 23 -; LMULMAX2-RV32F-NEXT: li a1, 158 -; LMULMAX2-RV32F-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX2-RV32F-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV32F-NEXT: ret -; -; LMULMAX2-RV64F-LABEL: ctlz_zero_undef_v4i32: -; LMULMAX2-RV64F: # %bb.0: -; LMULMAX2-RV64F-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV64F-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV64F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64F-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX2-RV64F-NEXT: fsrm a1 -; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v8, 23 -; LMULMAX2-RV64F-NEXT: li a1, 158 -; LMULMAX2-RV64F-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX2-RV64F-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV64F-NEXT: ret -; -; LMULMAX2-RV32D-LABEL: ctlz_zero_undef_v4i32: -; LMULMAX2-RV32D: # %bb.0: -; LMULMAX2-RV32D-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV32D-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV32D-NEXT: vfwcvt.f.xu.v v10, 
v8 -; LMULMAX2-RV32D-NEXT: li a1, 52 -; LMULMAX2-RV32D-NEXT: vnsrl.wx v8, v10, a1 -; LMULMAX2-RV32D-NEXT: li a1, 1054 -; LMULMAX2-RV32D-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX2-RV32D-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV32D-NEXT: ret -; -; LMULMAX2-RV64D-LABEL: ctlz_zero_undef_v4i32: -; LMULMAX2-RV64D: # %bb.0: -; LMULMAX2-RV64D-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV64D-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV64D-NEXT: vfwcvt.f.xu.v v10, v8 -; LMULMAX2-RV64D-NEXT: li a1, 52 -; LMULMAX2-RV64D-NEXT: vnsrl.wx v8, v10, a1 -; LMULMAX2-RV64D-NEXT: li a1, 1054 -; LMULMAX2-RV64D-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX2-RV64D-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV64D-NEXT: ret -; -; LMULMAX8-LABEL: ctlz_zero_undef_v4i32: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX8-NEXT: vle32.v v8, (a0) -; LMULMAX8-NEXT: vfwcvt.f.xu.v v10, v8 -; LMULMAX8-NEXT: li a1, 52 -; LMULMAX8-NEXT: vnsrl.wx v8, v10, a1 -; LMULMAX8-NEXT: li a1, 1054 -; LMULMAX8-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX8-NEXT: vse32.v v8, (a0) -; LMULMAX8-NEXT: ret +; RVI-LABEL: ctlz_zero_undef_v4i32: +; RVI: # %bb.0: +; RVI-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RVI-NEXT: vle32.v v8, (a0) +; RVI-NEXT: vsrl.vi v9, v8, 1 +; RVI-NEXT: vor.vv v8, v8, v9 +; RVI-NEXT: vsrl.vi v9, v8, 2 +; RVI-NEXT: vor.vv v8, v8, v9 +; RVI-NEXT: vsrl.vi v9, v8, 4 +; RVI-NEXT: vor.vv v8, v8, v9 +; RVI-NEXT: vsrl.vi v9, v8, 8 +; RVI-NEXT: vor.vv v8, v8, v9 +; RVI-NEXT: vsrl.vi v9, v8, 16 +; RVI-NEXT: vor.vv v8, v8, v9 +; RVI-NEXT: vnot.v v8, v8 +; RVI-NEXT: vsrl.vi v9, v8, 1 +; RVI-NEXT: lui a1, 349525 +; RVI-NEXT: addi a1, a1, 1365 +; RVI-NEXT: vand.vx v9, v9, a1 +; RVI-NEXT: vsub.vv v8, v8, v9 +; RVI-NEXT: lui a1, 209715 +; RVI-NEXT: addi a1, a1, 819 +; RVI-NEXT: vand.vx v9, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 2 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: vadd.vv v8, v9, v8 +; RVI-NEXT: vsrl.vi v9, v8, 4 +; RVI-NEXT: vadd.vv v8, v8, v9 +; RVI-NEXT: lui a1, 61681 +; RVI-NEXT: addi a1, a1, -241 
+; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: lui a1, 4112 +; RVI-NEXT: addi a1, a1, 257 +; RVI-NEXT: vmul.vx v8, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 24 +; RVI-NEXT: vse32.v v8, (a0) +; RVI-NEXT: ret +; +; RVF-LABEL: ctlz_zero_undef_v4i32: +; RVF: # %bb.0: +; RVF-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RVF-NEXT: vle32.v v8, (a0) +; RVF-NEXT: fsrmi a1, 1 +; RVF-NEXT: vfcvt.f.xu.v v8, v8 +; RVF-NEXT: fsrm a1 +; RVF-NEXT: vsrl.vi v8, v8, 23 +; RVF-NEXT: li a1, 158 +; RVF-NEXT: vrsub.vx v8, v8, a1 +; RVF-NEXT: vse32.v v8, (a0) +; RVF-NEXT: ret +; +; RVD-LABEL: ctlz_zero_undef_v4i32: +; RVD: # %bb.0: +; RVD-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RVD-NEXT: vle32.v v8, (a0) +; RVD-NEXT: vfwcvt.f.xu.v v10, v8 +; RVD-NEXT: li a1, 52 +; RVD-NEXT: vnsrl.wx v8, v10, a1 +; RVD-NEXT: li a1, 1054 +; RVD-NEXT: vrsub.vx v8, v8, a1 +; RVD-NEXT: vse32.v v8, (a0) +; RVD-NEXT: ret ; ; ZVBB-LABEL: ctlz_zero_undef_v4i32: ; ZVBB: # %bb.0: @@ -1608,180 +1042,152 @@ define void @ctlz_zero_undef_v4i32(ptr %x, ptr %y) nounwind { } define void @ctlz_zero_undef_v2i64(ptr %x, ptr %y) nounwind { -; LMULMAX2-RV32I-LABEL: ctlz_zero_undef_v2i64: -; LMULMAX2-RV32I: # %bb.0: -; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 16 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: li a1, 32 -; LMULMAX2-RV32I-NEXT: vsrl.vx v9, v8, a1 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV32I-NEXT: lui a1, 349525 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32I-NEXT: vsetvli a2, 
zero, e32, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vand.vv v9, v9, v10 -; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: lui a1, 209715 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vand.vv v10, v8, v9 -; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: lui a1, 61681 -; LMULMAX2-RV32I-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: lui a1, 4112 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 257 -; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: li a1, 56 -; LMULMAX2-RV32I-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32I-NEXT: ret -; -; LMULMAX2-RV64I-LABEL: ctlz_zero_undef_v2i64: -; LMULMAX2-RV64I: # %bb.0: -; LMULMAX2-RV64I-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV64I-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 8 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 16 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v9 
-; LMULMAX2-RV64I-NEXT: li a1, 32 -; LMULMAX2-RV64I-NEXT: vsrl.vx v9, v8, a1 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV64I-NEXT: lui a1, 349525 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64I-NEXT: add a1, a1, a2 -; LMULMAX2-RV64I-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: lui a1, 209715 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64I-NEXT: add a1, a1, a2 -; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a1 -; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v9, v8 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: lui a1, 61681 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241 -; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64I-NEXT: add a1, a1, a2 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: lui a1, 4112 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 257 -; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64I-NEXT: add a1, a1, a2 -; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: li a1, 56 -; LMULMAX2-RV64I-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV64I-NEXT: ret -; -; LMULMAX2-RV32F-LABEL: ctlz_zero_undef_v2i64: -; LMULMAX2-RV32F: # %bb.0: -; LMULMAX2-RV32F-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32F-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32F-NEXT: li a1, 190 -; LMULMAX2-RV32F-NEXT: vmv.v.x v9, a1 -; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v10, v8 -; LMULMAX2-RV32F-NEXT: fsrm a1 -; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v10, 23 -; LMULMAX2-RV32F-NEXT: vwsubu.wv v9, v9, v8 -; LMULMAX2-RV32F-NEXT: vse64.v v9, (a0) -; LMULMAX2-RV32F-NEXT: ret -; 
-; LMULMAX2-RV64F-LABEL: ctlz_zero_undef_v2i64: -; LMULMAX2-RV64F: # %bb.0: -; LMULMAX2-RV64F-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX2-RV64F-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64F-NEXT: li a1, 190 -; LMULMAX2-RV64F-NEXT: vmv.v.x v9, a1 -; LMULMAX2-RV64F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v10, v8 -; LMULMAX2-RV64F-NEXT: fsrm a1 -; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v10, 23 -; LMULMAX2-RV64F-NEXT: vwsubu.vv v10, v9, v8 -; LMULMAX2-RV64F-NEXT: vse64.v v10, (a0) -; LMULMAX2-RV64F-NEXT: ret -; -; LMULMAX2-RV32D-LABEL: ctlz_zero_undef_v2i64: -; LMULMAX2-RV32D: # %bb.0: -; LMULMAX2-RV32D-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32D-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32D-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX2-RV32D-NEXT: fsrm a1 -; LMULMAX2-RV32D-NEXT: li a1, 52 -; LMULMAX2-RV32D-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX2-RV32D-NEXT: li a1, 1086 -; LMULMAX2-RV32D-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX2-RV32D-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32D-NEXT: ret -; -; LMULMAX2-RV64D-LABEL: ctlz_zero_undef_v2i64: -; LMULMAX2-RV64D: # %bb.0: -; LMULMAX2-RV64D-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV64D-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64D-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64D-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX2-RV64D-NEXT: fsrm a1 -; LMULMAX2-RV64D-NEXT: li a1, 52 -; LMULMAX2-RV64D-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX2-RV64D-NEXT: li a1, 1086 -; LMULMAX2-RV64D-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX2-RV64D-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV64D-NEXT: ret -; -; LMULMAX8-LABEL: ctlz_zero_undef_v2i64: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX8-NEXT: vle64.v v8, (a0) -; LMULMAX8-NEXT: fsrmi a1, 1 -; LMULMAX8-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX8-NEXT: fsrm a1 -; LMULMAX8-NEXT: li a1, 52 -; LMULMAX8-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX8-NEXT: li a1, 1086 -; LMULMAX8-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX8-NEXT: vse64.v v8, (a0) -; LMULMAX8-NEXT: ret +; 
RV32I-LABEL: ctlz_zero_undef_v2i64: +; RV32I: # %bb.0: +; RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32I-NEXT: vle64.v v8, (a0) +; RV32I-NEXT: vsrl.vi v9, v8, 1 +; RV32I-NEXT: vor.vv v8, v8, v9 +; RV32I-NEXT: vsrl.vi v9, v8, 2 +; RV32I-NEXT: vor.vv v8, v8, v9 +; RV32I-NEXT: vsrl.vi v9, v8, 4 +; RV32I-NEXT: vor.vv v8, v8, v9 +; RV32I-NEXT: vsrl.vi v9, v8, 8 +; RV32I-NEXT: vor.vv v8, v8, v9 +; RV32I-NEXT: vsrl.vi v9, v8, 16 +; RV32I-NEXT: vor.vv v8, v8, v9 +; RV32I-NEXT: li a1, 32 +; RV32I-NEXT: vsrl.vx v9, v8, a1 +; RV32I-NEXT: vor.vv v8, v8, v9 +; RV32I-NEXT: vnot.v v8, v8 +; RV32I-NEXT: vsrl.vi v9, v8, 1 +; RV32I-NEXT: lui a1, 349525 +; RV32I-NEXT: addi a1, a1, 1365 +; RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32I-NEXT: vmv.v.x v10, a1 +; RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32I-NEXT: vand.vv v9, v9, v10 +; RV32I-NEXT: vsub.vv v8, v8, v9 +; RV32I-NEXT: lui a1, 209715 +; RV32I-NEXT: addi a1, a1, 819 +; RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32I-NEXT: vmv.v.x v9, a1 +; RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32I-NEXT: vand.vv v10, v8, v9 +; RV32I-NEXT: vsrl.vi v8, v8, 2 +; RV32I-NEXT: vand.vv v8, v8, v9 +; RV32I-NEXT: vadd.vv v8, v10, v8 +; RV32I-NEXT: vsrl.vi v9, v8, 4 +; RV32I-NEXT: vadd.vv v8, v8, v9 +; RV32I-NEXT: lui a1, 61681 +; RV32I-NEXT: addi a1, a1, -241 +; RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32I-NEXT: vmv.v.x v9, a1 +; RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32I-NEXT: vand.vv v8, v8, v9 +; RV32I-NEXT: lui a1, 4112 +; RV32I-NEXT: addi a1, a1, 257 +; RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32I-NEXT: vmv.v.x v9, a1 +; RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32I-NEXT: vmul.vv v8, v8, v9 +; RV32I-NEXT: li a1, 56 +; RV32I-NEXT: vsrl.vx v8, v8, a1 +; RV32I-NEXT: vse64.v v8, (a0) +; RV32I-NEXT: ret +; +; RV64I-LABEL: ctlz_zero_undef_v2i64: +; RV64I: # %bb.0: +; RV64I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64I-NEXT: vle64.v v8, (a0) +; RV64I-NEXT: vsrl.vi 
v9, v8, 1 +; RV64I-NEXT: vor.vv v8, v8, v9 +; RV64I-NEXT: vsrl.vi v9, v8, 2 +; RV64I-NEXT: vor.vv v8, v8, v9 +; RV64I-NEXT: vsrl.vi v9, v8, 4 +; RV64I-NEXT: vor.vv v8, v8, v9 +; RV64I-NEXT: vsrl.vi v9, v8, 8 +; RV64I-NEXT: vor.vv v8, v8, v9 +; RV64I-NEXT: vsrl.vi v9, v8, 16 +; RV64I-NEXT: vor.vv v8, v8, v9 +; RV64I-NEXT: li a1, 32 +; RV64I-NEXT: vsrl.vx v9, v8, a1 +; RV64I-NEXT: vor.vv v8, v8, v9 +; RV64I-NEXT: vnot.v v8, v8 +; RV64I-NEXT: vsrl.vi v9, v8, 1 +; RV64I-NEXT: lui a1, 349525 +; RV64I-NEXT: addiw a1, a1, 1365 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: vand.vx v9, v9, a1 +; RV64I-NEXT: vsub.vv v8, v8, v9 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: vand.vx v9, v8, a1 +; RV64I-NEXT: vsrl.vi v8, v8, 2 +; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: vadd.vv v8, v9, v8 +; RV64I-NEXT: vsrl.vi v9, v8, 4 +; RV64I-NEXT: vadd.vv v8, v8, v9 +; RV64I-NEXT: lui a1, 61681 +; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: lui a1, 4112 +; RV64I-NEXT: addiw a1, a1, 257 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: vmul.vx v8, v8, a1 +; RV64I-NEXT: li a1, 56 +; RV64I-NEXT: vsrl.vx v8, v8, a1 +; RV64I-NEXT: vse64.v v8, (a0) +; RV64I-NEXT: ret +; +; RV32F-LABEL: ctlz_zero_undef_v2i64: +; RV32F: # %bb.0: +; RV32F-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32F-NEXT: vle64.v v8, (a0) +; RV32F-NEXT: li a1, 190 +; RV32F-NEXT: vmv.v.x v9, a1 +; RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV32F-NEXT: fsrmi a1, 1 +; RV32F-NEXT: vfncvt.f.xu.w v10, v8 +; RV32F-NEXT: fsrm a1 +; RV32F-NEXT: vsrl.vi v8, v10, 23 +; RV32F-NEXT: vwsubu.wv v9, v9, v8 +; RV32F-NEXT: vse64.v v9, (a0) +; RV32F-NEXT: ret +; +; RV64F-LABEL: ctlz_zero_undef_v2i64: +; RV64F: # %bb.0: +; RV64F-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV64F-NEXT: vle64.v 
v8, (a0) +; RV64F-NEXT: li a1, 190 +; RV64F-NEXT: vmv.v.x v9, a1 +; RV64F-NEXT: fsrmi a1, 1 +; RV64F-NEXT: vfncvt.f.xu.w v10, v8 +; RV64F-NEXT: fsrm a1 +; RV64F-NEXT: vsrl.vi v8, v10, 23 +; RV64F-NEXT: vwsubu.vv v10, v9, v8 +; RV64F-NEXT: vse64.v v10, (a0) +; RV64F-NEXT: ret +; +; RVD-LABEL: ctlz_zero_undef_v2i64: +; RVD: # %bb.0: +; RVD-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RVD-NEXT: vle64.v v8, (a0) +; RVD-NEXT: fsrmi a1, 1 +; RVD-NEXT: vfcvt.f.xu.v v8, v8 +; RVD-NEXT: fsrm a1 +; RVD-NEXT: li a1, 52 +; RVD-NEXT: vsrl.vx v8, v8, a1 +; RVD-NEXT: li a1, 1086 +; RVD-NEXT: vrsub.vx v8, v8, a1 +; RVD-NEXT: vse64.v v8, (a0) +; RVD-NEXT: ret ; ; ZVBB-LABEL: ctlz_zero_undef_v2i64: ; ZVBB: # %bb.0: @@ -1798,93 +1204,62 @@ define void @ctlz_zero_undef_v2i64(ptr %x, ptr %y) nounwind { } define void @ctlz_zero_undef_v32i8(ptr %x, ptr %y) nounwind { -; LMULMAX2-LABEL: ctlz_zero_undef_v32i8: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a1, 32 -; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-NEXT: vle8.v v8, (a0) -; LMULMAX2-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX2-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-NEXT: vnot.v v8, v8 -; LMULMAX2-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-NEXT: li a1, 85 -; LMULMAX2-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-NEXT: li a1, 51 -; LMULMAX2-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-NEXT: vand.vi v8, v8, 15 -; LMULMAX2-NEXT: vse8.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: ctlz_zero_undef_v32i8: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vle8.v v8, (a1) -; LMULMAX1-NEXT: vle8.v v9, (a0) -; 
LMULMAX1-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX1-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-NEXT: vnot.v v8, v8 -; LMULMAX1-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-NEXT: li a2, 85 -; LMULMAX1-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-NEXT: li a3, 51 -; LMULMAX1-NEXT: vand.vx v10, v8, a3 -; LMULMAX1-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-NEXT: vand.vx v8, v8, a3 -; LMULMAX1-NEXT: vadd.vv v8, v10, v8 -; LMULMAX1-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-NEXT: vand.vi v8, v8, 15 -; LMULMAX1-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-NEXT: vsrl.vi v10, v9, 2 -; LMULMAX1-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-NEXT: vnot.v v9, v9 -; LMULMAX1-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-NEXT: vsub.vv v9, v9, v10 -; LMULMAX1-NEXT: vand.vx v10, v9, a3 -; LMULMAX1-NEXT: vsrl.vi v9, v9, 2 -; LMULMAX1-NEXT: vand.vx v9, v9, a3 -; LMULMAX1-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-NEXT: vand.vi v9, v9, 15 -; LMULMAX1-NEXT: vse8.v v9, (a0) -; LMULMAX1-NEXT: vse8.v v8, (a1) -; LMULMAX1-NEXT: ret -; -; LMULMAX8-LABEL: ctlz_zero_undef_v32i8: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: li a1, 32 -; LMULMAX8-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; LMULMAX8-NEXT: vle8.v v8, (a0) -; LMULMAX8-NEXT: vzext.vf2 v12, v8 -; LMULMAX8-NEXT: vfwcvt.f.xu.v v16, v12 -; LMULMAX8-NEXT: vnsrl.wi v8, v16, 23 -; LMULMAX8-NEXT: vsetvli zero, zero, e8, m2, ta, ma -; LMULMAX8-NEXT: vnsrl.wi v12, v8, 0 -; LMULMAX8-NEXT: li a1, 134 -; LMULMAX8-NEXT: vrsub.vx v8, v12, a1 -; LMULMAX8-NEXT: vse8.v v8, (a0) -; LMULMAX8-NEXT: ret +; RVI-LABEL: ctlz_zero_undef_v32i8: +; RVI: # %bb.0: +; RVI-NEXT: li a1, 32 +; 
RVI-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; RVI-NEXT: vle8.v v8, (a0) +; RVI-NEXT: vsrl.vi v10, v8, 1 +; RVI-NEXT: vor.vv v8, v8, v10 +; RVI-NEXT: vsrl.vi v10, v8, 2 +; RVI-NEXT: vor.vv v8, v8, v10 +; RVI-NEXT: vsrl.vi v10, v8, 4 +; RVI-NEXT: vor.vv v8, v8, v10 +; RVI-NEXT: vnot.v v8, v8 +; RVI-NEXT: vsrl.vi v10, v8, 1 +; RVI-NEXT: li a1, 85 +; RVI-NEXT: vand.vx v10, v10, a1 +; RVI-NEXT: vsub.vv v8, v8, v10 +; RVI-NEXT: li a1, 51 +; RVI-NEXT: vand.vx v10, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 2 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: vadd.vv v8, v10, v8 +; RVI-NEXT: vsrl.vi v10, v8, 4 +; RVI-NEXT: vadd.vv v8, v8, v10 +; RVI-NEXT: vand.vi v8, v8, 15 +; RVI-NEXT: vse8.v v8, (a0) +; RVI-NEXT: ret +; +; RVF-LABEL: ctlz_zero_undef_v32i8: +; RVF: # %bb.0: +; RVF-NEXT: li a1, 32 +; RVF-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; RVF-NEXT: vle8.v v8, (a0) +; RVF-NEXT: vzext.vf2 v12, v8 +; RVF-NEXT: vfwcvt.f.xu.v v16, v12 +; RVF-NEXT: vnsrl.wi v8, v16, 23 +; RVF-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; RVF-NEXT: vnsrl.wi v12, v8, 0 +; RVF-NEXT: li a1, 134 +; RVF-NEXT: vrsub.vx v8, v12, a1 +; RVF-NEXT: vse8.v v8, (a0) +; RVF-NEXT: ret +; +; RVD-LABEL: ctlz_zero_undef_v32i8: +; RVD: # %bb.0: +; RVD-NEXT: li a1, 32 +; RVD-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; RVD-NEXT: vle8.v v8, (a0) +; RVD-NEXT: vzext.vf2 v12, v8 +; RVD-NEXT: vfwcvt.f.xu.v v16, v12 +; RVD-NEXT: vnsrl.wi v8, v16, 23 +; RVD-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; RVD-NEXT: vnsrl.wi v12, v8, 0 +; RVD-NEXT: li a1, 134 +; RVD-NEXT: vrsub.vx v8, v12, a1 +; RVD-NEXT: vse8.v v8, (a0) +; RVD-NEXT: ret ; ; ZVBB-LABEL: ctlz_zero_undef_v32i8: ; ZVBB: # %bb.0: @@ -1902,110 +1277,62 @@ define void @ctlz_zero_undef_v32i8(ptr %x, ptr %y) nounwind { } define void @ctlz_zero_undef_v16i16(ptr %x, ptr %y) nounwind { -; LMULMAX2-LABEL: ctlz_zero_undef_v16i16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-NEXT: vle16.v v8, (a0) -; LMULMAX2-NEXT: vsrl.vi v10, v8, 1 -; 
LMULMAX2-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX2-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX2-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-NEXT: vnot.v v8, v8 -; LMULMAX2-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-NEXT: lui a1, 5 -; LMULMAX2-NEXT: addi a1, a1, 1365 -; LMULMAX2-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-NEXT: lui a1, 3 -; LMULMAX2-NEXT: addi a1, a1, 819 -; LMULMAX2-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-NEXT: lui a1, 1 -; LMULMAX2-NEXT: addi a1, a1, -241 -; LMULMAX2-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-NEXT: li a1, 257 -; LMULMAX2-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX2-NEXT: vse16.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: ctlz_zero_undef_v16i16: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vle16.v v8, (a1) -; LMULMAX1-NEXT: vle16.v v9, (a0) -; LMULMAX1-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX1-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX1-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-NEXT: vnot.v v8, v8 -; LMULMAX1-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-NEXT: lui a2, 5 -; LMULMAX1-NEXT: addi a2, a2, 1365 -; LMULMAX1-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-NEXT: lui a3, 3 -; LMULMAX1-NEXT: addi a3, a3, 819 -; LMULMAX1-NEXT: vand.vx v10, v8, a3 -; LMULMAX1-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-NEXT: vand.vx v8, v8, a3 -; LMULMAX1-NEXT: vadd.vv v8, v10, v8 -; LMULMAX1-NEXT: vsrl.vi v10, v8, 4 -; 
LMULMAX1-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-NEXT: lui a4, 1 -; LMULMAX1-NEXT: addi a4, a4, -241 -; LMULMAX1-NEXT: vand.vx v8, v8, a4 -; LMULMAX1-NEXT: li a5, 257 -; LMULMAX1-NEXT: vmul.vx v8, v8, a5 -; LMULMAX1-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX1-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-NEXT: vsrl.vi v10, v9, 2 -; LMULMAX1-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-NEXT: vsrl.vi v10, v9, 8 -; LMULMAX1-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-NEXT: vnot.v v9, v9 -; LMULMAX1-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-NEXT: vsub.vv v9, v9, v10 -; LMULMAX1-NEXT: vand.vx v10, v9, a3 -; LMULMAX1-NEXT: vsrl.vi v9, v9, 2 -; LMULMAX1-NEXT: vand.vx v9, v9, a3 -; LMULMAX1-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-NEXT: vand.vx v9, v9, a4 -; LMULMAX1-NEXT: vmul.vx v9, v9, a5 -; LMULMAX1-NEXT: vsrl.vi v9, v9, 8 -; LMULMAX1-NEXT: vse16.v v9, (a0) -; LMULMAX1-NEXT: vse16.v v8, (a1) -; LMULMAX1-NEXT: ret -; -; LMULMAX8-LABEL: ctlz_zero_undef_v16i16: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX8-NEXT: vle16.v v8, (a0) -; LMULMAX8-NEXT: vfwcvt.f.xu.v v12, v8 -; LMULMAX8-NEXT: vnsrl.wi v8, v12, 23 -; LMULMAX8-NEXT: li a1, 142 -; LMULMAX8-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX8-NEXT: vse16.v v8, (a0) -; LMULMAX8-NEXT: ret +; RVI-LABEL: ctlz_zero_undef_v16i16: +; RVI: # %bb.0: +; RVI-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RVI-NEXT: vle16.v v8, (a0) +; RVI-NEXT: vsrl.vi v10, v8, 1 +; RVI-NEXT: vor.vv v8, v8, v10 +; RVI-NEXT: vsrl.vi v10, v8, 2 +; RVI-NEXT: vor.vv v8, v8, v10 +; RVI-NEXT: vsrl.vi v10, v8, 4 +; RVI-NEXT: vor.vv v8, v8, v10 +; RVI-NEXT: vsrl.vi v10, v8, 8 +; RVI-NEXT: vor.vv v8, v8, v10 +; RVI-NEXT: vnot.v v8, v8 +; RVI-NEXT: vsrl.vi v10, v8, 1 +; RVI-NEXT: lui a1, 5 +; RVI-NEXT: addi a1, a1, 1365 +; RVI-NEXT: vand.vx v10, v10, 
a1 +; RVI-NEXT: vsub.vv v8, v8, v10 +; RVI-NEXT: lui a1, 3 +; RVI-NEXT: addi a1, a1, 819 +; RVI-NEXT: vand.vx v10, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 2 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: vadd.vv v8, v10, v8 +; RVI-NEXT: vsrl.vi v10, v8, 4 +; RVI-NEXT: vadd.vv v8, v8, v10 +; RVI-NEXT: lui a1, 1 +; RVI-NEXT: addi a1, a1, -241 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: li a1, 257 +; RVI-NEXT: vmul.vx v8, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 8 +; RVI-NEXT: vse16.v v8, (a0) +; RVI-NEXT: ret +; +; RVF-LABEL: ctlz_zero_undef_v16i16: +; RVF: # %bb.0: +; RVF-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RVF-NEXT: vle16.v v8, (a0) +; RVF-NEXT: vfwcvt.f.xu.v v12, v8 +; RVF-NEXT: vnsrl.wi v8, v12, 23 +; RVF-NEXT: li a1, 142 +; RVF-NEXT: vrsub.vx v8, v8, a1 +; RVF-NEXT: vse16.v v8, (a0) +; RVF-NEXT: ret +; +; RVD-LABEL: ctlz_zero_undef_v16i16: +; RVD: # %bb.0: +; RVD-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RVD-NEXT: vle16.v v8, (a0) +; RVD-NEXT: vfwcvt.f.xu.v v12, v8 +; RVD-NEXT: vnsrl.wi v8, v12, 23 +; RVD-NEXT: li a1, 142 +; RVD-NEXT: vrsub.vx v8, v8, a1 +; RVD-NEXT: vse16.v v8, (a0) +; RVD-NEXT: ret ; ; ZVBB-LABEL: ctlz_zero_undef_v16i16: ; ZVBB: # %bb.0: @@ -2022,145 +1349,68 @@ define void @ctlz_zero_undef_v16i16(ptr %x, ptr %y) nounwind { } define void @ctlz_zero_undef_v8i32(ptr %x, ptr %y) nounwind { -; LMULMAX2-RV32I-LABEL: ctlz_zero_undef_v8i32: -; LMULMAX2-RV32I: # %bb.0: -; LMULMAX2-RV32I-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 16 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV32I-NEXT: 
vsrl.vi v10, v8, 1 -; LMULMAX2-RV32I-NEXT: lui a1, 349525 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32I-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: lui a1, 209715 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32I-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: lui a1, 61681 -; LMULMAX2-RV32I-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: lui a1, 4112 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 257 -; LMULMAX2-RV32I-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 24 -; LMULMAX2-RV32I-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV32I-NEXT: ret -; -; LMULMAX2-RV64I-LABEL: ctlz_zero_undef_v8i32: -; LMULMAX2-RV64I: # %bb.0: -; LMULMAX2-RV64I-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV64I-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 16 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64I-NEXT: lui a1, 349525 -; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64I-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: lui a1, 209715 -; LMULMAX2-RV64I-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64I-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v10, v8 
-; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: lui a1, 61681 -; LMULMAX2-RV64I-NEXT: addi a1, a1, -241 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: lui a1, 4112 -; LMULMAX2-RV64I-NEXT: addi a1, a1, 257 -; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 24 -; LMULMAX2-RV64I-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV64I-NEXT: ret -; -; LMULMAX2-RV32F-LABEL: ctlz_zero_undef_v8i32: -; LMULMAX2-RV32F: # %bb.0: -; LMULMAX2-RV32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32F-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32F-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX2-RV32F-NEXT: fsrm a1 -; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v8, 23 -; LMULMAX2-RV32F-NEXT: li a1, 158 -; LMULMAX2-RV32F-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX2-RV32F-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV32F-NEXT: ret -; -; LMULMAX2-RV64F-LABEL: ctlz_zero_undef_v8i32: -; LMULMAX2-RV64F: # %bb.0: -; LMULMAX2-RV64F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV64F-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV64F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64F-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX2-RV64F-NEXT: fsrm a1 -; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v8, 23 -; LMULMAX2-RV64F-NEXT: li a1, 158 -; LMULMAX2-RV64F-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX2-RV64F-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV64F-NEXT: ret -; -; LMULMAX2-RV32D-LABEL: ctlz_zero_undef_v8i32: -; LMULMAX2-RV32D: # %bb.0: -; LMULMAX2-RV32D-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32D-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV32D-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX2-RV32D-NEXT: fsrm a1 -; LMULMAX2-RV32D-NEXT: vsrl.vi v8, v8, 23 -; LMULMAX2-RV32D-NEXT: li a1, 158 -; LMULMAX2-RV32D-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX2-RV32D-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV32D-NEXT: ret -; -; LMULMAX2-RV64D-LABEL: ctlz_zero_undef_v8i32: -; LMULMAX2-RV64D: # %bb.0: -; LMULMAX2-RV64D-NEXT: vsetivli 
zero, 8, e32, m2, ta, ma -; LMULMAX2-RV64D-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV64D-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64D-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX2-RV64D-NEXT: fsrm a1 -; LMULMAX2-RV64D-NEXT: vsrl.vi v8, v8, 23 -; LMULMAX2-RV64D-NEXT: li a1, 158 -; LMULMAX2-RV64D-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX2-RV64D-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV64D-NEXT: ret -; -; LMULMAX8-LABEL: ctlz_zero_undef_v8i32: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX8-NEXT: vle32.v v8, (a0) -; LMULMAX8-NEXT: vfwcvt.f.xu.v v12, v8 -; LMULMAX8-NEXT: li a1, 52 -; LMULMAX8-NEXT: vnsrl.wx v8, v12, a1 -; LMULMAX8-NEXT: li a1, 1054 -; LMULMAX8-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX8-NEXT: vse32.v v8, (a0) -; LMULMAX8-NEXT: ret +; RVI-LABEL: ctlz_zero_undef_v8i32: +; RVI: # %bb.0: +; RVI-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RVI-NEXT: vle32.v v8, (a0) +; RVI-NEXT: vsrl.vi v10, v8, 1 +; RVI-NEXT: vor.vv v8, v8, v10 +; RVI-NEXT: vsrl.vi v10, v8, 2 +; RVI-NEXT: vor.vv v8, v8, v10 +; RVI-NEXT: vsrl.vi v10, v8, 4 +; RVI-NEXT: vor.vv v8, v8, v10 +; RVI-NEXT: vsrl.vi v10, v8, 8 +; RVI-NEXT: vor.vv v8, v8, v10 +; RVI-NEXT: vsrl.vi v10, v8, 16 +; RVI-NEXT: vor.vv v8, v8, v10 +; RVI-NEXT: vnot.v v8, v8 +; RVI-NEXT: vsrl.vi v10, v8, 1 +; RVI-NEXT: lui a1, 349525 +; RVI-NEXT: addi a1, a1, 1365 +; RVI-NEXT: vand.vx v10, v10, a1 +; RVI-NEXT: vsub.vv v8, v8, v10 +; RVI-NEXT: lui a1, 209715 +; RVI-NEXT: addi a1, a1, 819 +; RVI-NEXT: vand.vx v10, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 2 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: vadd.vv v8, v10, v8 +; RVI-NEXT: vsrl.vi v10, v8, 4 +; RVI-NEXT: vadd.vv v8, v8, v10 +; RVI-NEXT: lui a1, 61681 +; RVI-NEXT: addi a1, a1, -241 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: lui a1, 4112 +; RVI-NEXT: addi a1, a1, 257 +; RVI-NEXT: vmul.vx v8, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 24 +; RVI-NEXT: vse32.v v8, (a0) +; RVI-NEXT: ret +; +; RVF-LABEL: ctlz_zero_undef_v8i32: +; RVF: # %bb.0: +; RVF-NEXT: vsetivli zero, 8, e32, m2, 
ta, ma +; RVF-NEXT: vle32.v v8, (a0) +; RVF-NEXT: fsrmi a1, 1 +; RVF-NEXT: vfcvt.f.xu.v v8, v8 +; RVF-NEXT: fsrm a1 +; RVF-NEXT: vsrl.vi v8, v8, 23 +; RVF-NEXT: li a1, 158 +; RVF-NEXT: vrsub.vx v8, v8, a1 +; RVF-NEXT: vse32.v v8, (a0) +; RVF-NEXT: ret +; +; RVD-LABEL: ctlz_zero_undef_v8i32: +; RVD: # %bb.0: +; RVD-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RVD-NEXT: vle32.v v8, (a0) +; RVD-NEXT: vfwcvt.f.xu.v v12, v8 +; RVD-NEXT: li a1, 52 +; RVD-NEXT: vnsrl.wx v8, v12, a1 +; RVD-NEXT: li a1, 1054 +; RVD-NEXT: vrsub.vx v8, v8, a1 +; RVD-NEXT: vse32.v v8, (a0) +; RVD-NEXT: ret ; ; ZVBB-LABEL: ctlz_zero_undef_v8i32: ; ZVBB: # %bb.0: @@ -2177,180 +1427,152 @@ define void @ctlz_zero_undef_v8i32(ptr %x, ptr %y) nounwind { } define void @ctlz_zero_undef_v4i64(ptr %x, ptr %y) nounwind { -; LMULMAX2-RV32I-LABEL: ctlz_zero_undef_v4i64: -; LMULMAX2-RV32I: # %bb.0: -; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 16 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: li a1, 32 -; LMULMAX2-RV32I-NEXT: vsrl.vx v10, v8, a1 -; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV32I-NEXT: lui a1, 349525 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.x v12, a1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vand.vv v10, v10, v12 -; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: lui a1, 209715 -; LMULMAX2-RV32I-NEXT: addi a1, 
a1, 819 -; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vand.vv v12, v8, v10 -; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v12, v8 -; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: lui a1, 61681 -; LMULMAX2-RV32I-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: lui a1, 4112 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 257 -; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: li a1, 56 -; LMULMAX2-RV32I-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32I-NEXT: ret -; -; LMULMAX2-RV64I-LABEL: ctlz_zero_undef_v4i64: -; LMULMAX2-RV64I: # %bb.0: -; LMULMAX2-RV64I-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV64I-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 8 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 16 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: li a1, 32 -; LMULMAX2-RV64I-NEXT: vsrl.vx v10, v8, a1 -; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64I-NEXT: lui a1, 349525 -; 
LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64I-NEXT: add a1, a1, a2 -; LMULMAX2-RV64I-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: lui a1, 209715 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64I-NEXT: add a1, a1, a2 -; LMULMAX2-RV64I-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: lui a1, 61681 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241 -; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64I-NEXT: add a1, a1, a2 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: lui a1, 4112 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 257 -; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64I-NEXT: add a1, a1, a2 -; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: li a1, 56 -; LMULMAX2-RV64I-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV64I-NEXT: ret -; -; LMULMAX2-RV32F-LABEL: ctlz_zero_undef_v4i64: -; LMULMAX2-RV32F: # %bb.0: -; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32F-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32F-NEXT: li a1, 190 -; LMULMAX2-RV32F-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v12, v8 -; LMULMAX2-RV32F-NEXT: fsrm a1 -; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v12, 23 -; LMULMAX2-RV32F-NEXT: vwsubu.wv v10, v10, v8 -; LMULMAX2-RV32F-NEXT: vse64.v v10, (a0) -; LMULMAX2-RV32F-NEXT: ret -; -; LMULMAX2-RV64F-LABEL: ctlz_zero_undef_v4i64: -; LMULMAX2-RV64F: # %bb.0: -; LMULMAX2-RV64F-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV64F-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64F-NEXT: li a1, 190 -; 
LMULMAX2-RV64F-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV64F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v11, v8 -; LMULMAX2-RV64F-NEXT: fsrm a1 -; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v11, 23 -; LMULMAX2-RV64F-NEXT: vwsubu.vv v12, v10, v8 -; LMULMAX2-RV64F-NEXT: vse64.v v12, (a0) -; LMULMAX2-RV64F-NEXT: ret -; -; LMULMAX2-RV32D-LABEL: ctlz_zero_undef_v4i64: -; LMULMAX2-RV32D: # %bb.0: -; LMULMAX2-RV32D-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32D-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32D-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX2-RV32D-NEXT: fsrm a1 -; LMULMAX2-RV32D-NEXT: li a1, 52 -; LMULMAX2-RV32D-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX2-RV32D-NEXT: li a1, 1086 -; LMULMAX2-RV32D-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX2-RV32D-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32D-NEXT: ret -; -; LMULMAX2-RV64D-LABEL: ctlz_zero_undef_v4i64: -; LMULMAX2-RV64D: # %bb.0: -; LMULMAX2-RV64D-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV64D-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64D-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64D-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX2-RV64D-NEXT: fsrm a1 -; LMULMAX2-RV64D-NEXT: li a1, 52 -; LMULMAX2-RV64D-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX2-RV64D-NEXT: li a1, 1086 -; LMULMAX2-RV64D-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX2-RV64D-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV64D-NEXT: ret -; -; LMULMAX8-LABEL: ctlz_zero_undef_v4i64: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX8-NEXT: vle64.v v8, (a0) -; LMULMAX8-NEXT: fsrmi a1, 1 -; LMULMAX8-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX8-NEXT: fsrm a1 -; LMULMAX8-NEXT: li a1, 52 -; LMULMAX8-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX8-NEXT: li a1, 1086 -; LMULMAX8-NEXT: vrsub.vx v8, v8, a1 -; LMULMAX8-NEXT: vse64.v v8, (a0) -; LMULMAX8-NEXT: ret +; RV32I-LABEL: ctlz_zero_undef_v4i64: +; RV32I: # %bb.0: +; RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32I-NEXT: vle64.v v8, (a0) +; RV32I-NEXT: vsrl.vi v10, v8, 1 +; RV32I-NEXT: vor.vv v8, v8, v10 +; 
RV32I-NEXT: vsrl.vi v10, v8, 2 +; RV32I-NEXT: vor.vv v8, v8, v10 +; RV32I-NEXT: vsrl.vi v10, v8, 4 +; RV32I-NEXT: vor.vv v8, v8, v10 +; RV32I-NEXT: vsrl.vi v10, v8, 8 +; RV32I-NEXT: vor.vv v8, v8, v10 +; RV32I-NEXT: vsrl.vi v10, v8, 16 +; RV32I-NEXT: vor.vv v8, v8, v10 +; RV32I-NEXT: li a1, 32 +; RV32I-NEXT: vsrl.vx v10, v8, a1 +; RV32I-NEXT: vor.vv v8, v8, v10 +; RV32I-NEXT: vnot.v v8, v8 +; RV32I-NEXT: vsrl.vi v10, v8, 1 +; RV32I-NEXT: lui a1, 349525 +; RV32I-NEXT: addi a1, a1, 1365 +; RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32I-NEXT: vmv.v.x v12, a1 +; RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32I-NEXT: vand.vv v10, v10, v12 +; RV32I-NEXT: vsub.vv v8, v8, v10 +; RV32I-NEXT: lui a1, 209715 +; RV32I-NEXT: addi a1, a1, 819 +; RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32I-NEXT: vmv.v.x v10, a1 +; RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32I-NEXT: vand.vv v12, v8, v10 +; RV32I-NEXT: vsrl.vi v8, v8, 2 +; RV32I-NEXT: vand.vv v8, v8, v10 +; RV32I-NEXT: vadd.vv v8, v12, v8 +; RV32I-NEXT: vsrl.vi v10, v8, 4 +; RV32I-NEXT: vadd.vv v8, v8, v10 +; RV32I-NEXT: lui a1, 61681 +; RV32I-NEXT: addi a1, a1, -241 +; RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32I-NEXT: vmv.v.x v10, a1 +; RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32I-NEXT: vand.vv v8, v8, v10 +; RV32I-NEXT: lui a1, 4112 +; RV32I-NEXT: addi a1, a1, 257 +; RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32I-NEXT: vmv.v.x v10, a1 +; RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32I-NEXT: vmul.vv v8, v8, v10 +; RV32I-NEXT: li a1, 56 +; RV32I-NEXT: vsrl.vx v8, v8, a1 +; RV32I-NEXT: vse64.v v8, (a0) +; RV32I-NEXT: ret +; +; RV64I-LABEL: ctlz_zero_undef_v4i64: +; RV64I: # %bb.0: +; RV64I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64I-NEXT: vle64.v v8, (a0) +; RV64I-NEXT: vsrl.vi v10, v8, 1 +; RV64I-NEXT: vor.vv v8, v8, v10 +; RV64I-NEXT: vsrl.vi v10, v8, 2 +; RV64I-NEXT: vor.vv v8, v8, v10 +; RV64I-NEXT: vsrl.vi v10, v8, 4 +; RV64I-NEXT: vor.vv v8, v8, v10 +; 
RV64I-NEXT: vsrl.vi v10, v8, 8 +; RV64I-NEXT: vor.vv v8, v8, v10 +; RV64I-NEXT: vsrl.vi v10, v8, 16 +; RV64I-NEXT: vor.vv v8, v8, v10 +; RV64I-NEXT: li a1, 32 +; RV64I-NEXT: vsrl.vx v10, v8, a1 +; RV64I-NEXT: vor.vv v8, v8, v10 +; RV64I-NEXT: vnot.v v8, v8 +; RV64I-NEXT: vsrl.vi v10, v8, 1 +; RV64I-NEXT: lui a1, 349525 +; RV64I-NEXT: addiw a1, a1, 1365 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: vand.vx v10, v10, a1 +; RV64I-NEXT: vsub.vv v8, v8, v10 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: vand.vx v10, v8, a1 +; RV64I-NEXT: vsrl.vi v8, v8, 2 +; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: vadd.vv v8, v10, v8 +; RV64I-NEXT: vsrl.vi v10, v8, 4 +; RV64I-NEXT: vadd.vv v8, v8, v10 +; RV64I-NEXT: lui a1, 61681 +; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: lui a1, 4112 +; RV64I-NEXT: addiw a1, a1, 257 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: vmul.vx v8, v8, a1 +; RV64I-NEXT: li a1, 56 +; RV64I-NEXT: vsrl.vx v8, v8, a1 +; RV64I-NEXT: vse64.v v8, (a0) +; RV64I-NEXT: ret +; +; RV32F-LABEL: ctlz_zero_undef_v4i64: +; RV32F: # %bb.0: +; RV32F-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32F-NEXT: vle64.v v8, (a0) +; RV32F-NEXT: li a1, 190 +; RV32F-NEXT: vmv.v.x v10, a1 +; RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV32F-NEXT: fsrmi a1, 1 +; RV32F-NEXT: vfncvt.f.xu.w v12, v8 +; RV32F-NEXT: fsrm a1 +; RV32F-NEXT: vsrl.vi v8, v12, 23 +; RV32F-NEXT: vwsubu.wv v10, v10, v8 +; RV32F-NEXT: vse64.v v10, (a0) +; RV32F-NEXT: ret +; +; RV64F-LABEL: ctlz_zero_undef_v4i64: +; RV64F: # %bb.0: +; RV64F-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64F-NEXT: vle64.v v8, (a0) +; RV64F-NEXT: li a1, 190 +; RV64F-NEXT: vmv.v.x v10, a1 +; RV64F-NEXT: fsrmi a1, 1 +; RV64F-NEXT: vfncvt.f.xu.w v11, v8 +; RV64F-NEXT: fsrm a1 +; 
RV64F-NEXT: vsrl.vi v8, v11, 23 +; RV64F-NEXT: vwsubu.vv v12, v10, v8 +; RV64F-NEXT: vse64.v v12, (a0) +; RV64F-NEXT: ret +; +; RVD-LABEL: ctlz_zero_undef_v4i64: +; RVD: # %bb.0: +; RVD-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RVD-NEXT: vle64.v v8, (a0) +; RVD-NEXT: fsrmi a1, 1 +; RVD-NEXT: vfcvt.f.xu.v v8, v8 +; RVD-NEXT: fsrm a1 +; RVD-NEXT: li a1, 52 +; RVD-NEXT: vsrl.vx v8, v8, a1 +; RVD-NEXT: li a1, 1086 +; RVD-NEXT: vrsub.vx v8, v8, a1 +; RVD-NEXT: vse64.v v8, (a0) +; RVD-NEXT: ret ; ; ZVBB-LABEL: ctlz_zero_undef_v4i64: ; ZVBB: # %bb.0: @@ -2366,7 +1588,5 @@ define void @ctlz_zero_undef_v4i64(ptr %x, ptr %y) nounwind { ret void } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; LMULMAX1-RV32: {{.*}} -; LMULMAX1-RV64: {{.*}} -; LMULMAX2-RV32: {{.*}} -; LMULMAX2-RV64: {{.*}} +; RV32D: {{.*}} +; RV64D: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll index 5e0c99fa1f46e0..147f560633a45f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll @@ -1,8 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64 -; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s | 
FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 ; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB ; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB @@ -127,157 +125,81 @@ define void @ctpop_v4i32(ptr %x, ptr %y) { declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) define void @ctpop_v2i64(ptr %x, ptr %y) { -; LMULMAX2-RV32-LABEL: ctpop_v2i64: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32-NEXT: lui a1, 349525 -; LMULMAX2-RV32-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v9, a1 -; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV32-NEXT: vand.vv v9, v10, v9 -; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV32-NEXT: lui a1, 209715 -; LMULMAX2-RV32-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v9, a1 -; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32-NEXT: vand.vv v10, v8, v9 -; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV32-NEXT: lui a1, 61681 -; LMULMAX2-RV32-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v9, a1 -; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV32-NEXT: lui a1, 4112 -; LMULMAX2-RV32-NEXT: addi a1, a1, 257 -; LMULMAX2-RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v9, a1 -; LMULMAX2-RV32-NEXT: 
vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32-NEXT: vmul.vv v8, v8, v9 -; LMULMAX2-RV32-NEXT: li a1, 56 -; LMULMAX2-RV32-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX2-RV64-LABEL: ctpop_v2i64: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV64-NEXT: lui a1, 349525 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64-NEXT: add a1, a1, a2 -; LMULMAX2-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: lui a1, 209715 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64-NEXT: add a1, a1, a2 -; LMULMAX2-RV64-NEXT: vand.vx v9, v8, a1 -; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v9, v8 -; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: lui a1, 61681 -; LMULMAX2-RV64-NEXT: addiw a1, a1, -241 -; LMULMAX2-RV64-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64-NEXT: add a1, a1, a2 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: lui a1, 4112 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 257 -; LMULMAX2-RV64-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64-NEXT: add a1, a1, a2 -; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: li a1, 56 -; LMULMAX2-RV64-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret -; -; LMULMAX1-RV32-LABEL: ctpop_v2i64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV32-NEXT: lui a1, 349525 -; LMULMAX1-RV32-NEXT: addi a1, a1, 1365 -; LMULMAX1-RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.x v9, a1 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, 
ma -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV32-NEXT: vand.vv v9, v10, v9 -; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: lui a1, 209715 -; LMULMAX1-RV32-NEXT: addi a1, a1, 819 -; LMULMAX1-RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.x v9, a1 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vand.vv v10, v8, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v10, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: lui a1, 61681 -; LMULMAX1-RV32-NEXT: addi a1, a1, -241 -; LMULMAX1-RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.x v9, a1 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: lui a1, 4112 -; LMULMAX1-RV32-NEXT: addi a1, a1, 257 -; LMULMAX1-RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.x v9, a1 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmul.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: li a1, 56 -; LMULMAX1-RV32-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: ctpop_v2i64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV64-NEXT: lui a1, 349525 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX1-RV64-NEXT: slli a2, a1, 32 -; LMULMAX1-RV64-NEXT: add a1, a1, a2 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: lui a1, 209715 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX1-RV64-NEXT: slli a2, a1, 32 -; LMULMAX1-RV64-NEXT: add a1, a1, a2 -; LMULMAX1-RV64-NEXT: vand.vx v9, v8, a1 -; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2 -; 
LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v9, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: lui a1, 61681 -; LMULMAX1-RV64-NEXT: addiw a1, a1, -241 -; LMULMAX1-RV64-NEXT: slli a2, a1, 32 -; LMULMAX1-RV64-NEXT: add a1, a1, a2 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: lui a1, 4112 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 257 -; LMULMAX1-RV64-NEXT: slli a2, a1, 32 -; LMULMAX1-RV64-NEXT: add a1, a1, a2 -; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: li a1, 56 -; LMULMAX1-RV64-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; RV32-LABEL: ctpop_v2i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v9, a1 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vsrl.vi v10, v8, 1 +; RV32-NEXT: vand.vv v9, v10, v9 +; RV32-NEXT: vsub.vv v8, v8, v9 +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v9, a1 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vand.vv v10, v8, v9 +; RV32-NEXT: vsrl.vi v8, v8, 2 +; RV32-NEXT: vand.vv v8, v8, v9 +; RV32-NEXT: vadd.vv v8, v10, v8 +; RV32-NEXT: vsrl.vi v9, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v9 +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v9, a1 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vand.vv v8, v8, v9 +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v9, a1 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vmul.vv v8, v8, v9 +; RV32-NEXT: li a1, 56 +; RV32-NEXT: vsrl.vx v8, v8, a1 +; RV32-NEXT: 
vse64.v v8, (a0) +; RV32-NEXT: ret +; +; RV64-LABEL: ctpop_v2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vsrl.vi v9, v8, 1 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: slli a2, a1, 32 +; RV64-NEXT: add a1, a1, a2 +; RV64-NEXT: vand.vx v9, v9, a1 +; RV64-NEXT: vsub.vv v8, v8, v9 +; RV64-NEXT: lui a1, 209715 +; RV64-NEXT: addiw a1, a1, 819 +; RV64-NEXT: slli a2, a1, 32 +; RV64-NEXT: add a1, a1, a2 +; RV64-NEXT: vand.vx v9, v8, a1 +; RV64-NEXT: vsrl.vi v8, v8, 2 +; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vadd.vv v8, v9, v8 +; RV64-NEXT: vsrl.vi v9, v8, 4 +; RV64-NEXT: vadd.vv v8, v8, v9 +; RV64-NEXT: lui a1, 61681 +; RV64-NEXT: addiw a1, a1, -241 +; RV64-NEXT: slli a2, a1, 32 +; RV64-NEXT: add a1, a1, a2 +; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: lui a1, 4112 +; RV64-NEXT: addiw a1, a1, 257 +; RV64-NEXT: slli a2, a1, 32 +; RV64-NEXT: add a1, a1, a2 +; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: li a1, 56 +; RV64-NEXT: vsrl.vx v8, v8, a1 +; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: ret ; ; ZVBB-LABEL: ctpop_v2i64: ; ZVBB: # %bb.0: @@ -295,57 +217,25 @@ define void @ctpop_v2i64(ptr %x, ptr %y) { declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) define void @ctpop_v32i8(ptr %x, ptr %y) { -; LMULMAX2-LABEL: ctpop_v32i8: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a1, 32 -; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-NEXT: vle8.v v8, (a0) -; LMULMAX2-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-NEXT: li a1, 85 -; LMULMAX2-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-NEXT: li a1, 51 -; LMULMAX2-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-NEXT: vand.vi v8, v8, 15 -; LMULMAX2-NEXT: vse8.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: ctpop_v32i8: 
-; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vle8.v v8, (a1) -; LMULMAX1-NEXT: vle8.v v9, (a0) -; LMULMAX1-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-NEXT: li a2, 85 -; LMULMAX1-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-NEXT: li a3, 51 -; LMULMAX1-NEXT: vand.vx v10, v8, a3 -; LMULMAX1-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-NEXT: vand.vx v8, v8, a3 -; LMULMAX1-NEXT: vadd.vv v8, v10, v8 -; LMULMAX1-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-NEXT: vand.vi v8, v8, 15 -; LMULMAX1-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-NEXT: vsub.vv v9, v9, v10 -; LMULMAX1-NEXT: vand.vx v10, v9, a3 -; LMULMAX1-NEXT: vsrl.vi v9, v9, 2 -; LMULMAX1-NEXT: vand.vx v9, v9, a3 -; LMULMAX1-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-NEXT: vand.vi v9, v9, 15 -; LMULMAX1-NEXT: vse8.v v9, (a0) -; LMULMAX1-NEXT: vse8.v v8, (a1) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: ctpop_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsrl.vi v10, v8, 1 +; CHECK-NEXT: li a1, 85 +; CHECK-NEXT: vand.vx v10, v10, a1 +; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: li a1, 51 +; CHECK-NEXT: vand.vx v10, v8, a1 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: vsrl.vi v10, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: vand.vi v8, v8, 15 +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret ; ; ZVBB-LABEL: ctpop_v32i8: ; ZVBB: # %bb.0: @@ -364,72 +254,31 @@ define void @ctpop_v32i8(ptr %x, ptr %y) { declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>) define void @ctpop_v16i16(ptr %x, ptr %y) { -; LMULMAX2-LABEL: ctpop_v16i16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-NEXT: 
vle16.v v8, (a0) -; LMULMAX2-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-NEXT: lui a1, 5 -; LMULMAX2-NEXT: addi a1, a1, 1365 -; LMULMAX2-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-NEXT: lui a1, 3 -; LMULMAX2-NEXT: addi a1, a1, 819 -; LMULMAX2-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-NEXT: lui a1, 1 -; LMULMAX2-NEXT: addi a1, a1, -241 -; LMULMAX2-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-NEXT: li a1, 257 -; LMULMAX2-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX2-NEXT: vse16.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: ctpop_v16i16: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vle16.v v8, (a1) -; LMULMAX1-NEXT: vle16.v v9, (a0) -; LMULMAX1-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-NEXT: lui a2, 5 -; LMULMAX1-NEXT: addi a2, a2, 1365 -; LMULMAX1-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-NEXT: lui a3, 3 -; LMULMAX1-NEXT: addi a3, a3, 819 -; LMULMAX1-NEXT: vand.vx v10, v8, a3 -; LMULMAX1-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-NEXT: vand.vx v8, v8, a3 -; LMULMAX1-NEXT: vadd.vv v8, v10, v8 -; LMULMAX1-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-NEXT: lui a4, 1 -; LMULMAX1-NEXT: addi a4, a4, -241 -; LMULMAX1-NEXT: vand.vx v8, v8, a4 -; LMULMAX1-NEXT: li a5, 257 -; LMULMAX1-NEXT: vmul.vx v8, v8, a5 -; LMULMAX1-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX1-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-NEXT: vsub.vv v9, v9, v10 -; LMULMAX1-NEXT: vand.vx v10, v9, a3 -; LMULMAX1-NEXT: vsrl.vi v9, v9, 2 -; LMULMAX1-NEXT: vand.vx v9, v9, a3 -; LMULMAX1-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-NEXT: vand.vx v9, v9, a4 
-; LMULMAX1-NEXT: vmul.vx v9, v9, a5 -; LMULMAX1-NEXT: vsrl.vi v9, v9, 8 -; LMULMAX1-NEXT: vse16.v v9, (a0) -; LMULMAX1-NEXT: vse16.v v8, (a1) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: ctpop_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsrl.vi v10, v8, 1 +; CHECK-NEXT: lui a1, 5 +; CHECK-NEXT: addi a1, a1, 1365 +; CHECK-NEXT: vand.vx v10, v10, a1 +; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: lui a1, 3 +; CHECK-NEXT: addi a1, a1, 819 +; CHECK-NEXT: vand.vx v10, v8, a1 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: vsrl.vi v10, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: lui a1, 1 +; CHECK-NEXT: addi a1, a1, -241 +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: li a1, 257 +; CHECK-NEXT: vmul.vx v8, v8, a1 +; CHECK-NEXT: vsrl.vi v8, v8, 8 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret ; ; ZVBB-LABEL: ctpop_v16i16: ; ZVBB: # %bb.0: @@ -447,74 +296,32 @@ define void @ctpop_v16i16(ptr %x, ptr %y) { declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>) define void @ctpop_v8i32(ptr %x, ptr %y) { -; LMULMAX2-LABEL: ctpop_v8i32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-NEXT: lui a1, 349525 -; LMULMAX2-NEXT: addi a1, a1, 1365 -; LMULMAX2-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-NEXT: lui a1, 209715 -; LMULMAX2-NEXT: addi a1, a1, 819 -; LMULMAX2-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-NEXT: lui a1, 61681 -; LMULMAX2-NEXT: addi a1, a1, -241 -; LMULMAX2-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-NEXT: lui a1, 4112 -; LMULMAX2-NEXT: addi a1, a1, 257 -; LMULMAX2-NEXT: vmul.vx v8, v8, a1 -; 
LMULMAX2-NEXT: vsrl.vi v8, v8, 24 -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: ctpop_v8i32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vle32.v v8, (a1) -; LMULMAX1-NEXT: vle32.v v9, (a0) -; LMULMAX1-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-NEXT: lui a2, 349525 -; LMULMAX1-NEXT: addi a2, a2, 1365 -; LMULMAX1-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-NEXT: lui a3, 209715 -; LMULMAX1-NEXT: addi a3, a3, 819 -; LMULMAX1-NEXT: vand.vx v10, v8, a3 -; LMULMAX1-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-NEXT: vand.vx v8, v8, a3 -; LMULMAX1-NEXT: vadd.vv v8, v10, v8 -; LMULMAX1-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-NEXT: lui a4, 61681 -; LMULMAX1-NEXT: addi a4, a4, -241 -; LMULMAX1-NEXT: vand.vx v8, v8, a4 -; LMULMAX1-NEXT: lui a5, 4112 -; LMULMAX1-NEXT: addi a5, a5, 257 -; LMULMAX1-NEXT: vmul.vx v8, v8, a5 -; LMULMAX1-NEXT: vsrl.vi v8, v8, 24 -; LMULMAX1-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-NEXT: vsub.vv v9, v9, v10 -; LMULMAX1-NEXT: vand.vx v10, v9, a3 -; LMULMAX1-NEXT: vsrl.vi v9, v9, 2 -; LMULMAX1-NEXT: vand.vx v9, v9, a3 -; LMULMAX1-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-NEXT: vand.vx v9, v9, a4 -; LMULMAX1-NEXT: vmul.vx v9, v9, a5 -; LMULMAX1-NEXT: vsrl.vi v9, v9, 24 -; LMULMAX1-NEXT: vse32.v v9, (a0) -; LMULMAX1-NEXT: vse32.v v8, (a1) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: ctpop_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsrl.vi v10, v8, 1 +; CHECK-NEXT: lui a1, 349525 +; CHECK-NEXT: addi a1, a1, 1365 +; CHECK-NEXT: vand.vx v10, v10, a1 +; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: lui a1, 209715 +; CHECK-NEXT: addi a1, a1, 819 +; CHECK-NEXT: vand.vx v10, v8, a1 +; CHECK-NEXT: vsrl.vi v8, v8, 2 +; 
CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: vsrl.vi v10, v8, 4 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: lui a1, 61681 +; CHECK-NEXT: addi a1, a1, -241 +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: lui a1, 4112 +; CHECK-NEXT: addi a1, a1, 257 +; CHECK-NEXT: vmul.vx v8, v8, a1 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret ; ; ZVBB-LABEL: ctpop_v8i32: ; ZVBB: # %bb.0: @@ -530,38 +337,14 @@ define void @ctpop_v8i32(ptr %x, ptr %y) { ret void } define <8 x i1> @ctpop_v8i32_ult_two(ptr %x, ptr %y) { -; LMULMAX2-LABEL: ctpop_v8i32_ult_two: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: vadd.vi v10, v8, -1 -; LMULMAX2-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: ctpop_v8i32_ult_two: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vle32.v v8, (a0) -; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vle32.v v9, (a0) -; LMULMAX1-NEXT: vadd.vi v10, v8, -1 -; LMULMAX1-NEXT: vand.vv v8, v8, v10 -; LMULMAX1-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vadd.vi v10, v9, -1 -; LMULMAX1-NEXT: vand.vv v9, v9, v10 -; LMULMAX1-NEXT: vmseq.vi v0, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; LMULMAX1-NEXT: vmv.v.i v9, 0 -; LMULMAX1-NEXT: vmerge.vim v9, v9, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslideup.vi v8, v9, 4 -; LMULMAX1-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX1-NEXT: ret +; CHECK-LABEL: ctpop_v8i32_ult_two: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vadd.vi v10, v8, -1 +; CHECK-NEXT: vand.vv v8, v8, v10 +; CHECK-NEXT: 
vmseq.vi v0, v8, 0 +; CHECK-NEXT: ret ; ; ZVBB-LABEL: ctpop_v8i32_ult_two: ; ZVBB: # %bb.0: @@ -577,38 +360,14 @@ define <8 x i1> @ctpop_v8i32_ult_two(ptr %x, ptr %y) { ret <8 x i1> %cmp } define <8 x i1> @ctpop_v8i32_ugt_one(ptr %x, ptr %y) { -; LMULMAX2-LABEL: ctpop_v8i32_ugt_one: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: vadd.vi v10, v8, -1 -; LMULMAX2-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: ctpop_v8i32_ugt_one: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vle32.v v8, (a0) -; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vle32.v v9, (a0) -; LMULMAX1-NEXT: vadd.vi v10, v8, -1 -; LMULMAX1-NEXT: vand.vv v8, v8, v10 -; LMULMAX1-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vadd.vi v10, v9, -1 -; LMULMAX1-NEXT: vand.vv v9, v9, v10 -; LMULMAX1-NEXT: vmsne.vi v0, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; LMULMAX1-NEXT: vmv.v.i v9, 0 -; LMULMAX1-NEXT: vmerge.vim v9, v9, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslideup.vi v8, v9, 4 -; LMULMAX1-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX1-NEXT: ret +; CHECK-LABEL: ctpop_v8i32_ugt_one: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vadd.vi v10, v8, -1 +; CHECK-NEXT: vand.vv v8, v8, v10 +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: ret ; ; ZVBB-LABEL: ctpop_v8i32_ugt_one: ; ZVBB: # %bb.0: @@ -624,38 +383,14 @@ define <8 x i1> @ctpop_v8i32_ugt_one(ptr %x, ptr %y) { ret <8 x i1> %cmp } define <8 x i1> @ctpop_v8i32_eq_one(ptr %x, ptr %y) { -; LMULMAX2-LABEL: ctpop_v8i32_eq_one: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, 
e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: vadd.vi v10, v8, -1 -; LMULMAX2-NEXT: vxor.vv v8, v8, v10 -; LMULMAX2-NEXT: vmsltu.vv v0, v10, v8 -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: ctpop_v8i32_eq_one: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vle32.v v8, (a0) -; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vle32.v v9, (a0) -; LMULMAX1-NEXT: vadd.vi v10, v8, -1 -; LMULMAX1-NEXT: vxor.vv v8, v8, v10 -; LMULMAX1-NEXT: vmsltu.vv v0, v10, v8 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vadd.vi v10, v9, -1 -; LMULMAX1-NEXT: vxor.vv v9, v9, v10 -; LMULMAX1-NEXT: vmsltu.vv v0, v10, v9 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; LMULMAX1-NEXT: vmv.v.i v9, 0 -; LMULMAX1-NEXT: vmerge.vim v9, v9, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslideup.vi v8, v9, 4 -; LMULMAX1-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX1-NEXT: ret +; CHECK-LABEL: ctpop_v8i32_eq_one: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vadd.vi v10, v8, -1 +; CHECK-NEXT: vxor.vv v8, v8, v10 +; CHECK-NEXT: vmsltu.vv v0, v10, v8 +; CHECK-NEXT: ret ; ; ZVBB-LABEL: ctpop_v8i32_eq_one: ; ZVBB: # %bb.0: @@ -671,38 +406,14 @@ define <8 x i1> @ctpop_v8i32_eq_one(ptr %x, ptr %y) { ret <8 x i1> %cmp } define <8 x i1> @ctpop_v8i32_ne_one(ptr %x, ptr %y) { -; LMULMAX2-LABEL: ctpop_v8i32_ne_one: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: vadd.vi v10, v8, -1 -; LMULMAX2-NEXT: vxor.vv v8, v8, v10 -; LMULMAX2-NEXT: vmsleu.vv v0, v8, v10 -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: ctpop_v8i32_ne_one: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vle32.v v8, 
(a0) -; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vle32.v v9, (a0) -; LMULMAX1-NEXT: vadd.vi v10, v8, -1 -; LMULMAX1-NEXT: vxor.vv v8, v8, v10 -; LMULMAX1-NEXT: vmsleu.vv v0, v8, v10 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vadd.vi v10, v9, -1 -; LMULMAX1-NEXT: vxor.vv v9, v9, v10 -; LMULMAX1-NEXT: vmsleu.vv v0, v9, v10 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; LMULMAX1-NEXT: vmv.v.i v9, 0 -; LMULMAX1-NEXT: vmerge.vim v9, v9, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslideup.vi v8, v9, 4 -; LMULMAX1-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX1-NEXT: ret +; CHECK-LABEL: ctpop_v8i32_ne_one: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vadd.vi v10, v8, -1 +; CHECK-NEXT: vxor.vv v8, v8, v10 +; CHECK-NEXT: vmsleu.vv v0, v8, v10 +; CHECK-NEXT: ret ; ; ZVBB-LABEL: ctpop_v8i32_ne_one: ; ZVBB: # %bb.0: @@ -720,187 +431,81 @@ define <8 x i1> @ctpop_v8i32_ne_one(ptr %x, ptr %y) { declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) define void @ctpop_v4i64(ptr %x, ptr %y) { -; LMULMAX2-RV32-LABEL: ctpop_v4i64: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32-NEXT: lui a1, 349525 -; LMULMAX2-RV32-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 1 -; LMULMAX2-RV32-NEXT: vand.vv v10, v12, v10 -; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: lui a1, 209715 -; LMULMAX2-RV32-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, 
ma -; LMULMAX2-RV32-NEXT: vand.vv v12, v8, v10 -; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v12, v8 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: lui a1, 61681 -; LMULMAX2-RV32-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: lui a1, 4112 -; LMULMAX2-RV32-NEXT: addi a1, a1, 257 -; LMULMAX2-RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmul.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: li a1, 56 -; LMULMAX2-RV32-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX2-RV64-LABEL: ctpop_v4i64: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64-NEXT: lui a1, 349525 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64-NEXT: add a1, a1, a2 -; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: lui a1, 209715 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64-NEXT: add a1, a1, a2 -; LMULMAX2-RV64-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: lui a1, 61681 -; LMULMAX2-RV64-NEXT: addiw a1, a1, -241 -; LMULMAX2-RV64-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64-NEXT: add a1, a1, a2 -; LMULMAX2-RV64-NEXT: vand.vx v8, 
v8, a1 -; LMULMAX2-RV64-NEXT: lui a1, 4112 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 257 -; LMULMAX2-RV64-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64-NEXT: add a1, a1, a2 -; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: li a1, 56 -; LMULMAX2-RV64-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret -; -; LMULMAX1-RV32-LABEL: ctpop_v4i64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v9, (a1) -; LMULMAX1-RV32-NEXT: lui a2, 349525 -; LMULMAX1-RV32-NEXT: addi a2, a2, 1365 -; LMULMAX1-RV32-NEXT: vsetvli a3, zero, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.x v10, a2 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vsrl.vi v11, v9, 1 -; LMULMAX1-RV32-NEXT: vand.vv v11, v11, v10 -; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v11 -; LMULMAX1-RV32-NEXT: lui a2, 209715 -; LMULMAX1-RV32-NEXT: addi a2, a2, 819 -; LMULMAX1-RV32-NEXT: vsetvli a3, zero, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.x v11, a2 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vand.vv v12, v9, v11 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 2 -; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v11 -; LMULMAX1-RV32-NEXT: vadd.vv v9, v12, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v12, v9, 4 -; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v12 -; LMULMAX1-RV32-NEXT: lui a2, 61681 -; LMULMAX1-RV32-NEXT: addi a2, a2, -241 -; LMULMAX1-RV32-NEXT: vsetvli a3, zero, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.x v12, a2 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v12 -; LMULMAX1-RV32-NEXT: lui a2, 4112 -; LMULMAX1-RV32-NEXT: addi a2, a2, 257 -; LMULMAX1-RV32-NEXT: vsetvli a3, zero, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.x v13, a2 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmul.vv v9, v9, v13 -; 
LMULMAX1-RV32-NEXT: li a2, 56 -; LMULMAX1-RV32-NEXT: vsrl.vx v9, v9, a2 -; LMULMAX1-RV32-NEXT: vsrl.vi v14, v8, 1 -; LMULMAX1-RV32-NEXT: vand.vv v10, v14, v10 -; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vand.vv v10, v8, v11 -; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v10, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v12 -; LMULMAX1-RV32-NEXT: vmul.vv v8, v8, v13 -; LMULMAX1-RV32-NEXT: vsrl.vx v8, v8, a2 -; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v9, (a1) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: ctpop_v4i64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v8, (a1) -; LMULMAX1-RV64-NEXT: vle64.v v9, (a0) -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV64-NEXT: lui a2, 349525 -; LMULMAX1-RV64-NEXT: addiw a2, a2, 1365 -; LMULMAX1-RV64-NEXT: slli a3, a2, 32 -; LMULMAX1-RV64-NEXT: add a2, a2, a3 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: lui a3, 209715 -; LMULMAX1-RV64-NEXT: addiw a3, a3, 819 -; LMULMAX1-RV64-NEXT: slli a4, a3, 32 -; LMULMAX1-RV64-NEXT: add a3, a3, a4 -; LMULMAX1-RV64-NEXT: vand.vx v10, v8, a3 -; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a3 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: lui a4, 61681 -; LMULMAX1-RV64-NEXT: addiw a4, a4, -241 -; LMULMAX1-RV64-NEXT: slli a5, a4, 32 -; LMULMAX1-RV64-NEXT: add a4, a4, a5 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4 -; LMULMAX1-RV64-NEXT: lui a5, 4112 -; LMULMAX1-RV64-NEXT: addiw a5, a5, 257 -; LMULMAX1-RV64-NEXT: slli a6, a5, 32 -; LMULMAX1-RV64-NEXT: add a5, a5, a6 -; 
LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a5 -; LMULMAX1-RV64-NEXT: li a6, 56 -; LMULMAX1-RV64-NEXT: vsrl.vx v8, v8, a6 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV64-NEXT: vsub.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vand.vx v10, v9, a3 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 2 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a3 -; LMULMAX1-RV64-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a4 -; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a5 -; LMULMAX1-RV64-NEXT: vsrl.vx v9, v9, a6 -; LMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v8, (a1) -; LMULMAX1-RV64-NEXT: ret +; RV32-LABEL: ctpop_v4i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vsrl.vi v12, v8, 1 +; RV32-NEXT: vand.vv v10, v12, v10 +; RV32-NEXT: vsub.vv v8, v8, v10 +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vand.vv v12, v8, v10 +; RV32-NEXT: vsrl.vi v8, v8, 2 +; RV32-NEXT: vand.vv v8, v8, v10 +; RV32-NEXT: vadd.vv v8, v12, v8 +; RV32-NEXT: vsrl.vi v10, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v10 +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vand.vv v8, v8, v10 +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vmul.vv v8, v8, v10 +; RV32-NEXT: li a1, 56 +; 
RV32-NEXT: vsrl.vx v8, v8, a1 +; RV32-NEXT: vse64.v v8, (a0) +; RV32-NEXT: ret +; +; RV64-LABEL: ctpop_v4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vsrl.vi v10, v8, 1 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: slli a2, a1, 32 +; RV64-NEXT: add a1, a1, a2 +; RV64-NEXT: vand.vx v10, v10, a1 +; RV64-NEXT: vsub.vv v8, v8, v10 +; RV64-NEXT: lui a1, 209715 +; RV64-NEXT: addiw a1, a1, 819 +; RV64-NEXT: slli a2, a1, 32 +; RV64-NEXT: add a1, a1, a2 +; RV64-NEXT: vand.vx v10, v8, a1 +; RV64-NEXT: vsrl.vi v8, v8, 2 +; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vadd.vv v8, v10, v8 +; RV64-NEXT: vsrl.vi v10, v8, 4 +; RV64-NEXT: vadd.vv v8, v8, v10 +; RV64-NEXT: lui a1, 61681 +; RV64-NEXT: addiw a1, a1, -241 +; RV64-NEXT: slli a2, a1, 32 +; RV64-NEXT: add a1, a1, a2 +; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: lui a1, 4112 +; RV64-NEXT: addiw a1, a1, 257 +; RV64-NEXT: slli a2, a1, 32 +; RV64-NEXT: add a1, a1, a2 +; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: li a1, 56 +; RV64-NEXT: vsrl.vx v8, v8, a1 +; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: ret ; ; ZVBB-LABEL: ctpop_v4i64: ; ZVBB: # %bb.0: @@ -916,68 +521,14 @@ define void @ctpop_v4i64(ptr %x, ptr %y) { ret void } define <4 x i1> @ctpop_v4i64_ult_two(ptr %x, ptr %y) { -; LMULMAX2-LABEL: ctpop_v4i64_ult_two: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-NEXT: vle64.v v8, (a0) -; LMULMAX2-NEXT: vadd.vi v10, v8, -1 -; LMULMAX2-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: ctpop_v4i64_ult_two: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a1) -; LMULMAX1-RV32-NEXT: vle64.v v9, (a0) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.i v10, -1 -; LMULMAX1-RV32-NEXT: vsetivli 
zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vadd.vv v11, v9, v10 -; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v11 -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.i v11, 0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmseq.vv v0, v9, v11 -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.i v9, 0 -; LMULMAX1-RV32-NEXT: vmerge.vim v9, v9, 1, v0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vadd.vv v10, v8, v10 -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vmseq.vv v0, v8, v11 -; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0 -; LMULMAX1-RV32-NEXT: vmerge.vim v8, v8, 1, v0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1-RV32-NEXT: vslideup.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: vmsne.vi v0, v9, 0 -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: ctpop_v4i64_ult_two: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a0, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v9, (a0) -; LMULMAX1-RV64-NEXT: vadd.vi v10, v8, -1 -; LMULMAX1-RV64-NEXT: vand.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1-RV64-NEXT: vmv.v.i v8, 0 -; LMULMAX1-RV64-NEXT: vmerge.vim v8, v8, 1, v0 -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vadd.vi v10, v9, -1 -; LMULMAX1-RV64-NEXT: vand.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vmseq.vi v0, v9, 0 -; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; LMULMAX1-RV64-NEXT: vmv.v.i v9, 0 -; LMULMAX1-RV64-NEXT: vmerge.vim v9, v9, 1, v0 -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1-RV64-NEXT: vslideup.vi v8, v9, 2 -; LMULMAX1-RV64-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: 
ctpop_v4i64_ult_two: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vadd.vi v10, v8, -1 +; CHECK-NEXT: vand.vv v8, v8, v10 +; CHECK-NEXT: vmseq.vi v0, v8, 0 +; CHECK-NEXT: ret ; ; ZVBB-LABEL: ctpop_v4i64_ult_two: ; ZVBB: # %bb.0: @@ -993,68 +544,14 @@ define <4 x i1> @ctpop_v4i64_ult_two(ptr %x, ptr %y) { ret <4 x i1> %cmp } define <4 x i1> @ctpop_v4i64_ugt_one(ptr %x, ptr %y) { -; LMULMAX2-LABEL: ctpop_v4i64_ugt_one: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-NEXT: vle64.v v8, (a0) -; LMULMAX2-NEXT: vadd.vi v10, v8, -1 -; LMULMAX2-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: ctpop_v4i64_ugt_one: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a1) -; LMULMAX1-RV32-NEXT: vle64.v v9, (a0) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.i v10, -1 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vadd.vv v11, v9, v10 -; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v11 -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.i v11, 0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmsne.vv v0, v9, v11 -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.i v9, 0 -; LMULMAX1-RV32-NEXT: vmerge.vim v9, v9, 1, v0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vadd.vv v10, v8, v10 -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vmsne.vv v0, v8, v11 -; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0 -; LMULMAX1-RV32-NEXT: vmerge.vim v8, v8, 1, v0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1-RV32-NEXT: vslideup.vi v9, v8, 2 -; 
LMULMAX1-RV32-NEXT: vmsne.vi v0, v9, 0 -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: ctpop_v4i64_ugt_one: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a0, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v9, (a0) -; LMULMAX1-RV64-NEXT: vadd.vi v10, v8, -1 -; LMULMAX1-RV64-NEXT: vand.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1-RV64-NEXT: vmv.v.i v8, 0 -; LMULMAX1-RV64-NEXT: vmerge.vim v8, v8, 1, v0 -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vadd.vi v10, v9, -1 -; LMULMAX1-RV64-NEXT: vand.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vmsne.vi v0, v9, 0 -; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; LMULMAX1-RV64-NEXT: vmv.v.i v9, 0 -; LMULMAX1-RV64-NEXT: vmerge.vim v9, v9, 1, v0 -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1-RV64-NEXT: vslideup.vi v8, v9, 2 -; LMULMAX1-RV64-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: ctpop_v4i64_ugt_one: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vadd.vi v10, v8, -1 +; CHECK-NEXT: vand.vv v8, v8, v10 +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: ret ; ; ZVBB-LABEL: ctpop_v4i64_ugt_one: ; ZVBB: # %bb.0: @@ -1070,65 +567,14 @@ define <4 x i1> @ctpop_v4i64_ugt_one(ptr %x, ptr %y) { ret <4 x i1> %cmp } define <4 x i1> @ctpop_v4i64_eq_one(ptr %x, ptr %y) { -; LMULMAX2-LABEL: ctpop_v4i64_eq_one: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-NEXT: vle64.v v8, (a0) -; LMULMAX2-NEXT: vadd.vi v10, v8, -1 -; LMULMAX2-NEXT: vxor.vv v8, v8, v10 -; LMULMAX2-NEXT: vmsltu.vv v0, v10, v8 -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: ctpop_v4i64_eq_one: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, 
m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a1) -; LMULMAX1-RV32-NEXT: vle64.v v9, (a0) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.i v10, -1 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vadd.vv v11, v9, v10 -; LMULMAX1-RV32-NEXT: vxor.vv v9, v9, v11 -; LMULMAX1-RV32-NEXT: vmsltu.vv v0, v11, v9 -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.i v9, 0 -; LMULMAX1-RV32-NEXT: vmerge.vim v9, v9, 1, v0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vadd.vv v10, v8, v10 -; LMULMAX1-RV32-NEXT: vxor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vmsltu.vv v0, v10, v8 -; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0 -; LMULMAX1-RV32-NEXT: vmerge.vim v8, v8, 1, v0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1-RV32-NEXT: vslideup.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: vmsne.vi v0, v9, 0 -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: ctpop_v4i64_eq_one: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a0, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v9, (a0) -; LMULMAX1-RV64-NEXT: vadd.vi v10, v8, -1 -; LMULMAX1-RV64-NEXT: vxor.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vmsltu.vv v0, v10, v8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1-RV64-NEXT: vmv.v.i v8, 0 -; LMULMAX1-RV64-NEXT: vmerge.vim v8, v8, 1, v0 -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vadd.vi v10, v9, -1 -; LMULMAX1-RV64-NEXT: vxor.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vmsltu.vv v0, v10, v9 -; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; LMULMAX1-RV64-NEXT: vmv.v.i v9, 0 -; LMULMAX1-RV64-NEXT: vmerge.vim v9, v9, 1, v0 -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1-RV64-NEXT: vslideup.vi v8, v9, 2 -; 
LMULMAX1-RV64-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: ctpop_v4i64_eq_one: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vadd.vi v10, v8, -1 +; CHECK-NEXT: vxor.vv v8, v8, v10 +; CHECK-NEXT: vmsltu.vv v0, v10, v8 +; CHECK-NEXT: ret ; ; ZVBB-LABEL: ctpop_v4i64_eq_one: ; ZVBB: # %bb.0: @@ -1144,65 +590,14 @@ define <4 x i1> @ctpop_v4i64_eq_one(ptr %x, ptr %y) { ret <4 x i1> %cmp } define <4 x i1> @ctpop_v4i64_ne_one(ptr %x, ptr %y) { -; LMULMAX2-LABEL: ctpop_v4i64_ne_one: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-NEXT: vle64.v v8, (a0) -; LMULMAX2-NEXT: vadd.vi v10, v8, -1 -; LMULMAX2-NEXT: vxor.vv v8, v8, v10 -; LMULMAX2-NEXT: vmsleu.vv v0, v8, v10 -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: ctpop_v4i64_ne_one: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a1) -; LMULMAX1-RV32-NEXT: vle64.v v9, (a0) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.i v10, -1 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vadd.vv v11, v9, v10 -; LMULMAX1-RV32-NEXT: vxor.vv v9, v9, v11 -; LMULMAX1-RV32-NEXT: vmsleu.vv v0, v9, v11 -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.i v9, 0 -; LMULMAX1-RV32-NEXT: vmerge.vim v9, v9, 1, v0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vadd.vv v10, v8, v10 -; LMULMAX1-RV32-NEXT: vxor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vmsleu.vv v0, v8, v10 -; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0 -; LMULMAX1-RV32-NEXT: vmerge.vim v8, v8, 1, v0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1-RV32-NEXT: vslideup.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: vmsne.vi v0, v9, 0 -; LMULMAX1-RV32-NEXT: ret 
-; -; LMULMAX1-RV64-LABEL: ctpop_v4i64_ne_one: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a0, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v9, (a0) -; LMULMAX1-RV64-NEXT: vadd.vi v10, v8, -1 -; LMULMAX1-RV64-NEXT: vxor.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vmsleu.vv v0, v8, v10 -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1-RV64-NEXT: vmv.v.i v8, 0 -; LMULMAX1-RV64-NEXT: vmerge.vim v8, v8, 1, v0 -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vadd.vi v10, v9, -1 -; LMULMAX1-RV64-NEXT: vxor.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vmsleu.vv v0, v9, v10 -; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; LMULMAX1-RV64-NEXT: vmv.v.i v9, 0 -; LMULMAX1-RV64-NEXT: vmerge.vim v9, v9, 1, v0 -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1-RV64-NEXT: vslideup.vi v8, v9, 2 -; LMULMAX1-RV64-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: ctpop_v4i64_ne_one: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vadd.vi v10, v8, -1 +; CHECK-NEXT: vxor.vv v8, v8, v10 +; CHECK-NEXT: vmsleu.vv v0, v8, v10 +; CHECK-NEXT: ret ; ; ZVBB-LABEL: ctpop_v4i64_ne_one: ; ZVBB: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll index 5afd935935e5d9..5802fba2f24545 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll @@ -1,61 +1,74 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x,+zvl128b -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32,LMULMAX2-RV32I -; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x,+zvl128b 
-riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64,LMULMAX2-RV64I -; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x,+zvl128b -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x,+zvl128b -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64 -; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64f,+zvl128b,+f -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32,LMULMAX2-RV32F -; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64f,+zvl128b,+f -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64,LMULMAX2-RV64F -; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32,LMULMAX2-RV32D -; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64,LMULMAX2-RV64D -; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64 -; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8 -; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8 +; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x,+zvl128b 
-verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVI,RV32I +; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVI,RV64I +; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64f,+zvl128b,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVF,RV32F +; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64f,+zvl128b,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVF,RV64F +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVD,RV32D +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVD,RV64D ; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB ; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB define void @cttz_v16i8(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: cttz_v16i8: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: li a1, 1 -; CHECK-NEXT: vsub.vx v9, v8, a1 -; CHECK-NEXT: vnot.v v8, v8 -; CHECK-NEXT: vand.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: li a1, 85 -; CHECK-NEXT: vand.vx v9, v9, a1 -; CHECK-NEXT: vsub.vv v8, v8, v9 -; CHECK-NEXT: li a1, 51 -; CHECK-NEXT: vand.vx v9, v8, a1 -; CHECK-NEXT: vsrl.vi v8, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a1 -; CHECK-NEXT: vadd.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vadd.vv v8, v8, v9 -; CHECK-NEXT: vand.vi v8, v8, 15 -; CHECK-NEXT: vse8.v v8, (a0) -; CHECK-NEXT: ret -; -; LMULMAX8-LABEL: cttz_v16i8: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX8-NEXT: vle8.v v8, (a0) -; LMULMAX8-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX8-NEXT: vand.vv v9, v8, v9 -; LMULMAX8-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; LMULMAX8-NEXT: vzext.vf2 v10, v9 -; LMULMAX8-NEXT: vfwcvt.f.xu.v v12, v10 -; LMULMAX8-NEXT: 
vnsrl.wi v10, v12, 23 -; LMULMAX8-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; LMULMAX8-NEXT: vnsrl.wi v9, v10, 0 -; LMULMAX8-NEXT: li a1, 127 -; LMULMAX8-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX8-NEXT: vsub.vx v8, v9, a1 -; LMULMAX8-NEXT: vmerge.vim v8, v8, 8, v0 -; LMULMAX8-NEXT: vse8.v v8, (a0) -; LMULMAX8-NEXT: ret +; RVI-LABEL: cttz_v16i8: +; RVI: # %bb.0: +; RVI-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RVI-NEXT: vle8.v v8, (a0) +; RVI-NEXT: li a1, 1 +; RVI-NEXT: vsub.vx v9, v8, a1 +; RVI-NEXT: vnot.v v8, v8 +; RVI-NEXT: vand.vv v8, v8, v9 +; RVI-NEXT: vsrl.vi v9, v8, 1 +; RVI-NEXT: li a1, 85 +; RVI-NEXT: vand.vx v9, v9, a1 +; RVI-NEXT: vsub.vv v8, v8, v9 +; RVI-NEXT: li a1, 51 +; RVI-NEXT: vand.vx v9, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 2 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: vadd.vv v8, v9, v8 +; RVI-NEXT: vsrl.vi v9, v8, 4 +; RVI-NEXT: vadd.vv v8, v8, v9 +; RVI-NEXT: vand.vi v8, v8, 15 +; RVI-NEXT: vse8.v v8, (a0) +; RVI-NEXT: ret +; +; RVF-LABEL: cttz_v16i8: +; RVF: # %bb.0: +; RVF-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RVF-NEXT: vle8.v v8, (a0) +; RVF-NEXT: vrsub.vi v9, v8, 0 +; RVF-NEXT: vand.vv v9, v8, v9 +; RVF-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RVF-NEXT: vzext.vf2 v10, v9 +; RVF-NEXT: vfwcvt.f.xu.v v12, v10 +; RVF-NEXT: vnsrl.wi v10, v12, 23 +; RVF-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; RVF-NEXT: vnsrl.wi v9, v10, 0 +; RVF-NEXT: li a1, 127 +; RVF-NEXT: vmseq.vi v0, v8, 0 +; RVF-NEXT: vsub.vx v8, v9, a1 +; RVF-NEXT: vmerge.vim v8, v8, 8, v0 +; RVF-NEXT: vse8.v v8, (a0) +; RVF-NEXT: ret +; +; RVD-LABEL: cttz_v16i8: +; RVD: # %bb.0: +; RVD-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RVD-NEXT: vle8.v v8, (a0) +; RVD-NEXT: vrsub.vi v9, v8, 0 +; RVD-NEXT: vand.vv v9, v8, v9 +; RVD-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RVD-NEXT: vzext.vf2 v10, v9 +; RVD-NEXT: vfwcvt.f.xu.v v12, v10 +; RVD-NEXT: vnsrl.wi v10, v12, 23 +; RVD-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; RVD-NEXT: vnsrl.wi v9, v10, 0 +; RVD-NEXT: li a1, 127 +; 
RVD-NEXT: vmseq.vi v0, v8, 0 +; RVD-NEXT: vsub.vx v8, v9, a1 +; RVD-NEXT: vmerge.vim v8, v8, 8, v0 +; RVD-NEXT: vse8.v v8, (a0) +; RVD-NEXT: ret ; ; ZVBB-LABEL: cttz_v16i8: ; ZVBB: # %bb.0: @@ -73,175 +86,67 @@ define void @cttz_v16i8(ptr %x, ptr %y) nounwind { declare <16 x i8> @llvm.cttz.v16i8(<16 x i8>, i1) define void @cttz_v8i16(ptr %x, ptr %y) nounwind { -; LMULMAX2-RV32I-LABEL: cttz_v8i16: -; LMULMAX2-RV32I: # %bb.0: -; LMULMAX2-RV32I-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV32I-NEXT: li a1, 1 -; LMULMAX2-RV32I-NEXT: vsub.vx v9, v8, a1 -; LMULMAX2-RV32I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV32I-NEXT: lui a1, 5 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32I-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: lui a1, 3 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32I-NEXT: vand.vx v9, v8, a1 -; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v9, v8 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: lui a1, 1 -; LMULMAX2-RV32I-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: li a1, 257 -; LMULMAX2-RV32I-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX2-RV32I-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV32I-NEXT: ret -; -; LMULMAX2-RV64I-LABEL: cttz_v8i16: -; LMULMAX2-RV64I: # %bb.0: -; LMULMAX2-RV64I-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX2-RV64I-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV64I-NEXT: li a1, 1 -; LMULMAX2-RV64I-NEXT: vsub.vx v9, v8, a1 -; LMULMAX2-RV64I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV64I-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV64I-NEXT: lui a1, 5 -; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64I-NEXT: 
vand.vx v9, v9, a1 -; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: lui a1, 3 -; LMULMAX2-RV64I-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a1 -; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v9, v8 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: lui a1, 1 -; LMULMAX2-RV64I-NEXT: addi a1, a1, -241 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: li a1, 257 -; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX2-RV64I-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV64I-NEXT: ret -; -; LMULMAX1-LABEL: cttz_v8i16: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vle16.v v8, (a0) -; LMULMAX1-NEXT: li a1, 1 -; LMULMAX1-NEXT: vsub.vx v9, v8, a1 -; LMULMAX1-NEXT: vnot.v v8, v8 -; LMULMAX1-NEXT: vand.vv v8, v8, v9 -; LMULMAX1-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-NEXT: lui a1, 5 -; LMULMAX1-NEXT: addi a1, a1, 1365 -; LMULMAX1-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-NEXT: vsub.vv v8, v8, v9 -; LMULMAX1-NEXT: lui a1, 3 -; LMULMAX1-NEXT: addi a1, a1, 819 -; LMULMAX1-NEXT: vand.vx v9, v8, a1 -; LMULMAX1-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-NEXT: vadd.vv v8, v9, v8 -; LMULMAX1-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-NEXT: vadd.vv v8, v8, v9 -; LMULMAX1-NEXT: lui a1, 1 -; LMULMAX1-NEXT: addi a1, a1, -241 -; LMULMAX1-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-NEXT: li a1, 257 -; LMULMAX1-NEXT: vmul.vx v8, v8, a1 -; LMULMAX1-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX1-NEXT: vse16.v v8, (a0) -; LMULMAX1-NEXT: ret -; -; LMULMAX2-RV32F-LABEL: cttz_v8i16: -; LMULMAX2-RV32F: # %bb.0: -; LMULMAX2-RV32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX2-RV32F-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV32F-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX2-RV32F-NEXT: vand.vv v9, v8, v9 -; LMULMAX2-RV32F-NEXT: vfwcvt.f.xu.v v10, v9 -; 
LMULMAX2-RV32F-NEXT: vnsrl.wi v9, v10, 23 -; LMULMAX2-RV32F-NEXT: li a1, 127 -; LMULMAX2-RV32F-NEXT: vsub.vx v9, v9, a1 -; LMULMAX2-RV32F-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX2-RV32F-NEXT: li a1, 16 -; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v9, a1, v0 -; LMULMAX2-RV32F-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV32F-NEXT: ret -; -; LMULMAX2-RV64F-LABEL: cttz_v8i16: -; LMULMAX2-RV64F: # %bb.0: -; LMULMAX2-RV64F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX2-RV64F-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV64F-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX2-RV64F-NEXT: vand.vv v9, v8, v9 -; LMULMAX2-RV64F-NEXT: vfwcvt.f.xu.v v10, v9 -; LMULMAX2-RV64F-NEXT: vnsrl.wi v9, v10, 23 -; LMULMAX2-RV64F-NEXT: li a1, 127 -; LMULMAX2-RV64F-NEXT: vsub.vx v9, v9, a1 -; LMULMAX2-RV64F-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX2-RV64F-NEXT: li a1, 16 -; LMULMAX2-RV64F-NEXT: vmerge.vxm v8, v9, a1, v0 -; LMULMAX2-RV64F-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV64F-NEXT: ret -; -; LMULMAX2-RV32D-LABEL: cttz_v8i16: -; LMULMAX2-RV32D: # %bb.0: -; LMULMAX2-RV32D-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX2-RV32D-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV32D-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX2-RV32D-NEXT: vand.vv v9, v8, v9 -; LMULMAX2-RV32D-NEXT: vfwcvt.f.xu.v v10, v9 -; LMULMAX2-RV32D-NEXT: vnsrl.wi v9, v10, 23 -; LMULMAX2-RV32D-NEXT: li a1, 127 -; LMULMAX2-RV32D-NEXT: vsub.vx v9, v9, a1 -; LMULMAX2-RV32D-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX2-RV32D-NEXT: li a1, 16 -; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v9, a1, v0 -; LMULMAX2-RV32D-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV32D-NEXT: ret -; -; LMULMAX2-RV64D-LABEL: cttz_v8i16: -; LMULMAX2-RV64D: # %bb.0: -; LMULMAX2-RV64D-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX2-RV64D-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV64D-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX2-RV64D-NEXT: vand.vv v9, v8, v9 -; LMULMAX2-RV64D-NEXT: vfwcvt.f.xu.v v10, v9 -; LMULMAX2-RV64D-NEXT: vnsrl.wi v9, v10, 23 -; LMULMAX2-RV64D-NEXT: li a1, 127 -; LMULMAX2-RV64D-NEXT: vsub.vx v9, v9, a1 -; LMULMAX2-RV64D-NEXT: 
vmseq.vi v0, v8, 0 -; LMULMAX2-RV64D-NEXT: li a1, 16 -; LMULMAX2-RV64D-NEXT: vmerge.vxm v8, v9, a1, v0 -; LMULMAX2-RV64D-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV64D-NEXT: ret -; -; LMULMAX8-LABEL: cttz_v8i16: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX8-NEXT: vle16.v v8, (a0) -; LMULMAX8-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX8-NEXT: vand.vv v9, v8, v9 -; LMULMAX8-NEXT: vfwcvt.f.xu.v v10, v9 -; LMULMAX8-NEXT: vnsrl.wi v9, v10, 23 -; LMULMAX8-NEXT: li a1, 127 -; LMULMAX8-NEXT: vsub.vx v9, v9, a1 -; LMULMAX8-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX8-NEXT: li a1, 16 -; LMULMAX8-NEXT: vmerge.vxm v8, v9, a1, v0 -; LMULMAX8-NEXT: vse16.v v8, (a0) -; LMULMAX8-NEXT: ret +; RVI-LABEL: cttz_v8i16: +; RVI: # %bb.0: +; RVI-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RVI-NEXT: vle16.v v8, (a0) +; RVI-NEXT: li a1, 1 +; RVI-NEXT: vsub.vx v9, v8, a1 +; RVI-NEXT: vnot.v v8, v8 +; RVI-NEXT: vand.vv v8, v8, v9 +; RVI-NEXT: vsrl.vi v9, v8, 1 +; RVI-NEXT: lui a1, 5 +; RVI-NEXT: addi a1, a1, 1365 +; RVI-NEXT: vand.vx v9, v9, a1 +; RVI-NEXT: vsub.vv v8, v8, v9 +; RVI-NEXT: lui a1, 3 +; RVI-NEXT: addi a1, a1, 819 +; RVI-NEXT: vand.vx v9, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 2 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: vadd.vv v8, v9, v8 +; RVI-NEXT: vsrl.vi v9, v8, 4 +; RVI-NEXT: vadd.vv v8, v8, v9 +; RVI-NEXT: lui a1, 1 +; RVI-NEXT: addi a1, a1, -241 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: li a1, 257 +; RVI-NEXT: vmul.vx v8, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 8 +; RVI-NEXT: vse16.v v8, (a0) +; RVI-NEXT: ret +; +; RVF-LABEL: cttz_v8i16: +; RVF: # %bb.0: +; RVF-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RVF-NEXT: vle16.v v8, (a0) +; RVF-NEXT: vrsub.vi v9, v8, 0 +; RVF-NEXT: vand.vv v9, v8, v9 +; RVF-NEXT: vfwcvt.f.xu.v v10, v9 +; RVF-NEXT: vnsrl.wi v9, v10, 23 +; RVF-NEXT: li a1, 127 +; RVF-NEXT: vsub.vx v9, v9, a1 +; RVF-NEXT: vmseq.vi v0, v8, 0 +; RVF-NEXT: li a1, 16 +; RVF-NEXT: vmerge.vxm v8, v9, a1, v0 +; RVF-NEXT: vse16.v v8, (a0) +; RVF-NEXT: ret 
+; +; RVD-LABEL: cttz_v8i16: +; RVD: # %bb.0: +; RVD-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RVD-NEXT: vle16.v v8, (a0) +; RVD-NEXT: vrsub.vi v9, v8, 0 +; RVD-NEXT: vand.vv v9, v8, v9 +; RVD-NEXT: vfwcvt.f.xu.v v10, v9 +; RVD-NEXT: vnsrl.wi v9, v10, 23 +; RVD-NEXT: li a1, 127 +; RVD-NEXT: vsub.vx v9, v9, a1 +; RVD-NEXT: vmseq.vi v0, v8, 0 +; RVD-NEXT: li a1, 16 +; RVD-NEXT: vmerge.vxm v8, v9, a1, v0 +; RVD-NEXT: vse16.v v8, (a0) +; RVD-NEXT: ret ; ; ZVBB-LABEL: cttz_v8i16: ; ZVBB: # %bb.0: @@ -259,154 +164,71 @@ define void @cttz_v8i16(ptr %x, ptr %y) nounwind { declare <8 x i16> @llvm.cttz.v8i16(<8 x i16>, i1) define void @cttz_v4i32(ptr %x, ptr %y) nounwind { -; LMULMAX2-RV32I-LABEL: cttz_v4i32: -; LMULMAX2-RV32I: # %bb.0: -; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV32I-NEXT: li a1, 1 -; LMULMAX2-RV32I-NEXT: vsub.vx v9, v8, a1 -; LMULMAX2-RV32I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV32I-NEXT: lui a1, 349525 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32I-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: lui a1, 209715 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32I-NEXT: vand.vx v9, v8, a1 -; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v9, v8 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: lui a1, 61681 -; LMULMAX2-RV32I-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: lui a1, 4112 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 257 -; LMULMAX2-RV32I-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 24 -; LMULMAX2-RV32I-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV32I-NEXT: ret -; -; LMULMAX2-RV64I-LABEL: cttz_v4i32: -; LMULMAX2-RV64I: # %bb.0: -; LMULMAX2-RV64I-NEXT: vsetivli 
zero, 4, e32, m1, ta, ma -; LMULMAX2-RV64I-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV64I-NEXT: li a1, 1 -; LMULMAX2-RV64I-NEXT: vsub.vx v9, v8, a1 -; LMULMAX2-RV64I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV64I-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV64I-NEXT: lui a1, 349525 -; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64I-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: lui a1, 209715 -; LMULMAX2-RV64I-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a1 -; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v9, v8 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: lui a1, 61681 -; LMULMAX2-RV64I-NEXT: addi a1, a1, -241 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: lui a1, 4112 -; LMULMAX2-RV64I-NEXT: addi a1, a1, 257 -; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 24 -; LMULMAX2-RV64I-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV64I-NEXT: ret -; -; LMULMAX2-RV32F-LABEL: cttz_v4i32: -; LMULMAX2-RV32F: # %bb.0: -; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV32F-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV32F-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX2-RV32F-NEXT: vand.vv v9, v8, v9 -; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32F-NEXT: vfcvt.f.xu.v v9, v9 -; LMULMAX2-RV32F-NEXT: fsrm a1 -; LMULMAX2-RV32F-NEXT: vsrl.vi v9, v9, 23 -; LMULMAX2-RV32F-NEXT: li a1, 127 -; LMULMAX2-RV32F-NEXT: vsub.vx v9, v9, a1 -; LMULMAX2-RV32F-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX2-RV32F-NEXT: li a1, 32 -; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v9, a1, v0 -; LMULMAX2-RV32F-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV32F-NEXT: ret -; -; LMULMAX2-RV64F-LABEL: cttz_v4i32: -; LMULMAX2-RV64F: # %bb.0: -; LMULMAX2-RV64F-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV64F-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV64F-NEXT: 
vrsub.vi v9, v8, 0 -; LMULMAX2-RV64F-NEXT: vand.vv v9, v8, v9 -; LMULMAX2-RV64F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64F-NEXT: vfcvt.f.xu.v v9, v9 -; LMULMAX2-RV64F-NEXT: fsrm a1 -; LMULMAX2-RV64F-NEXT: vsrl.vi v9, v9, 23 -; LMULMAX2-RV64F-NEXT: li a1, 127 -; LMULMAX2-RV64F-NEXT: vsub.vx v9, v9, a1 -; LMULMAX2-RV64F-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX2-RV64F-NEXT: li a1, 32 -; LMULMAX2-RV64F-NEXT: vmerge.vxm v8, v9, a1, v0 -; LMULMAX2-RV64F-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV64F-NEXT: ret -; -; LMULMAX2-RV32D-LABEL: cttz_v4i32: -; LMULMAX2-RV32D: # %bb.0: -; LMULMAX2-RV32D-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV32D-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV32D-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX2-RV32D-NEXT: vand.vv v9, v8, v9 -; LMULMAX2-RV32D-NEXT: vfwcvt.f.xu.v v10, v9 -; LMULMAX2-RV32D-NEXT: li a1, 52 -; LMULMAX2-RV32D-NEXT: vnsrl.wx v9, v10, a1 -; LMULMAX2-RV32D-NEXT: li a1, 1023 -; LMULMAX2-RV32D-NEXT: vsub.vx v9, v9, a1 -; LMULMAX2-RV32D-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX2-RV32D-NEXT: li a1, 32 -; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v9, a1, v0 -; LMULMAX2-RV32D-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV32D-NEXT: ret -; -; LMULMAX2-RV64D-LABEL: cttz_v4i32: -; LMULMAX2-RV64D: # %bb.0: -; LMULMAX2-RV64D-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV64D-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV64D-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX2-RV64D-NEXT: vand.vv v9, v8, v9 -; LMULMAX2-RV64D-NEXT: vfwcvt.f.xu.v v10, v9 -; LMULMAX2-RV64D-NEXT: li a1, 52 -; LMULMAX2-RV64D-NEXT: vnsrl.wx v9, v10, a1 -; LMULMAX2-RV64D-NEXT: li a1, 1023 -; LMULMAX2-RV64D-NEXT: vsub.vx v9, v9, a1 -; LMULMAX2-RV64D-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX2-RV64D-NEXT: li a1, 32 -; LMULMAX2-RV64D-NEXT: vmerge.vxm v8, v9, a1, v0 -; LMULMAX2-RV64D-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV64D-NEXT: ret -; -; LMULMAX8-LABEL: cttz_v4i32: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX8-NEXT: vle32.v v8, (a0) -; LMULMAX8-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX8-NEXT: 
vand.vv v9, v8, v9 -; LMULMAX8-NEXT: vfwcvt.f.xu.v v10, v9 -; LMULMAX8-NEXT: li a1, 52 -; LMULMAX8-NEXT: vnsrl.wx v9, v10, a1 -; LMULMAX8-NEXT: li a1, 1023 -; LMULMAX8-NEXT: vsub.vx v9, v9, a1 -; LMULMAX8-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX8-NEXT: li a1, 32 -; LMULMAX8-NEXT: vmerge.vxm v8, v9, a1, v0 -; LMULMAX8-NEXT: vse32.v v8, (a0) -; LMULMAX8-NEXT: ret +; RVI-LABEL: cttz_v4i32: +; RVI: # %bb.0: +; RVI-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RVI-NEXT: vle32.v v8, (a0) +; RVI-NEXT: li a1, 1 +; RVI-NEXT: vsub.vx v9, v8, a1 +; RVI-NEXT: vnot.v v8, v8 +; RVI-NEXT: vand.vv v8, v8, v9 +; RVI-NEXT: vsrl.vi v9, v8, 1 +; RVI-NEXT: lui a1, 349525 +; RVI-NEXT: addi a1, a1, 1365 +; RVI-NEXT: vand.vx v9, v9, a1 +; RVI-NEXT: vsub.vv v8, v8, v9 +; RVI-NEXT: lui a1, 209715 +; RVI-NEXT: addi a1, a1, 819 +; RVI-NEXT: vand.vx v9, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 2 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: vadd.vv v8, v9, v8 +; RVI-NEXT: vsrl.vi v9, v8, 4 +; RVI-NEXT: vadd.vv v8, v8, v9 +; RVI-NEXT: lui a1, 61681 +; RVI-NEXT: addi a1, a1, -241 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: lui a1, 4112 +; RVI-NEXT: addi a1, a1, 257 +; RVI-NEXT: vmul.vx v8, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 24 +; RVI-NEXT: vse32.v v8, (a0) +; RVI-NEXT: ret +; +; RVF-LABEL: cttz_v4i32: +; RVF: # %bb.0: +; RVF-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RVF-NEXT: vle32.v v8, (a0) +; RVF-NEXT: vrsub.vi v9, v8, 0 +; RVF-NEXT: vand.vv v9, v8, v9 +; RVF-NEXT: fsrmi a1, 1 +; RVF-NEXT: vfcvt.f.xu.v v9, v9 +; RVF-NEXT: fsrm a1 +; RVF-NEXT: vsrl.vi v9, v9, 23 +; RVF-NEXT: li a1, 127 +; RVF-NEXT: vsub.vx v9, v9, a1 +; RVF-NEXT: vmseq.vi v0, v8, 0 +; RVF-NEXT: li a1, 32 +; RVF-NEXT: vmerge.vxm v8, v9, a1, v0 +; RVF-NEXT: vse32.v v8, (a0) +; RVF-NEXT: ret +; +; RVD-LABEL: cttz_v4i32: +; RVD: # %bb.0: +; RVD-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RVD-NEXT: vle32.v v8, (a0) +; RVD-NEXT: vrsub.vi v9, v8, 0 +; RVD-NEXT: vand.vv v9, v8, v9 +; RVD-NEXT: vfwcvt.f.xu.v v10, v9 +; RVD-NEXT: li a1, 52 +; 
RVD-NEXT: vnsrl.wx v9, v10, a1 +; RVD-NEXT: li a1, 1023 +; RVD-NEXT: vsub.vx v9, v9, a1 +; RVD-NEXT: vmseq.vi v0, v8, 0 +; RVD-NEXT: li a1, 32 +; RVD-NEXT: vmerge.vxm v8, v9, a1, v0 +; RVD-NEXT: vse32.v v8, (a0) +; RVD-NEXT: ret ; ; ZVBB-LABEL: cttz_v4i32: ; ZVBB: # %bb.0: @@ -424,187 +246,149 @@ define void @cttz_v4i32(ptr %x, ptr %y) nounwind { declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1) define void @cttz_v2i64(ptr %x, ptr %y) nounwind { -; LMULMAX2-RV32I-LABEL: cttz_v2i64: -; LMULMAX2-RV32I: # %bb.0: -; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32I-NEXT: li a1, 1 -; LMULMAX2-RV32I-NEXT: vsub.vx v9, v8, a1 -; LMULMAX2-RV32I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV32I-NEXT: lui a1, 349525 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vand.vv v9, v9, v10 -; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: lui a1, 209715 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vand.vv v10, v8, v9 -; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: lui a1, 61681 -; LMULMAX2-RV32I-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: lui a1, 4112 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 257 -; 
LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: li a1, 56 -; LMULMAX2-RV32I-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32I-NEXT: ret -; -; LMULMAX2-RV64I-LABEL: cttz_v2i64: -; LMULMAX2-RV64I: # %bb.0: -; LMULMAX2-RV64I-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV64I-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64I-NEXT: li a1, 1 -; LMULMAX2-RV64I-NEXT: vsub.vx v9, v8, a1 -; LMULMAX2-RV64I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV64I-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV64I-NEXT: lui a1, 349525 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64I-NEXT: add a1, a1, a2 -; LMULMAX2-RV64I-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: lui a1, 209715 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64I-NEXT: add a1, a1, a2 -; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a1 -; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v9, v8 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: lui a1, 61681 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241 -; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64I-NEXT: add a1, a1, a2 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: lui a1, 4112 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 257 -; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64I-NEXT: add a1, a1, a2 -; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: li a1, 56 -; LMULMAX2-RV64I-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV64I-NEXT: ret -; -; LMULMAX2-RV32F-LABEL: cttz_v2i64: -; LMULMAX2-RV32F: # %bb.0: -; 
LMULMAX2-RV32F-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32F-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32F-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX2-RV32F-NEXT: vand.vv v9, v8, v9 -; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v10, v9 -; LMULMAX2-RV32F-NEXT: fsrm a1 -; LMULMAX2-RV32F-NEXT: vsrl.vi v9, v10, 23 -; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; LMULMAX2-RV32F-NEXT: vzext.vf2 v10, v9 -; LMULMAX2-RV32F-NEXT: li a1, 127 -; LMULMAX2-RV32F-NEXT: vsub.vx v9, v10, a1 -; LMULMAX2-RV32F-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX2-RV32F-NEXT: li a1, 64 -; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v9, a1, v0 -; LMULMAX2-RV32F-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32F-NEXT: ret -; -; LMULMAX2-RV64F-LABEL: cttz_v2i64: -; LMULMAX2-RV64F: # %bb.0: -; LMULMAX2-RV64F-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV64F-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64F-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX2-RV64F-NEXT: vand.vv v9, v8, v9 -; LMULMAX2-RV64F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; LMULMAX2-RV64F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v10, v9 -; LMULMAX2-RV64F-NEXT: fsrm a1 -; LMULMAX2-RV64F-NEXT: vsrl.vi v9, v10, 23 -; LMULMAX2-RV64F-NEXT: li a1, 127 -; LMULMAX2-RV64F-NEXT: vwsubu.vx v10, v9, a1 -; LMULMAX2-RV64F-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; LMULMAX2-RV64F-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX2-RV64F-NEXT: li a1, 64 -; LMULMAX2-RV64F-NEXT: vmerge.vxm v8, v10, a1, v0 -; LMULMAX2-RV64F-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV64F-NEXT: ret -; -; LMULMAX2-RV32D-LABEL: cttz_v2i64: -; LMULMAX2-RV32D: # %bb.0: -; LMULMAX2-RV32D-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32D-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32D-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX2-RV32D-NEXT: vand.vv v9, v8, v9 -; LMULMAX2-RV32D-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v9, v9 -; LMULMAX2-RV32D-NEXT: fsrm a1 -; LMULMAX2-RV32D-NEXT: li a1, 52 -; 
LMULMAX2-RV32D-NEXT: vsrl.vx v9, v9, a1 -; LMULMAX2-RV32D-NEXT: li a1, 1023 -; LMULMAX2-RV32D-NEXT: vsub.vx v9, v9, a1 -; LMULMAX2-RV32D-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX2-RV32D-NEXT: li a1, 64 -; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v9, a1, v0 -; LMULMAX2-RV32D-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32D-NEXT: ret -; -; LMULMAX2-RV64D-LABEL: cttz_v2i64: -; LMULMAX2-RV64D: # %bb.0: -; LMULMAX2-RV64D-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV64D-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64D-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX2-RV64D-NEXT: vand.vv v9, v8, v9 -; LMULMAX2-RV64D-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64D-NEXT: vfcvt.f.xu.v v9, v9 -; LMULMAX2-RV64D-NEXT: fsrm a1 -; LMULMAX2-RV64D-NEXT: li a1, 52 -; LMULMAX2-RV64D-NEXT: vsrl.vx v9, v9, a1 -; LMULMAX2-RV64D-NEXT: li a1, 1023 -; LMULMAX2-RV64D-NEXT: vsub.vx v9, v9, a1 -; LMULMAX2-RV64D-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX2-RV64D-NEXT: li a1, 64 -; LMULMAX2-RV64D-NEXT: vmerge.vxm v8, v9, a1, v0 -; LMULMAX2-RV64D-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV64D-NEXT: ret -; -; LMULMAX8-LABEL: cttz_v2i64: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX8-NEXT: vle64.v v8, (a0) -; LMULMAX8-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX8-NEXT: vand.vv v9, v8, v9 -; LMULMAX8-NEXT: fsrmi a1, 1 -; LMULMAX8-NEXT: vfcvt.f.xu.v v9, v9 -; LMULMAX8-NEXT: fsrm a1 -; LMULMAX8-NEXT: li a1, 52 -; LMULMAX8-NEXT: vsrl.vx v9, v9, a1 -; LMULMAX8-NEXT: li a1, 1023 -; LMULMAX8-NEXT: vsub.vx v9, v9, a1 -; LMULMAX8-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX8-NEXT: li a1, 64 -; LMULMAX8-NEXT: vmerge.vxm v8, v9, a1, v0 -; LMULMAX8-NEXT: vse64.v v8, (a0) -; LMULMAX8-NEXT: ret +; RV32I-LABEL: cttz_v2i64: +; RV32I: # %bb.0: +; RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32I-NEXT: vle64.v v8, (a0) +; RV32I-NEXT: li a1, 1 +; RV32I-NEXT: vsub.vx v9, v8, a1 +; RV32I-NEXT: vnot.v v8, v8 +; RV32I-NEXT: vand.vv v8, v8, v9 +; RV32I-NEXT: vsrl.vi v9, v8, 1 +; RV32I-NEXT: lui a1, 349525 +; RV32I-NEXT: addi a1, a1, 1365 +; 
RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32I-NEXT: vmv.v.x v10, a1 +; RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32I-NEXT: vand.vv v9, v9, v10 +; RV32I-NEXT: vsub.vv v8, v8, v9 +; RV32I-NEXT: lui a1, 209715 +; RV32I-NEXT: addi a1, a1, 819 +; RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32I-NEXT: vmv.v.x v9, a1 +; RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32I-NEXT: vand.vv v10, v8, v9 +; RV32I-NEXT: vsrl.vi v8, v8, 2 +; RV32I-NEXT: vand.vv v8, v8, v9 +; RV32I-NEXT: vadd.vv v8, v10, v8 +; RV32I-NEXT: vsrl.vi v9, v8, 4 +; RV32I-NEXT: vadd.vv v8, v8, v9 +; RV32I-NEXT: lui a1, 61681 +; RV32I-NEXT: addi a1, a1, -241 +; RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32I-NEXT: vmv.v.x v9, a1 +; RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32I-NEXT: vand.vv v8, v8, v9 +; RV32I-NEXT: lui a1, 4112 +; RV32I-NEXT: addi a1, a1, 257 +; RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32I-NEXT: vmv.v.x v9, a1 +; RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32I-NEXT: vmul.vv v8, v8, v9 +; RV32I-NEXT: li a1, 56 +; RV32I-NEXT: vsrl.vx v8, v8, a1 +; RV32I-NEXT: vse64.v v8, (a0) +; RV32I-NEXT: ret +; +; RV64I-LABEL: cttz_v2i64: +; RV64I: # %bb.0: +; RV64I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64I-NEXT: vle64.v v8, (a0) +; RV64I-NEXT: li a1, 1 +; RV64I-NEXT: vsub.vx v9, v8, a1 +; RV64I-NEXT: vnot.v v8, v8 +; RV64I-NEXT: vand.vv v8, v8, v9 +; RV64I-NEXT: vsrl.vi v9, v8, 1 +; RV64I-NEXT: lui a1, 349525 +; RV64I-NEXT: addiw a1, a1, 1365 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: vand.vx v9, v9, a1 +; RV64I-NEXT: vsub.vv v8, v8, v9 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: vand.vx v9, v8, a1 +; RV64I-NEXT: vsrl.vi v8, v8, 2 +; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: vadd.vv v8, v9, v8 +; RV64I-NEXT: vsrl.vi v9, v8, 4 +; RV64I-NEXT: vadd.vv v8, v8, v9 +; RV64I-NEXT: lui a1, 61681 +; RV64I-NEXT: addiw 
a1, a1, -241 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: lui a1, 4112 +; RV64I-NEXT: addiw a1, a1, 257 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: vmul.vx v8, v8, a1 +; RV64I-NEXT: li a1, 56 +; RV64I-NEXT: vsrl.vx v8, v8, a1 +; RV64I-NEXT: vse64.v v8, (a0) +; RV64I-NEXT: ret +; +; RV32F-LABEL: cttz_v2i64: +; RV32F: # %bb.0: +; RV32F-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32F-NEXT: vle64.v v8, (a0) +; RV32F-NEXT: vrsub.vi v9, v8, 0 +; RV32F-NEXT: vand.vv v9, v8, v9 +; RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV32F-NEXT: fsrmi a1, 1 +; RV32F-NEXT: vfncvt.f.xu.w v10, v9 +; RV32F-NEXT: fsrm a1 +; RV32F-NEXT: vsrl.vi v9, v10, 23 +; RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32F-NEXT: vzext.vf2 v10, v9 +; RV32F-NEXT: li a1, 127 +; RV32F-NEXT: vsub.vx v9, v10, a1 +; RV32F-NEXT: vmseq.vi v0, v8, 0 +; RV32F-NEXT: li a1, 64 +; RV32F-NEXT: vmerge.vxm v8, v9, a1, v0 +; RV32F-NEXT: vse64.v v8, (a0) +; RV32F-NEXT: ret +; +; RV64F-LABEL: cttz_v2i64: +; RV64F: # %bb.0: +; RV64F-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64F-NEXT: vle64.v v8, (a0) +; RV64F-NEXT: vrsub.vi v9, v8, 0 +; RV64F-NEXT: vand.vv v9, v8, v9 +; RV64F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV64F-NEXT: fsrmi a1, 1 +; RV64F-NEXT: vfncvt.f.xu.w v10, v9 +; RV64F-NEXT: fsrm a1 +; RV64F-NEXT: vsrl.vi v9, v10, 23 +; RV64F-NEXT: li a1, 127 +; RV64F-NEXT: vwsubu.vx v10, v9, a1 +; RV64F-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64F-NEXT: vmseq.vi v0, v8, 0 +; RV64F-NEXT: li a1, 64 +; RV64F-NEXT: vmerge.vxm v8, v10, a1, v0 +; RV64F-NEXT: vse64.v v8, (a0) +; RV64F-NEXT: ret +; +; RVD-LABEL: cttz_v2i64: +; RVD: # %bb.0: +; RVD-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RVD-NEXT: vle64.v v8, (a0) +; RVD-NEXT: vrsub.vi v9, v8, 0 +; RVD-NEXT: vand.vv v9, v8, v9 +; RVD-NEXT: fsrmi a1, 1 +; RVD-NEXT: vfcvt.f.xu.v v9, v9 +; RVD-NEXT: fsrm a1 +; RVD-NEXT: li a1, 52 +; RVD-NEXT: vsrl.vx v9, v9, 
a1 +; RVD-NEXT: li a1, 1023 +; RVD-NEXT: vsub.vx v9, v9, a1 +; RVD-NEXT: vmseq.vi v0, v8, 0 +; RVD-NEXT: li a1, 64 +; RVD-NEXT: vmerge.vxm v8, v9, a1, v0 +; RVD-NEXT: vse64.v v8, (a0) +; RVD-NEXT: ret ; ; ZVBB-LABEL: cttz_v2i64: ; ZVBB: # %bb.0: @@ -622,88 +406,69 @@ define void @cttz_v2i64(ptr %x, ptr %y) nounwind { declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1) define void @cttz_v32i8(ptr %x, ptr %y) nounwind { -; LMULMAX2-LABEL: cttz_v32i8: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a1, 32 -; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-NEXT: vle8.v v8, (a0) -; LMULMAX2-NEXT: li a1, 1 -; LMULMAX2-NEXT: vsub.vx v10, v8, a1 -; LMULMAX2-NEXT: vnot.v v8, v8 -; LMULMAX2-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-NEXT: li a1, 85 -; LMULMAX2-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-NEXT: li a1, 51 -; LMULMAX2-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-NEXT: vand.vi v8, v8, 15 -; LMULMAX2-NEXT: vse8.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: cttz_v32i8: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vle8.v v8, (a1) -; LMULMAX1-NEXT: vle8.v v9, (a0) -; LMULMAX1-NEXT: li a2, 1 -; LMULMAX1-NEXT: vsub.vx v10, v8, a2 -; LMULMAX1-NEXT: vnot.v v8, v8 -; LMULMAX1-NEXT: vand.vv v8, v8, v10 -; LMULMAX1-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-NEXT: li a3, 85 -; LMULMAX1-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-NEXT: li a4, 51 -; LMULMAX1-NEXT: vand.vx v10, v8, a4 -; LMULMAX1-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-NEXT: vand.vx v8, v8, a4 -; LMULMAX1-NEXT: vadd.vv v8, v10, v8 -; LMULMAX1-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-NEXT: vand.vi v8, v8, 15 -; 
LMULMAX1-NEXT: vsub.vx v10, v9, a2 -; LMULMAX1-NEXT: vnot.v v9, v9 -; LMULMAX1-NEXT: vand.vv v9, v9, v10 -; LMULMAX1-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-NEXT: vsub.vv v9, v9, v10 -; LMULMAX1-NEXT: vand.vx v10, v9, a4 -; LMULMAX1-NEXT: vsrl.vi v9, v9, 2 -; LMULMAX1-NEXT: vand.vx v9, v9, a4 -; LMULMAX1-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-NEXT: vand.vi v9, v9, 15 -; LMULMAX1-NEXT: vse8.v v9, (a0) -; LMULMAX1-NEXT: vse8.v v8, (a1) -; LMULMAX1-NEXT: ret -; -; LMULMAX8-LABEL: cttz_v32i8: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: li a1, 32 -; LMULMAX8-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX8-NEXT: vle8.v v8, (a0) -; LMULMAX8-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX8-NEXT: vand.vv v10, v8, v10 -; LMULMAX8-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; LMULMAX8-NEXT: vzext.vf2 v12, v10 -; LMULMAX8-NEXT: vfwcvt.f.xu.v v16, v12 -; LMULMAX8-NEXT: vnsrl.wi v12, v16, 23 -; LMULMAX8-NEXT: vsetvli zero, zero, e8, m2, ta, ma -; LMULMAX8-NEXT: vnsrl.wi v10, v12, 0 -; LMULMAX8-NEXT: li a1, 127 -; LMULMAX8-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX8-NEXT: vsub.vx v8, v10, a1 -; LMULMAX8-NEXT: vmerge.vim v8, v8, 8, v0 -; LMULMAX8-NEXT: vse8.v v8, (a0) -; LMULMAX8-NEXT: ret +; RVI-LABEL: cttz_v32i8: +; RVI: # %bb.0: +; RVI-NEXT: li a1, 32 +; RVI-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; RVI-NEXT: vle8.v v8, (a0) +; RVI-NEXT: li a1, 1 +; RVI-NEXT: vsub.vx v10, v8, a1 +; RVI-NEXT: vnot.v v8, v8 +; RVI-NEXT: vand.vv v8, v8, v10 +; RVI-NEXT: vsrl.vi v10, v8, 1 +; RVI-NEXT: li a1, 85 +; RVI-NEXT: vand.vx v10, v10, a1 +; RVI-NEXT: vsub.vv v8, v8, v10 +; RVI-NEXT: li a1, 51 +; RVI-NEXT: vand.vx v10, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 2 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: vadd.vv v8, v10, v8 +; RVI-NEXT: vsrl.vi v10, v8, 4 +; RVI-NEXT: vadd.vv v8, v8, v10 +; RVI-NEXT: vand.vi v8, v8, 15 +; RVI-NEXT: vse8.v v8, (a0) +; RVI-NEXT: ret +; +; RVF-LABEL: cttz_v32i8: +; RVF: # 
%bb.0: +; RVF-NEXT: li a1, 32 +; RVF-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; RVF-NEXT: vle8.v v8, (a0) +; RVF-NEXT: vrsub.vi v10, v8, 0 +; RVF-NEXT: vand.vv v10, v8, v10 +; RVF-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; RVF-NEXT: vzext.vf2 v12, v10 +; RVF-NEXT: vfwcvt.f.xu.v v16, v12 +; RVF-NEXT: vnsrl.wi v12, v16, 23 +; RVF-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; RVF-NEXT: vnsrl.wi v10, v12, 0 +; RVF-NEXT: li a1, 127 +; RVF-NEXT: vmseq.vi v0, v8, 0 +; RVF-NEXT: vsub.vx v8, v10, a1 +; RVF-NEXT: vmerge.vim v8, v8, 8, v0 +; RVF-NEXT: vse8.v v8, (a0) +; RVF-NEXT: ret +; +; RVD-LABEL: cttz_v32i8: +; RVD: # %bb.0: +; RVD-NEXT: li a1, 32 +; RVD-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; RVD-NEXT: vle8.v v8, (a0) +; RVD-NEXT: vrsub.vi v10, v8, 0 +; RVD-NEXT: vand.vv v10, v8, v10 +; RVD-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; RVD-NEXT: vzext.vf2 v12, v10 +; RVD-NEXT: vfwcvt.f.xu.v v16, v12 +; RVD-NEXT: vnsrl.wi v12, v16, 23 +; RVD-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; RVD-NEXT: vnsrl.wi v10, v12, 0 +; RVD-NEXT: li a1, 127 +; RVD-NEXT: vmseq.vi v0, v8, 0 +; RVD-NEXT: vsub.vx v8, v10, a1 +; RVD-NEXT: vmerge.vim v8, v8, 8, v0 +; RVD-NEXT: vse8.v v8, (a0) +; RVD-NEXT: ret ; ; ZVBB-LABEL: cttz_v32i8: ; ZVBB: # %bb.0: @@ -722,99 +487,67 @@ define void @cttz_v32i8(ptr %x, ptr %y) nounwind { declare <32 x i8> @llvm.cttz.v32i8(<32 x i8>, i1) define void @cttz_v16i16(ptr %x, ptr %y) nounwind { -; LMULMAX2-LABEL: cttz_v16i16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-NEXT: vle16.v v8, (a0) -; LMULMAX2-NEXT: li a1, 1 -; LMULMAX2-NEXT: vsub.vx v10, v8, a1 -; LMULMAX2-NEXT: vnot.v v8, v8 -; LMULMAX2-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-NEXT: lui a1, 5 -; LMULMAX2-NEXT: addi a1, a1, 1365 -; LMULMAX2-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-NEXT: lui a1, 3 -; LMULMAX2-NEXT: addi a1, a1, 819 -; LMULMAX2-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-NEXT: vsrl.vi 
v8, v8, 2 -; LMULMAX2-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-NEXT: lui a1, 1 -; LMULMAX2-NEXT: addi a1, a1, -241 -; LMULMAX2-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-NEXT: li a1, 257 -; LMULMAX2-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX2-NEXT: vse16.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: cttz_v16i16: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vle16.v v8, (a1) -; LMULMAX1-NEXT: vle16.v v9, (a0) -; LMULMAX1-NEXT: li a2, 1 -; LMULMAX1-NEXT: vsub.vx v10, v8, a2 -; LMULMAX1-NEXT: vnot.v v8, v8 -; LMULMAX1-NEXT: vand.vv v8, v8, v10 -; LMULMAX1-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-NEXT: lui a3, 5 -; LMULMAX1-NEXT: addi a3, a3, 1365 -; LMULMAX1-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-NEXT: lui a4, 3 -; LMULMAX1-NEXT: addi a4, a4, 819 -; LMULMAX1-NEXT: vand.vx v10, v8, a4 -; LMULMAX1-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-NEXT: vand.vx v8, v8, a4 -; LMULMAX1-NEXT: vadd.vv v8, v10, v8 -; LMULMAX1-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-NEXT: lui a5, 1 -; LMULMAX1-NEXT: addi a5, a5, -241 -; LMULMAX1-NEXT: vand.vx v8, v8, a5 -; LMULMAX1-NEXT: li a6, 257 -; LMULMAX1-NEXT: vmul.vx v8, v8, a6 -; LMULMAX1-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX1-NEXT: vsub.vx v10, v9, a2 -; LMULMAX1-NEXT: vnot.v v9, v9 -; LMULMAX1-NEXT: vand.vv v9, v9, v10 -; LMULMAX1-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-NEXT: vsub.vv v9, v9, v10 -; LMULMAX1-NEXT: vand.vx v10, v9, a4 -; LMULMAX1-NEXT: vsrl.vi v9, v9, 2 -; LMULMAX1-NEXT: vand.vx v9, v9, a4 -; LMULMAX1-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-NEXT: vand.vx v9, v9, a5 -; LMULMAX1-NEXT: vmul.vx v9, v9, a6 -; LMULMAX1-NEXT: vsrl.vi v9, v9, 8 -; 
LMULMAX1-NEXT: vse16.v v9, (a0) -; LMULMAX1-NEXT: vse16.v v8, (a1) -; LMULMAX1-NEXT: ret -; -; LMULMAX8-LABEL: cttz_v16i16: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX8-NEXT: vle16.v v8, (a0) -; LMULMAX8-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX8-NEXT: vand.vv v10, v8, v10 -; LMULMAX8-NEXT: vfwcvt.f.xu.v v12, v10 -; LMULMAX8-NEXT: vnsrl.wi v10, v12, 23 -; LMULMAX8-NEXT: li a1, 127 -; LMULMAX8-NEXT: vsub.vx v10, v10, a1 -; LMULMAX8-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX8-NEXT: li a1, 16 -; LMULMAX8-NEXT: vmerge.vxm v8, v10, a1, v0 -; LMULMAX8-NEXT: vse16.v v8, (a0) -; LMULMAX8-NEXT: ret +; RVI-LABEL: cttz_v16i16: +; RVI: # %bb.0: +; RVI-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RVI-NEXT: vle16.v v8, (a0) +; RVI-NEXT: li a1, 1 +; RVI-NEXT: vsub.vx v10, v8, a1 +; RVI-NEXT: vnot.v v8, v8 +; RVI-NEXT: vand.vv v8, v8, v10 +; RVI-NEXT: vsrl.vi v10, v8, 1 +; RVI-NEXT: lui a1, 5 +; RVI-NEXT: addi a1, a1, 1365 +; RVI-NEXT: vand.vx v10, v10, a1 +; RVI-NEXT: vsub.vv v8, v8, v10 +; RVI-NEXT: lui a1, 3 +; RVI-NEXT: addi a1, a1, 819 +; RVI-NEXT: vand.vx v10, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 2 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: vadd.vv v8, v10, v8 +; RVI-NEXT: vsrl.vi v10, v8, 4 +; RVI-NEXT: vadd.vv v8, v8, v10 +; RVI-NEXT: lui a1, 1 +; RVI-NEXT: addi a1, a1, -241 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: li a1, 257 +; RVI-NEXT: vmul.vx v8, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 8 +; RVI-NEXT: vse16.v v8, (a0) +; RVI-NEXT: ret +; +; RVF-LABEL: cttz_v16i16: +; RVF: # %bb.0: +; RVF-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RVF-NEXT: vle16.v v8, (a0) +; RVF-NEXT: vrsub.vi v10, v8, 0 +; RVF-NEXT: vand.vv v10, v8, v10 +; RVF-NEXT: vfwcvt.f.xu.v v12, v10 +; RVF-NEXT: vnsrl.wi v10, v12, 23 +; RVF-NEXT: li a1, 127 +; RVF-NEXT: vsub.vx v10, v10, a1 +; RVF-NEXT: vmseq.vi v0, v8, 0 +; RVF-NEXT: li a1, 16 +; RVF-NEXT: vmerge.vxm v8, v10, a1, v0 +; RVF-NEXT: vse16.v v8, (a0) +; RVF-NEXT: ret +; +; RVD-LABEL: cttz_v16i16: +; RVD: # %bb.0: +; 
RVD-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RVD-NEXT: vle16.v v8, (a0) +; RVD-NEXT: vrsub.vi v10, v8, 0 +; RVD-NEXT: vand.vv v10, v8, v10 +; RVD-NEXT: vfwcvt.f.xu.v v12, v10 +; RVD-NEXT: vnsrl.wi v10, v12, 23 +; RVD-NEXT: li a1, 127 +; RVD-NEXT: vsub.vx v10, v10, a1 +; RVD-NEXT: vmseq.vi v0, v8, 0 +; RVD-NEXT: li a1, 16 +; RVD-NEXT: vmerge.vxm v8, v10, a1, v0 +; RVD-NEXT: vse16.v v8, (a0) +; RVD-NEXT: ret ; ; ZVBB-LABEL: cttz_v16i16: ; ZVBB: # %bb.0: @@ -832,156 +565,71 @@ define void @cttz_v16i16(ptr %x, ptr %y) nounwind { declare <16 x i16> @llvm.cttz.v16i16(<16 x i16>, i1) define void @cttz_v8i32(ptr %x, ptr %y) nounwind { -; LMULMAX2-RV32I-LABEL: cttz_v8i32: -; LMULMAX2-RV32I: # %bb.0: -; LMULMAX2-RV32I-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV32I-NEXT: li a1, 1 -; LMULMAX2-RV32I-NEXT: vsub.vx v10, v8, a1 -; LMULMAX2-RV32I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV32I-NEXT: lui a1, 349525 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32I-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: lui a1, 209715 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32I-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: lui a1, 61681 -; LMULMAX2-RV32I-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: lui a1, 4112 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 257 -; LMULMAX2-RV32I-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 24 -; LMULMAX2-RV32I-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV32I-NEXT: ret -; -; LMULMAX2-RV64I-LABEL: cttz_v8i32: -; LMULMAX2-RV64I: # %bb.0: -; LMULMAX2-RV64I-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; 
LMULMAX2-RV64I-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV64I-NEXT: li a1, 1 -; LMULMAX2-RV64I-NEXT: vsub.vx v10, v8, a1 -; LMULMAX2-RV64I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV64I-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64I-NEXT: lui a1, 349525 -; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64I-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: lui a1, 209715 -; LMULMAX2-RV64I-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64I-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: lui a1, 61681 -; LMULMAX2-RV64I-NEXT: addi a1, a1, -241 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: lui a1, 4112 -; LMULMAX2-RV64I-NEXT: addi a1, a1, 257 -; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 24 -; LMULMAX2-RV64I-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV64I-NEXT: ret -; -; LMULMAX2-RV32F-LABEL: cttz_v8i32: -; LMULMAX2-RV32F: # %bb.0: -; LMULMAX2-RV32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32F-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV32F-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX2-RV32F-NEXT: vand.vv v10, v8, v10 -; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32F-NEXT: vfcvt.f.xu.v v10, v10 -; LMULMAX2-RV32F-NEXT: fsrm a1 -; LMULMAX2-RV32F-NEXT: vsrl.vi v10, v10, 23 -; LMULMAX2-RV32F-NEXT: li a1, 127 -; LMULMAX2-RV32F-NEXT: vsub.vx v10, v10, a1 -; LMULMAX2-RV32F-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX2-RV32F-NEXT: li a1, 32 -; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v10, a1, v0 -; LMULMAX2-RV32F-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV32F-NEXT: ret -; -; LMULMAX2-RV64F-LABEL: cttz_v8i32: -; LMULMAX2-RV64F: # %bb.0: -; LMULMAX2-RV64F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV64F-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV64F-NEXT: vrsub.vi 
v10, v8, 0 -; LMULMAX2-RV64F-NEXT: vand.vv v10, v8, v10 -; LMULMAX2-RV64F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64F-NEXT: vfcvt.f.xu.v v10, v10 -; LMULMAX2-RV64F-NEXT: fsrm a1 -; LMULMAX2-RV64F-NEXT: vsrl.vi v10, v10, 23 -; LMULMAX2-RV64F-NEXT: li a1, 127 -; LMULMAX2-RV64F-NEXT: vsub.vx v10, v10, a1 -; LMULMAX2-RV64F-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX2-RV64F-NEXT: li a1, 32 -; LMULMAX2-RV64F-NEXT: vmerge.vxm v8, v10, a1, v0 -; LMULMAX2-RV64F-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV64F-NEXT: ret -; -; LMULMAX2-RV32D-LABEL: cttz_v8i32: -; LMULMAX2-RV32D: # %bb.0: -; LMULMAX2-RV32D-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32D-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV32D-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX2-RV32D-NEXT: vand.vv v10, v8, v10 -; LMULMAX2-RV32D-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v10, v10 -; LMULMAX2-RV32D-NEXT: fsrm a1 -; LMULMAX2-RV32D-NEXT: vsrl.vi v10, v10, 23 -; LMULMAX2-RV32D-NEXT: li a1, 127 -; LMULMAX2-RV32D-NEXT: vsub.vx v10, v10, a1 -; LMULMAX2-RV32D-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX2-RV32D-NEXT: li a1, 32 -; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v10, a1, v0 -; LMULMAX2-RV32D-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV32D-NEXT: ret -; -; LMULMAX2-RV64D-LABEL: cttz_v8i32: -; LMULMAX2-RV64D: # %bb.0: -; LMULMAX2-RV64D-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV64D-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV64D-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX2-RV64D-NEXT: vand.vv v10, v8, v10 -; LMULMAX2-RV64D-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64D-NEXT: vfcvt.f.xu.v v10, v10 -; LMULMAX2-RV64D-NEXT: fsrm a1 -; LMULMAX2-RV64D-NEXT: vsrl.vi v10, v10, 23 -; LMULMAX2-RV64D-NEXT: li a1, 127 -; LMULMAX2-RV64D-NEXT: vsub.vx v10, v10, a1 -; LMULMAX2-RV64D-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX2-RV64D-NEXT: li a1, 32 -; LMULMAX2-RV64D-NEXT: vmerge.vxm v8, v10, a1, v0 -; LMULMAX2-RV64D-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV64D-NEXT: ret -; -; LMULMAX8-LABEL: cttz_v8i32: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; 
LMULMAX8-NEXT: vle32.v v8, (a0) -; LMULMAX8-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX8-NEXT: vand.vv v10, v8, v10 -; LMULMAX8-NEXT: vfwcvt.f.xu.v v12, v10 -; LMULMAX8-NEXT: li a1, 52 -; LMULMAX8-NEXT: vnsrl.wx v10, v12, a1 -; LMULMAX8-NEXT: li a1, 1023 -; LMULMAX8-NEXT: vsub.vx v10, v10, a1 -; LMULMAX8-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX8-NEXT: li a1, 32 -; LMULMAX8-NEXT: vmerge.vxm v8, v10, a1, v0 -; LMULMAX8-NEXT: vse32.v v8, (a0) -; LMULMAX8-NEXT: ret +; RVI-LABEL: cttz_v8i32: +; RVI: # %bb.0: +; RVI-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RVI-NEXT: vle32.v v8, (a0) +; RVI-NEXT: li a1, 1 +; RVI-NEXT: vsub.vx v10, v8, a1 +; RVI-NEXT: vnot.v v8, v8 +; RVI-NEXT: vand.vv v8, v8, v10 +; RVI-NEXT: vsrl.vi v10, v8, 1 +; RVI-NEXT: lui a1, 349525 +; RVI-NEXT: addi a1, a1, 1365 +; RVI-NEXT: vand.vx v10, v10, a1 +; RVI-NEXT: vsub.vv v8, v8, v10 +; RVI-NEXT: lui a1, 209715 +; RVI-NEXT: addi a1, a1, 819 +; RVI-NEXT: vand.vx v10, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 2 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: vadd.vv v8, v10, v8 +; RVI-NEXT: vsrl.vi v10, v8, 4 +; RVI-NEXT: vadd.vv v8, v8, v10 +; RVI-NEXT: lui a1, 61681 +; RVI-NEXT: addi a1, a1, -241 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: lui a1, 4112 +; RVI-NEXT: addi a1, a1, 257 +; RVI-NEXT: vmul.vx v8, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 24 +; RVI-NEXT: vse32.v v8, (a0) +; RVI-NEXT: ret +; +; RVF-LABEL: cttz_v8i32: +; RVF: # %bb.0: +; RVF-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RVF-NEXT: vle32.v v8, (a0) +; RVF-NEXT: vrsub.vi v10, v8, 0 +; RVF-NEXT: vand.vv v10, v8, v10 +; RVF-NEXT: fsrmi a1, 1 +; RVF-NEXT: vfcvt.f.xu.v v10, v10 +; RVF-NEXT: fsrm a1 +; RVF-NEXT: vsrl.vi v10, v10, 23 +; RVF-NEXT: li a1, 127 +; RVF-NEXT: vsub.vx v10, v10, a1 +; RVF-NEXT: vmseq.vi v0, v8, 0 +; RVF-NEXT: li a1, 32 +; RVF-NEXT: vmerge.vxm v8, v10, a1, v0 +; RVF-NEXT: vse32.v v8, (a0) +; RVF-NEXT: ret +; +; RVD-LABEL: cttz_v8i32: +; RVD: # %bb.0: +; RVD-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RVD-NEXT: vle32.v v8, (a0) +; RVD-NEXT: 
vrsub.vi v10, v8, 0 +; RVD-NEXT: vand.vv v10, v8, v10 +; RVD-NEXT: vfwcvt.f.xu.v v12, v10 +; RVD-NEXT: li a1, 52 +; RVD-NEXT: vnsrl.wx v10, v12, a1 +; RVD-NEXT: li a1, 1023 +; RVD-NEXT: vsub.vx v10, v10, a1 +; RVD-NEXT: vmseq.vi v0, v8, 0 +; RVD-NEXT: li a1, 32 +; RVD-NEXT: vmerge.vxm v8, v10, a1, v0 +; RVD-NEXT: vse32.v v8, (a0) +; RVD-NEXT: ret ; ; ZVBB-LABEL: cttz_v8i32: ; ZVBB: # %bb.0: @@ -999,187 +647,149 @@ define void @cttz_v8i32(ptr %x, ptr %y) nounwind { declare <8 x i32> @llvm.cttz.v8i32(<8 x i32>, i1) define void @cttz_v4i64(ptr %x, ptr %y) nounwind { -; LMULMAX2-RV32I-LABEL: cttz_v4i64: -; LMULMAX2-RV32I: # %bb.0: -; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32I-NEXT: li a1, 1 -; LMULMAX2-RV32I-NEXT: vsub.vx v10, v8, a1 -; LMULMAX2-RV32I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV32I-NEXT: lui a1, 349525 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.x v12, a1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vand.vv v10, v10, v12 -; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: lui a1, 209715 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vand.vv v12, v8, v10 -; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v12, v8 -; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: lui a1, 61681 -; LMULMAX2-RV32I-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; 
LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: lui a1, 4112 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 257 -; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: li a1, 56 -; LMULMAX2-RV32I-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32I-NEXT: ret -; -; LMULMAX2-RV64I-LABEL: cttz_v4i64: -; LMULMAX2-RV64I: # %bb.0: -; LMULMAX2-RV64I-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV64I-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64I-NEXT: li a1, 1 -; LMULMAX2-RV64I-NEXT: vsub.vx v10, v8, a1 -; LMULMAX2-RV64I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV64I-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64I-NEXT: lui a1, 349525 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64I-NEXT: add a1, a1, a2 -; LMULMAX2-RV64I-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: lui a1, 209715 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64I-NEXT: add a1, a1, a2 -; LMULMAX2-RV64I-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: lui a1, 61681 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241 -; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64I-NEXT: add a1, a1, a2 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: lui a1, 4112 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 257 -; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64I-NEXT: add a1, a1, a2 -; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: li a1, 56 -; LMULMAX2-RV64I-NEXT: vsrl.vx v8, v8, a1 -; 
LMULMAX2-RV64I-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV64I-NEXT: ret -; -; LMULMAX2-RV32F-LABEL: cttz_v4i64: -; LMULMAX2-RV32F: # %bb.0: -; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32F-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32F-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX2-RV32F-NEXT: vand.vv v10, v8, v10 -; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v12, v10 -; LMULMAX2-RV32F-NEXT: fsrm a1 -; LMULMAX2-RV32F-NEXT: vsrl.vi v10, v12, 23 -; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; LMULMAX2-RV32F-NEXT: vzext.vf2 v12, v10 -; LMULMAX2-RV32F-NEXT: li a1, 127 -; LMULMAX2-RV32F-NEXT: vsub.vx v10, v12, a1 -; LMULMAX2-RV32F-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX2-RV32F-NEXT: li a1, 64 -; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v10, a1, v0 -; LMULMAX2-RV32F-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32F-NEXT: ret -; -; LMULMAX2-RV64F-LABEL: cttz_v4i64: -; LMULMAX2-RV64F: # %bb.0: -; LMULMAX2-RV64F-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV64F-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64F-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX2-RV64F-NEXT: vand.vv v10, v8, v10 -; LMULMAX2-RV64F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; LMULMAX2-RV64F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v12, v10 -; LMULMAX2-RV64F-NEXT: fsrm a1 -; LMULMAX2-RV64F-NEXT: vsrl.vi v10, v12, 23 -; LMULMAX2-RV64F-NEXT: li a1, 127 -; LMULMAX2-RV64F-NEXT: vwsubu.vx v12, v10, a1 -; LMULMAX2-RV64F-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; LMULMAX2-RV64F-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX2-RV64F-NEXT: li a1, 64 -; LMULMAX2-RV64F-NEXT: vmerge.vxm v8, v12, a1, v0 -; LMULMAX2-RV64F-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV64F-NEXT: ret -; -; LMULMAX2-RV32D-LABEL: cttz_v4i64: -; LMULMAX2-RV32D: # %bb.0: -; LMULMAX2-RV32D-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32D-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32D-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX2-RV32D-NEXT: vand.vv v10, v8, v10 -; 
LMULMAX2-RV32D-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v10, v10 -; LMULMAX2-RV32D-NEXT: fsrm a1 -; LMULMAX2-RV32D-NEXT: li a1, 52 -; LMULMAX2-RV32D-NEXT: vsrl.vx v10, v10, a1 -; LMULMAX2-RV32D-NEXT: li a1, 1023 -; LMULMAX2-RV32D-NEXT: vsub.vx v10, v10, a1 -; LMULMAX2-RV32D-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX2-RV32D-NEXT: li a1, 64 -; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v10, a1, v0 -; LMULMAX2-RV32D-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32D-NEXT: ret -; -; LMULMAX2-RV64D-LABEL: cttz_v4i64: -; LMULMAX2-RV64D: # %bb.0: -; LMULMAX2-RV64D-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV64D-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64D-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX2-RV64D-NEXT: vand.vv v10, v8, v10 -; LMULMAX2-RV64D-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64D-NEXT: vfcvt.f.xu.v v10, v10 -; LMULMAX2-RV64D-NEXT: fsrm a1 -; LMULMAX2-RV64D-NEXT: li a1, 52 -; LMULMAX2-RV64D-NEXT: vsrl.vx v10, v10, a1 -; LMULMAX2-RV64D-NEXT: li a1, 1023 -; LMULMAX2-RV64D-NEXT: vsub.vx v10, v10, a1 -; LMULMAX2-RV64D-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX2-RV64D-NEXT: li a1, 64 -; LMULMAX2-RV64D-NEXT: vmerge.vxm v8, v10, a1, v0 -; LMULMAX2-RV64D-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV64D-NEXT: ret -; -; LMULMAX8-LABEL: cttz_v4i64: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX8-NEXT: vle64.v v8, (a0) -; LMULMAX8-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX8-NEXT: vand.vv v10, v8, v10 -; LMULMAX8-NEXT: fsrmi a1, 1 -; LMULMAX8-NEXT: vfcvt.f.xu.v v10, v10 -; LMULMAX8-NEXT: fsrm a1 -; LMULMAX8-NEXT: li a1, 52 -; LMULMAX8-NEXT: vsrl.vx v10, v10, a1 -; LMULMAX8-NEXT: li a1, 1023 -; LMULMAX8-NEXT: vsub.vx v10, v10, a1 -; LMULMAX8-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX8-NEXT: li a1, 64 -; LMULMAX8-NEXT: vmerge.vxm v8, v10, a1, v0 -; LMULMAX8-NEXT: vse64.v v8, (a0) -; LMULMAX8-NEXT: ret +; RV32I-LABEL: cttz_v4i64: +; RV32I: # %bb.0: +; RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32I-NEXT: vle64.v v8, (a0) +; RV32I-NEXT: li a1, 1 +; RV32I-NEXT: vsub.vx v10, v8, 
a1 +; RV32I-NEXT: vnot.v v8, v8 +; RV32I-NEXT: vand.vv v8, v8, v10 +; RV32I-NEXT: vsrl.vi v10, v8, 1 +; RV32I-NEXT: lui a1, 349525 +; RV32I-NEXT: addi a1, a1, 1365 +; RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32I-NEXT: vmv.v.x v12, a1 +; RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32I-NEXT: vand.vv v10, v10, v12 +; RV32I-NEXT: vsub.vv v8, v8, v10 +; RV32I-NEXT: lui a1, 209715 +; RV32I-NEXT: addi a1, a1, 819 +; RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32I-NEXT: vmv.v.x v10, a1 +; RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32I-NEXT: vand.vv v12, v8, v10 +; RV32I-NEXT: vsrl.vi v8, v8, 2 +; RV32I-NEXT: vand.vv v8, v8, v10 +; RV32I-NEXT: vadd.vv v8, v12, v8 +; RV32I-NEXT: vsrl.vi v10, v8, 4 +; RV32I-NEXT: vadd.vv v8, v8, v10 +; RV32I-NEXT: lui a1, 61681 +; RV32I-NEXT: addi a1, a1, -241 +; RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32I-NEXT: vmv.v.x v10, a1 +; RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32I-NEXT: vand.vv v8, v8, v10 +; RV32I-NEXT: lui a1, 4112 +; RV32I-NEXT: addi a1, a1, 257 +; RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32I-NEXT: vmv.v.x v10, a1 +; RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32I-NEXT: vmul.vv v8, v8, v10 +; RV32I-NEXT: li a1, 56 +; RV32I-NEXT: vsrl.vx v8, v8, a1 +; RV32I-NEXT: vse64.v v8, (a0) +; RV32I-NEXT: ret +; +; RV64I-LABEL: cttz_v4i64: +; RV64I: # %bb.0: +; RV64I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64I-NEXT: vle64.v v8, (a0) +; RV64I-NEXT: li a1, 1 +; RV64I-NEXT: vsub.vx v10, v8, a1 +; RV64I-NEXT: vnot.v v8, v8 +; RV64I-NEXT: vand.vv v8, v8, v10 +; RV64I-NEXT: vsrl.vi v10, v8, 1 +; RV64I-NEXT: lui a1, 349525 +; RV64I-NEXT: addiw a1, a1, 1365 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: vand.vx v10, v10, a1 +; RV64I-NEXT: vsub.vv v8, v8, v10 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: vand.vx v10, v8, a1 +; RV64I-NEXT: vsrl.vi v8, v8, 2 
+; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: vadd.vv v8, v10, v8 +; RV64I-NEXT: vsrl.vi v10, v8, 4 +; RV64I-NEXT: vadd.vv v8, v8, v10 +; RV64I-NEXT: lui a1, 61681 +; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: lui a1, 4112 +; RV64I-NEXT: addiw a1, a1, 257 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: vmul.vx v8, v8, a1 +; RV64I-NEXT: li a1, 56 +; RV64I-NEXT: vsrl.vx v8, v8, a1 +; RV64I-NEXT: vse64.v v8, (a0) +; RV64I-NEXT: ret +; +; RV32F-LABEL: cttz_v4i64: +; RV32F: # %bb.0: +; RV32F-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32F-NEXT: vle64.v v8, (a0) +; RV32F-NEXT: vrsub.vi v10, v8, 0 +; RV32F-NEXT: vand.vv v10, v8, v10 +; RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV32F-NEXT: fsrmi a1, 1 +; RV32F-NEXT: vfncvt.f.xu.w v12, v10 +; RV32F-NEXT: fsrm a1 +; RV32F-NEXT: vsrl.vi v10, v12, 23 +; RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32F-NEXT: vzext.vf2 v12, v10 +; RV32F-NEXT: li a1, 127 +; RV32F-NEXT: vsub.vx v10, v12, a1 +; RV32F-NEXT: vmseq.vi v0, v8, 0 +; RV32F-NEXT: li a1, 64 +; RV32F-NEXT: vmerge.vxm v8, v10, a1, v0 +; RV32F-NEXT: vse64.v v8, (a0) +; RV32F-NEXT: ret +; +; RV64F-LABEL: cttz_v4i64: +; RV64F: # %bb.0: +; RV64F-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64F-NEXT: vle64.v v8, (a0) +; RV64F-NEXT: vrsub.vi v10, v8, 0 +; RV64F-NEXT: vand.vv v10, v8, v10 +; RV64F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV64F-NEXT: fsrmi a1, 1 +; RV64F-NEXT: vfncvt.f.xu.w v12, v10 +; RV64F-NEXT: fsrm a1 +; RV64F-NEXT: vsrl.vi v10, v12, 23 +; RV64F-NEXT: li a1, 127 +; RV64F-NEXT: vwsubu.vx v12, v10, a1 +; RV64F-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64F-NEXT: vmseq.vi v0, v8, 0 +; RV64F-NEXT: li a1, 64 +; RV64F-NEXT: vmerge.vxm v8, v12, a1, v0 +; RV64F-NEXT: vse64.v v8, (a0) +; RV64F-NEXT: ret +; +; RVD-LABEL: cttz_v4i64: +; RVD: # %bb.0: +; RVD-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RVD-NEXT: vle64.v v8, 
(a0) +; RVD-NEXT: vrsub.vi v10, v8, 0 +; RVD-NEXT: vand.vv v10, v8, v10 +; RVD-NEXT: fsrmi a1, 1 +; RVD-NEXT: vfcvt.f.xu.v v10, v10 +; RVD-NEXT: fsrm a1 +; RVD-NEXT: li a1, 52 +; RVD-NEXT: vsrl.vx v10, v10, a1 +; RVD-NEXT: li a1, 1023 +; RVD-NEXT: vsub.vx v10, v10, a1 +; RVD-NEXT: vmseq.vi v0, v8, 0 +; RVD-NEXT: li a1, 64 +; RVD-NEXT: vmerge.vxm v8, v10, a1, v0 +; RVD-NEXT: vse64.v v8, (a0) +; RVD-NEXT: ret ; ; ZVBB-LABEL: cttz_v4i64: ; ZVBB: # %bb.0: @@ -1197,45 +807,62 @@ define void @cttz_v4i64(ptr %x, ptr %y) nounwind { declare <4 x i64> @llvm.cttz.v4i64(<4 x i64>, i1) define void @cttz_zero_undef_v16i8(ptr %x, ptr %y) nounwind { -; CHECK-LABEL: cttz_zero_undef_v16i8: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: li a1, 1 -; CHECK-NEXT: vsub.vx v9, v8, a1 -; CHECK-NEXT: vnot.v v8, v8 -; CHECK-NEXT: vand.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: li a1, 85 -; CHECK-NEXT: vand.vx v9, v9, a1 -; CHECK-NEXT: vsub.vv v8, v8, v9 -; CHECK-NEXT: li a1, 51 -; CHECK-NEXT: vand.vx v9, v8, a1 -; CHECK-NEXT: vsrl.vi v8, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a1 -; CHECK-NEXT: vadd.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vadd.vv v8, v8, v9 -; CHECK-NEXT: vand.vi v8, v8, 15 -; CHECK-NEXT: vse8.v v8, (a0) -; CHECK-NEXT: ret -; -; LMULMAX8-LABEL: cttz_zero_undef_v16i8: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX8-NEXT: vle8.v v8, (a0) -; LMULMAX8-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX8-NEXT: vand.vv v8, v8, v9 -; LMULMAX8-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; LMULMAX8-NEXT: vzext.vf2 v10, v8 -; LMULMAX8-NEXT: vfwcvt.f.xu.v v12, v10 -; LMULMAX8-NEXT: vnsrl.wi v8, v12, 23 -; LMULMAX8-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; LMULMAX8-NEXT: vnsrl.wi v10, v8, 0 -; LMULMAX8-NEXT: li a1, 127 -; LMULMAX8-NEXT: vsub.vx v8, v10, a1 -; LMULMAX8-NEXT: vse8.v v8, (a0) -; LMULMAX8-NEXT: ret +; RVI-LABEL: cttz_zero_undef_v16i8: +; RVI: # 
%bb.0: +; RVI-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RVI-NEXT: vle8.v v8, (a0) +; RVI-NEXT: li a1, 1 +; RVI-NEXT: vsub.vx v9, v8, a1 +; RVI-NEXT: vnot.v v8, v8 +; RVI-NEXT: vand.vv v8, v8, v9 +; RVI-NEXT: vsrl.vi v9, v8, 1 +; RVI-NEXT: li a1, 85 +; RVI-NEXT: vand.vx v9, v9, a1 +; RVI-NEXT: vsub.vv v8, v8, v9 +; RVI-NEXT: li a1, 51 +; RVI-NEXT: vand.vx v9, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 2 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: vadd.vv v8, v9, v8 +; RVI-NEXT: vsrl.vi v9, v8, 4 +; RVI-NEXT: vadd.vv v8, v8, v9 +; RVI-NEXT: vand.vi v8, v8, 15 +; RVI-NEXT: vse8.v v8, (a0) +; RVI-NEXT: ret +; +; RVF-LABEL: cttz_zero_undef_v16i8: +; RVF: # %bb.0: +; RVF-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RVF-NEXT: vle8.v v8, (a0) +; RVF-NEXT: vrsub.vi v9, v8, 0 +; RVF-NEXT: vand.vv v8, v8, v9 +; RVF-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RVF-NEXT: vzext.vf2 v10, v8 +; RVF-NEXT: vfwcvt.f.xu.v v12, v10 +; RVF-NEXT: vnsrl.wi v8, v12, 23 +; RVF-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; RVF-NEXT: vnsrl.wi v10, v8, 0 +; RVF-NEXT: li a1, 127 +; RVF-NEXT: vsub.vx v8, v10, a1 +; RVF-NEXT: vse8.v v8, (a0) +; RVF-NEXT: ret +; +; RVD-LABEL: cttz_zero_undef_v16i8: +; RVD: # %bb.0: +; RVD-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RVD-NEXT: vle8.v v8, (a0) +; RVD-NEXT: vrsub.vi v9, v8, 0 +; RVD-NEXT: vand.vv v8, v8, v9 +; RVD-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RVD-NEXT: vzext.vf2 v10, v8 +; RVD-NEXT: vfwcvt.f.xu.v v12, v10 +; RVD-NEXT: vnsrl.wi v8, v12, 23 +; RVD-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; RVD-NEXT: vnsrl.wi v10, v8, 0 +; RVD-NEXT: li a1, 127 +; RVD-NEXT: vsub.vx v8, v10, a1 +; RVD-NEXT: vse8.v v8, (a0) +; RVD-NEXT: ret ; ; ZVBB-LABEL: cttz_zero_undef_v16i8: ; ZVBB: # %bb.0: @@ -1252,160 +879,61 @@ define void @cttz_zero_undef_v16i8(ptr %x, ptr %y) nounwind { } define void @cttz_zero_undef_v8i16(ptr %x, ptr %y) nounwind { -; LMULMAX2-RV32I-LABEL: cttz_zero_undef_v8i16: -; LMULMAX2-RV32I: # %bb.0: -; LMULMAX2-RV32I-NEXT: vsetivli zero, 8, e16, m1, 
ta, ma -; LMULMAX2-RV32I-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV32I-NEXT: li a1, 1 -; LMULMAX2-RV32I-NEXT: vsub.vx v9, v8, a1 -; LMULMAX2-RV32I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV32I-NEXT: lui a1, 5 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32I-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: lui a1, 3 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32I-NEXT: vand.vx v9, v8, a1 -; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v9, v8 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: lui a1, 1 -; LMULMAX2-RV32I-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: li a1, 257 -; LMULMAX2-RV32I-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX2-RV32I-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV32I-NEXT: ret -; -; LMULMAX2-RV64I-LABEL: cttz_zero_undef_v8i16: -; LMULMAX2-RV64I: # %bb.0: -; LMULMAX2-RV64I-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX2-RV64I-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV64I-NEXT: li a1, 1 -; LMULMAX2-RV64I-NEXT: vsub.vx v9, v8, a1 -; LMULMAX2-RV64I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV64I-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV64I-NEXT: lui a1, 5 -; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64I-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: lui a1, 3 -; LMULMAX2-RV64I-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a1 -; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v9, v8 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: lui a1, 1 -; LMULMAX2-RV64I-NEXT: addi a1, a1, -241 -; 
LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: li a1, 257 -; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX2-RV64I-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV64I-NEXT: ret -; -; LMULMAX1-LABEL: cttz_zero_undef_v8i16: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vle16.v v8, (a0) -; LMULMAX1-NEXT: li a1, 1 -; LMULMAX1-NEXT: vsub.vx v9, v8, a1 -; LMULMAX1-NEXT: vnot.v v8, v8 -; LMULMAX1-NEXT: vand.vv v8, v8, v9 -; LMULMAX1-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-NEXT: lui a1, 5 -; LMULMAX1-NEXT: addi a1, a1, 1365 -; LMULMAX1-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-NEXT: vsub.vv v8, v8, v9 -; LMULMAX1-NEXT: lui a1, 3 -; LMULMAX1-NEXT: addi a1, a1, 819 -; LMULMAX1-NEXT: vand.vx v9, v8, a1 -; LMULMAX1-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-NEXT: vadd.vv v8, v9, v8 -; LMULMAX1-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-NEXT: vadd.vv v8, v8, v9 -; LMULMAX1-NEXT: lui a1, 1 -; LMULMAX1-NEXT: addi a1, a1, -241 -; LMULMAX1-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-NEXT: li a1, 257 -; LMULMAX1-NEXT: vmul.vx v8, v8, a1 -; LMULMAX1-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX1-NEXT: vse16.v v8, (a0) -; LMULMAX1-NEXT: ret -; -; LMULMAX2-RV32F-LABEL: cttz_zero_undef_v8i16: -; LMULMAX2-RV32F: # %bb.0: -; LMULMAX2-RV32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX2-RV32F-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV32F-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX2-RV32F-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV32F-NEXT: vfwcvt.f.xu.v v10, v8 -; LMULMAX2-RV32F-NEXT: vnsrl.wi v8, v10, 23 -; LMULMAX2-RV32F-NEXT: li a1, 127 -; LMULMAX2-RV32F-NEXT: vsub.vx v8, v8, a1 -; LMULMAX2-RV32F-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV32F-NEXT: ret -; -; LMULMAX2-RV64F-LABEL: cttz_zero_undef_v8i16: -; LMULMAX2-RV64F: # %bb.0: -; LMULMAX2-RV64F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX2-RV64F-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV64F-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX2-RV64F-NEXT: vand.vv v8, v8, v9 
-; LMULMAX2-RV64F-NEXT: vfwcvt.f.xu.v v10, v8 -; LMULMAX2-RV64F-NEXT: vnsrl.wi v8, v10, 23 -; LMULMAX2-RV64F-NEXT: li a1, 127 -; LMULMAX2-RV64F-NEXT: vsub.vx v8, v8, a1 -; LMULMAX2-RV64F-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV64F-NEXT: ret -; -; LMULMAX2-RV32D-LABEL: cttz_zero_undef_v8i16: -; LMULMAX2-RV32D: # %bb.0: -; LMULMAX2-RV32D-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX2-RV32D-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV32D-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX2-RV32D-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV32D-NEXT: vfwcvt.f.xu.v v10, v8 -; LMULMAX2-RV32D-NEXT: vnsrl.wi v8, v10, 23 -; LMULMAX2-RV32D-NEXT: li a1, 127 -; LMULMAX2-RV32D-NEXT: vsub.vx v8, v8, a1 -; LMULMAX2-RV32D-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV32D-NEXT: ret -; -; LMULMAX2-RV64D-LABEL: cttz_zero_undef_v8i16: -; LMULMAX2-RV64D: # %bb.0: -; LMULMAX2-RV64D-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX2-RV64D-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV64D-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX2-RV64D-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV64D-NEXT: vfwcvt.f.xu.v v10, v8 -; LMULMAX2-RV64D-NEXT: vnsrl.wi v8, v10, 23 -; LMULMAX2-RV64D-NEXT: li a1, 127 -; LMULMAX2-RV64D-NEXT: vsub.vx v8, v8, a1 -; LMULMAX2-RV64D-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV64D-NEXT: ret -; -; LMULMAX8-LABEL: cttz_zero_undef_v8i16: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX8-NEXT: vle16.v v8, (a0) -; LMULMAX8-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX8-NEXT: vand.vv v8, v8, v9 -; LMULMAX8-NEXT: vfwcvt.f.xu.v v10, v8 -; LMULMAX8-NEXT: vnsrl.wi v8, v10, 23 -; LMULMAX8-NEXT: li a1, 127 -; LMULMAX8-NEXT: vsub.vx v8, v8, a1 -; LMULMAX8-NEXT: vse16.v v8, (a0) -; LMULMAX8-NEXT: ret +; RVI-LABEL: cttz_zero_undef_v8i16: +; RVI: # %bb.0: +; RVI-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RVI-NEXT: vle16.v v8, (a0) +; RVI-NEXT: li a1, 1 +; RVI-NEXT: vsub.vx v9, v8, a1 +; RVI-NEXT: vnot.v v8, v8 +; RVI-NEXT: vand.vv v8, v8, v9 +; RVI-NEXT: vsrl.vi v9, v8, 1 +; RVI-NEXT: lui a1, 5 +; RVI-NEXT: addi a1, a1, 
1365 +; RVI-NEXT: vand.vx v9, v9, a1 +; RVI-NEXT: vsub.vv v8, v8, v9 +; RVI-NEXT: lui a1, 3 +; RVI-NEXT: addi a1, a1, 819 +; RVI-NEXT: vand.vx v9, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 2 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: vadd.vv v8, v9, v8 +; RVI-NEXT: vsrl.vi v9, v8, 4 +; RVI-NEXT: vadd.vv v8, v8, v9 +; RVI-NEXT: lui a1, 1 +; RVI-NEXT: addi a1, a1, -241 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: li a1, 257 +; RVI-NEXT: vmul.vx v8, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 8 +; RVI-NEXT: vse16.v v8, (a0) +; RVI-NEXT: ret +; +; RVF-LABEL: cttz_zero_undef_v8i16: +; RVF: # %bb.0: +; RVF-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RVF-NEXT: vle16.v v8, (a0) +; RVF-NEXT: vrsub.vi v9, v8, 0 +; RVF-NEXT: vand.vv v8, v8, v9 +; RVF-NEXT: vfwcvt.f.xu.v v10, v8 +; RVF-NEXT: vnsrl.wi v8, v10, 23 +; RVF-NEXT: li a1, 127 +; RVF-NEXT: vsub.vx v8, v8, a1 +; RVF-NEXT: vse16.v v8, (a0) +; RVF-NEXT: ret +; +; RVD-LABEL: cttz_zero_undef_v8i16: +; RVD: # %bb.0: +; RVD-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RVD-NEXT: vle16.v v8, (a0) +; RVD-NEXT: vrsub.vi v9, v8, 0 +; RVD-NEXT: vand.vv v8, v8, v9 +; RVD-NEXT: vfwcvt.f.xu.v v10, v8 +; RVD-NEXT: vnsrl.wi v8, v10, 23 +; RVD-NEXT: li a1, 127 +; RVD-NEXT: vsub.vx v8, v8, a1 +; RVD-NEXT: vse16.v v8, (a0) +; RVD-NEXT: ret ; ; ZVBB-LABEL: cttz_zero_undef_v8i16: ; ZVBB: # %bb.0: @@ -1422,139 +950,65 @@ define void @cttz_zero_undef_v8i16(ptr %x, ptr %y) nounwind { } define void @cttz_zero_undef_v4i32(ptr %x, ptr %y) nounwind { -; LMULMAX2-RV32I-LABEL: cttz_zero_undef_v4i32: -; LMULMAX2-RV32I: # %bb.0: -; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV32I-NEXT: li a1, 1 -; LMULMAX2-RV32I-NEXT: vsub.vx v9, v8, a1 -; LMULMAX2-RV32I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV32I-NEXT: lui a1, 349525 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32I-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV32I-NEXT: 
vsub.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: lui a1, 209715 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32I-NEXT: vand.vx v9, v8, a1 -; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v9, v8 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: lui a1, 61681 -; LMULMAX2-RV32I-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: lui a1, 4112 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 257 -; LMULMAX2-RV32I-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 24 -; LMULMAX2-RV32I-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV32I-NEXT: ret -; -; LMULMAX2-RV64I-LABEL: cttz_zero_undef_v4i32: -; LMULMAX2-RV64I: # %bb.0: -; LMULMAX2-RV64I-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV64I-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV64I-NEXT: li a1, 1 -; LMULMAX2-RV64I-NEXT: vsub.vx v9, v8, a1 -; LMULMAX2-RV64I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV64I-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV64I-NEXT: lui a1, 349525 -; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64I-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: lui a1, 209715 -; LMULMAX2-RV64I-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a1 -; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v9, v8 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: lui a1, 61681 -; LMULMAX2-RV64I-NEXT: addi a1, a1, -241 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: lui a1, 4112 -; LMULMAX2-RV64I-NEXT: addi a1, a1, 257 -; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 24 -; LMULMAX2-RV64I-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV64I-NEXT: ret -; -; LMULMAX2-RV32F-LABEL: cttz_zero_undef_v4i32: 
-; LMULMAX2-RV32F: # %bb.0: -; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV32F-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV32F-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX2-RV32F-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32F-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX2-RV32F-NEXT: fsrm a1 -; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v8, 23 -; LMULMAX2-RV32F-NEXT: li a1, 127 -; LMULMAX2-RV32F-NEXT: vsub.vx v8, v8, a1 -; LMULMAX2-RV32F-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV32F-NEXT: ret -; -; LMULMAX2-RV64F-LABEL: cttz_zero_undef_v4i32: -; LMULMAX2-RV64F: # %bb.0: -; LMULMAX2-RV64F-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV64F-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV64F-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX2-RV64F-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV64F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64F-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX2-RV64F-NEXT: fsrm a1 -; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v8, 23 -; LMULMAX2-RV64F-NEXT: li a1, 127 -; LMULMAX2-RV64F-NEXT: vsub.vx v8, v8, a1 -; LMULMAX2-RV64F-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV64F-NEXT: ret -; -; LMULMAX2-RV32D-LABEL: cttz_zero_undef_v4i32: -; LMULMAX2-RV32D: # %bb.0: -; LMULMAX2-RV32D-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV32D-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV32D-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX2-RV32D-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV32D-NEXT: vfwcvt.f.xu.v v10, v8 -; LMULMAX2-RV32D-NEXT: li a1, 52 -; LMULMAX2-RV32D-NEXT: vnsrl.wx v8, v10, a1 -; LMULMAX2-RV32D-NEXT: li a1, 1023 -; LMULMAX2-RV32D-NEXT: vsub.vx v8, v8, a1 -; LMULMAX2-RV32D-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV32D-NEXT: ret -; -; LMULMAX2-RV64D-LABEL: cttz_zero_undef_v4i32: -; LMULMAX2-RV64D: # %bb.0: -; LMULMAX2-RV64D-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV64D-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV64D-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX2-RV64D-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV64D-NEXT: vfwcvt.f.xu.v v10, v8 -; LMULMAX2-RV64D-NEXT: li a1, 52 -; LMULMAX2-RV64D-NEXT: vnsrl.wx v8, 
v10, a1 -; LMULMAX2-RV64D-NEXT: li a1, 1023 -; LMULMAX2-RV64D-NEXT: vsub.vx v8, v8, a1 -; LMULMAX2-RV64D-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV64D-NEXT: ret -; -; LMULMAX8-LABEL: cttz_zero_undef_v4i32: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX8-NEXT: vle32.v v8, (a0) -; LMULMAX8-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX8-NEXT: vand.vv v8, v8, v9 -; LMULMAX8-NEXT: vfwcvt.f.xu.v v10, v8 -; LMULMAX8-NEXT: li a1, 52 -; LMULMAX8-NEXT: vnsrl.wx v8, v10, a1 -; LMULMAX8-NEXT: li a1, 1023 -; LMULMAX8-NEXT: vsub.vx v8, v8, a1 -; LMULMAX8-NEXT: vse32.v v8, (a0) -; LMULMAX8-NEXT: ret +; RVI-LABEL: cttz_zero_undef_v4i32: +; RVI: # %bb.0: +; RVI-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RVI-NEXT: vle32.v v8, (a0) +; RVI-NEXT: li a1, 1 +; RVI-NEXT: vsub.vx v9, v8, a1 +; RVI-NEXT: vnot.v v8, v8 +; RVI-NEXT: vand.vv v8, v8, v9 +; RVI-NEXT: vsrl.vi v9, v8, 1 +; RVI-NEXT: lui a1, 349525 +; RVI-NEXT: addi a1, a1, 1365 +; RVI-NEXT: vand.vx v9, v9, a1 +; RVI-NEXT: vsub.vv v8, v8, v9 +; RVI-NEXT: lui a1, 209715 +; RVI-NEXT: addi a1, a1, 819 +; RVI-NEXT: vand.vx v9, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 2 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: vadd.vv v8, v9, v8 +; RVI-NEXT: vsrl.vi v9, v8, 4 +; RVI-NEXT: vadd.vv v8, v8, v9 +; RVI-NEXT: lui a1, 61681 +; RVI-NEXT: addi a1, a1, -241 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: lui a1, 4112 +; RVI-NEXT: addi a1, a1, 257 +; RVI-NEXT: vmul.vx v8, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 24 +; RVI-NEXT: vse32.v v8, (a0) +; RVI-NEXT: ret +; +; RVF-LABEL: cttz_zero_undef_v4i32: +; RVF: # %bb.0: +; RVF-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RVF-NEXT: vle32.v v8, (a0) +; RVF-NEXT: vrsub.vi v9, v8, 0 +; RVF-NEXT: vand.vv v8, v8, v9 +; RVF-NEXT: fsrmi a1, 1 +; RVF-NEXT: vfcvt.f.xu.v v8, v8 +; RVF-NEXT: fsrm a1 +; RVF-NEXT: vsrl.vi v8, v8, 23 +; RVF-NEXT: li a1, 127 +; RVF-NEXT: vsub.vx v8, v8, a1 +; RVF-NEXT: vse32.v v8, (a0) +; RVF-NEXT: ret +; +; RVD-LABEL: cttz_zero_undef_v4i32: +; RVD: # %bb.0: +; RVD-NEXT: 
vsetivli zero, 4, e32, m1, ta, ma +; RVD-NEXT: vle32.v v8, (a0) +; RVD-NEXT: vrsub.vi v9, v8, 0 +; RVD-NEXT: vand.vv v8, v8, v9 +; RVD-NEXT: vfwcvt.f.xu.v v10, v8 +; RVD-NEXT: li a1, 52 +; RVD-NEXT: vnsrl.wx v8, v10, a1 +; RVD-NEXT: li a1, 1023 +; RVD-NEXT: vsub.vx v8, v8, a1 +; RVD-NEXT: vse32.v v8, (a0) +; RVD-NEXT: ret ; ; ZVBB-LABEL: cttz_zero_undef_v4i32: ; ZVBB: # %bb.0: @@ -1571,171 +1025,139 @@ define void @cttz_zero_undef_v4i32(ptr %x, ptr %y) nounwind { } define void @cttz_zero_undef_v2i64(ptr %x, ptr %y) nounwind { -; LMULMAX2-RV32I-LABEL: cttz_zero_undef_v2i64: -; LMULMAX2-RV32I: # %bb.0: -; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32I-NEXT: li a1, 1 -; LMULMAX2-RV32I-NEXT: vsub.vx v9, v8, a1 -; LMULMAX2-RV32I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV32I-NEXT: lui a1, 349525 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vand.vv v9, v9, v10 -; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: lui a1, 209715 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vand.vv v10, v8, v9 -; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: lui a1, 61681 -; LMULMAX2-RV32I-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, 
v9 -; LMULMAX2-RV32I-NEXT: lui a1, 4112 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 257 -; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: li a1, 56 -; LMULMAX2-RV32I-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32I-NEXT: ret -; -; LMULMAX2-RV64I-LABEL: cttz_zero_undef_v2i64: -; LMULMAX2-RV64I: # %bb.0: -; LMULMAX2-RV64I-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV64I-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64I-NEXT: li a1, 1 -; LMULMAX2-RV64I-NEXT: vsub.vx v9, v8, a1 -; LMULMAX2-RV64I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV64I-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV64I-NEXT: lui a1, 349525 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64I-NEXT: add a1, a1, a2 -; LMULMAX2-RV64I-NEXT: vand.vx v9, v9, a1 -; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: lui a1, 209715 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64I-NEXT: add a1, a1, a2 -; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a1 -; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v9, v8 -; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: lui a1, 61681 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241 -; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64I-NEXT: add a1, a1, a2 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: lui a1, 4112 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 257 -; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64I-NEXT: add a1, a1, a2 -; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: li a1, 56 -; LMULMAX2-RV64I-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vse64.v v8, (a0) -; 
LMULMAX2-RV64I-NEXT: ret -; -; LMULMAX2-RV32F-LABEL: cttz_zero_undef_v2i64: -; LMULMAX2-RV32F: # %bb.0: -; LMULMAX2-RV32F-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32F-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32F-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX2-RV32F-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v9, v8 -; LMULMAX2-RV32F-NEXT: fsrm a1 -; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v9, 23 -; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; LMULMAX2-RV32F-NEXT: vzext.vf2 v9, v8 -; LMULMAX2-RV32F-NEXT: li a1, 127 -; LMULMAX2-RV32F-NEXT: vsub.vx v8, v9, a1 -; LMULMAX2-RV32F-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32F-NEXT: ret -; -; LMULMAX2-RV64F-LABEL: cttz_zero_undef_v2i64: -; LMULMAX2-RV64F: # %bb.0: -; LMULMAX2-RV64F-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV64F-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64F-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX2-RV64F-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV64F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; LMULMAX2-RV64F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v9, v8 -; LMULMAX2-RV64F-NEXT: fsrm a1 -; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v9, 23 -; LMULMAX2-RV64F-NEXT: li a1, 127 -; LMULMAX2-RV64F-NEXT: vwsubu.vx v9, v8, a1 -; LMULMAX2-RV64F-NEXT: vse64.v v9, (a0) -; LMULMAX2-RV64F-NEXT: ret -; -; LMULMAX2-RV32D-LABEL: cttz_zero_undef_v2i64: -; LMULMAX2-RV32D: # %bb.0: -; LMULMAX2-RV32D-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32D-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32D-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX2-RV32D-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV32D-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX2-RV32D-NEXT: fsrm a1 -; LMULMAX2-RV32D-NEXT: li a1, 52 -; LMULMAX2-RV32D-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX2-RV32D-NEXT: li a1, 1023 -; LMULMAX2-RV32D-NEXT: vsub.vx v8, v8, a1 -; LMULMAX2-RV32D-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32D-NEXT: ret -; -; 
LMULMAX2-RV64D-LABEL: cttz_zero_undef_v2i64: -; LMULMAX2-RV64D: # %bb.0: -; LMULMAX2-RV64D-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV64D-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64D-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX2-RV64D-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV64D-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64D-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX2-RV64D-NEXT: fsrm a1 -; LMULMAX2-RV64D-NEXT: li a1, 52 -; LMULMAX2-RV64D-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX2-RV64D-NEXT: li a1, 1023 -; LMULMAX2-RV64D-NEXT: vsub.vx v8, v8, a1 -; LMULMAX2-RV64D-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV64D-NEXT: ret -; -; LMULMAX8-LABEL: cttz_zero_undef_v2i64: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX8-NEXT: vle64.v v8, (a0) -; LMULMAX8-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX8-NEXT: vand.vv v8, v8, v9 -; LMULMAX8-NEXT: fsrmi a1, 1 -; LMULMAX8-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX8-NEXT: fsrm a1 -; LMULMAX8-NEXT: li a1, 52 -; LMULMAX8-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX8-NEXT: li a1, 1023 -; LMULMAX8-NEXT: vsub.vx v8, v8, a1 -; LMULMAX8-NEXT: vse64.v v8, (a0) -; LMULMAX8-NEXT: ret +; RV32I-LABEL: cttz_zero_undef_v2i64: +; RV32I: # %bb.0: +; RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32I-NEXT: vle64.v v8, (a0) +; RV32I-NEXT: li a1, 1 +; RV32I-NEXT: vsub.vx v9, v8, a1 +; RV32I-NEXT: vnot.v v8, v8 +; RV32I-NEXT: vand.vv v8, v8, v9 +; RV32I-NEXT: vsrl.vi v9, v8, 1 +; RV32I-NEXT: lui a1, 349525 +; RV32I-NEXT: addi a1, a1, 1365 +; RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32I-NEXT: vmv.v.x v10, a1 +; RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32I-NEXT: vand.vv v9, v9, v10 +; RV32I-NEXT: vsub.vv v8, v8, v9 +; RV32I-NEXT: lui a1, 209715 +; RV32I-NEXT: addi a1, a1, 819 +; RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32I-NEXT: vmv.v.x v9, a1 +; RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32I-NEXT: vand.vv v10, v8, v9 +; RV32I-NEXT: vsrl.vi v8, v8, 2 +; RV32I-NEXT: vand.vv v8, v8, v9 +; RV32I-NEXT: vadd.vv v8, v10, v8 +; 
RV32I-NEXT: vsrl.vi v9, v8, 4 +; RV32I-NEXT: vadd.vv v8, v8, v9 +; RV32I-NEXT: lui a1, 61681 +; RV32I-NEXT: addi a1, a1, -241 +; RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32I-NEXT: vmv.v.x v9, a1 +; RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32I-NEXT: vand.vv v8, v8, v9 +; RV32I-NEXT: lui a1, 4112 +; RV32I-NEXT: addi a1, a1, 257 +; RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32I-NEXT: vmv.v.x v9, a1 +; RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32I-NEXT: vmul.vv v8, v8, v9 +; RV32I-NEXT: li a1, 56 +; RV32I-NEXT: vsrl.vx v8, v8, a1 +; RV32I-NEXT: vse64.v v8, (a0) +; RV32I-NEXT: ret +; +; RV64I-LABEL: cttz_zero_undef_v2i64: +; RV64I: # %bb.0: +; RV64I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64I-NEXT: vle64.v v8, (a0) +; RV64I-NEXT: li a1, 1 +; RV64I-NEXT: vsub.vx v9, v8, a1 +; RV64I-NEXT: vnot.v v8, v8 +; RV64I-NEXT: vand.vv v8, v8, v9 +; RV64I-NEXT: vsrl.vi v9, v8, 1 +; RV64I-NEXT: lui a1, 349525 +; RV64I-NEXT: addiw a1, a1, 1365 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: vand.vx v9, v9, a1 +; RV64I-NEXT: vsub.vv v8, v8, v9 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: vand.vx v9, v8, a1 +; RV64I-NEXT: vsrl.vi v8, v8, 2 +; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: vadd.vv v8, v9, v8 +; RV64I-NEXT: vsrl.vi v9, v8, 4 +; RV64I-NEXT: vadd.vv v8, v8, v9 +; RV64I-NEXT: lui a1, 61681 +; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: lui a1, 4112 +; RV64I-NEXT: addiw a1, a1, 257 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: vmul.vx v8, v8, a1 +; RV64I-NEXT: li a1, 56 +; RV64I-NEXT: vsrl.vx v8, v8, a1 +; RV64I-NEXT: vse64.v v8, (a0) +; RV64I-NEXT: ret +; +; RV32F-LABEL: cttz_zero_undef_v2i64: +; RV32F: # %bb.0: +; RV32F-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32F-NEXT: vle64.v v8, 
(a0) +; RV32F-NEXT: vrsub.vi v9, v8, 0 +; RV32F-NEXT: vand.vv v8, v8, v9 +; RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV32F-NEXT: fsrmi a1, 1 +; RV32F-NEXT: vfncvt.f.xu.w v9, v8 +; RV32F-NEXT: fsrm a1 +; RV32F-NEXT: vsrl.vi v8, v9, 23 +; RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32F-NEXT: vzext.vf2 v9, v8 +; RV32F-NEXT: li a1, 127 +; RV32F-NEXT: vsub.vx v8, v9, a1 +; RV32F-NEXT: vse64.v v8, (a0) +; RV32F-NEXT: ret +; +; RV64F-LABEL: cttz_zero_undef_v2i64: +; RV64F: # %bb.0: +; RV64F-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64F-NEXT: vle64.v v8, (a0) +; RV64F-NEXT: vrsub.vi v9, v8, 0 +; RV64F-NEXT: vand.vv v8, v8, v9 +; RV64F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV64F-NEXT: fsrmi a1, 1 +; RV64F-NEXT: vfncvt.f.xu.w v9, v8 +; RV64F-NEXT: fsrm a1 +; RV64F-NEXT: vsrl.vi v8, v9, 23 +; RV64F-NEXT: li a1, 127 +; RV64F-NEXT: vwsubu.vx v9, v8, a1 +; RV64F-NEXT: vse64.v v9, (a0) +; RV64F-NEXT: ret +; +; RVD-LABEL: cttz_zero_undef_v2i64: +; RVD: # %bb.0: +; RVD-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RVD-NEXT: vle64.v v8, (a0) +; RVD-NEXT: vrsub.vi v9, v8, 0 +; RVD-NEXT: vand.vv v8, v8, v9 +; RVD-NEXT: fsrmi a1, 1 +; RVD-NEXT: vfcvt.f.xu.v v8, v8 +; RVD-NEXT: fsrm a1 +; RVD-NEXT: li a1, 52 +; RVD-NEXT: vsrl.vx v8, v8, a1 +; RVD-NEXT: li a1, 1023 +; RVD-NEXT: vsub.vx v8, v8, a1 +; RVD-NEXT: vse64.v v8, (a0) +; RVD-NEXT: ret ; ; ZVBB-LABEL: cttz_zero_undef_v2i64: ; ZVBB: # %bb.0: @@ -1752,86 +1174,65 @@ define void @cttz_zero_undef_v2i64(ptr %x, ptr %y) nounwind { } define void @cttz_zero_undef_v32i8(ptr %x, ptr %y) nounwind { -; LMULMAX2-LABEL: cttz_zero_undef_v32i8: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a1, 32 -; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-NEXT: vle8.v v8, (a0) -; LMULMAX2-NEXT: li a1, 1 -; LMULMAX2-NEXT: vsub.vx v10, v8, a1 -; LMULMAX2-NEXT: vnot.v v8, v8 -; LMULMAX2-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-NEXT: li a1, 85 -; LMULMAX2-NEXT: vand.vx v10, v10, a1 -; 
LMULMAX2-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-NEXT: li a1, 51 -; LMULMAX2-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-NEXT: vand.vi v8, v8, 15 -; LMULMAX2-NEXT: vse8.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: cttz_zero_undef_v32i8: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vle8.v v8, (a1) -; LMULMAX1-NEXT: vle8.v v9, (a0) -; LMULMAX1-NEXT: li a2, 1 -; LMULMAX1-NEXT: vsub.vx v10, v8, a2 -; LMULMAX1-NEXT: vnot.v v8, v8 -; LMULMAX1-NEXT: vand.vv v8, v8, v10 -; LMULMAX1-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-NEXT: li a3, 85 -; LMULMAX1-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-NEXT: li a4, 51 -; LMULMAX1-NEXT: vand.vx v10, v8, a4 -; LMULMAX1-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-NEXT: vand.vx v8, v8, a4 -; LMULMAX1-NEXT: vadd.vv v8, v10, v8 -; LMULMAX1-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-NEXT: vand.vi v8, v8, 15 -; LMULMAX1-NEXT: vsub.vx v10, v9, a2 -; LMULMAX1-NEXT: vnot.v v9, v9 -; LMULMAX1-NEXT: vand.vv v9, v9, v10 -; LMULMAX1-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-NEXT: vsub.vv v9, v9, v10 -; LMULMAX1-NEXT: vand.vx v10, v9, a4 -; LMULMAX1-NEXT: vsrl.vi v9, v9, 2 -; LMULMAX1-NEXT: vand.vx v9, v9, a4 -; LMULMAX1-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-NEXT: vand.vi v9, v9, 15 -; LMULMAX1-NEXT: vse8.v v9, (a0) -; LMULMAX1-NEXT: vse8.v v8, (a1) -; LMULMAX1-NEXT: ret -; -; LMULMAX8-LABEL: cttz_zero_undef_v32i8: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: li a1, 32 -; LMULMAX8-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX8-NEXT: vle8.v v8, (a0) -; LMULMAX8-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX8-NEXT: vand.vv v8, v8, v10 -; 
LMULMAX8-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; LMULMAX8-NEXT: vzext.vf2 v12, v8 -; LMULMAX8-NEXT: vfwcvt.f.xu.v v16, v12 -; LMULMAX8-NEXT: vnsrl.wi v8, v16, 23 -; LMULMAX8-NEXT: vsetvli zero, zero, e8, m2, ta, ma -; LMULMAX8-NEXT: vnsrl.wi v12, v8, 0 -; LMULMAX8-NEXT: li a1, 127 -; LMULMAX8-NEXT: vsub.vx v8, v12, a1 -; LMULMAX8-NEXT: vse8.v v8, (a0) -; LMULMAX8-NEXT: ret +; RVI-LABEL: cttz_zero_undef_v32i8: +; RVI: # %bb.0: +; RVI-NEXT: li a1, 32 +; RVI-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; RVI-NEXT: vle8.v v8, (a0) +; RVI-NEXT: li a1, 1 +; RVI-NEXT: vsub.vx v10, v8, a1 +; RVI-NEXT: vnot.v v8, v8 +; RVI-NEXT: vand.vv v8, v8, v10 +; RVI-NEXT: vsrl.vi v10, v8, 1 +; RVI-NEXT: li a1, 85 +; RVI-NEXT: vand.vx v10, v10, a1 +; RVI-NEXT: vsub.vv v8, v8, v10 +; RVI-NEXT: li a1, 51 +; RVI-NEXT: vand.vx v10, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 2 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: vadd.vv v8, v10, v8 +; RVI-NEXT: vsrl.vi v10, v8, 4 +; RVI-NEXT: vadd.vv v8, v8, v10 +; RVI-NEXT: vand.vi v8, v8, 15 +; RVI-NEXT: vse8.v v8, (a0) +; RVI-NEXT: ret +; +; RVF-LABEL: cttz_zero_undef_v32i8: +; RVF: # %bb.0: +; RVF-NEXT: li a1, 32 +; RVF-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; RVF-NEXT: vle8.v v8, (a0) +; RVF-NEXT: vrsub.vi v10, v8, 0 +; RVF-NEXT: vand.vv v8, v8, v10 +; RVF-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; RVF-NEXT: vzext.vf2 v12, v8 +; RVF-NEXT: vfwcvt.f.xu.v v16, v12 +; RVF-NEXT: vnsrl.wi v8, v16, 23 +; RVF-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; RVF-NEXT: vnsrl.wi v12, v8, 0 +; RVF-NEXT: li a1, 127 +; RVF-NEXT: vsub.vx v8, v12, a1 +; RVF-NEXT: vse8.v v8, (a0) +; RVF-NEXT: ret +; +; RVD-LABEL: cttz_zero_undef_v32i8: +; RVD: # %bb.0: +; RVD-NEXT: li a1, 32 +; RVD-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; RVD-NEXT: vle8.v v8, (a0) +; RVD-NEXT: vrsub.vi v10, v8, 0 +; RVD-NEXT: vand.vv v8, v8, v10 +; RVD-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; RVD-NEXT: vzext.vf2 v12, v8 +; RVD-NEXT: vfwcvt.f.xu.v v16, v12 +; RVD-NEXT: vnsrl.wi v8, v16, 23 +; 
RVD-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; RVD-NEXT: vnsrl.wi v12, v8, 0 +; RVD-NEXT: li a1, 127 +; RVD-NEXT: vsub.vx v8, v12, a1 +; RVD-NEXT: vse8.v v8, (a0) +; RVD-NEXT: ret ; ; ZVBB-LABEL: cttz_zero_undef_v32i8: ; ZVBB: # %bb.0: @@ -1849,96 +1250,61 @@ define void @cttz_zero_undef_v32i8(ptr %x, ptr %y) nounwind { } define void @cttz_zero_undef_v16i16(ptr %x, ptr %y) nounwind { -; LMULMAX2-LABEL: cttz_zero_undef_v16i16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-NEXT: vle16.v v8, (a0) -; LMULMAX2-NEXT: li a1, 1 -; LMULMAX2-NEXT: vsub.vx v10, v8, a1 -; LMULMAX2-NEXT: vnot.v v8, v8 -; LMULMAX2-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-NEXT: lui a1, 5 -; LMULMAX2-NEXT: addi a1, a1, 1365 -; LMULMAX2-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-NEXT: lui a1, 3 -; LMULMAX2-NEXT: addi a1, a1, 819 -; LMULMAX2-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-NEXT: lui a1, 1 -; LMULMAX2-NEXT: addi a1, a1, -241 -; LMULMAX2-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-NEXT: li a1, 257 -; LMULMAX2-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX2-NEXT: vse16.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: cttz_zero_undef_v16i16: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vle16.v v8, (a1) -; LMULMAX1-NEXT: vle16.v v9, (a0) -; LMULMAX1-NEXT: li a2, 1 -; LMULMAX1-NEXT: vsub.vx v10, v8, a2 -; LMULMAX1-NEXT: vnot.v v8, v8 -; LMULMAX1-NEXT: vand.vv v8, v8, v10 -; LMULMAX1-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-NEXT: lui a3, 5 -; LMULMAX1-NEXT: addi a3, a3, 1365 -; LMULMAX1-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-NEXT: lui a4, 3 -; LMULMAX1-NEXT: addi a4, a4, 
819 -; LMULMAX1-NEXT: vand.vx v10, v8, a4 -; LMULMAX1-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-NEXT: vand.vx v8, v8, a4 -; LMULMAX1-NEXT: vadd.vv v8, v10, v8 -; LMULMAX1-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-NEXT: lui a5, 1 -; LMULMAX1-NEXT: addi a5, a5, -241 -; LMULMAX1-NEXT: vand.vx v8, v8, a5 -; LMULMAX1-NEXT: li a6, 257 -; LMULMAX1-NEXT: vmul.vx v8, v8, a6 -; LMULMAX1-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX1-NEXT: vsub.vx v10, v9, a2 -; LMULMAX1-NEXT: vnot.v v9, v9 -; LMULMAX1-NEXT: vand.vv v9, v9, v10 -; LMULMAX1-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-NEXT: vsub.vv v9, v9, v10 -; LMULMAX1-NEXT: vand.vx v10, v9, a4 -; LMULMAX1-NEXT: vsrl.vi v9, v9, 2 -; LMULMAX1-NEXT: vand.vx v9, v9, a4 -; LMULMAX1-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-NEXT: vand.vx v9, v9, a5 -; LMULMAX1-NEXT: vmul.vx v9, v9, a6 -; LMULMAX1-NEXT: vsrl.vi v9, v9, 8 -; LMULMAX1-NEXT: vse16.v v9, (a0) -; LMULMAX1-NEXT: vse16.v v8, (a1) -; LMULMAX1-NEXT: ret -; -; LMULMAX8-LABEL: cttz_zero_undef_v16i16: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX8-NEXT: vle16.v v8, (a0) -; LMULMAX8-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX8-NEXT: vand.vv v8, v8, v10 -; LMULMAX8-NEXT: vfwcvt.f.xu.v v12, v8 -; LMULMAX8-NEXT: vnsrl.wi v8, v12, 23 -; LMULMAX8-NEXT: li a1, 127 -; LMULMAX8-NEXT: vsub.vx v8, v8, a1 -; LMULMAX8-NEXT: vse16.v v8, (a0) -; LMULMAX8-NEXT: ret +; RVI-LABEL: cttz_zero_undef_v16i16: +; RVI: # %bb.0: +; RVI-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RVI-NEXT: vle16.v v8, (a0) +; RVI-NEXT: li a1, 1 +; RVI-NEXT: vsub.vx v10, v8, a1 +; RVI-NEXT: vnot.v v8, v8 +; RVI-NEXT: vand.vv v8, v8, v10 +; RVI-NEXT: vsrl.vi v10, v8, 1 +; RVI-NEXT: lui a1, 5 +; RVI-NEXT: addi a1, a1, 1365 +; RVI-NEXT: vand.vx v10, v10, a1 +; RVI-NEXT: vsub.vv v8, v8, v10 +; RVI-NEXT: lui a1, 3 +; RVI-NEXT: addi a1, a1, 819 +; RVI-NEXT: vand.vx v10, v8, a1 
+; RVI-NEXT: vsrl.vi v8, v8, 2 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: vadd.vv v8, v10, v8 +; RVI-NEXT: vsrl.vi v10, v8, 4 +; RVI-NEXT: vadd.vv v8, v8, v10 +; RVI-NEXT: lui a1, 1 +; RVI-NEXT: addi a1, a1, -241 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: li a1, 257 +; RVI-NEXT: vmul.vx v8, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 8 +; RVI-NEXT: vse16.v v8, (a0) +; RVI-NEXT: ret +; +; RVF-LABEL: cttz_zero_undef_v16i16: +; RVF: # %bb.0: +; RVF-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RVF-NEXT: vle16.v v8, (a0) +; RVF-NEXT: vrsub.vi v10, v8, 0 +; RVF-NEXT: vand.vv v8, v8, v10 +; RVF-NEXT: vfwcvt.f.xu.v v12, v8 +; RVF-NEXT: vnsrl.wi v8, v12, 23 +; RVF-NEXT: li a1, 127 +; RVF-NEXT: vsub.vx v8, v8, a1 +; RVF-NEXT: vse16.v v8, (a0) +; RVF-NEXT: ret +; +; RVD-LABEL: cttz_zero_undef_v16i16: +; RVD: # %bb.0: +; RVD-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RVD-NEXT: vle16.v v8, (a0) +; RVD-NEXT: vrsub.vi v10, v8, 0 +; RVD-NEXT: vand.vv v8, v8, v10 +; RVD-NEXT: vfwcvt.f.xu.v v12, v8 +; RVD-NEXT: vnsrl.wi v8, v12, 23 +; RVD-NEXT: li a1, 127 +; RVD-NEXT: vsub.vx v8, v8, a1 +; RVD-NEXT: vse16.v v8, (a0) +; RVD-NEXT: ret ; ; ZVBB-LABEL: cttz_zero_undef_v16i16: ; ZVBB: # %bb.0: @@ -1955,141 +1321,65 @@ define void @cttz_zero_undef_v16i16(ptr %x, ptr %y) nounwind { } define void @cttz_zero_undef_v8i32(ptr %x, ptr %y) nounwind { -; LMULMAX2-RV32I-LABEL: cttz_zero_undef_v8i32: -; LMULMAX2-RV32I: # %bb.0: -; LMULMAX2-RV32I-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV32I-NEXT: li a1, 1 -; LMULMAX2-RV32I-NEXT: vsub.vx v10, v8, a1 -; LMULMAX2-RV32I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV32I-NEXT: lui a1, 349525 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32I-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: lui a1, 209715 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32I-NEXT: vand.vx 
v10, v8, a1 -; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: lui a1, 61681 -; LMULMAX2-RV32I-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: lui a1, 4112 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 257 -; LMULMAX2-RV32I-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 24 -; LMULMAX2-RV32I-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV32I-NEXT: ret -; -; LMULMAX2-RV64I-LABEL: cttz_zero_undef_v8i32: -; LMULMAX2-RV64I: # %bb.0: -; LMULMAX2-RV64I-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV64I-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV64I-NEXT: li a1, 1 -; LMULMAX2-RV64I-NEXT: vsub.vx v10, v8, a1 -; LMULMAX2-RV64I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV64I-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64I-NEXT: lui a1, 349525 -; LMULMAX2-RV64I-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64I-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: lui a1, 209715 -; LMULMAX2-RV64I-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64I-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: lui a1, 61681 -; LMULMAX2-RV64I-NEXT: addi a1, a1, -241 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: lui a1, 4112 -; LMULMAX2-RV64I-NEXT: addi a1, a1, 257 -; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 24 -; LMULMAX2-RV64I-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV64I-NEXT: ret -; -; LMULMAX2-RV32F-LABEL: cttz_zero_undef_v8i32: -; LMULMAX2-RV32F: # %bb.0: -; LMULMAX2-RV32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32F-NEXT: vle32.v 
v8, (a0) -; LMULMAX2-RV32F-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX2-RV32F-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32F-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX2-RV32F-NEXT: fsrm a1 -; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v8, 23 -; LMULMAX2-RV32F-NEXT: li a1, 127 -; LMULMAX2-RV32F-NEXT: vsub.vx v8, v8, a1 -; LMULMAX2-RV32F-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV32F-NEXT: ret -; -; LMULMAX2-RV64F-LABEL: cttz_zero_undef_v8i32: -; LMULMAX2-RV64F: # %bb.0: -; LMULMAX2-RV64F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV64F-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV64F-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX2-RV64F-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV64F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64F-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX2-RV64F-NEXT: fsrm a1 -; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v8, 23 -; LMULMAX2-RV64F-NEXT: li a1, 127 -; LMULMAX2-RV64F-NEXT: vsub.vx v8, v8, a1 -; LMULMAX2-RV64F-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV64F-NEXT: ret -; -; LMULMAX2-RV32D-LABEL: cttz_zero_undef_v8i32: -; LMULMAX2-RV32D: # %bb.0: -; LMULMAX2-RV32D-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32D-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV32D-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX2-RV32D-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV32D-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX2-RV32D-NEXT: fsrm a1 -; LMULMAX2-RV32D-NEXT: vsrl.vi v8, v8, 23 -; LMULMAX2-RV32D-NEXT: li a1, 127 -; LMULMAX2-RV32D-NEXT: vsub.vx v8, v8, a1 -; LMULMAX2-RV32D-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV32D-NEXT: ret -; -; LMULMAX2-RV64D-LABEL: cttz_zero_undef_v8i32: -; LMULMAX2-RV64D: # %bb.0: -; LMULMAX2-RV64D-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV64D-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV64D-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX2-RV64D-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV64D-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64D-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX2-RV64D-NEXT: fsrm a1 -; LMULMAX2-RV64D-NEXT: vsrl.vi v8, v8, 23 -; LMULMAX2-RV64D-NEXT: li a1, 127 -; 
LMULMAX2-RV64D-NEXT: vsub.vx v8, v8, a1 -; LMULMAX2-RV64D-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV64D-NEXT: ret -; -; LMULMAX8-LABEL: cttz_zero_undef_v8i32: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX8-NEXT: vle32.v v8, (a0) -; LMULMAX8-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX8-NEXT: vand.vv v8, v8, v10 -; LMULMAX8-NEXT: vfwcvt.f.xu.v v12, v8 -; LMULMAX8-NEXT: li a1, 52 -; LMULMAX8-NEXT: vnsrl.wx v8, v12, a1 -; LMULMAX8-NEXT: li a1, 1023 -; LMULMAX8-NEXT: vsub.vx v8, v8, a1 -; LMULMAX8-NEXT: vse32.v v8, (a0) -; LMULMAX8-NEXT: ret +; RVI-LABEL: cttz_zero_undef_v8i32: +; RVI: # %bb.0: +; RVI-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RVI-NEXT: vle32.v v8, (a0) +; RVI-NEXT: li a1, 1 +; RVI-NEXT: vsub.vx v10, v8, a1 +; RVI-NEXT: vnot.v v8, v8 +; RVI-NEXT: vand.vv v8, v8, v10 +; RVI-NEXT: vsrl.vi v10, v8, 1 +; RVI-NEXT: lui a1, 349525 +; RVI-NEXT: addi a1, a1, 1365 +; RVI-NEXT: vand.vx v10, v10, a1 +; RVI-NEXT: vsub.vv v8, v8, v10 +; RVI-NEXT: lui a1, 209715 +; RVI-NEXT: addi a1, a1, 819 +; RVI-NEXT: vand.vx v10, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 2 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: vadd.vv v8, v10, v8 +; RVI-NEXT: vsrl.vi v10, v8, 4 +; RVI-NEXT: vadd.vv v8, v8, v10 +; RVI-NEXT: lui a1, 61681 +; RVI-NEXT: addi a1, a1, -241 +; RVI-NEXT: vand.vx v8, v8, a1 +; RVI-NEXT: lui a1, 4112 +; RVI-NEXT: addi a1, a1, 257 +; RVI-NEXT: vmul.vx v8, v8, a1 +; RVI-NEXT: vsrl.vi v8, v8, 24 +; RVI-NEXT: vse32.v v8, (a0) +; RVI-NEXT: ret +; +; RVF-LABEL: cttz_zero_undef_v8i32: +; RVF: # %bb.0: +; RVF-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RVF-NEXT: vle32.v v8, (a0) +; RVF-NEXT: vrsub.vi v10, v8, 0 +; RVF-NEXT: vand.vv v8, v8, v10 +; RVF-NEXT: fsrmi a1, 1 +; RVF-NEXT: vfcvt.f.xu.v v8, v8 +; RVF-NEXT: fsrm a1 +; RVF-NEXT: vsrl.vi v8, v8, 23 +; RVF-NEXT: li a1, 127 +; RVF-NEXT: vsub.vx v8, v8, a1 +; RVF-NEXT: vse32.v v8, (a0) +; RVF-NEXT: ret +; +; RVD-LABEL: cttz_zero_undef_v8i32: +; RVD: # %bb.0: +; RVD-NEXT: vsetivli zero, 8, e32, m2, ta, 
ma +; RVD-NEXT: vle32.v v8, (a0) +; RVD-NEXT: vrsub.vi v10, v8, 0 +; RVD-NEXT: vand.vv v8, v8, v10 +; RVD-NEXT: vfwcvt.f.xu.v v12, v8 +; RVD-NEXT: li a1, 52 +; RVD-NEXT: vnsrl.wx v8, v12, a1 +; RVD-NEXT: li a1, 1023 +; RVD-NEXT: vsub.vx v8, v8, a1 +; RVD-NEXT: vse32.v v8, (a0) +; RVD-NEXT: ret ; ; ZVBB-LABEL: cttz_zero_undef_v8i32: ; ZVBB: # %bb.0: @@ -2106,171 +1396,139 @@ define void @cttz_zero_undef_v8i32(ptr %x, ptr %y) nounwind { } define void @cttz_zero_undef_v4i64(ptr %x, ptr %y) nounwind { -; LMULMAX2-RV32I-LABEL: cttz_zero_undef_v4i64: -; LMULMAX2-RV32I: # %bb.0: -; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32I-NEXT: li a1, 1 -; LMULMAX2-RV32I-NEXT: vsub.vx v10, v8, a1 -; LMULMAX2-RV32I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV32I-NEXT: lui a1, 349525 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.x v12, a1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vand.vv v10, v10, v12 -; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: lui a1, 209715 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vand.vv v12, v8, v10 -; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v12, v8 -; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: lui a1, 61681 -; LMULMAX2-RV32I-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10 -; 
LMULMAX2-RV32I-NEXT: lui a1, 4112 -; LMULMAX2-RV32I-NEXT: addi a1, a1, 257 -; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: li a1, 56 -; LMULMAX2-RV32I-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX2-RV32I-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32I-NEXT: ret -; -; LMULMAX2-RV64I-LABEL: cttz_zero_undef_v4i64: -; LMULMAX2-RV64I: # %bb.0: -; LMULMAX2-RV64I-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV64I-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64I-NEXT: li a1, 1 -; LMULMAX2-RV64I-NEXT: vsub.vx v10, v8, a1 -; LMULMAX2-RV64I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV64I-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64I-NEXT: lui a1, 349525 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64I-NEXT: add a1, a1, a2 -; LMULMAX2-RV64I-NEXT: vand.vx v10, v10, a1 -; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: lui a1, 209715 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64I-NEXT: add a1, a1, a2 -; LMULMAX2-RV64I-NEXT: vand.vx v10, v8, a1 -; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: lui a1, 61681 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241 -; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64I-NEXT: add a1, a1, a2 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: lui a1, 4112 -; LMULMAX2-RV64I-NEXT: addiw a1, a1, 257 -; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64I-NEXT: add a1, a1, a2 -; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: li a1, 56 -; LMULMAX2-RV64I-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vse64.v v8, (a0) -; 
LMULMAX2-RV64I-NEXT: ret -; -; LMULMAX2-RV32F-LABEL: cttz_zero_undef_v4i64: -; LMULMAX2-RV32F: # %bb.0: -; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32F-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32F-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX2-RV32F-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v10, v8 -; LMULMAX2-RV32F-NEXT: fsrm a1 -; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v10, 23 -; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; LMULMAX2-RV32F-NEXT: vzext.vf2 v10, v8 -; LMULMAX2-RV32F-NEXT: li a1, 127 -; LMULMAX2-RV32F-NEXT: vsub.vx v8, v10, a1 -; LMULMAX2-RV32F-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32F-NEXT: ret -; -; LMULMAX2-RV64F-LABEL: cttz_zero_undef_v4i64: -; LMULMAX2-RV64F: # %bb.0: -; LMULMAX2-RV64F-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV64F-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64F-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX2-RV64F-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV64F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; LMULMAX2-RV64F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v10, v8 -; LMULMAX2-RV64F-NEXT: fsrm a1 -; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v10, 23 -; LMULMAX2-RV64F-NEXT: li a1, 127 -; LMULMAX2-RV64F-NEXT: vwsubu.vx v10, v8, a1 -; LMULMAX2-RV64F-NEXT: vse64.v v10, (a0) -; LMULMAX2-RV64F-NEXT: ret -; -; LMULMAX2-RV32D-LABEL: cttz_zero_undef_v4i64: -; LMULMAX2-RV32D: # %bb.0: -; LMULMAX2-RV32D-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32D-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32D-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX2-RV32D-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV32D-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX2-RV32D-NEXT: fsrm a1 -; LMULMAX2-RV32D-NEXT: li a1, 52 -; LMULMAX2-RV32D-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX2-RV32D-NEXT: li a1, 1023 -; LMULMAX2-RV32D-NEXT: vsub.vx v8, v8, a1 -; LMULMAX2-RV32D-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32D-NEXT: ret -; 
-; LMULMAX2-RV64D-LABEL: cttz_zero_undef_v4i64: -; LMULMAX2-RV64D: # %bb.0: -; LMULMAX2-RV64D-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV64D-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64D-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX2-RV64D-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV64D-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64D-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX2-RV64D-NEXT: fsrm a1 -; LMULMAX2-RV64D-NEXT: li a1, 52 -; LMULMAX2-RV64D-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX2-RV64D-NEXT: li a1, 1023 -; LMULMAX2-RV64D-NEXT: vsub.vx v8, v8, a1 -; LMULMAX2-RV64D-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV64D-NEXT: ret -; -; LMULMAX8-LABEL: cttz_zero_undef_v4i64: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX8-NEXT: vle64.v v8, (a0) -; LMULMAX8-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX8-NEXT: vand.vv v8, v8, v10 -; LMULMAX8-NEXT: fsrmi a1, 1 -; LMULMAX8-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX8-NEXT: fsrm a1 -; LMULMAX8-NEXT: li a1, 52 -; LMULMAX8-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX8-NEXT: li a1, 1023 -; LMULMAX8-NEXT: vsub.vx v8, v8, a1 -; LMULMAX8-NEXT: vse64.v v8, (a0) -; LMULMAX8-NEXT: ret +; RV32I-LABEL: cttz_zero_undef_v4i64: +; RV32I: # %bb.0: +; RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32I-NEXT: vle64.v v8, (a0) +; RV32I-NEXT: li a1, 1 +; RV32I-NEXT: vsub.vx v10, v8, a1 +; RV32I-NEXT: vnot.v v8, v8 +; RV32I-NEXT: vand.vv v8, v8, v10 +; RV32I-NEXT: vsrl.vi v10, v8, 1 +; RV32I-NEXT: lui a1, 349525 +; RV32I-NEXT: addi a1, a1, 1365 +; RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32I-NEXT: vmv.v.x v12, a1 +; RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32I-NEXT: vand.vv v10, v10, v12 +; RV32I-NEXT: vsub.vv v8, v8, v10 +; RV32I-NEXT: lui a1, 209715 +; RV32I-NEXT: addi a1, a1, 819 +; RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32I-NEXT: vmv.v.x v10, a1 +; RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32I-NEXT: vand.vv v12, v8, v10 +; RV32I-NEXT: vsrl.vi v8, v8, 2 +; RV32I-NEXT: vand.vv v8, v8, v10 +; RV32I-NEXT: vadd.vv v8, v12, 
v8 +; RV32I-NEXT: vsrl.vi v10, v8, 4 +; RV32I-NEXT: vadd.vv v8, v8, v10 +; RV32I-NEXT: lui a1, 61681 +; RV32I-NEXT: addi a1, a1, -241 +; RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32I-NEXT: vmv.v.x v10, a1 +; RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32I-NEXT: vand.vv v8, v8, v10 +; RV32I-NEXT: lui a1, 4112 +; RV32I-NEXT: addi a1, a1, 257 +; RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32I-NEXT: vmv.v.x v10, a1 +; RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32I-NEXT: vmul.vv v8, v8, v10 +; RV32I-NEXT: li a1, 56 +; RV32I-NEXT: vsrl.vx v8, v8, a1 +; RV32I-NEXT: vse64.v v8, (a0) +; RV32I-NEXT: ret +; +; RV64I-LABEL: cttz_zero_undef_v4i64: +; RV64I: # %bb.0: +; RV64I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64I-NEXT: vle64.v v8, (a0) +; RV64I-NEXT: li a1, 1 +; RV64I-NEXT: vsub.vx v10, v8, a1 +; RV64I-NEXT: vnot.v v8, v8 +; RV64I-NEXT: vand.vv v8, v8, v10 +; RV64I-NEXT: vsrl.vi v10, v8, 1 +; RV64I-NEXT: lui a1, 349525 +; RV64I-NEXT: addiw a1, a1, 1365 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: vand.vx v10, v10, a1 +; RV64I-NEXT: vsub.vv v8, v8, v10 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: vand.vx v10, v8, a1 +; RV64I-NEXT: vsrl.vi v8, v8, 2 +; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: vadd.vv v8, v10, v8 +; RV64I-NEXT: vsrl.vi v10, v8, 4 +; RV64I-NEXT: vadd.vv v8, v8, v10 +; RV64I-NEXT: lui a1, 61681 +; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: lui a1, 4112 +; RV64I-NEXT: addiw a1, a1, 257 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: vmul.vx v8, v8, a1 +; RV64I-NEXT: li a1, 56 +; RV64I-NEXT: vsrl.vx v8, v8, a1 +; RV64I-NEXT: vse64.v v8, (a0) +; RV64I-NEXT: ret +; +; RV32F-LABEL: cttz_zero_undef_v4i64: +; RV32F: # %bb.0: +; RV32F-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; 
RV32F-NEXT: vle64.v v8, (a0) +; RV32F-NEXT: vrsub.vi v10, v8, 0 +; RV32F-NEXT: vand.vv v8, v8, v10 +; RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV32F-NEXT: fsrmi a1, 1 +; RV32F-NEXT: vfncvt.f.xu.w v10, v8 +; RV32F-NEXT: fsrm a1 +; RV32F-NEXT: vsrl.vi v8, v10, 23 +; RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32F-NEXT: vzext.vf2 v10, v8 +; RV32F-NEXT: li a1, 127 +; RV32F-NEXT: vsub.vx v8, v10, a1 +; RV32F-NEXT: vse64.v v8, (a0) +; RV32F-NEXT: ret +; +; RV64F-LABEL: cttz_zero_undef_v4i64: +; RV64F: # %bb.0: +; RV64F-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64F-NEXT: vle64.v v8, (a0) +; RV64F-NEXT: vrsub.vi v10, v8, 0 +; RV64F-NEXT: vand.vv v8, v8, v10 +; RV64F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV64F-NEXT: fsrmi a1, 1 +; RV64F-NEXT: vfncvt.f.xu.w v10, v8 +; RV64F-NEXT: fsrm a1 +; RV64F-NEXT: vsrl.vi v8, v10, 23 +; RV64F-NEXT: li a1, 127 +; RV64F-NEXT: vwsubu.vx v10, v8, a1 +; RV64F-NEXT: vse64.v v10, (a0) +; RV64F-NEXT: ret +; +; RVD-LABEL: cttz_zero_undef_v4i64: +; RVD: # %bb.0: +; RVD-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RVD-NEXT: vle64.v v8, (a0) +; RVD-NEXT: vrsub.vi v10, v8, 0 +; RVD-NEXT: vand.vv v8, v8, v10 +; RVD-NEXT: fsrmi a1, 1 +; RVD-NEXT: vfcvt.f.xu.v v8, v8 +; RVD-NEXT: fsrm a1 +; RVD-NEXT: li a1, 52 +; RVD-NEXT: vsrl.vx v8, v8, a1 +; RVD-NEXT: li a1, 1023 +; RVD-NEXT: vsub.vx v8, v8, a1 +; RVD-NEXT: vse64.v v8, (a0) +; RVD-NEXT: ret ; ; ZVBB-LABEL: cttz_zero_undef_v4i64: ; ZVBB: # %bb.0: @@ -2286,7 +1544,5 @@ define void @cttz_zero_undef_v4i64(ptr %x, ptr %y) nounwind { ret void } ;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -; LMULMAX1-RV32: {{.*}} -; LMULMAX1-RV64: {{.*}} -; LMULMAX2-RV32: {{.*}} -; LMULMAX2-RV64: {{.*}} +; RV32D: {{.*}} +; RV64D: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll index 4aaefb24d5aa27..369f90521cf00a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll @@ -1,8 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4 +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s define <2 x i16> @sextload_v2i1_v2i16(ptr %x) { ; CHECK-LABEL: sextload_v2i1_v2i16: @@ -138,46 +136,24 @@ define <4 x i32> @zextload_v4i8_v4i32(ptr %x) { } define <4 x i64> @sextload_v4i8_v4i64(ptr %x) { -; LMULMAX1-LABEL: sextload_v4i8_v4i64: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1-NEXT: vle8.v v10, (a0) -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v10, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf8 v9, v8 -; LMULMAX1-NEXT: 
vsext.vf8 v8, v10 -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: sextload_v4i8_v4i64: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX4-NEXT: vle8.v v10, (a0) -; LMULMAX4-NEXT: vsext.vf8 v8, v10 -; LMULMAX4-NEXT: ret +; CHECK-LABEL: sextload_v4i8_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vle8.v v10, (a0) +; CHECK-NEXT: vsext.vf8 v8, v10 +; CHECK-NEXT: ret %y = load <4 x i8>, ptr %x %z = sext <4 x i8> %y to <4 x i64> ret <4 x i64> %z } define <4 x i64> @zextload_v4i8_v4i64(ptr %x) { -; LMULMAX1-LABEL: zextload_v4i8_v4i64: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1-NEXT: vle8.v v10, (a0) -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v10, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf8 v9, v8 -; LMULMAX1-NEXT: vzext.vf8 v8, v10 -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: zextload_v4i8_v4i64: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX4-NEXT: vle8.v v10, (a0) -; LMULMAX4-NEXT: vzext.vf8 v8, v10 -; LMULMAX4-NEXT: ret +; CHECK-LABEL: zextload_v4i8_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vle8.v v10, (a0) +; CHECK-NEXT: vzext.vf8 v8, v10 +; CHECK-NEXT: ret %y = load <4 x i8>, ptr %x %z = zext <4 x i8> %y to <4 x i64> ret <4 x i64> %z @@ -208,324 +184,120 @@ define <8 x i16> @zextload_v8i8_v8i16(ptr %x) { } define <8 x i32> @sextload_v8i8_v8i32(ptr %x) { -; LMULMAX1-LABEL: sextload_v8i8_v8i32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vle8.v v10, (a0) -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v10, 4 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v9, v8 -; LMULMAX1-NEXT: vsext.vf4 v8, v10 -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: sextload_v8i8_v8i32: -; 
LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX4-NEXT: vle8.v v10, (a0) -; LMULMAX4-NEXT: vsext.vf4 v8, v10 -; LMULMAX4-NEXT: ret +; CHECK-LABEL: sextload_v8i8_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle8.v v10, (a0) +; CHECK-NEXT: vsext.vf4 v8, v10 +; CHECK-NEXT: ret %y = load <8 x i8>, ptr %x %z = sext <8 x i8> %y to <8 x i32> ret <8 x i32> %z } define <8 x i32> @zextload_v8i8_v8i32(ptr %x) { -; LMULMAX1-LABEL: zextload_v8i8_v8i32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vle8.v v10, (a0) -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v10, 4 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf4 v9, v8 -; LMULMAX1-NEXT: vzext.vf4 v8, v10 -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: zextload_v8i8_v8i32: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX4-NEXT: vle8.v v10, (a0) -; LMULMAX4-NEXT: vzext.vf4 v8, v10 -; LMULMAX4-NEXT: ret +; CHECK-LABEL: zextload_v8i8_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle8.v v10, (a0) +; CHECK-NEXT: vzext.vf4 v8, v10 +; CHECK-NEXT: ret %y = load <8 x i8>, ptr %x %z = zext <8 x i8> %y to <8 x i32> ret <8 x i32> %z } define <8 x i64> @sextload_v8i8_v8i64(ptr %x) { -; LMULMAX1-LABEL: sextload_v8i8_v8i64: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vle8.v v9, (a0) -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf8 v8, v9 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v11, v9, 4 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf8 v10, v11 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v12, v11, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf8 v11, 
v12 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v12, v9, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf8 v9, v12 -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: sextload_v8i8_v8i64: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; LMULMAX4-NEXT: vle8.v v12, (a0) -; LMULMAX4-NEXT: vsext.vf8 v8, v12 -; LMULMAX4-NEXT: ret +; CHECK-LABEL: sextload_v8i8_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vle8.v v12, (a0) +; CHECK-NEXT: vsext.vf8 v8, v12 +; CHECK-NEXT: ret %y = load <8 x i8>, ptr %x %z = sext <8 x i8> %y to <8 x i64> ret <8 x i64> %z } define <8 x i64> @zextload_v8i8_v8i64(ptr %x) { -; LMULMAX1-LABEL: zextload_v8i8_v8i64: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vle8.v v9, (a0) -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf8 v8, v9 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v11, v9, 4 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf8 v10, v11 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v12, v11, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf8 v11, v12 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v12, v9, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf8 v9, v12 -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: zextload_v8i8_v8i64: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; LMULMAX4-NEXT: vle8.v v12, (a0) -; LMULMAX4-NEXT: vzext.vf8 v8, v12 -; LMULMAX4-NEXT: ret +; CHECK-LABEL: zextload_v8i8_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vle8.v v12, (a0) +; CHECK-NEXT: vzext.vf8 v8, v12 +; CHECK-NEXT: ret %y = load <8 x i8>, ptr %x %z = zext <8 x i8> %y to <8 
x i64> ret <8 x i64> %z } define <16 x i16> @sextload_v16i8_v16i16(ptr %x) { -; LMULMAX1-LABEL: sextload_v16i8_v16i16: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vle8.v v10, (a0) -; LMULMAX1-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v10, 8 -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf2 v9, v8 -; LMULMAX1-NEXT: vsext.vf2 v8, v10 -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: sextload_v16i8_v16i16: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX4-NEXT: vle8.v v10, (a0) -; LMULMAX4-NEXT: vsext.vf2 v8, v10 -; LMULMAX4-NEXT: ret +; CHECK-LABEL: sextload_v16i8_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vle8.v v10, (a0) +; CHECK-NEXT: vsext.vf2 v8, v10 +; CHECK-NEXT: ret %y = load <16 x i8>, ptr %x %z = sext <16 x i8> %y to <16 x i16> ret <16 x i16> %z } define <16 x i16> @zextload_v16i8_v16i16(ptr %x) { -; LMULMAX1-LABEL: zextload_v16i8_v16i16: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vle8.v v10, (a0) -; LMULMAX1-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v10, 8 -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf2 v9, v8 -; LMULMAX1-NEXT: vzext.vf2 v8, v10 -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: zextload_v16i8_v16i16: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX4-NEXT: vle8.v v10, (a0) -; LMULMAX4-NEXT: vzext.vf2 v8, v10 -; LMULMAX4-NEXT: ret +; CHECK-LABEL: zextload_v16i8_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vle8.v v10, (a0) +; CHECK-NEXT: vzext.vf2 v8, v10 +; CHECK-NEXT: ret %y = load <16 x i8>, ptr %x %z = zext <16 x i8> %y to <16 x i16> ret <16 x i16> %z } define <16 x i32> @sextload_v16i8_v16i32(ptr %x) { -; LMULMAX1-LABEL: sextload_v16i8_v16i32: -; LMULMAX1: # 
%bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vle8.v v9, (a0) -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v8, v9 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v11, v9, 8 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v10, v11 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v12, v11, 4 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v11, v12 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v12, v9, 4 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v9, v12 -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: sextload_v16i8_v16i32: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; LMULMAX4-NEXT: vle8.v v12, (a0) -; LMULMAX4-NEXT: vsext.vf4 v8, v12 -; LMULMAX4-NEXT: ret +; CHECK-LABEL: sextload_v16i8_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vle8.v v12, (a0) +; CHECK-NEXT: vsext.vf4 v8, v12 +; CHECK-NEXT: ret %y = load <16 x i8>, ptr %x %z = sext <16 x i8> %y to <16 x i32> ret <16 x i32> %z } define <16 x i32> @zextload_v16i8_v16i32(ptr %x) { -; LMULMAX1-LABEL: zextload_v16i8_v16i32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vle8.v v9, (a0) -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf4 v8, v9 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v11, v9, 8 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf4 v10, v11 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v12, v11, 4 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf4 v11, v12 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v12, v9, 4 -; 
LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf4 v9, v12 -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: zextload_v16i8_v16i32: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; LMULMAX4-NEXT: vle8.v v12, (a0) -; LMULMAX4-NEXT: vzext.vf4 v8, v12 -; LMULMAX4-NEXT: ret +; CHECK-LABEL: zextload_v16i8_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vle8.v v12, (a0) +; CHECK-NEXT: vzext.vf4 v8, v12 +; CHECK-NEXT: ret %y = load <16 x i8>, ptr %x %z = zext <16 x i8> %y to <16 x i32> ret <16 x i32> %z } define <16 x i64> @sextload_v16i8_v16i64(ptr %x) { -; LMULMAX1-LABEL: sextload_v16i8_v16i64: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vle8.v v10, (a0) -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf8 v8, v10 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v11, v10, 8 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf8 v12, v11 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v13, v10, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf8 v9, v13 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v14, v11, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf8 v13, v14 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v11, v11, 4 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf8 v14, v11 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v11, v11, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf8 v15, v11 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v11, v10, 4 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf8 v10, v11 -; 
LMULMAX1-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v16, v11, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf8 v11, v16 -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: sextload_v16i8_v16i64: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX4-NEXT: vle8.v v16, (a0) -; LMULMAX4-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; LMULMAX4-NEXT: vslidedown.vi v8, v16, 8 -; LMULMAX4-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; LMULMAX4-NEXT: vsext.vf8 v12, v8 -; LMULMAX4-NEXT: vsext.vf8 v8, v16 -; LMULMAX4-NEXT: ret +; CHECK-LABEL: sextload_v16i8_v16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle8.v v16, (a0) +; CHECK-NEXT: vsext.vf8 v8, v16 +; CHECK-NEXT: ret %y = load <16 x i8>, ptr %x %z = sext <16 x i8> %y to <16 x i64> ret <16 x i64> %z } define <16 x i64> @zextload_v16i8_v16i64(ptr %x) { -; LMULMAX1-LABEL: zextload_v16i8_v16i64: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vle8.v v10, (a0) -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf8 v8, v10 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v11, v10, 8 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf8 v12, v11 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v13, v10, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf8 v9, v13 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v14, v11, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf8 v13, v14 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v11, v11, 4 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf8 v14, v11 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v11, v11, 2 
-; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf8 v15, v11 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v11, v10, 4 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf8 v10, v11 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v16, v11, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf8 v11, v16 -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: zextload_v16i8_v16i64: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX4-NEXT: vle8.v v16, (a0) -; LMULMAX4-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; LMULMAX4-NEXT: vslidedown.vi v8, v16, 8 -; LMULMAX4-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; LMULMAX4-NEXT: vzext.vf8 v12, v8 -; LMULMAX4-NEXT: vzext.vf8 v8, v16 -; LMULMAX4-NEXT: ret +; CHECK-LABEL: zextload_v16i8_v16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle8.v v16, (a0) +; CHECK-NEXT: vzext.vf8 v8, v16 +; CHECK-NEXT: ret %y = load <16 x i8>, ptr %x %z = zext <16 x i8> %y to <16 x i64> ret <16 x i64> %z @@ -649,46 +421,24 @@ define <4 x i32> @zextload_v4i16_v4i32(ptr %x) { } define <4 x i64> @sextload_v4i16_v4i64(ptr %x) { -; LMULMAX1-LABEL: sextload_v4i16_v4i64: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-NEXT: vle16.v v10, (a0) -; LMULMAX1-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v10, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v9, v8 -; LMULMAX1-NEXT: vsext.vf4 v8, v10 -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: sextload_v4i16_v4i64: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX4-NEXT: vle16.v v10, (a0) -; LMULMAX4-NEXT: vsext.vf4 v8, v10 -; LMULMAX4-NEXT: ret +; CHECK-LABEL: sextload_v4i16_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: 
vle16.v v10, (a0) +; CHECK-NEXT: vsext.vf4 v8, v10 +; CHECK-NEXT: ret %y = load <4 x i16>, ptr %x %z = sext <4 x i16> %y to <4 x i64> ret <4 x i64> %z } define <4 x i64> @zextload_v4i16_v4i64(ptr %x) { -; LMULMAX1-LABEL: zextload_v4i16_v4i64: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-NEXT: vle16.v v10, (a0) -; LMULMAX1-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v10, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf4 v9, v8 -; LMULMAX1-NEXT: vzext.vf4 v8, v10 -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: zextload_v4i16_v4i64: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX4-NEXT: vle16.v v10, (a0) -; LMULMAX4-NEXT: vzext.vf4 v8, v10 -; LMULMAX4-NEXT: ret +; CHECK-LABEL: zextload_v4i16_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vle16.v v10, (a0) +; CHECK-NEXT: vzext.vf4 v8, v10 +; CHECK-NEXT: ret %y = load <4 x i16>, ptr %x %z = zext <4 x i16> %y to <4 x i64> ret <4 x i64> %z @@ -707,294 +457,108 @@ define void @truncstore_v8i16_v8i8(<8 x i16> %x, ptr %z) { } define <8 x i32> @sextload_v8i16_v8i32(ptr %x) { -; LMULMAX1-LABEL: sextload_v8i16_v8i32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vle16.v v10, (a0) -; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v10, 4 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf2 v9, v8 -; LMULMAX1-NEXT: vsext.vf2 v8, v10 -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: sextload_v8i16_v8i32: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX4-NEXT: vle16.v v10, (a0) -; LMULMAX4-NEXT: vsext.vf2 v8, v10 -; LMULMAX4-NEXT: ret +; CHECK-LABEL: sextload_v8i16_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle16.v v10, (a0) +; CHECK-NEXT: vsext.vf2 v8, v10 +; 
CHECK-NEXT: ret %y = load <8 x i16>, ptr %x %z = sext <8 x i16> %y to <8 x i32> ret <8 x i32> %z } define <8 x i32> @zextload_v8i16_v8i32(ptr %x) { -; LMULMAX1-LABEL: zextload_v8i16_v8i32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vle16.v v10, (a0) -; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v10, 4 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf2 v9, v8 -; LMULMAX1-NEXT: vzext.vf2 v8, v10 -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: zextload_v8i16_v8i32: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX4-NEXT: vle16.v v10, (a0) -; LMULMAX4-NEXT: vzext.vf2 v8, v10 -; LMULMAX4-NEXT: ret +; CHECK-LABEL: zextload_v8i16_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle16.v v10, (a0) +; CHECK-NEXT: vzext.vf2 v8, v10 +; CHECK-NEXT: ret %y = load <8 x i16>, ptr %x %z = zext <8 x i16> %y to <8 x i32> ret <8 x i32> %z } define <8 x i64> @sextload_v8i16_v8i64(ptr %x) { -; LMULMAX1-LABEL: sextload_v8i16_v8i64: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vle16.v v9, (a0) -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v8, v9 -; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v11, v9, 4 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v10, v11 -; LMULMAX1-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v12, v11, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v11, v12 -; LMULMAX1-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v12, v9, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v9, v12 -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: sextload_v8i16_v8i64: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 8, e64, m4, 
ta, ma -; LMULMAX4-NEXT: vle16.v v12, (a0) -; LMULMAX4-NEXT: vsext.vf4 v8, v12 -; LMULMAX4-NEXT: ret +; CHECK-LABEL: sextload_v8i16_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vle16.v v12, (a0) +; CHECK-NEXT: vsext.vf4 v8, v12 +; CHECK-NEXT: ret %y = load <8 x i16>, ptr %x %z = sext <8 x i16> %y to <8 x i64> ret <8 x i64> %z } define <8 x i64> @zextload_v8i16_v8i64(ptr %x) { -; LMULMAX1-LABEL: zextload_v8i16_v8i64: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vle16.v v9, (a0) -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf4 v8, v9 -; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v11, v9, 4 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf4 v10, v11 -; LMULMAX1-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v12, v11, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf4 v11, v12 -; LMULMAX1-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v12, v9, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf4 v9, v12 -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: zextload_v8i16_v8i64: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; LMULMAX4-NEXT: vle16.v v12, (a0) -; LMULMAX4-NEXT: vzext.vf4 v8, v12 -; LMULMAX4-NEXT: ret +; CHECK-LABEL: zextload_v8i16_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vle16.v v12, (a0) +; CHECK-NEXT: vzext.vf4 v8, v12 +; CHECK-NEXT: ret %y = load <8 x i16>, ptr %x %z = zext <8 x i16> %y to <8 x i64> ret <8 x i64> %z } define void @truncstore_v16i16_v16i8(<16 x i16> %x, ptr %z) { -; LMULMAX1-LABEL: truncstore_v16i16_v16i8: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: 
vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vslideup.vi v8, v9, 8 -; LMULMAX1-NEXT: vse8.v v8, (a0) -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: truncstore_v16i16_v16i8: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX4-NEXT: vnsrl.wi v10, v8, 0 -; LMULMAX4-NEXT: vse8.v v10, (a0) -; LMULMAX4-NEXT: ret +; CHECK-LABEL: truncstore_v16i16_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v10, v8, 0 +; CHECK-NEXT: vse8.v v10, (a0) +; CHECK-NEXT: ret %y = trunc <16 x i16> %x to <16 x i8> store <16 x i8> %y, ptr %z ret void } define <16 x i32> @sextload_v16i16_v16i32(ptr %x) { -; LMULMAX1-LABEL: sextload_v16i16_v16i32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vle16.v v10, (a0) -; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vle16.v v12, (a0) -; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v10, 4 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf2 v9, v8 -; LMULMAX1-NEXT: vsext.vf2 v8, v10 -; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v10, v12, 4 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf2 v11, v10 -; LMULMAX1-NEXT: vsext.vf2 v10, v12 -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: sextload_v16i16_v16i32: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; LMULMAX4-NEXT: vle16.v v12, (a0) -; LMULMAX4-NEXT: vsext.vf2 v8, v12 -; LMULMAX4-NEXT: ret +; CHECK-LABEL: sextload_v16i16_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vle16.v v12, (a0) +; CHECK-NEXT: vsext.vf2 v8, v12 +; CHECK-NEXT: ret %y = load <16 x i16>, ptr %x %z = sext <16 x i16> %y to <16 x i32> ret <16 x i32> %z } define <16 x i32> @zextload_v16i16_v16i32(ptr %x) { -; LMULMAX1-LABEL: zextload_v16i16_v16i32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 
8, e16, m1, ta, ma -; LMULMAX1-NEXT: vle16.v v10, (a0) -; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vle16.v v12, (a0) -; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v10, 4 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf2 v9, v8 -; LMULMAX1-NEXT: vzext.vf2 v8, v10 -; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v10, v12, 4 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf2 v11, v10 -; LMULMAX1-NEXT: vzext.vf2 v10, v12 -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: zextload_v16i16_v16i32: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; LMULMAX4-NEXT: vle16.v v12, (a0) -; LMULMAX4-NEXT: vzext.vf2 v8, v12 -; LMULMAX4-NEXT: ret +; CHECK-LABEL: zextload_v16i16_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vle16.v v12, (a0) +; CHECK-NEXT: vzext.vf2 v8, v12 +; CHECK-NEXT: ret %y = load <16 x i16>, ptr %x %z = zext <16 x i16> %y to <16 x i32> ret <16 x i32> %z } define <16 x i64> @sextload_v16i16_v16i64(ptr %x) { -; LMULMAX1-LABEL: sextload_v16i16_v16i64: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vle16.v v9, (a0) -; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vle16.v v13, (a0) -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v8, v9 -; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v11, v9, 4 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v10, v11 -; LMULMAX1-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v12, v11, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v11, v12 -; LMULMAX1-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v12, v9, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v9, v12 
-; LMULMAX1-NEXT: vsext.vf4 v12, v13 -; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v15, v13, 4 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v14, v15 -; LMULMAX1-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v16, v15, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v15, v16 -; LMULMAX1-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v16, v13, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v13, v16 -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: sextload_v16i16_v16i64: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX4-NEXT: vle16.v v16, (a0) -; LMULMAX4-NEXT: vsetivli zero, 8, e16, m2, ta, ma -; LMULMAX4-NEXT: vslidedown.vi v8, v16, 8 -; LMULMAX4-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; LMULMAX4-NEXT: vsext.vf4 v12, v8 -; LMULMAX4-NEXT: vsext.vf4 v8, v16 -; LMULMAX4-NEXT: ret +; CHECK-LABEL: sextload_v16i16_v16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: vsext.vf4 v8, v16 +; CHECK-NEXT: ret %y = load <16 x i16>, ptr %x %z = sext <16 x i16> %y to <16 x i64> ret <16 x i64> %z } define <16 x i64> @zextload_v16i16_v16i64(ptr %x) { -; LMULMAX1-LABEL: zextload_v16i16_v16i64: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vle16.v v9, (a0) -; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vle16.v v13, (a0) -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf4 v8, v9 -; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v11, v9, 4 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf4 v10, v11 -; LMULMAX1-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v12, v11, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; 
LMULMAX1-NEXT: vzext.vf4 v11, v12 -; LMULMAX1-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v12, v9, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf4 v9, v12 -; LMULMAX1-NEXT: vzext.vf4 v12, v13 -; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v15, v13, 4 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf4 v14, v15 -; LMULMAX1-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v16, v15, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf4 v15, v16 -; LMULMAX1-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v16, v13, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf4 v13, v16 -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: zextload_v16i16_v16i64: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX4-NEXT: vle16.v v16, (a0) -; LMULMAX4-NEXT: vsetivli zero, 8, e16, m2, ta, ma -; LMULMAX4-NEXT: vslidedown.vi v8, v16, 8 -; LMULMAX4-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; LMULMAX4-NEXT: vzext.vf4 v12, v8 -; LMULMAX4-NEXT: vzext.vf4 v8, v16 -; LMULMAX4-NEXT: ret +; CHECK-LABEL: zextload_v16i16_v16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: vzext.vf4 v8, v16 +; CHECK-NEXT: ret %y = load <16 x i16>, ptr %x %z = zext <16 x i16> %y to <16 x i64> ret <16 x i64> %z @@ -1077,323 +641,124 @@ define void @truncstore_v4i32_v4i16(<4 x i32> %x, ptr %z) { } define <4 x i64> @sextload_v4i32_v4i64(ptr %x) { -; LMULMAX1-LABEL: sextload_v4i32_v4i64: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vle32.v v10, (a0) -; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v10, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf2 v9, v8 -; LMULMAX1-NEXT: 
vsext.vf2 v8, v10 -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: sextload_v4i32_v4i64: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX4-NEXT: vle32.v v10, (a0) -; LMULMAX4-NEXT: vsext.vf2 v8, v10 -; LMULMAX4-NEXT: ret +; CHECK-LABEL: sextload_v4i32_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vsext.vf2 v8, v10 +; CHECK-NEXT: ret %y = load <4 x i32>, ptr %x %z = sext <4 x i32> %y to <4 x i64> ret <4 x i64> %z } define <4 x i64> @zextload_v4i32_v4i64(ptr %x) { -; LMULMAX1-LABEL: zextload_v4i32_v4i64: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vle32.v v10, (a0) -; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v10, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf2 v9, v8 -; LMULMAX1-NEXT: vzext.vf2 v8, v10 -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: zextload_v4i32_v4i64: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX4-NEXT: vle32.v v10, (a0) -; LMULMAX4-NEXT: vzext.vf2 v8, v10 -; LMULMAX4-NEXT: ret +; CHECK-LABEL: zextload_v4i32_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vzext.vf2 v8, v10 +; CHECK-NEXT: ret %y = load <4 x i32>, ptr %x %z = zext <4 x i32> %y to <4 x i64> ret <4 x i64> %z } define void @truncstore_v8i32_v8i8(<8 x i32> %x, ptr %z) { -; LMULMAX1-LABEL: truncstore_v8i32_v8i8: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; 
LMULMAX1-NEXT: vslideup.vi v8, v9, 4 -; LMULMAX1-NEXT: vse8.v v8, (a0) -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: truncstore_v8i32_v8i8: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX4-NEXT: vnsrl.wi v10, v8, 0 -; LMULMAX4-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; LMULMAX4-NEXT: vnsrl.wi v8, v10, 0 -; LMULMAX4-NEXT: vse8.v v8, (a0) -; LMULMAX4-NEXT: ret +; CHECK-LABEL: truncstore_v8i32_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v10, v8, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v10, 0 +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret %y = trunc <8 x i32> %x to <8 x i8> store <8 x i8> %y, ptr %z ret void } define void @truncstore_v8i32_v8i16(<8 x i32> %x, ptr %z) { -; LMULMAX1-LABEL: truncstore_v8i32_v8i16: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vslideup.vi v8, v9, 4 -; LMULMAX1-NEXT: vse16.v v8, (a0) -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: truncstore_v8i32_v8i16: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX4-NEXT: vnsrl.wi v10, v8, 0 -; LMULMAX4-NEXT: vse16.v v10, (a0) -; LMULMAX4-NEXT: ret +; CHECK-LABEL: truncstore_v8i32_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v10, v8, 0 +; CHECK-NEXT: vse16.v v10, (a0) +; CHECK-NEXT: ret %y = trunc <8 x i32> %x to <8 x i16> store <8 x i16> %y, ptr %z ret void } define <8 x i64> @sextload_v8i32_v8i64(ptr %x) { -; LMULMAX1-LABEL: sextload_v8i32_v8i64: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vle32.v v10, (a0) -; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vle32.v v12, (a0) -; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v10, 
2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf2 v9, v8 -; LMULMAX1-NEXT: vsext.vf2 v8, v10 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v10, v12, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf2 v11, v10 -; LMULMAX1-NEXT: vsext.vf2 v10, v12 -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: sextload_v8i32_v8i64: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; LMULMAX4-NEXT: vle32.v v12, (a0) -; LMULMAX4-NEXT: vsext.vf2 v8, v12 -; LMULMAX4-NEXT: ret +; CHECK-LABEL: sextload_v8i32_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vle32.v v12, (a0) +; CHECK-NEXT: vsext.vf2 v8, v12 +; CHECK-NEXT: ret %y = load <8 x i32>, ptr %x %z = sext <8 x i32> %y to <8 x i64> ret <8 x i64> %z } define <8 x i64> @zextload_v8i32_v8i64(ptr %x) { -; LMULMAX1-LABEL: zextload_v8i32_v8i64: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vle32.v v10, (a0) -; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vle32.v v12, (a0) -; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v10, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf2 v9, v8 -; LMULMAX1-NEXT: vzext.vf2 v8, v10 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v10, v12, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf2 v11, v10 -; LMULMAX1-NEXT: vzext.vf2 v10, v12 -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: zextload_v8i32_v8i64: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; LMULMAX4-NEXT: vle32.v v12, (a0) -; LMULMAX4-NEXT: vzext.vf2 v8, v12 -; LMULMAX4-NEXT: ret +; CHECK-LABEL: zextload_v8i32_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vle32.v v12, (a0) +; CHECK-NEXT: vzext.vf2 v8, v12 +; CHECK-NEXT: ret %y = load 
<8 x i32>, ptr %x %z = zext <8 x i32> %y to <8 x i64> ret <8 x i64> %z } define void @truncstore_v16i32_v16i8(<16 x i32> %x, ptr %z) { -; LMULMAX1-LABEL: truncstore_v16i32_v16i8: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, m1, tu, ma -; LMULMAX1-NEXT: vslideup.vi v8, v9, 4 -; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v10, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetivli zero, 12, e8, m1, tu, ma -; LMULMAX1-NEXT: vslideup.vi v8, v9, 8 -; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vslideup.vi v8, v9, 12 -; LMULMAX1-NEXT: vse8.v v8, (a0) -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: truncstore_v16i32_v16i8: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX4-NEXT: vnsrl.wi v12, v8, 0 -; LMULMAX4-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; LMULMAX4-NEXT: vnsrl.wi v8, v12, 0 -; LMULMAX4-NEXT: vse8.v v8, (a0) -; LMULMAX4-NEXT: ret +; CHECK-LABEL: truncstore_v16i32_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vnsrl.wi v12, v8, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v12, 0 +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret %y = trunc <16 x i32> %x to <16 x i8> store <16 x i8> %y, ptr %z ret void } define void @truncstore_v16i32_v16i16(<16 x i32> %x, ptr %z) { -; 
LMULMAX1-LABEL: truncstore_v16i32_v16i16: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vslideup.vi v8, v9, 4 -; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0 -; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vslideup.vi v10, v9, 4 -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vse16.v v10, (a1) -; LMULMAX1-NEXT: vse16.v v8, (a0) -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: truncstore_v16i32_v16i16: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX4-NEXT: vnsrl.wi v12, v8, 0 -; LMULMAX4-NEXT: vse16.v v12, (a0) -; LMULMAX4-NEXT: ret +; CHECK-LABEL: truncstore_v16i32_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vnsrl.wi v12, v8, 0 +; CHECK-NEXT: vse16.v v12, (a0) +; CHECK-NEXT: ret %y = trunc <16 x i32> %x to <16 x i16> store <16 x i16> %y, ptr %z ret void } define <16 x i64> @sextload_v16i32_v16i64(ptr %x) { -; LMULMAX1-LABEL: sextload_v16i32_v16i64: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi a1, a0, 48 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vle32.v v16, (a1) -; LMULMAX1-NEXT: addi a1, a0, 32 -; LMULMAX1-NEXT: vle32.v v14, (a1) -; LMULMAX1-NEXT: vle32.v v10, (a0) -; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vle32.v v12, (a0) -; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v10, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf2 v9, v8 -; LMULMAX1-NEXT: vsext.vf2 v8, v10 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v10, v12, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf2 v11, v10 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma 
-; LMULMAX1-NEXT: vslidedown.vi v10, v14, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf2 v13, v10 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v10, v16, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf2 v15, v10 -; LMULMAX1-NEXT: vsext.vf2 v10, v12 -; LMULMAX1-NEXT: vsext.vf2 v12, v14 -; LMULMAX1-NEXT: vsext.vf2 v14, v16 -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: sextload_v16i32_v16i64: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; LMULMAX4-NEXT: vle32.v v16, (a0) -; LMULMAX4-NEXT: vsetivli zero, 8, e32, m4, ta, ma -; LMULMAX4-NEXT: vslidedown.vi v8, v16, 8 -; LMULMAX4-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; LMULMAX4-NEXT: vsext.vf2 v12, v8 -; LMULMAX4-NEXT: vsext.vf2 v8, v16 -; LMULMAX4-NEXT: ret +; CHECK-LABEL: sextload_v16i32_v16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle32.v v16, (a0) +; CHECK-NEXT: vsext.vf2 v8, v16 +; CHECK-NEXT: ret %y = load <16 x i32>, ptr %x %z = sext <16 x i32> %y to <16 x i64> ret <16 x i64> %z } define <16 x i64> @zextload_v16i32_v16i64(ptr %x) { -; LMULMAX1-LABEL: zextload_v16i32_v16i64: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi a1, a0, 48 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vle32.v v16, (a1) -; LMULMAX1-NEXT: addi a1, a0, 32 -; LMULMAX1-NEXT: vle32.v v14, (a1) -; LMULMAX1-NEXT: vle32.v v10, (a0) -; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vle32.v v12, (a0) -; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v10, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf2 v9, v8 -; LMULMAX1-NEXT: vzext.vf2 v8, v10 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v10, v12, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf2 v11, v10 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma 
-; LMULMAX1-NEXT: vslidedown.vi v10, v14, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf2 v13, v10 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v10, v16, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vzext.vf2 v15, v10 -; LMULMAX1-NEXT: vzext.vf2 v10, v12 -; LMULMAX1-NEXT: vzext.vf2 v12, v14 -; LMULMAX1-NEXT: vzext.vf2 v14, v16 -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: zextload_v16i32_v16i64: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; LMULMAX4-NEXT: vle32.v v16, (a0) -; LMULMAX4-NEXT: vsetivli zero, 8, e32, m4, ta, ma -; LMULMAX4-NEXT: vslidedown.vi v8, v16, 8 -; LMULMAX4-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; LMULMAX4-NEXT: vzext.vf2 v12, v8 -; LMULMAX4-NEXT: vzext.vf2 v8, v16 -; LMULMAX4-NEXT: ret +; CHECK-LABEL: zextload_v16i32_v16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle32.v v16, (a0) +; CHECK-NEXT: vzext.vf2 v8, v16 +; CHECK-NEXT: ret %y = load <16 x i32>, ptr %x %z = zext <16 x i32> %y to <16 x i64> ret <16 x i64> %z @@ -1442,415 +807,126 @@ define void @truncstore_v2i64_v2i32(<2 x i64> %x, ptr %z) { } define void @truncstore_v4i64_v4i8(<4 x i64> %x, ptr %z) { -; LMULMAX1-LABEL: truncstore_v4i64_v4i8: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1-NEXT: vslideup.vi v8, v9, 2 -; LMULMAX1-NEXT: 
vse8.v v8, (a0) -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: truncstore_v4i64_v4i8: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX4-NEXT: vnsrl.wi v10, v8, 0 -; LMULMAX4-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; LMULMAX4-NEXT: vnsrl.wi v8, v10, 0 -; LMULMAX4-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; LMULMAX4-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX4-NEXT: vse8.v v8, (a0) -; LMULMAX4-NEXT: ret +; CHECK-LABEL: truncstore_v4i64_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v10, v8, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v10, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0 +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret %y = trunc <4 x i64> %x to <4 x i8> store <4 x i8> %y, ptr %z ret void } define void @truncstore_v4i64_v4i16(<4 x i64> %x, ptr %z) { -; LMULMAX1-LABEL: truncstore_v4i64_v4i16: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-NEXT: vslideup.vi v8, v9, 2 -; LMULMAX1-NEXT: vse16.v v8, (a0) -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: truncstore_v4i64_v4i16: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX4-NEXT: vnsrl.wi v10, v8, 0 -; LMULMAX4-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; LMULMAX4-NEXT: vnsrl.wi v8, v10, 0 -; LMULMAX4-NEXT: vse16.v v8, (a0) -; LMULMAX4-NEXT: ret +; CHECK-LABEL: truncstore_v4i64_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v10, v8, 0 +; CHECK-NEXT: vsetvli zero, zero, 
e16, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v10, 0 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %y = trunc <4 x i64> %x to <4 x i16> store <4 x i16> %y, ptr %z ret void } define void @truncstore_v4i64_v4i32(<4 x i64> %x, ptr %z) { -; LMULMAX1-LABEL: truncstore_v4i64_v4i32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vslideup.vi v8, v9, 2 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: truncstore_v4i64_v4i32: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX4-NEXT: vnsrl.wi v10, v8, 0 -; LMULMAX4-NEXT: vse32.v v10, (a0) -; LMULMAX4-NEXT: ret +; CHECK-LABEL: truncstore_v4i64_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v10, v8, 0 +; CHECK-NEXT: vse32.v v10, (a0) +; CHECK-NEXT: ret %y = trunc <4 x i64> %x to <4 x i32> store <4 x i32> %y, ptr %z ret void } define void @truncstore_v8i64_v8i8(<8 x i64> %x, ptr %z) { -; LMULMAX1-LABEL: truncstore_v8i64_v8i8: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, tu, ma -; LMULMAX1-NEXT: vslideup.vi v8, v9, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v10, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: 
vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetivli zero, 6, e8, mf2, tu, ma -; LMULMAX1-NEXT: vslideup.vi v8, v9, 4 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslideup.vi v8, v9, 6 -; LMULMAX1-NEXT: vse8.v v8, (a0) -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: truncstore_v8i64_v8i8: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX4-NEXT: vnsrl.wi v12, v8, 0 -; LMULMAX4-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; LMULMAX4-NEXT: vnsrl.wi v8, v12, 0 -; LMULMAX4-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; LMULMAX4-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX4-NEXT: vse8.v v8, (a0) -; LMULMAX4-NEXT: ret +; CHECK-LABEL: truncstore_v8i64_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vnsrl.wi v12, v8, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v12, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0 +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret %y = trunc <8 x i64> %x to <8 x i8> store <8 x i8> %y, ptr %z ret void } define void @truncstore_v8i64_v8i16(<8 x i64> %x, ptr %z) { -; LMULMAX1-LABEL: truncstore_v8i64_v8i16: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, tu, ma 
-; LMULMAX1-NEXT: vslideup.vi v8, v9, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v10, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetivli zero, 6, e16, m1, tu, ma -; LMULMAX1-NEXT: vslideup.vi v8, v9, 4 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vslideup.vi v8, v9, 6 -; LMULMAX1-NEXT: vse16.v v8, (a0) -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: truncstore_v8i64_v8i16: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX4-NEXT: vnsrl.wi v12, v8, 0 -; LMULMAX4-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; LMULMAX4-NEXT: vnsrl.wi v8, v12, 0 -; LMULMAX4-NEXT: vse16.v v8, (a0) -; LMULMAX4-NEXT: ret +; CHECK-LABEL: truncstore_v8i64_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vnsrl.wi v12, v8, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v12, 0 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %y = trunc <8 x i64> %x to <8 x i16> store <8 x i16> %y, ptr %z ret void } define void @truncstore_v8i64_v8i32(<8 x i64> %x, ptr %z) { -; LMULMAX1-LABEL: truncstore_v8i64_v8i32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vslideup.vi v8, v9, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0 -; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vslideup.vi v10, v9, 2 -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vse32.v v10, (a1) -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: 
ret -; -; LMULMAX4-LABEL: truncstore_v8i64_v8i32: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX4-NEXT: vnsrl.wi v12, v8, 0 -; LMULMAX4-NEXT: vse32.v v12, (a0) -; LMULMAX4-NEXT: ret +; CHECK-LABEL: truncstore_v8i64_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vnsrl.wi v12, v8, 0 +; CHECK-NEXT: vse32.v v12, (a0) +; CHECK-NEXT: ret %y = trunc <8 x i64> %x to <8 x i32> store <8 x i32> %y, ptr %z ret void } define void @truncstore_v16i64_v16i8(<16 x i64> %x, ptr %z) { -; LMULMAX1-LABEL: truncstore_v16i64_v16i8: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, m1, tu, ma -; LMULMAX1-NEXT: vslideup.vi v8, v9, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v10, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetivli zero, 6, e8, m1, tu, ma -; LMULMAX1-NEXT: vslideup.vi v8, v9, 4 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, m1, tu, ma -; LMULMAX1-NEXT: vslideup.vi v8, v9, 6 -; 
LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v12, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetivli zero, 10, e8, m1, tu, ma -; LMULMAX1-NEXT: vslideup.vi v8, v9, 8 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v13, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetivli zero, 12, e8, m1, tu, ma -; LMULMAX1-NEXT: vslideup.vi v8, v9, 10 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v14, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetivli zero, 14, e8, m1, tu, ma -; LMULMAX1-NEXT: vslideup.vi v8, v9, 12 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v15, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vslideup.vi v8, v9, 14 -; LMULMAX1-NEXT: vse8.v v8, (a0) -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: truncstore_v16i64_v16i8: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX4-NEXT: vnsrl.wi v16, v12, 0 -; LMULMAX4-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; LMULMAX4-NEXT: vnsrl.wi v12, v16, 0 -; LMULMAX4-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; LMULMAX4-NEXT: vnsrl.wi v12, v12, 0 -; LMULMAX4-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; LMULMAX4-NEXT: vnsrl.wi v14, v8, 0 -; LMULMAX4-NEXT: vsetvli zero, zero, 
e16, m1, ta, ma -; LMULMAX4-NEXT: vnsrl.wi v8, v14, 0 -; LMULMAX4-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; LMULMAX4-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX4-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX4-NEXT: vslideup.vi v8, v12, 8 -; LMULMAX4-NEXT: vse8.v v8, (a0) -; LMULMAX4-NEXT: ret +; CHECK-LABEL: truncstore_v16i64_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v16, v8, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v16, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v10, v8, 0 +; CHECK-NEXT: vse8.v v10, (a0) +; CHECK-NEXT: ret %y = trunc <16 x i64> %x to <16 x i8> store <16 x i8> %y, ptr %z ret void } define void @truncstore_v16i64_v16i16(<16 x i64> %x, ptr %z) { -; LMULMAX1-LABEL: truncstore_v16i64_v16i16: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, tu, ma -; LMULMAX1-NEXT: vslideup.vi v8, v9, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v10, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetivli zero, 6, e16, m1, tu, ma -; LMULMAX1-NEXT: vslideup.vi v8, v9, 4 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vslideup.vi v8, v9, 6 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v13, 0 -; LMULMAX1-NEXT: vsetvli zero, 
zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v10, v12, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, tu, ma -; LMULMAX1-NEXT: vslideup.vi v10, v9, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v14, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetivli zero, 6, e16, m1, tu, ma -; LMULMAX1-NEXT: vslideup.vi v10, v9, 4 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v15, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vslideup.vi v10, v9, 6 -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vse16.v v10, (a1) -; LMULMAX1-NEXT: vse16.v v8, (a0) -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: truncstore_v16i64_v16i16: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX4-NEXT: vnsrl.wi v16, v12, 0 -; LMULMAX4-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; LMULMAX4-NEXT: vnsrl.wi v12, v16, 0 -; LMULMAX4-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; LMULMAX4-NEXT: vnsrl.wi v14, v8, 0 -; LMULMAX4-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; LMULMAX4-NEXT: vnsrl.wi v8, v14, 0 -; LMULMAX4-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX4-NEXT: vslideup.vi v8, v12, 8 -; LMULMAX4-NEXT: vse16.v v8, (a0) -; LMULMAX4-NEXT: ret +; CHECK-LABEL: truncstore_v16i64_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v16, v8, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v16, 0 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %y = trunc <16 x i64> %x to <16 x i16> store <16 x i16> %y, ptr %z ret void } define void 
@truncstore_v16i64_v16i32(<16 x i64> %x, ptr %z) { -; LMULMAX1-LABEL: truncstore_v16i64_v16i32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vslideup.vi v8, v9, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0 -; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vslideup.vi v10, v9, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v13, 0 -; LMULMAX1-NEXT: vnsrl.wi v11, v12, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vslideup.vi v11, v9, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v15, 0 -; LMULMAX1-NEXT: vnsrl.wi v12, v14, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vslideup.vi v12, v9, 2 -; LMULMAX1-NEXT: addi a1, a0, 48 -; LMULMAX1-NEXT: vse32.v v12, (a1) -; LMULMAX1-NEXT: addi a1, a0, 32 -; LMULMAX1-NEXT: vse32.v v11, (a1) -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vse32.v v10, (a1) -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: ret -; -; LMULMAX4-LABEL: truncstore_v16i64_v16i32: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX4-NEXT: vnsrl.wi v16, v12, 0 -; LMULMAX4-NEXT: vnsrl.wi v12, v8, 0 -; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; LMULMAX4-NEXT: vslideup.vi v12, v16, 8 -; LMULMAX4-NEXT: vse32.v v12, (a0) -; LMULMAX4-NEXT: ret +; CHECK-LABEL: truncstore_v16i64_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v16, v8, 0 +; CHECK-NEXT: vse32.v v16, (a0) +; CHECK-NEXT: ret %y = trunc <16 x i64> %x to <16 x i32> store <16 x i32> %y, ptr %z ret void diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll index 88a86bbdab9cd6..d0dc70fd81151d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll @@ -1,8 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s define void @fpext_v2f16_v2f32(ptr %x, ptr %y) { ; CHECK-LABEL: fpext_v2f16_v2f32: @@ -35,28 +33,13 @@ define void @fpext_v2f16_v2f64(ptr %x, ptr %y) { } define void @fpext_v8f16_v8f32(ptr %x, ptr %y) { -; LMULMAX8-LABEL: fpext_v8f16_v8f32: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX8-NEXT: vle16.v v8, (a0) -; LMULMAX8-NEXT: vfwcvt.f.f.v v10, v8 -; LMULMAX8-NEXT: vse32.v v10, (a1) -; LMULMAX8-NEXT: ret -; -; LMULMAX1-LABEL: fpext_v8f16_v8f32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vle16.v v8, (a0) -; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-NEXT: vfwcvt.f.f.v v9, v8 -; 
LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v8, 4 -; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-NEXT: vfwcvt.f.f.v v10, v8 -; LMULMAX1-NEXT: addi a0, a1, 16 -; LMULMAX1-NEXT: vse32.v v10, (a0) -; LMULMAX1-NEXT: vse32.v v9, (a1) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: fpext_v8f16_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vfwcvt.f.f.v v10, v8 +; CHECK-NEXT: vse32.v v10, (a1) +; CHECK-NEXT: ret %a = load <8 x half>, ptr %x %d = fpext <8 x half> %a to <8 x float> store <8 x float> %d, ptr %y @@ -64,50 +47,15 @@ define void @fpext_v8f16_v8f32(ptr %x, ptr %y) { } define void @fpext_v8f16_v8f64(ptr %x, ptr %y) { -; LMULMAX8-LABEL: fpext_v8f16_v8f64: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX8-NEXT: vle16.v v8, (a0) -; LMULMAX8-NEXT: vfwcvt.f.f.v v10, v8 -; LMULMAX8-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; LMULMAX8-NEXT: vfwcvt.f.f.v v12, v10 -; LMULMAX8-NEXT: vse64.v v12, (a1) -; LMULMAX8-NEXT: ret -; -; LMULMAX1-LABEL: fpext_v8f16_v8f64: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vle16.v v8, (a0) -; LMULMAX1-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; LMULMAX1-NEXT: vfwcvt.f.f.v v10, v9 -; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; LMULMAX1-NEXT: vfwcvt.f.f.v v9, v10 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vfwcvt.f.f.v v10, v8 -; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; LMULMAX1-NEXT: vfwcvt.f.f.v v11, v10 -; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v8, 4 -; LMULMAX1-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; LMULMAX1-NEXT: vfwcvt.f.f.v v10, v8 -; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; LMULMAX1-NEXT: vfwcvt.f.f.v v12, v10 -; 
LMULMAX1-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; LMULMAX1-NEXT: vfwcvt.f.f.v v10, v8 -; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; LMULMAX1-NEXT: vfwcvt.f.f.v v8, v10 -; LMULMAX1-NEXT: addi a0, a1, 48 -; LMULMAX1-NEXT: vse64.v v8, (a0) -; LMULMAX1-NEXT: addi a0, a1, 32 -; LMULMAX1-NEXT: vse64.v v12, (a0) -; LMULMAX1-NEXT: vse64.v v11, (a1) -; LMULMAX1-NEXT: addi a1, a1, 16 -; LMULMAX1-NEXT: vse64.v v9, (a1) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: fpext_v8f16_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vfwcvt.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfwcvt.f.f.v v12, v10 +; CHECK-NEXT: vse64.v v12, (a1) +; CHECK-NEXT: ret %a = load <8 x half>, ptr %x %d = fpext <8 x half> %a to <8 x double> store <8 x double> %d, ptr %y @@ -145,26 +93,13 @@ define void @fpround_v2f64_v2f16(ptr %x, ptr %y) { } define void @fpround_v8f32_v8f16(ptr %x, ptr %y) { -; LMULMAX8-LABEL: fpround_v8f32_v8f16: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX8-NEXT: vle32.v v8, (a0) -; LMULMAX8-NEXT: vfncvt.f.f.w v10, v8 -; LMULMAX8-NEXT: vse16.v v10, (a1) -; LMULMAX8-NEXT: ret -; -; LMULMAX1-LABEL: fpround_v8f32_v8f16: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi a2, a0, 16 -; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-NEXT: vle32.v v8, (a0) -; LMULMAX1-NEXT: vle32.v v9, (a2) -; LMULMAX1-NEXT: vfncvt.f.f.w v10, v8 -; LMULMAX1-NEXT: vfncvt.f.f.w v8, v9 -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vslideup.vi v10, v8, 4 -; LMULMAX1-NEXT: vse16.v v10, (a1) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: fpround_v8f32_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vfncvt.f.f.w v10, v8 +; CHECK-NEXT: vse16.v v10, (a1) +; CHECK-NEXT: 
ret %a = load <8 x float>, ptr %x %d = fptrunc <8 x float> %a to <8 x half> store <8 x half> %d, ptr %y @@ -172,49 +107,15 @@ define void @fpround_v8f32_v8f16(ptr %x, ptr %y) { } define void @fpround_v8f64_v8f16(ptr %x, ptr %y) { -; LMULMAX8-LABEL: fpround_v8f64_v8f16: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX8-NEXT: vle64.v v8, (a0) -; LMULMAX8-NEXT: vfncvt.rod.f.f.w v12, v8 -; LMULMAX8-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; LMULMAX8-NEXT: vfncvt.f.f.w v8, v12 -; LMULMAX8-NEXT: vse16.v v8, (a1) -; LMULMAX8-NEXT: ret -; -; LMULMAX1-LABEL: fpround_v8f64_v8f16: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi a2, a0, 48 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vle64.v v8, (a2) -; LMULMAX1-NEXT: addi a2, a0, 32 -; LMULMAX1-NEXT: vle64.v v9, (a0) -; LMULMAX1-NEXT: vle64.v v10, (a2) -; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vle64.v v11, (a0) -; LMULMAX1-NEXT: vfncvt.rod.f.f.w v12, v9 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vfncvt.f.f.w v9, v12 -; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; LMULMAX1-NEXT: vfncvt.rod.f.f.w v12, v11 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vfncvt.f.f.w v11, v12 -; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, tu, ma -; LMULMAX1-NEXT: vslideup.vi v9, v11, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vfncvt.rod.f.f.w v11, v10 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vfncvt.f.f.w v10, v11 -; LMULMAX1-NEXT: vsetivli zero, 6, e16, m1, tu, ma -; LMULMAX1-NEXT: vslideup.vi v9, v10, 4 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vfncvt.rod.f.f.w v10, v8 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vfncvt.f.f.w v8, v10 -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vslideup.vi v9, v8, 6 -; LMULMAX1-NEXT: vse16.v v9, (a1) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: 
fpround_v8f64_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vfncvt.rod.f.f.w v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvt.f.f.w v8, v12 +; CHECK-NEXT: vse16.v v8, (a1) +; CHECK-NEXT: ret %a = load <8 x double>, ptr %x %d = fptrunc <8 x double> %a to <8 x half> store <8 x half> %d, ptr %y diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll index 1ccb089bbff424..dc907eed16cce6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll @@ -1,8 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,LMULMAX2,RV32-LMULMAX2 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,LMULMAX2,RV64-LMULMAX2 -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,LMULMAX1,RV32-LMULMAX1 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,LMULMAX1,RV64-LMULMAX1 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64 define void @splat_v8f16(ptr %x, half %y) { ; CHECK-LABEL: 
splat_v8f16: @@ -44,21 +42,12 @@ define void @splat_v2f64(ptr %x, double %y) { } define void @splat_16f16(ptr %x, half %y) { -; LMULMAX2-LABEL: splat_16f16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-NEXT: vfmv.v.f v8, fa0 -; LMULMAX2-NEXT: vse16.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: splat_16f16: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vfmv.v.f v8, fa0 -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vse16.v v8, (a1) -; LMULMAX1-NEXT: vse16.v v8, (a0) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: splat_16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = insertelement <16 x half> poison, half %y, i32 0 %b = shufflevector <16 x half> %a, <16 x half> poison, <16 x i32> zeroinitializer store <16 x half> %b, ptr %x @@ -66,21 +55,12 @@ define void @splat_16f16(ptr %x, half %y) { } define void @splat_v8f32(ptr %x, float %y) { -; LMULMAX2-LABEL: splat_v8f32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vfmv.v.f v8, fa0 -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: splat_v8f32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vfmv.v.f v8, fa0 -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vse32.v v8, (a1) -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: splat_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret %a = insertelement <8 x float> poison, float %y, i32 0 %b = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32> zeroinitializer store <8 x float> %b, ptr %x @@ -88,21 +68,12 @@ define void @splat_v8f32(ptr %x, float %y) { } define void @splat_v4f64(ptr %x, double %y) { -; LMULMAX2-LABEL: 
splat_v4f64: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-NEXT: vfmv.v.f v8, fa0 -; LMULMAX2-NEXT: vse64.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: splat_v4f64: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vfmv.v.f v8, fa0 -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vse64.v v8, (a1) -; LMULMAX1-NEXT: vse64.v v8, (a0) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: splat_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: ret %a = insertelement <4 x double> poison, double %y, i32 0 %b = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> zeroinitializer store <4 x double> %b, ptr %x @@ -149,21 +120,12 @@ define void @splat_zero_v2f64(ptr %x) { } define void @splat_zero_16f16(ptr %x) { -; LMULMAX2-LABEL: splat_zero_16f16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-NEXT: vmv.v.i v8, 0 -; LMULMAX2-NEXT: vse16.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: splat_zero_16f16: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vse16.v v8, (a0) -; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vse16.v v8, (a0) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: splat_zero_16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = insertelement <16 x half> poison, half 0.0, i32 0 %b = shufflevector <16 x half> %a, <16 x half> poison, <16 x i32> zeroinitializer store <16 x half> %b, ptr %x @@ -171,21 +133,12 @@ define void @splat_zero_16f16(ptr %x) { } define void @splat_zero_v8f32(ptr %x) { -; LMULMAX2-LABEL: splat_zero_v8f32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vmv.v.i v8, 0 -; LMULMAX2-NEXT: vse32.v 
v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: splat_zero_v8f32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: splat_zero_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret %a = insertelement <8 x float> poison, float 0.0, i32 0 %b = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32> zeroinitializer store <8 x float> %b, ptr %x @@ -193,21 +146,12 @@ define void @splat_zero_v8f32(ptr %x) { } define void @splat_zero_v4f64(ptr %x) { -; LMULMAX2-LABEL: splat_zero_v4f64: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-NEXT: vmv.v.i v8, 0 -; LMULMAX2-NEXT: vse64.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: splat_zero_v4f64: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vse64.v v8, (a0) -; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vse64.v v8, (a0) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: splat_zero_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: ret %a = insertelement <4 x double> poison, double 0.0, i32 0 %b = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> zeroinitializer store <4 x double> %b, ptr %x @@ -267,23 +211,13 @@ define void @splat_negzero_v2f64(ptr %x) { } define void @splat_negzero_16f16(ptr %x) { -; LMULMAX2-LABEL: splat_negzero_16f16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: lui a1, 1048568 -; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-NEXT: vmv.v.x v8, a1 -; LMULMAX2-NEXT: vse16.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: splat_negzero_16f16: -; LMULMAX1: # %bb.0: -; 
LMULMAX1-NEXT: lui a1, 1048568 -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vmv.v.x v8, a1 -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vse16.v v8, (a1) -; LMULMAX1-NEXT: vse16.v v8, (a0) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: splat_negzero_16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, 1048568 +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = insertelement <16 x half> poison, half -0.0, i32 0 %b = shufflevector <16 x half> %a, <16 x half> poison, <16 x i32> zeroinitializer store <16 x half> %b, ptr %x @@ -291,23 +225,13 @@ define void @splat_negzero_16f16(ptr %x) { } define void @splat_negzero_v8f32(ptr %x) { -; LMULMAX2-LABEL: splat_negzero_v8f32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: lui a1, 524288 -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vmv.v.x v8, a1 -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: splat_negzero_v8f32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: lui a1, 524288 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vmv.v.x v8, a1 -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vse32.v v8, (a1) -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: splat_negzero_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, 524288 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret %a = insertelement <8 x float> poison, float -0.0, i32 0 %b = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32> zeroinitializer store <8 x float> %b, ptr %x @@ -315,45 +239,23 @@ define void @splat_negzero_v8f32(ptr %x) { } define void @splat_negzero_v4f64(ptr %x) { -; RV32-LMULMAX2-LABEL: splat_negzero_v4f64: -; RV32-LMULMAX2: # %bb.0: -; RV32-LMULMAX2-NEXT: fcvt.d.w fa5, zero -; RV32-LMULMAX2-NEXT: fneg.d fa5, fa5 -; RV32-LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; 
RV32-LMULMAX2-NEXT: vfmv.v.f v8, fa5 -; RV32-LMULMAX2-NEXT: vse64.v v8, (a0) -; RV32-LMULMAX2-NEXT: ret -; -; RV64-LMULMAX2-LABEL: splat_negzero_v4f64: -; RV64-LMULMAX2: # %bb.0: -; RV64-LMULMAX2-NEXT: li a1, -1 -; RV64-LMULMAX2-NEXT: slli a1, a1, 63 -; RV64-LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-LMULMAX2-NEXT: vmv.v.x v8, a1 -; RV64-LMULMAX2-NEXT: vse64.v v8, (a0) -; RV64-LMULMAX2-NEXT: ret -; -; RV32-LMULMAX1-LABEL: splat_negzero_v4f64: -; RV32-LMULMAX1: # %bb.0: -; RV32-LMULMAX1-NEXT: fcvt.d.w fa5, zero -; RV32-LMULMAX1-NEXT: fneg.d fa5, fa5 -; RV32-LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-LMULMAX1-NEXT: vfmv.v.f v8, fa5 -; RV32-LMULMAX1-NEXT: addi a1, a0, 16 -; RV32-LMULMAX1-NEXT: vse64.v v8, (a1) -; RV32-LMULMAX1-NEXT: vse64.v v8, (a0) -; RV32-LMULMAX1-NEXT: ret +; CHECK-RV32-LABEL: splat_negzero_v4f64: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: fcvt.d.w fa5, zero +; CHECK-RV32-NEXT: fneg.d fa5, fa5 +; CHECK-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-RV32-NEXT: vfmv.v.f v8, fa5 +; CHECK-RV32-NEXT: vse64.v v8, (a0) +; CHECK-RV32-NEXT: ret ; -; RV64-LMULMAX1-LABEL: splat_negzero_v4f64: -; RV64-LMULMAX1: # %bb.0: -; RV64-LMULMAX1-NEXT: li a1, -1 -; RV64-LMULMAX1-NEXT: slli a1, a1, 63 -; RV64-LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64-LMULMAX1-NEXT: vmv.v.x v8, a1 -; RV64-LMULMAX1-NEXT: addi a1, a0, 16 -; RV64-LMULMAX1-NEXT: vse64.v v8, (a1) -; RV64-LMULMAX1-NEXT: vse64.v v8, (a0) -; RV64-LMULMAX1-NEXT: ret +; CHECK-RV64-LABEL: splat_negzero_v4f64: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: li a1, -1 +; CHECK-RV64-NEXT: slli a1, a1, 63 +; CHECK-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-RV64-NEXT: vmv.v.x v8, a1 +; CHECK-RV64-NEXT: vse64.v v8, (a0) +; CHECK-RV64-NEXT: ret %a = insertelement <4 x double> poison, double -0.0, i32 0 %b = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> zeroinitializer store <4 x double> %b, ptr %x diff --git 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-vrgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-vrgather.ll index 36294af97469fb..de7dfab1dfcff1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-vrgather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-vrgather.ll @@ -1,8 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s define void @gather_const_v8f16(ptr %x) { ; CHECK-LABEL: gather_const_v8f16: @@ -52,36 +50,14 @@ define void @gather_const_v2f64(ptr %x) { } define void @gather_const_v64f16(ptr %x) { -; LMULMAX8-LABEL: gather_const_v64f16: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: li a1, 64 -; LMULMAX8-NEXT: addi a2, a0, 94 -; LMULMAX8-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; LMULMAX8-NEXT: vlse16.v v8, (a2), zero -; LMULMAX8-NEXT: vse16.v v8, (a0) -; LMULMAX8-NEXT: ret -; -; LMULMAX1-LABEL: gather_const_v64f16: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi a1, a0, 80 -; LMULMAX1-NEXT: addi a2, a0, 16 -; LMULMAX1-NEXT: addi a3, a0, 48 -; 
LMULMAX1-NEXT: addi a4, a0, 32 -; LMULMAX1-NEXT: addi a5, a0, 94 -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vlse16.v v8, (a5), zero -; LMULMAX1-NEXT: addi a5, a0, 64 -; LMULMAX1-NEXT: addi a6, a0, 112 -; LMULMAX1-NEXT: addi a7, a0, 96 -; LMULMAX1-NEXT: vse16.v v8, (a7) -; LMULMAX1-NEXT: vse16.v v8, (a6) -; LMULMAX1-NEXT: vse16.v v8, (a5) -; LMULMAX1-NEXT: vse16.v v8, (a1) -; LMULMAX1-NEXT: vse16.v v8, (a4) -; LMULMAX1-NEXT: vse16.v v8, (a3) -; LMULMAX1-NEXT: vse16.v v8, (a0) -; LMULMAX1-NEXT: vse16.v v8, (a2) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: gather_const_v64f16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 64 +; CHECK-NEXT: addi a2, a0, 94 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vlse16.v v8, (a2), zero +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <64 x half>, ptr %x %b = extractelement <64 x half> %a, i32 47 %c = insertelement <64 x half> poison, half %b, i32 0 @@ -91,36 +67,14 @@ define void @gather_const_v64f16(ptr %x) { } define void @gather_const_v32f32(ptr %x) { -; LMULMAX8-LABEL: gather_const_v32f32: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: li a1, 32 -; LMULMAX8-NEXT: addi a2, a0, 68 -; LMULMAX8-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; LMULMAX8-NEXT: vlse32.v v8, (a2), zero -; LMULMAX8-NEXT: vse32.v v8, (a0) -; LMULMAX8-NEXT: ret -; -; LMULMAX1-LABEL: gather_const_v32f32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi a1, a0, 64 -; LMULMAX1-NEXT: addi a2, a0, 16 -; LMULMAX1-NEXT: addi a3, a0, 48 -; LMULMAX1-NEXT: addi a4, a0, 32 -; LMULMAX1-NEXT: addi a5, a0, 68 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vlse32.v v8, (a5), zero -; LMULMAX1-NEXT: addi a5, a0, 80 -; LMULMAX1-NEXT: addi a6, a0, 112 -; LMULMAX1-NEXT: addi a7, a0, 96 -; LMULMAX1-NEXT: vse32.v v8, (a7) -; LMULMAX1-NEXT: vse32.v v8, (a6) -; LMULMAX1-NEXT: vse32.v v8, (a1) -; LMULMAX1-NEXT: vse32.v v8, (a5) -; LMULMAX1-NEXT: vse32.v v8, (a4) -; LMULMAX1-NEXT: vse32.v v8, (a3) -; LMULMAX1-NEXT: vse32.v v8, 
(a0) -; LMULMAX1-NEXT: vse32.v v8, (a2) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: gather_const_v32f32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: addi a2, a0, 68 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vlse32.v v8, (a2), zero +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret %a = load <32 x float>, ptr %x %b = extractelement <32 x float> %a, i32 17 %c = insertelement <32 x float> poison, float %b, i32 0 @@ -130,34 +84,13 @@ define void @gather_const_v32f32(ptr %x) { } define void @gather_const_v16f64(ptr %x) { -; LMULMAX8-LABEL: gather_const_v16f64: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: addi a1, a0, 80 -; LMULMAX8-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; LMULMAX8-NEXT: vlse64.v v8, (a1), zero -; LMULMAX8-NEXT: vse64.v v8, (a0) -; LMULMAX8-NEXT: ret -; -; LMULMAX1-LABEL: gather_const_v16f64: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi a1, a0, 80 -; LMULMAX1-NEXT: addi a2, a0, 16 -; LMULMAX1-NEXT: addi a3, a0, 48 -; LMULMAX1-NEXT: addi a4, a0, 32 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vlse64.v v8, (a1), zero -; LMULMAX1-NEXT: addi a5, a0, 64 -; LMULMAX1-NEXT: addi a6, a0, 112 -; LMULMAX1-NEXT: addi a7, a0, 96 -; LMULMAX1-NEXT: vse64.v v8, (a7) -; LMULMAX1-NEXT: vse64.v v8, (a6) -; LMULMAX1-NEXT: vse64.v v8, (a5) -; LMULMAX1-NEXT: vse64.v v8, (a1) -; LMULMAX1-NEXT: vse64.v v8, (a4) -; LMULMAX1-NEXT: vse64.v v8, (a3) -; LMULMAX1-NEXT: vse64.v v8, (a0) -; LMULMAX1-NEXT: vse64.v v8, (a2) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: gather_const_v16f64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, 80 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vlse64.v v8, (a1), zero +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: ret %a = load <16 x double>, ptr %x %b = extractelement <16 x double> %a, i32 10 %c = insertelement <16 x double> poison, double %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll index 
dd79311b78ba79..0f003d7af6100e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -1,21 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX2 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX2 -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX1,LMULMAX1-RV32 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX1,LMULMAX1-RV64 -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX2 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX2 -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX1,LMULMAX1-RV32 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX1,LMULMAX1-RV64 - -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfhmin,+f,+d,+zvl256b -riscv-v-fixed-length-vector-lmul-max=2 
-verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX2,ZVFHMINLMULMAX2-RV32 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfhmin,+f,+d,+zvl256b -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX2,ZVFHMINLMULMAX2-RV64 -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfhmin,+f,+zvl256b -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX1-RV32 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfhmin,+f,+zvl256b -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX1-RV64 -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfhmin,+f,+zvl256b -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX2,ZVFHMINLMULMAX2-RV32 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfhmin,+f,+zvl256b -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX2,ZVFHMINLMULMAX2-RV64 -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfhmin,+f,+zvl256b -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX1-RV32 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfhmin,+f,+zvl256b -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX1-RV64 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: llc 
-mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH + +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfhmin,+f,+d,+zvl256b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN-RV32 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfhmin,+f,+d,+zvl256b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN-RV64 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfhmin,+f,+zvl256b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN-RV32 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfhmin,+f,+zvl256b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN-RV64 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfhmin,+f,+zvl256b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN-RV32 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfhmin,+f,+zvl256b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN-RV64 define void @fadd_v8f16(ptr %x, ptr %y) { ; ZVFH-LABEL: fadd_v8f16: @@ -59,79 +53,42 @@ define void @fadd_v6f16(ptr %x, ptr %y) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMINLMULMAX2-RV32-LABEL: fadd_v6f16: -; ZVFHMINLMULMAX2-RV32: # %bb.0: -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfadd.vv v8, v8, v10 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, 
e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: ret -; -; ZVFHMINLMULMAX2-RV64-LABEL: fadd_v6f16: -; ZVFHMINLMULMAX2-RV64: # %bb.0: -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a1) -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfadd.vv v8, v8, v10 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: ret -; -; ZVFHMINLMULMAX1-RV32-LABEL: fadd_v6f16: -; ZVFHMINLMULMAX1-RV32: # %bb.0: -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfadd.vv v8, v8, v10 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 -; 
ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: ret -; -; ZVFHMINLMULMAX1-RV64-LABEL: fadd_v6f16: -; ZVFHMINLMULMAX1-RV64: # %bb.0: -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a1) -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfadd.vv v8, v8, v10 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: ret +; ZVFHMIN-RV32-LABEL: fadd_v6f16: +; ZVFHMIN-RV32: # %bb.0: +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vle16.v v8, (a1) +; ZVFHMIN-RV32-NEXT: vle16.v v9, (a0) +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfadd.vv v8, v8, v10 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV32-NEXT: addi a1, a0, 8 +; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMIN-RV32-NEXT: ret +; +; ZVFHMIN-RV64-LABEL: fadd_v6f16: +; ZVFHMIN-RV64: # %bb.0: +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, 
ta, ma +; ZVFHMIN-RV64-NEXT: vle16.v v8, (a1) +; ZVFHMIN-RV64-NEXT: vle16.v v9, (a0) +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfadd.vv v8, v8, v10 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV64-NEXT: addi a0, a0, 8 +; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMIN-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = fadd <6 x half> %a, %b @@ -222,79 +179,42 @@ define void @fsub_v6f16(ptr %x, ptr %y) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMINLMULMAX2-RV32-LABEL: fsub_v6f16: -; ZVFHMINLMULMAX2-RV32: # %bb.0: -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfsub.vv v8, v8, v10 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: ret -; -; ZVFHMINLMULMAX2-RV64-LABEL: fsub_v6f16: -; ZVFHMINLMULMAX2-RV64: # %bb.0: -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a1) -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0) -; 
ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfsub.vv v8, v8, v10 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: ret -; -; ZVFHMINLMULMAX1-RV32-LABEL: fsub_v6f16: -; ZVFHMINLMULMAX1-RV32: # %bb.0: -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfsub.vv v8, v8, v10 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: ret -; -; ZVFHMINLMULMAX1-RV64-LABEL: fsub_v6f16: -; ZVFHMINLMULMAX1-RV64: # %bb.0: -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a1) -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfsub.vv 
v8, v8, v10 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: ret +; ZVFHMIN-RV32-LABEL: fsub_v6f16: +; ZVFHMIN-RV32: # %bb.0: +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vle16.v v8, (a1) +; ZVFHMIN-RV32-NEXT: vle16.v v9, (a0) +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfsub.vv v8, v8, v10 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV32-NEXT: addi a1, a0, 8 +; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMIN-RV32-NEXT: ret +; +; ZVFHMIN-RV64-LABEL: fsub_v6f16: +; ZVFHMIN-RV64: # %bb.0: +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vle16.v v8, (a1) +; ZVFHMIN-RV64-NEXT: vle16.v v9, (a0) +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfsub.vv v8, v8, v10 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV64-NEXT: addi a0, a0, 8 +; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMIN-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = 
fsub <6 x half> %a, %b @@ -385,79 +305,42 @@ define void @fmul_v6f16(ptr %x, ptr %y) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMINLMULMAX2-RV32-LABEL: fmul_v6f16: -; ZVFHMINLMULMAX2-RV32: # %bb.0: -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfmul.vv v8, v8, v10 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: ret -; -; ZVFHMINLMULMAX2-RV64-LABEL: fmul_v6f16: -; ZVFHMINLMULMAX2-RV64: # %bb.0: -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a1) -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfmul.vv v8, v8, v10 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: ret -; -; ZVFHMINLMULMAX1-RV32-LABEL: fmul_v6f16: -; 
ZVFHMINLMULMAX1-RV32: # %bb.0: -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfmul.vv v8, v8, v10 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: ret -; -; ZVFHMINLMULMAX1-RV64-LABEL: fmul_v6f16: -; ZVFHMINLMULMAX1-RV64: # %bb.0: -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a1) -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfmul.vv v8, v8, v10 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: ret +; ZVFHMIN-RV32-LABEL: fmul_v6f16: +; ZVFHMIN-RV32: # %bb.0: +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vle16.v v8, (a1) +; ZVFHMIN-RV32-NEXT: vle16.v v9, (a0) +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v8 +; 
ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfmul.vv v8, v8, v10 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV32-NEXT: addi a1, a0, 8 +; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMIN-RV32-NEXT: ret +; +; ZVFHMIN-RV64-LABEL: fmul_v6f16: +; ZVFHMIN-RV64: # %bb.0: +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vle16.v v8, (a1) +; ZVFHMIN-RV64-NEXT: vle16.v v9, (a0) +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfmul.vv v8, v8, v10 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV64-NEXT: addi a0, a0, 8 +; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMIN-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = fmul <6 x half> %a, %b @@ -548,79 +431,42 @@ define void @fdiv_v6f16(ptr %x, ptr %y) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMINLMULMAX2-RV32-LABEL: fdiv_v6f16: -; ZVFHMINLMULMAX2-RV32: # %bb.0: -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfdiv.vv v8, v8, v10 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; 
ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: ret -; -; ZVFHMINLMULMAX2-RV64-LABEL: fdiv_v6f16: -; ZVFHMINLMULMAX2-RV64: # %bb.0: -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a1) -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfdiv.vv v8, v8, v10 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: ret -; -; ZVFHMINLMULMAX1-RV32-LABEL: fdiv_v6f16: -; ZVFHMINLMULMAX1-RV32: # %bb.0: -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfdiv.vv v8, v8, v10 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV32-NEXT: addi 
a1, a0, 8 -; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: ret -; -; ZVFHMINLMULMAX1-RV64-LABEL: fdiv_v6f16: -; ZVFHMINLMULMAX1-RV64: # %bb.0: -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a1) -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfdiv.vv v8, v8, v10 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: ret +; ZVFHMIN-RV32-LABEL: fdiv_v6f16: +; ZVFHMIN-RV32: # %bb.0: +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vle16.v v8, (a1) +; ZVFHMIN-RV32-NEXT: vle16.v v9, (a0) +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfdiv.vv v8, v8, v10 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV32-NEXT: addi a1, a0, 8 +; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMIN-RV32-NEXT: ret +; +; ZVFHMIN-RV64-LABEL: fdiv_v6f16: +; ZVFHMIN-RV64: # %bb.0: +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: 
vle16.v v8, (a1) +; ZVFHMIN-RV64-NEXT: vle16.v v9, (a0) +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfdiv.vv v8, v8, v10 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV64-NEXT: addi a0, a0, 8 +; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMIN-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = fdiv <6 x half> %a, %b @@ -706,71 +552,38 @@ define void @fneg_v6f16(ptr %x) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMINLMULMAX2-RV32-LABEL: fneg_v6f16: -; ZVFHMINLMULMAX2-RV32: # %bb.0: -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfneg.v v8, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: ret -; -; ZVFHMINLMULMAX2-RV64-LABEL: fneg_v6f16: -; ZVFHMINLMULMAX2-RV64: # %bb.0: -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfneg.v v8, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, 
zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: ret -; -; ZVFHMINLMULMAX1-RV32-LABEL: fneg_v6f16: -; ZVFHMINLMULMAX1-RV32: # %bb.0: -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfneg.v v8, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: ret -; -; ZVFHMINLMULMAX1-RV64-LABEL: fneg_v6f16: -; ZVFHMINLMULMAX1-RV64: # %bb.0: -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfneg.v v8, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: ret +; ZVFHMIN-RV32-LABEL: fneg_v6f16: +; 
ZVFHMIN-RV32: # %bb.0: +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfneg.v v8, v9 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV32-NEXT: addi a1, a0, 8 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMIN-RV32-NEXT: ret +; +; ZVFHMIN-RV64-LABEL: fneg_v6f16: +; ZVFHMIN-RV64: # %bb.0: +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfneg.v v8, v9 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMIN-RV64-NEXT: addi a0, a0, 8 +; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMIN-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = fneg <6 x half> %a store <6 x half> %b, ptr %x @@ -851,71 +664,38 @@ define void @fabs_v6f16(ptr %x) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMINLMULMAX2-RV32-LABEL: fabs_v6f16: -; ZVFHMINLMULMAX2-RV32: # %bb.0: -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfabs.v v8, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 
-; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: ret -; -; ZVFHMINLMULMAX2-RV64-LABEL: fabs_v6f16: -; ZVFHMINLMULMAX2-RV64: # %bb.0: -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfabs.v v8, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: ret -; -; ZVFHMINLMULMAX1-RV32-LABEL: fabs_v6f16: -; ZVFHMINLMULMAX1-RV32: # %bb.0: -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfabs.v v8, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: ret -; -; ZVFHMINLMULMAX1-RV64-LABEL: fabs_v6f16: -; ZVFHMINLMULMAX1-RV64: # %bb.0: -; 
ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfabs.v v8, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: ret +; ZVFHMIN-RV32-LABEL: fabs_v6f16: +; ZVFHMIN-RV32: # %bb.0: +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfabs.v v8, v9 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV32-NEXT: addi a1, a0, 8 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMIN-RV32-NEXT: ret +; +; ZVFHMIN-RV64-LABEL: fabs_v6f16: +; ZVFHMIN-RV64: # %bb.0: +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfabs.v v8, v9 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMIN-RV64-NEXT: addi a0, a0, 8 +; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0) +; 
ZVFHMIN-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = call <6 x half> @llvm.fabs.v6f16(<6 x half> %a) store <6 x half> %b, ptr %x @@ -1004,79 +784,42 @@ define void @copysign_v6f16(ptr %x, ptr %y) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMINLMULMAX2-RV32-LABEL: copysign_v6f16: -; ZVFHMINLMULMAX2-RV32: # %bb.0: -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfsgnj.vv v8, v8, v10 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: ret -; -; ZVFHMINLMULMAX2-RV64-LABEL: copysign_v6f16: -; ZVFHMINLMULMAX2-RV64: # %bb.0: -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a1) -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfsgnj.vv v8, v8, v10 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 -; 
ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: ret -; -; ZVFHMINLMULMAX1-RV32-LABEL: copysign_v6f16: -; ZVFHMINLMULMAX1-RV32: # %bb.0: -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfsgnj.vv v8, v8, v10 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: ret -; -; ZVFHMINLMULMAX1-RV64-LABEL: copysign_v6f16: -; ZVFHMINLMULMAX1-RV64: # %bb.0: -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a1) -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfsgnj.vv v8, v8, v10 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: ret +; ZVFHMIN-RV32-LABEL: copysign_v6f16: +; ZVFHMIN-RV32: # %bb.0: +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, 
ta, ma +; ZVFHMIN-RV32-NEXT: vle16.v v8, (a1) +; ZVFHMIN-RV32-NEXT: vle16.v v9, (a0) +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfsgnj.vv v8, v8, v10 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV32-NEXT: addi a1, a0, 8 +; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMIN-RV32-NEXT: ret +; +; ZVFHMIN-RV64-LABEL: copysign_v6f16: +; ZVFHMIN-RV64: # %bb.0: +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vle16.v v8, (a1) +; ZVFHMIN-RV64-NEXT: vle16.v v9, (a0) +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfsgnj.vv v8, v8, v10 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV64-NEXT: addi a0, a0, 8 +; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMIN-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = call <6 x half> @llvm.copysign.v6f16(<6 x half> %a, <6 x half> %b) @@ -1174,99 +917,52 @@ define void @copysign_vf_v6f16(ptr %x, half %y) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMINLMULMAX2-RV32-LABEL: copysign_vf_v6f16: -; ZVFHMINLMULMAX2-RV32: # %bb.0: -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: 
vfmv.v.f v9, fa5 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfsgnj.vv v8, v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: ret -; -; ZVFHMINLMULMAX2-RV64-LABEL: copysign_vf_v6f16: -; ZVFHMINLMULMAX2-RV64: # %bb.0: -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v9, fa5 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfsgnj.vv v8, v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 -; 
ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: ret -; -; ZVFHMINLMULMAX1-RV32-LABEL: copysign_vf_v6f16: -; ZVFHMINLMULMAX1-RV32: # %bb.0: -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v9, fa5 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfsgnj.vv v8, v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: ret -; -; ZVFHMINLMULMAX1-RV64-LABEL: copysign_vf_v6f16: -; ZVFHMINLMULMAX1-RV64: # %bb.0: -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v9, fa5 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, 
zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfsgnj.vv v8, v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: ret +; ZVFHMIN-RV32-LABEL: copysign_vf_v6f16: +; ZVFHMIN-RV32: # %bb.0: +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMIN-RV32-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfsgnj.vv v8, v9, v8 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV32-NEXT: addi a1, a0, 8 +; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMIN-RV32-NEXT: ret +; +; ZVFHMIN-RV64-LABEL: copysign_vf_v6f16: +; ZVFHMIN-RV64: # %bb.0: +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMIN-RV64-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; 
ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfsgnj.vv v8, v9, v8 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV64-NEXT: addi a0, a0, 8 +; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMIN-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer @@ -1363,99 +1059,52 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMINLMULMAX2-RV32-LABEL: copysign_neg_v6f16: -; ZVFHMINLMULMAX2-RV32: # %bb.0: -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfneg.v v8, v10 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v10 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfsgnj.vv v8, v8, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) -; 
ZVFHMINLMULMAX2-RV32-NEXT: ret -; -; ZVFHMINLMULMAX2-RV64-LABEL: copysign_neg_v6f16: -; ZVFHMINLMULMAX2-RV64: # %bb.0: -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a1) -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfneg.v v8, v10 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v10 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfsgnj.vv v8, v8, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: ret -; -; ZVFHMINLMULMAX1-RV32-LABEL: copysign_neg_v6f16: -; ZVFHMINLMULMAX1-RV32: # %bb.0: -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfneg.v v8, v10 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v10 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfsgnj.vv v8, v8, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 
-; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: ret -; -; ZVFHMINLMULMAX1-RV64-LABEL: copysign_neg_v6f16: -; ZVFHMINLMULMAX1-RV64: # %bb.0: -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a1) -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfneg.v v8, v10 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v10 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfsgnj.vv v8, v8, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: ret +; ZVFHMIN-RV32-LABEL: copysign_neg_v6f16: +; ZVFHMIN-RV32: # %bb.0: +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vle16.v v8, (a1) +; ZVFHMIN-RV32-NEXT: vle16.v v9, (a0) +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfneg.v v8, v10 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w 
v10, v8 +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV32-NEXT: addi a1, a0, 8 +; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMIN-RV32-NEXT: ret +; +; ZVFHMIN-RV64-LABEL: copysign_neg_v6f16: +; ZVFHMIN-RV64: # %bb.0: +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vle16.v v8, (a1) +; ZVFHMIN-RV64-NEXT: vle16.v v9, (a0) +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfneg.v v8, v10 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v10, v8 +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV64-NEXT: addi a0, a0, 8 +; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMIN-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = fneg <6 x half> %b @@ -1560,133 +1209,69 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMINLMULMAX2-RV32-LABEL: copysign_neg_trunc_v3f16_v3f32: -; ZVFHMINLMULMAX2-RV32: # %bb.0: -; ZVFHMINLMULMAX2-RV32-NEXT: addi sp, sp, -16 -; ZVFHMINLMULMAX2-RV32-NEXT: .cfi_def_cfa_offset 16 -; 
ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 3, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vle32.v v9, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v8, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfneg.v v8, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfsgnj.vv v8, v10, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, sp, 8 -; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: flh fa5, 12(sp) -; ZVFHMINLMULMAX2-RV32-NEXT: fsh fa5, 4(a0) -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v9, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: addi sp, sp, 16 -; ZVFHMINLMULMAX2-RV32-NEXT: ret -; -; ZVFHMINLMULMAX2-RV64-LABEL: copysign_neg_trunc_v3f16_v3f32: -; ZVFHMINLMULMAX2-RV64: # %bb.0: -; ZVFHMINLMULMAX2-RV64-NEXT: addi sp, sp, -16 -; ZVFHMINLMULMAX2-RV64-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vle64.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: mv a2, sp -; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v8, (a2) -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a2) -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 3, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vle32.v v9, (a1) -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 4, 
e16, mf4, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v8, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfneg.v v8, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfsgnj.vv v8, v10, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: addi a1, sp, 8 -; ZVFHMINLMULMAX2-RV64-NEXT: vse16.v v9, (a1) -; ZVFHMINLMULMAX2-RV64-NEXT: flh fa5, 12(sp) -; ZVFHMINLMULMAX2-RV64-NEXT: fsh fa5, 4(a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v9, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: addi sp, sp, 16 -; ZVFHMINLMULMAX2-RV64-NEXT: ret -; -; ZVFHMINLMULMAX1-RV32-LABEL: copysign_neg_trunc_v3f16_v3f32: -; ZVFHMINLMULMAX1-RV32: # %bb.0: -; ZVFHMINLMULMAX1-RV32-NEXT: addi sp, sp, -16 -; ZVFHMINLMULMAX1-RV32-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 3, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vle32.v v9, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v8, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfneg.v v8, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9 -; 
ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfsgnj.vv v8, v10, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, sp, 8 -; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: flh fa5, 12(sp) -; ZVFHMINLMULMAX1-RV32-NEXT: fsh fa5, 4(a0) -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v9, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: addi sp, sp, 16 -; ZVFHMINLMULMAX1-RV32-NEXT: ret -; -; ZVFHMINLMULMAX1-RV64-LABEL: copysign_neg_trunc_v3f16_v3f32: -; ZVFHMINLMULMAX1-RV64: # %bb.0: -; ZVFHMINLMULMAX1-RV64-NEXT: addi sp, sp, -16 -; ZVFHMINLMULMAX1-RV64-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: mv a2, sp -; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v8, (a2) -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a2) -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 3, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vle32.v v9, (a1) -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v8, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfneg.v v8, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfsgnj.vv v8, v10, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: addi a1, 
sp, 8 -; ZVFHMINLMULMAX1-RV64-NEXT: vse16.v v9, (a1) -; ZVFHMINLMULMAX1-RV64-NEXT: flh fa5, 12(sp) -; ZVFHMINLMULMAX1-RV64-NEXT: fsh fa5, 4(a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v9, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: addi sp, sp, 16 -; ZVFHMINLMULMAX1-RV64-NEXT: ret +; ZVFHMIN-RV32-LABEL: copysign_neg_trunc_v3f16_v3f32: +; ZVFHMIN-RV32: # %bb.0: +; ZVFHMIN-RV32-NEXT: addi sp, sp, -16 +; ZVFHMIN-RV32-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMIN-RV32-NEXT: vsetivli zero, 3, e32, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vle32.v v9, (a1) +; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfneg.v v8, v9 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfsgnj.vv v8, v10, v8 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV32-NEXT: addi a1, sp, 8 +; ZVFHMIN-RV32-NEXT: vse16.v v9, (a1) +; ZVFHMIN-RV32-NEXT: flh fa5, 12(sp) +; ZVFHMIN-RV32-NEXT: fsh fa5, 4(a0) +; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vse32.v v9, (a0) +; ZVFHMIN-RV32-NEXT: addi sp, sp, 16 +; ZVFHMIN-RV32-NEXT: ret +; +; ZVFHMIN-RV64-LABEL: copysign_neg_trunc_v3f16_v3f32: +; ZVFHMIN-RV64: # %bb.0: +; ZVFHMIN-RV64-NEXT: addi sp, sp, -16 +; ZVFHMIN-RV64-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vle64.v v8, (a0) +; ZVFHMIN-RV64-NEXT: mv a2, sp +; ZVFHMIN-RV64-NEXT: vse64.v v8, (a2) +; ZVFHMIN-RV64-NEXT: vsetivli 
zero, 4, e16, mf4, ta, ma +; ZVFHMIN-RV64-NEXT: vle16.v v8, (a2) +; ZVFHMIN-RV64-NEXT: vsetivli zero, 3, e32, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vle32.v v9, (a1) +; ZVFHMIN-RV64-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfneg.v v8, v9 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfsgnj.vv v8, v10, v8 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV64-NEXT: addi a1, sp, 8 +; ZVFHMIN-RV64-NEXT: vse16.v v9, (a1) +; ZVFHMIN-RV64-NEXT: flh fa5, 12(sp) +; ZVFHMIN-RV64-NEXT: fsh fa5, 4(a0) +; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vse32.v v9, (a0) +; ZVFHMIN-RV64-NEXT: addi sp, sp, 16 +; ZVFHMIN-RV64-NEXT: ret %a = load <3 x half>, ptr %x %b = load <3 x float>, ptr %y %c = fneg <3 x float> %b @@ -1755,71 +1340,38 @@ define void @sqrt_v6f16(ptr %x) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMINLMULMAX2-RV32-LABEL: sqrt_v6f16: -; ZVFHMINLMULMAX2-RV32: # %bb.0: -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfsqrt.v v8, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: 
vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: ret -; -; ZVFHMINLMULMAX2-RV64-LABEL: sqrt_v6f16: -; ZVFHMINLMULMAX2-RV64: # %bb.0: -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfsqrt.v v8, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: ret -; -; ZVFHMINLMULMAX1-RV32-LABEL: sqrt_v6f16: -; ZVFHMINLMULMAX1-RV32: # %bb.0: -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfsqrt.v v8, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: ret -; -; ZVFHMINLMULMAX1-RV64-LABEL: sqrt_v6f16: -; ZVFHMINLMULMAX1-RV64: # %bb.0: -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, 
e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfsqrt.v v8, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: ret +; ZVFHMIN-RV32-LABEL: sqrt_v6f16: +; ZVFHMIN-RV32: # %bb.0: +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfsqrt.v v8, v9 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV32-NEXT: addi a1, a0, 8 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMIN-RV32-NEXT: ret +; +; ZVFHMIN-RV64-LABEL: sqrt_v6f16: +; ZVFHMIN-RV64: # %bb.0: +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfsqrt.v v8, v9 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMIN-RV64-NEXT: addi a0, a0, 8 +; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMIN-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = call <6 x half> @llvm.sqrt.v6f16(<6 x half> %a) store <6 x half> %b, ptr %x @@ -1913,87 +1465,46 @@ define void @fma_v6f16(ptr %x, ptr %y, ptr %z) { ; 
ZVFH-NEXT: vse16.v v10, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMINLMULMAX2-RV32-LABEL: fma_v6f16: -; ZVFHMINLMULMAX2-RV32: # %bb.0: -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a2) -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v10, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v11, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v10 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfmadd.vv v9, v8, v11 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v8, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v9, v8, 2 -; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v9, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v8, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: ret -; -; ZVFHMINLMULMAX2-RV64-LABEL: fma_v6f16: -; ZVFHMINLMULMAX2-RV64: # %bb.0: -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a2) -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v10, (a1) -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v11, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v10 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfmadd.vv v9, v8, v11 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v8, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v8, 2 -; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v 
v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: ret -; -; ZVFHMINLMULMAX1-RV32-LABEL: fma_v6f16: -; ZVFHMINLMULMAX1-RV32: # %bb.0: -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a2) -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v10, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v11, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v10 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfmadd.vv v9, v8, v11 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v8, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: ret -; -; ZVFHMINLMULMAX1-RV64-LABEL: fma_v6f16: -; ZVFHMINLMULMAX1-RV64: # %bb.0: -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a2) -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v10, (a1) -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v11, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v10 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfmadd.vv v9, v8, v11 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v8, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, 
(a0) -; ZVFHMINLMULMAX1-RV64-NEXT: ret +; ZVFHMIN-RV32-LABEL: fma_v6f16: +; ZVFHMIN-RV32: # %bb.0: +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vle16.v v8, (a2) +; ZVFHMIN-RV32-NEXT: vle16.v v9, (a0) +; ZVFHMIN-RV32-NEXT: vle16.v v10, (a1) +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfmadd.vv v9, v8, v11 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vslidedown.vi v9, v8, 2 +; ZVFHMIN-RV32-NEXT: addi a1, a0, 8 +; ZVFHMIN-RV32-NEXT: vse32.v v9, (a1) +; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMIN-RV32-NEXT: vse16.v v8, (a0) +; ZVFHMIN-RV32-NEXT: ret +; +; ZVFHMIN-RV64-LABEL: fma_v6f16: +; ZVFHMIN-RV64: # %bb.0: +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vle16.v v8, (a2) +; ZVFHMIN-RV64-NEXT: vle16.v v9, (a0) +; ZVFHMIN-RV64-NEXT: vle16.v v10, (a1) +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfmadd.vv v9, v8, v11 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vse64.v v8, (a0) +; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v8, 2 +; ZVFHMIN-RV64-NEXT: addi a0, a0, 8 +; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMIN-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = load <6 x half>, ptr %z @@ -2104,107 +1615,56 @@ define void @fmsub_v6f16(ptr %x, ptr %y, ptr %z) { ; ZVFH-NEXT: vse16.v v10, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMINLMULMAX2-RV32-LABEL: fmsub_v6f16: -; 
ZVFHMINLMULMAX2-RV32: # %bb.0: -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a2) -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v10, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v11, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfneg.v v8, v11 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v11, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v10 -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v11 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfmacc.vv v10, v8, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v8, v10 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v9, v8, 2 -; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v9, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v8, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: ret -; -; ZVFHMINLMULMAX2-RV64-LABEL: fmsub_v6f16: -; ZVFHMINLMULMAX2-RV64: # %bb.0: -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a2) -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v10, (a1) -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v11, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfneg.v v8, v11 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v11, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v10 -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v11 -; 
ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfmacc.vv v10, v8, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v8, v10 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v8, 2 -; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: ret -; -; ZVFHMINLMULMAX1-RV32-LABEL: fmsub_v6f16: -; ZVFHMINLMULMAX1-RV32: # %bb.0: -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a2) -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v10, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v11, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfneg.v v8, v11 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v11, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v10 -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v11 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfmacc.vv v10, v8, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v8, v10 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: ret -; -; ZVFHMINLMULMAX1-RV64-LABEL: fmsub_v6f16: -; ZVFHMINLMULMAX1-RV64: # %bb.0: -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; 
ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a2) -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v10, (a1) -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v11, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfneg.v v8, v11 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v11, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v10 -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v11 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfmacc.vv v10, v8, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v8, v10 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: ret +; ZVFHMIN-RV32-LABEL: fmsub_v6f16: +; ZVFHMIN-RV32: # %bb.0: +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vle16.v v8, (a2) +; ZVFHMIN-RV32-NEXT: vle16.v v9, (a0) +; ZVFHMIN-RV32-NEXT: vle16.v v10, (a1) +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfneg.v v8, v11 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v11, v8 +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v11 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfmacc.vv v10, v8, v9 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: 
vslidedown.vi v9, v8, 2 +; ZVFHMIN-RV32-NEXT: addi a1, a0, 8 +; ZVFHMIN-RV32-NEXT: vse32.v v9, (a1) +; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMIN-RV32-NEXT: vse16.v v8, (a0) +; ZVFHMIN-RV32-NEXT: ret +; +; ZVFHMIN-RV64-LABEL: fmsub_v6f16: +; ZVFHMIN-RV64: # %bb.0: +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vle16.v v8, (a2) +; ZVFHMIN-RV64-NEXT: vle16.v v9, (a0) +; ZVFHMIN-RV64-NEXT: vle16.v v10, (a1) +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfneg.v v8, v11 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v11, v8 +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v11 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfmacc.vv v10, v8, v9 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vse64.v v8, (a0) +; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v8, 2 +; ZVFHMIN-RV64-NEXT: addi a0, a0, 8 +; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMIN-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = load <6 x half>, ptr %z @@ -2264,58 +1724,28 @@ define void @fnmadd_v2f64(ptr %x, ptr %y, ptr %z) { } define void @fadd_v16f16(ptr %x, ptr %y) { -; LMULMAX2-LABEL: fadd_v16f16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-NEXT: vle16.v v8, (a0) -; LMULMAX2-NEXT: vle16.v v10, (a1) -; LMULMAX2-NEXT: vfadd.vv v8, v8, v10 -; LMULMAX2-NEXT: vse16.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: fadd_v16f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle16.v v9, 
(a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle16.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV32-NEXT: vfadd.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vfadd.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fadd_v16f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV64-NEXT: vfadd.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vfadd.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret -; -; ZVFHMINLMULMAX2-LABEL: fadd_v16f16: -; ZVFHMINLMULMAX2: # %bb.0: -; ZVFHMINLMULMAX2-NEXT: vsetivli zero, 16, e16, m1, ta, ma -; ZVFHMINLMULMAX2-NEXT: vle16.v v8, (a1) -; ZVFHMINLMULMAX2-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v12, v9 -; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMINLMULMAX2-NEXT: vfadd.vv v8, v12, v10 -; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; ZVFHMINLMULMAX2-NEXT: vfncvt.f.f.w v10, v8 -; ZVFHMINLMULMAX2-NEXT: vse16.v v10, (a0) -; ZVFHMINLMULMAX2-NEXT: ret +; ZVFH-LABEL: fadd_v16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vle16.v v10, (a1) +; ZVFH-NEXT: vfadd.vv v8, v8, v10 +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fadd_v16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m1, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a1) +; ZVFHMIN-NEXT: vle16.v v9, (a0) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vsetvli zero, 
zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v8, v12, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 +; ZVFHMIN-NEXT: vse16.v v10, (a0) +; ZVFHMIN-NEXT: ret %a = load <16 x half>, ptr %x %b = load <16 x half>, ptr %y %c = fadd <16 x half> %a, %b @@ -2324,44 +1754,14 @@ define void @fadd_v16f16(ptr %x, ptr %y) { } define void @fadd_v8f32(ptr %x, ptr %y) { -; LMULMAX2-LABEL: fadd_v8f32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: vle32.v v10, (a1) -; LMULMAX2-NEXT: vfadd.vv v8, v8, v10 -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: fadd_v8f32: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle32.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV32-NEXT: vfadd.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vfadd.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fadd_v8f32: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle32.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV64-NEXT: vfadd.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vfadd.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; ZVFH-LABEL: fadd_v8f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; ZVFH-NEXT: vle32.v v8, (a0) +; ZVFH-NEXT: vle32.v v10, (a1) +; ZVFH-NEXT: vfadd.vv v8, v8, v10 +; 
ZVFH-NEXT: vse32.v v8, (a0) +; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fadd_v8f32: ; ZVFHMIN: # %bb.0: @@ -2379,44 +1779,14 @@ define void @fadd_v8f32(ptr %x, ptr %y) { } define void @fadd_v4f64(ptr %x, ptr %y) { -; LMULMAX2-LABEL: fadd_v4f64: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-NEXT: vle64.v v8, (a0) -; LMULMAX2-NEXT: vle64.v v10, (a1) -; LMULMAX2-NEXT: vfadd.vv v8, v8, v10 -; LMULMAX2-NEXT: vse64.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: fadd_v4f64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle64.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV32-NEXT: vfadd.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vfadd.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fadd_v4f64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV64-NEXT: vfadd.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vfadd.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; ZVFH-LABEL: fadd_v4f64: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZVFH-NEXT: vle64.v v8, (a0) +; ZVFH-NEXT: vle64.v v10, (a1) +; ZVFH-NEXT: vfadd.vv v8, v8, v10 +; ZVFH-NEXT: vse64.v v8, (a0) +; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fadd_v4f64: ; ZVFHMIN: # %bb.0: @@ -2434,58 +1804,28 @@ define void @fadd_v4f64(ptr %x, ptr %y) { } define void @fsub_v16f16(ptr %x, ptr %y) { -; 
LMULMAX2-LABEL: fsub_v16f16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-NEXT: vle16.v v8, (a0) -; LMULMAX2-NEXT: vle16.v v10, (a1) -; LMULMAX2-NEXT: vfsub.vv v8, v8, v10 -; LMULMAX2-NEXT: vse16.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: fsub_v16f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle16.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV32-NEXT: vfsub.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vfsub.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fsub_v16f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV64-NEXT: vfsub.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vfsub.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret -; -; ZVFHMINLMULMAX2-LABEL: fsub_v16f16: -; ZVFHMINLMULMAX2: # %bb.0: -; ZVFHMINLMULMAX2-NEXT: vsetivli zero, 16, e16, m1, ta, ma -; ZVFHMINLMULMAX2-NEXT: vle16.v v8, (a1) -; ZVFHMINLMULMAX2-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v12, v9 -; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMINLMULMAX2-NEXT: vfsub.vv v8, v12, v10 -; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; ZVFHMINLMULMAX2-NEXT: vfncvt.f.f.w v10, v8 -; ZVFHMINLMULMAX2-NEXT: vse16.v v10, (a0) -; ZVFHMINLMULMAX2-NEXT: ret +; ZVFH-LABEL: 
fsub_v16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vle16.v v10, (a1) +; ZVFH-NEXT: vfsub.vv v8, v8, v10 +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fsub_v16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m1, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a1) +; ZVFHMIN-NEXT: vle16.v v9, (a0) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v8, v12, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 +; ZVFHMIN-NEXT: vse16.v v10, (a0) +; ZVFHMIN-NEXT: ret %a = load <16 x half>, ptr %x %b = load <16 x half>, ptr %y %c = fsub <16 x half> %a, %b @@ -2494,44 +1834,14 @@ define void @fsub_v16f16(ptr %x, ptr %y) { } define void @fsub_v8f32(ptr %x, ptr %y) { -; LMULMAX2-LABEL: fsub_v8f32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: vle32.v v10, (a1) -; LMULMAX2-NEXT: vfsub.vv v8, v8, v10 -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: fsub_v8f32: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle32.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV32-NEXT: vfsub.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vfsub.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fsub_v8f32: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle32.v v9, (a2) -; 
LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle32.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV64-NEXT: vfsub.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vfsub.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; ZVFH-LABEL: fsub_v8f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; ZVFH-NEXT: vle32.v v8, (a0) +; ZVFH-NEXT: vle32.v v10, (a1) +; ZVFH-NEXT: vfsub.vv v8, v8, v10 +; ZVFH-NEXT: vse32.v v8, (a0) +; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fsub_v8f32: ; ZVFHMIN: # %bb.0: @@ -2549,44 +1859,14 @@ define void @fsub_v8f32(ptr %x, ptr %y) { } define void @fsub_v4f64(ptr %x, ptr %y) { -; LMULMAX2-LABEL: fsub_v4f64: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-NEXT: vle64.v v8, (a0) -; LMULMAX2-NEXT: vle64.v v10, (a1) -; LMULMAX2-NEXT: vfsub.vv v8, v8, v10 -; LMULMAX2-NEXT: vse64.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: fsub_v4f64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle64.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV32-NEXT: vfsub.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vfsub.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fsub_v4f64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV64-NEXT: vfsub.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vfsub.vv v8, v8, v11 -; 
LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; ZVFH-LABEL: fsub_v4f64: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZVFH-NEXT: vle64.v v8, (a0) +; ZVFH-NEXT: vle64.v v10, (a1) +; ZVFH-NEXT: vfsub.vv v8, v8, v10 +; ZVFH-NEXT: vse64.v v8, (a0) +; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fsub_v4f64: ; ZVFHMIN: # %bb.0: @@ -2604,58 +1884,28 @@ define void @fsub_v4f64(ptr %x, ptr %y) { } define void @fmul_v16f16(ptr %x, ptr %y) { -; LMULMAX2-LABEL: fmul_v16f16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-NEXT: vle16.v v8, (a0) -; LMULMAX2-NEXT: vle16.v v10, (a1) -; LMULMAX2-NEXT: vfmul.vv v8, v8, v10 -; LMULMAX2-NEXT: vse16.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: fmul_v16f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle16.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV32-NEXT: vfmul.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vfmul.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fmul_v16f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV64-NEXT: vfmul.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vfmul.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret -; -; ZVFHMINLMULMAX2-LABEL: fmul_v16f16: -; ZVFHMINLMULMAX2: # %bb.0: -; ZVFHMINLMULMAX2-NEXT: 
vsetivli zero, 16, e16, m1, ta, ma -; ZVFHMINLMULMAX2-NEXT: vle16.v v8, (a1) -; ZVFHMINLMULMAX2-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v12, v9 -; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMINLMULMAX2-NEXT: vfmul.vv v8, v12, v10 -; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; ZVFHMINLMULMAX2-NEXT: vfncvt.f.f.w v10, v8 -; ZVFHMINLMULMAX2-NEXT: vse16.v v10, (a0) -; ZVFHMINLMULMAX2-NEXT: ret +; ZVFH-LABEL: fmul_v16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vle16.v v10, (a1) +; ZVFH-NEXT: vfmul.vv v8, v8, v10 +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fmul_v16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m1, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a1) +; ZVFHMIN-NEXT: vle16.v v9, (a0) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v8, v12, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 +; ZVFHMIN-NEXT: vse16.v v10, (a0) +; ZVFHMIN-NEXT: ret %a = load <16 x half>, ptr %x %b = load <16 x half>, ptr %y %c = fmul <16 x half> %a, %b @@ -2664,44 +1914,14 @@ define void @fmul_v16f16(ptr %x, ptr %y) { } define void @fmul_v8f32(ptr %x, ptr %y) { -; LMULMAX2-LABEL: fmul_v8f32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: vle32.v v10, (a1) -; LMULMAX2-NEXT: vfmul.vv v8, v8, v10 -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: fmul_v8f32: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; 
LMULMAX1-RV32-NEXT: vle32.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV32-NEXT: vfmul.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vfmul.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fmul_v8f32: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle32.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV64-NEXT: vfmul.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vfmul.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; ZVFH-LABEL: fmul_v8f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; ZVFH-NEXT: vle32.v v8, (a0) +; ZVFH-NEXT: vle32.v v10, (a1) +; ZVFH-NEXT: vfmul.vv v8, v8, v10 +; ZVFH-NEXT: vse32.v v8, (a0) +; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fmul_v8f32: ; ZVFHMIN: # %bb.0: @@ -2719,44 +1939,14 @@ define void @fmul_v8f32(ptr %x, ptr %y) { } define void @fmul_v4f64(ptr %x, ptr %y) { -; LMULMAX2-LABEL: fmul_v4f64: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-NEXT: vle64.v v8, (a0) -; LMULMAX2-NEXT: vle64.v v10, (a1) -; LMULMAX2-NEXT: vfmul.vv v8, v8, v10 -; LMULMAX2-NEXT: vse64.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: fmul_v4f64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle64.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV32-NEXT: vfmul.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vfmul.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; 
LMULMAX1-RV32-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fmul_v4f64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV64-NEXT: vfmul.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vfmul.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; ZVFH-LABEL: fmul_v4f64: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZVFH-NEXT: vle64.v v8, (a0) +; ZVFH-NEXT: vle64.v v10, (a1) +; ZVFH-NEXT: vfmul.vv v8, v8, v10 +; ZVFH-NEXT: vse64.v v8, (a0) +; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fmul_v4f64: ; ZVFHMIN: # %bb.0: @@ -2774,58 +1964,28 @@ define void @fmul_v4f64(ptr %x, ptr %y) { } define void @fdiv_v16f16(ptr %x, ptr %y) { -; LMULMAX2-LABEL: fdiv_v16f16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-NEXT: vle16.v v8, (a0) -; LMULMAX2-NEXT: vle16.v v10, (a1) -; LMULMAX2-NEXT: vfdiv.vv v8, v8, v10 -; LMULMAX2-NEXT: vse16.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: fdiv_v16f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle16.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV32-NEXT: vfdiv.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vfdiv.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fdiv_v16f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; 
LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV64-NEXT: vfdiv.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vfdiv.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret -; -; ZVFHMINLMULMAX2-LABEL: fdiv_v16f16: -; ZVFHMINLMULMAX2: # %bb.0: -; ZVFHMINLMULMAX2-NEXT: vsetivli zero, 16, e16, m1, ta, ma -; ZVFHMINLMULMAX2-NEXT: vle16.v v8, (a1) -; ZVFHMINLMULMAX2-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v12, v9 -; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMINLMULMAX2-NEXT: vfdiv.vv v8, v12, v10 -; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; ZVFHMINLMULMAX2-NEXT: vfncvt.f.f.w v10, v8 -; ZVFHMINLMULMAX2-NEXT: vse16.v v10, (a0) -; ZVFHMINLMULMAX2-NEXT: ret +; ZVFH-LABEL: fdiv_v16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vle16.v v10, (a1) +; ZVFH-NEXT: vfdiv.vv v8, v8, v10 +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fdiv_v16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m1, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a1) +; ZVFHMIN-NEXT: vle16.v v9, (a0) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v8, v12, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 +; ZVFHMIN-NEXT: vse16.v v10, (a0) +; ZVFHMIN-NEXT: ret %a = load <16 x half>, ptr %x %b = load <16 x half>, ptr %y %c = fdiv <16 x half> %a, %b @@ -2834,44 +1994,14 @@ define void @fdiv_v16f16(ptr %x, ptr %y) { } define void @fdiv_v8f32(ptr %x, ptr %y) { -; LMULMAX2-LABEL: fdiv_v8f32: -; 
LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: vle32.v v10, (a1) -; LMULMAX2-NEXT: vfdiv.vv v8, v8, v10 -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: fdiv_v8f32: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle32.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV32-NEXT: vfdiv.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vfdiv.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fdiv_v8f32: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle32.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV64-NEXT: vfdiv.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vfdiv.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; ZVFH-LABEL: fdiv_v8f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; ZVFH-NEXT: vle32.v v8, (a0) +; ZVFH-NEXT: vle32.v v10, (a1) +; ZVFH-NEXT: vfdiv.vv v8, v8, v10 +; ZVFH-NEXT: vse32.v v8, (a0) +; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fdiv_v8f32: ; ZVFHMIN: # %bb.0: @@ -2889,44 +2019,14 @@ define void @fdiv_v8f32(ptr %x, ptr %y) { } define void @fdiv_v4f64(ptr %x, ptr %y) { -; LMULMAX2-LABEL: fdiv_v4f64: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-NEXT: vle64.v v8, (a0) -; LMULMAX2-NEXT: vle64.v v10, (a1) -; LMULMAX2-NEXT: vfdiv.vv v8, v8, v10 -; LMULMAX2-NEXT: vse64.v v8, 
(a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: fdiv_v4f64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle64.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV32-NEXT: vfdiv.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vfdiv.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fdiv_v4f64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV64-NEXT: vfdiv.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vfdiv.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; ZVFH-LABEL: fdiv_v4f64: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZVFH-NEXT: vle64.v v8, (a0) +; ZVFH-NEXT: vle64.v v10, (a1) +; ZVFH-NEXT: vfdiv.vv v8, v8, v10 +; ZVFH-NEXT: vse64.v v8, (a0) +; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fdiv_v4f64: ; ZVFHMIN: # %bb.0: @@ -2944,37 +2044,25 @@ define void @fdiv_v4f64(ptr %x, ptr %y) { } define void @fneg_v16f16(ptr %x) { -; LMULMAX2-LABEL: fneg_v16f16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-NEXT: vle16.v v8, (a0) -; LMULMAX2-NEXT: vfneg.v v8, v8 -; LMULMAX2-NEXT: vse16.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: fneg_v16f16: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vle16.v v8, (a1) -; LMULMAX1-NEXT: vle16.v v9, (a0) -; LMULMAX1-NEXT: 
vfneg.v v8, v8 -; LMULMAX1-NEXT: vfneg.v v9, v9 -; LMULMAX1-NEXT: vse16.v v9, (a0) -; LMULMAX1-NEXT: vse16.v v8, (a1) -; LMULMAX1-NEXT: ret -; -; ZVFHMINLMULMAX2-LABEL: fneg_v16f16: -; ZVFHMINLMULMAX2: # %bb.0: -; ZVFHMINLMULMAX2-NEXT: vsetivli zero, 16, e16, m1, ta, ma -; ZVFHMINLMULMAX2-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMINLMULMAX2-NEXT: vfneg.v v8, v10 -; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; ZVFHMINLMULMAX2-NEXT: vfncvt.f.f.w v10, v8 -; ZVFHMINLMULMAX2-NEXT: vse16.v v10, (a0) -; ZVFHMINLMULMAX2-NEXT: ret +; ZVFH-LABEL: fneg_v16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vfneg.v v8, v8 +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fneg_v16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m1, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfneg.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 +; ZVFHMIN-NEXT: vse16.v v10, (a0) +; ZVFHMIN-NEXT: ret %a = load <16 x half>, ptr %x %b = fneg <16 x half> %a store <16 x half> %b, ptr %x @@ -2982,25 +2070,13 @@ define void @fneg_v16f16(ptr %x) { } define void @fneg_v8f32(ptr %x) { -; LMULMAX2-LABEL: fneg_v8f32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: vfneg.v v8, v8 -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: fneg_v8f32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vle32.v v8, (a1) -; LMULMAX1-NEXT: vle32.v v9, (a0) -; LMULMAX1-NEXT: vfneg.v v8, v8 -; LMULMAX1-NEXT: vfneg.v v9, v9 -; LMULMAX1-NEXT: vse32.v v9, (a0) -; LMULMAX1-NEXT: 
vse32.v v8, (a1) -; LMULMAX1-NEXT: ret +; ZVFH-LABEL: fneg_v8f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; ZVFH-NEXT: vle32.v v8, (a0) +; ZVFH-NEXT: vfneg.v v8, v8 +; ZVFH-NEXT: vse32.v v8, (a0) +; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fneg_v8f32: ; ZVFHMIN: # %bb.0: @@ -3016,25 +2092,13 @@ define void @fneg_v8f32(ptr %x) { } define void @fneg_v4f64(ptr %x) { -; LMULMAX2-LABEL: fneg_v4f64: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-NEXT: vle64.v v8, (a0) -; LMULMAX2-NEXT: vfneg.v v8, v8 -; LMULMAX2-NEXT: vse64.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: fneg_v4f64: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vle64.v v8, (a1) -; LMULMAX1-NEXT: vle64.v v9, (a0) -; LMULMAX1-NEXT: vfneg.v v8, v8 -; LMULMAX1-NEXT: vfneg.v v9, v9 -; LMULMAX1-NEXT: vse64.v v9, (a0) -; LMULMAX1-NEXT: vse64.v v8, (a1) -; LMULMAX1-NEXT: ret +; ZVFH-LABEL: fneg_v4f64: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZVFH-NEXT: vle64.v v8, (a0) +; ZVFH-NEXT: vfneg.v v8, v8 +; ZVFH-NEXT: vse64.v v8, (a0) +; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fneg_v4f64: ; ZVFHMIN: # %bb.0: @@ -3050,49 +2114,31 @@ define void @fneg_v4f64(ptr %x) { } define void @fma_v16f16(ptr %x, ptr %y, ptr %z) { -; LMULMAX2-LABEL: fma_v16f16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-NEXT: vle16.v v8, (a0) -; LMULMAX2-NEXT: vle16.v v10, (a1) -; LMULMAX2-NEXT: vle16.v v12, (a2) -; LMULMAX2-NEXT: vfmacc.vv v12, v8, v10 -; LMULMAX2-NEXT: vse16.v v12, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: fma_v16f16: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vle16.v v8, (a0) -; LMULMAX1-NEXT: addi a3, a0, 16 -; LMULMAX1-NEXT: vle16.v v9, (a3) -; LMULMAX1-NEXT: vle16.v v10, (a1) -; LMULMAX1-NEXT: addi a1, a1, 16 -; LMULMAX1-NEXT: vle16.v v11, (a1) -; LMULMAX1-NEXT: 
addi a1, a2, 16 -; LMULMAX1-NEXT: vle16.v v12, (a1) -; LMULMAX1-NEXT: vle16.v v13, (a2) -; LMULMAX1-NEXT: vfmacc.vv v12, v9, v11 -; LMULMAX1-NEXT: vfmacc.vv v13, v8, v10 -; LMULMAX1-NEXT: vse16.v v13, (a0) -; LMULMAX1-NEXT: vse16.v v12, (a3) -; LMULMAX1-NEXT: ret -; -; ZVFHMINLMULMAX2-LABEL: fma_v16f16: -; ZVFHMINLMULMAX2: # %bb.0: -; ZVFHMINLMULMAX2-NEXT: vsetivli zero, 16, e16, m1, ta, ma -; ZVFHMINLMULMAX2-NEXT: vle16.v v8, (a2) -; ZVFHMINLMULMAX2-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX2-NEXT: vle16.v v10, (a1) -; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v14, v9 -; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMINLMULMAX2-NEXT: vfmadd.vv v8, v14, v12 -; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; ZVFHMINLMULMAX2-NEXT: vfncvt.f.f.w v10, v8 -; ZVFHMINLMULMAX2-NEXT: vse16.v v10, (a0) -; ZVFHMINLMULMAX2-NEXT: ret +; ZVFH-LABEL: fma_v16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vle16.v v10, (a1) +; ZVFH-NEXT: vle16.v v12, (a2) +; ZVFH-NEXT: vfmacc.vv v12, v8, v10 +; ZVFH-NEXT: vse16.v v12, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fma_v16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m1, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a2) +; ZVFHMIN-NEXT: vle16.v v9, (a0) +; ZVFHMIN-NEXT: vle16.v v10, (a1) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v8, v14, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 +; ZVFHMIN-NEXT: vse16.v v10, (a0) +; ZVFHMIN-NEXT: ret %a = load <16 x half>, ptr %x %b = load <16 x half>, ptr %y %c = load <16 x half>, ptr %z @@ -3103,33 +2149,15 @@ define void @fma_v16f16(ptr %x, ptr %y, ptr %z) { declare <16 x half> @llvm.fma.v16f16(<16 x half>, 
<16 x half>, <16 x half>) define void @fma_v8f32(ptr %x, ptr %y, ptr %z) { -; LMULMAX2-LABEL: fma_v8f32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: vle32.v v10, (a1) -; LMULMAX2-NEXT: vle32.v v12, (a2) -; LMULMAX2-NEXT: vfmacc.vv v12, v8, v10 -; LMULMAX2-NEXT: vse32.v v12, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: fma_v8f32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vle32.v v8, (a0) -; LMULMAX1-NEXT: addi a3, a0, 16 -; LMULMAX1-NEXT: vle32.v v9, (a3) -; LMULMAX1-NEXT: vle32.v v10, (a1) -; LMULMAX1-NEXT: addi a1, a1, 16 -; LMULMAX1-NEXT: vle32.v v11, (a1) -; LMULMAX1-NEXT: addi a1, a2, 16 -; LMULMAX1-NEXT: vle32.v v12, (a1) -; LMULMAX1-NEXT: vle32.v v13, (a2) -; LMULMAX1-NEXT: vfmacc.vv v12, v9, v11 -; LMULMAX1-NEXT: vfmacc.vv v13, v8, v10 -; LMULMAX1-NEXT: vse32.v v13, (a0) -; LMULMAX1-NEXT: vse32.v v12, (a3) -; LMULMAX1-NEXT: ret +; ZVFH-LABEL: fma_v8f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; ZVFH-NEXT: vle32.v v8, (a0) +; ZVFH-NEXT: vle32.v v10, (a1) +; ZVFH-NEXT: vle32.v v12, (a2) +; ZVFH-NEXT: vfmacc.vv v12, v8, v10 +; ZVFH-NEXT: vse32.v v12, (a0) +; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fma_v8f32: ; ZVFHMIN: # %bb.0: @@ -3150,33 +2178,15 @@ define void @fma_v8f32(ptr %x, ptr %y, ptr %z) { declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) define void @fma_v4f64(ptr %x, ptr %y, ptr %z) { -; LMULMAX2-LABEL: fma_v4f64: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-NEXT: vle64.v v8, (a0) -; LMULMAX2-NEXT: vle64.v v10, (a1) -; LMULMAX2-NEXT: vle64.v v12, (a2) -; LMULMAX2-NEXT: vfmacc.vv v12, v8, v10 -; LMULMAX2-NEXT: vse64.v v12, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: fma_v4f64: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vle64.v v8, (a0) -; LMULMAX1-NEXT: addi a3, a0, 16 -; 
LMULMAX1-NEXT: vle64.v v9, (a3) -; LMULMAX1-NEXT: vle64.v v10, (a1) -; LMULMAX1-NEXT: addi a1, a1, 16 -; LMULMAX1-NEXT: vle64.v v11, (a1) -; LMULMAX1-NEXT: addi a1, a2, 16 -; LMULMAX1-NEXT: vle64.v v12, (a1) -; LMULMAX1-NEXT: vle64.v v13, (a2) -; LMULMAX1-NEXT: vfmacc.vv v12, v9, v11 -; LMULMAX1-NEXT: vfmacc.vv v13, v8, v10 -; LMULMAX1-NEXT: vse64.v v13, (a0) -; LMULMAX1-NEXT: vse64.v v12, (a3) -; LMULMAX1-NEXT: ret +; ZVFH-LABEL: fma_v4f64: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZVFH-NEXT: vle64.v v8, (a0) +; ZVFH-NEXT: vle64.v v10, (a1) +; ZVFH-NEXT: vle64.v v12, (a2) +; ZVFH-NEXT: vfmacc.vv v12, v8, v10 +; ZVFH-NEXT: vse64.v v12, (a0) +; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fma_v4f64: ; ZVFHMIN: # %bb.0: @@ -3242,99 +2252,52 @@ define void @fadd_vf_v6f16(ptr %x, half %y) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMINLMULMAX2-RV32-LABEL: fadd_vf_v6f16: -; ZVFHMINLMULMAX2-RV32: # %bb.0: -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v9, fa5 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfadd.vv v8, v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, 
ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: ret -; -; ZVFHMINLMULMAX2-RV64-LABEL: fadd_vf_v6f16: -; ZVFHMINLMULMAX2-RV64: # %bb.0: -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v9, fa5 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfadd.vv v8, v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: ret -; -; ZVFHMINLMULMAX1-RV32-LABEL: fadd_vf_v6f16: -; ZVFHMINLMULMAX1-RV32: # %bb.0: -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v9, fa5 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfadd.vv v8, 
v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: ret -; -; ZVFHMINLMULMAX1-RV64-LABEL: fadd_vf_v6f16: -; ZVFHMINLMULMAX1-RV64: # %bb.0: -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v9, fa5 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfadd.vv v8, v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: ret +; ZVFHMIN-RV32-LABEL: fadd_vf_v6f16: +; ZVFHMIN-RV32: # %bb.0: +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMIN-RV32-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 
+; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfadd.vv v8, v9, v8 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV32-NEXT: addi a1, a0, 8 +; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMIN-RV32-NEXT: ret +; +; ZVFHMIN-RV64-LABEL: fadd_vf_v6f16: +; ZVFHMIN-RV64: # %bb.0: +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMIN-RV64-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfadd.vv v8, v9, v8 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV64-NEXT: addi a0, a0, 8 +; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMIN-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer @@ -3429,99 +2392,52 @@ define void @fadd_fv_v6f16(ptr %x, half %y) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMINLMULMAX2-RV32-LABEL: fadd_fv_v6f16: 
-; ZVFHMINLMULMAX2-RV32: # %bb.0: -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v9, fa5 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfadd.vv v8, v8, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: ret -; -; ZVFHMINLMULMAX2-RV64-LABEL: fadd_fv_v6f16: -; ZVFHMINLMULMAX2-RV64: # %bb.0: -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v9, fa5 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfadd.vv v8, v8, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 
-; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: ret -; -; ZVFHMINLMULMAX1-RV32-LABEL: fadd_fv_v6f16: -; ZVFHMINLMULMAX1-RV32: # %bb.0: -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v9, fa5 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfadd.vv v8, v8, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: ret -; -; ZVFHMINLMULMAX1-RV64-LABEL: fadd_fv_v6f16: -; ZVFHMINLMULMAX1-RV64: # %bb.0: -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v9, fa5 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; 
ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfadd.vv v8, v8, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: ret +; ZVFHMIN-RV32-LABEL: fadd_fv_v6f16: +; ZVFHMIN-RV32: # %bb.0: +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMIN-RV32-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfadd.vv v8, v8, v9 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV32-NEXT: addi a1, a0, 8 +; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMIN-RV32-NEXT: ret +; +; ZVFHMIN-RV64-LABEL: fadd_fv_v6f16: +; ZVFHMIN-RV64: # %bb.0: +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMIN-RV64-NEXT: fcvt.s.h fa5, fa0 +; 
ZVFHMIN-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfadd.vv v8, v8, v9 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV64-NEXT: addi a0, a0, 8 +; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMIN-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer @@ -3616,99 +2532,52 @@ define void @fsub_vf_v6f16(ptr %x, half %y) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMINLMULMAX2-RV32-LABEL: fsub_vf_v6f16: -; ZVFHMINLMULMAX2-RV32: # %bb.0: -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v9, fa5 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfsub.vv v8, v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; 
ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: ret -; -; ZVFHMINLMULMAX2-RV64-LABEL: fsub_vf_v6f16: -; ZVFHMINLMULMAX2-RV64: # %bb.0: -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v9, fa5 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfsub.vv v8, v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: ret -; -; ZVFHMINLMULMAX1-RV32-LABEL: fsub_vf_v6f16: -; ZVFHMINLMULMAX1-RV32: # %bb.0: -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v9, fa5 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; 
ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfsub.vv v8, v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: ret -; -; ZVFHMINLMULMAX1-RV64-LABEL: fsub_vf_v6f16: -; ZVFHMINLMULMAX1-RV64: # %bb.0: -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v9, fa5 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfsub.vv v8, v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: ret +; ZVFHMIN-RV32-LABEL: fsub_vf_v6f16: +; ZVFHMIN-RV32: # %bb.0: +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: 
vle16.v v8, (a0) +; ZVFHMIN-RV32-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfsub.vv v8, v9, v8 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV32-NEXT: addi a1, a0, 8 +; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMIN-RV32-NEXT: ret +; +; ZVFHMIN-RV64-LABEL: fsub_vf_v6f16: +; ZVFHMIN-RV64: # %bb.0: +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMIN-RV64-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfsub.vv v8, v9, v8 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV64-NEXT: addi a0, a0, 8 +; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMIN-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> 
%b, <6 x half> poison, <6 x i32> zeroinitializer @@ -3803,99 +2672,52 @@ define void @fsub_fv_v6f16(ptr %x, half %y) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMINLMULMAX2-RV32-LABEL: fsub_fv_v6f16: -; ZVFHMINLMULMAX2-RV32: # %bb.0: -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v9, fa5 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfsub.vv v8, v8, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: ret -; -; ZVFHMINLMULMAX2-RV64-LABEL: fsub_fv_v6f16: -; ZVFHMINLMULMAX2-RV64: # %bb.0: -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v9, fa5 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; 
ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfsub.vv v8, v8, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: ret -; -; ZVFHMINLMULMAX1-RV32-LABEL: fsub_fv_v6f16: -; ZVFHMINLMULMAX1-RV32: # %bb.0: -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v9, fa5 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfsub.vv v8, v8, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: ret -; -; ZVFHMINLMULMAX1-RV64-LABEL: fsub_fv_v6f16: -; ZVFHMINLMULMAX1-RV64: # %bb.0: -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; 
ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v9, fa5 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfsub.vv v8, v8, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: ret +; ZVFHMIN-RV32-LABEL: fsub_fv_v6f16: +; ZVFHMIN-RV32: # %bb.0: +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMIN-RV32-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfsub.vv v8, v8, v9 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV32-NEXT: addi a1, a0, 8 +; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0) +; 
ZVFHMIN-RV32-NEXT: ret +; +; ZVFHMIN-RV64-LABEL: fsub_fv_v6f16: +; ZVFHMIN-RV64: # %bb.0: +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMIN-RV64-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfsub.vv v8, v8, v9 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV64-NEXT: addi a0, a0, 8 +; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMIN-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer @@ -3990,99 +2812,52 @@ define void @fmul_vf_v6f16(ptr %x, half %y) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMINLMULMAX2-RV32-LABEL: fmul_vf_v6f16: -; ZVFHMINLMULMAX2-RV32: # %bb.0: -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v9, fa5 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; 
ZVFHMINLMULMAX2-RV32-NEXT: vfmul.vv v8, v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: ret -; -; ZVFHMINLMULMAX2-RV64-LABEL: fmul_vf_v6f16: -; ZVFHMINLMULMAX2-RV64: # %bb.0: -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v9, fa5 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfmul.vv v8, v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: ret -; -; ZVFHMINLMULMAX1-RV32-LABEL: fmul_vf_v6f16: -; ZVFHMINLMULMAX1-RV32: # %bb.0: -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; 
ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v9, fa5 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfmul.vv v8, v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: ret -; -; ZVFHMINLMULMAX1-RV64-LABEL: fmul_vf_v6f16: -; ZVFHMINLMULMAX1-RV64: # %bb.0: -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v9, fa5 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfmul.vv v8, v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 
-; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: ret +; ZVFHMIN-RV32-LABEL: fmul_vf_v6f16: +; ZVFHMIN-RV32: # %bb.0: +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMIN-RV32-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfmul.vv v8, v9, v8 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV32-NEXT: addi a1, a0, 8 +; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMIN-RV32-NEXT: ret +; +; ZVFHMIN-RV64-LABEL: fmul_vf_v6f16: +; ZVFHMIN-RV64: # %bb.0: +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMIN-RV64-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfmul.vv v8, v9, v8 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMIN-RV64-NEXT: vslidedown.vi 
v8, v9, 2 +; ZVFHMIN-RV64-NEXT: addi a0, a0, 8 +; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMIN-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer @@ -4177,99 +2952,52 @@ define void @fmul_fv_v6f16(ptr %x, half %y) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMINLMULMAX2-RV32-LABEL: fmul_fv_v6f16: -; ZVFHMINLMULMAX2-RV32: # %bb.0: -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v9, fa5 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfmul.vv v8, v8, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: ret -; -; ZVFHMINLMULMAX2-RV64-LABEL: fmul_fv_v6f16: -; ZVFHMINLMULMAX2-RV64: # %bb.0: -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v9, fa5 -; 
ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfmul.vv v8, v8, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: ret -; -; ZVFHMINLMULMAX1-RV32-LABEL: fmul_fv_v6f16: -; ZVFHMINLMULMAX1-RV32: # %bb.0: -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v9, fa5 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfmul.vv v8, v8, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) 
-; ZVFHMINLMULMAX1-RV32-NEXT: ret -; -; ZVFHMINLMULMAX1-RV64-LABEL: fmul_fv_v6f16: -; ZVFHMINLMULMAX1-RV64: # %bb.0: -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v9, fa5 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfmul.vv v8, v8, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: ret +; ZVFHMIN-RV32-LABEL: fmul_fv_v6f16: +; ZVFHMIN-RV32: # %bb.0: +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMIN-RV32-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfmul.vv v8, v8, v9 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; 
ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV32-NEXT: addi a1, a0, 8 +; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMIN-RV32-NEXT: ret +; +; ZVFHMIN-RV64-LABEL: fmul_fv_v6f16: +; ZVFHMIN-RV64: # %bb.0: +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMIN-RV64-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfmul.vv v8, v8, v9 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV64-NEXT: addi a0, a0, 8 +; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMIN-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer @@ -4364,99 +3092,52 @@ define void @fdiv_vf_v6f16(ptr %x, half %y) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMINLMULMAX2-RV32-LABEL: fdiv_vf_v6f16: -; ZVFHMINLMULMAX2-RV32: # %bb.0: -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v9, fa5 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9 -; 
ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfdiv.vv v8, v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: ret -; -; ZVFHMINLMULMAX2-RV64-LABEL: fdiv_vf_v6f16: -; ZVFHMINLMULMAX2-RV64: # %bb.0: -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v9, fa5 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfdiv.vv v8, v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: ret -; -; ZVFHMINLMULMAX1-RV32-LABEL: fdiv_vf_v6f16: -; ZVFHMINLMULMAX1-RV32: # %bb.0: 
-; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v9, fa5 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfdiv.vv v8, v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: ret -; -; ZVFHMINLMULMAX1-RV64-LABEL: fdiv_vf_v6f16: -; ZVFHMINLMULMAX1-RV64: # %bb.0: -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v9, fa5 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfdiv.vv v8, v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: 
vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: ret +; ZVFHMIN-RV32-LABEL: fdiv_vf_v6f16: +; ZVFHMIN-RV32: # %bb.0: +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMIN-RV32-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfdiv.vv v8, v9, v8 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV32-NEXT: addi a1, a0, 8 +; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMIN-RV32-NEXT: ret +; +; ZVFHMIN-RV64-LABEL: fdiv_vf_v6f16: +; ZVFHMIN-RV64: # %bb.0: +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMIN-RV64-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfdiv.vv v8, v9, v8 +; 
ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV64-NEXT: addi a0, a0, 8 +; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMIN-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer @@ -4551,99 +3232,52 @@ define void @fdiv_fv_v6f16(ptr %x, half %y) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMINLMULMAX2-RV32-LABEL: fdiv_fv_v6f16: -; ZVFHMINLMULMAX2-RV32: # %bb.0: -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v9, fa5 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfdiv.vv v8, v8, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: ret -; -; ZVFHMINLMULMAX2-RV64-LABEL: fdiv_fv_v6f16: -; ZVFHMINLMULMAX2-RV64: # %bb.0: -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; 
ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v9, fa5 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfdiv.vv v8, v8, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: ret -; -; ZVFHMINLMULMAX1-RV32-LABEL: fdiv_fv_v6f16: -; ZVFHMINLMULMAX1-RV32: # %bb.0: -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v9, fa5 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfdiv.vv v8, v8, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 
2 -; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: ret -; -; ZVFHMINLMULMAX1-RV64-LABEL: fdiv_fv_v6f16: -; ZVFHMINLMULMAX1-RV64: # %bb.0: -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v9, fa5 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfdiv.vv v8, v8, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: ret +; ZVFHMIN-RV32-LABEL: fdiv_fv_v6f16: +; ZVFHMIN-RV32: # %bb.0: +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMIN-RV32-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma 
+; ZVFHMIN-RV32-NEXT: vfdiv.vv v8, v8, v9 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV32-NEXT: addi a1, a0, 8 +; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMIN-RV32-NEXT: ret +; +; ZVFHMIN-RV64-LABEL: fdiv_fv_v6f16: +; ZVFHMIN-RV64: # %bb.0: +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMIN-RV64-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfdiv.vv v8, v8, v9 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV64-NEXT: addi a0, a0, 8 +; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMIN-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer @@ -4743,107 +3377,56 @@ define void @fma_vf_v6f16(ptr %x, ptr %y, half %z) { ; ZVFH-NEXT: vse16.v v9, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMINLMULMAX2-RV32-LABEL: fma_vf_v6f16: -; ZVFHMINLMULMAX2-RV32: # %bb.0: -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h 
fa5, fa0 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v10, fa5 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v11, v10 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v11 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfmadd.vv v8, v9, v10 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: ret -; -; ZVFHMINLMULMAX2-RV64-LABEL: fma_vf_v6f16: -; ZVFHMINLMULMAX2-RV64: # %bb.0: -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a1) -; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v10, fa5 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v11, v10 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v11 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfmadd.vv v8, v9, v10 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; 
ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: ret -; -; ZVFHMINLMULMAX1-RV32-LABEL: fma_vf_v6f16: -; ZVFHMINLMULMAX1-RV32: # %bb.0: -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v10, fa5 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v11, v10 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v11 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfmadd.vv v8, v9, v10 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: ret -; -; ZVFHMINLMULMAX1-RV64-LABEL: fma_vf_v6f16: -; ZVFHMINLMULMAX1-RV64: # %bb.0: -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a1) -; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, 
zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v10, fa5 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v11, v10 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v11 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfmadd.vv v8, v9, v10 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: ret +; ZVFHMIN-RV32-LABEL: fma_vf_v6f16: +; ZVFHMIN-RV32: # %bb.0: +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMIN-RV32-NEXT: vle16.v v9, (a1) +; ZVFHMIN-RV32-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfmv.v.f v10, fa5 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v11, v10 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v11 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfmadd.vv v8, v9, v10 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV32-NEXT: addi a1, a0, 8 +; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; 
ZVFHMIN-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMIN-RV32-NEXT: ret +; +; ZVFHMIN-RV64-LABEL: fma_vf_v6f16: +; ZVFHMIN-RV64: # %bb.0: +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMIN-RV64-NEXT: vle16.v v9, (a1) +; ZVFHMIN-RV64-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfmv.v.f v10, fa5 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v11, v10 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v11 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfmadd.vv v8, v9, v10 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV64-NEXT: addi a0, a0, 8 +; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMIN-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = insertelement <6 x half> poison, half %z, i32 0 @@ -4949,107 +3532,56 @@ define void @fma_fv_v6f16(ptr %x, ptr %y, half %z) { ; ZVFH-NEXT: vse16.v v9, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMINLMULMAX2-RV32-LABEL: fma_fv_v6f16: -; ZVFHMINLMULMAX2-RV32: # %bb.0: -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v10, fa5 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v11, v10 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v 
v10, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v11 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfmadd.vv v8, v9, v10 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: ret -; -; ZVFHMINLMULMAX2-RV64-LABEL: fma_fv_v6f16: -; ZVFHMINLMULMAX2-RV64: # %bb.0: -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a1) -; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v10, fa5 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v11, v10 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v11 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfmadd.vv v8, v9, v10 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: ret -; -; ZVFHMINLMULMAX1-RV32-LABEL: 
fma_fv_v6f16: -; ZVFHMINLMULMAX1-RV32: # %bb.0: -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v10, fa5 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v11, v10 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v11 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfmadd.vv v8, v9, v10 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: ret -; -; ZVFHMINLMULMAX1-RV64-LABEL: fma_fv_v6f16: -; ZVFHMINLMULMAX1-RV64: # %bb.0: -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a1) -; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v10, fa5 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v11, v10 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; 
ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v11 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfmadd.vv v8, v9, v10 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: ret +; ZVFHMIN-RV32-LABEL: fma_fv_v6f16: +; ZVFHMIN-RV32: # %bb.0: +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMIN-RV32-NEXT: vle16.v v9, (a1) +; ZVFHMIN-RV32-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfmv.v.f v10, fa5 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v11, v10 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v11 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfmadd.vv v8, v9, v10 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV32-NEXT: addi a1, a0, 8 +; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMIN-RV32-NEXT: ret +; +; ZVFHMIN-RV64-LABEL: fma_fv_v6f16: +; ZVFHMIN-RV64: # %bb.0: +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMIN-RV64-NEXT: vle16.v v9, (a1) +; ZVFHMIN-RV64-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma 
+; ZVFHMIN-RV64-NEXT: vfmv.v.f v10, fa5 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v11, v10 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v11 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfmadd.vv v8, v9, v10 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV64-NEXT: addi a0, a0, 8 +; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMIN-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = insertelement <6 x half> poison, half %z, i32 0 @@ -5161,127 +3693,66 @@ define void @fmsub_vf_v6f16(ptr %x, ptr %y, half %z) { ; ZVFH-NEXT: vse16.v v9, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMINLMULMAX2-RV32-LABEL: fmsub_vf_v6f16: -; ZVFHMINLMULMAX2-RV32: # %bb.0: -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v10, fa5 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v11, v10 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfneg.v v9, v10 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v11 -; 
ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v11, v10 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfmacc.vv v11, v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v8, v11 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v9, v8, 2 -; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v9, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v8, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: ret -; -; ZVFHMINLMULMAX2-RV64-LABEL: fmsub_vf_v6f16: -; ZVFHMINLMULMAX2-RV64: # %bb.0: -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a1) -; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v10, fa5 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v11, v10 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfneg.v v9, v10 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v11 -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v11, v10 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfmacc.vv v11, v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v8, v11 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: 
vse64.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v8, 2 -; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: ret -; -; ZVFHMINLMULMAX1-RV32-LABEL: fmsub_vf_v6f16: -; ZVFHMINLMULMAX1-RV32: # %bb.0: -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v10, fa5 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v11, v10 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfneg.v v9, v10 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v11 -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v11, v10 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfmacc.vv v11, v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v8, v11 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: ret -; -; ZVFHMINLMULMAX1-RV64-LABEL: fmsub_vf_v6f16: -; ZVFHMINLMULMAX1-RV64: # %bb.0: -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v 
v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a1) -; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v10, fa5 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v11, v10 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfneg.v v9, v10 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v11 -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v11, v10 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfmacc.vv v11, v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v8, v11 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: ret +; ZVFHMIN-RV32-LABEL: fmsub_vf_v6f16: +; ZVFHMIN-RV32: # %bb.0: +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMIN-RV32-NEXT: vle16.v v9, (a1) +; ZVFHMIN-RV32-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfmv.v.f v10, fa5 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v11, v10 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfneg.v v9, v10 +; 
ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v11 +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v11, v10 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfmacc.vv v11, v9, v8 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v8, v11 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vslidedown.vi v9, v8, 2 +; ZVFHMIN-RV32-NEXT: addi a1, a0, 8 +; ZVFHMIN-RV32-NEXT: vse32.v v9, (a1) +; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMIN-RV32-NEXT: vse16.v v8, (a0) +; ZVFHMIN-RV32-NEXT: ret +; +; ZVFHMIN-RV64-LABEL: fmsub_vf_v6f16: +; ZVFHMIN-RV64: # %bb.0: +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMIN-RV64-NEXT: vle16.v v9, (a1) +; ZVFHMIN-RV64-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfmv.v.f v10, fa5 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v11, v10 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfneg.v v9, v10 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v11 +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v11, v10 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfmacc.vv v11, v9, v8 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v8, v11 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vse64.v v8, (a0) +; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v8, 2 +; ZVFHMIN-RV64-NEXT: addi a0, a0, 8 +; 
ZVFHMIN-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMIN-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = insertelement <6 x half> poison, half %z, i32 0 @@ -5558,107 +4029,56 @@ define void @ceil_v6f16(ptr %x) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMINLMULMAX2-RV32-LABEL: ceil_v6f16: -; ZVFHMINLMULMAX2-RV32: # %bb.0: -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfabs.v v8, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: lui a1, 307200 -; ZVFHMINLMULMAX2-RV32-NEXT: fmv.w.x fa5, a1 -; ZVFHMINLMULMAX2-RV32-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMINLMULMAX2-RV32-NEXT: fsrmi a1, 3 -; ZVFHMINLMULMAX2-RV32-NEXT: vfcvt.x.f.v v8, v9, v0.t -; ZVFHMINLMULMAX2-RV32-NEXT: fsrm a1 -; ZVFHMINLMULMAX2-RV32-NEXT: vfcvt.f.x.v v8, v8, v0.t -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; ZVFHMINLMULMAX2-RV32-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v8, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v9, v8, 2 -; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v9, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v8, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: ret -; -; ZVFHMINLMULMAX2-RV64-LABEL: ceil_v6f16: -; ZVFHMINLMULMAX2-RV64: # %bb.0: -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfabs.v v8, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: lui a1, 307200 -; ZVFHMINLMULMAX2-RV64-NEXT: fmv.w.x 
fa5, a1 -; ZVFHMINLMULMAX2-RV64-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMINLMULMAX2-RV64-NEXT: fsrmi a1, 3 -; ZVFHMINLMULMAX2-RV64-NEXT: vfcvt.x.f.v v8, v9, v0.t -; ZVFHMINLMULMAX2-RV64-NEXT: fsrm a1 -; ZVFHMINLMULMAX2-RV64-NEXT: vfcvt.f.x.v v8, v8, v0.t -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; ZVFHMINLMULMAX2-RV64-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v8, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v8, 2 -; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: ret -; -; ZVFHMINLMULMAX1-RV32-LABEL: ceil_v6f16: -; ZVFHMINLMULMAX1-RV32: # %bb.0: -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfabs.v v8, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: lui a1, 307200 -; ZVFHMINLMULMAX1-RV32-NEXT: fmv.w.x fa5, a1 -; ZVFHMINLMULMAX1-RV32-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMINLMULMAX1-RV32-NEXT: fsrmi a1, 3 -; ZVFHMINLMULMAX1-RV32-NEXT: vfcvt.x.f.v v8, v9, v0.t -; ZVFHMINLMULMAX1-RV32-NEXT: fsrm a1 -; ZVFHMINLMULMAX1-RV32-NEXT: vfcvt.f.x.v v8, v8, v0.t -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; ZVFHMINLMULMAX1-RV32-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v8, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; 
ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: ret -; -; ZVFHMINLMULMAX1-RV64-LABEL: ceil_v6f16: -; ZVFHMINLMULMAX1-RV64: # %bb.0: -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfabs.v v8, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: lui a1, 307200 -; ZVFHMINLMULMAX1-RV64-NEXT: fmv.w.x fa5, a1 -; ZVFHMINLMULMAX1-RV64-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMINLMULMAX1-RV64-NEXT: fsrmi a1, 3 -; ZVFHMINLMULMAX1-RV64-NEXT: vfcvt.x.f.v v8, v9, v0.t -; ZVFHMINLMULMAX1-RV64-NEXT: fsrm a1 -; ZVFHMINLMULMAX1-RV64-NEXT: vfcvt.f.x.v v8, v8, v0.t -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; ZVFHMINLMULMAX1-RV64-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v8, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: ret +; ZVFHMIN-RV32-LABEL: ceil_v6f16: +; ZVFHMIN-RV32: # %bb.0: +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfabs.v v8, v9 +; ZVFHMIN-RV32-NEXT: lui a1, 307200 +; ZVFHMIN-RV32-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-RV32-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-RV32-NEXT: fsrmi a1, 3 +; ZVFHMIN-RV32-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-RV32-NEXT: fsrm a1 +; ZVFHMIN-RV32-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-RV32-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-RV32-NEXT: 
vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vslidedown.vi v9, v8, 2 +; ZVFHMIN-RV32-NEXT: addi a1, a0, 8 +; ZVFHMIN-RV32-NEXT: vse32.v v9, (a1) +; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMIN-RV32-NEXT: vse16.v v8, (a0) +; ZVFHMIN-RV32-NEXT: ret +; +; ZVFHMIN-RV64-LABEL: ceil_v6f16: +; ZVFHMIN-RV64: # %bb.0: +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfabs.v v8, v9 +; ZVFHMIN-RV64-NEXT: lui a1, 307200 +; ZVFHMIN-RV64-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-RV64-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-RV64-NEXT: fsrmi a1, 3 +; ZVFHMIN-RV64-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-RV64-NEXT: fsrm a1 +; ZVFHMIN-RV64-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-RV64-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vse64.v v8, (a0) +; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v8, 2 +; ZVFHMIN-RV64-NEXT: addi a0, a0, 8 +; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMIN-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = call <6 x half> @llvm.ceil.v6f16(<6 x half> %a) store <6 x half> %b, ptr %x @@ -5796,107 +4216,56 @@ define void @floor_v6f16(ptr %x) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMINLMULMAX2-RV32-LABEL: floor_v6f16: -; ZVFHMINLMULMAX2-RV32: # %bb.0: -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfabs.v v8, v9 -; 
ZVFHMINLMULMAX2-RV32-NEXT: lui a1, 307200 -; ZVFHMINLMULMAX2-RV32-NEXT: fmv.w.x fa5, a1 -; ZVFHMINLMULMAX2-RV32-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMINLMULMAX2-RV32-NEXT: fsrmi a1, 2 -; ZVFHMINLMULMAX2-RV32-NEXT: vfcvt.x.f.v v8, v9, v0.t -; ZVFHMINLMULMAX2-RV32-NEXT: fsrm a1 -; ZVFHMINLMULMAX2-RV32-NEXT: vfcvt.f.x.v v8, v8, v0.t -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; ZVFHMINLMULMAX2-RV32-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v8, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v9, v8, 2 -; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v9, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v8, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: ret -; -; ZVFHMINLMULMAX2-RV64-LABEL: floor_v6f16: -; ZVFHMINLMULMAX2-RV64: # %bb.0: -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfabs.v v8, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: lui a1, 307200 -; ZVFHMINLMULMAX2-RV64-NEXT: fmv.w.x fa5, a1 -; ZVFHMINLMULMAX2-RV64-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMINLMULMAX2-RV64-NEXT: fsrmi a1, 2 -; ZVFHMINLMULMAX2-RV64-NEXT: vfcvt.x.f.v v8, v9, v0.t -; ZVFHMINLMULMAX2-RV64-NEXT: fsrm a1 -; ZVFHMINLMULMAX2-RV64-NEXT: vfcvt.f.x.v v8, v8, v0.t -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; ZVFHMINLMULMAX2-RV64-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v8, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: 
vslidedown.vi v8, v8, 2 -; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: ret -; -; ZVFHMINLMULMAX1-RV32-LABEL: floor_v6f16: -; ZVFHMINLMULMAX1-RV32: # %bb.0: -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfabs.v v8, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: lui a1, 307200 -; ZVFHMINLMULMAX1-RV32-NEXT: fmv.w.x fa5, a1 -; ZVFHMINLMULMAX1-RV32-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMINLMULMAX1-RV32-NEXT: fsrmi a1, 2 -; ZVFHMINLMULMAX1-RV32-NEXT: vfcvt.x.f.v v8, v9, v0.t -; ZVFHMINLMULMAX1-RV32-NEXT: fsrm a1 -; ZVFHMINLMULMAX1-RV32-NEXT: vfcvt.f.x.v v8, v8, v0.t -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; ZVFHMINLMULMAX1-RV32-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v8, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: ret -; -; ZVFHMINLMULMAX1-RV64-LABEL: floor_v6f16: -; ZVFHMINLMULMAX1-RV64: # %bb.0: -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfabs.v v8, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: lui a1, 307200 -; ZVFHMINLMULMAX1-RV64-NEXT: fmv.w.x fa5, a1 -; ZVFHMINLMULMAX1-RV64-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMINLMULMAX1-RV64-NEXT: fsrmi a1, 2 -; 
ZVFHMINLMULMAX1-RV64-NEXT: vfcvt.x.f.v v8, v9, v0.t -; ZVFHMINLMULMAX1-RV64-NEXT: fsrm a1 -; ZVFHMINLMULMAX1-RV64-NEXT: vfcvt.f.x.v v8, v8, v0.t -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; ZVFHMINLMULMAX1-RV64-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v8, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: ret +; ZVFHMIN-RV32-LABEL: floor_v6f16: +; ZVFHMIN-RV32: # %bb.0: +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfabs.v v8, v9 +; ZVFHMIN-RV32-NEXT: lui a1, 307200 +; ZVFHMIN-RV32-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-RV32-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-RV32-NEXT: fsrmi a1, 2 +; ZVFHMIN-RV32-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-RV32-NEXT: fsrm a1 +; ZVFHMIN-RV32-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-RV32-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vslidedown.vi v9, v8, 2 +; ZVFHMIN-RV32-NEXT: addi a1, a0, 8 +; ZVFHMIN-RV32-NEXT: vse32.v v9, (a1) +; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMIN-RV32-NEXT: vse16.v v8, (a0) +; ZVFHMIN-RV32-NEXT: ret +; +; ZVFHMIN-RV64-LABEL: floor_v6f16: +; ZVFHMIN-RV64: # %bb.0: +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-RV64-NEXT: vsetvli zero, 
zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfabs.v v8, v9 +; ZVFHMIN-RV64-NEXT: lui a1, 307200 +; ZVFHMIN-RV64-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-RV64-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-RV64-NEXT: fsrmi a1, 2 +; ZVFHMIN-RV64-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-RV64-NEXT: fsrm a1 +; ZVFHMIN-RV64-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-RV64-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vse64.v v8, (a0) +; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v8, 2 +; ZVFHMIN-RV64-NEXT: addi a0, a0, 8 +; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMIN-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = call <6 x half> @llvm.floor.v6f16(<6 x half> %a) store <6 x half> %b, ptr %x @@ -6034,107 +4403,56 @@ define void @round_v6f16(ptr %x) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMINLMULMAX2-RV32-LABEL: round_v6f16: -; ZVFHMINLMULMAX2-RV32: # %bb.0: -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfabs.v v8, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: lui a1, 307200 -; ZVFHMINLMULMAX2-RV32-NEXT: fmv.w.x fa5, a1 -; ZVFHMINLMULMAX2-RV32-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMINLMULMAX2-RV32-NEXT: fsrmi a1, 4 -; ZVFHMINLMULMAX2-RV32-NEXT: vfcvt.x.f.v v8, v9, v0.t -; ZVFHMINLMULMAX2-RV32-NEXT: fsrm a1 -; ZVFHMINLMULMAX2-RV32-NEXT: vfcvt.f.x.v v8, v8, v0.t -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; ZVFHMINLMULMAX2-RV32-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v8, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; 
ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v9, v8, 2 -; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v9, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v8, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: ret -; -; ZVFHMINLMULMAX2-RV64-LABEL: round_v6f16: -; ZVFHMINLMULMAX2-RV64: # %bb.0: -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfabs.v v8, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: lui a1, 307200 -; ZVFHMINLMULMAX2-RV64-NEXT: fmv.w.x fa5, a1 -; ZVFHMINLMULMAX2-RV64-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMINLMULMAX2-RV64-NEXT: fsrmi a1, 4 -; ZVFHMINLMULMAX2-RV64-NEXT: vfcvt.x.f.v v8, v9, v0.t -; ZVFHMINLMULMAX2-RV64-NEXT: fsrm a1 -; ZVFHMINLMULMAX2-RV64-NEXT: vfcvt.f.x.v v8, v8, v0.t -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; ZVFHMINLMULMAX2-RV64-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v8, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v8, 2 -; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: ret -; -; ZVFHMINLMULMAX1-RV32-LABEL: round_v6f16: -; ZVFHMINLMULMAX1-RV32: # %bb.0: -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfabs.v v8, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: lui a1, 307200 -; ZVFHMINLMULMAX1-RV32-NEXT: fmv.w.x fa5, a1 -; ZVFHMINLMULMAX1-RV32-NEXT: vmflt.vf v0, v8, 
fa5 -; ZVFHMINLMULMAX1-RV32-NEXT: fsrmi a1, 4 -; ZVFHMINLMULMAX1-RV32-NEXT: vfcvt.x.f.v v8, v9, v0.t -; ZVFHMINLMULMAX1-RV32-NEXT: fsrm a1 -; ZVFHMINLMULMAX1-RV32-NEXT: vfcvt.f.x.v v8, v8, v0.t -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; ZVFHMINLMULMAX1-RV32-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v8, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: ret -; -; ZVFHMINLMULMAX1-RV64-LABEL: round_v6f16: -; ZVFHMINLMULMAX1-RV64: # %bb.0: -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfabs.v v8, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: lui a1, 307200 -; ZVFHMINLMULMAX1-RV64-NEXT: fmv.w.x fa5, a1 -; ZVFHMINLMULMAX1-RV64-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMINLMULMAX1-RV64-NEXT: fsrmi a1, 4 -; ZVFHMINLMULMAX1-RV64-NEXT: vfcvt.x.f.v v8, v9, v0.t -; ZVFHMINLMULMAX1-RV64-NEXT: fsrm a1 -; ZVFHMINLMULMAX1-RV64-NEXT: vfcvt.f.x.v v8, v8, v0.t -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; ZVFHMINLMULMAX1-RV64-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v8, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; 
ZVFHMINLMULMAX1-RV64-NEXT: ret +; ZVFHMIN-RV32-LABEL: round_v6f16: +; ZVFHMIN-RV32: # %bb.0: +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfabs.v v8, v9 +; ZVFHMIN-RV32-NEXT: lui a1, 307200 +; ZVFHMIN-RV32-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-RV32-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-RV32-NEXT: fsrmi a1, 4 +; ZVFHMIN-RV32-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-RV32-NEXT: fsrm a1 +; ZVFHMIN-RV32-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-RV32-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vslidedown.vi v9, v8, 2 +; ZVFHMIN-RV32-NEXT: addi a1, a0, 8 +; ZVFHMIN-RV32-NEXT: vse32.v v9, (a1) +; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMIN-RV32-NEXT: vse16.v v8, (a0) +; ZVFHMIN-RV32-NEXT: ret +; +; ZVFHMIN-RV64-LABEL: round_v6f16: +; ZVFHMIN-RV64: # %bb.0: +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfabs.v v8, v9 +; ZVFHMIN-RV64-NEXT: lui a1, 307200 +; ZVFHMIN-RV64-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-RV64-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-RV64-NEXT: fsrmi a1, 4 +; ZVFHMIN-RV64-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-RV64-NEXT: fsrm a1 +; ZVFHMIN-RV64-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-RV64-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vse64.v v8, (a0) +; 
ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v8, 2 +; ZVFHMIN-RV64-NEXT: addi a0, a0, 8 +; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMIN-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = call <6 x half> @llvm.round.v6f16(<6 x half> %a) store <6 x half> %b, ptr %x @@ -6470,107 +4788,56 @@ define void @fmuladd_v6f16(ptr %x, ptr %y, ptr %z) { ; ZVFH-NEXT: vse16.v v10, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMINLMULMAX2-RV32-LABEL: fmuladd_v6f16: -; ZVFHMINLMULMAX2-RV32: # %bb.0: -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v10, (a2) -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v11, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfmul.vv v8, v8, v11 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v10 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfadd.vv v8, v8, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: ret -; -; ZVFHMINLMULMAX2-RV64-LABEL: fmuladd_v6f16: -; ZVFHMINLMULMAX2-RV64: # %bb.0: -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a1) -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v10, (a2) -; 
ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v11, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfmul.vv v8, v8, v11 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v10 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfadd.vv v8, v8, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: ret -; -; ZVFHMINLMULMAX1-RV32-LABEL: fmuladd_v6f16: -; ZVFHMINLMULMAX1-RV32: # %bb.0: -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v10, (a2) -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v11, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfmul.vv v8, v8, v11 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v10 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfadd.vv v8, v8, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; 
ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: ret -; -; ZVFHMINLMULMAX1-RV64-LABEL: fmuladd_v6f16: -; ZVFHMINLMULMAX1-RV64: # %bb.0: -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a1) -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v10, (a2) -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v11, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfmul.vv v8, v8, v11 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v10 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfadd.vv v8, v8, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: ret +; ZVFHMIN-RV32-LABEL: fmuladd_v6f16: +; ZVFHMIN-RV32: # %bb.0: +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vle16.v v8, (a1) +; ZVFHMIN-RV32-NEXT: vle16.v v9, (a0) +; ZVFHMIN-RV32-NEXT: vle16.v v10, (a2) +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfmul.vv v8, v8, v11 +; ZVFHMIN-RV32-NEXT: vsetvli 
zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfadd.vv v8, v8, v9 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV32-NEXT: addi a1, a0, 8 +; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMIN-RV32-NEXT: ret +; +; ZVFHMIN-RV64-LABEL: fmuladd_v6f16: +; ZVFHMIN-RV64: # %bb.0: +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vle16.v v8, (a1) +; ZVFHMIN-RV64-NEXT: vle16.v v9, (a0) +; ZVFHMIN-RV64-NEXT: vle16.v v10, (a2) +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfmul.vv v8, v8, v11 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfadd.vv v8, v8, v9 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV64-NEXT: addi a0, a0, 8 +; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMIN-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = load <6 x half>, ptr %z @@ -6681,107 +4948,56 @@ define void @fmsub_fmuladd_v6f16(ptr %x, ptr %y, ptr %z) { ; ZVFH-NEXT: vse16.v v10, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMINLMULMAX2-RV32-LABEL: fmsub_fmuladd_v6f16: -; 
ZVFHMINLMULMAX2-RV32: # %bb.0: -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v10, (a2) -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v11, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfmul.vv v8, v8, v11 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v10 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfsub.vv v8, v8, v9 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX2-RV32-NEXT: ret -; -; ZVFHMINLMULMAX2-RV64-LABEL: fmsub_fmuladd_v6f16: -; ZVFHMINLMULMAX2-RV64: # %bb.0: -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a1) -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v10, (a2) -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v11, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfmul.vv v8, v8, v11 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v10 -; 
ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfsub.vv v8, v8, v9 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX2-RV64-NEXT: ret -; -; ZVFHMINLMULMAX1-RV32-LABEL: fmsub_fmuladd_v6f16: -; ZVFHMINLMULMAX1-RV32: # %bb.0: -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v10, (a2) -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v11, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfmul.vv v8, v8, v11 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v10 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfsub.vv v8, v8, v9 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 -; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMINLMULMAX1-RV32-NEXT: ret -; -; ZVFHMINLMULMAX1-RV64-LABEL: fmsub_fmuladd_v6f16: -; ZVFHMINLMULMAX1-RV64: # %bb.0: -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma 
-; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a1) -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v10, (a2) -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v11, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfmul.vv v8, v8, v11 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v10 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfsub.vv v8, v8, v9 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 -; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; ZVFHMINLMULMAX1-RV64-NEXT: ret +; ZVFHMIN-RV32-LABEL: fmsub_fmuladd_v6f16: +; ZVFHMIN-RV32: # %bb.0: +; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vle16.v v8, (a1) +; ZVFHMIN-RV32-NEXT: vle16.v v9, (a0) +; ZVFHMIN-RV32-NEXT: vle16.v v10, (a2) +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfmul.vv v8, v8, v11 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV32-NEXT: vfsub.vv v8, v8, v9 +; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV32-NEXT: 
vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV32-NEXT: addi a1, a0, 8 +; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMIN-RV32-NEXT: ret +; +; ZVFHMIN-RV64-LABEL: fmsub_fmuladd_v6f16: +; ZVFHMIN-RV64: # %bb.0: +; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vle16.v v8, (a1) +; ZVFHMIN-RV64-NEXT: vle16.v v9, (a0) +; ZVFHMIN-RV64-NEXT: vle16.v v10, (a2) +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfmul.vv v8, v8, v11 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-RV64-NEXT: vfsub.vv v8, v8, v9 +; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN-RV64-NEXT: addi a0, a0, 8 +; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMIN-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = load <6 x half>, ptr %z diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll index 44b96d076df455..dbc65620b7f249 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll @@ -1,12 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8RV32,LMULMAX8RV32ZVFH -; RUN: llc -mtriple=riscv64 
-target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8RV64,LMULMAX8RV64ZVFH -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1RV32,LMULMAX1RV32ZVFH -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1RV64,LMULMAX1RV64ZVFH -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfhmin,+f,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8RV32,LMULMAX8RV32ZVFHMIN -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfhmin,+f,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8RV64,LMULMAX8RV64ZVFHMIN -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfhmin,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1RV32,LMULMAX1RV32ZVFHMIN -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfhmin,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1RV64,LMULMAX1RV64ZVFHMIN +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVFH32 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVFH64 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfhmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN32 +; RUN: llc 
-mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfhmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN64 define void @fp2si_v2f32_v2i32(ptr %x, ptr %y) { ; CHECK-LABEL: fp2si_v2f32_v2i32: @@ -128,194 +124,194 @@ define <3 x i1> @fp2si_v3f32_v3i1(<3 x float> %x) { ; FIXME: This is expanded when they could be widened + promoted define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) { -; LMULMAX8RV32-LABEL: fp2si_v3f32_v3i15: -; LMULMAX8RV32: # %bb.0: -; LMULMAX8RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX8RV32-NEXT: vfncvt.rtz.x.f.w v9, v8 -; LMULMAX8RV32-NEXT: vslidedown.vi v8, v9, 2 -; LMULMAX8RV32-NEXT: vmv.x.s a1, v8 -; LMULMAX8RV32-NEXT: slli a2, a1, 17 -; LMULMAX8RV32-NEXT: srli a2, a2, 19 -; LMULMAX8RV32-NEXT: sh a2, 4(a0) -; LMULMAX8RV32-NEXT: vmv.x.s a2, v9 -; LMULMAX8RV32-NEXT: lui a3, 8 -; LMULMAX8RV32-NEXT: addi a3, a3, -1 -; LMULMAX8RV32-NEXT: and a2, a2, a3 -; LMULMAX8RV32-NEXT: vslidedown.vi v8, v9, 1 -; LMULMAX8RV32-NEXT: vmv.x.s a4, v8 -; LMULMAX8RV32-NEXT: and a3, a4, a3 -; LMULMAX8RV32-NEXT: slli a3, a3, 15 -; LMULMAX8RV32-NEXT: slli a1, a1, 30 -; LMULMAX8RV32-NEXT: or a1, a2, a1 -; LMULMAX8RV32-NEXT: or a1, a1, a3 -; LMULMAX8RV32-NEXT: sw a1, 0(a0) -; LMULMAX8RV32-NEXT: ret +; ZVFH32-LABEL: fp2si_v3f32_v3i15: +; ZVFH32: # %bb.0: +; ZVFH32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFH32-NEXT: vfncvt.rtz.x.f.w v9, v8 +; ZVFH32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFH32-NEXT: vmv.x.s a1, v8 +; ZVFH32-NEXT: slli a2, a1, 17 +; ZVFH32-NEXT: srli a2, a2, 19 +; ZVFH32-NEXT: sh a2, 4(a0) +; ZVFH32-NEXT: vmv.x.s a2, v9 +; ZVFH32-NEXT: lui a3, 8 +; ZVFH32-NEXT: addi a3, a3, -1 +; ZVFH32-NEXT: and a2, a2, a3 +; ZVFH32-NEXT: vslidedown.vi v8, v9, 1 +; ZVFH32-NEXT: vmv.x.s a4, v8 +; ZVFH32-NEXT: and a3, a4, a3 +; ZVFH32-NEXT: slli a3, a3, 15 +; ZVFH32-NEXT: slli a1, a1, 30 +; ZVFH32-NEXT: or a1, a2, a1 +; ZVFH32-NEXT: or a1, a1, a3 +; ZVFH32-NEXT: sw a1, 0(a0) +; ZVFH32-NEXT: ret ; -; LMULMAX8RV64-LABEL: 
fp2si_v3f32_v3i15: -; LMULMAX8RV64: # %bb.0: -; LMULMAX8RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX8RV64-NEXT: vfncvt.rtz.x.f.w v9, v8 -; LMULMAX8RV64-NEXT: vmv.x.s a1, v9 -; LMULMAX8RV64-NEXT: lui a2, 8 -; LMULMAX8RV64-NEXT: addiw a2, a2, -1 -; LMULMAX8RV64-NEXT: and a1, a1, a2 -; LMULMAX8RV64-NEXT: vslidedown.vi v8, v9, 1 -; LMULMAX8RV64-NEXT: vmv.x.s a3, v8 -; LMULMAX8RV64-NEXT: and a2, a3, a2 -; LMULMAX8RV64-NEXT: slli a2, a2, 15 -; LMULMAX8RV64-NEXT: vslidedown.vi v8, v9, 2 -; LMULMAX8RV64-NEXT: vmv.x.s a3, v8 -; LMULMAX8RV64-NEXT: slli a3, a3, 30 -; LMULMAX8RV64-NEXT: or a1, a1, a3 -; LMULMAX8RV64-NEXT: or a1, a1, a2 -; LMULMAX8RV64-NEXT: sw a1, 0(a0) -; LMULMAX8RV64-NEXT: slli a1, a1, 19 -; LMULMAX8RV64-NEXT: srli a1, a1, 51 -; LMULMAX8RV64-NEXT: sh a1, 4(a0) -; LMULMAX8RV64-NEXT: ret +; ZVFH64-LABEL: fp2si_v3f32_v3i15: +; ZVFH64: # %bb.0: +; ZVFH64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFH64-NEXT: vfncvt.rtz.x.f.w v9, v8 +; ZVFH64-NEXT: vmv.x.s a1, v9 +; ZVFH64-NEXT: lui a2, 8 +; ZVFH64-NEXT: addiw a2, a2, -1 +; ZVFH64-NEXT: and a1, a1, a2 +; ZVFH64-NEXT: vslidedown.vi v8, v9, 1 +; ZVFH64-NEXT: vmv.x.s a3, v8 +; ZVFH64-NEXT: and a2, a3, a2 +; ZVFH64-NEXT: slli a2, a2, 15 +; ZVFH64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFH64-NEXT: vmv.x.s a3, v8 +; ZVFH64-NEXT: slli a3, a3, 30 +; ZVFH64-NEXT: or a1, a1, a3 +; ZVFH64-NEXT: or a1, a1, a2 +; ZVFH64-NEXT: sw a1, 0(a0) +; ZVFH64-NEXT: slli a1, a1, 19 +; ZVFH64-NEXT: srli a1, a1, 51 +; ZVFH64-NEXT: sh a1, 4(a0) +; ZVFH64-NEXT: ret ; -; LMULMAX1RV32-LABEL: fp2si_v3f32_v3i15: -; LMULMAX1RV32: # %bb.0: -; LMULMAX1RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1RV32-NEXT: vfncvt.rtz.x.f.w v9, v8 -; LMULMAX1RV32-NEXT: vslidedown.vi v8, v9, 2 -; LMULMAX1RV32-NEXT: vmv.x.s a1, v8 -; LMULMAX1RV32-NEXT: slli a2, a1, 17 -; LMULMAX1RV32-NEXT: srli a2, a2, 19 -; LMULMAX1RV32-NEXT: sh a2, 4(a0) -; LMULMAX1RV32-NEXT: vmv.x.s a2, v9 -; LMULMAX1RV32-NEXT: lui a3, 8 -; LMULMAX1RV32-NEXT: addi a3, a3, -1 -; 
LMULMAX1RV32-NEXT: and a2, a2, a3 -; LMULMAX1RV32-NEXT: vslidedown.vi v8, v9, 1 -; LMULMAX1RV32-NEXT: vmv.x.s a4, v8 -; LMULMAX1RV32-NEXT: and a3, a4, a3 -; LMULMAX1RV32-NEXT: slli a3, a3, 15 -; LMULMAX1RV32-NEXT: slli a1, a1, 30 -; LMULMAX1RV32-NEXT: or a1, a2, a1 -; LMULMAX1RV32-NEXT: or a1, a1, a3 -; LMULMAX1RV32-NEXT: sw a1, 0(a0) -; LMULMAX1RV32-NEXT: ret +; ZVFHMIN32-LABEL: fp2si_v3f32_v3i15: +; ZVFHMIN32: # %bb.0: +; ZVFHMIN32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN32-NEXT: vfncvt.rtz.x.f.w v9, v8 +; ZVFHMIN32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN32-NEXT: vmv.x.s a1, v8 +; ZVFHMIN32-NEXT: slli a2, a1, 17 +; ZVFHMIN32-NEXT: srli a2, a2, 19 +; ZVFHMIN32-NEXT: sh a2, 4(a0) +; ZVFHMIN32-NEXT: vmv.x.s a2, v9 +; ZVFHMIN32-NEXT: lui a3, 8 +; ZVFHMIN32-NEXT: addi a3, a3, -1 +; ZVFHMIN32-NEXT: and a2, a2, a3 +; ZVFHMIN32-NEXT: vslidedown.vi v8, v9, 1 +; ZVFHMIN32-NEXT: vmv.x.s a4, v8 +; ZVFHMIN32-NEXT: and a3, a4, a3 +; ZVFHMIN32-NEXT: slli a3, a3, 15 +; ZVFHMIN32-NEXT: slli a1, a1, 30 +; ZVFHMIN32-NEXT: or a1, a2, a1 +; ZVFHMIN32-NEXT: or a1, a1, a3 +; ZVFHMIN32-NEXT: sw a1, 0(a0) +; ZVFHMIN32-NEXT: ret ; -; LMULMAX1RV64-LABEL: fp2si_v3f32_v3i15: -; LMULMAX1RV64: # %bb.0: -; LMULMAX1RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1RV64-NEXT: vfncvt.rtz.x.f.w v9, v8 -; LMULMAX1RV64-NEXT: vmv.x.s a1, v9 -; LMULMAX1RV64-NEXT: lui a2, 8 -; LMULMAX1RV64-NEXT: addiw a2, a2, -1 -; LMULMAX1RV64-NEXT: and a1, a1, a2 -; LMULMAX1RV64-NEXT: vslidedown.vi v8, v9, 1 -; LMULMAX1RV64-NEXT: vmv.x.s a3, v8 -; LMULMAX1RV64-NEXT: and a2, a3, a2 -; LMULMAX1RV64-NEXT: slli a2, a2, 15 -; LMULMAX1RV64-NEXT: vslidedown.vi v8, v9, 2 -; LMULMAX1RV64-NEXT: vmv.x.s a3, v8 -; LMULMAX1RV64-NEXT: slli a3, a3, 30 -; LMULMAX1RV64-NEXT: or a1, a1, a3 -; LMULMAX1RV64-NEXT: or a1, a1, a2 -; LMULMAX1RV64-NEXT: sw a1, 0(a0) -; LMULMAX1RV64-NEXT: slli a1, a1, 19 -; LMULMAX1RV64-NEXT: srli a1, a1, 51 -; LMULMAX1RV64-NEXT: sh a1, 4(a0) -; LMULMAX1RV64-NEXT: ret +; ZVFHMIN64-LABEL: 
fp2si_v3f32_v3i15: +; ZVFHMIN64: # %bb.0: +; ZVFHMIN64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN64-NEXT: vfncvt.rtz.x.f.w v9, v8 +; ZVFHMIN64-NEXT: vmv.x.s a1, v9 +; ZVFHMIN64-NEXT: lui a2, 8 +; ZVFHMIN64-NEXT: addiw a2, a2, -1 +; ZVFHMIN64-NEXT: and a1, a1, a2 +; ZVFHMIN64-NEXT: vslidedown.vi v8, v9, 1 +; ZVFHMIN64-NEXT: vmv.x.s a3, v8 +; ZVFHMIN64-NEXT: and a2, a3, a2 +; ZVFHMIN64-NEXT: slli a2, a2, 15 +; ZVFHMIN64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN64-NEXT: vmv.x.s a3, v8 +; ZVFHMIN64-NEXT: slli a3, a3, 30 +; ZVFHMIN64-NEXT: or a1, a1, a3 +; ZVFHMIN64-NEXT: or a1, a1, a2 +; ZVFHMIN64-NEXT: sw a1, 0(a0) +; ZVFHMIN64-NEXT: slli a1, a1, 19 +; ZVFHMIN64-NEXT: srli a1, a1, 51 +; ZVFHMIN64-NEXT: sh a1, 4(a0) +; ZVFHMIN64-NEXT: ret %z = fptosi <3 x float> %x to <3 x i15> ret <3 x i15> %z } ; FIXME: This is expanded when they could be widened + promoted define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) { -; LMULMAX8RV32-LABEL: fp2ui_v3f32_v3i15: -; LMULMAX8RV32: # %bb.0: -; LMULMAX8RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX8RV32-NEXT: vfncvt.rtz.x.f.w v9, v8 -; LMULMAX8RV32-NEXT: vslidedown.vi v8, v9, 2 -; LMULMAX8RV32-NEXT: vmv.x.s a1, v8 -; LMULMAX8RV32-NEXT: slli a2, a1, 17 -; LMULMAX8RV32-NEXT: srli a2, a2, 19 -; LMULMAX8RV32-NEXT: sh a2, 4(a0) -; LMULMAX8RV32-NEXT: vmv.x.s a2, v9 -; LMULMAX8RV32-NEXT: lui a3, 16 -; LMULMAX8RV32-NEXT: addi a3, a3, -1 -; LMULMAX8RV32-NEXT: and a2, a2, a3 -; LMULMAX8RV32-NEXT: vslidedown.vi v8, v9, 1 -; LMULMAX8RV32-NEXT: vmv.x.s a4, v8 -; LMULMAX8RV32-NEXT: and a3, a4, a3 -; LMULMAX8RV32-NEXT: slli a3, a3, 15 -; LMULMAX8RV32-NEXT: slli a1, a1, 30 -; LMULMAX8RV32-NEXT: or a1, a2, a1 -; LMULMAX8RV32-NEXT: or a1, a1, a3 -; LMULMAX8RV32-NEXT: sw a1, 0(a0) -; LMULMAX8RV32-NEXT: ret +; ZVFH32-LABEL: fp2ui_v3f32_v3i15: +; ZVFH32: # %bb.0: +; ZVFH32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFH32-NEXT: vfncvt.rtz.x.f.w v9, v8 +; ZVFH32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFH32-NEXT: vmv.x.s a1, v8 +; 
ZVFH32-NEXT: slli a2, a1, 17 +; ZVFH32-NEXT: srli a2, a2, 19 +; ZVFH32-NEXT: sh a2, 4(a0) +; ZVFH32-NEXT: vmv.x.s a2, v9 +; ZVFH32-NEXT: lui a3, 16 +; ZVFH32-NEXT: addi a3, a3, -1 +; ZVFH32-NEXT: and a2, a2, a3 +; ZVFH32-NEXT: vslidedown.vi v8, v9, 1 +; ZVFH32-NEXT: vmv.x.s a4, v8 +; ZVFH32-NEXT: and a3, a4, a3 +; ZVFH32-NEXT: slli a3, a3, 15 +; ZVFH32-NEXT: slli a1, a1, 30 +; ZVFH32-NEXT: or a1, a2, a1 +; ZVFH32-NEXT: or a1, a1, a3 +; ZVFH32-NEXT: sw a1, 0(a0) +; ZVFH32-NEXT: ret ; -; LMULMAX8RV64-LABEL: fp2ui_v3f32_v3i15: -; LMULMAX8RV64: # %bb.0: -; LMULMAX8RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX8RV64-NEXT: vfncvt.rtz.x.f.w v9, v8 -; LMULMAX8RV64-NEXT: vmv.x.s a1, v9 -; LMULMAX8RV64-NEXT: lui a2, 16 -; LMULMAX8RV64-NEXT: addiw a2, a2, -1 -; LMULMAX8RV64-NEXT: and a1, a1, a2 -; LMULMAX8RV64-NEXT: vslidedown.vi v8, v9, 1 -; LMULMAX8RV64-NEXT: vmv.x.s a3, v8 -; LMULMAX8RV64-NEXT: and a2, a3, a2 -; LMULMAX8RV64-NEXT: slli a2, a2, 15 -; LMULMAX8RV64-NEXT: vslidedown.vi v8, v9, 2 -; LMULMAX8RV64-NEXT: vmv.x.s a3, v8 -; LMULMAX8RV64-NEXT: slli a3, a3, 30 -; LMULMAX8RV64-NEXT: or a1, a1, a3 -; LMULMAX8RV64-NEXT: or a1, a1, a2 -; LMULMAX8RV64-NEXT: sw a1, 0(a0) -; LMULMAX8RV64-NEXT: slli a1, a1, 19 -; LMULMAX8RV64-NEXT: srli a1, a1, 51 -; LMULMAX8RV64-NEXT: sh a1, 4(a0) -; LMULMAX8RV64-NEXT: ret +; ZVFH64-LABEL: fp2ui_v3f32_v3i15: +; ZVFH64: # %bb.0: +; ZVFH64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFH64-NEXT: vfncvt.rtz.x.f.w v9, v8 +; ZVFH64-NEXT: vmv.x.s a1, v9 +; ZVFH64-NEXT: lui a2, 16 +; ZVFH64-NEXT: addiw a2, a2, -1 +; ZVFH64-NEXT: and a1, a1, a2 +; ZVFH64-NEXT: vslidedown.vi v8, v9, 1 +; ZVFH64-NEXT: vmv.x.s a3, v8 +; ZVFH64-NEXT: and a2, a3, a2 +; ZVFH64-NEXT: slli a2, a2, 15 +; ZVFH64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFH64-NEXT: vmv.x.s a3, v8 +; ZVFH64-NEXT: slli a3, a3, 30 +; ZVFH64-NEXT: or a1, a1, a3 +; ZVFH64-NEXT: or a1, a1, a2 +; ZVFH64-NEXT: sw a1, 0(a0) +; ZVFH64-NEXT: slli a1, a1, 19 +; ZVFH64-NEXT: srli a1, a1, 51 +; 
ZVFH64-NEXT: sh a1, 4(a0) +; ZVFH64-NEXT: ret ; -; LMULMAX1RV32-LABEL: fp2ui_v3f32_v3i15: -; LMULMAX1RV32: # %bb.0: -; LMULMAX1RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1RV32-NEXT: vfncvt.rtz.x.f.w v9, v8 -; LMULMAX1RV32-NEXT: vslidedown.vi v8, v9, 2 -; LMULMAX1RV32-NEXT: vmv.x.s a1, v8 -; LMULMAX1RV32-NEXT: slli a2, a1, 17 -; LMULMAX1RV32-NEXT: srli a2, a2, 19 -; LMULMAX1RV32-NEXT: sh a2, 4(a0) -; LMULMAX1RV32-NEXT: vmv.x.s a2, v9 -; LMULMAX1RV32-NEXT: lui a3, 16 -; LMULMAX1RV32-NEXT: addi a3, a3, -1 -; LMULMAX1RV32-NEXT: and a2, a2, a3 -; LMULMAX1RV32-NEXT: vslidedown.vi v8, v9, 1 -; LMULMAX1RV32-NEXT: vmv.x.s a4, v8 -; LMULMAX1RV32-NEXT: and a3, a4, a3 -; LMULMAX1RV32-NEXT: slli a3, a3, 15 -; LMULMAX1RV32-NEXT: slli a1, a1, 30 -; LMULMAX1RV32-NEXT: or a1, a2, a1 -; LMULMAX1RV32-NEXT: or a1, a1, a3 -; LMULMAX1RV32-NEXT: sw a1, 0(a0) -; LMULMAX1RV32-NEXT: ret +; ZVFHMIN32-LABEL: fp2ui_v3f32_v3i15: +; ZVFHMIN32: # %bb.0: +; ZVFHMIN32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN32-NEXT: vfncvt.rtz.x.f.w v9, v8 +; ZVFHMIN32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN32-NEXT: vmv.x.s a1, v8 +; ZVFHMIN32-NEXT: slli a2, a1, 17 +; ZVFHMIN32-NEXT: srli a2, a2, 19 +; ZVFHMIN32-NEXT: sh a2, 4(a0) +; ZVFHMIN32-NEXT: vmv.x.s a2, v9 +; ZVFHMIN32-NEXT: lui a3, 16 +; ZVFHMIN32-NEXT: addi a3, a3, -1 +; ZVFHMIN32-NEXT: and a2, a2, a3 +; ZVFHMIN32-NEXT: vslidedown.vi v8, v9, 1 +; ZVFHMIN32-NEXT: vmv.x.s a4, v8 +; ZVFHMIN32-NEXT: and a3, a4, a3 +; ZVFHMIN32-NEXT: slli a3, a3, 15 +; ZVFHMIN32-NEXT: slli a1, a1, 30 +; ZVFHMIN32-NEXT: or a1, a2, a1 +; ZVFHMIN32-NEXT: or a1, a1, a3 +; ZVFHMIN32-NEXT: sw a1, 0(a0) +; ZVFHMIN32-NEXT: ret ; -; LMULMAX1RV64-LABEL: fp2ui_v3f32_v3i15: -; LMULMAX1RV64: # %bb.0: -; LMULMAX1RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1RV64-NEXT: vfncvt.rtz.x.f.w v9, v8 -; LMULMAX1RV64-NEXT: vmv.x.s a1, v9 -; LMULMAX1RV64-NEXT: lui a2, 16 -; LMULMAX1RV64-NEXT: addiw a2, a2, -1 -; LMULMAX1RV64-NEXT: and a1, a1, a2 -; 
LMULMAX1RV64-NEXT: vslidedown.vi v8, v9, 1 -; LMULMAX1RV64-NEXT: vmv.x.s a3, v8 -; LMULMAX1RV64-NEXT: and a2, a3, a2 -; LMULMAX1RV64-NEXT: slli a2, a2, 15 -; LMULMAX1RV64-NEXT: vslidedown.vi v8, v9, 2 -; LMULMAX1RV64-NEXT: vmv.x.s a3, v8 -; LMULMAX1RV64-NEXT: slli a3, a3, 30 -; LMULMAX1RV64-NEXT: or a1, a1, a3 -; LMULMAX1RV64-NEXT: or a1, a1, a2 -; LMULMAX1RV64-NEXT: sw a1, 0(a0) -; LMULMAX1RV64-NEXT: slli a1, a1, 19 -; LMULMAX1RV64-NEXT: srli a1, a1, 51 -; LMULMAX1RV64-NEXT: sh a1, 4(a0) -; LMULMAX1RV64-NEXT: ret +; ZVFHMIN64-LABEL: fp2ui_v3f32_v3i15: +; ZVFHMIN64: # %bb.0: +; ZVFHMIN64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN64-NEXT: vfncvt.rtz.x.f.w v9, v8 +; ZVFHMIN64-NEXT: vmv.x.s a1, v9 +; ZVFHMIN64-NEXT: lui a2, 16 +; ZVFHMIN64-NEXT: addiw a2, a2, -1 +; ZVFHMIN64-NEXT: and a1, a1, a2 +; ZVFHMIN64-NEXT: vslidedown.vi v8, v9, 1 +; ZVFHMIN64-NEXT: vmv.x.s a3, v8 +; ZVFHMIN64-NEXT: and a2, a3, a2 +; ZVFHMIN64-NEXT: slli a2, a2, 15 +; ZVFHMIN64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMIN64-NEXT: vmv.x.s a3, v8 +; ZVFHMIN64-NEXT: slli a3, a3, 30 +; ZVFHMIN64-NEXT: or a1, a1, a3 +; ZVFHMIN64-NEXT: or a1, a1, a2 +; ZVFHMIN64-NEXT: sw a1, 0(a0) +; ZVFHMIN64-NEXT: slli a1, a1, 19 +; ZVFHMIN64-NEXT: srli a1, a1, 51 +; ZVFHMIN64-NEXT: sh a1, 4(a0) +; ZVFHMIN64-NEXT: ret %z = fptoui <3 x float> %x to <3 x i15> ret <3 x i15> %z } @@ -333,26 +329,13 @@ define <3 x i1> @fp2ui_v3f32_v3i1(<3 x float> %x) { } define void @fp2si_v8f32_v8i32(ptr %x, ptr %y) { -; LMULMAX8-LABEL: fp2si_v8f32_v8i32: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX8-NEXT: vle32.v v8, (a0) -; LMULMAX8-NEXT: vfcvt.rtz.x.f.v v8, v8 -; LMULMAX8-NEXT: vse32.v v8, (a1) -; LMULMAX8-NEXT: ret -; -; LMULMAX1-LABEL: fp2si_v8f32_v8i32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: addi a2, a0, 16 -; LMULMAX1-NEXT: vle32.v v8, (a2) -; LMULMAX1-NEXT: vle32.v v9, (a0) -; LMULMAX1-NEXT: vfcvt.rtz.x.f.v v8, v8 -; 
LMULMAX1-NEXT: vfcvt.rtz.x.f.v v9, v9 -; LMULMAX1-NEXT: vse32.v v9, (a1) -; LMULMAX1-NEXT: addi a1, a1, 16 -; LMULMAX1-NEXT: vse32.v v8, (a1) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: fp2si_v8f32_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 +; CHECK-NEXT: vse32.v v8, (a1) +; CHECK-NEXT: ret %a = load <8 x float>, ptr %x %d = fptosi <8 x float> %a to <8 x i32> store <8 x i32> %d, ptr %y @@ -360,26 +343,13 @@ define void @fp2si_v8f32_v8i32(ptr %x, ptr %y) { } define void @fp2ui_v8f32_v8i32(ptr %x, ptr %y) { -; LMULMAX8-LABEL: fp2ui_v8f32_v8i32: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX8-NEXT: vle32.v v8, (a0) -; LMULMAX8-NEXT: vfcvt.rtz.xu.f.v v8, v8 -; LMULMAX8-NEXT: vse32.v v8, (a1) -; LMULMAX8-NEXT: ret -; -; LMULMAX1-LABEL: fp2ui_v8f32_v8i32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: addi a2, a0, 16 -; LMULMAX1-NEXT: vle32.v v8, (a2) -; LMULMAX1-NEXT: vle32.v v9, (a0) -; LMULMAX1-NEXT: vfcvt.rtz.xu.f.v v8, v8 -; LMULMAX1-NEXT: vfcvt.rtz.xu.f.v v9, v9 -; LMULMAX1-NEXT: vse32.v v9, (a1) -; LMULMAX1-NEXT: addi a1, a1, 16 -; LMULMAX1-NEXT: vse32.v v8, (a1) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: fp2ui_v8f32_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; CHECK-NEXT: vse32.v v8, (a1) +; CHECK-NEXT: ret %a = load <8 x float>, ptr %x %d = fptoui <8 x float> %a to <8 x i32> store <8 x i32> %d, ptr %y @@ -387,67 +357,25 @@ define void @fp2ui_v8f32_v8i32(ptr %x, ptr %y) { } define <8 x i1> @fp2si_v8f32_v8i1(<8 x float> %x) { -; LMULMAX8-LABEL: fp2si_v8f32_v8i1: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX8-NEXT: vfncvt.rtz.x.f.w v10, v8 -; LMULMAX8-NEXT: vand.vi v8, v10, 1 -; LMULMAX8-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX8-NEXT: ret -; -; 
LMULMAX1-LABEL: fp2si_v8f32_v8i1: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v10, v8 -; LMULMAX1-NEXT: vand.vi v8, v10, 1 -; LMULMAX1-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v10, v9 -; LMULMAX1-NEXT: vand.vi v9, v10, 1 -; LMULMAX1-NEXT: vmsne.vi v0, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; LMULMAX1-NEXT: vmv.v.i v9, 0 -; LMULMAX1-NEXT: vmerge.vim v9, v9, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslideup.vi v8, v9, 4 -; LMULMAX1-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX1-NEXT: ret +; CHECK-LABEL: fp2si_v8f32_v8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vfncvt.rtz.x.f.w v10, v8 +; CHECK-NEXT: vand.vi v8, v10, 1 +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: ret %z = fptosi <8 x float> %x to <8 x i1> ret <8 x i1> %z } define <8 x i1> @fp2ui_v8f32_v8i1(<8 x float> %x) { -; LMULMAX8-LABEL: fp2ui_v8f32_v8i1: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX8-NEXT: vfncvt.rtz.xu.f.w v10, v8 -; LMULMAX8-NEXT: vand.vi v8, v10, 1 -; LMULMAX8-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX8-NEXT: ret -; -; LMULMAX1-LABEL: fp2ui_v8f32_v8i1: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v10, v8 -; LMULMAX1-NEXT: vand.vi v8, v10, 1 -; LMULMAX1-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v10, v9 -; LMULMAX1-NEXT: vand.vi v9, v10, 1 -; LMULMAX1-NEXT: vmsne.vi v0, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; LMULMAX1-NEXT: 
vmv.v.i v9, 0 -; LMULMAX1-NEXT: vmerge.vim v9, v9, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslideup.vi v8, v9, 4 -; LMULMAX1-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX1-NEXT: ret +; CHECK-LABEL: fp2ui_v8f32_v8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vfncvt.rtz.xu.f.w v10, v8 +; CHECK-NEXT: vand.vi v8, v10, 1 +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: ret %z = fptoui <8 x float> %x to <8 x i1> ret <8 x i1> %z } @@ -481,39 +409,13 @@ define void @fp2ui_v2f32_v2i64(ptr %x, ptr %y) { } define void @fp2si_v8f32_v8i64(ptr %x, ptr %y) { -; LMULMAX8-LABEL: fp2si_v8f32_v8i64: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX8-NEXT: vle32.v v8, (a0) -; LMULMAX8-NEXT: vfwcvt.rtz.x.f.v v12, v8 -; LMULMAX8-NEXT: vse64.v v12, (a1) -; LMULMAX8-NEXT: ret -; -; LMULMAX1-LABEL: fp2si_v8f32_v8i64: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: addi a2, a0, 16 -; LMULMAX1-NEXT: vle32.v v8, (a2) -; LMULMAX1-NEXT: vle32.v v9, (a0) -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vfwcvt.rtz.x.f.v v10, v8 -; LMULMAX1-NEXT: vfwcvt.rtz.x.f.v v11, v9 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vfwcvt.rtz.x.f.v v12, v8 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v9, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vfwcvt.rtz.x.f.v v9, v8 -; LMULMAX1-NEXT: addi a0, a1, 16 -; LMULMAX1-NEXT: vse64.v v9, (a0) -; LMULMAX1-NEXT: addi a0, a1, 48 -; LMULMAX1-NEXT: vse64.v v12, (a0) -; LMULMAX1-NEXT: vse64.v v11, (a1) -; LMULMAX1-NEXT: addi a0, a1, 32 -; LMULMAX1-NEXT: vse64.v v10, (a0) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: fp2si_v8f32_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, 
(a0) +; CHECK-NEXT: vfwcvt.rtz.x.f.v v12, v8 +; CHECK-NEXT: vse64.v v12, (a1) +; CHECK-NEXT: ret %a = load <8 x float>, ptr %x %d = fptosi <8 x float> %a to <8 x i64> store <8 x i64> %d, ptr %y @@ -521,39 +423,13 @@ define void @fp2si_v8f32_v8i64(ptr %x, ptr %y) { } define void @fp2ui_v8f32_v8i64(ptr %x, ptr %y) { -; LMULMAX8-LABEL: fp2ui_v8f32_v8i64: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX8-NEXT: vle32.v v8, (a0) -; LMULMAX8-NEXT: vfwcvt.rtz.xu.f.v v12, v8 -; LMULMAX8-NEXT: vse64.v v12, (a1) -; LMULMAX8-NEXT: ret -; -; LMULMAX1-LABEL: fp2ui_v8f32_v8i64: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: addi a2, a0, 16 -; LMULMAX1-NEXT: vle32.v v8, (a2) -; LMULMAX1-NEXT: vle32.v v9, (a0) -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vfwcvt.rtz.xu.f.v v10, v8 -; LMULMAX1-NEXT: vfwcvt.rtz.xu.f.v v11, v9 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vfwcvt.rtz.xu.f.v v12, v8 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v9, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vfwcvt.rtz.xu.f.v v9, v8 -; LMULMAX1-NEXT: addi a0, a1, 16 -; LMULMAX1-NEXT: vse64.v v9, (a0) -; LMULMAX1-NEXT: addi a0, a1, 48 -; LMULMAX1-NEXT: vse64.v v12, (a0) -; LMULMAX1-NEXT: vse64.v v11, (a1) -; LMULMAX1-NEXT: addi a0, a1, 32 -; LMULMAX1-NEXT: vse64.v v10, (a0) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: fp2ui_v8f32_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v8 +; CHECK-NEXT: vse64.v v12, (a1) +; CHECK-NEXT: ret %a = load <8 x float>, ptr %x %d = fptoui <8 x float> %a to <8 x i64> store <8 x i64> %d, ptr %y @@ -593,145 +469,43 @@ define void @fp2ui_v2f16_v2i64(ptr %x, ptr %y) { } define <2 x i1> 
@fp2si_v2f16_v2i1(<2 x half> %x) { -; LMULMAX8RV32ZVFH-LABEL: fp2si_v2f16_v2i1: -; LMULMAX8RV32ZVFH: # %bb.0: -; LMULMAX8RV32ZVFH-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; LMULMAX8RV32ZVFH-NEXT: vfncvt.rtz.x.f.w v9, v8 -; LMULMAX8RV32ZVFH-NEXT: vand.vi v8, v9, 1 -; LMULMAX8RV32ZVFH-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX8RV32ZVFH-NEXT: ret -; -; LMULMAX8RV64ZVFH-LABEL: fp2si_v2f16_v2i1: -; LMULMAX8RV64ZVFH: # %bb.0: -; LMULMAX8RV64ZVFH-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; LMULMAX8RV64ZVFH-NEXT: vfncvt.rtz.x.f.w v9, v8 -; LMULMAX8RV64ZVFH-NEXT: vand.vi v8, v9, 1 -; LMULMAX8RV64ZVFH-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX8RV64ZVFH-NEXT: ret -; -; LMULMAX1RV32ZVFH-LABEL: fp2si_v2f16_v2i1: -; LMULMAX1RV32ZVFH: # %bb.0: -; LMULMAX1RV32ZVFH-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; LMULMAX1RV32ZVFH-NEXT: vfncvt.rtz.x.f.w v9, v8 -; LMULMAX1RV32ZVFH-NEXT: vand.vi v8, v9, 1 -; LMULMAX1RV32ZVFH-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX1RV32ZVFH-NEXT: ret +; ZVFH-LABEL: fp2si_v2f16_v2i1: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; ZVFH-NEXT: vfncvt.rtz.x.f.w v9, v8 +; ZVFH-NEXT: vand.vi v8, v9, 1 +; ZVFH-NEXT: vmsne.vi v0, v8, 0 +; ZVFH-NEXT: ret ; -; LMULMAX1RV64ZVFH-LABEL: fp2si_v2f16_v2i1: -; LMULMAX1RV64ZVFH: # %bb.0: -; LMULMAX1RV64ZVFH-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; LMULMAX1RV64ZVFH-NEXT: vfncvt.rtz.x.f.w v9, v8 -; LMULMAX1RV64ZVFH-NEXT: vand.vi v8, v9, 1 -; LMULMAX1RV64ZVFH-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX1RV64ZVFH-NEXT: ret -; -; LMULMAX8RV32ZVFHMIN-LABEL: fp2si_v2f16_v2i1: -; LMULMAX8RV32ZVFHMIN: # %bb.0: -; LMULMAX8RV32ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; LMULMAX8RV32ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; LMULMAX8RV32ZVFHMIN-NEXT: vfncvt.rtz.x.f.w v8, v9 -; LMULMAX8RV32ZVFHMIN-NEXT: vand.vi v8, v8, 1 -; LMULMAX8RV32ZVFHMIN-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX8RV32ZVFHMIN-NEXT: ret -; -; LMULMAX8RV64ZVFHMIN-LABEL: fp2si_v2f16_v2i1: -; LMULMAX8RV64ZVFHMIN: # %bb.0: -; LMULMAX8RV64ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, 
ta, ma -; LMULMAX8RV64ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; LMULMAX8RV64ZVFHMIN-NEXT: vfncvt.rtz.x.f.w v8, v9 -; LMULMAX8RV64ZVFHMIN-NEXT: vand.vi v8, v8, 1 -; LMULMAX8RV64ZVFHMIN-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX8RV64ZVFHMIN-NEXT: ret -; -; LMULMAX1RV32ZVFHMIN-LABEL: fp2si_v2f16_v2i1: -; LMULMAX1RV32ZVFHMIN: # %bb.0: -; LMULMAX1RV32ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; LMULMAX1RV32ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; LMULMAX1RV32ZVFHMIN-NEXT: vfncvt.rtz.x.f.w v8, v9 -; LMULMAX1RV32ZVFHMIN-NEXT: vand.vi v8, v8, 1 -; LMULMAX1RV32ZVFHMIN-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX1RV32ZVFHMIN-NEXT: ret -; -; LMULMAX1RV64ZVFHMIN-LABEL: fp2si_v2f16_v2i1: -; LMULMAX1RV64ZVFHMIN: # %bb.0: -; LMULMAX1RV64ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; LMULMAX1RV64ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; LMULMAX1RV64ZVFHMIN-NEXT: vfncvt.rtz.x.f.w v8, v9 -; LMULMAX1RV64ZVFHMIN-NEXT: vand.vi v8, v8, 1 -; LMULMAX1RV64ZVFHMIN-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX1RV64ZVFHMIN-NEXT: ret +; ZVFHMIN-LABEL: fp2si_v2f16_v2i1: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfncvt.rtz.x.f.w v8, v9 +; ZVFHMIN-NEXT: vand.vi v8, v8, 1 +; ZVFHMIN-NEXT: vmsne.vi v0, v8, 0 +; ZVFHMIN-NEXT: ret %z = fptosi <2 x half> %x to <2 x i1> ret <2 x i1> %z } define <2 x i1> @fp2ui_v2f16_v2i1(<2 x half> %x) { -; LMULMAX8RV32ZVFH-LABEL: fp2ui_v2f16_v2i1: -; LMULMAX8RV32ZVFH: # %bb.0: -; LMULMAX8RV32ZVFH-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; LMULMAX8RV32ZVFH-NEXT: vfncvt.rtz.xu.f.w v9, v8 -; LMULMAX8RV32ZVFH-NEXT: vand.vi v8, v9, 1 -; LMULMAX8RV32ZVFH-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX8RV32ZVFH-NEXT: ret -; -; LMULMAX8RV64ZVFH-LABEL: fp2ui_v2f16_v2i1: -; LMULMAX8RV64ZVFH: # %bb.0: -; LMULMAX8RV64ZVFH-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; LMULMAX8RV64ZVFH-NEXT: vfncvt.rtz.xu.f.w v9, v8 -; LMULMAX8RV64ZVFH-NEXT: vand.vi v8, v9, 1 -; LMULMAX8RV64ZVFH-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX8RV64ZVFH-NEXT: ret -; -; 
LMULMAX1RV32ZVFH-LABEL: fp2ui_v2f16_v2i1: -; LMULMAX1RV32ZVFH: # %bb.0: -; LMULMAX1RV32ZVFH-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; LMULMAX1RV32ZVFH-NEXT: vfncvt.rtz.xu.f.w v9, v8 -; LMULMAX1RV32ZVFH-NEXT: vand.vi v8, v9, 1 -; LMULMAX1RV32ZVFH-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX1RV32ZVFH-NEXT: ret -; -; LMULMAX1RV64ZVFH-LABEL: fp2ui_v2f16_v2i1: -; LMULMAX1RV64ZVFH: # %bb.0: -; LMULMAX1RV64ZVFH-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; LMULMAX1RV64ZVFH-NEXT: vfncvt.rtz.xu.f.w v9, v8 -; LMULMAX1RV64ZVFH-NEXT: vand.vi v8, v9, 1 -; LMULMAX1RV64ZVFH-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX1RV64ZVFH-NEXT: ret -; -; LMULMAX8RV32ZVFHMIN-LABEL: fp2ui_v2f16_v2i1: -; LMULMAX8RV32ZVFHMIN: # %bb.0: -; LMULMAX8RV32ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; LMULMAX8RV32ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; LMULMAX8RV32ZVFHMIN-NEXT: vfncvt.rtz.xu.f.w v8, v9 -; LMULMAX8RV32ZVFHMIN-NEXT: vand.vi v8, v8, 1 -; LMULMAX8RV32ZVFHMIN-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX8RV32ZVFHMIN-NEXT: ret +; ZVFH-LABEL: fp2ui_v2f16_v2i1: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; ZVFH-NEXT: vfncvt.rtz.xu.f.w v9, v8 +; ZVFH-NEXT: vand.vi v8, v9, 1 +; ZVFH-NEXT: vmsne.vi v0, v8, 0 +; ZVFH-NEXT: ret ; -; LMULMAX8RV64ZVFHMIN-LABEL: fp2ui_v2f16_v2i1: -; LMULMAX8RV64ZVFHMIN: # %bb.0: -; LMULMAX8RV64ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; LMULMAX8RV64ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; LMULMAX8RV64ZVFHMIN-NEXT: vfncvt.rtz.xu.f.w v8, v9 -; LMULMAX8RV64ZVFHMIN-NEXT: vand.vi v8, v8, 1 -; LMULMAX8RV64ZVFHMIN-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX8RV64ZVFHMIN-NEXT: ret -; -; LMULMAX1RV32ZVFHMIN-LABEL: fp2ui_v2f16_v2i1: -; LMULMAX1RV32ZVFHMIN: # %bb.0: -; LMULMAX1RV32ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; LMULMAX1RV32ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; LMULMAX1RV32ZVFHMIN-NEXT: vfncvt.rtz.xu.f.w v8, v9 -; LMULMAX1RV32ZVFHMIN-NEXT: vand.vi v8, v8, 1 -; LMULMAX1RV32ZVFHMIN-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX1RV32ZVFHMIN-NEXT: ret -; -; LMULMAX1RV64ZVFHMIN-LABEL: 
fp2ui_v2f16_v2i1: -; LMULMAX1RV64ZVFHMIN: # %bb.0: -; LMULMAX1RV64ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; LMULMAX1RV64ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; LMULMAX1RV64ZVFHMIN-NEXT: vfncvt.rtz.xu.f.w v8, v9 -; LMULMAX1RV64ZVFHMIN-NEXT: vand.vi v8, v8, 1 -; LMULMAX1RV64ZVFHMIN-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX1RV64ZVFHMIN-NEXT: ret +; ZVFHMIN-LABEL: fp2ui_v2f16_v2i1: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfncvt.rtz.xu.f.w v8, v9 +; ZVFHMIN-NEXT: vand.vi v8, v8, 1 +; ZVFHMIN-NEXT: vmsne.vi v0, v8, 0 +; ZVFHMIN-NEXT: ret %z = fptoui <2 x half> %x to <2 x i1> ret <2 x i1> %z } @@ -797,59 +571,17 @@ define <2 x i1> @fp2ui_v2f64_v2i1(<2 x double> %x) { } define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) { -; LMULMAX8-LABEL: fp2si_v8f64_v8i8: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX8-NEXT: vle64.v v8, (a0) -; LMULMAX8-NEXT: vfncvt.rtz.x.f.w v12, v8 -; LMULMAX8-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; LMULMAX8-NEXT: vnsrl.wi v8, v12, 0 -; LMULMAX8-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; LMULMAX8-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX8-NEXT: vse8.v v8, (a1) -; LMULMAX8-NEXT: ret -; -; LMULMAX1-LABEL: fp2si_v8f64_v8i8: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi a2, a0, 48 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vle64.v v8, (a2) -; LMULMAX1-NEXT: addi a2, a0, 32 -; LMULMAX1-NEXT: vle64.v v9, (a0) -; LMULMAX1-NEXT: vle64.v v10, (a2) -; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vle64.v v11, (a0) -; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v12, v9 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v12, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v12, v11 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: 
vnsrl.wi v11, v12, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v11, v11, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, tu, ma -; LMULMAX1-NEXT: vslideup.vi v9, v11, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v11, v10 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v10, v11, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0 -; LMULMAX1-NEXT: vsetivli zero, 6, e8, mf2, tu, ma -; LMULMAX1-NEXT: vslideup.vi v9, v10, 4 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v10, v8 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v8, v10, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslideup.vi v9, v8, 6 -; LMULMAX1-NEXT: vse8.v v9, (a1) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: fp2si_v8f64_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vfncvt.rtz.x.f.w v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v12, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0 +; CHECK-NEXT: vse8.v v8, (a1) +; CHECK-NEXT: ret %a = load <8 x double>, ptr %x %d = fptosi <8 x double> %a to <8 x i8> store <8 x i8> %d, ptr %y @@ -857,59 +589,17 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) { } define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) { -; LMULMAX8-LABEL: fp2ui_v8f64_v8i8: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX8-NEXT: vle64.v v8, (a0) -; LMULMAX8-NEXT: vfncvt.rtz.xu.f.w v12, v8 -; LMULMAX8-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; LMULMAX8-NEXT: vnsrl.wi v8, v12, 0 -; LMULMAX8-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; LMULMAX8-NEXT: vnsrl.wi v8, v8, 0 -; 
LMULMAX8-NEXT: vse8.v v8, (a1) -; LMULMAX8-NEXT: ret -; -; LMULMAX1-LABEL: fp2ui_v8f64_v8i8: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi a2, a0, 48 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vle64.v v8, (a2) -; LMULMAX1-NEXT: addi a2, a0, 32 -; LMULMAX1-NEXT: vle64.v v9, (a0) -; LMULMAX1-NEXT: vle64.v v10, (a2) -; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vle64.v v11, (a0) -; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v12, v9 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v12, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v12, v11 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v11, v12, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v11, v11, 0 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, tu, ma -; LMULMAX1-NEXT: vslideup.vi v9, v11, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v11, v10 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v10, v11, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0 -; LMULMAX1-NEXT: vsetivli zero, 6, e8, mf2, tu, ma -; LMULMAX1-NEXT: vslideup.vi v9, v10, 4 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v10, v8 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v8, v10, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslideup.vi v9, v8, 6 -; LMULMAX1-NEXT: vse8.v v9, (a1) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: fp2ui_v8f64_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vfncvt.rtz.xu.f.w v12, 
v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v12, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0 +; CHECK-NEXT: vse8.v v8, (a1) +; CHECK-NEXT: ret %a = load <8 x double>, ptr %x %d = fptoui <8 x double> %a to <8 x i8> store <8 x i8> %d, ptr %y @@ -917,111 +607,25 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) { } define <8 x i1> @fp2si_v8f64_v8i1(<8 x double> %x) { -; LMULMAX8-LABEL: fp2si_v8f64_v8i1: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX8-NEXT: vfncvt.rtz.x.f.w v12, v8 -; LMULMAX8-NEXT: vand.vi v8, v12, 1 -; LMULMAX8-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX8-NEXT: ret -; -; LMULMAX1-LABEL: fp2si_v8f64_v8i1: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v12, v8 -; LMULMAX1-NEXT: vand.vi v8, v12, 1 -; LMULMAX1-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vmerge.vim v12, v8, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v13, v9 -; LMULMAX1-NEXT: vand.vi v9, v13, 1 -; LMULMAX1-NEXT: vmsne.vi v0, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; LMULMAX1-NEXT: vmv.v.i v9, 0 -; LMULMAX1-NEXT: vmerge.vim v13, v9, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, tu, ma -; LMULMAX1-NEXT: vslideup.vi v12, v13, 2 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vmsne.vi v0, v12, 0 -; LMULMAX1-NEXT: vmerge.vim v12, v8, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v13, v10 -; LMULMAX1-NEXT: vand.vi v10, v13, 1 -; LMULMAX1-NEXT: vmsne.vi v0, v10, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; LMULMAX1-NEXT: vmerge.vim v10, v9, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 6, e8, mf2, tu, ma -; LMULMAX1-NEXT: vslideup.vi v12, v10, 4 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, 
ma -; LMULMAX1-NEXT: vmsne.vi v0, v12, 0 -; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v10, v11 -; LMULMAX1-NEXT: vand.vi v10, v10, 1 -; LMULMAX1-NEXT: vmsne.vi v0, v10, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; LMULMAX1-NEXT: vmerge.vim v9, v9, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslideup.vi v8, v9, 6 -; LMULMAX1-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX1-NEXT: ret +; CHECK-LABEL: fp2si_v8f64_v8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vfncvt.rtz.x.f.w v12, v8 +; CHECK-NEXT: vand.vi v8, v12, 1 +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: ret %z = fptosi <8 x double> %x to <8 x i1> ret <8 x i1> %z } define <8 x i1> @fp2ui_v8f64_v8i1(<8 x double> %x) { -; LMULMAX8-LABEL: fp2ui_v8f64_v8i1: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX8-NEXT: vfncvt.rtz.xu.f.w v12, v8 -; LMULMAX8-NEXT: vand.vi v8, v12, 1 -; LMULMAX8-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX8-NEXT: ret -; -; LMULMAX1-LABEL: fp2ui_v8f64_v8i1: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v12, v8 -; LMULMAX1-NEXT: vand.vi v8, v12, 1 -; LMULMAX1-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vmerge.vim v12, v8, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v13, v9 -; LMULMAX1-NEXT: vand.vi v9, v13, 1 -; LMULMAX1-NEXT: vmsne.vi v0, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; LMULMAX1-NEXT: vmv.v.i v9, 0 -; LMULMAX1-NEXT: vmerge.vim v13, v9, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, tu, ma -; LMULMAX1-NEXT: vslideup.vi v12, v13, 2 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vmsne.vi v0, v12, 0 -; LMULMAX1-NEXT: vmerge.vim v12, v8, 1, v0 -; 
LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v13, v10 -; LMULMAX1-NEXT: vand.vi v10, v13, 1 -; LMULMAX1-NEXT: vmsne.vi v0, v10, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; LMULMAX1-NEXT: vmerge.vim v10, v9, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 6, e8, mf2, tu, ma -; LMULMAX1-NEXT: vslideup.vi v12, v10, 4 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vmsne.vi v0, v12, 0 -; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v10, v11 -; LMULMAX1-NEXT: vand.vi v10, v10, 1 -; LMULMAX1-NEXT: vmsne.vi v0, v10, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; LMULMAX1-NEXT: vmerge.vim v9, v9, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslideup.vi v8, v9, 6 -; LMULMAX1-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX1-NEXT: ret +; CHECK-LABEL: fp2ui_v8f64_v8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vfncvt.rtz.xu.f.w v12, v8 +; CHECK-NEXT: vand.vi v8, v12, 1 +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: ret %z = fptoui <8 x double> %x to <8 x i1> ret <8 x i1> %z } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll index ec11ada12eaa76..6ffa6ac250ed7f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll @@ -1,10 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8ZVFH,LMULMAX8RV32 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s 
--check-prefixes=CHECK,LMULMAX8,LMULMAX8ZVFH,LMULMAX8RV64 -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1RV32 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1RV64 -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfhmin,+f,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8ZVFHMIN,LMULMAX8RV32ZVFHMIN -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfhmin,+f,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8ZVFHMIN,LMULMAX8RV64ZVFHMIN +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVFH32 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVFH64 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfhmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN32 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfhmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN64 define void @si2fp_v2i32_v2f32(ptr %x, ptr %y) { ; CHECK-LABEL: si2fp_v2i32_v2f32: @@ -132,214 +130,146 @@ define <3 x float> @si2fp_v3i1_v3f32(<3 x i1> %x) { ; FIXME: This gets expanded instead of widened + promoted define <3 x float> @si2fp_v3i7_v3f32(<3 x i7> %x) { -; LMULMAX8RV32-LABEL: si2fp_v3i7_v3f32: -; LMULMAX8RV32: # %bb.0: -; LMULMAX8RV32-NEXT: lw a1, 4(a0) -; LMULMAX8RV32-NEXT: lw a2, 0(a0) -; LMULMAX8RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, 
ma -; LMULMAX8RV32-NEXT: lw a0, 8(a0) -; LMULMAX8RV32-NEXT: vmv.v.x v8, a2 -; LMULMAX8RV32-NEXT: vslide1down.vx v8, v8, a1 -; LMULMAX8RV32-NEXT: vslide1down.vx v8, v8, a0 -; LMULMAX8RV32-NEXT: vslidedown.vi v8, v8, 1 -; LMULMAX8RV32-NEXT: vadd.vv v8, v8, v8 -; LMULMAX8RV32-NEXT: vsra.vi v8, v8, 1 -; LMULMAX8RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; LMULMAX8RV32-NEXT: vsext.vf2 v9, v8 -; LMULMAX8RV32-NEXT: vfwcvt.f.x.v v8, v9 -; LMULMAX8RV32-NEXT: ret +; ZVFH32-LABEL: si2fp_v3i7_v3f32: +; ZVFH32: # %bb.0: +; ZVFH32-NEXT: lw a1, 4(a0) +; ZVFH32-NEXT: lw a2, 0(a0) +; ZVFH32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFH32-NEXT: lw a0, 8(a0) +; ZVFH32-NEXT: vmv.v.x v8, a2 +; ZVFH32-NEXT: vslide1down.vx v8, v8, a1 +; ZVFH32-NEXT: vslide1down.vx v8, v8, a0 +; ZVFH32-NEXT: vslidedown.vi v8, v8, 1 +; ZVFH32-NEXT: vadd.vv v8, v8, v8 +; ZVFH32-NEXT: vsra.vi v8, v8, 1 +; ZVFH32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFH32-NEXT: vsext.vf2 v9, v8 +; ZVFH32-NEXT: vfwcvt.f.x.v v8, v9 +; ZVFH32-NEXT: ret ; -; LMULMAX8RV64-LABEL: si2fp_v3i7_v3f32: -; LMULMAX8RV64: # %bb.0: -; LMULMAX8RV64-NEXT: ld a1, 8(a0) -; LMULMAX8RV64-NEXT: ld a2, 0(a0) -; LMULMAX8RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX8RV64-NEXT: ld a0, 16(a0) -; LMULMAX8RV64-NEXT: vmv.v.x v8, a2 -; LMULMAX8RV64-NEXT: vslide1down.vx v8, v8, a1 -; LMULMAX8RV64-NEXT: vslide1down.vx v8, v8, a0 -; LMULMAX8RV64-NEXT: vslidedown.vi v8, v8, 1 -; LMULMAX8RV64-NEXT: vadd.vv v8, v8, v8 -; LMULMAX8RV64-NEXT: vsra.vi v8, v8, 1 -; LMULMAX8RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; LMULMAX8RV64-NEXT: vsext.vf2 v9, v8 -; LMULMAX8RV64-NEXT: vfwcvt.f.x.v v8, v9 -; LMULMAX8RV64-NEXT: ret +; ZVFH64-LABEL: si2fp_v3i7_v3f32: +; ZVFH64: # %bb.0: +; ZVFH64-NEXT: ld a1, 8(a0) +; ZVFH64-NEXT: ld a2, 0(a0) +; ZVFH64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFH64-NEXT: ld a0, 16(a0) +; ZVFH64-NEXT: vmv.v.x v8, a2 +; ZVFH64-NEXT: vslide1down.vx v8, v8, a1 +; ZVFH64-NEXT: vslide1down.vx v8, v8, a0 +; 
ZVFH64-NEXT: vslidedown.vi v8, v8, 1 +; ZVFH64-NEXT: vadd.vv v8, v8, v8 +; ZVFH64-NEXT: vsra.vi v8, v8, 1 +; ZVFH64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFH64-NEXT: vsext.vf2 v9, v8 +; ZVFH64-NEXT: vfwcvt.f.x.v v8, v9 +; ZVFH64-NEXT: ret ; -; LMULMAX1RV32-LABEL: si2fp_v3i7_v3f32: -; LMULMAX1RV32: # %bb.0: -; LMULMAX1RV32-NEXT: lw a1, 4(a0) -; LMULMAX1RV32-NEXT: lw a2, 0(a0) -; LMULMAX1RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1RV32-NEXT: lw a0, 8(a0) -; LMULMAX1RV32-NEXT: vmv.v.x v8, a2 -; LMULMAX1RV32-NEXT: vslide1down.vx v8, v8, a1 -; LMULMAX1RV32-NEXT: vslide1down.vx v8, v8, a0 -; LMULMAX1RV32-NEXT: vslidedown.vi v8, v8, 1 -; LMULMAX1RV32-NEXT: vadd.vv v8, v8, v8 -; LMULMAX1RV32-NEXT: vsra.vi v8, v8, 1 -; LMULMAX1RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; LMULMAX1RV32-NEXT: vsext.vf2 v9, v8 -; LMULMAX1RV32-NEXT: vfwcvt.f.x.v v8, v9 -; LMULMAX1RV32-NEXT: ret +; ZVFHMIN32-LABEL: si2fp_v3i7_v3f32: +; ZVFHMIN32: # %bb.0: +; ZVFHMIN32-NEXT: lw a1, 4(a0) +; ZVFHMIN32-NEXT: lw a2, 0(a0) +; ZVFHMIN32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFHMIN32-NEXT: lw a0, 8(a0) +; ZVFHMIN32-NEXT: vmv.v.x v8, a2 +; ZVFHMIN32-NEXT: vslide1down.vx v8, v8, a1 +; ZVFHMIN32-NEXT: vslide1down.vx v8, v8, a0 +; ZVFHMIN32-NEXT: vslidedown.vi v8, v8, 1 +; ZVFHMIN32-NEXT: vadd.vv v8, v8, v8 +; ZVFHMIN32-NEXT: vsra.vi v8, v8, 1 +; ZVFHMIN32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN32-NEXT: vsext.vf2 v9, v8 +; ZVFHMIN32-NEXT: vfwcvt.f.x.v v8, v9 +; ZVFHMIN32-NEXT: ret ; -; LMULMAX1RV64-LABEL: si2fp_v3i7_v3f32: -; LMULMAX1RV64: # %bb.0: -; LMULMAX1RV64-NEXT: ld a1, 8(a0) -; LMULMAX1RV64-NEXT: ld a2, 0(a0) -; LMULMAX1RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1RV64-NEXT: ld a0, 16(a0) -; LMULMAX1RV64-NEXT: vmv.v.x v8, a2 -; LMULMAX1RV64-NEXT: vslide1down.vx v8, v8, a1 -; LMULMAX1RV64-NEXT: vslide1down.vx v8, v8, a0 -; LMULMAX1RV64-NEXT: vslidedown.vi v8, v8, 1 -; LMULMAX1RV64-NEXT: vadd.vv v8, v8, v8 -; LMULMAX1RV64-NEXT: vsra.vi v8, v8, 
1 -; LMULMAX1RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; LMULMAX1RV64-NEXT: vsext.vf2 v9, v8 -; LMULMAX1RV64-NEXT: vfwcvt.f.x.v v8, v9 -; LMULMAX1RV64-NEXT: ret -; -; LMULMAX8RV32ZVFHMIN-LABEL: si2fp_v3i7_v3f32: -; LMULMAX8RV32ZVFHMIN: # %bb.0: -; LMULMAX8RV32ZVFHMIN-NEXT: lw a1, 4(a0) -; LMULMAX8RV32ZVFHMIN-NEXT: lw a2, 0(a0) -; LMULMAX8RV32ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX8RV32ZVFHMIN-NEXT: lw a0, 8(a0) -; LMULMAX8RV32ZVFHMIN-NEXT: vmv.v.x v8, a2 -; LMULMAX8RV32ZVFHMIN-NEXT: vslide1down.vx v8, v8, a1 -; LMULMAX8RV32ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 -; LMULMAX8RV32ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 -; LMULMAX8RV32ZVFHMIN-NEXT: vadd.vv v8, v8, v8 -; LMULMAX8RV32ZVFHMIN-NEXT: vsra.vi v8, v8, 1 -; LMULMAX8RV32ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; LMULMAX8RV32ZVFHMIN-NEXT: vsext.vf2 v9, v8 -; LMULMAX8RV32ZVFHMIN-NEXT: vfwcvt.f.x.v v8, v9 -; LMULMAX8RV32ZVFHMIN-NEXT: ret -; -; LMULMAX8RV64ZVFHMIN-LABEL: si2fp_v3i7_v3f32: -; LMULMAX8RV64ZVFHMIN: # %bb.0: -; LMULMAX8RV64ZVFHMIN-NEXT: ld a1, 8(a0) -; LMULMAX8RV64ZVFHMIN-NEXT: ld a2, 0(a0) -; LMULMAX8RV64ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX8RV64ZVFHMIN-NEXT: ld a0, 16(a0) -; LMULMAX8RV64ZVFHMIN-NEXT: vmv.v.x v8, a2 -; LMULMAX8RV64ZVFHMIN-NEXT: vslide1down.vx v8, v8, a1 -; LMULMAX8RV64ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 -; LMULMAX8RV64ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 -; LMULMAX8RV64ZVFHMIN-NEXT: vadd.vv v8, v8, v8 -; LMULMAX8RV64ZVFHMIN-NEXT: vsra.vi v8, v8, 1 -; LMULMAX8RV64ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; LMULMAX8RV64ZVFHMIN-NEXT: vsext.vf2 v9, v8 -; LMULMAX8RV64ZVFHMIN-NEXT: vfwcvt.f.x.v v8, v9 -; LMULMAX8RV64ZVFHMIN-NEXT: ret +; ZVFHMIN64-LABEL: si2fp_v3i7_v3f32: +; ZVFHMIN64: # %bb.0: +; ZVFHMIN64-NEXT: ld a1, 8(a0) +; ZVFHMIN64-NEXT: ld a2, 0(a0) +; ZVFHMIN64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFHMIN64-NEXT: ld a0, 16(a0) +; ZVFHMIN64-NEXT: vmv.v.x v8, a2 +; ZVFHMIN64-NEXT: vslide1down.vx v8, v8, a1 +; 
ZVFHMIN64-NEXT: vslide1down.vx v8, v8, a0 +; ZVFHMIN64-NEXT: vslidedown.vi v8, v8, 1 +; ZVFHMIN64-NEXT: vadd.vv v8, v8, v8 +; ZVFHMIN64-NEXT: vsra.vi v8, v8, 1 +; ZVFHMIN64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN64-NEXT: vsext.vf2 v9, v8 +; ZVFHMIN64-NEXT: vfwcvt.f.x.v v8, v9 +; ZVFHMIN64-NEXT: ret %z = sitofp <3 x i7> %x to <3 x float> ret <3 x float> %z } ; FIXME: This gets expanded instead of widened + promoted define <3 x float> @ui2fp_v3i7_v3f32(<3 x i7> %x) { -; LMULMAX8RV32-LABEL: ui2fp_v3i7_v3f32: -; LMULMAX8RV32: # %bb.0: -; LMULMAX8RV32-NEXT: lw a1, 4(a0) -; LMULMAX8RV32-NEXT: lw a2, 0(a0) -; LMULMAX8RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX8RV32-NEXT: lw a0, 8(a0) -; LMULMAX8RV32-NEXT: vmv.v.x v8, a2 -; LMULMAX8RV32-NEXT: vslide1down.vx v8, v8, a1 -; LMULMAX8RV32-NEXT: vslide1down.vx v8, v8, a0 -; LMULMAX8RV32-NEXT: vslidedown.vi v8, v8, 1 -; LMULMAX8RV32-NEXT: li a0, 127 -; LMULMAX8RV32-NEXT: vand.vx v8, v8, a0 -; LMULMAX8RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; LMULMAX8RV32-NEXT: vzext.vf2 v9, v8 -; LMULMAX8RV32-NEXT: vfwcvt.f.xu.v v8, v9 -; LMULMAX8RV32-NEXT: ret -; -; LMULMAX8RV64-LABEL: ui2fp_v3i7_v3f32: -; LMULMAX8RV64: # %bb.0: -; LMULMAX8RV64-NEXT: ld a1, 8(a0) -; LMULMAX8RV64-NEXT: ld a2, 0(a0) -; LMULMAX8RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX8RV64-NEXT: ld a0, 16(a0) -; LMULMAX8RV64-NEXT: vmv.v.x v8, a2 -; LMULMAX8RV64-NEXT: vslide1down.vx v8, v8, a1 -; LMULMAX8RV64-NEXT: vslide1down.vx v8, v8, a0 -; LMULMAX8RV64-NEXT: vslidedown.vi v8, v8, 1 -; LMULMAX8RV64-NEXT: li a0, 127 -; LMULMAX8RV64-NEXT: vand.vx v8, v8, a0 -; LMULMAX8RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; LMULMAX8RV64-NEXT: vzext.vf2 v9, v8 -; LMULMAX8RV64-NEXT: vfwcvt.f.xu.v v8, v9 -; LMULMAX8RV64-NEXT: ret -; -; LMULMAX1RV32-LABEL: ui2fp_v3i7_v3f32: -; LMULMAX1RV32: # %bb.0: -; LMULMAX1RV32-NEXT: lw a1, 4(a0) -; LMULMAX1RV32-NEXT: lw a2, 0(a0) -; LMULMAX1RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; 
LMULMAX1RV32-NEXT: lw a0, 8(a0) -; LMULMAX1RV32-NEXT: vmv.v.x v8, a2 -; LMULMAX1RV32-NEXT: vslide1down.vx v8, v8, a1 -; LMULMAX1RV32-NEXT: vslide1down.vx v8, v8, a0 -; LMULMAX1RV32-NEXT: vslidedown.vi v8, v8, 1 -; LMULMAX1RV32-NEXT: li a0, 127 -; LMULMAX1RV32-NEXT: vand.vx v8, v8, a0 -; LMULMAX1RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; LMULMAX1RV32-NEXT: vzext.vf2 v9, v8 -; LMULMAX1RV32-NEXT: vfwcvt.f.xu.v v8, v9 -; LMULMAX1RV32-NEXT: ret +; ZVFH32-LABEL: ui2fp_v3i7_v3f32: +; ZVFH32: # %bb.0: +; ZVFH32-NEXT: lw a1, 4(a0) +; ZVFH32-NEXT: lw a2, 0(a0) +; ZVFH32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFH32-NEXT: lw a0, 8(a0) +; ZVFH32-NEXT: vmv.v.x v8, a2 +; ZVFH32-NEXT: vslide1down.vx v8, v8, a1 +; ZVFH32-NEXT: vslide1down.vx v8, v8, a0 +; ZVFH32-NEXT: vslidedown.vi v8, v8, 1 +; ZVFH32-NEXT: li a0, 127 +; ZVFH32-NEXT: vand.vx v8, v8, a0 +; ZVFH32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFH32-NEXT: vzext.vf2 v9, v8 +; ZVFH32-NEXT: vfwcvt.f.xu.v v8, v9 +; ZVFH32-NEXT: ret ; -; LMULMAX1RV64-LABEL: ui2fp_v3i7_v3f32: -; LMULMAX1RV64: # %bb.0: -; LMULMAX1RV64-NEXT: ld a1, 8(a0) -; LMULMAX1RV64-NEXT: ld a2, 0(a0) -; LMULMAX1RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1RV64-NEXT: ld a0, 16(a0) -; LMULMAX1RV64-NEXT: vmv.v.x v8, a2 -; LMULMAX1RV64-NEXT: vslide1down.vx v8, v8, a1 -; LMULMAX1RV64-NEXT: vslide1down.vx v8, v8, a0 -; LMULMAX1RV64-NEXT: vslidedown.vi v8, v8, 1 -; LMULMAX1RV64-NEXT: li a0, 127 -; LMULMAX1RV64-NEXT: vand.vx v8, v8, a0 -; LMULMAX1RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; LMULMAX1RV64-NEXT: vzext.vf2 v9, v8 -; LMULMAX1RV64-NEXT: vfwcvt.f.xu.v v8, v9 -; LMULMAX1RV64-NEXT: ret +; ZVFH64-LABEL: ui2fp_v3i7_v3f32: +; ZVFH64: # %bb.0: +; ZVFH64-NEXT: ld a1, 8(a0) +; ZVFH64-NEXT: ld a2, 0(a0) +; ZVFH64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFH64-NEXT: ld a0, 16(a0) +; ZVFH64-NEXT: vmv.v.x v8, a2 +; ZVFH64-NEXT: vslide1down.vx v8, v8, a1 +; ZVFH64-NEXT: vslide1down.vx v8, v8, a0 +; ZVFH64-NEXT: vslidedown.vi v8, 
v8, 1 +; ZVFH64-NEXT: li a0, 127 +; ZVFH64-NEXT: vand.vx v8, v8, a0 +; ZVFH64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFH64-NEXT: vzext.vf2 v9, v8 +; ZVFH64-NEXT: vfwcvt.f.xu.v v8, v9 +; ZVFH64-NEXT: ret ; -; LMULMAX8RV32ZVFHMIN-LABEL: ui2fp_v3i7_v3f32: -; LMULMAX8RV32ZVFHMIN: # %bb.0: -; LMULMAX8RV32ZVFHMIN-NEXT: lw a1, 4(a0) -; LMULMAX8RV32ZVFHMIN-NEXT: lw a2, 0(a0) -; LMULMAX8RV32ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX8RV32ZVFHMIN-NEXT: lw a0, 8(a0) -; LMULMAX8RV32ZVFHMIN-NEXT: vmv.v.x v8, a2 -; LMULMAX8RV32ZVFHMIN-NEXT: vslide1down.vx v8, v8, a1 -; LMULMAX8RV32ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 -; LMULMAX8RV32ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 -; LMULMAX8RV32ZVFHMIN-NEXT: li a0, 127 -; LMULMAX8RV32ZVFHMIN-NEXT: vand.vx v8, v8, a0 -; LMULMAX8RV32ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; LMULMAX8RV32ZVFHMIN-NEXT: vzext.vf2 v9, v8 -; LMULMAX8RV32ZVFHMIN-NEXT: vfwcvt.f.xu.v v8, v9 -; LMULMAX8RV32ZVFHMIN-NEXT: ret +; ZVFHMIN32-LABEL: ui2fp_v3i7_v3f32: +; ZVFHMIN32: # %bb.0: +; ZVFHMIN32-NEXT: lw a1, 4(a0) +; ZVFHMIN32-NEXT: lw a2, 0(a0) +; ZVFHMIN32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFHMIN32-NEXT: lw a0, 8(a0) +; ZVFHMIN32-NEXT: vmv.v.x v8, a2 +; ZVFHMIN32-NEXT: vslide1down.vx v8, v8, a1 +; ZVFHMIN32-NEXT: vslide1down.vx v8, v8, a0 +; ZVFHMIN32-NEXT: vslidedown.vi v8, v8, 1 +; ZVFHMIN32-NEXT: li a0, 127 +; ZVFHMIN32-NEXT: vand.vx v8, v8, a0 +; ZVFHMIN32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN32-NEXT: vzext.vf2 v9, v8 +; ZVFHMIN32-NEXT: vfwcvt.f.xu.v v8, v9 +; ZVFHMIN32-NEXT: ret ; -; LMULMAX8RV64ZVFHMIN-LABEL: ui2fp_v3i7_v3f32: -; LMULMAX8RV64ZVFHMIN: # %bb.0: -; LMULMAX8RV64ZVFHMIN-NEXT: ld a1, 8(a0) -; LMULMAX8RV64ZVFHMIN-NEXT: ld a2, 0(a0) -; LMULMAX8RV64ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX8RV64ZVFHMIN-NEXT: ld a0, 16(a0) -; LMULMAX8RV64ZVFHMIN-NEXT: vmv.v.x v8, a2 -; LMULMAX8RV64ZVFHMIN-NEXT: vslide1down.vx v8, v8, a1 -; LMULMAX8RV64ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 
-; LMULMAX8RV64ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 -; LMULMAX8RV64ZVFHMIN-NEXT: li a0, 127 -; LMULMAX8RV64ZVFHMIN-NEXT: vand.vx v8, v8, a0 -; LMULMAX8RV64ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; LMULMAX8RV64ZVFHMIN-NEXT: vzext.vf2 v9, v8 -; LMULMAX8RV64ZVFHMIN-NEXT: vfwcvt.f.xu.v v8, v9 -; LMULMAX8RV64ZVFHMIN-NEXT: ret +; ZVFHMIN64-LABEL: ui2fp_v3i7_v3f32: +; ZVFHMIN64: # %bb.0: +; ZVFHMIN64-NEXT: ld a1, 8(a0) +; ZVFHMIN64-NEXT: ld a2, 0(a0) +; ZVFHMIN64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVFHMIN64-NEXT: ld a0, 16(a0) +; ZVFHMIN64-NEXT: vmv.v.x v8, a2 +; ZVFHMIN64-NEXT: vslide1down.vx v8, v8, a1 +; ZVFHMIN64-NEXT: vslide1down.vx v8, v8, a0 +; ZVFHMIN64-NEXT: vslidedown.vi v8, v8, 1 +; ZVFHMIN64-NEXT: li a0, 127 +; ZVFHMIN64-NEXT: vand.vx v8, v8, a0 +; ZVFHMIN64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN64-NEXT: vzext.vf2 v9, v8 +; ZVFHMIN64-NEXT: vfwcvt.f.xu.v v8, v9 +; ZVFHMIN64-NEXT: ret %z = uitofp <3 x i7> %x to <3 x float> ret <3 x float> %z } @@ -357,26 +287,13 @@ define <3 x float> @ui2fp_v3i1_v3f32(<3 x i1> %x) { } define void @si2fp_v8i32_v8f32(ptr %x, ptr %y) { -; LMULMAX8-LABEL: si2fp_v8i32_v8f32: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX8-NEXT: vle32.v v8, (a0) -; LMULMAX8-NEXT: vfcvt.f.x.v v8, v8 -; LMULMAX8-NEXT: vse32.v v8, (a1) -; LMULMAX8-NEXT: ret -; -; LMULMAX1-LABEL: si2fp_v8i32_v8f32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: addi a2, a0, 16 -; LMULMAX1-NEXT: vle32.v v8, (a2) -; LMULMAX1-NEXT: vle32.v v9, (a0) -; LMULMAX1-NEXT: vfcvt.f.x.v v8, v8 -; LMULMAX1-NEXT: vfcvt.f.x.v v9, v9 -; LMULMAX1-NEXT: vse32.v v9, (a1) -; LMULMAX1-NEXT: addi a1, a1, 16 -; LMULMAX1-NEXT: vse32.v v8, (a1) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: si2fp_v8i32_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vfcvt.f.x.v v8, v8 +; CHECK-NEXT: vse32.v v8, (a1) +; CHECK-NEXT: 
ret %a = load <8 x i32>, ptr %x %d = sitofp <8 x i32> %a to <8 x float> store <8 x float> %d, ptr %y @@ -384,26 +301,13 @@ define void @si2fp_v8i32_v8f32(ptr %x, ptr %y) { } define void @ui2fp_v8i32_v8f32(ptr %x, ptr %y) { -; LMULMAX8-LABEL: ui2fp_v8i32_v8f32: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX8-NEXT: vle32.v v8, (a0) -; LMULMAX8-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX8-NEXT: vse32.v v8, (a1) -; LMULMAX8-NEXT: ret -; -; LMULMAX1-LABEL: ui2fp_v8i32_v8f32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: addi a2, a0, 16 -; LMULMAX1-NEXT: vle32.v v8, (a2) -; LMULMAX1-NEXT: vle32.v v9, (a0) -; LMULMAX1-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX1-NEXT: vfcvt.f.xu.v v9, v9 -; LMULMAX1-NEXT: vse32.v v9, (a1) -; LMULMAX1-NEXT: addi a1, a1, 16 -; LMULMAX1-NEXT: vse32.v v8, (a1) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: ui2fp_v8i32_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vfcvt.f.xu.v v8, v8 +; CHECK-NEXT: vse32.v v8, (a1) +; CHECK-NEXT: ret %a = load <8 x i32>, ptr %x %d = uitofp <8 x i32> %a to <8 x float> store <8 x float> %d, ptr %y @@ -411,61 +315,25 @@ define void @ui2fp_v8i32_v8f32(ptr %x, ptr %y) { } define <8 x float> @si2fp_v8i1_v8f32(<8 x i1> %x) { -; LMULMAX8-LABEL: si2fp_v8i1_v8f32: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX8-NEXT: vmv.v.i v8, 0 -; LMULMAX8-NEXT: vmerge.vim v10, v8, -1, v0 -; LMULMAX8-NEXT: vfwcvt.f.x.v v8, v10 -; LMULMAX8-NEXT: ret -; -; LMULMAX1-LABEL: si2fp_v8i1_v8f32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-NEXT: vmv.v.i v9, 0 -; LMULMAX1-NEXT: vmerge.vim v10, v9, -1, v0 -; LMULMAX1-NEXT: vfwcvt.f.x.v v8, v10 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vmv.v.i v10, 0 -; LMULMAX1-NEXT: vmerge.vim v10, v10, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; 
LMULMAX1-NEXT: vslidedown.vi v10, v10, 4 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1-NEXT: vmsne.vi v0, v10, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; LMULMAX1-NEXT: vmerge.vim v10, v9, -1, v0 -; LMULMAX1-NEXT: vfwcvt.f.x.v v9, v10 -; LMULMAX1-NEXT: ret +; CHECK-LABEL: si2fp_v8i1_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v10, v8, -1, v0 +; CHECK-NEXT: vfwcvt.f.x.v v8, v10 +; CHECK-NEXT: ret %z = sitofp <8 x i1> %x to <8 x float> ret <8 x float> %z } define <8 x float> @ui2fp_v8i1_v8f32(<8 x i1> %x) { -; LMULMAX8-LABEL: ui2fp_v8i1_v8f32: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX8-NEXT: vmv.v.i v8, 0 -; LMULMAX8-NEXT: vmerge.vim v10, v8, 1, v0 -; LMULMAX8-NEXT: vfwcvt.f.xu.v v8, v10 -; LMULMAX8-NEXT: ret -; -; LMULMAX1-LABEL: ui2fp_v8i1_v8f32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-NEXT: vmv.v.i v9, 0 -; LMULMAX1-NEXT: vmerge.vim v10, v9, 1, v0 -; LMULMAX1-NEXT: vfwcvt.f.xu.v v8, v10 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vmv.v.i v10, 0 -; LMULMAX1-NEXT: vmerge.vim v10, v10, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v10, v10, 4 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1-NEXT: vmsne.vi v0, v10, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; LMULMAX1-NEXT: vmerge.vim v10, v9, 1, v0 -; LMULMAX1-NEXT: vfwcvt.f.xu.v v9, v10 -; LMULMAX1-NEXT: ret +; CHECK-LABEL: ui2fp_v8i1_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v10, v8, 1, v0 +; CHECK-NEXT: vfwcvt.f.xu.v v8, v10 +; CHECK-NEXT: ret %z = uitofp <8 x i1> %x to <8 x float> ret <8 x float> %z } @@ -501,44 +369,14 @@ define void @ui2fp_v2i16_v2f64(ptr %x, ptr %y) { } define void @si2fp_v8i16_v8f64(ptr %x, ptr %y) { 
-; LMULMAX8-LABEL: si2fp_v8i16_v8f64: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX8-NEXT: vle16.v v8, (a0) -; LMULMAX8-NEXT: vsext.vf2 v10, v8 -; LMULMAX8-NEXT: vfwcvt.f.x.v v12, v10 -; LMULMAX8-NEXT: vse64.v v12, (a1) -; LMULMAX8-NEXT: ret -; -; LMULMAX1-LABEL: si2fp_v8i16_v8f64: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vle16.v v8, (a0) -; LMULMAX1-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vsext.vf2 v10, v9 -; LMULMAX1-NEXT: vfwcvt.f.x.v v9, v10 -; LMULMAX1-NEXT: vsext.vf2 v10, v8 -; LMULMAX1-NEXT: vfwcvt.f.x.v v11, v10 -; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v8, 4 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vsext.vf2 v10, v8 -; LMULMAX1-NEXT: vfwcvt.f.x.v v12, v10 -; LMULMAX1-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vsext.vf2 v10, v8 -; LMULMAX1-NEXT: vfwcvt.f.x.v v8, v10 -; LMULMAX1-NEXT: addi a0, a1, 48 -; LMULMAX1-NEXT: vse64.v v8, (a0) -; LMULMAX1-NEXT: addi a0, a1, 32 -; LMULMAX1-NEXT: vse64.v v12, (a0) -; LMULMAX1-NEXT: vse64.v v11, (a1) -; LMULMAX1-NEXT: addi a1, a1, 16 -; LMULMAX1-NEXT: vse64.v v9, (a1) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: si2fp_v8i16_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsext.vf2 v10, v8 +; CHECK-NEXT: vfwcvt.f.x.v v12, v10 +; CHECK-NEXT: vse64.v v12, (a1) +; CHECK-NEXT: ret %a = load <8 x i16>, ptr %x %d = sitofp <8 x i16> %a to <8 x double> store <8 x double> %d, ptr %y @@ -546,44 +384,14 @@ define void @si2fp_v8i16_v8f64(ptr %x, ptr %y) { } define void @ui2fp_v8i16_v8f64(ptr %x, ptr %y) { -; LMULMAX8-LABEL: ui2fp_v8i16_v8f64: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: 
vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX8-NEXT: vle16.v v8, (a0) -; LMULMAX8-NEXT: vzext.vf2 v10, v8 -; LMULMAX8-NEXT: vfwcvt.f.xu.v v12, v10 -; LMULMAX8-NEXT: vse64.v v12, (a1) -; LMULMAX8-NEXT: ret -; -; LMULMAX1-LABEL: ui2fp_v8i16_v8f64: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vle16.v v8, (a0) -; LMULMAX1-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vzext.vf2 v10, v9 -; LMULMAX1-NEXT: vfwcvt.f.xu.v v9, v10 -; LMULMAX1-NEXT: vzext.vf2 v10, v8 -; LMULMAX1-NEXT: vfwcvt.f.xu.v v11, v10 -; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v8, 4 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vzext.vf2 v10, v8 -; LMULMAX1-NEXT: vfwcvt.f.xu.v v12, v10 -; LMULMAX1-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vzext.vf2 v10, v8 -; LMULMAX1-NEXT: vfwcvt.f.xu.v v8, v10 -; LMULMAX1-NEXT: addi a0, a1, 48 -; LMULMAX1-NEXT: vse64.v v8, (a0) -; LMULMAX1-NEXT: addi a0, a1, 32 -; LMULMAX1-NEXT: vse64.v v12, (a0) -; LMULMAX1-NEXT: vse64.v v11, (a1) -; LMULMAX1-NEXT: addi a1, a1, 16 -; LMULMAX1-NEXT: vse64.v v9, (a1) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: ui2fp_v8i16_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vfwcvt.f.xu.v v12, v10 +; CHECK-NEXT: vse64.v v12, (a1) +; CHECK-NEXT: ret %a = load <8 x i16>, ptr %x %d = uitofp <8 x i16> %a to <8 x double> store <8 x double> %d, ptr %y @@ -591,103 +399,25 @@ define void @ui2fp_v8i16_v8f64(ptr %x, ptr %y) { } define <8 x double> @si2fp_v8i1_v8f64(<8 x i1> %x) { -; LMULMAX8-LABEL: si2fp_v8i1_v8f64: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX8-NEXT: vmv.v.i v8, 0 -; 
LMULMAX8-NEXT: vmerge.vim v12, v8, -1, v0 -; LMULMAX8-NEXT: vfwcvt.f.x.v v8, v12 -; LMULMAX8-NEXT: ret -; -; LMULMAX1-LABEL: si2fp_v8i1_v8f64: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vmv1r.v v10, v0 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vmv.v.i v11, 0 -; LMULMAX1-NEXT: vmerge.vim v9, v11, -1, v0 -; LMULMAX1-NEXT: vfwcvt.f.x.v v8, v9 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1-NEXT: vmv.v.i v12, 0 -; LMULMAX1-NEXT: vmerge.vim v9, v12, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v9, v9, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; LMULMAX1-NEXT: vmsne.vi v0, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; LMULMAX1-NEXT: vmerge.vim v13, v11, -1, v0 -; LMULMAX1-NEXT: vfwcvt.f.x.v v9, v13 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vmv.v.i v13, 0 -; LMULMAX1-NEXT: vmv1r.v v0, v10 -; LMULMAX1-NEXT: vmerge.vim v10, v13, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v10, v10, 4 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1-NEXT: vmsne.vi v0, v10, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vmerge.vim v13, v11, -1, v0 -; LMULMAX1-NEXT: vfwcvt.f.x.v v10, v13 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1-NEXT: vmerge.vim v12, v12, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v12, v12, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; LMULMAX1-NEXT: vmsne.vi v0, v12, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; LMULMAX1-NEXT: vmerge.vim v12, v11, -1, v0 -; LMULMAX1-NEXT: vfwcvt.f.x.v v11, v12 -; LMULMAX1-NEXT: ret +; CHECK-LABEL: si2fp_v8i1_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v12, v8, -1, v0 +; CHECK-NEXT: vfwcvt.f.x.v v8, v12 +; CHECK-NEXT: ret %z = sitofp <8 x 
i1> %x to <8 x double> ret <8 x double> %z } define <8 x double> @ui2fp_v8i1_v8f64(<8 x i1> %x) { -; LMULMAX8-LABEL: ui2fp_v8i1_v8f64: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX8-NEXT: vmv.v.i v8, 0 -; LMULMAX8-NEXT: vmerge.vim v12, v8, 1, v0 -; LMULMAX8-NEXT: vfwcvt.f.xu.v v8, v12 -; LMULMAX8-NEXT: ret -; -; LMULMAX1-LABEL: ui2fp_v8i1_v8f64: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vmv1r.v v10, v0 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vmv.v.i v11, 0 -; LMULMAX1-NEXT: vmerge.vim v9, v11, 1, v0 -; LMULMAX1-NEXT: vfwcvt.f.xu.v v8, v9 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1-NEXT: vmv.v.i v12, 0 -; LMULMAX1-NEXT: vmerge.vim v9, v12, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v9, v9, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; LMULMAX1-NEXT: vmsne.vi v0, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; LMULMAX1-NEXT: vmerge.vim v13, v11, 1, v0 -; LMULMAX1-NEXT: vfwcvt.f.xu.v v9, v13 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vmv.v.i v13, 0 -; LMULMAX1-NEXT: vmv1r.v v0, v10 -; LMULMAX1-NEXT: vmerge.vim v10, v13, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v10, v10, 4 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1-NEXT: vmsne.vi v0, v10, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vmerge.vim v13, v11, 1, v0 -; LMULMAX1-NEXT: vfwcvt.f.xu.v v10, v13 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1-NEXT: vmerge.vim v12, v12, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v12, v12, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; LMULMAX1-NEXT: vmsne.vi v0, v12, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; LMULMAX1-NEXT: vmerge.vim v12, v11, 1, v0 -; LMULMAX1-NEXT: vfwcvt.f.xu.v v11, v12 -; LMULMAX1-NEXT: ret 
+; CHECK-LABEL: ui2fp_v8i1_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v12, v8, 1, v0 +; CHECK-NEXT: vfwcvt.f.xu.v v8, v12 +; CHECK-NEXT: ret %z = uitofp <8 x i1> %x to <8 x double> ret <8 x double> %z } @@ -725,107 +455,57 @@ define void @ui2fp_v2i64_v2f16(ptr %x, ptr %y) { } define <2 x half> @si2fp_v2i1_v2f16(<2 x i1> %x) { -; LMULMAX8ZVFH-LABEL: si2fp_v2i1_v2f16: -; LMULMAX8ZVFH: # %bb.0: -; LMULMAX8ZVFH-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; LMULMAX8ZVFH-NEXT: vmv.v.i v8, 0 -; LMULMAX8ZVFH-NEXT: vmerge.vim v9, v8, -1, v0 -; LMULMAX8ZVFH-NEXT: vfwcvt.f.x.v v8, v9 -; LMULMAX8ZVFH-NEXT: ret -; -; LMULMAX1-LABEL: si2fp_v2i1_v2f16: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vmerge.vim v9, v8, -1, v0 -; LMULMAX1-NEXT: vfwcvt.f.x.v v8, v9 -; LMULMAX1-NEXT: ret +; ZVFH-LABEL: si2fp_v2i1_v2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; ZVFH-NEXT: vmv.v.i v8, 0 +; ZVFH-NEXT: vmerge.vim v9, v8, -1, v0 +; ZVFH-NEXT: vfwcvt.f.x.v v8, v9 +; ZVFH-NEXT: ret ; -; LMULMAX8ZVFHMIN-LABEL: si2fp_v2i1_v2f16: -; LMULMAX8ZVFHMIN: # %bb.0: -; LMULMAX8ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; LMULMAX8ZVFHMIN-NEXT: vmv.v.i v8, 0 -; LMULMAX8ZVFHMIN-NEXT: vmerge.vim v8, v8, -1, v0 -; LMULMAX8ZVFHMIN-NEXT: vfwcvt.f.x.v v9, v8 -; LMULMAX8ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 -; LMULMAX8ZVFHMIN-NEXT: ret +; ZVFHMIN-LABEL: si2fp_v2i1_v2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vmv.v.i v8, 0 +; ZVFHMIN-NEXT: vmerge.vim v8, v8, -1, v0 +; ZVFHMIN-NEXT: vfwcvt.f.x.v v9, v8 +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %z = sitofp <2 x i1> %x to <2 x half> ret <2 x half> %z } define <2 x half> @ui2fp_v2i1_v2f16(<2 x i1> %x) { -; LMULMAX8ZVFH-LABEL: ui2fp_v2i1_v2f16: -; LMULMAX8ZVFH: # %bb.0: -; LMULMAX8ZVFH-NEXT: vsetivli zero, 2, 
e8, mf8, ta, ma -; LMULMAX8ZVFH-NEXT: vmv.v.i v8, 0 -; LMULMAX8ZVFH-NEXT: vmerge.vim v9, v8, 1, v0 -; LMULMAX8ZVFH-NEXT: vfwcvt.f.xu.v v8, v9 -; LMULMAX8ZVFH-NEXT: ret +; ZVFH-LABEL: ui2fp_v2i1_v2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; ZVFH-NEXT: vmv.v.i v8, 0 +; ZVFH-NEXT: vmerge.vim v9, v8, 1, v0 +; ZVFH-NEXT: vfwcvt.f.xu.v v8, v9 +; ZVFH-NEXT: ret ; -; LMULMAX1-LABEL: ui2fp_v2i1_v2f16: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vmerge.vim v9, v8, 1, v0 -; LMULMAX1-NEXT: vfwcvt.f.xu.v v8, v9 -; LMULMAX1-NEXT: ret -; -; LMULMAX8ZVFHMIN-LABEL: ui2fp_v2i1_v2f16: -; LMULMAX8ZVFHMIN: # %bb.0: -; LMULMAX8ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; LMULMAX8ZVFHMIN-NEXT: vmv.v.i v8, 0 -; LMULMAX8ZVFHMIN-NEXT: vmerge.vim v8, v8, 1, v0 -; LMULMAX8ZVFHMIN-NEXT: vfwcvt.f.xu.v v9, v8 -; LMULMAX8ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 -; LMULMAX8ZVFHMIN-NEXT: ret +; ZVFHMIN-LABEL: ui2fp_v2i1_v2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vmv.v.i v8, 0 +; ZVFHMIN-NEXT: vmerge.vim v8, v8, 1, v0 +; ZVFHMIN-NEXT: vfwcvt.f.xu.v v9, v8 +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %z = uitofp <2 x i1> %x to <2 x half> ret <2 x half> %z } define void @si2fp_v8i64_v8f16(ptr %x, ptr %y) { -; LMULMAX8-LABEL: si2fp_v8i64_v8f16: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX8-NEXT: vle64.v v8, (a0) -; LMULMAX8-NEXT: vfncvt.f.x.w v12, v8 -; LMULMAX8-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; LMULMAX8-NEXT: vfncvt.f.f.w v8, v12 -; LMULMAX8-NEXT: vse16.v v8, (a1) -; LMULMAX8-NEXT: ret -; -; LMULMAX1-LABEL: si2fp_v8i64_v8f16: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi a2, a0, 48 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vle64.v v8, (a2) -; LMULMAX1-NEXT: addi a2, a0, 32 -; LMULMAX1-NEXT: vle64.v v9, (a0) -; LMULMAX1-NEXT: vle64.v v10, (a2) -; 
LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vle64.v v11, (a0) -; LMULMAX1-NEXT: vfncvt.f.x.w v12, v9 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vfncvt.f.f.w v9, v12 -; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; LMULMAX1-NEXT: vfncvt.f.x.w v12, v11 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vfncvt.f.f.w v11, v12 -; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, tu, ma -; LMULMAX1-NEXT: vslideup.vi v9, v11, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vfncvt.f.x.w v11, v10 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vfncvt.f.f.w v10, v11 -; LMULMAX1-NEXT: vsetivli zero, 6, e16, m1, tu, ma -; LMULMAX1-NEXT: vslideup.vi v9, v10, 4 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vfncvt.f.x.w v10, v8 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vfncvt.f.f.w v8, v10 -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vslideup.vi v9, v8, 6 -; LMULMAX1-NEXT: vse16.v v9, (a1) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: si2fp_v8i64_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vfncvt.f.x.w v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvt.f.f.w v8, v12 +; CHECK-NEXT: vse16.v v8, (a1) +; CHECK-NEXT: ret %a = load <8 x i64>, ptr %x %d = sitofp <8 x i64> %a to <8 x half> store <8 x half> %d, ptr %y @@ -833,49 +513,15 @@ define void @si2fp_v8i64_v8f16(ptr %x, ptr %y) { } define void @ui2fp_v8i64_v8f16(ptr %x, ptr %y) { -; LMULMAX8-LABEL: ui2fp_v8i64_v8f16: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX8-NEXT: vle64.v v8, (a0) -; LMULMAX8-NEXT: vfncvt.f.xu.w v12, v8 -; LMULMAX8-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; LMULMAX8-NEXT: vfncvt.f.f.w v8, v12 -; LMULMAX8-NEXT: vse16.v v8, (a1) -; LMULMAX8-NEXT: ret -; -; LMULMAX1-LABEL: 
ui2fp_v8i64_v8f16: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi a2, a0, 48 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vle64.v v8, (a2) -; LMULMAX1-NEXT: addi a2, a0, 32 -; LMULMAX1-NEXT: vle64.v v9, (a0) -; LMULMAX1-NEXT: vle64.v v10, (a2) -; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vle64.v v11, (a0) -; LMULMAX1-NEXT: vfncvt.f.xu.w v12, v9 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vfncvt.f.f.w v9, v12 -; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; LMULMAX1-NEXT: vfncvt.f.xu.w v12, v11 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vfncvt.f.f.w v11, v12 -; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, tu, ma -; LMULMAX1-NEXT: vslideup.vi v9, v11, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vfncvt.f.xu.w v11, v10 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vfncvt.f.f.w v10, v11 -; LMULMAX1-NEXT: vsetivli zero, 6, e16, m1, tu, ma -; LMULMAX1-NEXT: vslideup.vi v9, v10, 4 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vfncvt.f.xu.w v10, v8 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; LMULMAX1-NEXT: vfncvt.f.f.w v8, v10 -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vslideup.vi v9, v8, 6 -; LMULMAX1-NEXT: vse16.v v9, (a1) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: ui2fp_v8i64_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vfncvt.f.xu.w v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvt.f.f.w v8, v12 +; CHECK-NEXT: vse16.v v8, (a1) +; CHECK-NEXT: ret %a = load <8 x i64>, ptr %x %d = uitofp <8 x i64> %a to <8 x half> store <8 x half> %d, ptr %y @@ -883,59 +529,43 @@ define void @ui2fp_v8i64_v8f16(ptr %x, ptr %y) { } define <8 x half> @si2fp_v8i1_v8f16(<8 x i1> %x) { -; LMULMAX8ZVFH-LABEL: si2fp_v8i1_v8f16: -; LMULMAX8ZVFH: # %bb.0: -; LMULMAX8ZVFH-NEXT: 
vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX8ZVFH-NEXT: vmv.v.i v8, 0 -; LMULMAX8ZVFH-NEXT: vmerge.vim v9, v8, -1, v0 -; LMULMAX8ZVFH-NEXT: vfwcvt.f.x.v v8, v9 -; LMULMAX8ZVFH-NEXT: ret -; -; LMULMAX1-LABEL: si2fp_v8i1_v8f16: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vmerge.vim v9, v8, -1, v0 -; LMULMAX1-NEXT: vfwcvt.f.x.v v8, v9 -; LMULMAX1-NEXT: ret +; ZVFH-LABEL: si2fp_v8i1_v8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; ZVFH-NEXT: vmv.v.i v8, 0 +; ZVFH-NEXT: vmerge.vim v9, v8, -1, v0 +; ZVFH-NEXT: vfwcvt.f.x.v v8, v9 +; ZVFH-NEXT: ret ; -; LMULMAX8ZVFHMIN-LABEL: si2fp_v8i1_v8f16: -; LMULMAX8ZVFHMIN: # %bb.0: -; LMULMAX8ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX8ZVFHMIN-NEXT: vmv.v.i v8, 0 -; LMULMAX8ZVFHMIN-NEXT: vmerge.vim v8, v8, -1, v0 -; LMULMAX8ZVFHMIN-NEXT: vfwcvt.f.x.v v10, v8 -; LMULMAX8ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 -; LMULMAX8ZVFHMIN-NEXT: ret +; ZVFHMIN-LABEL: si2fp_v8i1_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vmv.v.i v8, 0 +; ZVFHMIN-NEXT: vmerge.vim v8, v8, -1, v0 +; ZVFHMIN-NEXT: vfwcvt.f.x.v v10, v8 +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %z = sitofp <8 x i1> %x to <8 x half> ret <8 x half> %z } define <8 x half> @ui2fp_v8i1_v8f16(<8 x i1> %x) { -; LMULMAX8ZVFH-LABEL: ui2fp_v8i1_v8f16: -; LMULMAX8ZVFH: # %bb.0: -; LMULMAX8ZVFH-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX8ZVFH-NEXT: vmv.v.i v8, 0 -; LMULMAX8ZVFH-NEXT: vmerge.vim v9, v8, 1, v0 -; LMULMAX8ZVFH-NEXT: vfwcvt.f.xu.v v8, v9 -; LMULMAX8ZVFH-NEXT: ret -; -; LMULMAX1-LABEL: ui2fp_v8i1_v8f16: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vmerge.vim v9, v8, 1, v0 -; LMULMAX1-NEXT: vfwcvt.f.xu.v v8, v9 -; LMULMAX1-NEXT: ret +; ZVFH-LABEL: ui2fp_v8i1_v8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 8, e8, 
mf2, ta, ma +; ZVFH-NEXT: vmv.v.i v8, 0 +; ZVFH-NEXT: vmerge.vim v9, v8, 1, v0 +; ZVFH-NEXT: vfwcvt.f.xu.v v8, v9 +; ZVFH-NEXT: ret ; -; LMULMAX8ZVFHMIN-LABEL: ui2fp_v8i1_v8f16: -; LMULMAX8ZVFHMIN: # %bb.0: -; LMULMAX8ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX8ZVFHMIN-NEXT: vmv.v.i v8, 0 -; LMULMAX8ZVFHMIN-NEXT: vmerge.vim v8, v8, 1, v0 -; LMULMAX8ZVFHMIN-NEXT: vfwcvt.f.xu.v v10, v8 -; LMULMAX8ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 -; LMULMAX8ZVFHMIN-NEXT: ret +; ZVFHMIN-LABEL: ui2fp_v8i1_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vmv.v.i v8, 0 +; ZVFHMIN-NEXT: vmerge.vim v8, v8, 1, v0 +; ZVFHMIN-NEXT: vfwcvt.f.xu.v v10, v8 +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %z = uitofp <8 x i1> %x to <8 x half> ret <8 x half> %z } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll index 5d045877e5229e..efb1f720f2d096 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll @@ -1,13 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX2 -; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX1 -; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX2 -; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX1 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc 
-mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 -; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=2 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX2 -; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=1 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX1 -; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=2 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX2 -; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=1 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX1 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define @insert_nxv8i32_v2i32_0( %vec, ptr %svp) { ; CHECK-LABEL: insert_nxv8i32_v2i32_0: @@ -49,50 +45,26 @@ define @insert_nxv8i32_v2i32_6( %vec, ptr % } define @insert_nxv8i32_v8i32_0( %vec, ptr %svp) { -; LMULMAX2-LABEL: insert_nxv8i32_v8i32_0: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v12, (a0) -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m4, tu, ma -; LMULMAX2-NEXT: vmv.v.v v8, v12 -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: insert_nxv8i32_v8i32_0: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vle32.v v12, (a0) -; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vle32.v v16, (a0) -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m4, tu, ma -; LMULMAX1-NEXT: vmv.v.v v8, v12 -; LMULMAX1-NEXT: vsetivli zero, 8, e32, m4, tu, ma -; LMULMAX1-NEXT: vslideup.vi v8, v16, 4 -; LMULMAX1-NEXT: ret +; 
CHECK-LABEL: insert_nxv8i32_v8i32_0: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v12, (a0) +; CHECK-NEXT: vsetivli zero, 8, e32, m4, tu, ma +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret %sv = load <8 x i32>, ptr %svp %v = call @llvm.vector.insert.v8i32.nxv8i32( %vec, <8 x i32> %sv, i64 0) ret %v } define @insert_nxv8i32_v8i32_8( %vec, ptr %svp) { -; LMULMAX2-LABEL: insert_nxv8i32_v8i32_8: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v12, (a0) -; LMULMAX2-NEXT: vsetivli zero, 16, e32, m4, tu, ma -; LMULMAX2-NEXT: vslideup.vi v8, v12, 8 -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: insert_nxv8i32_v8i32_8: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vle32.v v12, (a1) -; LMULMAX1-NEXT: vle32.v v16, (a0) -; LMULMAX1-NEXT: vsetivli zero, 12, e32, m4, tu, ma -; LMULMAX1-NEXT: vslideup.vi v8, v16, 8 -; LMULMAX1-NEXT: vsetivli zero, 16, e32, m4, tu, ma -; LMULMAX1-NEXT: vslideup.vi v8, v12, 12 -; LMULMAX1-NEXT: ret +; CHECK-LABEL: insert_nxv8i32_v8i32_8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v12, (a0) +; CHECK-NEXT: vsetivli zero, 16, e32, m4, tu, ma +; CHECK-NEXT: vslideup.vi v8, v12, 8 +; CHECK-NEXT: ret %sv = load <8 x i32>, ptr %svp %v = call @llvm.vector.insert.v8i32.nxv8i32( %vec, <8 x i32> %sv, i64 8) ret %v @@ -160,29 +132,17 @@ define void @insert_v4i32_undef_v2i32_0(ptr %vp, ptr %svp) { } define void @insert_v8i32_v2i32_0(ptr %vp, ptr %svp) { -; LMULMAX2-LABEL: insert_v8i32_v2i32_0: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a1) -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v10, (a0) -; LMULMAX2-NEXT: vsetivli zero, 2, e32, m2, tu, ma -; LMULMAX2-NEXT: vmv.v.v v10, v8 -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: 
vse32.v v10, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: insert_v8i32_v2i32_0: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vle32.v v8, (a1) -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vle32.v v9, (a0) -; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, tu, ma -; LMULMAX1-NEXT: vmv.v.v v9, v8 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vse32.v v9, (a0) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: insert_v8i32_v2i32_0: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; CHECK-NEXT: vmv.v.v v10, v8 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vse32.v v10, (a0) +; CHECK-NEXT: ret %sv = load <2 x i32>, ptr %svp %vec = load <8 x i32>, ptr %vp %v = call <8 x i32> @llvm.vector.insert.v2i32.v8i32(<8 x i32> %vec, <2 x i32> %sv, i64 0) @@ -191,27 +151,17 @@ define void @insert_v8i32_v2i32_0(ptr %vp, ptr %svp) { } define void @insert_v8i32_v2i32_2(ptr %vp, ptr %svp) { -; LMULMAX2-LABEL: insert_v8i32_v2i32_2: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a1) -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v10, (a0) -; LMULMAX2-NEXT: vsetivli zero, 4, e32, m2, tu, ma -; LMULMAX2-NEXT: vslideup.vi v10, v8, 2 -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vse32.v v10, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: insert_v8i32_v2i32_2: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vle32.v v8, (a1) -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vle32.v v9, (a0) -; LMULMAX1-NEXT: vslideup.vi v9, v8, 2 -; LMULMAX1-NEXT: vse32.v v9, (a0) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: insert_v8i32_v2i32_2: +; CHECK: # %bb.0: 
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-NEXT: vslideup.vi v10, v8, 2 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vse32.v v10, (a0) +; CHECK-NEXT: ret %sv = load <2 x i32>, ptr %svp %vec = load <8 x i32>, ptr %vp %v = call <8 x i32> @llvm.vector.insert.v2i32.v8i32(<8 x i32> %vec, <2 x i32> %sv, i64 2) @@ -220,26 +170,15 @@ define void @insert_v8i32_v2i32_2(ptr %vp, ptr %svp) { } define void @insert_v8i32_v2i32_6(ptr %vp, ptr %svp) { -; LMULMAX2-LABEL: insert_v8i32_v2i32_6: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a1) -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v10, (a0) -; LMULMAX2-NEXT: vslideup.vi v10, v8, 6 -; LMULMAX2-NEXT: vse32.v v10, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: insert_v8i32_v2i32_6: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vle32.v v8, (a1) -; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vle32.v v9, (a0) -; LMULMAX1-NEXT: vslideup.vi v9, v8, 2 -; LMULMAX1-NEXT: vse32.v v9, (a0) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: insert_v8i32_v2i32_6: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vslideup.vi v10, v8, 6 +; CHECK-NEXT: vse32.v v10, (a0) +; CHECK-NEXT: ret %sv = load <2 x i32>, ptr %svp %vec = load <8 x i32>, ptr %vp %v = call <8 x i32> @llvm.vector.insert.v2i32.v8i32(<8 x i32> %vec, <2 x i32> %sv, i64 6) @@ -248,24 +187,14 @@ define void @insert_v8i32_v2i32_6(ptr %vp, ptr %svp) { } define void @insert_v8i32_undef_v2i32_6(ptr %vp, ptr %svp) { -; LMULMAX2-LABEL: insert_v8i32_undef_v2i32_6: 
-; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a1) -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vslideup.vi v10, v8, 6 -; LMULMAX2-NEXT: vse32.v v10, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: insert_v8i32_undef_v2i32_6: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vle32.v v8, (a1) -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vslideup.vi v9, v8, 2 -; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vse32.v v9, (a0) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: insert_v8i32_undef_v2i32_6: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vslideup.vi v10, v8, 6 +; CHECK-NEXT: vse32.v v10, (a0) +; CHECK-NEXT: ret %sv = load <2 x i32>, ptr %svp %v = call <8 x i32> @llvm.vector.insert.v2i32.v8i32(<8 x i32> undef, <2 x i32> %sv, i64 6) store <8 x i32> %v, ptr %vp @@ -310,30 +239,18 @@ define void @insert_v4i16_v2i16_2(ptr %vp, ptr %svp) { } define void @insert_v32i1_v8i1_0(ptr %vp, ptr %svp) { -; LMULMAX2-LABEL: insert_v32i1_v8i1_0: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a2, 32 -; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; LMULMAX2-NEXT: vlm.v v8, (a0) -; LMULMAX2-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX2-NEXT: vlm.v v9, (a1) -; LMULMAX2-NEXT: vsetivli zero, 1, e8, mf4, tu, ma -; LMULMAX2-NEXT: vmv.v.v v8, v9 -; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; LMULMAX2-NEXT: vsm.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: insert_v32i1_v8i1_0: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vlm.v v8, (a0) -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vlm.v v9, (a1) -; LMULMAX1-NEXT: vsetivli zero, 1, e8, mf8, tu, ma -; LMULMAX1-NEXT: vmv.v.v v8, v9 -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; 
LMULMAX1-NEXT: vsm.v v8, (a0) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: insert_v32i1_v8i1_0: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 32 +; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma +; CHECK-NEXT: vlm.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vlm.v v9, (a1) +; CHECK-NEXT: vsetivli zero, 1, e8, mf4, tu, ma +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma +; CHECK-NEXT: vsm.v v8, (a0) +; CHECK-NEXT: ret %v = load <32 x i1>, ptr %vp %sv = load <8 x i1>, ptr %svp %c = call <32 x i1> @llvm.vector.insert.v8i1.v32i1(<32 x i1> %v, <8 x i1> %sv, i64 0) @@ -342,31 +259,18 @@ define void @insert_v32i1_v8i1_0(ptr %vp, ptr %svp) { } define void @insert_v32i1_v8i1_16(ptr %vp, ptr %svp) { -; LMULMAX2-LABEL: insert_v32i1_v8i1_16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a2, 32 -; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; LMULMAX2-NEXT: vlm.v v8, (a0) -; LMULMAX2-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX2-NEXT: vlm.v v9, (a1) -; LMULMAX2-NEXT: vsetivli zero, 3, e8, mf4, tu, ma -; LMULMAX2-NEXT: vslideup.vi v8, v9, 2 -; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; LMULMAX2-NEXT: vsm.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: insert_v32i1_v8i1_16: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi a0, a0, 2 -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vlm.v v8, (a0) -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vlm.v v9, (a1) -; LMULMAX1-NEXT: vsetivli zero, 1, e8, mf8, tu, ma -; LMULMAX1-NEXT: vmv.v.v v8, v9 -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vsm.v v8, (a0) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: insert_v32i1_v8i1_16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 32 +; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma +; CHECK-NEXT: vlm.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vlm.v v9, (a1) +; CHECK-NEXT: vsetivli zero, 3, e8, mf4, tu, ma +; CHECK-NEXT: vslideup.vi v8, v9, 2 +; 
CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma +; CHECK-NEXT: vsm.v v8, (a0) +; CHECK-NEXT: ret %v = load <32 x i1>, ptr %vp %sv = load <8 x i1>, ptr %svp %c = call <32 x i1> @llvm.vector.insert.v8i1.v32i1(<32 x i1> %v, <8 x i1> %sv, i64 16) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll index 2c3bc2ef4fe564..c65f6e5fa7866f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll @@ -1,10 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 -; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 -; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 -; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 -; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 -; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s define void @sext_v4i8_v4i32(ptr %x, ptr %z) { ; CHECK-LABEL: sext_v4i8_v4i32: @@ -35,36 +31,13 @@ define void @zext_v4i8_v4i32(ptr %x, ptr %z) { } define void @sext_v8i8_v8i32(ptr %x, ptr %z) { -; LMULMAX8-LABEL: sext_v8i8_v8i32: -; LMULMAX8: # %bb.0: -; 
LMULMAX8-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX8-NEXT: vle8.v v8, (a0) -; LMULMAX8-NEXT: vsext.vf4 v10, v8 -; LMULMAX8-NEXT: vse32.v v10, (a1) -; LMULMAX8-NEXT: ret -; -; LMULMAX2-LABEL: sext_v8i8_v8i32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle8.v v8, (a0) -; LMULMAX2-NEXT: vsext.vf4 v10, v8 -; LMULMAX2-NEXT: vse32.v v10, (a1) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: sext_v8i8_v8i32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vle8.v v8, (a0) -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v9, v8 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v8, 4 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v10, v8 -; LMULMAX1-NEXT: addi a0, a1, 16 -; LMULMAX1-NEXT: vse32.v v10, (a0) -; LMULMAX1-NEXT: vse32.v v9, (a1) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: sext_v8i8_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsext.vf4 v10, v8 +; CHECK-NEXT: vse32.v v10, (a1) +; CHECK-NEXT: ret %a = load <8 x i8>, ptr %x %b = sext <8 x i8> %a to <8 x i32> store <8 x i32> %b, ptr %z @@ -72,90 +45,14 @@ define void @sext_v8i8_v8i32(ptr %x, ptr %z) { } define void @sext_v32i8_v32i32(ptr %x, ptr %z) { -; LMULMAX8-LABEL: sext_v32i8_v32i32: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: li a2, 32 -; LMULMAX8-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; LMULMAX8-NEXT: vle8.v v8, (a0) -; LMULMAX8-NEXT: vsext.vf4 v16, v8 -; LMULMAX8-NEXT: vse32.v v16, (a1) -; LMULMAX8-NEXT: ret -; -; LMULMAX2-LABEL: sext_v32i8_v32i32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a2, 32 -; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; LMULMAX2-NEXT: vle8.v v8, (a0) -; LMULMAX2-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; LMULMAX2-NEXT: vslidedown.vi v10, v8, 8 -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: 
vsext.vf4 v12, v10 -; LMULMAX2-NEXT: vsext.vf4 v10, v8 -; LMULMAX2-NEXT: vsetivli zero, 16, e8, m2, ta, ma -; LMULMAX2-NEXT: vslidedown.vi v8, v8, 16 -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vsext.vf4 v14, v8 -; LMULMAX2-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; LMULMAX2-NEXT: vslidedown.vi v8, v8, 8 -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vsext.vf4 v16, v8 -; LMULMAX2-NEXT: addi a0, a1, 96 -; LMULMAX2-NEXT: vse32.v v16, (a0) -; LMULMAX2-NEXT: addi a0, a1, 64 -; LMULMAX2-NEXT: vse32.v v14, (a0) -; LMULMAX2-NEXT: vse32.v v10, (a1) -; LMULMAX2-NEXT: addi a0, a1, 32 -; LMULMAX2-NEXT: vse32.v v12, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: sext_v32i8_v32i32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: addi a2, a0, 16 -; LMULMAX1-NEXT: vle8.v v8, (a2) -; LMULMAX1-NEXT: vle8.v v9, (a0) -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v10, v8, 4 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v11, v10 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v10, v9, 4 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v12, v10 -; LMULMAX1-NEXT: vsext.vf4 v10, v8 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v8, 8 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v13, v8 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v8, 4 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v14, v8 -; LMULMAX1-NEXT: vsext.vf4 v8, v9 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v9, v9, 8 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v15, v9 -; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v9, v9, 4 -; LMULMAX1-NEXT: 
vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vsext.vf4 v16, v9 -; LMULMAX1-NEXT: addi a0, a1, 48 -; LMULMAX1-NEXT: vse32.v v16, (a0) -; LMULMAX1-NEXT: addi a0, a1, 32 -; LMULMAX1-NEXT: vse32.v v15, (a0) -; LMULMAX1-NEXT: vse32.v v8, (a1) -; LMULMAX1-NEXT: addi a0, a1, 112 -; LMULMAX1-NEXT: vse32.v v14, (a0) -; LMULMAX1-NEXT: addi a0, a1, 96 -; LMULMAX1-NEXT: vse32.v v13, (a0) -; LMULMAX1-NEXT: addi a0, a1, 64 -; LMULMAX1-NEXT: vse32.v v10, (a0) -; LMULMAX1-NEXT: addi a0, a1, 16 -; LMULMAX1-NEXT: vse32.v v12, (a0) -; LMULMAX1-NEXT: addi a0, a1, 80 -; LMULMAX1-NEXT: vse32.v v11, (a0) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: sext_v32i8_v32i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 32 +; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsext.vf4 v16, v8 +; CHECK-NEXT: vse32.v v16, (a1) +; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x %b = sext <32 x i8> %a to <32 x i32> store <32 x i32> %b, ptr %z @@ -179,43 +76,15 @@ define void @trunc_v4i8_v4i32(ptr %x, ptr %z) { } define void @trunc_v8i8_v8i32(ptr %x, ptr %z) { -; LMULMAX8-LABEL: trunc_v8i8_v8i32: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX8-NEXT: vle32.v v8, (a0) -; LMULMAX8-NEXT: vnsrl.wi v10, v8, 0 -; LMULMAX8-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; LMULMAX8-NEXT: vnsrl.wi v8, v10, 0 -; LMULMAX8-NEXT: vse8.v v8, (a1) -; LMULMAX8-NEXT: ret -; -; LMULMAX2-LABEL: trunc_v8i8_v8i32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: vnsrl.wi v10, v8, 0 -; LMULMAX2-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; LMULMAX2-NEXT: vnsrl.wi v8, v10, 0 -; LMULMAX2-NEXT: vse8.v v8, (a1) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: trunc_v8i8_v8i32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-NEXT: vle32.v v8, (a0) -; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vle32.v v9, (a0) -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; 
LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vslideup.vi v8, v9, 4 -; LMULMAX1-NEXT: vse8.v v8, (a1) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: trunc_v8i8_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vnsrl.wi v10, v8, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v10, 0 +; CHECK-NEXT: vse8.v v8, (a1) +; CHECK-NEXT: ret %a = load <8 x i32>, ptr %x %b = trunc <8 x i32> %a to <8 x i8> store <8 x i8> %b, ptr %z diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll index df7a989859eebf..60202cfba760d8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll @@ -1,10 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8-RV32 -; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32 -; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8-RV64 -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s 
--check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64 -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define void @splat_v16i8(ptr %x, i8 %y) { ; CHECK-LABEL: splat_v16i8: @@ -46,65 +42,25 @@ define void @splat_v4i32(ptr %x, i32 %y) { } define void @splat_v2i64(ptr %x, i64 %y) { -; LMULMAX8-RV32-LABEL: splat_v2i64: -; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: addi sp, sp, -16 -; LMULMAX8-RV32-NEXT: .cfi_def_cfa_offset 16 -; LMULMAX8-RV32-NEXT: sw a2, 12(sp) -; LMULMAX8-RV32-NEXT: sw a1, 8(sp) -; LMULMAX8-RV32-NEXT: addi a1, sp, 8 -; LMULMAX8-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX8-RV32-NEXT: vlse64.v v8, (a1), zero -; LMULMAX8-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX8-RV32-NEXT: addi sp, sp, 16 -; LMULMAX8-RV32-NEXT: ret -; -; LMULMAX2-RV32-LABEL: splat_v2i64: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: addi sp, sp, -16 -; LMULMAX2-RV32-NEXT: .cfi_def_cfa_offset 16 -; LMULMAX2-RV32-NEXT: sw a2, 12(sp) -; LMULMAX2-RV32-NEXT: sw a1, 8(sp) -; LMULMAX2-RV32-NEXT: addi a1, sp, 8 -; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32-NEXT: vlse64.v v8, (a1), zero -; LMULMAX2-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32-NEXT: addi sp, sp, 16 -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX1-RV32-LABEL: splat_v2i64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: addi sp, sp, -16 -; LMULMAX1-RV32-NEXT: .cfi_def_cfa_offset 16 -; LMULMAX1-RV32-NEXT: sw a2, 12(sp) -; LMULMAX1-RV32-NEXT: sw a1, 8(sp) -; LMULMAX1-RV32-NEXT: addi a1, sp, 8 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vlse64.v v8, (a1), zero -; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi sp, sp, 
16 -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX8-RV64-LABEL: splat_v2i64: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX8-RV64-NEXT: vmv.v.x v8, a1 -; LMULMAX8-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX8-RV64-NEXT: ret -; -; LMULMAX2-RV64-LABEL: splat_v2i64: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV64-NEXT: vmv.v.x v8, a1 -; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret -; -; LMULMAX1-RV64-LABEL: splat_v2i64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vmv.v.x v8, a1 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; RV32-LABEL: splat_v2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a2, 12(sp) +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v8, (a1), zero +; RV32-NEXT: vse64.v v8, (a0) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: splat_v2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vmv.v.x v8, a1 +; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: ret %a = insertelement <2 x i64> poison, i64 %y, i32 0 %b = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> zeroinitializer store <2 x i64> %b, ptr %x @@ -112,30 +68,13 @@ define void @splat_v2i64(ptr %x, i64 %y) { } define void @splat_v32i8(ptr %x, i8 %y) { -; LMULMAX8-LABEL: splat_v32i8: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: li a2, 32 -; LMULMAX8-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; LMULMAX8-NEXT: vmv.v.x v8, a1 -; LMULMAX8-NEXT: vse8.v v8, (a0) -; LMULMAX8-NEXT: ret -; -; LMULMAX2-LABEL: splat_v32i8: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a2, 32 -; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; LMULMAX2-NEXT: vmv.v.x v8, a1 -; LMULMAX2-NEXT: vse8.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: splat_v32i8: 
-; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vmv.v.x v8, a1 -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vse8.v v8, (a1) -; LMULMAX1-NEXT: vse8.v v8, (a0) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: splat_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 32 +; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma +; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret %a = insertelement <32 x i8> poison, i8 %y, i32 0 %b = shufflevector <32 x i8> %a, <32 x i8> poison, <32 x i32> zeroinitializer store <32 x i8> %b, ptr %x @@ -143,28 +82,12 @@ define void @splat_v32i8(ptr %x, i8 %y) { } define void @splat_v16i16(ptr %x, i16 %y) { -; LMULMAX8-LABEL: splat_v16i16: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX8-NEXT: vmv.v.x v8, a1 -; LMULMAX8-NEXT: vse16.v v8, (a0) -; LMULMAX8-NEXT: ret -; -; LMULMAX2-LABEL: splat_v16i16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-NEXT: vmv.v.x v8, a1 -; LMULMAX2-NEXT: vse16.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: splat_v16i16: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vmv.v.x v8, a1 -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vse16.v v8, (a1) -; LMULMAX1-NEXT: vse16.v v8, (a0) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: splat_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = insertelement <16 x i16> poison, i16 %y, i32 0 %b = shufflevector <16 x i16> %a, <16 x i16> poison, <16 x i32> zeroinitializer store <16 x i16> %b, ptr %x @@ -172,28 +95,12 @@ define void @splat_v16i16(ptr %x, i16 %y) { } define void @splat_v8i32(ptr %x, i32 %y) { -; LMULMAX8-LABEL: splat_v8i32: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX8-NEXT: vmv.v.x v8, a1 -; LMULMAX8-NEXT: vse32.v v8, (a0) -; 
LMULMAX8-NEXT: ret -; -; LMULMAX2-LABEL: splat_v8i32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vmv.v.x v8, a1 -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: splat_v8i32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vmv.v.x v8, a1 -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vse32.v v8, (a1) -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: splat_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret %a = insertelement <8 x i32> poison, i32 %y, i32 0 %b = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> zeroinitializer store <8 x i32> %b, ptr %x @@ -201,65 +108,25 @@ define void @splat_v8i32(ptr %x, i32 %y) { } define void @splat_v4i64(ptr %x, i64 %y) { -; LMULMAX8-RV32-LABEL: splat_v4i64: -; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: addi sp, sp, -16 -; LMULMAX8-RV32-NEXT: .cfi_def_cfa_offset 16 -; LMULMAX8-RV32-NEXT: sw a2, 12(sp) -; LMULMAX8-RV32-NEXT: sw a1, 8(sp) -; LMULMAX8-RV32-NEXT: addi a1, sp, 8 -; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX8-RV32-NEXT: vlse64.v v8, (a1), zero -; LMULMAX8-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX8-RV32-NEXT: addi sp, sp, 16 -; LMULMAX8-RV32-NEXT: ret -; -; LMULMAX2-RV32-LABEL: splat_v4i64: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: addi sp, sp, -16 -; LMULMAX2-RV32-NEXT: .cfi_def_cfa_offset 16 -; LMULMAX2-RV32-NEXT: sw a2, 12(sp) -; LMULMAX2-RV32-NEXT: sw a1, 8(sp) -; LMULMAX2-RV32-NEXT: addi a1, sp, 8 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vlse64.v v8, (a1), zero -; LMULMAX2-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32-NEXT: addi sp, sp, 16 -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX1-RV32-LABEL: splat_v4i64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 
-; LMULMAX1-RV32-NEXT: vmv.v.i v0, 5 -; LMULMAX1-RV32-NEXT: vmv.v.x v8, a2 -; LMULMAX1-RV32-NEXT: vmerge.vxm v8, v8, a1, v0 -; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX8-RV64-LABEL: splat_v4i64: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX8-RV64-NEXT: vmv.v.x v8, a1 -; LMULMAX8-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX8-RV64-NEXT: ret -; -; LMULMAX2-RV64-LABEL: splat_v4i64: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV64-NEXT: vmv.v.x v8, a1 -; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret -; -; LMULMAX1-RV64-LABEL: splat_v4i64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vmv.v.x v8, a1 -; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a1) -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; RV32-LABEL: splat_v4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a2, 12(sp) +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v8, (a1), zero +; RV32-NEXT: vse64.v v8, (a0) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: splat_v4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vmv.v.x v8, a1 +; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: ret %a = insertelement <4 x i64> poison, i64 %y, i32 0 %b = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> zeroinitializer store <4 x i64> %b, ptr %x @@ -319,30 +186,13 @@ define void @splat_zero_v2i64(ptr %x) { } define void @splat_zero_v32i8(ptr %x) { -; LMULMAX8-LABEL: splat_zero_v32i8: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: li a1, 32 -; LMULMAX8-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX8-NEXT: vmv.v.i 
v8, 0 -; LMULMAX8-NEXT: vse8.v v8, (a0) -; LMULMAX8-NEXT: ret -; -; LMULMAX2-LABEL: splat_zero_v32i8: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a1, 32 -; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-NEXT: vmv.v.i v8, 0 -; LMULMAX2-NEXT: vse8.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: splat_zero_v32i8: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vse8.v v8, (a0) -; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vse8.v v8, (a0) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: splat_zero_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret %a = insertelement <32 x i8> poison, i8 0, i32 0 %b = shufflevector <32 x i8> %a, <32 x i8> poison, <32 x i32> zeroinitializer store <32 x i8> %b, ptr %x @@ -350,28 +200,12 @@ define void @splat_zero_v32i8(ptr %x) { } define void @splat_zero_v16i16(ptr %x) { -; LMULMAX8-LABEL: splat_zero_v16i16: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX8-NEXT: vmv.v.i v8, 0 -; LMULMAX8-NEXT: vse16.v v8, (a0) -; LMULMAX8-NEXT: ret -; -; LMULMAX2-LABEL: splat_zero_v16i16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-NEXT: vmv.v.i v8, 0 -; LMULMAX2-NEXT: vse16.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: splat_zero_v16i16: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vse16.v v8, (a0) -; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vse16.v v8, (a0) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: splat_zero_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = insertelement <16 x i16> poison, i16 0, i32 0 %b = shufflevector <16 x i16> %a, <16 x i16> poison, <16 x 
i32> zeroinitializer store <16 x i16> %b, ptr %x @@ -379,28 +213,12 @@ define void @splat_zero_v16i16(ptr %x) { } define void @splat_zero_v8i32(ptr %x) { -; LMULMAX8-LABEL: splat_zero_v8i32: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX8-NEXT: vmv.v.i v8, 0 -; LMULMAX8-NEXT: vse32.v v8, (a0) -; LMULMAX8-NEXT: ret -; -; LMULMAX2-LABEL: splat_zero_v8i32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vmv.v.i v8, 0 -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: splat_zero_v8i32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: splat_zero_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret %a = insertelement <8 x i32> poison, i32 0, i32 0 %b = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> zeroinitializer store <8 x i32> %b, ptr %x @@ -408,37 +226,12 @@ define void @splat_zero_v8i32(ptr %x) { } define void @splat_zero_v4i64(ptr %x) { -; LMULMAX8-LABEL: splat_zero_v4i64: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX8-NEXT: vmv.v.i v8, 0 -; LMULMAX8-NEXT: vse64.v v8, (a0) -; LMULMAX8-NEXT: ret -; -; LMULMAX2-LABEL: splat_zero_v4i64: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-NEXT: vmv.v.i v8, 0 -; LMULMAX2-NEXT: vse64.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: splat_zero_v4i64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a0, a0, 16 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; 
LMULMAX1-RV64-LABEL: splat_zero_v4i64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vmv.v.i v8, 0 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a0, a0, 16 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: splat_zero_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: ret %a = insertelement <4 x i64> poison, i64 0, i32 0 %b = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> zeroinitializer store <4 x i64> %b, ptr %x @@ -467,81 +260,33 @@ define void @splat_zero_v2i16_unaligned(ptr %p) { } define void @splat_zero_v4i16(ptr %p) { -; LMULMAX8-RV32-LABEL: splat_zero_v4i16: -; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX8-RV32-NEXT: vmv.v.i v8, 0 -; LMULMAX8-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX8-RV32-NEXT: ret -; -; LMULMAX2-RV32-LABEL: splat_zero_v4i16: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.i v8, 0 -; LMULMAX2-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX1-RV32-LABEL: splat_zero_v4i16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0 -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX8-RV64-LABEL: splat_zero_v4i16: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: sd zero, 0(a0) -; LMULMAX8-RV64-NEXT: ret -; -; LMULMAX2-RV64-LABEL: splat_zero_v4i16: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: sd zero, 0(a0) -; LMULMAX2-RV64-NEXT: ret -; -; LMULMAX1-RV64-LABEL: splat_zero_v4i16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: sd zero, 0(a0) -; LMULMAX1-RV64-NEXT: ret +; RV32-LABEL: splat_zero_v4i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV32-NEXT: vmv.v.i v8, 0 +; 
RV32-NEXT: vse16.v v8, (a0) +; RV32-NEXT: ret +; +; RV64-LABEL: splat_zero_v4i16: +; RV64: # %bb.0: +; RV64-NEXT: sd zero, 0(a0) +; RV64-NEXT: ret store <4 x i16> zeroinitializer, ptr %p ret void } define void @splat_zero_v2i32(ptr %p) { -; LMULMAX8-RV32-LABEL: splat_zero_v2i32: -; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX8-RV32-NEXT: vmv.v.i v8, 0 -; LMULMAX8-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX8-RV32-NEXT: ret -; -; LMULMAX2-RV32-LABEL: splat_zero_v2i32: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.i v8, 0 -; LMULMAX2-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX1-RV32-LABEL: splat_zero_v2i32: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX8-RV64-LABEL: splat_zero_v2i32: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: sd zero, 0(a0) -; LMULMAX8-RV64-NEXT: ret -; -; LMULMAX2-RV64-LABEL: splat_zero_v2i32: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: sd zero, 0(a0) -; LMULMAX2-RV64-NEXT: ret -; -; LMULMAX1-RV64-LABEL: splat_zero_v2i32: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: sd zero, 0(a0) -; LMULMAX1-RV64-NEXT: ret +; RV32-LABEL: splat_zero_v2i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV32-NEXT: vmv.v.i v8, 0 +; RV32-NEXT: vse32.v v8, (a0) +; RV32-NEXT: ret +; +; RV64-LABEL: splat_zero_v2i32: +; RV64: # %bb.0: +; RV64-NEXT: sd zero, 0(a0) +; RV64-NEXT: ret store <2 x i32> zeroinitializer, ptr %p ret void } @@ -612,30 +357,13 @@ define void @splat_allones_v2i64(ptr %x) { } define void @splat_allones_v32i8(ptr %x) { -; LMULMAX8-LABEL: splat_allones_v32i8: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: li a1, 32 -; LMULMAX8-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX8-NEXT: vmv.v.i v8, -1 -; LMULMAX8-NEXT: vse8.v v8, (a0) -; 
LMULMAX8-NEXT: ret -; -; LMULMAX2-LABEL: splat_allones_v32i8: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a1, 32 -; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-NEXT: vmv.v.i v8, -1 -; LMULMAX2-NEXT: vse8.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: splat_allones_v32i8: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vmv.v.i v8, -1 -; LMULMAX1-NEXT: vse8.v v8, (a0) -; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vse8.v v8, (a0) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: splat_allones_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vmv.v.i v8, -1 +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret %a = insertelement <32 x i8> poison, i8 -1, i32 0 %b = shufflevector <32 x i8> %a, <32 x i8> poison, <32 x i32> zeroinitializer store <32 x i8> %b, ptr %x @@ -643,28 +371,12 @@ define void @splat_allones_v32i8(ptr %x) { } define void @splat_allones_v16i16(ptr %x) { -; LMULMAX8-LABEL: splat_allones_v16i16: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX8-NEXT: vmv.v.i v8, -1 -; LMULMAX8-NEXT: vse16.v v8, (a0) -; LMULMAX8-NEXT: ret -; -; LMULMAX2-LABEL: splat_allones_v16i16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-NEXT: vmv.v.i v8, -1 -; LMULMAX2-NEXT: vse16.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: splat_allones_v16i16: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vmv.v.i v8, -1 -; LMULMAX1-NEXT: vse16.v v8, (a0) -; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vse16.v v8, (a0) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: splat_allones_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.i v8, -1 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = insertelement <16 x i16> poison, i16 -1, i32 0 %b = shufflevector <16 x i16> %a, <16 x i16> poison, <16 x i32> 
zeroinitializer store <16 x i16> %b, ptr %x @@ -672,28 +384,12 @@ define void @splat_allones_v16i16(ptr %x) { } define void @splat_allones_v8i32(ptr %x) { -; LMULMAX8-LABEL: splat_allones_v8i32: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX8-NEXT: vmv.v.i v8, -1 -; LMULMAX8-NEXT: vse32.v v8, (a0) -; LMULMAX8-NEXT: ret -; -; LMULMAX2-LABEL: splat_allones_v8i32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vmv.v.i v8, -1 -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: splat_allones_v8i32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vmv.v.i v8, -1 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: splat_allones_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vmv.v.i v8, -1 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret %a = insertelement <8 x i32> poison, i32 -1, i32 0 %b = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> zeroinitializer store <8 x i32> %b, ptr %x @@ -701,37 +397,12 @@ define void @splat_allones_v8i32(ptr %x) { } define void @splat_allones_v4i64(ptr %x) { -; LMULMAX8-LABEL: splat_allones_v4i64: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX8-NEXT: vmv.v.i v8, -1 -; LMULMAX8-NEXT: vse64.v v8, (a0) -; LMULMAX8-NEXT: ret -; -; LMULMAX2-LABEL: splat_allones_v4i64: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-NEXT: vmv.v.i v8, -1 -; LMULMAX2-NEXT: vse64.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: splat_allones_v4i64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.i v8, -1 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a0, a0, 16 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; 
LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: splat_allones_v4i64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vmv.v.i v8, -1 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a0, a0, 16 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: splat_allones_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vmv.v.i v8, -1 +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: ret %a = insertelement <4 x i64> poison, i64 -1, i32 0 %b = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> zeroinitializer store <4 x i64> %b, ptr %x @@ -743,48 +414,13 @@ define void @splat_allones_v4i64(ptr %x) { ; FIXME: We should prevent this and use the implicit sign extension of vmv.v.x ; with SEW=64 on RV32. define void @splat_allones_with_use_v4i64(ptr %x) { -; LMULMAX8-LABEL: splat_allones_with_use_v4i64: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX8-NEXT: vle64.v v8, (a0) -; LMULMAX8-NEXT: vadd.vi v8, v8, -1 -; LMULMAX8-NEXT: vse64.v v8, (a0) -; LMULMAX8-NEXT: ret -; -; LMULMAX2-LABEL: splat_allones_with_use_v4i64: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-NEXT: vle64.v v8, (a0) -; LMULMAX2-NEXT: vadd.vi v8, v8, -1 -; LMULMAX2-NEXT: vse64.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: splat_allones_with_use_v4i64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.i v10, -1 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v9, (a1) -; 
LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: splat_allones_with_use_v4i64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v8, (a1) -; LMULMAX1-RV64-NEXT: vle64.v v9, (a0) -; LMULMAX1-RV64-NEXT: vadd.vi v8, v8, -1 -; LMULMAX1-RV64-NEXT: vadd.vi v9, v9, -1 -; LMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v8, (a1) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: splat_allones_with_use_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: ret %a = load <4 x i64>, ptr %x %b = add <4 x i64> %a, store <4 x i64> %b, ptr %x @@ -796,171 +432,28 @@ define void @splat_allones_with_use_v4i64(ptr %x) { ; which exceeded maximum-expected size of 512. The scalable container type of ; nxv8i64 should have been used instead. define void @vadd_vx_v16i64(ptr %a, i64 %b, ptr %c) { -; LMULMAX8-RV32-LABEL: vadd_vx_v16i64: -; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: addi sp, sp, -16 -; LMULMAX8-RV32-NEXT: .cfi_def_cfa_offset 16 -; LMULMAX8-RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; LMULMAX8-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX8-RV32-NEXT: sw a2, 12(sp) -; LMULMAX8-RV32-NEXT: sw a1, 8(sp) -; LMULMAX8-RV32-NEXT: addi a0, sp, 8 -; LMULMAX8-RV32-NEXT: vlse64.v v16, (a0), zero -; LMULMAX8-RV32-NEXT: vadd.vv v8, v8, v16 -; LMULMAX8-RV32-NEXT: vse64.v v8, (a3) -; LMULMAX8-RV32-NEXT: addi sp, sp, 16 -; LMULMAX8-RV32-NEXT: ret -; -; LMULMAX2-RV32-LABEL: vadd_vx_v16i64: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: addi a4, a0, 64 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vle64.v v8, (a4) -; LMULMAX2-RV32-NEXT: addi a4, a0, 96 -; LMULMAX2-RV32-NEXT: vle64.v v10, (a4) -; LMULMAX2-RV32-NEXT: vle64.v v12, (a0) -; LMULMAX2-RV32-NEXT: addi a0, a0, 32 -; LMULMAX2-RV32-NEXT: vle64.v v14, (a0) 
-; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: li a0, 85 -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a0 -; LMULMAX2-RV32-NEXT: vmv.v.x v16, a2 -; LMULMAX2-RV32-NEXT: vmerge.vxm v16, v16, a1, v0 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vadd.vv v14, v14, v16 -; LMULMAX2-RV32-NEXT: vadd.vv v12, v12, v16 -; LMULMAX2-RV32-NEXT: vadd.vv v10, v10, v16 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v16 -; LMULMAX2-RV32-NEXT: addi a0, a3, 64 -; LMULMAX2-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32-NEXT: addi a0, a3, 96 -; LMULMAX2-RV32-NEXT: vse64.v v10, (a0) -; LMULMAX2-RV32-NEXT: vse64.v v12, (a3) -; LMULMAX2-RV32-NEXT: addi a0, a3, 32 -; LMULMAX2-RV32-NEXT: vse64.v v14, (a0) -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX1-RV32-LABEL: vadd_vx_v16i64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: addi a4, a0, 96 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a4) -; LMULMAX1-RV32-NEXT: addi a4, a0, 112 -; LMULMAX1-RV32-NEXT: vle64.v v9, (a4) -; LMULMAX1-RV32-NEXT: addi a4, a0, 64 -; LMULMAX1-RV32-NEXT: vle64.v v10, (a4) -; LMULMAX1-RV32-NEXT: addi a4, a0, 80 -; LMULMAX1-RV32-NEXT: vle64.v v11, (a4) -; LMULMAX1-RV32-NEXT: addi a4, a0, 32 -; LMULMAX1-RV32-NEXT: vle64.v v12, (a4) -; LMULMAX1-RV32-NEXT: addi a4, a0, 48 -; LMULMAX1-RV32-NEXT: vle64.v v13, (a4) -; LMULMAX1-RV32-NEXT: vle64.v v14, (a0) -; LMULMAX1-RV32-NEXT: addi a0, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v15, (a0) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.i v0, 5 -; LMULMAX1-RV32-NEXT: vmv.v.x v16, a2 -; LMULMAX1-RV32-NEXT: vmerge.vxm v16, v16, a1, v0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vadd.vv v15, v15, v16 -; LMULMAX1-RV32-NEXT: vadd.vv v14, v14, v16 -; LMULMAX1-RV32-NEXT: vadd.vv v13, v13, v16 -; LMULMAX1-RV32-NEXT: vadd.vv v12, v12, v16 -; LMULMAX1-RV32-NEXT: vadd.vv v11, v11, v16 -; LMULMAX1-RV32-NEXT: vadd.vv v10, v10, v16 
-; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v16 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v16 -; LMULMAX1-RV32-NEXT: addi a0, a3, 96 -; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a0, a3, 112 -; LMULMAX1-RV32-NEXT: vse64.v v9, (a0) -; LMULMAX1-RV32-NEXT: addi a0, a3, 64 -; LMULMAX1-RV32-NEXT: vse64.v v10, (a0) -; LMULMAX1-RV32-NEXT: addi a0, a3, 80 -; LMULMAX1-RV32-NEXT: vse64.v v11, (a0) -; LMULMAX1-RV32-NEXT: addi a0, a3, 32 -; LMULMAX1-RV32-NEXT: vse64.v v12, (a0) -; LMULMAX1-RV32-NEXT: addi a0, a3, 48 -; LMULMAX1-RV32-NEXT: vse64.v v13, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v14, (a3) -; LMULMAX1-RV32-NEXT: addi a3, a3, 16 -; LMULMAX1-RV32-NEXT: vse64.v v15, (a3) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX8-RV64-LABEL: vadd_vx_v16i64: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; LMULMAX8-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX8-RV64-NEXT: vadd.vx v8, v8, a1 -; LMULMAX8-RV64-NEXT: vse64.v v8, (a2) -; LMULMAX8-RV64-NEXT: ret -; -; LMULMAX2-RV64-LABEL: vadd_vx_v16i64: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV64-NEXT: addi a3, a0, 96 -; LMULMAX2-RV64-NEXT: vle64.v v8, (a3) -; LMULMAX2-RV64-NEXT: addi a3, a0, 32 -; LMULMAX2-RV64-NEXT: vle64.v v10, (a3) -; LMULMAX2-RV64-NEXT: addi a3, a0, 64 -; LMULMAX2-RV64-NEXT: vle64.v v12, (a3) -; LMULMAX2-RV64-NEXT: vle64.v v14, (a0) -; LMULMAX2-RV64-NEXT: vadd.vx v10, v10, a1 -; LMULMAX2-RV64-NEXT: vadd.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vadd.vx v12, v12, a1 -; LMULMAX2-RV64-NEXT: vadd.vx v14, v14, a1 -; LMULMAX2-RV64-NEXT: vse64.v v14, (a2) -; LMULMAX2-RV64-NEXT: addi a0, a2, 64 -; LMULMAX2-RV64-NEXT: vse64.v v12, (a0) -; LMULMAX2-RV64-NEXT: addi a0, a2, 96 -; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV64-NEXT: addi a0, a2, 32 -; LMULMAX2-RV64-NEXT: vse64.v v10, (a0) -; LMULMAX2-RV64-NEXT: ret -; -; LMULMAX1-RV64-LABEL: vadd_vx_v16i64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, 
ta, ma -; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a3, a0, 96 -; LMULMAX1-RV64-NEXT: vle64.v v9, (a3) -; LMULMAX1-RV64-NEXT: addi a3, a0, 112 -; LMULMAX1-RV64-NEXT: vle64.v v10, (a3) -; LMULMAX1-RV64-NEXT: addi a3, a0, 64 -; LMULMAX1-RV64-NEXT: vle64.v v11, (a3) -; LMULMAX1-RV64-NEXT: addi a3, a0, 48 -; LMULMAX1-RV64-NEXT: vle64.v v12, (a3) -; LMULMAX1-RV64-NEXT: addi a3, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v13, (a3) -; LMULMAX1-RV64-NEXT: addi a3, a0, 80 -; LMULMAX1-RV64-NEXT: addi a0, a0, 32 -; LMULMAX1-RV64-NEXT: vle64.v v14, (a0) -; LMULMAX1-RV64-NEXT: vle64.v v15, (a3) -; LMULMAX1-RV64-NEXT: vadd.vx v13, v13, a1 -; LMULMAX1-RV64-NEXT: vadd.vx v12, v12, a1 -; LMULMAX1-RV64-NEXT: vadd.vx v14, v14, a1 -; LMULMAX1-RV64-NEXT: vadd.vx v15, v15, a1 -; LMULMAX1-RV64-NEXT: vadd.vx v11, v11, a1 -; LMULMAX1-RV64-NEXT: vadd.vx v10, v10, a1 -; LMULMAX1-RV64-NEXT: vadd.vx v9, v9, a1 -; LMULMAX1-RV64-NEXT: vadd.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a2) -; LMULMAX1-RV64-NEXT: addi a0, a2, 96 -; LMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; LMULMAX1-RV64-NEXT: addi a0, a2, 112 -; LMULMAX1-RV64-NEXT: vse64.v v10, (a0) -; LMULMAX1-RV64-NEXT: addi a0, a2, 64 -; LMULMAX1-RV64-NEXT: vse64.v v11, (a0) -; LMULMAX1-RV64-NEXT: addi a0, a2, 80 -; LMULMAX1-RV64-NEXT: vse64.v v15, (a0) -; LMULMAX1-RV64-NEXT: addi a0, a2, 32 -; LMULMAX1-RV64-NEXT: vse64.v v14, (a0) -; LMULMAX1-RV64-NEXT: addi a0, a2, 48 -; LMULMAX1-RV64-NEXT: vse64.v v12, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a2, 16 -; LMULMAX1-RV64-NEXT: vse64.v v13, (a2) -; LMULMAX1-RV64-NEXT: ret +; RV32-LABEL: vadd_vx_v16i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: sw a2, 12(sp) +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: vse64.v v8, (a3) +; RV32-NEXT: addi sp, sp, 16 +; 
RV32-NEXT: ret +; +; RV64-LABEL: vadd_vx_v16i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vadd.vx v8, v8, a1 +; RV64-NEXT: vse64.v v8, (a2) +; RV64-NEXT: ret %va = load <16 x i64>, ptr %a %head = insertelement <16 x i64> poison, i64 %b, i32 0 %splat = shufflevector <16 x i64> %head, <16 x i64> poison, <16 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-vrgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-vrgather.ll index 841e72f3afc353..2c0b1d09b52d93 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-vrgather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-vrgather.ll @@ -1,8 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4 -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4 -; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s define void @gather_const_v16i8(ptr %x) { ; CHECK-LABEL: gather_const_v16i8: @@ -69,27 +67,14 @@ define void @gather_const_v2i64(ptr %x) { } define void @gather_const_v64i8(ptr %x) { -; LMULMAX4-LABEL: gather_const_v64i8: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: li a1, 64 -; LMULMAX4-NEXT: addi a2, a0, 32 -; LMULMAX4-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; LMULMAX4-NEXT: vlse8.v v8, (a2), zero -; LMULMAX4-NEXT: vse8.v v8, 
(a0) -; LMULMAX4-NEXT: ret -; -; LMULMAX1-LABEL: gather_const_v64i8: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi a1, a0, 32 -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vlse8.v v8, (a1), zero -; LMULMAX1-NEXT: addi a2, a0, 16 -; LMULMAX1-NEXT: addi a3, a0, 48 -; LMULMAX1-NEXT: vse8.v v8, (a1) -; LMULMAX1-NEXT: vse8.v v8, (a3) -; LMULMAX1-NEXT: vse8.v v8, (a0) -; LMULMAX1-NEXT: vse8.v v8, (a2) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: gather_const_v64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 64 +; CHECK-NEXT: addi a2, a0, 32 +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vlse8.v v8, (a2), zero +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret %a = load <64 x i8>, ptr %x %b = extractelement <64 x i8> %a, i32 32 %c = insertelement <64 x i8> poison, i8 %b, i32 0 @@ -99,28 +84,14 @@ define void @gather_const_v64i8(ptr %x) { } define void @gather_const_v16i16(ptr %x) { -; LMULMAX4-LABEL: gather_const_v16i16: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: li a1, 32 -; LMULMAX4-NEXT: addi a2, a0, 50 -; LMULMAX4-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; LMULMAX4-NEXT: vlse16.v v8, (a2), zero -; LMULMAX4-NEXT: vse16.v v8, (a0) -; LMULMAX4-NEXT: ret -; -; LMULMAX1-LABEL: gather_const_v16i16: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi a1, a0, 50 -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vlse16.v v8, (a1), zero -; LMULMAX1-NEXT: addi a1, a0, 48 -; LMULMAX1-NEXT: addi a2, a0, 16 -; LMULMAX1-NEXT: addi a3, a0, 32 -; LMULMAX1-NEXT: vse16.v v8, (a3) -; LMULMAX1-NEXT: vse16.v v8, (a1) -; LMULMAX1-NEXT: vse16.v v8, (a0) -; LMULMAX1-NEXT: vse16.v v8, (a2) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: gather_const_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: addi a2, a0, 50 +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vlse16.v v8, (a2), zero +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <32 x i16>, ptr %x %b = extractelement <32 x i16> %a, i32 25 %c = insertelement <32 x 
i16> poison, i16 %b, i32 0 @@ -130,27 +101,13 @@ define void @gather_const_v16i16(ptr %x) { } define void @gather_const_v16i32(ptr %x) { -; LMULMAX4-LABEL: gather_const_v16i32: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: addi a1, a0, 36 -; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; LMULMAX4-NEXT: vlse32.v v8, (a1), zero -; LMULMAX4-NEXT: vse32.v v8, (a0) -; LMULMAX4-NEXT: ret -; -; LMULMAX1-LABEL: gather_const_v16i32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi a1, a0, 36 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vlse32.v v8, (a1), zero -; LMULMAX1-NEXT: addi a1, a0, 32 -; LMULMAX1-NEXT: addi a2, a0, 16 -; LMULMAX1-NEXT: addi a3, a0, 48 -; LMULMAX1-NEXT: vse32.v v8, (a1) -; LMULMAX1-NEXT: vse32.v v8, (a3) -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: vse32.v v8, (a2) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: gather_const_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, 36 +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vlse32.v v8, (a1), zero +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret %a = load <16 x i32>, ptr %x %b = extractelement <16 x i32> %a, i32 9 %c = insertelement <16 x i32> poison, i32 %b, i32 0 @@ -160,27 +117,13 @@ define void @gather_const_v16i32(ptr %x) { } define void @gather_const_v8i64(ptr %x) { -; LMULMAX4-LABEL: gather_const_v8i64: -; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: addi a1, a0, 24 -; LMULMAX4-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; LMULMAX4-NEXT: vlse64.v v8, (a1), zero -; LMULMAX4-NEXT: vse64.v v8, (a0) -; LMULMAX4-NEXT: ret -; -; LMULMAX1-LABEL: gather_const_v8i64: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi a1, a0, 24 -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vlse64.v v8, (a1), zero -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: addi a2, a0, 48 -; LMULMAX1-NEXT: addi a3, a0, 32 -; LMULMAX1-NEXT: vse64.v v8, (a3) -; LMULMAX1-NEXT: vse64.v v8, (a2) -; LMULMAX1-NEXT: vse64.v v8, (a0) -; LMULMAX1-NEXT: vse64.v v8, (a1) -; LMULMAX1-NEXT: 
ret +; CHECK-LABEL: gather_const_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, 24 +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vlse64.v v8, (a1), zero +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: ret %a = load <8 x i64>, ptr %x %b = extractelement <8 x i64> %a, i32 3 %c = insertelement <8 x i64> poison, i64 %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll index 7a4620a5382584..175b110538ffba 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -1,8 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX2,LMULMAX2-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX2,LMULMAX2-RV64 -; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX1,LMULMAX1-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX1,LMULMAX1-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define void @add_v16i8(ptr %x, ptr %y) { ; CHECK-LABEL: add_v16i8: @@ -2377,45 +2375,15 @@ define void @umax_xv_v4i32(ptr %x, i32 %y) { } define void @add_v32i8(ptr %x, ptr %y) { -; LMULMAX2-LABEL: add_v32i8: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a2, 32 -; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; LMULMAX2-NEXT: vle8.v v8, (a0) -; LMULMAX2-NEXT: vle8.v v10, (a1) -; 
LMULMAX2-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-NEXT: vse8.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: add_v32i8: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle8.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle8.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle8.v v11, (a1) -; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse8.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse8.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: add_v32i8: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle8.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle8.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle8.v v11, (a1) -; LMULMAX1-RV64-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse8.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse8.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: add_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 32 +; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v10, (a1) +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x %b = load <32 x i8>, ptr %y %c = add <32 x i8> %a, %b @@ -2424,44 +2392,14 @@ define void @add_v32i8(ptr %x, ptr %y) { } define void @add_v16i16(ptr %x, ptr %y) { -; LMULMAX2-LABEL: add_v16i16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-NEXT: vle16.v v8, (a0) -; LMULMAX2-NEXT: vle16.v v10, (a1) -; LMULMAX2-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-NEXT: vse16.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: add_v16i16: -; LMULMAX1-RV32: # %bb.0: -; 
LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle16.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: add_v16i16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV64-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: add_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v10, (a1) +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <16 x i16>, ptr %x %b = load <16 x i16>, ptr %y %c = add <16 x i16> %a, %b @@ -2470,44 +2408,14 @@ define void @add_v16i16(ptr %x, ptr %y) { } define void @add_v8i32(ptr %x, ptr %y) { -; LMULMAX2-LABEL: add_v8i32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: vle32.v v10, (a1) -; LMULMAX2-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: add_v8i32: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) -; 
LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle32.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: add_v8i32: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle32.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV64-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: add_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vle32.v v10, (a1) +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret %a = load <8 x i32>, ptr %x %b = load <8 x i32>, ptr %y %c = add <8 x i32> %a, %b @@ -2516,48 +2424,16 @@ define void @add_v8i32(ptr %x, ptr %y) { } define void @add_v6i32(ptr %x, ptr %y) { -; LMULMAX2-LABEL: add_v6i32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 6, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: vle32.v v10, (a1) -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-NEXT: vsetivli zero, 6, e32, m2, ta, ma -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: add_v6i32: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV32-NEXT: vle32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vle32.v v10, (a2) -; LMULMAX1-RV32-NEXT: addi a1, a1, 16 -; 
LMULMAX1-RV32-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vadd.vv v8, v10, v11 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: add_v6i32: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle32.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV64-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: add_v6i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vle32.v v10, (a1) +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i32>, ptr %x %b = load <6 x i32>, ptr %y %c = add <6 x i32> %a, %b @@ -2566,44 +2442,14 @@ define void @add_v6i32(ptr %x, ptr %y) { } define void @add_v4i64(ptr %x, ptr %y) { -; LMULMAX2-LABEL: add_v4i64: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-NEXT: vle64.v v8, (a0) -; LMULMAX2-NEXT: vle64.v v10, (a1) -; LMULMAX2-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-NEXT: vse64.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: add_v4i64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; 
LMULMAX1-RV32-NEXT: vle64.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: add_v4i64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV64-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: add_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v10, (a1) +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: ret %a = load <4 x i64>, ptr %x %b = load <4 x i64>, ptr %y %c = add <4 x i64> %a, %b @@ -2612,45 +2458,15 @@ define void @add_v4i64(ptr %x, ptr %y) { } define void @sub_v32i8(ptr %x, ptr %y) { -; LMULMAX2-LABEL: sub_v32i8: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a2, 32 -; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; LMULMAX2-NEXT: vle8.v v8, (a0) -; LMULMAX2-NEXT: vle8.v v10, (a1) -; LMULMAX2-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-NEXT: vse8.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: sub_v32i8: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle8.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle8.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle8.v v11, (a1) -; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, 
v11 -; LMULMAX1-RV32-NEXT: vse8.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse8.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: sub_v32i8: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle8.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle8.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle8.v v11, (a1) -; LMULMAX1-RV64-NEXT: vsub.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse8.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse8.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: sub_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 32 +; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v10, (a1) +; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x %b = load <32 x i8>, ptr %y %c = sub <32 x i8> %a, %b @@ -2659,44 +2475,14 @@ define void @sub_v32i8(ptr %x, ptr %y) { } define void @sub_v16i16(ptr %x, ptr %y) { -; LMULMAX2-LABEL: sub_v16i16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-NEXT: vle16.v v8, (a0) -; LMULMAX2-NEXT: vle16.v v10, (a1) -; LMULMAX2-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-NEXT: vse16.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: sub_v16i16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle16.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: sub_v16i16: -; 
LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV64-NEXT: vsub.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: sub_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v10, (a1) +; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <16 x i16>, ptr %x %b = load <16 x i16>, ptr %y %c = sub <16 x i16> %a, %b @@ -2705,44 +2491,14 @@ define void @sub_v16i16(ptr %x, ptr %y) { } define void @sub_v8i32(ptr %x, ptr %y) { -; LMULMAX2-LABEL: sub_v8i32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: vle32.v v10, (a1) -; LMULMAX2-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: sub_v8i32: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle32.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: sub_v8i32: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; 
LMULMAX1-RV64-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle32.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV64-NEXT: vsub.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: sub_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vle32.v v10, (a1) +; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret %a = load <8 x i32>, ptr %x %b = load <8 x i32>, ptr %y %c = sub <8 x i32> %a, %b @@ -2751,44 +2507,14 @@ define void @sub_v8i32(ptr %x, ptr %y) { } define void @sub_v4i64(ptr %x, ptr %y) { -; LMULMAX2-LABEL: sub_v4i64: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-NEXT: vle64.v v8, (a0) -; LMULMAX2-NEXT: vle64.v v10, (a1) -; LMULMAX2-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-NEXT: vse64.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: sub_v4i64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle64.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: sub_v4i64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV64-NEXT: 
vsub.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: sub_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v10, (a1) +; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: ret %a = load <4 x i64>, ptr %x %b = load <4 x i64>, ptr %y %c = sub <4 x i64> %a, %b @@ -2797,45 +2523,15 @@ define void @sub_v4i64(ptr %x, ptr %y) { } define void @mul_v32i8(ptr %x, ptr %y) { -; LMULMAX2-LABEL: mul_v32i8: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a2, 32 -; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; LMULMAX2-NEXT: vle8.v v8, (a0) -; LMULMAX2-NEXT: vle8.v v10, (a1) -; LMULMAX2-NEXT: vmul.vv v8, v8, v10 -; LMULMAX2-NEXT: vse8.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: mul_v32i8: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle8.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle8.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle8.v v11, (a1) -; LMULMAX1-RV32-NEXT: vmul.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vmul.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse8.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse8.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: mul_v32i8: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle8.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle8.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle8.v v11, (a1) -; LMULMAX1-RV64-NEXT: vmul.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vmul.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse8.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse8.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret 
+; CHECK-LABEL: mul_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 32 +; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v10, (a1) +; CHECK-NEXT: vmul.vv v8, v8, v10 +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x %b = load <32 x i8>, ptr %y %c = mul <32 x i8> %a, %b @@ -2844,44 +2540,14 @@ define void @mul_v32i8(ptr %x, ptr %y) { } define void @mul_v16i16(ptr %x, ptr %y) { -; LMULMAX2-LABEL: mul_v16i16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-NEXT: vle16.v v8, (a0) -; LMULMAX2-NEXT: vle16.v v10, (a1) -; LMULMAX2-NEXT: vmul.vv v8, v8, v10 -; LMULMAX2-NEXT: vse16.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: mul_v16i16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle16.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV32-NEXT: vmul.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vmul.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: mul_v16i16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV64-NEXT: vmul.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vmul.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: mul_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v10, 
(a1) +; CHECK-NEXT: vmul.vv v8, v8, v10 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <16 x i16>, ptr %x %b = load <16 x i16>, ptr %y %c = mul <16 x i16> %a, %b @@ -2890,44 +2556,14 @@ define void @mul_v16i16(ptr %x, ptr %y) { } define void @mul_v8i32(ptr %x, ptr %y) { -; LMULMAX2-LABEL: mul_v8i32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: vle32.v v10, (a1) -; LMULMAX2-NEXT: vmul.vv v8, v8, v10 -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: mul_v8i32: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle32.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV32-NEXT: vmul.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vmul.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: mul_v8i32: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle32.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV64-NEXT: vmul.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vmul.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: mul_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vle32.v v10, (a1) +; CHECK-NEXT: vmul.vv v8, v8, v10 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret %a = load <8 x i32>, ptr %x %b = load <8 x i32>, ptr %y %c = mul <8 x i32> %a, %b @@ 
-2936,44 +2572,14 @@ define void @mul_v8i32(ptr %x, ptr %y) { } define void @mul_v4i64(ptr %x, ptr %y) { -; LMULMAX2-LABEL: mul_v4i64: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-NEXT: vle64.v v8, (a0) -; LMULMAX2-NEXT: vle64.v v10, (a1) -; LMULMAX2-NEXT: vmul.vv v8, v8, v10 -; LMULMAX2-NEXT: vse64.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: mul_v4i64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle64.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV32-NEXT: vmul.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vmul.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: mul_v4i64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV64-NEXT: vmul.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vmul.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: mul_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v10, (a1) +; CHECK-NEXT: vmul.vv v8, v8, v10 +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: ret %a = load <4 x i64>, ptr %x %b = load <4 x i64>, ptr %y %c = mul <4 x i64> %a, %b @@ -2982,45 +2588,15 @@ define void @mul_v4i64(ptr %x, ptr %y) { } define void @and_v32i8(ptr %x, ptr %y) { -; LMULMAX2-LABEL: and_v32i8: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a2, 
32 -; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; LMULMAX2-NEXT: vle8.v v8, (a0) -; LMULMAX2-NEXT: vle8.v v10, (a1) -; LMULMAX2-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-NEXT: vse8.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: and_v32i8: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle8.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle8.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle8.v v11, (a1) -; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse8.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse8.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: and_v32i8: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle8.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle8.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle8.v v11, (a1) -; LMULMAX1-RV64-NEXT: vand.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vand.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse8.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse8.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: and_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 32 +; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v10, (a1) +; CHECK-NEXT: vand.vv v8, v8, v10 +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x %b = load <32 x i8>, ptr %y %c = and <32 x i8> %a, %b @@ -3029,44 +2605,14 @@ define void @and_v32i8(ptr %x, ptr %y) { } define void @and_v16i16(ptr %x, ptr %y) { -; LMULMAX2-LABEL: and_v16i16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-NEXT: vle16.v v8, (a0) -; LMULMAX2-NEXT: vle16.v v10, (a1) -; LMULMAX2-NEXT: vand.vv v8, v8, v10 -; 
LMULMAX2-NEXT: vse16.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: and_v16i16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle16.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: and_v16i16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV64-NEXT: vand.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vand.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: and_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v10, (a1) +; CHECK-NEXT: vand.vv v8, v8, v10 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <16 x i16>, ptr %x %b = load <16 x i16>, ptr %y %c = and <16 x i16> %a, %b @@ -3075,44 +2621,14 @@ define void @and_v16i16(ptr %x, ptr %y) { } define void @and_v8i32(ptr %x, ptr %y) { -; LMULMAX2-LABEL: and_v8i32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: vle32.v v10, (a1) -; LMULMAX2-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: and_v8i32: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 
-; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle32.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: and_v8i32: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle32.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV64-NEXT: vand.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vand.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: and_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vle32.v v10, (a1) +; CHECK-NEXT: vand.vv v8, v8, v10 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret %a = load <8 x i32>, ptr %x %b = load <8 x i32>, ptr %y %c = and <8 x i32> %a, %b @@ -3121,44 +2637,14 @@ define void @and_v8i32(ptr %x, ptr %y) { } define void @and_v4i64(ptr %x, ptr %y) { -; LMULMAX2-LABEL: and_v4i64: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-NEXT: vle64.v v8, (a0) -; LMULMAX2-NEXT: vle64.v v10, (a1) -; LMULMAX2-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-NEXT: vse64.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: and_v4i64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: 
vle64.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: and_v4i64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV64-NEXT: vand.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vand.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: and_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v10, (a1) +; CHECK-NEXT: vand.vv v8, v8, v10 +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: ret %a = load <4 x i64>, ptr %x %b = load <4 x i64>, ptr %y %c = and <4 x i64> %a, %b @@ -3167,45 +2653,15 @@ define void @and_v4i64(ptr %x, ptr %y) { } define void @or_v32i8(ptr %x, ptr %y) { -; LMULMAX2-LABEL: or_v32i8: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a2, 32 -; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; LMULMAX2-NEXT: vle8.v v8, (a0) -; LMULMAX2-NEXT: vle8.v v10, (a1) -; LMULMAX2-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-NEXT: vse8.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: or_v32i8: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle8.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle8.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle8.v v11, (a1) -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: 
vse8.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse8.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: or_v32i8: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle8.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle8.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle8.v v11, (a1) -; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse8.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse8.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: or_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 32 +; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v10, (a1) +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x %b = load <32 x i8>, ptr %y %c = or <32 x i8> %a, %b @@ -3214,44 +2670,14 @@ define void @or_v32i8(ptr %x, ptr %y) { } define void @or_v16i16(ptr %x, ptr %y) { -; LMULMAX2-LABEL: or_v16i16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-NEXT: vle16.v v8, (a0) -; LMULMAX2-NEXT: vle16.v v10, (a1) -; LMULMAX2-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-NEXT: vse16.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: or_v16i16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle16.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: or_v16i16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: 
vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: or_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v10, (a1) +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <16 x i16>, ptr %x %b = load <16 x i16>, ptr %y %c = or <16 x i16> %a, %b @@ -3260,44 +2686,14 @@ define void @or_v16i16(ptr %x, ptr %y) { } define void @or_v8i32(ptr %x, ptr %y) { -; LMULMAX2-LABEL: or_v8i32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: vle32.v v10, (a1) -; LMULMAX2-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: or_v8i32: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle32.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: or_v8i32: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 
16 -; LMULMAX1-RV64-NEXT: vle32.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: or_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vle32.v v10, (a1) +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret %a = load <8 x i32>, ptr %x %b = load <8 x i32>, ptr %y %c = or <8 x i32> %a, %b @@ -3306,44 +2702,14 @@ define void @or_v8i32(ptr %x, ptr %y) { } define void @or_v4i64(ptr %x, ptr %y) { -; LMULMAX2-LABEL: or_v4i64: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-NEXT: vle64.v v8, (a0) -; LMULMAX2-NEXT: vle64.v v10, (a1) -; LMULMAX2-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-NEXT: vse64.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: or_v4i64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle64.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: or_v4i64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: 
vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: or_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v10, (a1) +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: ret %a = load <4 x i64>, ptr %x %b = load <4 x i64>, ptr %y %c = or <4 x i64> %a, %b @@ -3352,45 +2718,15 @@ define void @or_v4i64(ptr %x, ptr %y) { } define void @xor_v32i8(ptr %x, ptr %y) { -; LMULMAX2-LABEL: xor_v32i8: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a2, 32 -; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; LMULMAX2-NEXT: vle8.v v8, (a0) -; LMULMAX2-NEXT: vle8.v v10, (a1) -; LMULMAX2-NEXT: vxor.vv v8, v8, v10 -; LMULMAX2-NEXT: vse8.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: xor_v32i8: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle8.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle8.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle8.v v11, (a1) -; LMULMAX1-RV32-NEXT: vxor.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vxor.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse8.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse8.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: xor_v32i8: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle8.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle8.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle8.v v11, (a1) -; LMULMAX1-RV64-NEXT: vxor.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vxor.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse8.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse8.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: xor_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 32 +; CHECK-NEXT: 
vsetvli zero, a2, e8, m2, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v10, (a1) +; CHECK-NEXT: vxor.vv v8, v8, v10 +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x %b = load <32 x i8>, ptr %y %c = xor <32 x i8> %a, %b @@ -3399,44 +2735,14 @@ define void @xor_v32i8(ptr %x, ptr %y) { } define void @xor_v16i16(ptr %x, ptr %y) { -; LMULMAX2-LABEL: xor_v16i16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-NEXT: vle16.v v8, (a0) -; LMULMAX2-NEXT: vle16.v v10, (a1) -; LMULMAX2-NEXT: vxor.vv v8, v8, v10 -; LMULMAX2-NEXT: vse16.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: xor_v16i16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle16.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV32-NEXT: vxor.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vxor.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: xor_v16i16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV64-NEXT: vxor.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vxor.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: xor_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v10, (a1) +; CHECK-NEXT: vxor.vv v8, v8, v10 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: 
ret %a = load <16 x i16>, ptr %x %b = load <16 x i16>, ptr %y %c = xor <16 x i16> %a, %b @@ -3445,44 +2751,14 @@ define void @xor_v16i16(ptr %x, ptr %y) { } define void @xor_v8i32(ptr %x, ptr %y) { -; LMULMAX2-LABEL: xor_v8i32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: vle32.v v10, (a1) -; LMULMAX2-NEXT: vxor.vv v8, v8, v10 -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: xor_v8i32: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle32.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV32-NEXT: vxor.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vxor.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: xor_v8i32: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle32.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV64-NEXT: vxor.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vxor.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: xor_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vle32.v v10, (a1) +; CHECK-NEXT: vxor.vv v8, v8, v10 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret %a = load <8 x i32>, ptr %x %b = load <8 x i32>, ptr %y %c = xor <8 x i32> %a, %b @@ -3491,44 +2767,14 @@ define void @xor_v8i32(ptr %x, ptr %y) { } define void @xor_v4i64(ptr 
%x, ptr %y) { -; LMULMAX2-LABEL: xor_v4i64: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-NEXT: vle64.v v8, (a0) -; LMULMAX2-NEXT: vle64.v v10, (a1) -; LMULMAX2-NEXT: vxor.vv v8, v8, v10 -; LMULMAX2-NEXT: vse64.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: xor_v4i64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle64.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV32-NEXT: vxor.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vxor.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: xor_v4i64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV64-NEXT: vxor.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vxor.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: xor_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v10, (a1) +; CHECK-NEXT: vxor.vv v8, v8, v10 +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: ret %a = load <4 x i64>, ptr %x %b = load <4 x i64>, ptr %y %c = xor <4 x i64> %a, %b @@ -3537,45 +2783,15 @@ define void @xor_v4i64(ptr %x, ptr %y) { } define void @lshr_v32i8(ptr %x, ptr %y) { -; LMULMAX2-LABEL: lshr_v32i8: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a2, 32 -; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; LMULMAX2-NEXT: vle8.v v8, (a0) -; 
LMULMAX2-NEXT: vle8.v v10, (a1) -; LMULMAX2-NEXT: vsrl.vv v8, v8, v10 -; LMULMAX2-NEXT: vse8.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: lshr_v32i8: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle8.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle8.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle8.v v11, (a1) -; LMULMAX1-RV32-NEXT: vsrl.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsrl.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse8.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse8.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: lshr_v32i8: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle8.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle8.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle8.v v11, (a1) -; LMULMAX1-RV64-NEXT: vsrl.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsrl.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse8.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse8.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: lshr_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 32 +; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v10, (a1) +; CHECK-NEXT: vsrl.vv v8, v8, v10 +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x %b = load <32 x i8>, ptr %y %c = lshr <32 x i8> %a, %b @@ -3584,44 +2800,14 @@ define void @lshr_v32i8(ptr %x, ptr %y) { } define void @lshr_v16i16(ptr %x, ptr %y) { -; LMULMAX2-LABEL: lshr_v16i16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-NEXT: vle16.v v8, (a0) -; LMULMAX2-NEXT: vle16.v v10, (a1) -; LMULMAX2-NEXT: vsrl.vv v8, v8, v10 -; LMULMAX2-NEXT: vse16.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: 
lshr_v16i16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle16.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV32-NEXT: vsrl.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsrl.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: lshr_v16i16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV64-NEXT: vsrl.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsrl.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: lshr_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v10, (a1) +; CHECK-NEXT: vsrl.vv v8, v8, v10 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <16 x i16>, ptr %x %b = load <16 x i16>, ptr %y %c = lshr <16 x i16> %a, %b @@ -3630,44 +2816,14 @@ define void @lshr_v16i16(ptr %x, ptr %y) { } define void @lshr_v8i32(ptr %x, ptr %y) { -; LMULMAX2-LABEL: lshr_v8i32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: vle32.v v10, (a1) -; LMULMAX2-NEXT: vsrl.vv v8, v8, v10 -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: lshr_v8i32: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, 
a0, 16 -; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle32.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV32-NEXT: vsrl.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsrl.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: lshr_v8i32: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle32.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV64-NEXT: vsrl.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsrl.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: lshr_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vle32.v v10, (a1) +; CHECK-NEXT: vsrl.vv v8, v8, v10 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret %a = load <8 x i32>, ptr %x %b = load <8 x i32>, ptr %y %c = lshr <8 x i32> %a, %b @@ -3676,44 +2832,14 @@ define void @lshr_v8i32(ptr %x, ptr %y) { } define void @lshr_v4i64(ptr %x, ptr %y) { -; LMULMAX2-LABEL: lshr_v4i64: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-NEXT: vle64.v v8, (a0) -; LMULMAX2-NEXT: vle64.v v10, (a1) -; LMULMAX2-NEXT: vsrl.vv v8, v8, v10 -; LMULMAX2-NEXT: vse64.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: lshr_v4i64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle64.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle64.v v11, (a1) -; 
LMULMAX1-RV32-NEXT: vsrl.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsrl.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: lshr_v4i64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV64-NEXT: vsrl.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsrl.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: lshr_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v10, (a1) +; CHECK-NEXT: vsrl.vv v8, v8, v10 +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: ret %a = load <4 x i64>, ptr %x %b = load <4 x i64>, ptr %y %c = lshr <4 x i64> %a, %b @@ -3722,45 +2848,15 @@ define void @lshr_v4i64(ptr %x, ptr %y) { } define void @ashr_v32i8(ptr %x, ptr %y) { -; LMULMAX2-LABEL: ashr_v32i8: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a2, 32 -; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; LMULMAX2-NEXT: vle8.v v8, (a0) -; LMULMAX2-NEXT: vle8.v v10, (a1) -; LMULMAX2-NEXT: vsra.vv v8, v8, v10 -; LMULMAX2-NEXT: vse8.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: ashr_v32i8: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle8.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle8.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle8.v v11, (a1) -; LMULMAX1-RV32-NEXT: vsra.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsra.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse8.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse8.v v9, 
(a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: ashr_v32i8: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle8.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle8.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle8.v v11, (a1) -; LMULMAX1-RV64-NEXT: vsra.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsra.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse8.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse8.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: ashr_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 32 +; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v10, (a1) +; CHECK-NEXT: vsra.vv v8, v8, v10 +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x %b = load <32 x i8>, ptr %y %c = ashr <32 x i8> %a, %b @@ -3769,44 +2865,14 @@ define void @ashr_v32i8(ptr %x, ptr %y) { } define void @ashr_v16i16(ptr %x, ptr %y) { -; LMULMAX2-LABEL: ashr_v16i16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-NEXT: vle16.v v8, (a0) -; LMULMAX2-NEXT: vle16.v v10, (a1) -; LMULMAX2-NEXT: vsra.vv v8, v8, v10 -; LMULMAX2-NEXT: vse16.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: ashr_v16i16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle16.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV32-NEXT: vsra.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsra.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: ashr_v16i16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma 
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV64-NEXT: vsra.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsra.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: ashr_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v10, (a1) +; CHECK-NEXT: vsra.vv v8, v8, v10 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <16 x i16>, ptr %x %b = load <16 x i16>, ptr %y %c = ashr <16 x i16> %a, %b @@ -3815,44 +2881,14 @@ define void @ashr_v16i16(ptr %x, ptr %y) { } define void @ashr_v8i32(ptr %x, ptr %y) { -; LMULMAX2-LABEL: ashr_v8i32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: vle32.v v10, (a1) -; LMULMAX2-NEXT: vsra.vv v8, v8, v10 -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: ashr_v8i32: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle32.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV32-NEXT: vsra.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsra.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: ashr_v8i32: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; 
LMULMAX1-RV64-NEXT: vle32.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV64-NEXT: vsra.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsra.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: ashr_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vle32.v v10, (a1) +; CHECK-NEXT: vsra.vv v8, v8, v10 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret %a = load <8 x i32>, ptr %x %b = load <8 x i32>, ptr %y %c = ashr <8 x i32> %a, %b @@ -3861,44 +2897,14 @@ define void @ashr_v8i32(ptr %x, ptr %y) { } define void @ashr_v4i64(ptr %x, ptr %y) { -; LMULMAX2-LABEL: ashr_v4i64: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-NEXT: vle64.v v8, (a0) -; LMULMAX2-NEXT: vle64.v v10, (a1) -; LMULMAX2-NEXT: vsra.vv v8, v8, v10 -; LMULMAX2-NEXT: vse64.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: ashr_v4i64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle64.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV32-NEXT: vsra.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsra.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: ashr_v4i64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV64-NEXT: vsra.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsra.vv v8, v8, v11 -; 
LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: ashr_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v10, (a1) +; CHECK-NEXT: vsra.vv v8, v8, v10 +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: ret %a = load <4 x i64>, ptr %x %b = load <4 x i64>, ptr %y %c = ashr <4 x i64> %a, %b @@ -3907,45 +2913,15 @@ define void @ashr_v4i64(ptr %x, ptr %y) { } define void @shl_v32i8(ptr %x, ptr %y) { -; LMULMAX2-LABEL: shl_v32i8: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a2, 32 -; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; LMULMAX2-NEXT: vle8.v v8, (a0) -; LMULMAX2-NEXT: vle8.v v10, (a1) -; LMULMAX2-NEXT: vsll.vv v8, v8, v10 -; LMULMAX2-NEXT: vse8.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: shl_v32i8: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle8.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle8.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle8.v v11, (a1) -; LMULMAX1-RV32-NEXT: vsll.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsll.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse8.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse8.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: shl_v32i8: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle8.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle8.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle8.v v11, (a1) -; LMULMAX1-RV64-NEXT: vsll.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsll.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse8.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse8.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: shl_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li 
a2, 32 +; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v10, (a1) +; CHECK-NEXT: vsll.vv v8, v8, v10 +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x %b = load <32 x i8>, ptr %y %c = shl <32 x i8> %a, %b @@ -3954,44 +2930,14 @@ define void @shl_v32i8(ptr %x, ptr %y) { } define void @shl_v16i16(ptr %x, ptr %y) { -; LMULMAX2-LABEL: shl_v16i16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-NEXT: vle16.v v8, (a0) -; LMULMAX2-NEXT: vle16.v v10, (a1) -; LMULMAX2-NEXT: vsll.vv v8, v8, v10 -; LMULMAX2-NEXT: vse16.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: shl_v16i16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle16.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV32-NEXT: vsll.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsll.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: shl_v16i16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV64-NEXT: vsll.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsll.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: shl_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v10, (a1) +; CHECK-NEXT: vsll.vv v8, v8, v10 +; CHECK-NEXT: vse16.v 
v8, (a0) +; CHECK-NEXT: ret %a = load <16 x i16>, ptr %x %b = load <16 x i16>, ptr %y %c = shl <16 x i16> %a, %b @@ -4000,44 +2946,14 @@ define void @shl_v16i16(ptr %x, ptr %y) { } define void @shl_v8i32(ptr %x, ptr %y) { -; LMULMAX2-LABEL: shl_v8i32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: vle32.v v10, (a1) -; LMULMAX2-NEXT: vsll.vv v8, v8, v10 -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: shl_v8i32: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle32.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV32-NEXT: vsll.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsll.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: shl_v8i32: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle32.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV64-NEXT: vsll.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsll.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: shl_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vle32.v v10, (a1) +; CHECK-NEXT: vsll.vv v8, v8, v10 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret %a = load <8 x i32>, ptr %x %b = load <8 x i32>, ptr %y %c = shl <8 x i32> %a, %b @@ -4046,44 +2962,14 @@ define void @shl_v8i32(ptr %x, ptr %y) { } 
define void @shl_v4i64(ptr %x, ptr %y) { -; LMULMAX2-LABEL: shl_v4i64: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-NEXT: vle64.v v8, (a0) -; LMULMAX2-NEXT: vle64.v v10, (a1) -; LMULMAX2-NEXT: vsll.vv v8, v8, v10 -; LMULMAX2-NEXT: vse64.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: shl_v4i64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle64.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV32-NEXT: vsll.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsll.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: shl_v4i64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV64-NEXT: vsll.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsll.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: shl_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v10, (a1) +; CHECK-NEXT: vsll.vv v8, v8, v10 +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: ret %a = load <4 x i64>, ptr %x %b = load <4 x i64>, ptr %y %c = shl <4 x i64> %a, %b @@ -4092,45 +2978,15 @@ define void @shl_v4i64(ptr %x, ptr %y) { } define void @sdiv_v32i8(ptr %x, ptr %y) { -; LMULMAX2-LABEL: sdiv_v32i8: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a2, 32 -; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; 
LMULMAX2-NEXT: vle8.v v8, (a0) -; LMULMAX2-NEXT: vle8.v v10, (a1) -; LMULMAX2-NEXT: vdiv.vv v8, v8, v10 -; LMULMAX2-NEXT: vse8.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: sdiv_v32i8: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle8.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle8.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle8.v v11, (a1) -; LMULMAX1-RV32-NEXT: vdiv.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vdiv.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse8.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse8.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: sdiv_v32i8: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle8.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle8.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle8.v v11, (a1) -; LMULMAX1-RV64-NEXT: vdiv.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vdiv.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse8.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse8.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: sdiv_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 32 +; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v10, (a1) +; CHECK-NEXT: vdiv.vv v8, v8, v10 +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x %b = load <32 x i8>, ptr %y %c = sdiv <32 x i8> %a, %b @@ -4139,44 +2995,14 @@ define void @sdiv_v32i8(ptr %x, ptr %y) { } define void @sdiv_v16i16(ptr %x, ptr %y) { -; LMULMAX2-LABEL: sdiv_v16i16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-NEXT: vle16.v v8, (a0) -; LMULMAX2-NEXT: vle16.v v10, (a1) -; LMULMAX2-NEXT: vdiv.vv v8, v8, v10 -; LMULMAX2-NEXT: vse16.v v8, (a0) -; LMULMAX2-NEXT: ret -; 
-; LMULMAX1-RV32-LABEL: sdiv_v16i16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle16.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV32-NEXT: vdiv.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vdiv.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: sdiv_v16i16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV64-NEXT: vdiv.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vdiv.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: sdiv_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v10, (a1) +; CHECK-NEXT: vdiv.vv v8, v8, v10 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <16 x i16>, ptr %x %b = load <16 x i16>, ptr %y %c = sdiv <16 x i16> %a, %b @@ -4185,44 +3011,14 @@ define void @sdiv_v16i16(ptr %x, ptr %y) { } define void @sdiv_v8i32(ptr %x, ptr %y) { -; LMULMAX2-LABEL: sdiv_v8i32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: vle32.v v10, (a1) -; LMULMAX2-NEXT: vdiv.vv v8, v8, v10 -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: sdiv_v8i32: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) -; 
LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle32.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV32-NEXT: vdiv.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vdiv.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: sdiv_v8i32: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle32.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV64-NEXT: vdiv.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vdiv.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: sdiv_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vle32.v v10, (a1) +; CHECK-NEXT: vdiv.vv v8, v8, v10 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret %a = load <8 x i32>, ptr %x %b = load <8 x i32>, ptr %y %c = sdiv <8 x i32> %a, %b @@ -4231,44 +3027,14 @@ define void @sdiv_v8i32(ptr %x, ptr %y) { } define void @sdiv_v4i64(ptr %x, ptr %y) { -; LMULMAX2-LABEL: sdiv_v4i64: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-NEXT: vle64.v v8, (a0) -; LMULMAX2-NEXT: vle64.v v10, (a1) -; LMULMAX2-NEXT: vdiv.vv v8, v8, v10 -; LMULMAX2-NEXT: vse64.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: sdiv_v4i64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle64.v v10, (a3) -; 
LMULMAX1-RV32-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV32-NEXT: vdiv.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vdiv.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: sdiv_v4i64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV64-NEXT: vdiv.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vdiv.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: sdiv_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v10, (a1) +; CHECK-NEXT: vdiv.vv v8, v8, v10 +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: ret %a = load <4 x i64>, ptr %x %b = load <4 x i64>, ptr %y %c = sdiv <4 x i64> %a, %b @@ -4277,45 +3043,15 @@ define void @sdiv_v4i64(ptr %x, ptr %y) { } define void @srem_v32i8(ptr %x, ptr %y) { -; LMULMAX2-LABEL: srem_v32i8: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a2, 32 -; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; LMULMAX2-NEXT: vle8.v v8, (a0) -; LMULMAX2-NEXT: vle8.v v10, (a1) -; LMULMAX2-NEXT: vrem.vv v8, v8, v10 -; LMULMAX2-NEXT: vse8.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: srem_v32i8: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle8.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle8.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle8.v v11, (a1) -; LMULMAX1-RV32-NEXT: vrem.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vrem.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse8.v 
v8, (a0) -; LMULMAX1-RV32-NEXT: vse8.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: srem_v32i8: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle8.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle8.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle8.v v11, (a1) -; LMULMAX1-RV64-NEXT: vrem.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vrem.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse8.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse8.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: srem_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 32 +; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v10, (a1) +; CHECK-NEXT: vrem.vv v8, v8, v10 +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x %b = load <32 x i8>, ptr %y %c = srem <32 x i8> %a, %b @@ -4324,44 +3060,14 @@ define void @srem_v32i8(ptr %x, ptr %y) { } define void @srem_v16i16(ptr %x, ptr %y) { -; LMULMAX2-LABEL: srem_v16i16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-NEXT: vle16.v v8, (a0) -; LMULMAX2-NEXT: vle16.v v10, (a1) -; LMULMAX2-NEXT: vrem.vv v8, v8, v10 -; LMULMAX2-NEXT: vse16.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: srem_v16i16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle16.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV32-NEXT: vrem.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vrem.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: srem_v16i16: -; LMULMAX1-RV64: # %bb.0: -; 
LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV64-NEXT: vrem.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vrem.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: srem_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v10, (a1) +; CHECK-NEXT: vrem.vv v8, v8, v10 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <16 x i16>, ptr %x %b = load <16 x i16>, ptr %y %c = srem <16 x i16> %a, %b @@ -4370,44 +3076,14 @@ define void @srem_v16i16(ptr %x, ptr %y) { } define void @srem_v8i32(ptr %x, ptr %y) { -; LMULMAX2-LABEL: srem_v8i32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: vle32.v v10, (a1) -; LMULMAX2-NEXT: vrem.vv v8, v8, v10 -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: srem_v8i32: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle32.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV32-NEXT: vrem.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vrem.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: srem_v8i32: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle32.v v9, 
(a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle32.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV64-NEXT: vrem.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vrem.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: srem_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vle32.v v10, (a1) +; CHECK-NEXT: vrem.vv v8, v8, v10 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret %a = load <8 x i32>, ptr %x %b = load <8 x i32>, ptr %y %c = srem <8 x i32> %a, %b @@ -4416,44 +3092,14 @@ define void @srem_v8i32(ptr %x, ptr %y) { } define void @srem_v4i64(ptr %x, ptr %y) { -; LMULMAX2-LABEL: srem_v4i64: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-NEXT: vle64.v v8, (a0) -; LMULMAX2-NEXT: vle64.v v10, (a1) -; LMULMAX2-NEXT: vrem.vv v8, v8, v10 -; LMULMAX2-NEXT: vse64.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: srem_v4i64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle64.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV32-NEXT: vrem.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vrem.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: srem_v4i64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV64-NEXT: vrem.vv v9, v10, v9 -; 
LMULMAX1-RV64-NEXT: vrem.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: srem_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v10, (a1) +; CHECK-NEXT: vrem.vv v8, v8, v10 +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: ret %a = load <4 x i64>, ptr %x %b = load <4 x i64>, ptr %y %c = srem <4 x i64> %a, %b @@ -4462,45 +3108,15 @@ define void @srem_v4i64(ptr %x, ptr %y) { } define void @udiv_v32i8(ptr %x, ptr %y) { -; LMULMAX2-LABEL: udiv_v32i8: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a2, 32 -; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; LMULMAX2-NEXT: vle8.v v8, (a0) -; LMULMAX2-NEXT: vle8.v v10, (a1) -; LMULMAX2-NEXT: vdivu.vv v8, v8, v10 -; LMULMAX2-NEXT: vse8.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: udiv_v32i8: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle8.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle8.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle8.v v11, (a1) -; LMULMAX1-RV32-NEXT: vdivu.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vdivu.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse8.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse8.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: udiv_v32i8: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle8.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle8.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle8.v v11, (a1) -; LMULMAX1-RV64-NEXT: vdivu.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vdivu.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse8.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse8.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; 
CHECK-LABEL: udiv_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 32 +; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v10, (a1) +; CHECK-NEXT: vdivu.vv v8, v8, v10 +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x %b = load <32 x i8>, ptr %y %c = udiv <32 x i8> %a, %b @@ -4509,44 +3125,14 @@ define void @udiv_v32i8(ptr %x, ptr %y) { } define void @udiv_v16i16(ptr %x, ptr %y) { -; LMULMAX2-LABEL: udiv_v16i16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-NEXT: vle16.v v8, (a0) -; LMULMAX2-NEXT: vle16.v v10, (a1) -; LMULMAX2-NEXT: vdivu.vv v8, v8, v10 -; LMULMAX2-NEXT: vse16.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: udiv_v16i16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle16.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV32-NEXT: vdivu.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vdivu.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: udiv_v16i16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV64-NEXT: vdivu.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vdivu.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: udiv_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: 
vle16.v v10, (a1) +; CHECK-NEXT: vdivu.vv v8, v8, v10 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <16 x i16>, ptr %x %b = load <16 x i16>, ptr %y %c = udiv <16 x i16> %a, %b @@ -4555,44 +3141,14 @@ define void @udiv_v16i16(ptr %x, ptr %y) { } define void @udiv_v8i32(ptr %x, ptr %y) { -; LMULMAX2-LABEL: udiv_v8i32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: vle32.v v10, (a1) -; LMULMAX2-NEXT: vdivu.vv v8, v8, v10 -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: udiv_v8i32: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle32.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV32-NEXT: vdivu.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vdivu.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: udiv_v8i32: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle32.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV64-NEXT: vdivu.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vdivu.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: udiv_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vle32.v v10, (a1) +; CHECK-NEXT: vdivu.vv v8, v8, v10 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret %a = load <8 x i32>, ptr %x %b = load <8 x i32>, ptr %y %c = 
udiv <8 x i32> %a, %b @@ -4601,44 +3157,14 @@ define void @udiv_v8i32(ptr %x, ptr %y) { } define void @udiv_v4i64(ptr %x, ptr %y) { -; LMULMAX2-LABEL: udiv_v4i64: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-NEXT: vle64.v v8, (a0) -; LMULMAX2-NEXT: vle64.v v10, (a1) -; LMULMAX2-NEXT: vdivu.vv v8, v8, v10 -; LMULMAX2-NEXT: vse64.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: udiv_v4i64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle64.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV32-NEXT: vdivu.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vdivu.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: udiv_v4i64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV64-NEXT: vdivu.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vdivu.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: udiv_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v10, (a1) +; CHECK-NEXT: vdivu.vv v8, v8, v10 +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: ret %a = load <4 x i64>, ptr %x %b = load <4 x i64>, ptr %y %c = udiv <4 x i64> %a, %b @@ -4647,45 +3173,15 @@ define void @udiv_v4i64(ptr %x, ptr %y) { } define void @urem_v32i8(ptr %x, ptr %y) { -; LMULMAX2-LABEL: urem_v32i8: -; 
LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a2, 32 -; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; LMULMAX2-NEXT: vle8.v v8, (a0) -; LMULMAX2-NEXT: vle8.v v10, (a1) -; LMULMAX2-NEXT: vremu.vv v8, v8, v10 -; LMULMAX2-NEXT: vse8.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: urem_v32i8: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle8.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle8.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle8.v v11, (a1) -; LMULMAX1-RV32-NEXT: vremu.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vremu.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse8.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse8.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: urem_v32i8: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle8.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle8.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle8.v v11, (a1) -; LMULMAX1-RV64-NEXT: vremu.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vremu.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse8.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse8.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: urem_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 32 +; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v10, (a1) +; CHECK-NEXT: vremu.vv v8, v8, v10 +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x %b = load <32 x i8>, ptr %y %c = urem <32 x i8> %a, %b @@ -4694,44 +3190,14 @@ define void @urem_v32i8(ptr %x, ptr %y) { } define void @urem_v16i16(ptr %x, ptr %y) { -; LMULMAX2-LABEL: urem_v16i16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-NEXT: vle16.v v8, (a0) -; LMULMAX2-NEXT: vle16.v 
v10, (a1) -; LMULMAX2-NEXT: vremu.vv v8, v8, v10 -; LMULMAX2-NEXT: vse16.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: urem_v16i16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle16.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV32-NEXT: vremu.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vremu.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: urem_v16i16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV64-NEXT: vremu.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vremu.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: urem_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v10, (a1) +; CHECK-NEXT: vremu.vv v8, v8, v10 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <16 x i16>, ptr %x %b = load <16 x i16>, ptr %y %c = urem <16 x i16> %a, %b @@ -4740,44 +3206,14 @@ define void @urem_v16i16(ptr %x, ptr %y) { } define void @urem_v8i32(ptr %x, ptr %y) { -; LMULMAX2-LABEL: urem_v8i32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: vle32.v v10, (a1) -; LMULMAX2-NEXT: vremu.vv v8, v8, v10 -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: urem_v8i32: -; LMULMAX1-RV32: 
# %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle32.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV32-NEXT: vremu.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vremu.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: urem_v8i32: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle32.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV64-NEXT: vremu.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vremu.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: urem_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vle32.v v10, (a1) +; CHECK-NEXT: vremu.vv v8, v8, v10 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret %a = load <8 x i32>, ptr %x %b = load <8 x i32>, ptr %y %c = urem <8 x i32> %a, %b @@ -4786,44 +3222,14 @@ define void @urem_v8i32(ptr %x, ptr %y) { } define void @urem_v4i64(ptr %x, ptr %y) { -; LMULMAX2-LABEL: urem_v4i64: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-NEXT: vle64.v v8, (a0) -; LMULMAX2-NEXT: vle64.v v10, (a1) -; LMULMAX2-NEXT: vremu.vv v8, v8, v10 -; LMULMAX2-NEXT: vse64.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: urem_v4i64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: 
vle64.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle64.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV32-NEXT: vremu.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vremu.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: urem_v4i64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV64-NEXT: vremu.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vremu.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: urem_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v10, (a1) +; CHECK-NEXT: vremu.vv v8, v8, v10 +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: ret %a = load <4 x i64>, ptr %x %b = load <4 x i64>, ptr %y %c = urem <4 x i64> %a, %b @@ -4832,29 +3238,14 @@ define void @urem_v4i64(ptr %x, ptr %y) { } define void @extract_v4i64(ptr %x, ptr %y) { -; LMULMAX2-LABEL: extract_v4i64: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-NEXT: vle64.v v8, (a0) -; LMULMAX2-NEXT: vle64.v v10, (a1) -; LMULMAX2-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-NEXT: vse64.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: extract_v4i64: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-NEXT: vle64.v v8, (a0) -; LMULMAX1-NEXT: addi a2, a0, 16 -; LMULMAX1-NEXT: vle64.v v9, (a2) -; LMULMAX1-NEXT: vle64.v v10, (a1) -; LMULMAX1-NEXT: addi a1, a1, 16 -; LMULMAX1-NEXT: vle64.v v11, (a1) -; LMULMAX1-NEXT: vadd.vv v9, v9, v11 -; LMULMAX1-NEXT: vadd.vv v8, 
v8, v10 -; LMULMAX1-NEXT: vse64.v v8, (a0) -; LMULMAX1-NEXT: vse64.v v9, (a2) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: extract_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v10, (a1) +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: ret %a = load <4 x i64>, ptr %x %b = load <4 x i64>, ptr %y br label %"compute" @@ -4865,70 +3256,55 @@ define void @extract_v4i64(ptr %x, ptr %y) { } define void @mulhu_v32i8(ptr %x) { -; LMULMAX2-LABEL: mulhu_v32i8: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a1, 32 -; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-NEXT: vle8.v v8, (a0) -; LMULMAX2-NEXT: vmv.v.i v10, 0 -; LMULMAX2-NEXT: lui a1, 163907 -; LMULMAX2-NEXT: addi a1, a1, -2044 -; LMULMAX2-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; LMULMAX2-NEXT: vmv.s.x v0, a1 -; LMULMAX2-NEXT: li a1, -128 -; LMULMAX2-NEXT: vsetvli zero, zero, e8, m2, ta, ma -; LMULMAX2-NEXT: vmerge.vxm v12, v10, a1, v0 -; LMULMAX2-NEXT: lui a1, 66049 -; LMULMAX2-NEXT: addi a1, a1, 32 -; LMULMAX2-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; LMULMAX2-NEXT: vmv.s.x v0, a1 -; LMULMAX2-NEXT: vsetvli zero, zero, e8, m2, ta, ma -; LMULMAX2-NEXT: lui a1, %hi(.LCPI181_0) -; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI181_0) -; LMULMAX2-NEXT: vle8.v v14, (a1) -; LMULMAX2-NEXT: vmerge.vim v10, v10, 1, v0 -; LMULMAX2-NEXT: vsrl.vv v10, v8, v10 -; LMULMAX2-NEXT: vmulhu.vv v10, v10, v14 -; LMULMAX2-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-NEXT: vmulhu.vv v8, v8, v12 -; LMULMAX2-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-NEXT: vmv.v.i v10, 4 -; LMULMAX2-NEXT: lui a1, 8208 -; LMULMAX2-NEXT: addi a1, a1, 513 -; LMULMAX2-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; LMULMAX2-NEXT: vmv.s.x v0, a1 -; LMULMAX2-NEXT: vsetvli zero, zero, e8, m2, ta, ma -; LMULMAX2-NEXT: vmerge.vim v10, v10, 1, v0 -; LMULMAX2-NEXT: lui a1, 66785 -; LMULMAX2-NEXT: addi a1, a1, 78 -; LMULMAX2-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; 
LMULMAX2-NEXT: vmv.s.x v0, a1 -; LMULMAX2-NEXT: vsetvli zero, zero, e8, m2, ta, ma -; LMULMAX2-NEXT: vmerge.vim v10, v10, 3, v0 -; LMULMAX2-NEXT: lui a1, 529160 -; LMULMAX2-NEXT: addi a1, a1, 304 -; LMULMAX2-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; LMULMAX2-NEXT: vmv.s.x v0, a1 -; LMULMAX2-NEXT: vsetvli zero, zero, e8, m2, ta, ma -; LMULMAX2-NEXT: vmerge.vim v10, v10, 2, v0 -; LMULMAX2-NEXT: vsrl.vv v8, v8, v10 -; LMULMAX2-NEXT: vse8.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: mulhu_v32i8: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vle8.v v8, (a1) -; LMULMAX1-NEXT: lui a2, %hi(.LCPI181_0) -; LMULMAX1-NEXT: addi a2, a2, %lo(.LCPI181_0) -; LMULMAX1-NEXT: vle8.v v9, (a2) -; LMULMAX1-NEXT: vle8.v v10, (a0) -; LMULMAX1-NEXT: vdivu.vv v8, v8, v9 -; LMULMAX1-NEXT: vdivu.vv v9, v10, v9 -; LMULMAX1-NEXT: vse8.v v9, (a0) -; LMULMAX1-NEXT: vse8.v v8, (a1) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: mulhu_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: lui a1, 163907 +; CHECK-NEXT: addi a1, a1, -2044 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmv.s.x v0, a1 +; CHECK-NEXT: li a1, -128 +; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; CHECK-NEXT: vmerge.vxm v12, v10, a1, v0 +; CHECK-NEXT: lui a1, 66049 +; CHECK-NEXT: addi a1, a1, 32 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmv.s.x v0, a1 +; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; CHECK-NEXT: lui a1, %hi(.LCPI181_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI181_0) +; CHECK-NEXT: vle8.v v14, (a1) +; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 +; CHECK-NEXT: vsrl.vv v10, v8, v10 +; CHECK-NEXT: vmulhu.vv v10, v10, v14 +; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: vmulhu.vv v8, v8, v12 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: vmv.v.i v10, 4 +; CHECK-NEXT: 
lui a1, 8208 +; CHECK-NEXT: addi a1, a1, 513 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmv.s.x v0, a1 +; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 +; CHECK-NEXT: lui a1, 66785 +; CHECK-NEXT: addi a1, a1, 78 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmv.s.x v0, a1 +; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; CHECK-NEXT: vmerge.vim v10, v10, 3, v0 +; CHECK-NEXT: lui a1, 529160 +; CHECK-NEXT: addi a1, a1, 304 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmv.s.x v0, a1 +; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; CHECK-NEXT: vmerge.vim v10, v10, 2, v0 +; CHECK-NEXT: vsrl.vv v8, v8, v10 +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x %b = udiv <32 x i8> %a, store <32 x i8> %b, ptr %x @@ -4936,93 +3312,78 @@ define void @mulhu_v32i8(ptr %x) { } define void @mulhu_v16i16(ptr %x) { -; LMULMAX2-RV32-LABEL: mulhu_v16i16: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-RV32-NEXT: vle16.v v10, (a0) -; LMULMAX2-RV32-NEXT: li a1, 257 -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 -; LMULMAX2-RV32-NEXT: vmv.v.i v8, 0 -; LMULMAX2-RV32-NEXT: lui a1, 1048568 -; LMULMAX2-RV32-NEXT: vmerge.vxm v12, v8, a1, v0 -; LMULMAX2-RV32-NEXT: lui a1, 4 -; LMULMAX2-RV32-NEXT: addi a1, a1, 64 -; LMULMAX2-RV32-NEXT: vmv.s.x v8, a1 -; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.i v9, 0 -; LMULMAX2-RV32-NEXT: vmv1r.v v0, v8 -; LMULMAX2-RV32-NEXT: vmerge.vim v9, v9, 1, v0 -; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI182_0) -; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI182_0) -; LMULMAX2-RV32-NEXT: vle16.v v14, (a1) -; LMULMAX2-RV32-NEXT: vsext.vf2 v16, v9 -; LMULMAX2-RV32-NEXT: vsrl.vv v16, v10, v16 -; LMULMAX2-RV32-NEXT: vmulhu.vv v14, v16, v14 -; LMULMAX2-RV32-NEXT: vsub.vv v10, v10, v14 -; LMULMAX2-RV32-NEXT: 
vmulhu.vv v10, v10, v12 -; LMULMAX2-RV32-NEXT: vadd.vv v10, v10, v14 -; LMULMAX2-RV32-NEXT: lui a1, 2 -; LMULMAX2-RV32-NEXT: addi a1, a1, 289 -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 -; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.i v9, 3 -; LMULMAX2-RV32-NEXT: vmerge.vim v9, v9, 2, v0 -; LMULMAX2-RV32-NEXT: vmv1r.v v0, v8 -; LMULMAX2-RV32-NEXT: vmerge.vim v8, v9, 1, v0 -; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; LMULMAX2-RV32-NEXT: vsext.vf2 v12, v8 -; LMULMAX2-RV32-NEXT: vsrl.vv v8, v10, v12 -; LMULMAX2-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX2-RV64-LABEL: mulhu_v16i16: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV64-NEXT: li a1, 257 -; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1 -; LMULMAX2-RV64-NEXT: vmv.v.i v10, 0 -; LMULMAX2-RV64-NEXT: lui a1, 1048568 -; LMULMAX2-RV64-NEXT: vmerge.vxm v10, v10, a1, v0 -; LMULMAX2-RV64-NEXT: li a1, 1 -; LMULMAX2-RV64-NEXT: slli a1, a1, 48 -; LMULMAX2-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV64-NEXT: vmv.v.x v12, a1 -; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI182_0) -; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI182_0) -; LMULMAX2-RV64-NEXT: vle16.v v14, (a1) -; LMULMAX2-RV64-NEXT: vsext.vf2 v16, v12 -; LMULMAX2-RV64-NEXT: vsrl.vv v12, v8, v16 -; LMULMAX2-RV64-NEXT: vmulhu.vv v12, v12, v14 -; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v12 -; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v12 -; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI182_1) -; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI182_1) -; LMULMAX2-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV64-NEXT: vlse64.v v10, (a1), zero -; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-RV64-NEXT: vsext.vf2 v12, v10 -; LMULMAX2-RV64-NEXT: vsrl.vv v8, v8, v12 -; LMULMAX2-RV64-NEXT: vse16.v v8, 
(a0) -; LMULMAX2-RV64-NEXT: ret +; RV32-LABEL: mulhu_v16i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RV32-NEXT: vle16.v v10, (a0) +; RV32-NEXT: li a1, 257 +; RV32-NEXT: vmv.s.x v0, a1 +; RV32-NEXT: vmv.v.i v8, 0 +; RV32-NEXT: lui a1, 1048568 +; RV32-NEXT: vmerge.vxm v12, v8, a1, v0 +; RV32-NEXT: lui a1, 4 +; RV32-NEXT: addi a1, a1, 64 +; RV32-NEXT: vmv.s.x v8, a1 +; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; RV32-NEXT: vmv.v.i v9, 0 +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vmerge.vim v9, v9, 1, v0 +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI182_0) +; RV32-NEXT: addi a1, a1, %lo(.LCPI182_0) +; RV32-NEXT: vle16.v v14, (a1) +; RV32-NEXT: vsext.vf2 v16, v9 +; RV32-NEXT: vsrl.vv v16, v10, v16 +; RV32-NEXT: vmulhu.vv v14, v16, v14 +; RV32-NEXT: vsub.vv v10, v10, v14 +; RV32-NEXT: vmulhu.vv v10, v10, v12 +; RV32-NEXT: vadd.vv v10, v10, v14 +; RV32-NEXT: lui a1, 2 +; RV32-NEXT: addi a1, a1, 289 +; RV32-NEXT: vmv.s.x v0, a1 +; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; RV32-NEXT: vmv.v.i v9, 3 +; RV32-NEXT: vmerge.vim v9, v9, 2, v0 +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vmerge.vim v8, v9, 1, v0 +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV32-NEXT: vsext.vf2 v12, v8 +; RV32-NEXT: vsrl.vv v8, v10, v12 +; RV32-NEXT: vse16.v v8, (a0) +; RV32-NEXT: ret ; -; LMULMAX1-LABEL: mulhu_v16i16: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vle16.v v8, (a1) -; LMULMAX1-NEXT: lui a2, %hi(.LCPI182_0) -; LMULMAX1-NEXT: addi a2, a2, %lo(.LCPI182_0) -; LMULMAX1-NEXT: vle16.v v9, (a2) -; LMULMAX1-NEXT: vle16.v v10, (a0) -; LMULMAX1-NEXT: vdivu.vv v8, v8, v9 -; LMULMAX1-NEXT: vdivu.vv v9, v10, v9 -; LMULMAX1-NEXT: vse16.v v9, (a0) -; LMULMAX1-NEXT: vse16.v v8, (a1) -; LMULMAX1-NEXT: ret +; RV64-LABEL: mulhu_v16i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RV64-NEXT: vle16.v v8, (a0) +; 
RV64-NEXT: li a1, 257 +; RV64-NEXT: vmv.s.x v0, a1 +; RV64-NEXT: vmv.v.i v10, 0 +; RV64-NEXT: lui a1, 1048568 +; RV64-NEXT: vmerge.vxm v10, v10, a1, v0 +; RV64-NEXT: li a1, 1 +; RV64-NEXT: slli a1, a1, 48 +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vmv.v.x v12, a1 +; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RV64-NEXT: lui a1, %hi(.LCPI182_0) +; RV64-NEXT: addi a1, a1, %lo(.LCPI182_0) +; RV64-NEXT: vle16.v v14, (a1) +; RV64-NEXT: vsext.vf2 v16, v12 +; RV64-NEXT: vsrl.vv v12, v8, v16 +; RV64-NEXT: vmulhu.vv v12, v12, v14 +; RV64-NEXT: vsub.vv v8, v8, v12 +; RV64-NEXT: vmulhu.vv v8, v8, v10 +; RV64-NEXT: vadd.vv v8, v8, v12 +; RV64-NEXT: lui a1, %hi(.LCPI182_1) +; RV64-NEXT: addi a1, a1, %lo(.LCPI182_1) +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vlse64.v v10, (a1), zero +; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RV64-NEXT: vsext.vf2 v12, v10 +; RV64-NEXT: vsrl.vv v8, v8, v12 +; RV64-NEXT: vse16.v v8, (a0) +; RV64-NEXT: ret %a = load <16 x i16>, ptr %x %b = udiv <16 x i16> %a, store <16 x i16> %b, ptr %x @@ -5030,80 +3391,31 @@ define void @mulhu_v16i16(ptr %x) { } define void @mulhu_v8i32(ptr %x) { -; LMULMAX2-LABEL: mulhu_v8i32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: li a1, 68 -; LMULMAX2-NEXT: vmv.s.x v0, a1 -; LMULMAX2-NEXT: lui a1, %hi(.LCPI183_0) -; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI183_0) -; LMULMAX2-NEXT: vle32.v v10, (a1) -; LMULMAX2-NEXT: vmv.v.i v12, 0 -; LMULMAX2-NEXT: lui a1, 524288 -; LMULMAX2-NEXT: vmerge.vxm v12, v12, a1, v0 -; LMULMAX2-NEXT: vmulhu.vv v10, v8, v10 -; LMULMAX2-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-NEXT: vmulhu.vv v8, v8, v12 -; LMULMAX2-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-NEXT: lui a1, 4128 -; LMULMAX2-NEXT: addi a1, a1, 514 -; LMULMAX2-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX2-NEXT: vmv.v.x v10, a1 -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vsext.vf4 
v12, v10 -; LMULMAX2-NEXT: vsrl.vv v8, v8, v12 -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: mulhu_v8i32: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle32.v v9, (a1) -; LMULMAX1-RV32-NEXT: lui a2, 524288 -; LMULMAX1-RV32-NEXT: vmv.s.x v10, a2 -; LMULMAX1-RV32-NEXT: vmv.v.i v11, 0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 3, e32, m1, tu, ma -; LMULMAX1-RV32-NEXT: vslideup.vi v11, v10, 2 -; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI183_0) -; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI183_0) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle32.v v10, (a2) -; LMULMAX1-RV32-NEXT: vmulhu.vv v12, v9, v10 -; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v12 -; LMULMAX1-RV32-NEXT: vmulhu.vv v9, v9, v11 -; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v12 -; LMULMAX1-RV32-NEXT: lui a2, 4128 -; LMULMAX1-RV32-NEXT: addi a2, a2, 514 -; LMULMAX1-RV32-NEXT: vmv.s.x v12, a2 -; LMULMAX1-RV32-NEXT: vsext.vf4 v13, v12 -; LMULMAX1-RV32-NEXT: vsrl.vv v9, v9, v13 -; LMULMAX1-RV32-NEXT: vmulhu.vv v10, v8, v10 -; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vmulhu.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vsrl.vv v8, v8, v13 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: mulhu_v8i32: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vle32.v v9, (a1) -; LMULMAX1-RV64-NEXT: lui a2, 36976 -; LMULMAX1-RV64-NEXT: addi a2, a2, 1541 -; LMULMAX1-RV64-NEXT: vmv.s.x v10, a2 -; LMULMAX1-RV64-NEXT: vsext.vf4 v11, v10 -; LMULMAX1-RV64-NEXT: vdivu.vv v9, v9, v11 -; LMULMAX1-RV64-NEXT: vdivu.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; 
LMULMAX1-RV64-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: mulhu_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: li a1, 68 +; CHECK-NEXT: vmv.s.x v0, a1 +; CHECK-NEXT: lui a1, %hi(.LCPI183_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI183_0) +; CHECK-NEXT: vle32.v v10, (a1) +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: lui a1, 524288 +; CHECK-NEXT: vmerge.vxm v12, v12, a1, v0 +; CHECK-NEXT: vmulhu.vv v10, v8, v10 +; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: vmulhu.vv v8, v8, v12 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: lui a1, 4128 +; CHECK-NEXT: addi a1, a1, 514 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vsext.vf4 v12, v10 +; CHECK-NEXT: vsrl.vv v8, v8, v12 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret %a = load <8 x i32>, ptr %x %b = udiv <8 x i32> %a, store <8 x i32> %b, ptr %x @@ -5111,131 +3423,61 @@ define void @mulhu_v8i32(ptr %x) { } define void @mulhu_v4i64(ptr %x) { -; LMULMAX2-RV32-LABEL: mulhu_v4i64: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI184_0) -; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI184_0) -; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vle32.v v10, (a1) -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmulhu.vv v10, v8, v10 -; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: lui a1, 524288 -; LMULMAX2-RV32-NEXT: vmv.s.x v12, a1 -; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.i v14, 0 -; LMULMAX2-RV32-NEXT: vsetivli zero, 6, e32, m2, tu, ma -; LMULMAX2-RV32-NEXT: vslideup.vi v14, v12, 5 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmulhu.vv v8, v8, v14 -; 
LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI184_1) -; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI184_1) -; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vle8.v v10, (a1) -; LMULMAX2-RV32-NEXT: vsext.vf4 v12, v10 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vsrl.vv v8, v8, v12 -; LMULMAX2-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX2-RV64-LABEL: mulhu_v4i64: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64-NEXT: li a1, -1 -; LMULMAX2-RV64-NEXT: slli a1, a1, 63 -; LMULMAX2-RV64-NEXT: vmv.s.x v10, a1 -; LMULMAX2-RV64-NEXT: vmv.v.i v12, 0 -; LMULMAX2-RV64-NEXT: vsetivli zero, 3, e64, m2, tu, ma -; LMULMAX2-RV64-NEXT: vslideup.vi v12, v10, 2 -; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI184_0) -; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI184_0) -; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV64-NEXT: vle64.v v10, (a1) -; LMULMAX2-RV64-NEXT: vmulhu.vv v10, v8, v10 -; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v12 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: lui a1, 12320 -; LMULMAX2-RV64-NEXT: addi a1, a1, 513 -; LMULMAX2-RV64-NEXT: vmv.s.x v10, a1 -; LMULMAX2-RV64-NEXT: vsext.vf8 v12, v10 -; LMULMAX2-RV64-NEXT: vsrl.vv v8, v8, v12 -; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret -; -; LMULMAX1-RV32-LABEL: mulhu_v4i64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v9, (a1) -; LMULMAX1-RV32-NEXT: lui a2, 144 -; LMULMAX1-RV32-NEXT: addi a2, a2, 7 -; LMULMAX1-RV32-NEXT: vmv.s.x v10, a2 -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vsext.vf4 v11, v10 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, 
e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vdivu.vv v9, v9, v11 -; LMULMAX1-RV32-NEXT: lui a2, 80 -; LMULMAX1-RV32-NEXT: addi a2, a2, 3 -; LMULMAX1-RV32-NEXT: vmv.s.x v10, a2 -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vsext.vf4 v11, v10 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vdivu.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v9, (a1) -; LMULMAX1-RV32-NEXT: ret +; RV32-LABEL: mulhu_v4i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: lui a1, %hi(.LCPI184_0) +; RV32-NEXT: addi a1, a1, %lo(.LCPI184_0) +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vle32.v v10, (a1) +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vmulhu.vv v10, v8, v10 +; RV32-NEXT: vsub.vv v8, v8, v10 +; RV32-NEXT: lui a1, 524288 +; RV32-NEXT: vmv.s.x v12, a1 +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vmv.v.i v14, 0 +; RV32-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; RV32-NEXT: vslideup.vi v14, v12, 5 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vmulhu.vv v8, v8, v14 +; RV32-NEXT: vadd.vv v8, v8, v10 +; RV32-NEXT: lui a1, %hi(.LCPI184_1) +; RV32-NEXT: addi a1, a1, %lo(.LCPI184_1) +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vle8.v v10, (a1) +; RV32-NEXT: vsext.vf4 v12, v10 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vsrl.vv v8, v8, v12 +; RV32-NEXT: vse64.v v8, (a0) +; RV32-NEXT: ret ; -; LMULMAX1-RV64-LABEL: mulhu_v4i64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v9, (a1) -; LMULMAX1-RV64-NEXT: vmv.v.i v10, 0 -; LMULMAX1-RV64-NEXT: li a2, -1 -; LMULMAX1-RV64-NEXT: slli a2, a2, 63 -; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma -; LMULMAX1-RV64-NEXT: vmv.s.x v10, a2 -; 
LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI184_0) -; LMULMAX1-RV64-NEXT: addi a2, a2, %lo(.LCPI184_0) -; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vlse64.v v11, (a2), zero -; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI184_1) -; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI184_1)(a2) -; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma -; LMULMAX1-RV64-NEXT: vmv.s.x v11, a2 -; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vmulhu.vv v11, v9, v11 -; LMULMAX1-RV64-NEXT: vsub.vv v9, v9, v11 -; LMULMAX1-RV64-NEXT: vmulhu.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v11 -; LMULMAX1-RV64-NEXT: vid.v v10 -; LMULMAX1-RV64-NEXT: vadd.vi v11, v10, 2 -; LMULMAX1-RV64-NEXT: vsrl.vv v9, v9, v11 -; LMULMAX1-RV64-NEXT: lui a2, 838861 -; LMULMAX1-RV64-NEXT: addiw a2, a2, -819 -; LMULMAX1-RV64-NEXT: slli a3, a2, 32 -; LMULMAX1-RV64-NEXT: add a2, a2, a3 -; LMULMAX1-RV64-NEXT: vmv.v.x v11, a2 -; LMULMAX1-RV64-NEXT: lui a2, 699051 -; LMULMAX1-RV64-NEXT: addiw a2, a2, -1365 -; LMULMAX1-RV64-NEXT: slli a3, a2, 32 -; LMULMAX1-RV64-NEXT: add a2, a2, a3 -; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma -; LMULMAX1-RV64-NEXT: vmv.s.x v11, a2 -; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vmulhu.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vadd.vi v10, v10, 1 -; LMULMAX1-RV64-NEXT: vsrl.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v9, (a1) -; LMULMAX1-RV64-NEXT: ret +; RV64-LABEL: mulhu_v4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: li a1, -1 +; RV64-NEXT: slli a1, a1, 63 +; RV64-NEXT: vmv.s.x v10, a1 +; RV64-NEXT: vmv.v.i v12, 0 +; RV64-NEXT: vsetivli zero, 3, e64, m2, tu, ma +; RV64-NEXT: vslideup.vi v12, v10, 2 +; RV64-NEXT: lui a1, %hi(.LCPI184_0) +; RV64-NEXT: addi a1, a1, %lo(.LCPI184_0) +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vle64.v v10, (a1) +; 
RV64-NEXT: vmulhu.vv v10, v8, v10 +; RV64-NEXT: vsub.vv v8, v8, v10 +; RV64-NEXT: vmulhu.vv v8, v8, v12 +; RV64-NEXT: vadd.vv v8, v8, v10 +; RV64-NEXT: lui a1, 12320 +; RV64-NEXT: addi a1, a1, 513 +; RV64-NEXT: vmv.s.x v10, a1 +; RV64-NEXT: vsext.vf8 v12, v10 +; RV64-NEXT: vsrl.vv v8, v8, v12 +; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: ret %a = load <4 x i64>, ptr %x %b = udiv <4 x i64> %a, store <4 x i64> %b, ptr %x @@ -5243,44 +3485,26 @@ define void @mulhu_v4i64(ptr %x) { } define void @mulhs_v32i8(ptr %x) { -; LMULMAX2-LABEL: mulhs_v32i8: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a1, 32 -; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-NEXT: vle8.v v8, (a0) -; LMULMAX2-NEXT: vmv.v.i v10, 7 -; LMULMAX2-NEXT: lui a1, 304453 -; LMULMAX2-NEXT: addi a1, a1, -1452 -; LMULMAX2-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; LMULMAX2-NEXT: vmv.s.x v0, a1 -; LMULMAX2-NEXT: vsetvli zero, zero, e8, m2, ta, ma -; LMULMAX2-NEXT: vmerge.vim v10, v10, 1, v0 -; LMULMAX2-NEXT: li a1, -123 -; LMULMAX2-NEXT: vmv.v.x v12, a1 -; LMULMAX2-NEXT: li a1, 57 -; LMULMAX2-NEXT: vmerge.vxm v12, v12, a1, v0 -; LMULMAX2-NEXT: vmulhu.vv v8, v8, v12 -; LMULMAX2-NEXT: vsrl.vv v8, v8, v10 -; LMULMAX2-NEXT: vse8.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: mulhs_v32i8: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX1-NEXT: vle8.v v8, (a0) -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vle8.v v9, (a1) -; LMULMAX1-NEXT: lui a2, 5 -; LMULMAX1-NEXT: addi a2, a2, -1452 -; LMULMAX1-NEXT: vmv.s.x v0, a2 -; LMULMAX1-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; LMULMAX1-NEXT: vmv.v.i v10, -9 -; LMULMAX1-NEXT: vmerge.vim v10, v10, 9, v0 -; LMULMAX1-NEXT: vdivu.vv v9, v9, v10 -; LMULMAX1-NEXT: vdivu.vv v8, v8, v10 -; LMULMAX1-NEXT: vse8.v v8, (a0) -; LMULMAX1-NEXT: vse8.v v9, (a1) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: mulhs_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vle8.v 
v8, (a0) +; CHECK-NEXT: vmv.v.i v10, 7 +; CHECK-NEXT: lui a1, 304453 +; CHECK-NEXT: addi a1, a1, -1452 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmv.s.x v0, a1 +; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 +; CHECK-NEXT: li a1, -123 +; CHECK-NEXT: vmv.v.x v12, a1 +; CHECK-NEXT: li a1, 57 +; CHECK-NEXT: vmerge.vxm v12, v12, a1, v0 +; CHECK-NEXT: vmulhu.vv v8, v8, v12 +; CHECK-NEXT: vsrl.vv v8, v8, v10 +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x %b = udiv <32 x i8> %a, store <32 x i8> %b, ptr %x @@ -5288,41 +3512,25 @@ define void @mulhs_v32i8(ptr %x) { } define void @mulhs_v16i16(ptr %x) { -; LMULMAX2-LABEL: mulhs_v16i16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-NEXT: vle16.v v8, (a0) -; LMULMAX2-NEXT: lui a1, 5 -; LMULMAX2-NEXT: addi a1, a1, -1755 -; LMULMAX2-NEXT: vmv.v.x v10, a1 -; LMULMAX2-NEXT: lui a1, 7 -; LMULMAX2-NEXT: addi a1, a1, -1687 -; LMULMAX2-NEXT: vmv.s.x v0, a1 -; LMULMAX2-NEXT: lui a1, 1048571 -; LMULMAX2-NEXT: addi a1, a1, 1755 -; LMULMAX2-NEXT: vmerge.vxm v10, v10, a1, v0 -; LMULMAX2-NEXT: vmulh.vv v8, v8, v10 -; LMULMAX2-NEXT: vsra.vi v8, v8, 1 -; LMULMAX2-NEXT: vsrl.vi v10, v8, 15 -; LMULMAX2-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-NEXT: vse16.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: mulhs_v16i16: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vle16.v v8, (a0) -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vle16.v v9, (a1) -; LMULMAX1-NEXT: li a2, 105 -; LMULMAX1-NEXT: vmv.s.x v0, a2 -; LMULMAX1-NEXT: vmv.v.i v10, 7 -; LMULMAX1-NEXT: vmerge.vim v10, v10, -7, v0 -; LMULMAX1-NEXT: vdiv.vv v9, v9, v10 -; LMULMAX1-NEXT: vdiv.vv v8, v8, v10 -; LMULMAX1-NEXT: vse16.v v8, (a0) -; LMULMAX1-NEXT: vse16.v v9, (a1) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: mulhs_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; 
CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: lui a1, 5 +; CHECK-NEXT: addi a1, a1, -1755 +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: lui a1, 7 +; CHECK-NEXT: addi a1, a1, -1687 +; CHECK-NEXT: vmv.s.x v0, a1 +; CHECK-NEXT: lui a1, 1048571 +; CHECK-NEXT: addi a1, a1, 1755 +; CHECK-NEXT: vmerge.vxm v10, v10, a1, v0 +; CHECK-NEXT: vmulh.vv v8, v8, v10 +; CHECK-NEXT: vsra.vi v8, v8, 1 +; CHECK-NEXT: vsrl.vi v10, v8, 15 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <16 x i16>, ptr %x %b = sdiv <16 x i16> %a, store <16 x i16> %b, ptr %x @@ -5330,83 +3538,40 @@ define void @mulhs_v16i16(ptr %x) { } define void @mulhs_v8i32(ptr %x) { -; LMULMAX2-RV32-LABEL: mulhs_v8i32: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV32-NEXT: lui a1, 419430 -; LMULMAX2-RV32-NEXT: addi a1, a1, 1639 -; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV32-NEXT: li a1, 85 -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 -; LMULMAX2-RV32-NEXT: lui a1, 629146 -; LMULMAX2-RV32-NEXT: addi a1, a1, -1639 -; LMULMAX2-RV32-NEXT: vmerge.vxm v10, v10, a1, v0 -; LMULMAX2-RV32-NEXT: vmulh.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 31 -; LMULMAX2-RV32-NEXT: vsra.vi v8, v8, 1 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX2-RV64-LABEL: mulhs_v8i32: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI187_0) -; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI187_0) -; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV64-NEXT: vlse64.v v10, (a1), zero -; LMULMAX2-RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV64-NEXT: vmulh.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vsra.vi v8, v8, 1 -; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 31 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 
-; LMULMAX2-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret -; -; LMULMAX1-RV32-LABEL: mulhs_v8i32: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle32.v v9, (a1) -; LMULMAX1-RV32-NEXT: lui a2, 419430 -; LMULMAX1-RV32-NEXT: addi a2, a2, 1639 -; LMULMAX1-RV32-NEXT: vmv.v.x v10, a2 -; LMULMAX1-RV32-NEXT: vmv.v.i v0, 5 -; LMULMAX1-RV32-NEXT: lui a2, 629146 -; LMULMAX1-RV32-NEXT: addi a2, a2, -1639 -; LMULMAX1-RV32-NEXT: vmerge.vxm v10, v10, a2, v0 -; LMULMAX1-RV32-NEXT: vmulh.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v11, v9, 31 -; LMULMAX1-RV32-NEXT: vsra.vi v9, v9, 1 -; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v11 -; LMULMAX1-RV32-NEXT: vmulh.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 31 -; LMULMAX1-RV32-NEXT: vsra.vi v8, v8, 1 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: ret +; RV32-LABEL: mulhs_v8i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: lui a1, 419430 +; RV32-NEXT: addi a1, a1, 1639 +; RV32-NEXT: vmv.v.x v10, a1 +; RV32-NEXT: li a1, 85 +; RV32-NEXT: vmv.s.x v0, a1 +; RV32-NEXT: lui a1, 629146 +; RV32-NEXT: addi a1, a1, -1639 +; RV32-NEXT: vmerge.vxm v10, v10, a1, v0 +; RV32-NEXT: vmulh.vv v8, v8, v10 +; RV32-NEXT: vsrl.vi v10, v8, 31 +; RV32-NEXT: vsra.vi v8, v8, 1 +; RV32-NEXT: vadd.vv v8, v8, v10 +; RV32-NEXT: vse32.v v8, (a0) +; RV32-NEXT: ret ; -; LMULMAX1-RV64-LABEL: mulhs_v8i32: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vle32.v v9, (a1) -; LMULMAX1-RV64-NEXT: li a2, 3 -; LMULMAX1-RV64-NEXT: slli a2, a2, 33 -; LMULMAX1-RV64-NEXT: addi a2, a2, -5 -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, 
m1, ta, ma -; LMULMAX1-RV64-NEXT: vmv.v.x v10, a2 -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vdiv.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vdiv.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV64-NEXT: ret +; RV64-LABEL: mulhs_v8i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vle32.v v8, (a0) +; RV64-NEXT: lui a1, %hi(.LCPI187_0) +; RV64-NEXT: addi a1, a1, %lo(.LCPI187_0) +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vlse64.v v10, (a1), zero +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vmulh.vv v8, v8, v10 +; RV64-NEXT: vsra.vi v8, v8, 1 +; RV64-NEXT: vsrl.vi v10, v8, 31 +; RV64-NEXT: vadd.vv v8, v8, v10 +; RV64-NEXT: vse32.v v8, (a0) +; RV64-NEXT: ret %a = load <8 x i32>, ptr %x %b = sdiv <8 x i32> %a, store <8 x i32> %b, ptr %x @@ -5414,122 +3579,71 @@ define void @mulhs_v8i32(ptr %x) { } define void @mulhs_v4i64(ptr %x) { -; LMULMAX2-RV32-LABEL: mulhs_v4i64: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32-NEXT: lui a1, 349525 -; LMULMAX2-RV32-NEXT: addi a2, a1, 1365 -; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v10, a2 -; LMULMAX2-RV32-NEXT: li a2, 17 -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2 -; LMULMAX2-RV32-NEXT: addi a1, a1, 1366 -; LMULMAX2-RV32-NEXT: vmerge.vxm v10, v10, a1, v0 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmulh.vv v10, v8, v10 -; LMULMAX2-RV32-NEXT: lui a1, 1048560 -; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v12, a1 -; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vsext.vf4 v14, v12 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmadd.vv v14, v8, v10 -; LMULMAX2-RV32-NEXT: li a1, 63 -; LMULMAX2-RV32-NEXT: 
vsrl.vx v8, v14, a1 -; LMULMAX2-RV32-NEXT: lui a1, 16 -; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vsext.vf4 v12, v10 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vsra.vv v10, v14, v12 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX2-RV64-LABEL: mulhs_v4i64: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64-NEXT: lui a1, 349525 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a2, a1, 32 -; LMULMAX2-RV64-NEXT: add a1, a1, a2 -; LMULMAX2-RV64-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI188_0) -; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI188_0)(a1) -; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; LMULMAX2-RV64-NEXT: vmv.v.i v0, 5 -; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV64-NEXT: vmerge.vxm v10, v10, a1, v0 -; LMULMAX2-RV64-NEXT: vmulh.vv v10, v8, v10 -; LMULMAX2-RV64-NEXT: lui a1, 1044496 -; LMULMAX2-RV64-NEXT: addi a1, a1, -256 -; LMULMAX2-RV64-NEXT: vmv.s.x v12, a1 -; LMULMAX2-RV64-NEXT: vsext.vf8 v14, v12 -; LMULMAX2-RV64-NEXT: vmadd.vv v14, v8, v10 -; LMULMAX2-RV64-NEXT: li a1, 63 -; LMULMAX2-RV64-NEXT: vsrl.vx v8, v14, a1 -; LMULMAX2-RV64-NEXT: lui a1, 4096 -; LMULMAX2-RV64-NEXT: addi a1, a1, 256 -; LMULMAX2-RV64-NEXT: vmv.s.x v10, a1 -; LMULMAX2-RV64-NEXT: vsext.vf8 v12, v10 -; LMULMAX2-RV64-NEXT: vsra.vv v10, v14, v12 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret -; -; LMULMAX1-RV32-LABEL: mulhs_v4i64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v9, 
(a1) -; LMULMAX1-RV32-NEXT: lui a2, 1048528 -; LMULMAX1-RV32-NEXT: addi a2, a2, 3 -; LMULMAX1-RV32-NEXT: vmv.s.x v10, a2 -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vsext.vf4 v11, v10 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vdiv.vv v9, v9, v11 -; LMULMAX1-RV32-NEXT: vdiv.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v9, (a1) -; LMULMAX1-RV32-NEXT: ret +; RV32-LABEL: mulhs_v4i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a2, a1, 1365 +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v10, a2 +; RV32-NEXT: li a2, 17 +; RV32-NEXT: vmv.s.x v0, a2 +; RV32-NEXT: addi a1, a1, 1366 +; RV32-NEXT: vmerge.vxm v10, v10, a1, v0 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vmulh.vv v10, v8, v10 +; RV32-NEXT: lui a1, 1048560 +; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vsext.vf4 v14, v12 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vmadd.vv v14, v8, v10 +; RV32-NEXT: li a1, 63 +; RV32-NEXT: vsrl.vx v8, v14, a1 +; RV32-NEXT: lui a1, 16 +; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vsext.vf4 v12, v10 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vsra.vv v10, v14, v12 +; RV32-NEXT: vadd.vv v8, v10, v8 +; RV32-NEXT: vse64.v v8, (a0) +; RV32-NEXT: ret ; -; LMULMAX1-RV64-LABEL: mulhs_v4i64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: lui a2, 349525 -; LMULMAX1-RV64-NEXT: addiw a2, a2, 1365 -; LMULMAX1-RV64-NEXT: slli a3, a2, 32 -; LMULMAX1-RV64-NEXT: add a2, a2, a3 -; 
LMULMAX1-RV64-NEXT: lui a3, %hi(.LCPI188_0) -; LMULMAX1-RV64-NEXT: ld a3, %lo(.LCPI188_0)(a3) -; LMULMAX1-RV64-NEXT: vle64.v v9, (a1) -; LMULMAX1-RV64-NEXT: vmv.v.x v10, a2 -; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma -; LMULMAX1-RV64-NEXT: vmv.s.x v10, a3 -; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vmulh.vv v11, v9, v10 -; LMULMAX1-RV64-NEXT: vid.v v12 -; LMULMAX1-RV64-NEXT: vrsub.vi v13, v12, 0 -; LMULMAX1-RV64-NEXT: vmacc.vv v11, v13, v9 -; LMULMAX1-RV64-NEXT: li a2, 63 -; LMULMAX1-RV64-NEXT: vsrl.vx v9, v11, a2 -; LMULMAX1-RV64-NEXT: vsra.vv v11, v11, v12 -; LMULMAX1-RV64-NEXT: vadd.vv v9, v11, v9 -; LMULMAX1-RV64-NEXT: vmulh.vv v10, v8, v10 -; LMULMAX1-RV64-NEXT: vmacc.vv v10, v8, v13 -; LMULMAX1-RV64-NEXT: vsrl.vx v8, v10, a2 -; LMULMAX1-RV64-NEXT: vsra.vv v10, v10, v12 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v9, (a1) -; LMULMAX1-RV64-NEXT: ret +; RV64-LABEL: mulhs_v4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: slli a2, a1, 32 +; RV64-NEXT: add a1, a1, a2 +; RV64-NEXT: vmv.v.x v10, a1 +; RV64-NEXT: lui a1, %hi(.LCPI188_0) +; RV64-NEXT: ld a1, %lo(.LCPI188_0)(a1) +; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV64-NEXT: vmv.v.i v0, 5 +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vmerge.vxm v10, v10, a1, v0 +; RV64-NEXT: vmulh.vv v10, v8, v10 +; RV64-NEXT: lui a1, 1044496 +; RV64-NEXT: addi a1, a1, -256 +; RV64-NEXT: vmv.s.x v12, a1 +; RV64-NEXT: vsext.vf8 v14, v12 +; RV64-NEXT: vmadd.vv v14, v8, v10 +; RV64-NEXT: li a1, 63 +; RV64-NEXT: vsrl.vx v8, v14, a1 +; RV64-NEXT: lui a1, 4096 +; RV64-NEXT: addi a1, a1, 256 +; RV64-NEXT: vmv.s.x v10, a1 +; RV64-NEXT: vsext.vf8 v12, v10 +; RV64-NEXT: vsra.vv v10, v14, v12 +; RV64-NEXT: vadd.vv v8, v10, v8 +; RV64-NEXT: vse64.v v8, (a0) +; 
RV64-NEXT: ret %a = load <4 x i64>, ptr %x %b = sdiv <4 x i64> %a, store <4 x i64> %b, ptr %x @@ -5537,45 +3651,15 @@ define void @mulhs_v4i64(ptr %x) { } define void @smin_v32i8(ptr %x, ptr %y) { -; LMULMAX2-LABEL: smin_v32i8: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a2, 32 -; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; LMULMAX2-NEXT: vle8.v v8, (a0) -; LMULMAX2-NEXT: vle8.v v10, (a1) -; LMULMAX2-NEXT: vmin.vv v8, v8, v10 -; LMULMAX2-NEXT: vse8.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: smin_v32i8: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle8.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle8.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle8.v v11, (a1) -; LMULMAX1-RV32-NEXT: vmin.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vmin.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse8.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse8.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: smin_v32i8: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle8.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle8.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle8.v v11, (a1) -; LMULMAX1-RV64-NEXT: vmin.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vmin.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse8.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse8.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: smin_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 32 +; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v10, (a1) +; CHECK-NEXT: vmin.vv v8, v8, v10 +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x %b = load <32 x i8>, ptr %y %cc = icmp slt <32 x i8> %a, %b @@ -5585,44 +3669,14 @@ define void @smin_v32i8(ptr 
%x, ptr %y) { } define void @smin_v16i16(ptr %x, ptr %y) { -; LMULMAX2-LABEL: smin_v16i16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-NEXT: vle16.v v8, (a0) -; LMULMAX2-NEXT: vle16.v v10, (a1) -; LMULMAX2-NEXT: vmin.vv v8, v8, v10 -; LMULMAX2-NEXT: vse16.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: smin_v16i16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle16.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV32-NEXT: vmin.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vmin.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: smin_v16i16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV64-NEXT: vmin.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vmin.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: smin_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v10, (a1) +; CHECK-NEXT: vmin.vv v8, v8, v10 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <16 x i16>, ptr %x %b = load <16 x i16>, ptr %y %cc = icmp slt <16 x i16> %a, %b @@ -5632,44 +3686,14 @@ define void @smin_v16i16(ptr %x, ptr %y) { } define void @smin_v8i32(ptr %x, ptr %y) { -; LMULMAX2-LABEL: smin_v8i32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, 
ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: vle32.v v10, (a1) -; LMULMAX2-NEXT: vmin.vv v8, v8, v10 -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: smin_v8i32: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle32.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV32-NEXT: vmin.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vmin.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: smin_v8i32: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle32.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV64-NEXT: vmin.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vmin.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: smin_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vle32.v v10, (a1) +; CHECK-NEXT: vmin.vv v8, v8, v10 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret %a = load <8 x i32>, ptr %x %b = load <8 x i32>, ptr %y %cc = icmp slt <8 x i32> %a, %b @@ -5679,44 +3703,14 @@ define void @smin_v8i32(ptr %x, ptr %y) { } define void @smin_v4i64(ptr %x, ptr %y) { -; LMULMAX2-LABEL: smin_v4i64: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-NEXT: vle64.v v8, (a0) -; LMULMAX2-NEXT: vle64.v v10, (a1) -; LMULMAX2-NEXT: vmin.vv v8, v8, v10 -; LMULMAX2-NEXT: vse64.v v8, (a0) -; LMULMAX2-NEXT: ret -; 
-; LMULMAX1-RV32-LABEL: smin_v4i64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle64.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV32-NEXT: vmin.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vmin.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: smin_v4i64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV64-NEXT: vmin.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vmin.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: smin_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v10, (a1) +; CHECK-NEXT: vmin.vv v8, v8, v10 +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: ret %a = load <4 x i64>, ptr %x %b = load <4 x i64>, ptr %y %cc = icmp slt <4 x i64> %a, %b @@ -5726,45 +3720,15 @@ define void @smin_v4i64(ptr %x, ptr %y) { } define void @smax_v32i8(ptr %x, ptr %y) { -; LMULMAX2-LABEL: smax_v32i8: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a2, 32 -; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; LMULMAX2-NEXT: vle8.v v8, (a0) -; LMULMAX2-NEXT: vle8.v v10, (a1) -; LMULMAX2-NEXT: vmax.vv v8, v8, v10 -; LMULMAX2-NEXT: vse8.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: smax_v32i8: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV32-NEXT: 
vle8.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle8.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle8.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle8.v v11, (a1) -; LMULMAX1-RV32-NEXT: vmax.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vmax.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse8.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse8.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: smax_v32i8: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle8.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle8.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle8.v v11, (a1) -; LMULMAX1-RV64-NEXT: vmax.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vmax.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse8.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse8.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: smax_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 32 +; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v10, (a1) +; CHECK-NEXT: vmax.vv v8, v8, v10 +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x %b = load <32 x i8>, ptr %y %cc = icmp sgt <32 x i8> %a, %b @@ -5774,44 +3738,14 @@ define void @smax_v32i8(ptr %x, ptr %y) { } define void @smax_v16i16(ptr %x, ptr %y) { -; LMULMAX2-LABEL: smax_v16i16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-NEXT: vle16.v v8, (a0) -; LMULMAX2-NEXT: vle16.v v10, (a1) -; LMULMAX2-NEXT: vmax.vv v8, v8, v10 -; LMULMAX2-NEXT: vse16.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: smax_v16i16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: 
vle16.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV32-NEXT: vmax.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vmax.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: smax_v16i16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV64-NEXT: vmax.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vmax.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: smax_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v10, (a1) +; CHECK-NEXT: vmax.vv v8, v8, v10 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <16 x i16>, ptr %x %b = load <16 x i16>, ptr %y %cc = icmp sgt <16 x i16> %a, %b @@ -5821,44 +3755,14 @@ define void @smax_v16i16(ptr %x, ptr %y) { } define void @smax_v8i32(ptr %x, ptr %y) { -; LMULMAX2-LABEL: smax_v8i32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: vle32.v v10, (a1) -; LMULMAX2-NEXT: vmax.vv v8, v8, v10 -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: smax_v8i32: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle32.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV32-NEXT: vmax.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vmax.vv v8, v8, v11 -; 
LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: smax_v8i32: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle32.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV64-NEXT: vmax.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vmax.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: smax_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vle32.v v10, (a1) +; CHECK-NEXT: vmax.vv v8, v8, v10 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret %a = load <8 x i32>, ptr %x %b = load <8 x i32>, ptr %y %cc = icmp sgt <8 x i32> %a, %b @@ -5868,44 +3772,14 @@ define void @smax_v8i32(ptr %x, ptr %y) { } define void @smax_v4i64(ptr %x, ptr %y) { -; LMULMAX2-LABEL: smax_v4i64: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-NEXT: vle64.v v8, (a0) -; LMULMAX2-NEXT: vle64.v v10, (a1) -; LMULMAX2-NEXT: vmax.vv v8, v8, v10 -; LMULMAX2-NEXT: vse64.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: smax_v4i64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle64.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV32-NEXT: vmax.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vmax.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: smax_v4i64: -; LMULMAX1-RV64: # %bb.0: 
-; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV64-NEXT: vmax.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vmax.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: smax_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v10, (a1) +; CHECK-NEXT: vmax.vv v8, v8, v10 +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: ret %a = load <4 x i64>, ptr %x %b = load <4 x i64>, ptr %y %cc = icmp sgt <4 x i64> %a, %b @@ -5915,45 +3789,15 @@ define void @smax_v4i64(ptr %x, ptr %y) { } define void @umin_v32i8(ptr %x, ptr %y) { -; LMULMAX2-LABEL: umin_v32i8: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a2, 32 -; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; LMULMAX2-NEXT: vle8.v v8, (a0) -; LMULMAX2-NEXT: vle8.v v10, (a1) -; LMULMAX2-NEXT: vminu.vv v8, v8, v10 -; LMULMAX2-NEXT: vse8.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: umin_v32i8: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle8.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle8.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle8.v v11, (a1) -; LMULMAX1-RV32-NEXT: vminu.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vminu.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse8.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse8.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: umin_v32i8: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; 
LMULMAX1-RV64-NEXT: vle8.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle8.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle8.v v11, (a1) -; LMULMAX1-RV64-NEXT: vminu.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vminu.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse8.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse8.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: umin_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 32 +; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v10, (a1) +; CHECK-NEXT: vminu.vv v8, v8, v10 +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x %b = load <32 x i8>, ptr %y %cc = icmp ult <32 x i8> %a, %b @@ -5963,44 +3807,14 @@ define void @umin_v32i8(ptr %x, ptr %y) { } define void @umin_v16i16(ptr %x, ptr %y) { -; LMULMAX2-LABEL: umin_v16i16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-NEXT: vle16.v v8, (a0) -; LMULMAX2-NEXT: vle16.v v10, (a1) -; LMULMAX2-NEXT: vminu.vv v8, v8, v10 -; LMULMAX2-NEXT: vse16.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: umin_v16i16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle16.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV32-NEXT: vminu.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vminu.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: umin_v16i16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) -; LMULMAX1-RV64-NEXT: 
vle16.v v11, (a1) -; LMULMAX1-RV64-NEXT: vminu.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vminu.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: umin_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v10, (a1) +; CHECK-NEXT: vminu.vv v8, v8, v10 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <16 x i16>, ptr %x %b = load <16 x i16>, ptr %y %cc = icmp ult <16 x i16> %a, %b @@ -6010,44 +3824,14 @@ define void @umin_v16i16(ptr %x, ptr %y) { } define void @umin_v8i32(ptr %x, ptr %y) { -; LMULMAX2-LABEL: umin_v8i32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: vle32.v v10, (a1) -; LMULMAX2-NEXT: vminu.vv v8, v8, v10 -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: umin_v8i32: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle32.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV32-NEXT: vminu.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vminu.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: umin_v8i32: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle32.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV64-NEXT: vminu.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vminu.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; 
LMULMAX1-RV64-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: umin_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vle32.v v10, (a1) +; CHECK-NEXT: vminu.vv v8, v8, v10 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret %a = load <8 x i32>, ptr %x %b = load <8 x i32>, ptr %y %cc = icmp ult <8 x i32> %a, %b @@ -6057,44 +3841,14 @@ define void @umin_v8i32(ptr %x, ptr %y) { } define void @umin_v4i64(ptr %x, ptr %y) { -; LMULMAX2-LABEL: umin_v4i64: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-NEXT: vle64.v v8, (a0) -; LMULMAX2-NEXT: vle64.v v10, (a1) -; LMULMAX2-NEXT: vminu.vv v8, v8, v10 -; LMULMAX2-NEXT: vse64.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: umin_v4i64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle64.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV32-NEXT: vminu.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vminu.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: umin_v4i64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV64-NEXT: vminu.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vminu.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: umin_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; 
CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v10, (a1) +; CHECK-NEXT: vminu.vv v8, v8, v10 +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: ret %a = load <4 x i64>, ptr %x %b = load <4 x i64>, ptr %y %cc = icmp ult <4 x i64> %a, %b @@ -6104,45 +3858,15 @@ define void @umin_v4i64(ptr %x, ptr %y) { } define void @umax_v32i8(ptr %x, ptr %y) { -; LMULMAX2-LABEL: umax_v32i8: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a2, 32 -; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; LMULMAX2-NEXT: vle8.v v8, (a0) -; LMULMAX2-NEXT: vle8.v v10, (a1) -; LMULMAX2-NEXT: vmaxu.vv v8, v8, v10 -; LMULMAX2-NEXT: vse8.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: umax_v32i8: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle8.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle8.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle8.v v11, (a1) -; LMULMAX1-RV32-NEXT: vmaxu.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vmaxu.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse8.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse8.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: umax_v32i8: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle8.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle8.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle8.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle8.v v11, (a1) -; LMULMAX1-RV64-NEXT: vmaxu.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vmaxu.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse8.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse8.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: umax_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 32 +; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v10, (a1) +; CHECK-NEXT: vmaxu.vv v8, v8, v10 +; CHECK-NEXT: vse8.v v8, (a0) +; 
CHECK-NEXT: ret %a = load <32 x i8>, ptr %x %b = load <32 x i8>, ptr %y %cc = icmp ugt <32 x i8> %a, %b @@ -6152,44 +3876,14 @@ define void @umax_v32i8(ptr %x, ptr %y) { } define void @umax_v16i16(ptr %x, ptr %y) { -; LMULMAX2-LABEL: umax_v16i16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-NEXT: vle16.v v8, (a0) -; LMULMAX2-NEXT: vle16.v v10, (a1) -; LMULMAX2-NEXT: vmaxu.vv v8, v8, v10 -; LMULMAX2-NEXT: vse16.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: umax_v16i16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle16.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV32-NEXT: vmaxu.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vmaxu.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: umax_v16i16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle16.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle16.v v11, (a1) -; LMULMAX1-RV64-NEXT: vmaxu.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vmaxu.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: umax_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v10, (a1) +; CHECK-NEXT: vmaxu.vv v8, v8, v10 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <16 x i16>, ptr %x %b = load <16 x i16>, ptr %y %cc = icmp ugt <16 x i16> %a, %b @@ -6199,44 +3893,14 @@ define void 
@umax_v16i16(ptr %x, ptr %y) { } define void @umax_v8i32(ptr %x, ptr %y) { -; LMULMAX2-LABEL: umax_v8i32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: vle32.v v10, (a1) -; LMULMAX2-NEXT: vmaxu.vv v8, v8, v10 -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: umax_v8i32: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle32.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV32-NEXT: vmaxu.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vmaxu.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: umax_v8i32: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle32.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle32.v v11, (a1) -; LMULMAX1-RV64-NEXT: vmaxu.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vmaxu.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse32.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: umax_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vle32.v v10, (a1) +; CHECK-NEXT: vmaxu.vv v8, v8, v10 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret %a = load <8 x i32>, ptr %x %b = load <8 x i32>, ptr %y %cc = icmp ugt <8 x i32> %a, %b @@ -6246,44 +3910,14 @@ define void @umax_v8i32(ptr %x, ptr %y) { } define void @umax_v4i64(ptr %x, ptr %y) { -; LMULMAX2-LABEL: umax_v4i64: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 4, 
e64, m2, ta, ma -; LMULMAX2-NEXT: vle64.v v8, (a0) -; LMULMAX2-NEXT: vle64.v v10, (a1) -; LMULMAX2-NEXT: vmaxu.vv v8, v8, v10 -; LMULMAX2-NEXT: vse64.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: umax_v4i64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a2, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV32-NEXT: addi a3, a1, 16 -; LMULMAX1-RV32-NEXT: vle64.v v10, (a3) -; LMULMAX1-RV32-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV32-NEXT: vmaxu.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vmaxu.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: umax_v4i64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a2, a1, 16 -; LMULMAX1-RV64-NEXT: vle64.v v9, (a2) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) -; LMULMAX1-RV64-NEXT: vle64.v v11, (a1) -; LMULMAX1-RV64-NEXT: vmaxu.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vmaxu.vv v8, v8, v11 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: umax_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v10, (a1) +; CHECK-NEXT: vmaxu.vv v8, v8, v10 +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: ret %a = load <4 x i64>, ptr %x %b = load <4 x i64>, ptr %y %cc = icmp ugt <4 x i64> %a, %b diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll index f86286a14b2aea..dd0fc5a11a0ed6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll @@ -1,12 +1,6 @@ ; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,RV32-LMULMAX1 -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,RV64-LMULMAX1 -; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,RV32-LMULMAX2 -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,RV64-LMULMAX2 -; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,RV32-LMULMAX4 -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,RV64-LMULMAX4 -; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,RV32-LMULMAX8 -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,RV64-LMULMAX8 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 ; Test with ELEN limited ; RUN: llc -mtriple=riscv32 -mattr=+f,+zve32f,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVE32F ; RUN: llc -mtriple=riscv64 -mattr=+f,+zve32f,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVE32F @@ -462,73 +456,13 @@ define <16 x i1> @buildvec_mask_v16i1_undefs() { } define <32 x i1> @buildvec_mask_v32i1() 
{ -; RV32-LMULMAX1-LABEL: buildvec_mask_v32i1: -; RV32-LMULMAX1: # %bb.0: -; RV32-LMULMAX1-NEXT: li a0, 1776 -; RV32-LMULMAX1-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV32-LMULMAX1-NEXT: vmv.s.x v0, a0 -; RV32-LMULMAX1-NEXT: lui a0, 11 -; RV32-LMULMAX1-NEXT: addi a0, a0, 1718 -; RV32-LMULMAX1-NEXT: vmv.s.x v8, a0 -; RV32-LMULMAX1-NEXT: ret -; -; RV64-LMULMAX1-LABEL: buildvec_mask_v32i1: -; RV64-LMULMAX1: # %bb.0: -; RV64-LMULMAX1-NEXT: li a0, 1776 -; RV64-LMULMAX1-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64-LMULMAX1-NEXT: vmv.s.x v0, a0 -; RV64-LMULMAX1-NEXT: lui a0, 11 -; RV64-LMULMAX1-NEXT: addi a0, a0, 1718 -; RV64-LMULMAX1-NEXT: vmv.s.x v8, a0 -; RV64-LMULMAX1-NEXT: ret -; -; RV32-LMULMAX2-LABEL: buildvec_mask_v32i1: -; RV32-LMULMAX2: # %bb.0: -; RV32-LMULMAX2-NEXT: lui a0, 748384 -; RV32-LMULMAX2-NEXT: addi a0, a0, 1776 -; RV32-LMULMAX2-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-LMULMAX2-NEXT: vmv.s.x v0, a0 -; RV32-LMULMAX2-NEXT: ret -; -; RV64-LMULMAX2-LABEL: buildvec_mask_v32i1: -; RV64-LMULMAX2: # %bb.0: -; RV64-LMULMAX2-NEXT: lui a0, 748384 -; RV64-LMULMAX2-NEXT: addi a0, a0, 1776 -; RV64-LMULMAX2-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-LMULMAX2-NEXT: vmv.s.x v0, a0 -; RV64-LMULMAX2-NEXT: ret -; -; RV32-LMULMAX4-LABEL: buildvec_mask_v32i1: -; RV32-LMULMAX4: # %bb.0: -; RV32-LMULMAX4-NEXT: lui a0, 748384 -; RV32-LMULMAX4-NEXT: addi a0, a0, 1776 -; RV32-LMULMAX4-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-LMULMAX4-NEXT: vmv.s.x v0, a0 -; RV32-LMULMAX4-NEXT: ret -; -; RV64-LMULMAX4-LABEL: buildvec_mask_v32i1: -; RV64-LMULMAX4: # %bb.0: -; RV64-LMULMAX4-NEXT: lui a0, 748384 -; RV64-LMULMAX4-NEXT: addi a0, a0, 1776 -; RV64-LMULMAX4-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-LMULMAX4-NEXT: vmv.s.x v0, a0 -; RV64-LMULMAX4-NEXT: ret -; -; RV32-LMULMAX8-LABEL: buildvec_mask_v32i1: -; RV32-LMULMAX8: # %bb.0: -; RV32-LMULMAX8-NEXT: lui a0, 748384 -; RV32-LMULMAX8-NEXT: addi a0, a0, 1776 -; RV32-LMULMAX8-NEXT: vsetivli zero, 1, e32, m1, 
ta, ma -; RV32-LMULMAX8-NEXT: vmv.s.x v0, a0 -; RV32-LMULMAX8-NEXT: ret -; -; RV64-LMULMAX8-LABEL: buildvec_mask_v32i1: -; RV64-LMULMAX8: # %bb.0: -; RV64-LMULMAX8-NEXT: lui a0, 748384 -; RV64-LMULMAX8-NEXT: addi a0, a0, 1776 -; RV64-LMULMAX8-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-LMULMAX8-NEXT: vmv.s.x v0, a0 -; RV64-LMULMAX8-NEXT: ret +; CHECK-LABEL: buildvec_mask_v32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 748384 +; CHECK-NEXT: addi a0, a0, 1776 +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: ret ; ; ZVE32F-LABEL: buildvec_mask_v32i1: ; ZVE32F: # %bb.0: @@ -541,95 +475,25 @@ define <32 x i1> @buildvec_mask_v32i1() { } define <64 x i1> @buildvec_mask_v64i1() { -; RV32-LMULMAX1-LABEL: buildvec_mask_v64i1: -; RV32-LMULMAX1: # %bb.0: -; RV32-LMULMAX1-NEXT: li a0, 1776 -; RV32-LMULMAX1-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV32-LMULMAX1-NEXT: vmv.s.x v0, a0 -; RV32-LMULMAX1-NEXT: lui a0, 4 -; RV32-LMULMAX1-NEXT: addi a0, a0, -1793 -; RV32-LMULMAX1-NEXT: vmv.s.x v9, a0 -; RV32-LMULMAX1-NEXT: lui a0, 11 -; RV32-LMULMAX1-NEXT: addi a0, a0, 1718 -; RV32-LMULMAX1-NEXT: vmv.s.x v8, a0 -; RV32-LMULMAX1-NEXT: vmv.v.v v10, v8 -; RV32-LMULMAX1-NEXT: ret -; -; RV64-LMULMAX1-LABEL: buildvec_mask_v64i1: -; RV64-LMULMAX1: # %bb.0: -; RV64-LMULMAX1-NEXT: li a0, 1776 -; RV64-LMULMAX1-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64-LMULMAX1-NEXT: vmv.s.x v0, a0 -; RV64-LMULMAX1-NEXT: lui a0, 4 -; RV64-LMULMAX1-NEXT: addi a0, a0, -1793 -; RV64-LMULMAX1-NEXT: vmv.s.x v9, a0 -; RV64-LMULMAX1-NEXT: lui a0, 11 -; RV64-LMULMAX1-NEXT: addi a0, a0, 1718 -; RV64-LMULMAX1-NEXT: vmv.s.x v8, a0 -; RV64-LMULMAX1-NEXT: vmv.v.v v10, v8 -; RV64-LMULMAX1-NEXT: ret -; -; RV32-LMULMAX2-LABEL: buildvec_mask_v64i1: -; RV32-LMULMAX2: # %bb.0: -; RV32-LMULMAX2-NEXT: lui a0, 748384 -; RV32-LMULMAX2-NEXT: addi a0, a0, 1776 -; RV32-LMULMAX2-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-LMULMAX2-NEXT: vmv.s.x v0, a0 -; RV32-LMULMAX2-NEXT: lui 
a0, 748388 -; RV32-LMULMAX2-NEXT: addi a0, a0, -1793 -; RV32-LMULMAX2-NEXT: vmv.s.x v8, a0 -; RV32-LMULMAX2-NEXT: ret -; -; RV64-LMULMAX2-LABEL: buildvec_mask_v64i1: -; RV64-LMULMAX2: # %bb.0: -; RV64-LMULMAX2-NEXT: lui a0, 748384 -; RV64-LMULMAX2-NEXT: addi a0, a0, 1776 -; RV64-LMULMAX2-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-LMULMAX2-NEXT: vmv.s.x v0, a0 -; RV64-LMULMAX2-NEXT: lui a0, 748388 -; RV64-LMULMAX2-NEXT: addi a0, a0, -1793 -; RV64-LMULMAX2-NEXT: vmv.s.x v8, a0 -; RV64-LMULMAX2-NEXT: ret -; -; RV32-LMULMAX4-LABEL: buildvec_mask_v64i1: -; RV32-LMULMAX4: # %bb.0: -; RV32-LMULMAX4-NEXT: lui a0, 748388 -; RV32-LMULMAX4-NEXT: addi a0, a0, -1793 -; RV32-LMULMAX4-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-LMULMAX4-NEXT: vmv.v.x v0, a0 -; RV32-LMULMAX4-NEXT: lui a0, 748384 -; RV32-LMULMAX4-NEXT: addi a0, a0, 1776 -; RV32-LMULMAX4-NEXT: vsetvli zero, zero, e32, mf2, tu, ma -; RV32-LMULMAX4-NEXT: vmv.s.x v0, a0 -; RV32-LMULMAX4-NEXT: ret -; -; RV64-LMULMAX4-LABEL: buildvec_mask_v64i1: -; RV64-LMULMAX4: # %bb.0: -; RV64-LMULMAX4-NEXT: lui a0, %hi(.LCPI19_0) -; RV64-LMULMAX4-NEXT: addi a0, a0, %lo(.LCPI19_0) -; RV64-LMULMAX4-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-LMULMAX4-NEXT: vle64.v v0, (a0) -; RV64-LMULMAX4-NEXT: ret -; -; RV32-LMULMAX8-LABEL: buildvec_mask_v64i1: -; RV32-LMULMAX8: # %bb.0: -; RV32-LMULMAX8-NEXT: lui a0, 748388 -; RV32-LMULMAX8-NEXT: addi a0, a0, -1793 -; RV32-LMULMAX8-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-LMULMAX8-NEXT: vmv.v.x v0, a0 -; RV32-LMULMAX8-NEXT: lui a0, 748384 -; RV32-LMULMAX8-NEXT: addi a0, a0, 1776 -; RV32-LMULMAX8-NEXT: vsetvli zero, zero, e32, mf2, tu, ma -; RV32-LMULMAX8-NEXT: vmv.s.x v0, a0 -; RV32-LMULMAX8-NEXT: ret -; -; RV64-LMULMAX8-LABEL: buildvec_mask_v64i1: -; RV64-LMULMAX8: # %bb.0: -; RV64-LMULMAX8-NEXT: lui a0, %hi(.LCPI19_0) -; RV64-LMULMAX8-NEXT: addi a0, a0, %lo(.LCPI19_0) -; RV64-LMULMAX8-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-LMULMAX8-NEXT: vle64.v v0, (a0) -; 
RV64-LMULMAX8-NEXT: ret +; RV32-LABEL: buildvec_mask_v64i1: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, 748388 +; RV32-NEXT: addi a0, a0, -1793 +; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV32-NEXT: vmv.v.x v0, a0 +; RV32-NEXT: lui a0, 748384 +; RV32-NEXT: addi a0, a0, 1776 +; RV32-NEXT: vsetvli zero, zero, e32, mf2, tu, ma +; RV32-NEXT: vmv.s.x v0, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: buildvec_mask_v64i1: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI19_0) +; RV64-NEXT: addi a0, a0, %lo(.LCPI19_0) +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vle64.v v0, (a0) +; RV64-NEXT: ret ; ; ZVE32F-LABEL: buildvec_mask_v64i1: ; ZVE32F: # %bb.0: @@ -646,134 +510,25 @@ define <64 x i1> @buildvec_mask_v64i1() { } define <128 x i1> @buildvec_mask_v128i1() { -; RV32-LMULMAX1-LABEL: buildvec_mask_v128i1: -; RV32-LMULMAX1: # %bb.0: -; RV32-LMULMAX1-NEXT: li a0, 1776 -; RV32-LMULMAX1-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV32-LMULMAX1-NEXT: vmv.s.x v0, a0 -; RV32-LMULMAX1-NEXT: lui a0, 11 -; RV32-LMULMAX1-NEXT: addi a0, a0, 1718 -; RV32-LMULMAX1-NEXT: vmv.s.x v8, a0 -; RV32-LMULMAX1-NEXT: lui a0, 8 -; RV32-LMULMAX1-NEXT: addi a0, a0, 1718 -; RV32-LMULMAX1-NEXT: vmv.s.x v12, a0 -; RV32-LMULMAX1-NEXT: lui a0, 4 -; RV32-LMULMAX1-NEXT: addi a0, a0, -1793 -; RV32-LMULMAX1-NEXT: vmv.s.x v9, a0 -; RV32-LMULMAX1-NEXT: lui a0, 14 -; RV32-LMULMAX1-NEXT: addi a0, a0, 1722 -; RV32-LMULMAX1-NEXT: vmv.s.x v14, a0 -; RV32-LMULMAX1-NEXT: vmv.v.v v10, v8 -; RV32-LMULMAX1-NEXT: vmv.v.v v11, v0 -; RV32-LMULMAX1-NEXT: vmv.v.v v13, v9 -; RV32-LMULMAX1-NEXT: ret -; -; RV64-LMULMAX1-LABEL: buildvec_mask_v128i1: -; RV64-LMULMAX1: # %bb.0: -; RV64-LMULMAX1-NEXT: li a0, 1776 -; RV64-LMULMAX1-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64-LMULMAX1-NEXT: vmv.s.x v0, a0 -; RV64-LMULMAX1-NEXT: lui a0, 11 -; RV64-LMULMAX1-NEXT: addi a0, a0, 1718 -; RV64-LMULMAX1-NEXT: vmv.s.x v8, a0 -; RV64-LMULMAX1-NEXT: lui a0, 8 -; RV64-LMULMAX1-NEXT: addi a0, a0, 1718 -; RV64-LMULMAX1-NEXT: 
vmv.s.x v12, a0 -; RV64-LMULMAX1-NEXT: lui a0, 4 -; RV64-LMULMAX1-NEXT: addi a0, a0, -1793 -; RV64-LMULMAX1-NEXT: vmv.s.x v9, a0 -; RV64-LMULMAX1-NEXT: lui a0, 14 -; RV64-LMULMAX1-NEXT: addi a0, a0, 1722 -; RV64-LMULMAX1-NEXT: vmv.s.x v14, a0 -; RV64-LMULMAX1-NEXT: vmv.v.v v10, v8 -; RV64-LMULMAX1-NEXT: vmv.v.v v11, v0 -; RV64-LMULMAX1-NEXT: vmv.v.v v13, v9 -; RV64-LMULMAX1-NEXT: ret -; -; RV32-LMULMAX2-LABEL: buildvec_mask_v128i1: -; RV32-LMULMAX2: # %bb.0: -; RV32-LMULMAX2-NEXT: lui a0, 748384 -; RV32-LMULMAX2-NEXT: addi a0, a0, 1776 -; RV32-LMULMAX2-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-LMULMAX2-NEXT: vmv.s.x v0, a0 -; RV32-LMULMAX2-NEXT: lui a0, 748388 -; RV32-LMULMAX2-NEXT: addi a0, a0, -1793 -; RV32-LMULMAX2-NEXT: vmv.s.x v8, a0 -; RV32-LMULMAX2-NEXT: lui a0, 551776 -; RV32-LMULMAX2-NEXT: addi a0, a0, 1776 -; RV32-LMULMAX2-NEXT: vmv.s.x v9, a0 -; RV32-LMULMAX2-NEXT: lui a0, 945060 -; RV32-LMULMAX2-NEXT: addi a0, a0, -1793 -; RV32-LMULMAX2-NEXT: vmv.s.x v10, a0 -; RV32-LMULMAX2-NEXT: ret -; -; RV64-LMULMAX2-LABEL: buildvec_mask_v128i1: -; RV64-LMULMAX2: # %bb.0: -; RV64-LMULMAX2-NEXT: lui a0, 748384 -; RV64-LMULMAX2-NEXT: addi a0, a0, 1776 -; RV64-LMULMAX2-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-LMULMAX2-NEXT: vmv.s.x v0, a0 -; RV64-LMULMAX2-NEXT: lui a0, 748388 -; RV64-LMULMAX2-NEXT: addi a0, a0, -1793 -; RV64-LMULMAX2-NEXT: vmv.s.x v8, a0 -; RV64-LMULMAX2-NEXT: lui a0, 551776 -; RV64-LMULMAX2-NEXT: addi a0, a0, 1776 -; RV64-LMULMAX2-NEXT: vmv.s.x v9, a0 -; RV64-LMULMAX2-NEXT: lui a0, 945060 -; RV64-LMULMAX2-NEXT: addi a0, a0, -1793 -; RV64-LMULMAX2-NEXT: vmv.s.x v10, a0 -; RV64-LMULMAX2-NEXT: ret -; -; RV32-LMULMAX4-LABEL: buildvec_mask_v128i1: -; RV32-LMULMAX4: # %bb.0: -; RV32-LMULMAX4-NEXT: lui a0, 748388 -; RV32-LMULMAX4-NEXT: addi a0, a0, -1793 -; RV32-LMULMAX4-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-LMULMAX4-NEXT: vmv.v.x v0, a0 -; RV32-LMULMAX4-NEXT: lui a0, 748384 -; RV32-LMULMAX4-NEXT: addi a0, a0, 1776 -; 
RV32-LMULMAX4-NEXT: vsetvli zero, zero, e32, mf2, tu, ma -; RV32-LMULMAX4-NEXT: vmv.s.x v0, a0 -; RV32-LMULMAX4-NEXT: lui a0, 945060 -; RV32-LMULMAX4-NEXT: addi a0, a0, -1793 -; RV32-LMULMAX4-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; RV32-LMULMAX4-NEXT: vmv.v.x v8, a0 -; RV32-LMULMAX4-NEXT: lui a0, 551776 -; RV32-LMULMAX4-NEXT: addi a0, a0, 1776 -; RV32-LMULMAX4-NEXT: vsetvli zero, zero, e32, mf2, tu, ma -; RV32-LMULMAX4-NEXT: vmv.s.x v8, a0 -; RV32-LMULMAX4-NEXT: ret -; -; RV64-LMULMAX4-LABEL: buildvec_mask_v128i1: -; RV64-LMULMAX4: # %bb.0: -; RV64-LMULMAX4-NEXT: lui a0, %hi(.LCPI20_0) -; RV64-LMULMAX4-NEXT: addi a0, a0, %lo(.LCPI20_0) -; RV64-LMULMAX4-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-LMULMAX4-NEXT: vle64.v v0, (a0) -; RV64-LMULMAX4-NEXT: lui a0, %hi(.LCPI20_1) -; RV64-LMULMAX4-NEXT: addi a0, a0, %lo(.LCPI20_1) -; RV64-LMULMAX4-NEXT: vle64.v v8, (a0) -; RV64-LMULMAX4-NEXT: ret -; -; RV32-LMULMAX8-LABEL: buildvec_mask_v128i1: -; RV32-LMULMAX8: # %bb.0: -; RV32-LMULMAX8-NEXT: lui a0, %hi(.LCPI20_0) -; RV32-LMULMAX8-NEXT: addi a0, a0, %lo(.LCPI20_0) -; RV32-LMULMAX8-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-LMULMAX8-NEXT: vle32.v v0, (a0) -; RV32-LMULMAX8-NEXT: ret -; -; RV64-LMULMAX8-LABEL: buildvec_mask_v128i1: -; RV64-LMULMAX8: # %bb.0: -; RV64-LMULMAX8-NEXT: lui a0, %hi(.LCPI20_0) -; RV64-LMULMAX8-NEXT: addi a0, a0, %lo(.LCPI20_0) -; RV64-LMULMAX8-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64-LMULMAX8-NEXT: vlse64.v v0, (a0), zero -; RV64-LMULMAX8-NEXT: lui a0, %hi(.LCPI20_1) -; RV64-LMULMAX8-NEXT: ld a0, %lo(.LCPI20_1)(a0) -; RV64-LMULMAX8-NEXT: vsetvli zero, zero, e64, m1, tu, ma -; RV64-LMULMAX8-NEXT: vmv.s.x v0, a0 -; RV64-LMULMAX8-NEXT: ret +; RV32-LABEL: buildvec_mask_v128i1: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI20_0) +; RV32-NEXT: addi a0, a0, %lo(.LCPI20_0) +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vle32.v v0, (a0) +; RV32-NEXT: ret +; +; RV64-LABEL: buildvec_mask_v128i1: +; RV64: # %bb.0: +; 
RV64-NEXT: lui a0, %hi(.LCPI20_0) +; RV64-NEXT: addi a0, a0, %lo(.LCPI20_0) +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vlse64.v v0, (a0), zero +; RV64-NEXT: lui a0, %hi(.LCPI20_1) +; RV64-NEXT: ld a0, %lo(.LCPI20_1)(a0) +; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma +; RV64-NEXT: vmv.s.x v0, a0 +; RV64-NEXT: ret ; ; ZVE32F-LABEL: buildvec_mask_v128i1: ; ZVE32F: # %bb.0: @@ -786,124 +541,14 @@ define <128 x i1> @buildvec_mask_v128i1() { } define <128 x i1> @buildvec_mask_optsize_v128i1() optsize { -; RV32-LMULMAX1-LABEL: buildvec_mask_optsize_v128i1: -; RV32-LMULMAX1: # %bb.0: -; RV32-LMULMAX1-NEXT: li a0, 1776 -; RV32-LMULMAX1-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV32-LMULMAX1-NEXT: vmv.s.x v0, a0 -; RV32-LMULMAX1-NEXT: lui a0, 11 -; RV32-LMULMAX1-NEXT: addi a0, a0, 1718 -; RV32-LMULMAX1-NEXT: vmv.s.x v8, a0 -; RV32-LMULMAX1-NEXT: lui a0, 8 -; RV32-LMULMAX1-NEXT: addi a0, a0, 1718 -; RV32-LMULMAX1-NEXT: vmv.s.x v12, a0 -; RV32-LMULMAX1-NEXT: lui a0, 4 -; RV32-LMULMAX1-NEXT: addi a0, a0, -1793 -; RV32-LMULMAX1-NEXT: vmv.s.x v9, a0 -; RV32-LMULMAX1-NEXT: lui a0, 14 -; RV32-LMULMAX1-NEXT: addi a0, a0, 1722 -; RV32-LMULMAX1-NEXT: vmv.s.x v14, a0 -; RV32-LMULMAX1-NEXT: vmv.v.v v10, v8 -; RV32-LMULMAX1-NEXT: vmv.v.v v11, v0 -; RV32-LMULMAX1-NEXT: vmv.v.v v13, v9 -; RV32-LMULMAX1-NEXT: ret -; -; RV64-LMULMAX1-LABEL: buildvec_mask_optsize_v128i1: -; RV64-LMULMAX1: # %bb.0: -; RV64-LMULMAX1-NEXT: li a0, 1776 -; RV64-LMULMAX1-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64-LMULMAX1-NEXT: vmv.s.x v0, a0 -; RV64-LMULMAX1-NEXT: lui a0, 11 -; RV64-LMULMAX1-NEXT: addi a0, a0, 1718 -; RV64-LMULMAX1-NEXT: vmv.s.x v8, a0 -; RV64-LMULMAX1-NEXT: lui a0, 8 -; RV64-LMULMAX1-NEXT: addi a0, a0, 1718 -; RV64-LMULMAX1-NEXT: vmv.s.x v12, a0 -; RV64-LMULMAX1-NEXT: lui a0, 4 -; RV64-LMULMAX1-NEXT: addi a0, a0, -1793 -; RV64-LMULMAX1-NEXT: vmv.s.x v9, a0 -; RV64-LMULMAX1-NEXT: lui a0, 14 -; RV64-LMULMAX1-NEXT: addi a0, a0, 1722 -; RV64-LMULMAX1-NEXT: vmv.s.x v14, a0 
-; RV64-LMULMAX1-NEXT: vmv.v.v v10, v8 -; RV64-LMULMAX1-NEXT: vmv.v.v v11, v0 -; RV64-LMULMAX1-NEXT: vmv.v.v v13, v9 -; RV64-LMULMAX1-NEXT: ret -; -; RV32-LMULMAX2-LABEL: buildvec_mask_optsize_v128i1: -; RV32-LMULMAX2: # %bb.0: -; RV32-LMULMAX2-NEXT: lui a0, 748384 -; RV32-LMULMAX2-NEXT: addi a0, a0, 1776 -; RV32-LMULMAX2-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-LMULMAX2-NEXT: vmv.s.x v0, a0 -; RV32-LMULMAX2-NEXT: lui a0, 748388 -; RV32-LMULMAX2-NEXT: addi a0, a0, -1793 -; RV32-LMULMAX2-NEXT: vmv.s.x v8, a0 -; RV32-LMULMAX2-NEXT: lui a0, 551776 -; RV32-LMULMAX2-NEXT: addi a0, a0, 1776 -; RV32-LMULMAX2-NEXT: vmv.s.x v9, a0 -; RV32-LMULMAX2-NEXT: lui a0, 945060 -; RV32-LMULMAX2-NEXT: addi a0, a0, -1793 -; RV32-LMULMAX2-NEXT: vmv.s.x v10, a0 -; RV32-LMULMAX2-NEXT: ret -; -; RV64-LMULMAX2-LABEL: buildvec_mask_optsize_v128i1: -; RV64-LMULMAX2: # %bb.0: -; RV64-LMULMAX2-NEXT: lui a0, 748384 -; RV64-LMULMAX2-NEXT: addi a0, a0, 1776 -; RV64-LMULMAX2-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-LMULMAX2-NEXT: vmv.s.x v0, a0 -; RV64-LMULMAX2-NEXT: lui a0, 748388 -; RV64-LMULMAX2-NEXT: addi a0, a0, -1793 -; RV64-LMULMAX2-NEXT: vmv.s.x v8, a0 -; RV64-LMULMAX2-NEXT: lui a0, 551776 -; RV64-LMULMAX2-NEXT: addi a0, a0, 1776 -; RV64-LMULMAX2-NEXT: vmv.s.x v9, a0 -; RV64-LMULMAX2-NEXT: lui a0, 945060 -; RV64-LMULMAX2-NEXT: addi a0, a0, -1793 -; RV64-LMULMAX2-NEXT: vmv.s.x v10, a0 -; RV64-LMULMAX2-NEXT: ret -; -; RV32-LMULMAX4-LABEL: buildvec_mask_optsize_v128i1: -; RV32-LMULMAX4: # %bb.0: -; RV32-LMULMAX4-NEXT: lui a0, %hi(.LCPI21_0) -; RV32-LMULMAX4-NEXT: addi a0, a0, %lo(.LCPI21_0) -; RV32-LMULMAX4-NEXT: li a1, 64 -; RV32-LMULMAX4-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; RV32-LMULMAX4-NEXT: vlm.v v0, (a0) -; RV32-LMULMAX4-NEXT: lui a0, %hi(.LCPI21_1) -; RV32-LMULMAX4-NEXT: addi a0, a0, %lo(.LCPI21_1) -; RV32-LMULMAX4-NEXT: vlm.v v8, (a0) -; RV32-LMULMAX4-NEXT: ret -; -; RV64-LMULMAX4-LABEL: buildvec_mask_optsize_v128i1: -; RV64-LMULMAX4: # %bb.0: -; RV64-LMULMAX4-NEXT: 
lui a0, %hi(.LCPI21_0) -; RV64-LMULMAX4-NEXT: addi a0, a0, %lo(.LCPI21_0) -; RV64-LMULMAX4-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-LMULMAX4-NEXT: vle64.v v0, (a0) -; RV64-LMULMAX4-NEXT: lui a0, %hi(.LCPI21_1) -; RV64-LMULMAX4-NEXT: addi a0, a0, %lo(.LCPI21_1) -; RV64-LMULMAX4-NEXT: vle64.v v8, (a0) -; RV64-LMULMAX4-NEXT: ret -; -; RV32-LMULMAX8-LABEL: buildvec_mask_optsize_v128i1: -; RV32-LMULMAX8: # %bb.0: -; RV32-LMULMAX8-NEXT: lui a0, %hi(.LCPI21_0) -; RV32-LMULMAX8-NEXT: addi a0, a0, %lo(.LCPI21_0) -; RV32-LMULMAX8-NEXT: li a1, 128 -; RV32-LMULMAX8-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; RV32-LMULMAX8-NEXT: vlm.v v0, (a0) -; RV32-LMULMAX8-NEXT: ret -; -; RV64-LMULMAX8-LABEL: buildvec_mask_optsize_v128i1: -; RV64-LMULMAX8: # %bb.0: -; RV64-LMULMAX8-NEXT: lui a0, %hi(.LCPI21_0) -; RV64-LMULMAX8-NEXT: addi a0, a0, %lo(.LCPI21_0) -; RV64-LMULMAX8-NEXT: li a1, 128 -; RV64-LMULMAX8-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; RV64-LMULMAX8-NEXT: vlm.v v0, (a0) -; RV64-LMULMAX8-NEXT: ret +; CHECK-LABEL: buildvec_mask_optsize_v128i1: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI21_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI21_0) +; CHECK-NEXT: li a1, 128 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vlm.v v0, (a0) +; CHECK-NEXT: ret ; ; ZVE32F-LABEL: buildvec_mask_optsize_v128i1: ; ZVE32F: # %bb.0: @@ -915,6 +560,3 @@ define <128 x i1> @buildvec_mask_optsize_v128i1() optsize { ; ZVE32F-NEXT: ret ret <128 x i1> } -;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -; CHECK-RV32: {{.*}} -; CHECK-RV64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-load-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-load-store.ll index 0d0d21d9b45ea9..b73408d023207b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-load-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-load-store.ll @@ -1,8 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s define void @load_store_v1i1(ptr %x, ptr %y) { ; CHECK-LABEL: load_store_v1i1: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll index cb501c10d03908..4f7b885d998e5b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll @@ -1,8 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 -; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 
-verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define void @splat_ones_v1i1(ptr %x) { ; CHECK-LABEL: splat_ones_v1i1: @@ -163,37 +161,15 @@ define void @splat_zeros_v32i1(ptr %x) { } define void @splat_v32i1(ptr %x, i1 %y) { -; LMULMAX2-LABEL: splat_v32i1: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: andi a1, a1, 1 -; LMULMAX2-NEXT: li a2, 32 -; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; LMULMAX2-NEXT: vmv.v.x v8, a1 -; LMULMAX2-NEXT: vmsne.vi v10, v8, 0 -; LMULMAX2-NEXT: vsm.v v10, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: splat_v32i1: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: andi a1, a1, 1 -; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.x v8, a1 -; LMULMAX1-RV32-NEXT: vmsne.vi v8, v8, 0 -; LMULMAX1-RV32-NEXT: addi a1, a0, 2 -; LMULMAX1-RV32-NEXT: vsm.v v8, (a1) -; LMULMAX1-RV32-NEXT: vsm.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: splat_v32i1: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: andi a1, a1, 1 -; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV64-NEXT: vmv.v.x v8, a1 -; LMULMAX1-RV64-NEXT: vmsne.vi v8, v8, 0 -; LMULMAX1-RV64-NEXT: addi a1, a0, 2 -; LMULMAX1-RV64-NEXT: vsm.v v8, (a1) -; LMULMAX1-RV64-NEXT: vsm.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: splat_v32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: li a2, 32 +; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma +; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: vmsne.vi v10, v8, 0 +; CHECK-NEXT: vsm.v v10, (a0) +; CHECK-NEXT: ret %a = insertelement <32 x i1> poison, i1 
%y, i32 0 %b = shufflevector <32 x i1> %a, <32 x i1> poison, <32 x i32> zeroinitializer store <32 x i1> %b, ptr %x @@ -201,70 +177,33 @@ define void @splat_v32i1(ptr %x, i1 %y) { } define void @splat_ones_v64i1(ptr %x) { -; LMULMAX1-RV32-LABEL: splat_ones_v64i1: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmset.m v8 -; LMULMAX1-RV32-NEXT: vsm.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a1, a0, 6 -; LMULMAX1-RV32-NEXT: vsm.v v8, (a1) -; LMULMAX1-RV32-NEXT: addi a1, a0, 4 -; LMULMAX1-RV32-NEXT: vsm.v v8, (a1) -; LMULMAX1-RV32-NEXT: addi a0, a0, 2 -; LMULMAX1-RV32-NEXT: vsm.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret +; RV32-LABEL: splat_ones_v64i1: +; RV32: # %bb.0: +; RV32-NEXT: li a1, 64 +; RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; RV32-NEXT: vmset.m v8 +; RV32-NEXT: vsm.v v8, (a0) +; RV32-NEXT: ret ; -; LMULMAX1-RV64-LABEL: splat_ones_v64i1: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: li a1, -1 -; LMULMAX1-RV64-NEXT: sd a1, 0(a0) -; LMULMAX1-RV64-NEXT: ret +; RV64-LABEL: splat_ones_v64i1: +; RV64: # %bb.0: +; RV64-NEXT: li a1, -1 +; RV64-NEXT: sd a1, 0(a0) +; RV64-NEXT: ret store <64 x i1> , ptr %x ret void } define void @splat_v64i1(ptr %x, i1 %y) { -; LMULMAX2-LABEL: splat_v64i1: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: andi a1, a1, 1 -; LMULMAX2-NEXT: li a2, 32 -; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; LMULMAX2-NEXT: vmv.v.x v8, a1 -; LMULMAX2-NEXT: vmsne.vi v10, v8, 0 -; LMULMAX2-NEXT: addi a1, a0, 4 -; LMULMAX2-NEXT: vsm.v v10, (a1) -; LMULMAX2-NEXT: vsm.v v10, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: splat_v64i1: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: andi a1, a1, 1 -; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.x v8, a1 -; LMULMAX1-RV32-NEXT: vmsne.vi v8, v8, 0 -; LMULMAX1-RV32-NEXT: addi a1, a0, 6 -; LMULMAX1-RV32-NEXT: vsm.v v8, (a1) -; LMULMAX1-RV32-NEXT: addi a1, a0, 4 -; LMULMAX1-RV32-NEXT: vsm.v v8, (a1) -; 
LMULMAX1-RV32-NEXT: addi a1, a0, 2 -; LMULMAX1-RV32-NEXT: vsm.v v8, (a1) -; LMULMAX1-RV32-NEXT: vsm.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: splat_v64i1: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: andi a1, a1, 1 -; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV64-NEXT: vmv.v.x v8, a1 -; LMULMAX1-RV64-NEXT: vmsne.vi v8, v8, 0 -; LMULMAX1-RV64-NEXT: addi a1, a0, 6 -; LMULMAX1-RV64-NEXT: vsm.v v8, (a1) -; LMULMAX1-RV64-NEXT: addi a1, a0, 4 -; LMULMAX1-RV64-NEXT: vsm.v v8, (a1) -; LMULMAX1-RV64-NEXT: addi a1, a0, 2 -; LMULMAX1-RV64-NEXT: vsm.v v8, (a1) -; LMULMAX1-RV64-NEXT: vsm.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: splat_v64i1: +; CHECK: # %bb.0: +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: li a2, 64 +; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma +; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: vmsne.vi v12, v8, 0 +; CHECK-NEXT: vsm.v v12, (a0) +; CHECK-NEXT: ret %a = insertelement <64 x i1> poison, i1 %y, i32 0 %b = shufflevector <64 x i1> %a, <64 x i1> poison, <64 x i32> zeroinitializer store <64 x i1> %b, ptr %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll index 5574d12d2d5dd8..0161ac4bc338db 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll @@ -1,8 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,RV32LMULMAX1 -; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,RV64LMULMAX1 -; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs -riscv-v-fixed-length-vector-lmul-max=2 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,RV32LMULMAX2 -; RUN: llc 
-mtriple=riscv64 -mattr=+v -verify-machineinstrs -riscv-v-fixed-length-vector-lmul-max=2 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,RV64LMULMAX2 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 declare <2 x i8> @llvm.experimental.stepvector.v2i8() @@ -103,18 +101,11 @@ define <8 x i16> @stepvector_v8i16() { declare <16 x i16> @llvm.experimental.stepvector.v16i16() define <16 x i16> @stepvector_v16i16() { -; LMULMAX1-LABEL: stepvector_v16i16: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-NEXT: vid.v v8 -; LMULMAX1-NEXT: vadd.vi v9, v8, 8 -; LMULMAX1-NEXT: ret -; -; LMULMAX2-LABEL: stepvector_v16i16: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-NEXT: vid.v v8 -; LMULMAX2-NEXT: ret +; CHECK-LABEL: stepvector_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vid.v v8 +; CHECK-NEXT: ret %v = call <16 x i16> @llvm.experimental.stepvector.v16i16() ret <16 x i16> %v } @@ -146,18 +137,11 @@ define <4 x i32> @stepvector_v4i32() { declare <8 x i32> @llvm.experimental.stepvector.v8i32() define <8 x i32> @stepvector_v8i32() { -; LMULMAX1-LABEL: stepvector_v8i32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vid.v v8 -; LMULMAX1-NEXT: vadd.vi v9, v8, 4 -; LMULMAX1-NEXT: ret -; -; LMULMAX2-LABEL: stepvector_v8i32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vid.v v8 -; LMULMAX2-NEXT: ret +; CHECK-LABEL: stepvector_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vid.v v8 +; CHECK-NEXT: ret %v = call <8 x i32> @llvm.experimental.stepvector.v8i32() ret <8 x i32> %v } @@ -165,21 +149,11 @@ define <8 x i32> @stepvector_v8i32() { declare <16 x i32> 
@llvm.experimental.stepvector.v16i32() define <16 x i32> @stepvector_v16i32() { -; LMULMAX1-LABEL: stepvector_v16i32: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vid.v v8 -; LMULMAX1-NEXT: vadd.vi v9, v8, 4 -; LMULMAX1-NEXT: vadd.vi v10, v8, 8 -; LMULMAX1-NEXT: vadd.vi v11, v8, 12 -; LMULMAX1-NEXT: ret -; -; LMULMAX2-LABEL: stepvector_v16i32: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vid.v v8 -; LMULMAX2-NEXT: vadd.vi v10, v8, 8 -; LMULMAX2-NEXT: ret +; CHECK-LABEL: stepvector_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vid.v v8 +; CHECK-NEXT: ret %v = call <16 x i32> @llvm.experimental.stepvector.v16i32() ret <16 x i32> %v } @@ -187,33 +161,19 @@ define <16 x i32> @stepvector_v16i32() { declare <2 x i64> @llvm.experimental.stepvector.v2i64() define <2 x i64> @stepvector_v2i64() { -; RV32LMULMAX1-LABEL: stepvector_v2i64: -; RV32LMULMAX1: # %bb.0: -; RV32LMULMAX1-NEXT: lui a0, 16 -; RV32LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32LMULMAX1-NEXT: vmv.s.x v9, a0 -; RV32LMULMAX1-NEXT: vsext.vf4 v8, v9 -; RV32LMULMAX1-NEXT: ret -; -; RV64LMULMAX1-LABEL: stepvector_v2i64: -; RV64LMULMAX1: # %bb.0: -; RV64LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64LMULMAX1-NEXT: vid.v v8 -; RV64LMULMAX1-NEXT: ret +; RV32-LABEL: stepvector_v2i64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, 16 +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vmv.s.x v9, a0 +; RV32-NEXT: vsext.vf4 v8, v9 +; RV32-NEXT: ret ; -; RV32LMULMAX2-LABEL: stepvector_v2i64: -; RV32LMULMAX2: # %bb.0: -; RV32LMULMAX2-NEXT: lui a0, 16 -; RV32LMULMAX2-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32LMULMAX2-NEXT: vmv.s.x v9, a0 -; RV32LMULMAX2-NEXT: vsext.vf4 v8, v9 -; RV32LMULMAX2-NEXT: ret -; -; RV64LMULMAX2-LABEL: stepvector_v2i64: -; RV64LMULMAX2: # %bb.0: -; RV64LMULMAX2-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64LMULMAX2-NEXT: vid.v v8 
-; RV64LMULMAX2-NEXT: ret +; RV64-LABEL: stepvector_v2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vid.v v8 +; RV64-NEXT: ret %v = call <2 x i64> @llvm.experimental.stepvector.v2i64() ret <2 x i64> %v } @@ -221,39 +181,20 @@ define <2 x i64> @stepvector_v2i64() { declare <4 x i64> @llvm.experimental.stepvector.v4i64() define <4 x i64> @stepvector_v4i64() { -; RV32LMULMAX1-LABEL: stepvector_v4i64: -; RV32LMULMAX1: # %bb.0: -; RV32LMULMAX1-NEXT: lui a0, 16 -; RV32LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32LMULMAX1-NEXT: vmv.s.x v9, a0 -; RV32LMULMAX1-NEXT: vsext.vf4 v8, v9 -; RV32LMULMAX1-NEXT: lui a0, 48 -; RV32LMULMAX1-NEXT: addi a0, a0, 2 -; RV32LMULMAX1-NEXT: vmv.s.x v10, a0 -; RV32LMULMAX1-NEXT: vsext.vf4 v9, v10 -; RV32LMULMAX1-NEXT: ret -; -; RV64LMULMAX1-LABEL: stepvector_v4i64: -; RV64LMULMAX1: # %bb.0: -; RV64LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64LMULMAX1-NEXT: vid.v v8 -; RV64LMULMAX1-NEXT: vadd.vi v9, v8, 2 -; RV64LMULMAX1-NEXT: ret +; RV32-LABEL: stepvector_v4i64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI14_0) +; RV32-NEXT: addi a0, a0, %lo(.LCPI14_0) +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vle8.v v10, (a0) +; RV32-NEXT: vsext.vf4 v8, v10 +; RV32-NEXT: ret ; -; RV32LMULMAX2-LABEL: stepvector_v4i64: -; RV32LMULMAX2: # %bb.0: -; RV32LMULMAX2-NEXT: lui a0, %hi(.LCPI14_0) -; RV32LMULMAX2-NEXT: addi a0, a0, %lo(.LCPI14_0) -; RV32LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32LMULMAX2-NEXT: vle8.v v10, (a0) -; RV32LMULMAX2-NEXT: vsext.vf4 v8, v10 -; RV32LMULMAX2-NEXT: ret -; -; RV64LMULMAX2-LABEL: stepvector_v4i64: -; RV64LMULMAX2: # %bb.0: -; RV64LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64LMULMAX2-NEXT: vid.v v8 -; RV64LMULMAX2-NEXT: ret +; RV64-LABEL: stepvector_v4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vid.v v8 +; RV64-NEXT: ret %v = call <4 x i64> @llvm.experimental.stepvector.v4i64() ret <4 
x i64> %v } @@ -261,54 +202,20 @@ define <4 x i64> @stepvector_v4i64() { declare <8 x i64> @llvm.experimental.stepvector.v8i64() define <8 x i64> @stepvector_v8i64() { -; RV32LMULMAX1-LABEL: stepvector_v8i64: -; RV32LMULMAX1: # %bb.0: -; RV32LMULMAX1-NEXT: lui a0, 16 -; RV32LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32LMULMAX1-NEXT: vmv.s.x v9, a0 -; RV32LMULMAX1-NEXT: vsext.vf4 v8, v9 -; RV32LMULMAX1-NEXT: lui a0, 48 -; RV32LMULMAX1-NEXT: addi a0, a0, 2 -; RV32LMULMAX1-NEXT: vmv.s.x v10, a0 -; RV32LMULMAX1-NEXT: vsext.vf4 v9, v10 -; RV32LMULMAX1-NEXT: lui a0, 80 -; RV32LMULMAX1-NEXT: addi a0, a0, 4 -; RV32LMULMAX1-NEXT: vmv.s.x v11, a0 -; RV32LMULMAX1-NEXT: vsext.vf4 v10, v11 -; RV32LMULMAX1-NEXT: lui a0, 112 -; RV32LMULMAX1-NEXT: addi a0, a0, 6 -; RV32LMULMAX1-NEXT: vmv.s.x v12, a0 -; RV32LMULMAX1-NEXT: vsext.vf4 v11, v12 -; RV32LMULMAX1-NEXT: ret -; -; RV64LMULMAX1-LABEL: stepvector_v8i64: -; RV64LMULMAX1: # %bb.0: -; RV64LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64LMULMAX1-NEXT: vid.v v8 -; RV64LMULMAX1-NEXT: vadd.vi v9, v8, 2 -; RV64LMULMAX1-NEXT: vadd.vi v10, v8, 4 -; RV64LMULMAX1-NEXT: vadd.vi v11, v8, 6 -; RV64LMULMAX1-NEXT: ret -; -; RV32LMULMAX2-LABEL: stepvector_v8i64: -; RV32LMULMAX2: # %bb.0: -; RV32LMULMAX2-NEXT: lui a0, %hi(.LCPI15_0) -; RV32LMULMAX2-NEXT: addi a0, a0, %lo(.LCPI15_0) -; RV32LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32LMULMAX2-NEXT: vle8.v v10, (a0) -; RV32LMULMAX2-NEXT: lui a0, %hi(.LCPI15_1) -; RV32LMULMAX2-NEXT: addi a0, a0, %lo(.LCPI15_1) -; RV32LMULMAX2-NEXT: vle8.v v12, (a0) -; RV32LMULMAX2-NEXT: vsext.vf4 v8, v10 -; RV32LMULMAX2-NEXT: vsext.vf4 v10, v12 -; RV32LMULMAX2-NEXT: ret +; RV32-LABEL: stepvector_v8i64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI15_0) +; RV32-NEXT: addi a0, a0, %lo(.LCPI15_0) +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV32-NEXT: vle8.v v12, (a0) +; RV32-NEXT: vsext.vf4 v8, v12 +; RV32-NEXT: ret ; -; RV64LMULMAX2-LABEL: stepvector_v8i64: -; RV64LMULMAX2: 
# %bb.0: -; RV64LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64LMULMAX2-NEXT: vid.v v8 -; RV64LMULMAX2-NEXT: vadd.vi v10, v8, 4 -; RV64LMULMAX2-NEXT: ret +; RV64-LABEL: stepvector_v8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64-NEXT: vid.v v8 +; RV64-NEXT: ret %v = call <8 x i64> @llvm.experimental.stepvector.v8i64() ret <8 x i64> %v } @@ -316,84 +223,21 @@ define <8 x i64> @stepvector_v8i64() { declare <16 x i64> @llvm.experimental.stepvector.v16i64() define <16 x i64> @stepvector_v16i64() { -; RV32LMULMAX1-LABEL: stepvector_v16i64: -; RV32LMULMAX1: # %bb.0: -; RV32LMULMAX1-NEXT: lui a0, 16 -; RV32LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32LMULMAX1-NEXT: vmv.s.x v9, a0 -; RV32LMULMAX1-NEXT: vsext.vf4 v8, v9 -; RV32LMULMAX1-NEXT: lui a0, 48 -; RV32LMULMAX1-NEXT: addi a0, a0, 2 -; RV32LMULMAX1-NEXT: vmv.s.x v10, a0 -; RV32LMULMAX1-NEXT: vsext.vf4 v9, v10 -; RV32LMULMAX1-NEXT: lui a0, 80 -; RV32LMULMAX1-NEXT: addi a0, a0, 4 -; RV32LMULMAX1-NEXT: vmv.s.x v11, a0 -; RV32LMULMAX1-NEXT: vsext.vf4 v10, v11 -; RV32LMULMAX1-NEXT: lui a0, 112 -; RV32LMULMAX1-NEXT: addi a0, a0, 6 -; RV32LMULMAX1-NEXT: vmv.s.x v12, a0 -; RV32LMULMAX1-NEXT: vsext.vf4 v11, v12 -; RV32LMULMAX1-NEXT: lui a0, 144 -; RV32LMULMAX1-NEXT: addi a0, a0, 8 -; RV32LMULMAX1-NEXT: vmv.s.x v13, a0 -; RV32LMULMAX1-NEXT: vsext.vf4 v12, v13 -; RV32LMULMAX1-NEXT: lui a0, 176 -; RV32LMULMAX1-NEXT: addi a0, a0, 10 -; RV32LMULMAX1-NEXT: vmv.s.x v14, a0 -; RV32LMULMAX1-NEXT: vsext.vf4 v13, v14 -; RV32LMULMAX1-NEXT: lui a0, 208 -; RV32LMULMAX1-NEXT: addi a0, a0, 12 -; RV32LMULMAX1-NEXT: vmv.s.x v15, a0 -; RV32LMULMAX1-NEXT: vsext.vf4 v14, v15 -; RV32LMULMAX1-NEXT: lui a0, 240 -; RV32LMULMAX1-NEXT: addi a0, a0, 14 -; RV32LMULMAX1-NEXT: vmv.s.x v16, a0 -; RV32LMULMAX1-NEXT: vsext.vf4 v15, v16 -; RV32LMULMAX1-NEXT: ret -; -; RV64LMULMAX1-LABEL: stepvector_v16i64: -; RV64LMULMAX1: # %bb.0: -; RV64LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64LMULMAX1-NEXT: 
vid.v v8 -; RV64LMULMAX1-NEXT: vadd.vi v9, v8, 2 -; RV64LMULMAX1-NEXT: vadd.vi v10, v8, 4 -; RV64LMULMAX1-NEXT: vadd.vi v11, v8, 6 -; RV64LMULMAX1-NEXT: vadd.vi v12, v8, 8 -; RV64LMULMAX1-NEXT: vadd.vi v13, v8, 10 -; RV64LMULMAX1-NEXT: vadd.vi v14, v8, 12 -; RV64LMULMAX1-NEXT: vadd.vi v15, v8, 14 -; RV64LMULMAX1-NEXT: ret -; -; RV32LMULMAX2-LABEL: stepvector_v16i64: -; RV32LMULMAX2: # %bb.0: -; RV32LMULMAX2-NEXT: lui a0, %hi(.LCPI16_0) -; RV32LMULMAX2-NEXT: addi a0, a0, %lo(.LCPI16_0) -; RV32LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32LMULMAX2-NEXT: vle8.v v10, (a0) -; RV32LMULMAX2-NEXT: lui a0, %hi(.LCPI16_1) -; RV32LMULMAX2-NEXT: addi a0, a0, %lo(.LCPI16_1) -; RV32LMULMAX2-NEXT: vle8.v v12, (a0) -; RV32LMULMAX2-NEXT: lui a0, %hi(.LCPI16_2) -; RV32LMULMAX2-NEXT: addi a0, a0, %lo(.LCPI16_2) -; RV32LMULMAX2-NEXT: vle8.v v14, (a0) -; RV32LMULMAX2-NEXT: lui a0, %hi(.LCPI16_3) -; RV32LMULMAX2-NEXT: addi a0, a0, %lo(.LCPI16_3) -; RV32LMULMAX2-NEXT: vle8.v v16, (a0) -; RV32LMULMAX2-NEXT: vsext.vf4 v8, v10 -; RV32LMULMAX2-NEXT: vsext.vf4 v10, v12 -; RV32LMULMAX2-NEXT: vsext.vf4 v12, v14 -; RV32LMULMAX2-NEXT: vsext.vf4 v14, v16 -; RV32LMULMAX2-NEXT: ret +; RV32-LABEL: stepvector_v16i64: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI16_0) +; RV32-NEXT: addi a0, a0, %lo(.LCPI16_0) +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: vle8.v v16, (a0) +; RV32-NEXT: vsext.vf4 v8, v16 +; RV32-NEXT: ret ; -; RV64LMULMAX2-LABEL: stepvector_v16i64: -; RV64LMULMAX2: # %bb.0: -; RV64LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64LMULMAX2-NEXT: vid.v v8 -; RV64LMULMAX2-NEXT: vadd.vi v10, v8, 4 -; RV64LMULMAX2-NEXT: vadd.vi v12, v8, 8 -; RV64LMULMAX2-NEXT: vadd.vi v14, v8, 12 -; RV64LMULMAX2-NEXT: ret +; RV64-LABEL: stepvector_v16i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vid.v v8 +; RV64-NEXT: ret %v = call <16 x i64> @llvm.experimental.stepvector.v16i64() ret <16 x i64> %v } diff --git 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll index b18e235bb97650..44d4a8a1e04cda 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll @@ -1,8 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 -; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 -; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs -riscv-v-fixed-length-vector-lmul-max=8 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 -; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs -riscv-v-fixed-length-vector-lmul-max=8 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s declare i1 @llvm.vector.reduce.or.v1i1(<1 x i1>) @@ -474,21 +472,13 @@ define zeroext i1 @vreduce_smin_v16i1(<16 x i1> %v) { declare i1 @llvm.vector.reduce.or.v32i1(<32 x i1>) define zeroext i1 @vreduce_or_v32i1(<32 x i1> %v) { -; LMULMAX1-LABEL: vreduce_or_v32i1: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vmor.mm v8, v0, v8 -; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: snez a0, a0 -; LMULMAX1-NEXT: ret -; -; LMULMAX8-LABEL: vreduce_or_v32i1: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: li a0, 32 -; LMULMAX8-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; LMULMAX8-NEXT: vcpop.m a0, v0 -; LMULMAX8-NEXT: snez a0, a0 -; LMULMAX8-NEXT: ret +; CHECK-LABEL: vreduce_or_v32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, 
ta, ma +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> %v) ret i1 %red } @@ -496,21 +486,13 @@ define zeroext i1 @vreduce_or_v32i1(<32 x i1> %v) { declare i1 @llvm.vector.reduce.xor.v32i1(<32 x i1>) define zeroext i1 @vreduce_xor_v32i1(<32 x i1> %v) { -; LMULMAX1-LABEL: vreduce_xor_v32i1: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vmxor.mm v8, v0, v8 -; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: andi a0, a0, 1 -; LMULMAX1-NEXT: ret -; -; LMULMAX8-LABEL: vreduce_xor_v32i1: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: li a0, 32 -; LMULMAX8-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; LMULMAX8-NEXT: vcpop.m a0, v0 -; LMULMAX8-NEXT: andi a0, a0, 1 -; LMULMAX8-NEXT: ret +; CHECK-LABEL: vreduce_xor_v32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %v) ret i1 %red } @@ -518,22 +500,14 @@ define zeroext i1 @vreduce_xor_v32i1(<32 x i1> %v) { declare i1 @llvm.vector.reduce.and.v32i1(<32 x i1>) define zeroext i1 @vreduce_and_v32i1(<32 x i1> %v) { -; LMULMAX1-LABEL: vreduce_and_v32i1: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vmnand.mm v8, v0, v8 -; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: seqz a0, a0 -; LMULMAX1-NEXT: ret -; -; LMULMAX8-LABEL: vreduce_and_v32i1: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: li a0, 32 -; LMULMAX8-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; LMULMAX8-NEXT: vmnot.m v8, v0 -; LMULMAX8-NEXT: vcpop.m a0, v8 -; LMULMAX8-NEXT: seqz a0, a0 -; LMULMAX8-NEXT: ret +; CHECK-LABEL: vreduce_and_v32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vmnot.m v8, v0 +; CHECK-NEXT: vcpop.m a0, v8 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: ret %red = 
call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %v) ret i1 %red } @@ -541,21 +515,13 @@ define zeroext i1 @vreduce_and_v32i1(<32 x i1> %v) { declare i1 @llvm.vector.reduce.umax.v32i1(<32 x i1>) define zeroext i1 @vreduce_umax_v32i1(<32 x i1> %v) { -; LMULMAX1-LABEL: vreduce_umax_v32i1: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vmor.mm v8, v0, v8 -; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: snez a0, a0 -; LMULMAX1-NEXT: ret -; -; LMULMAX8-LABEL: vreduce_umax_v32i1: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: li a0, 32 -; LMULMAX8-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; LMULMAX8-NEXT: vcpop.m a0, v0 -; LMULMAX8-NEXT: snez a0, a0 -; LMULMAX8-NEXT: ret +; CHECK-LABEL: vreduce_umax_v32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umax.v32i1(<32 x i1> %v) ret i1 %red } @@ -563,22 +529,14 @@ define zeroext i1 @vreduce_umax_v32i1(<32 x i1> %v) { declare i1 @llvm.vector.reduce.smax.v32i1(<32 x i1>) define zeroext i1 @vreduce_smax_v32i1(<32 x i1> %v) { -; LMULMAX1-LABEL: vreduce_smax_v32i1: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vmnand.mm v8, v0, v8 -; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: seqz a0, a0 -; LMULMAX1-NEXT: ret -; -; LMULMAX8-LABEL: vreduce_smax_v32i1: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: li a0, 32 -; LMULMAX8-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; LMULMAX8-NEXT: vmnot.m v8, v0 -; LMULMAX8-NEXT: vcpop.m a0, v8 -; LMULMAX8-NEXT: seqz a0, a0 -; LMULMAX8-NEXT: ret +; CHECK-LABEL: vreduce_smax_v32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vmnot.m v8, v0 +; CHECK-NEXT: vcpop.m a0, v8 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smax.v32i1(<32 x i1> %v) ret i1 %red } @@ -586,22 +544,14 @@ 
define zeroext i1 @vreduce_smax_v32i1(<32 x i1> %v) { declare i1 @llvm.vector.reduce.umin.v32i1(<32 x i1>) define zeroext i1 @vreduce_umin_v32i1(<32 x i1> %v) { -; LMULMAX1-LABEL: vreduce_umin_v32i1: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vmnand.mm v8, v0, v8 -; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: seqz a0, a0 -; LMULMAX1-NEXT: ret -; -; LMULMAX8-LABEL: vreduce_umin_v32i1: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: li a0, 32 -; LMULMAX8-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; LMULMAX8-NEXT: vmnot.m v8, v0 -; LMULMAX8-NEXT: vcpop.m a0, v8 -; LMULMAX8-NEXT: seqz a0, a0 -; LMULMAX8-NEXT: ret +; CHECK-LABEL: vreduce_umin_v32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vmnot.m v8, v0 +; CHECK-NEXT: vcpop.m a0, v8 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umin.v32i1(<32 x i1> %v) ret i1 %red } @@ -609,21 +559,13 @@ define zeroext i1 @vreduce_umin_v32i1(<32 x i1> %v) { declare i1 @llvm.vector.reduce.smin.v32i1(<32 x i1>) define zeroext i1 @vreduce_smin_v32i1(<32 x i1> %v) { -; LMULMAX1-LABEL: vreduce_smin_v32i1: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vmor.mm v8, v0, v8 -; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: snez a0, a0 -; LMULMAX1-NEXT: ret -; -; LMULMAX8-LABEL: vreduce_smin_v32i1: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: li a0, 32 -; LMULMAX8-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; LMULMAX8-NEXT: vcpop.m a0, v0 -; LMULMAX8-NEXT: snez a0, a0 -; LMULMAX8-NEXT: ret +; CHECK-LABEL: vreduce_smin_v32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smin.v32i1(<32 x i1> %v) ret i1 %red } @@ -631,23 +573,13 @@ define zeroext i1 @vreduce_smin_v32i1(<32 x i1> %v) { declare i1 
@llvm.vector.reduce.or.v64i1(<64 x i1>) define zeroext i1 @vreduce_or_v64i1(<64 x i1> %v) { -; LMULMAX1-LABEL: vreduce_or_v64i1: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vmor.mm v8, v8, v10 -; LMULMAX1-NEXT: vmor.mm v9, v0, v9 -; LMULMAX1-NEXT: vmor.mm v8, v9, v8 -; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: snez a0, a0 -; LMULMAX1-NEXT: ret -; -; LMULMAX8-LABEL: vreduce_or_v64i1: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: li a0, 64 -; LMULMAX8-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; LMULMAX8-NEXT: vcpop.m a0, v0 -; LMULMAX8-NEXT: snez a0, a0 -; LMULMAX8-NEXT: ret +; CHECK-LABEL: vreduce_or_v64i1: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> %v) ret i1 %red } @@ -655,23 +587,13 @@ define zeroext i1 @vreduce_or_v64i1(<64 x i1> %v) { declare i1 @llvm.vector.reduce.xor.v64i1(<64 x i1>) define zeroext i1 @vreduce_xor_v64i1(<64 x i1> %v) { -; LMULMAX1-LABEL: vreduce_xor_v64i1: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vmxor.mm v8, v8, v10 -; LMULMAX1-NEXT: vmxor.mm v9, v0, v9 -; LMULMAX1-NEXT: vmxor.mm v8, v9, v8 -; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: andi a0, a0, 1 -; LMULMAX1-NEXT: ret -; -; LMULMAX8-LABEL: vreduce_xor_v64i1: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: li a0, 64 -; LMULMAX8-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; LMULMAX8-NEXT: vcpop.m a0, v0 -; LMULMAX8-NEXT: andi a0, a0, 1 -; LMULMAX8-NEXT: ret +; CHECK-LABEL: vreduce_xor_v64i1: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> %v) ret i1 %red } @@ -679,24 +601,14 @@ define zeroext i1 @vreduce_xor_v64i1(<64 x i1> %v) { declare i1 
@llvm.vector.reduce.and.v64i1(<64 x i1>) define zeroext i1 @vreduce_and_v64i1(<64 x i1> %v) { -; LMULMAX1-LABEL: vreduce_and_v64i1: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vmand.mm v8, v8, v10 -; LMULMAX1-NEXT: vmand.mm v9, v0, v9 -; LMULMAX1-NEXT: vmnand.mm v8, v9, v8 -; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: seqz a0, a0 -; LMULMAX1-NEXT: ret -; -; LMULMAX8-LABEL: vreduce_and_v64i1: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: li a0, 64 -; LMULMAX8-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; LMULMAX8-NEXT: vmnot.m v8, v0 -; LMULMAX8-NEXT: vcpop.m a0, v8 -; LMULMAX8-NEXT: seqz a0, a0 -; LMULMAX8-NEXT: ret +; CHECK-LABEL: vreduce_and_v64i1: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vmnot.m v8, v0 +; CHECK-NEXT: vcpop.m a0, v8 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> %v) ret i1 %red } @@ -704,23 +616,13 @@ define zeroext i1 @vreduce_and_v64i1(<64 x i1> %v) { declare i1 @llvm.vector.reduce.umax.v64i1(<64 x i1>) define zeroext i1 @vreduce_umax_v64i1(<64 x i1> %v) { -; LMULMAX1-LABEL: vreduce_umax_v64i1: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vmor.mm v8, v8, v10 -; LMULMAX1-NEXT: vmor.mm v9, v0, v9 -; LMULMAX1-NEXT: vmor.mm v8, v9, v8 -; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: snez a0, a0 -; LMULMAX1-NEXT: ret -; -; LMULMAX8-LABEL: vreduce_umax_v64i1: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: li a0, 64 -; LMULMAX8-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; LMULMAX8-NEXT: vcpop.m a0, v0 -; LMULMAX8-NEXT: snez a0, a0 -; LMULMAX8-NEXT: ret +; CHECK-LABEL: vreduce_umax_v64i1: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umax.v64i1(<64 x i1> %v) ret i1 %red } @@ -728,24 +630,14 @@ define 
zeroext i1 @vreduce_umax_v64i1(<64 x i1> %v) { declare i1 @llvm.vector.reduce.smax.v64i1(<64 x i1>) define zeroext i1 @vreduce_smax_v64i1(<64 x i1> %v) { -; LMULMAX1-LABEL: vreduce_smax_v64i1: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vmand.mm v8, v8, v10 -; LMULMAX1-NEXT: vmand.mm v9, v0, v9 -; LMULMAX1-NEXT: vmnand.mm v8, v9, v8 -; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: seqz a0, a0 -; LMULMAX1-NEXT: ret -; -; LMULMAX8-LABEL: vreduce_smax_v64i1: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: li a0, 64 -; LMULMAX8-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; LMULMAX8-NEXT: vmnot.m v8, v0 -; LMULMAX8-NEXT: vcpop.m a0, v8 -; LMULMAX8-NEXT: seqz a0, a0 -; LMULMAX8-NEXT: ret +; CHECK-LABEL: vreduce_smax_v64i1: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vmnot.m v8, v0 +; CHECK-NEXT: vcpop.m a0, v8 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smax.v64i1(<64 x i1> %v) ret i1 %red } @@ -753,24 +645,14 @@ define zeroext i1 @vreduce_smax_v64i1(<64 x i1> %v) { declare i1 @llvm.vector.reduce.umin.v64i1(<64 x i1>) define zeroext i1 @vreduce_umin_v64i1(<64 x i1> %v) { -; LMULMAX1-LABEL: vreduce_umin_v64i1: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vmand.mm v8, v8, v10 -; LMULMAX1-NEXT: vmand.mm v9, v0, v9 -; LMULMAX1-NEXT: vmnand.mm v8, v9, v8 -; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: seqz a0, a0 -; LMULMAX1-NEXT: ret -; -; LMULMAX8-LABEL: vreduce_umin_v64i1: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: li a0, 64 -; LMULMAX8-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; LMULMAX8-NEXT: vmnot.m v8, v0 -; LMULMAX8-NEXT: vcpop.m a0, v8 -; LMULMAX8-NEXT: seqz a0, a0 -; LMULMAX8-NEXT: ret +; CHECK-LABEL: vreduce_umin_v64i1: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vmnot.m v8, v0 +; CHECK-NEXT: vcpop.m a0, v8 +; CHECK-NEXT: 
seqz a0, a0 +; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umin.v64i1(<64 x i1> %v) ret i1 %red } @@ -778,23 +660,13 @@ define zeroext i1 @vreduce_umin_v64i1(<64 x i1> %v) { declare i1 @llvm.vector.reduce.smin.v64i1(<64 x i1>) define zeroext i1 @vreduce_smin_v64i1(<64 x i1> %v) { -; LMULMAX1-LABEL: vreduce_smin_v64i1: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vmor.mm v8, v8, v10 -; LMULMAX1-NEXT: vmor.mm v9, v0, v9 -; LMULMAX1-NEXT: vmor.mm v8, v9, v8 -; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: snez a0, a0 -; LMULMAX1-NEXT: ret -; -; LMULMAX8-LABEL: vreduce_smin_v64i1: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: li a0, 64 -; LMULMAX8-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; LMULMAX8-NEXT: vcpop.m a0, v0 -; LMULMAX8-NEXT: snez a0, a0 -; LMULMAX8-NEXT: ret +; CHECK-LABEL: vreduce_smin_v64i1: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smin.v64i1(<64 x i1> %v) ret i1 %red } @@ -867,21 +739,13 @@ define zeroext i1 @vreduce_add_v16i1(<16 x i1> %v) { declare i1 @llvm.vector.reduce.add.v32i1(<32 x i1>) define zeroext i1 @vreduce_add_v32i1(<32 x i1> %v) { -; LMULMAX1-LABEL: vreduce_add_v32i1: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vmxor.mm v8, v0, v8 -; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: andi a0, a0, 1 -; LMULMAX1-NEXT: ret -; -; LMULMAX8-LABEL: vreduce_add_v32i1: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: li a0, 32 -; LMULMAX8-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; LMULMAX8-NEXT: vcpop.m a0, v0 -; LMULMAX8-NEXT: andi a0, a0, 1 -; LMULMAX8-NEXT: ret +; CHECK-LABEL: vreduce_add_v32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: ret %red = call i1 
@llvm.vector.reduce.add.v32i1(<32 x i1> %v) ret i1 %red } @@ -889,23 +753,13 @@ define zeroext i1 @vreduce_add_v32i1(<32 x i1> %v) { declare i1 @llvm.vector.reduce.add.v64i1(<64 x i1>) define zeroext i1 @vreduce_add_v64i1(<64 x i1> %v) { -; LMULMAX1-LABEL: vreduce_add_v64i1: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vmxor.mm v8, v8, v10 -; LMULMAX1-NEXT: vmxor.mm v9, v0, v9 -; LMULMAX1-NEXT: vmxor.mm v8, v9, v8 -; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: andi a0, a0, 1 -; LMULMAX1-NEXT: ret -; -; LMULMAX8-LABEL: vreduce_add_v64i1: -; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: li a0, 64 -; LMULMAX8-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; LMULMAX8-NEXT: vcpop.m a0, v0 -; LMULMAX8-NEXT: andi a0, a0, 1 -; LMULMAX8-NEXT: ret +; CHECK-LABEL: vreduce_add_v64i1: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> %v) ret i1 %red } From bd2f7bbdb46df5a0d1bd58c66e3e4cdbf7c96e2e Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Wed, 14 Feb 2024 09:18:35 +0100 Subject: [PATCH 108/240] [bazel] Port for 09e98950bfcff7ad376922932efb2b56e4db9898 --- .../llvm-project-overlay/clang/BUILD.bazel | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel index b5de786adaaf8d..d9aab5dbea431e 100644 --- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel @@ -1843,6 +1843,7 @@ cc_library( ":driver", ":driver_options_inc_gen", ":edit", + ":install_api", ":lex", ":parse", ":sema", @@ -2052,6 +2053,22 @@ cc_library( ], ) +cc_library( + name = "install_api", + srcs = glob([ + "lib/InstallAPI/*.cpp", + ]), + hdrs = glob([ + "include/clang/InstallAPI/*.h", + ]), + includes = ["include"], + deps = [ + 
":ast", + ":support", + "//llvm:TextAPI", + ], +) + cc_library( name = "serialization", srcs = [ From 5c8985e7703b013c5df0612c3cbc1d333f4c5fa1 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Wed, 14 Feb 2024 17:27:53 +0900 Subject: [PATCH 109/240] clangCodeGen: Introduce `MCDC::State` with `MCDCState.h` (#81497) This packs; * `BitmapBytes` * `BitmapMap` * `CondIDMap` into `MCDC::State`. --- clang/lib/CodeGen/CodeGenPGO.cpp | 47 +++++++++++------------- clang/lib/CodeGen/CodeGenPGO.h | 8 ++-- clang/lib/CodeGen/CoverageMappingGen.cpp | 32 +++++++--------- clang/lib/CodeGen/CoverageMappingGen.h | 14 ++++--- clang/lib/CodeGen/MCDCState.h | 32 ++++++++++++++++ 5 files changed, 78 insertions(+), 55 deletions(-) create mode 100644 clang/lib/CodeGen/MCDCState.h diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 5d7c3847745762..b5ce1aad7ea1e5 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -165,8 +165,7 @@ struct MapRegionCounters : public RecursiveASTVisitor { llvm::DenseMap &CounterMap; /// The next bitmap byte index to assign. unsigned NextMCDCBitmapIdx; - /// The map of statements to MC/DC bitmap coverage objects. - llvm::DenseMap &MCDCBitmapMap; + MCDC::State &MCDCState; /// Maximum number of supported MC/DC conditions in a boolean expression. unsigned MCDCMaxCond; /// The profile version. 
@@ -176,11 +175,11 @@ struct MapRegionCounters : public RecursiveASTVisitor { MapRegionCounters(PGOHashVersion HashVersion, uint64_t ProfileVersion, llvm::DenseMap &CounterMap, - llvm::DenseMap &MCDCBitmapMap, - unsigned MCDCMaxCond, DiagnosticsEngine &Diag) + MCDC::State &MCDCState, unsigned MCDCMaxCond, + DiagnosticsEngine &Diag) : NextCounter(0), Hash(HashVersion), CounterMap(CounterMap), - NextMCDCBitmapIdx(0), MCDCBitmapMap(MCDCBitmapMap), - MCDCMaxCond(MCDCMaxCond), ProfileVersion(ProfileVersion), Diag(Diag) {} + NextMCDCBitmapIdx(0), MCDCState(MCDCState), MCDCMaxCond(MCDCMaxCond), + ProfileVersion(ProfileVersion), Diag(Diag) {} // Blocks and lambdas are handled as separate functions, so we need not // traverse them in the parent context. @@ -309,7 +308,7 @@ struct MapRegionCounters : public RecursiveASTVisitor { // Otherwise, allocate the number of bytes required for the bitmap // based on the number of conditions. Must be at least 1-byte long. - MCDCBitmapMap[BinOp] = NextMCDCBitmapIdx; + MCDCState.BitmapMap[BinOp] = NextMCDCBitmapIdx; unsigned SizeInBits = std::max(1L << NumCond, CHAR_BIT); NextMCDCBitmapIdx += SizeInBits / CHAR_BIT; } @@ -987,10 +986,9 @@ void CodeGenPGO::mapRegionCounters(const Decl *D) { unsigned MCDCMaxConditions = (CGM.getCodeGenOpts().MCDCCoverage) ? 
6 : 0; RegionCounterMap.reset(new llvm::DenseMap); - RegionMCDCBitmapMap.reset(new llvm::DenseMap); + RegionMCDCState.reset(new MCDC::State); MapRegionCounters Walker(HashVersion, ProfileVersion, *RegionCounterMap, - *RegionMCDCBitmapMap, MCDCMaxConditions, - CGM.getDiags()); + *RegionMCDCState, MCDCMaxConditions, CGM.getDiags()); if (const FunctionDecl *FD = dyn_cast_or_null(D)) Walker.TraverseDecl(const_cast(FD)); else if (const ObjCMethodDecl *MD = dyn_cast_or_null(D)) @@ -1001,7 +999,7 @@ void CodeGenPGO::mapRegionCounters(const Decl *D) { Walker.TraverseDecl(const_cast(CD)); assert(Walker.NextCounter > 0 && "no entry counter mapped for decl"); NumRegionCounters = Walker.NextCounter; - MCDCBitmapBytes = Walker.NextMCDCBitmapIdx; + RegionMCDCState->BitmapBytes = Walker.NextMCDCBitmapIdx; FunctionHash = Walker.Hash.finalize(); } @@ -1033,11 +1031,10 @@ void CodeGenPGO::emitCounterRegionMapping(const Decl *D) { std::string CoverageMapping; llvm::raw_string_ostream OS(CoverageMapping); - RegionCondIDMap.reset(new llvm::DenseMap); + RegionMCDCState->CondIDMap.clear(); CoverageMappingGen MappingGen( *CGM.getCoverageMapping(), CGM.getContext().getSourceManager(), - CGM.getLangOpts(), RegionCounterMap.get(), RegionMCDCBitmapMap.get(), - RegionCondIDMap.get()); + CGM.getLangOpts(), RegionCounterMap.get(), RegionMCDCState.get()); MappingGen.emitCounterMapping(D, OS); OS.flush(); @@ -1119,7 +1116,7 @@ bool CodeGenPGO::canEmitMCDCCoverage(const CGBuilderTy &Builder) { } void CodeGenPGO::emitMCDCParameters(CGBuilderTy &Builder) { - if (!canEmitMCDCCoverage(Builder) || !RegionMCDCBitmapMap) + if (!canEmitMCDCCoverage(Builder) || !RegionMCDCState) return; auto *I8PtrTy = llvm::PointerType::getUnqual(CGM.getLLVMContext()); @@ -1129,7 +1126,7 @@ void CodeGenPGO::emitMCDCParameters(CGBuilderTy &Builder) { // anything. 
llvm::Value *Args[3] = {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), Builder.getInt64(FunctionHash), - Builder.getInt32(MCDCBitmapBytes)}; + Builder.getInt32(RegionMCDCState->BitmapBytes)}; Builder.CreateCall( CGM.getIntrinsic(llvm::Intrinsic::instrprof_mcdc_parameters), Args); } @@ -1137,13 +1134,13 @@ void CodeGenPGO::emitMCDCParameters(CGBuilderTy &Builder) { void CodeGenPGO::emitMCDCTestVectorBitmapUpdate(CGBuilderTy &Builder, const Expr *S, Address MCDCCondBitmapAddr) { - if (!canEmitMCDCCoverage(Builder) || !RegionMCDCBitmapMap) + if (!canEmitMCDCCoverage(Builder) || !RegionMCDCState) return; S = S->IgnoreParens(); - auto ExprMCDCBitmapMapIterator = RegionMCDCBitmapMap->find(S); - if (ExprMCDCBitmapMapIterator == RegionMCDCBitmapMap->end()) + auto ExprMCDCBitmapMapIterator = RegionMCDCState->BitmapMap.find(S); + if (ExprMCDCBitmapMapIterator == RegionMCDCState->BitmapMap.end()) return; // Extract the ID of the global bitmap associated with this expression. @@ -1157,7 +1154,7 @@ void CodeGenPGO::emitMCDCTestVectorBitmapUpdate(CGBuilderTy &Builder, // index represents an executed test vector. llvm::Value *Args[5] = {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), Builder.getInt64(FunctionHash), - Builder.getInt32(MCDCBitmapBytes), + Builder.getInt32(RegionMCDCState->BitmapBytes), Builder.getInt32(MCDCTestVectorBitmapID), MCDCCondBitmapAddr.getPointer()}; Builder.CreateCall( @@ -1166,12 +1163,12 @@ void CodeGenPGO::emitMCDCTestVectorBitmapUpdate(CGBuilderTy &Builder, void CodeGenPGO::emitMCDCCondBitmapReset(CGBuilderTy &Builder, const Expr *S, Address MCDCCondBitmapAddr) { - if (!canEmitMCDCCoverage(Builder) || !RegionMCDCBitmapMap) + if (!canEmitMCDCCoverage(Builder) || !RegionMCDCState) return; S = S->IgnoreParens(); - if (RegionMCDCBitmapMap->find(S) == RegionMCDCBitmapMap->end()) + if (!RegionMCDCState->BitmapMap.contains(S)) return; // Emit intrinsic that resets a dedicated temporary value on the stack to 0. 
@@ -1181,7 +1178,7 @@ void CodeGenPGO::emitMCDCCondBitmapReset(CGBuilderTy &Builder, const Expr *S, void CodeGenPGO::emitMCDCCondBitmapUpdate(CGBuilderTy &Builder, const Expr *S, Address MCDCCondBitmapAddr, llvm::Value *Val) { - if (!canEmitMCDCCoverage(Builder) || !RegionCondIDMap) + if (!canEmitMCDCCoverage(Builder) || !RegionMCDCState) return; // Even though, for simplicity, parentheses and unary logical-NOT operators @@ -1193,8 +1190,8 @@ void CodeGenPGO::emitMCDCCondBitmapUpdate(CGBuilderTy &Builder, const Expr *S, // also make debugging a bit easier. S = CodeGenFunction::stripCond(S); - auto ExprMCDCConditionIDMapIterator = RegionCondIDMap->find(S); - if (ExprMCDCConditionIDMapIterator == RegionCondIDMap->end()) + auto ExprMCDCConditionIDMapIterator = RegionMCDCState->CondIDMap.find(S); + if (ExprMCDCConditionIDMapIterator == RegionMCDCState->CondIDMap.end()) return; // Extract the ID of the condition we are setting in the bitmap. diff --git a/clang/lib/CodeGen/CodeGenPGO.h b/clang/lib/CodeGen/CodeGenPGO.h index 6596b6c3527764..d3c2b277238fc7 100644 --- a/clang/lib/CodeGen/CodeGenPGO.h +++ b/clang/lib/CodeGen/CodeGenPGO.h @@ -16,6 +16,7 @@ #include "CGBuilder.h" #include "CodeGenModule.h" #include "CodeGenTypes.h" +#include "MCDCState.h" #include "llvm/ProfileData/InstrProfReader.h" #include #include @@ -33,21 +34,18 @@ class CodeGenPGO { std::array NumValueSites; unsigned NumRegionCounters; - unsigned MCDCBitmapBytes; uint64_t FunctionHash; std::unique_ptr> RegionCounterMap; - std::unique_ptr> RegionMCDCBitmapMap; - std::unique_ptr> RegionCondIDMap; std::unique_ptr> StmtCountMap; std::unique_ptr ProfRecord; + std::unique_ptr RegionMCDCState; std::vector RegionCounts; uint64_t CurrentRegionCount; public: CodeGenPGO(CodeGenModule &CGModule) : CGM(CGModule), FuncNameVar(nullptr), NumValueSites({{0}}), - NumRegionCounters(0), MCDCBitmapBytes(0), FunctionHash(0), - CurrentRegionCount(0) {} + NumRegionCounters(0), FunctionHash(0), CurrentRegionCount(0) {} /// 
Whether or not we have PGO region data for the current function. This is /// false both when we have no data at all and when our data has been diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp index 93fe76eb9903e9..3b711c05e92754 100644 --- a/clang/lib/CodeGen/CoverageMappingGen.cpp +++ b/clang/lib/CodeGen/CoverageMappingGen.cpp @@ -689,8 +689,8 @@ struct MCDCCoverageBuilder { CodeGenModule &CGM; llvm::SmallVector DecisionStack; + MCDC::State &MCDCState; llvm::DenseMap &CondIDs; - llvm::DenseMap &MCDCBitmapMap; mcdc::ConditionID NextID = 1; bool NotMapped = false; @@ -703,12 +703,9 @@ struct MCDCCoverageBuilder { } public: - MCDCCoverageBuilder( - CodeGenModule &CGM, - llvm::DenseMap &CondIDMap, - llvm::DenseMap &MCDCBitmapMap) - : CGM(CGM), DecisionStack(1, DecisionStackSentinel), CondIDs(CondIDMap), - MCDCBitmapMap(MCDCBitmapMap) {} + MCDCCoverageBuilder(CodeGenModule &CGM, MCDC::State &MCDCState) + : CGM(CGM), DecisionStack(1, DecisionStackSentinel), MCDCState(MCDCState), + CondIDs(MCDCState.CondIDMap) {} /// Return whether the build of the control flow map is at the top-level /// (root) of a logical operator nest in a boolean expression prior to the @@ -745,7 +742,8 @@ struct MCDCCoverageBuilder { return; // If binary expression is disqualified, don't do mapping. - if (!isBuilding() && !MCDCBitmapMap.contains(CodeGenFunction::stripCond(E))) + if (!isBuilding() && + !MCDCState.BitmapMap.contains(CodeGenFunction::stripCond(E))) NotMapped = true; // Don't go any further if we don't need to map condition IDs. @@ -818,8 +816,7 @@ struct CounterCoverageMappingBuilder /// The map of statements to count values. llvm::DenseMap &CounterMap; - /// The map of statements to bitmap coverage object values. - llvm::DenseMap &MCDCBitmapMap; + MCDC::State &MCDCState; /// A stack of currently live regions. 
llvm::SmallVector RegionStack; @@ -863,7 +860,7 @@ struct CounterCoverageMappingBuilder return Counter::getCounter(CounterMap[S]); } - unsigned getRegionBitmap(const Stmt *S) { return MCDCBitmapMap[S]; } + unsigned getRegionBitmap(const Stmt *S) { return MCDCState.BitmapMap[S]; } /// Push a region onto the stack. /// @@ -1341,12 +1338,9 @@ struct CounterCoverageMappingBuilder CounterCoverageMappingBuilder( CoverageMappingModuleGen &CVM, llvm::DenseMap &CounterMap, - llvm::DenseMap &MCDCBitmapMap, - llvm::DenseMap &CondIDMap, - SourceManager &SM, const LangOptions &LangOpts) + MCDC::State &MCDCState, SourceManager &SM, const LangOptions &LangOpts) : CoverageMappingBuilder(CVM, SM, LangOpts), CounterMap(CounterMap), - MCDCBitmapMap(MCDCBitmapMap), - MCDCBuilder(CVM.getCodeGenModule(), CondIDMap, MCDCBitmapMap) {} + MCDCState(MCDCState), MCDCBuilder(CVM.getCodeGenModule(), MCDCState) {} /// Write the mapping data to the output stream void write(llvm::raw_ostream &OS) { @@ -2350,9 +2344,9 @@ unsigned CoverageMappingModuleGen::getFileID(FileEntryRef File) { void CoverageMappingGen::emitCounterMapping(const Decl *D, llvm::raw_ostream &OS) { - assert(CounterMap && MCDCBitmapMap); - CounterCoverageMappingBuilder Walker(CVM, *CounterMap, *MCDCBitmapMap, - *CondIDMap, SM, LangOpts); + assert(CounterMap && MCDCState); + CounterCoverageMappingBuilder Walker(CVM, *CounterMap, *MCDCState, SM, + LangOpts); Walker.VisitDecl(D); Walker.write(OS); } diff --git a/clang/lib/CodeGen/CoverageMappingGen.h b/clang/lib/CodeGen/CoverageMappingGen.h index 62cea173c9fc93..f7c59c48c18396 100644 --- a/clang/lib/CodeGen/CoverageMappingGen.h +++ b/clang/lib/CodeGen/CoverageMappingGen.h @@ -91,6 +91,10 @@ namespace CodeGen { class CodeGenModule; +namespace MCDC { +struct State; +} + /// Organizes the cross-function state that is used while generating /// code coverage mapping data. 
class CoverageMappingModuleGen { @@ -150,22 +154,20 @@ class CoverageMappingGen { SourceManager &SM; const LangOptions &LangOpts; llvm::DenseMap *CounterMap; - llvm::DenseMap *MCDCBitmapMap; - llvm::DenseMap *CondIDMap; + MCDC::State *MCDCState; public: CoverageMappingGen(CoverageMappingModuleGen &CVM, SourceManager &SM, const LangOptions &LangOpts) : CVM(CVM), SM(SM), LangOpts(LangOpts), CounterMap(nullptr), - MCDCBitmapMap(nullptr), CondIDMap(nullptr) {} + MCDCState(nullptr) {} CoverageMappingGen(CoverageMappingModuleGen &CVM, SourceManager &SM, const LangOptions &LangOpts, llvm::DenseMap *CounterMap, - llvm::DenseMap *MCDCBitmapMap, - llvm::DenseMap *CondIDMap) + MCDC::State *MCDCState) : CVM(CVM), SM(SM), LangOpts(LangOpts), CounterMap(CounterMap), - MCDCBitmapMap(MCDCBitmapMap), CondIDMap(CondIDMap) {} + MCDCState(MCDCState) {} /// Emit the coverage mapping data which maps the regions of /// code to counters that will be used to find the execution diff --git a/clang/lib/CodeGen/MCDCState.h b/clang/lib/CodeGen/MCDCState.h new file mode 100644 index 00000000000000..dfb0d99921e87e --- /dev/null +++ b/clang/lib/CodeGen/MCDCState.h @@ -0,0 +1,32 @@ +//===---- MCDCState.h - Per-Function MC/DC state ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Per-Function MC/DC state for PGO +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CODEGEN_MCDCSTATE_H +#define LLVM_CLANG_LIB_CODEGEN_MCDCSTATE_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ProfileData/Coverage/MCDCTypes.h" + +namespace clang::CodeGen::MCDC { + +using namespace llvm::coverage::mcdc; + +/// Per-Function MC/DC state +struct State { + unsigned BitmapBytes = 0; + llvm::DenseMap BitmapMap; + llvm::DenseMap CondIDMap; +}; + +} // namespace clang::CodeGen::MCDC + +#endif // LLVM_CLANG_LIB_CODEGEN_MCDCSTATE_H From 243f14d23643e28e98d8c8d2993bd17947c101b2 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Wed, 14 Feb 2024 09:33:44 +0100 Subject: [PATCH 110/240] [bazel] Add missing dependencies for the newly-added install_api lib --- utils/bazel/llvm-project-overlay/clang/BUILD.bazel | 3 +++ 1 file changed, 3 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel index d9aab5dbea431e..a6b0e1e3f52aab 100644 --- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel @@ -1863,6 +1863,7 @@ cc_library( "//llvm:Support", "//llvm:Target", "//llvm:TargetParser", + "//llvm:TextAPI", "//llvm:config", ], ) @@ -2064,7 +2065,9 @@ cc_library( includes = ["include"], deps = [ ":ast", + ":basic", ":support", + "//llvm:Support", "//llvm:TextAPI", ], ) From 470c5b8011b94ecb0ee8dbdb34ab8d680f652f70 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Wed, 14 Feb 2024 16:40:36 +0800 Subject: [PATCH 111/240] [InstSimplify][InstCombine] Remove unnecessary `m_c_*` matchers. (#81712) This patch removes unnecessary `m_c_*` matchers since we always canonicalize `commutative_op Cst, X` into `commutative_op X, Cst`. 
Compile-time impact: https://llvm-compile-time-tracker.com/compare.php?from=bfc0b7c6891896ee8e9818f22800472510093864&to=d27b058bb9acaa43d3cadbf3cd889e8f79e5c634&stat=instructions:u --- llvm/lib/Analysis/IVDescriptors.cpp | 2 +- llvm/lib/Analysis/InstructionSimplify.cpp | 4 +-- llvm/lib/IR/IntrinsicInst.cpp | 2 +- .../InstCombine/InstCombineAndOrXor.cpp | 2 +- .../InstCombine/InstCombineNegator.cpp | 6 ++-- llvm/test/Transforms/InstSimplify/compare.ll | 30 ------------------- 6 files changed, 8 insertions(+), 38 deletions(-) diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp index 1aa324c6b5f380..055f121e743411 100644 --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -76,7 +76,7 @@ static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT, // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT // with a new integer type of the corresponding bit width. - if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))) { + if (match(J, m_And(m_Instruction(I), m_APInt(M)))) { int32_t Bits = (*M + 1).exactLogBase2(); if (Bits > 0) { RT = IntegerType::get(Phi->getContext(), Bits); diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index cbe183296d2c90..08050becd2df88 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -3246,8 +3246,8 @@ static bool trySimplifyICmpWithAdds(CmpInst::Predicate Pred, Value *LHS, Value *X; const APInt *C1, *C2; - if (!match(LHS, m_c_Add(m_Value(X), m_APInt(C1))) || - !match(RHS, m_c_Add(m_Specific(X), m_APInt(C2)))) + if (!match(LHS, m_Add(m_Value(X), m_APInt(C1))) || + !match(RHS, m_Add(m_Specific(X), m_APInt(C2)))) return false; return (C1->slt(*C2) && C1->isNonNegative()) || diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp index 7a3b708e740067..5050091836b7f9 100644 --- a/llvm/lib/IR/IntrinsicInst.cpp +++ 
b/llvm/lib/IR/IntrinsicInst.cpp @@ -623,7 +623,7 @@ bool VPIntrinsic::canIgnoreVectorLengthParam() const { if (EC.isScalable()) { // Compare vscale patterns uint64_t VScaleFactor; - if (match(VLParam, m_c_Mul(m_ConstantInt(VScaleFactor), m_VScale()))) + if (match(VLParam, m_Mul(m_VScale(), m_ConstantInt(VScaleFactor)))) return VScaleFactor >= EC.getKnownMinValue(); return (EC.getKnownMinValue() == 1) && match(VLParam, m_VScale()); } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 4465eb8992fbbf..0af9a2786f6901 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -4450,7 +4450,7 @@ Instruction *InstCombinerImpl::foldNot(BinaryOperator &I) { } // ~(X + C) --> ~C - X - if (match(NotVal, m_c_Add(m_Value(X), m_ImmConstant(C)))) + if (match(NotVal, m_Add(m_Value(X), m_ImmConstant(C)))) return BinaryOperator::CreateSub(ConstantExpr::getNot(C), X); // ~(X - Y) --> ~X + Y diff --git a/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp b/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp index 62e49469cb0198..f73679f9461bad 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp @@ -258,9 +258,9 @@ std::array Negator::getSortedOperandsOfBinOp(Instruction *I) { case Instruction::And: { Constant *ShAmt; // sub(y,and(lshr(x,C),1)) --> add(ashr(shl(x,(BW-1)-C),BW-1),y) - if (match(I, m_c_And(m_OneUse(m_TruncOrSelf( - m_LShr(m_Value(X), m_ImmConstant(ShAmt)))), - m_One()))) { + if (match(I, m_And(m_OneUse(m_TruncOrSelf( + m_LShr(m_Value(X), m_ImmConstant(ShAmt)))), + m_One()))) { unsigned BW = X->getType()->getScalarSizeInBits(); Constant *BWMinusOne = ConstantInt::get(X->getType(), BW - 1); Value *R = Builder.CreateShl(X, Builder.CreateSub(BWMinusOne, ShAmt)); diff --git a/llvm/test/Transforms/InstSimplify/compare.ll 
b/llvm/test/Transforms/InstSimplify/compare.ll index ac2ebf52ed6296..1e90f0edbd8003 100644 --- a/llvm/test/Transforms/InstSimplify/compare.ll +++ b/llvm/test/Transforms/InstSimplify/compare.ll @@ -2453,36 +2453,6 @@ define i1 @icmp_nsw_2(i32 %V) { ret i1 %cmp } -define i1 @icmp_nsw_commute(i32 %V) { -; CHECK-LABEL: @icmp_nsw_commute( -; CHECK-NEXT: ret i1 true -; - %add5 = add i32 5, %V - %add6 = add nsw i32 %V, 6 - %cmp = icmp slt i32 %add5, %add6 - ret i1 %cmp -} - -define i1 @icmp_nsw_commute2(i32 %V) { -; CHECK-LABEL: @icmp_nsw_commute2( -; CHECK-NEXT: ret i1 true -; - %add5 = add i32 %V, 5 - %add6 = add nsw i32 6, %V - %cmp = icmp slt i32 %add5, %add6 - ret i1 %cmp -} - -define i1 @icmp_nsw_commute3(i32 %V) { -; CHECK-LABEL: @icmp_nsw_commute3( -; CHECK-NEXT: ret i1 true -; - %add5 = add i32 5, %V - %add6 = add nsw i32 6, %V - %cmp = icmp slt i32 %add5, %add6 - ret i1 %cmp -} - define i1 @icmp_nsw_22(i32 %V) { ; CHECK-LABEL: @icmp_nsw_22( ; CHECK-NEXT: ret i1 true From 5932f3f861f84305bd01050d0af8e0dcb232a8b3 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Wed, 14 Feb 2024 09:53:48 +0100 Subject: [PATCH 112/240] [clangCodeGen] Fix the build failure for 5c8985e7703b013c5df0612c3cbc1d333f4c5fa1 --- clang/lib/CodeGen/MCDCState.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/clang/lib/CodeGen/MCDCState.h b/clang/lib/CodeGen/MCDCState.h index dfb0d99921e87e..e6e39237a1b41a 100644 --- a/clang/lib/CodeGen/MCDCState.h +++ b/clang/lib/CodeGen/MCDCState.h @@ -16,6 +16,10 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ProfileData/Coverage/MCDCTypes.h" +namespace clang { +class Stmt; +} // namespace clang + namespace clang::CodeGen::MCDC { using namespace llvm::coverage::mcdc; From 855bac2f134b268e3749d113d33a95d16ac89398 Mon Sep 17 00:00:00 2001 From: Enna1 Date: Wed, 14 Feb 2024 17:14:37 +0800 Subject: [PATCH 113/240] [llvm-dwp] Add missing llvm-dwp tool in lit config tool substitution (#81217) --- llvm/test/lit.cfg.py | 1 + 1 file changed, 1 insertion(+) diff 
--git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py index c6f9ee82e08cc1..8ecae5dbe37202 100644 --- a/llvm/test/lit.cfg.py +++ b/llvm/test/lit.cfg.py @@ -190,6 +190,7 @@ def get_asan_rtlib(): "llvm-dis", "llvm-dwarfdump", "llvm-dwarfutil", + "llvm-dwp", "llvm-dlltool", "llvm-exegesis", "llvm-extract", From 8f0435febc256f77d53e7001d81d86bc66ad593b Mon Sep 17 00:00:00 2001 From: Christian Sigg Date: Wed, 14 Feb 2024 10:46:52 +0100 Subject: [PATCH 114/240] [bazel][mlir] Fix deps of //mlir:AMDGPUTransformOps --- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index c825d99b60a6b9..9fe7d1741434c7 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -1525,24 +1525,10 @@ cc_library( ":AMDGPUTransformOpsIncGen", ":AMDGPUTransforms", ":AffineDialect", - ":Analysis", - ":ArithDialect", - ":ArithUtils", - ":DialectUtils", ":FuncDialect", - ":GPUCommonTransforms", - ":GPUCompilationAttrInterfacesIncGen", - ":GPUDialect", ":IR", - ":LLVMCommonConversion", - ":LinalgDialect", - ":MemRefDialect", - ":SCFDialect", - ":SCFTransforms", - ":Support", ":TransformDialect", ":VectorDialect", - "//llvm:Support", ], ) From 17ac5b15d10d6cf7726e9018f858abf8a0f6bfa2 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Wed, 14 Feb 2024 09:59:31 +0000 Subject: [PATCH 115/240] [AMDGPU] Make use of defvar in DSDIR definitions. NFC. 
--- llvm/lib/Target/AMDGPU/DSDIRInstructions.td | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/DSDIRInstructions.td b/llvm/lib/Target/AMDGPU/DSDIRInstructions.td index 757845ae2b7a03..3ef765f93395eb 100644 --- a/llvm/lib/Target/AMDGPU/DSDIRInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSDIRInstructions.td @@ -159,8 +159,8 @@ def : GCNPat < // GFX11 //===----------------------------------------------------------------------===// -multiclass DSDIR_Real_gfx11 op, - DSDIR_Pseudo lds = !cast(NAME)> { +multiclass DSDIR_Real_gfx11 op> { + defvar lds = !cast(NAME); def _gfx11 : DSDIR_Real.ret, SIEncodingFamily.GFX11>, @@ -177,8 +177,8 @@ defm LDS_DIRECT_LOAD : DSDIR_Real_gfx11<0x1>; // GFX12+ //===----------------------------------------------------------------------===// -multiclass DSDIR_Real_gfx12 op, - DSDIR_Pseudo lds = !cast(NAME)> { +multiclass DSDIR_Real_gfx12 op> { + defvar lds = !cast(NAME); def _gfx12 : DSDIR_Real.ret, SIEncodingFamily.GFX12>, From cb8f910035e45bf6c30a61f74015c0dba5842747 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Wed, 14 Feb 2024 10:15:06 +0000 Subject: [PATCH 116/240] [AMDGPU] Do not test both wave sizes for DSDIR disassembly (#81719) There is nothing in these instruction definitions that depends on wave size so testing both seems like overkill. The corresponding assembler tests do not do it. 
--- llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_ldsdir.txt | 1 - llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt | 1 - 2 files changed, 2 deletions(-) diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_ldsdir.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_ldsdir.txt index d9803fdfd7ac2a..0dd97bc0b50da7 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_ldsdir.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_ldsdir.txt @@ -1,5 +1,4 @@ # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX11 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX11 %s # GFX11: lds_direct_load v10 wait_vdst:6 ; encoding: [0x0a,0x00,0x16,0xce] 0x0a,0x00,0x16,0xce diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt index b7c0394429dc3b..705dd643812418 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt @@ -1,5 +1,4 @@ # RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck --strict-whitespace -check-prefix=GFX12 %s -# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck --strict-whitespace -check-prefix=GFX12 %s # GFX12: ds_direct_load v10 wait_va_vdst:6 wait_vm_vsrc:0 ; encoding: [0x0a,0x00,0x16,0xce] 0x0a,0x00,0x16,0xce From 65b5647e167f8be8010cbfaefd1591946c533d72 Mon Sep 17 00:00:00 2001 From: Shreyansh Chouhan Date: Wed, 14 Feb 2024 15:55:11 +0530 Subject: [PATCH 117/240] [DeadStoreElimination] Optimize tautological assignments (#75744) If a store is dominated by a condition that ensures that the value being stored in a memory location is already present at that memory location, consider the store a noop. 
Fixes #63419 --- .../Scalar/DeadStoreElimination.cpp | 54 +++ .../DeadStoreElimination/noop-stores.ll | 357 ++++++++++++++++++ 2 files changed, 411 insertions(+) diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 663c06ebafb5f4..d30c68a2f08712 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -1904,6 +1904,57 @@ struct DSEState { return true; } + // Check if there is a dominating condition, that implies that the value + // being stored in a ptr is already present in the ptr. + bool dominatingConditionImpliesValue(MemoryDef *Def) { + auto *StoreI = cast(Def->getMemoryInst()); + BasicBlock *StoreBB = StoreI->getParent(); + Value *StorePtr = StoreI->getPointerOperand(); + Value *StoreVal = StoreI->getValueOperand(); + + DomTreeNode *IDom = DT.getNode(StoreBB)->getIDom(); + if (!IDom) + return false; + + auto *BI = dyn_cast(IDom->getBlock()->getTerminator()); + if (!BI || !BI->isConditional()) + return false; + + // In case both blocks are the same, it is not possible to determine + // if optimization is possible. (We would not want to optimize a store + // in the FalseBB if condition is true and vice versa.) + if (BI->getSuccessor(0) == BI->getSuccessor(1)) + return false; + + Instruction *ICmpL; + ICmpInst::Predicate Pred; + if (!match(BI->getCondition(), + m_c_ICmp(Pred, + m_CombineAnd(m_Load(m_Specific(StorePtr)), + m_Instruction(ICmpL)), + m_Specific(StoreVal))) || + !ICmpInst::isEquality(Pred)) + return false; + + // In case the else blocks also branches to the if block or the other way + // around it is not possible to determine if the optimization is possible. 
+ if (Pred == ICmpInst::ICMP_EQ && + !DT.dominates(BasicBlockEdge(BI->getParent(), BI->getSuccessor(0)), + StoreBB)) + return false; + + if (Pred == ICmpInst::ICMP_NE && + !DT.dominates(BasicBlockEdge(BI->getParent(), BI->getSuccessor(1)), + StoreBB)) + return false; + + MemoryAccess *LoadAcc = MSSA.getMemoryAccess(ICmpL); + MemoryAccess *ClobAcc = + MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA); + + return MSSA.dominates(ClobAcc, LoadAcc); + } + /// \returns true if \p Def is a no-op store, either because it /// directly stores back a loaded value or stores zero to a calloced object. bool storeIsNoop(MemoryDef *Def, const Value *DefUO) { @@ -1934,6 +1985,9 @@ struct DSEState { if (!Store) return false; + if (dominatingConditionImpliesValue(Def)) + return true; + if (auto *LoadI = dyn_cast(Store->getOperand(0))) { if (LoadI->getPointerOperand() == Store->getOperand(1)) { // Get the defining access for the load. diff --git a/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll b/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll index 3703b8d039ead0..9fc20d76da5eb4 100644 --- a/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll +++ b/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll @@ -795,3 +795,360 @@ join: store i8 %v, ptr %q, align 1 ret void } + +; Dominating condition implies value already exists, optimize store +define void @remove_tautological_store_eq(ptr %x) { +; CHECK-LABEL: @remove_tautological_store_eq( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[X:%.*]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[VAL]], 4 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_EQ:%.*]], label [[END:%.*]] +; CHECK: if.eq: +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: ret void +; +entry: + %val = load i32, ptr %x, align 4 + %cmp = icmp eq i32 %val, 4 + br i1 %cmp, label %if.eq, label %end + +if.eq: + store i32 4, ptr %x, align 4 + br label %end + +end: + ret void +} + +; Dominating condition 
implies value already exists, optimize store +define void @remove_tautological_store_var(ptr %x, ptr %y) { +; CHECK-LABEL: @remove_tautological_store_var( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VALX:%.*]] = load i32, ptr [[X:%.*]], align 4 +; CHECK-NEXT: [[VALY:%.*]] = load i32, ptr [[Y:%.*]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[VALX]], [[VALY]] +; CHECK-NEXT: br i1 [[CMP]], label [[IF_EQ:%.*]], label [[END:%.*]] +; CHECK: if.eq: +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: ret void +; +entry: + %valx = load i32, ptr %x, align 4 + %valy = load i32, ptr %y, align 4 + %cmp = icmp eq i32 %valx, %valy + br i1 %cmp, label %if.eq, label %end + +if.eq: + store i32 %valy, ptr %x, align 4 + br label %end + +end: + ret void +} + +; Dominating condition implies value already exists, optimize store +define void @remove_tautological_store_ne(ptr %x) { +; CHECK-LABEL: @remove_tautological_store_ne( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[X:%.*]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[VAL]], 4 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_NE:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.ne: +; CHECK-NEXT: br label [[END:%.*]] +; CHECK: if.else: +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: ret void +; +entry: + %val = load i32, ptr %x, align 4 + %cmp = icmp ne i32 %val, 4 + br i1 %cmp, label %if.ne, label %if.else + +if.ne: + br label %end + +if.else: + store i32 4, ptr %x, align 4 + br label %end + +end: + ret void +} + +; Dominating condition implies value already exists, optimize store +; Optimizes unordered atomic stores +define void @remove_tautological_store_atomic_unordered(ptr %x) { +; CHECK-LABEL: @remove_tautological_store_atomic_unordered( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[X:%.*]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[VAL]], 4 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_EQ:%.*]], label [[END:%.*]] +; CHECK: if.eq: +; CHECK-NEXT: br label [[END]] +; 
CHECK: end: +; CHECK-NEXT: ret void +; +entry: + %val = load i32, ptr %x, align 4 + %cmp = icmp eq i32 %val, 4 + br i1 %cmp, label %if.eq, label %end + +if.eq: + store atomic i32 4, ptr %x unordered, align 4 + br label %end + +end: + ret void +} + +; Should not optimize ordered atomic stores +define void @remove_tautological_store_atomic_monotonic(ptr %x) { +; CHECK-LABEL: @remove_tautological_store_atomic_monotonic( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[X:%.*]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[VAL]], 4 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_EQ:%.*]], label [[END:%.*]] +; CHECK: if.eq: +; CHECK-NEXT: store atomic i32 4, ptr [[X]] monotonic, align 4 +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: ret void +; +entry: + %val = load i32, ptr %x, align 4 + %cmp = icmp eq i32 %val, 4 + br i1 %cmp, label %if.eq, label %end + +if.eq: + store atomic i32 4, ptr %x monotonic, align 4 + br label %end + +end: + ret void +} + +; Should not optimize since the store is in incorrect branch +define void @remove_tautological_store_eq_wrong_branch(ptr %x, ptr %y) { +; CHECK-LABEL: @remove_tautological_store_eq_wrong_branch( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VALX:%.*]] = load i32, ptr [[X:%.*]], align 4 +; CHECK-NEXT: [[VALY:%.*]] = load i32, ptr [[Y:%.*]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[VALX]], [[VALY]] +; CHECK-NEXT: br i1 [[CMP]], label [[IF_EQ:%.*]], label [[END:%.*]] +; CHECK: if.eq: +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: store i32 [[VALY]], ptr [[X]], align 4 +; CHECK-NEXT: ret void +; +entry: + %valx = load i32, ptr %x, align 4 + %valy = load i32, ptr %y, align 4 + %cmp = icmp eq i32 %valx, %valy + br i1 %cmp, label %if.eq, label %end + +if.eq: + br label %end + +end: + store i32 %valy, ptr %x, align 4 + ret void +} + +; Should not optimize since the store is in incorrect branch +define void @remove_tautological_store_ne_wrong_branch(ptr %x) { +; CHECK-LABEL: 
@remove_tautological_store_ne_wrong_branch( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[X:%.*]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[VAL]], 4 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_NE:%.*]], label [[END:%.*]] +; CHECK: if.ne: +; CHECK-NEXT: store i32 4, ptr [[X]], align 4 +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: ret void +; +entry: + %val = load i32, ptr %x, align 4 + %cmp = icmp ne i32 %val, 4 + br i1 %cmp, label %if.ne, label %end + +if.ne: + store i32 4, ptr %x, align 4 + br label %end + +end: + ret void +} + +; Dominating condition implies value already exists, optimize store +; Should not optimize since we cannot determine if we should when both +; branches are the same +define void @remove_tautological_store_same_branch(ptr %x) { +; CHECK-LABEL: @remove_tautological_store_same_branch( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[X:%.*]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[VAL]], 4 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_EQ:%.*]], label [[IF_EQ]] +; CHECK: if.eq: +; CHECK-NEXT: store i32 4, ptr [[X]], align 4 +; CHECK-NEXT: ret void +; +entry: + %val = load i32, ptr %x, align 4 + %cmp = icmp eq i32 %val, 4 + br i1 %cmp, label %if.eq, label %if.eq + +if.eq: + store i32 4, ptr %x, align 4 + ret void +} + +; Dominating condition implies value already exists, optimize store +; Should not optimize since value being stored is different from cond check +define void @remove_tautological_store_wrong_value(ptr %x) { +; CHECK-LABEL: @remove_tautological_store_wrong_value( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[X:%.*]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[VAL]], 4 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_EQ:%.*]], label [[END:%.*]] +; CHECK: if.eq: +; CHECK-NEXT: store i32 5, ptr [[X]], align 4 +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: ret void +; +entry: + %val = load i32, ptr %x, align 4 + %cmp = icmp eq i32 
%val, 4 + br i1 %cmp, label %if.eq, label %end + +if.eq: + store i32 5, ptr %x, align 4 + br label %end + +end: + ret void +} + +; Should not optimize since there is a clobbering acc after load +define void @remove_tautological_store_clobber(ptr %x) { +; CHECK-LABEL: @remove_tautological_store_clobber( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[X:%.*]], align 4 +; CHECK-NEXT: store i32 5, ptr [[X]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[VAL]], 4 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_EQ:%.*]], label [[END:%.*]] +; CHECK: if.eq: +; CHECK-NEXT: store i32 4, ptr [[X]], align 4 +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: ret void +; +entry: + %val = load i32, ptr %x, align 4 + store i32 5, ptr %x, align 4 + %cmp = icmp eq i32 %val, 4 + br i1 %cmp, label %if.eq, label %end + +if.eq: + store i32 4, ptr %x, align 4 + br label %end + +end: + ret void +} + +; Should not optimize since the condition does not dominate the store +define void @remove_tautological_store_no_dom(ptr %x) { +; CHECK-LABEL: @remove_tautological_store_no_dom( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[X:%.*]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[VAL]], 4 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_EQ:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.eq: +; CHECK-NEXT: br label [[END:%.*]] +; CHECK: if.else: +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: store i32 4, ptr [[X]], align 4 +; CHECK-NEXT: ret void +; +entry: + %val = load i32, ptr %x, align 4 + store i32 5, ptr %x, align 4 + %cmp = icmp eq i32 %val, 4 + br i1 %cmp, label %if.eq, label %if.else + +if.eq: + br label %end + +if.else: + br label %end + +end: + store i32 4, ptr %x, align 4 + ret void +} + +; Should not optimize volatile stores +define void @remove_tautological_store_volatile(ptr %x) { +; CHECK-LABEL: @remove_tautological_store_volatile( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[X:%.*]], align 4 +; 
CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[VAL]], 4 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_EQ:%.*]], label [[END:%.*]] +; CHECK: if.eq: +; CHECK-NEXT: store volatile i32 4, ptr [[X]], align 4 +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: ret void +; +entry: + %val = load i32, ptr %x, align 4 + %cmp = icmp eq i32 %val, 4 + br i1 %cmp, label %if.eq, label %end + +if.eq: + store volatile i32 4, ptr %x, align 4 + br label %end + +end: + ret void +} + +; Should not optimize stores where the edge from branch inst to +; conditional block does not dominate the conditional block. +; (A conditional block post dominates the branch inst.) +define void @remove_tautological_store_no_edge_domination(ptr %x) { +; CHECK-LABEL: @remove_tautological_store_no_edge_domination( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X1:%.*]] = load ptr, ptr [[X:%.*]], align 8 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[X1]], null +; CHECK-NEXT: br i1 [[CMP]], label [[IF_EQ:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.eq: +; CHECK-NEXT: store ptr null, ptr [[X]], align 8 +; CHECK-NEXT: br label [[END:%.*]] +; CHECK: if.else: +; CHECK-NEXT: br label [[IF_EQ]] +; CHECK: end: +; CHECK-NEXT: ret void +; +entry: + %x1 = load ptr, ptr %x, align 8 + %cmp = icmp eq ptr %x1, null + br i1 %cmp, label %if.eq, label %if.else + +if.eq: + store ptr null, ptr %x, align 8 + br label %end + +if.else: + br label %if.eq + +end: + ret void +} From 55a7ff8c1ac23a78f9701d357d292e6a0bdfdb4f Mon Sep 17 00:00:00 2001 From: Andrzej Warzynski Date: Wed, 14 Feb 2024 10:30:46 +0000 Subject: [PATCH 118/240] [mlir][nfc] Move Op signature to one line This was accidentally split with a comment --- mlir/test/Dialect/Linalg/transform-op-mmt4d-to-fma.mlir | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mlir/test/Dialect/Linalg/transform-op-mmt4d-to-fma.mlir b/mlir/test/Dialect/Linalg/transform-op-mmt4d-to-fma.mlir index 61e13d1bfa9c62..6aba2b3bb368e5 100644 --- 
a/mlir/test/Dialect/Linalg/transform-op-mmt4d-to-fma.mlir +++ b/mlir/test/Dialect/Linalg/transform-op-mmt4d-to-fma.mlir @@ -16,10 +16,9 @@ module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module: !transform.any_op {transform.readonly}) { %func = transform.structured.match ops{["func.func"]} in %module : (!transform.any_op) -> !transform.op<"func.func"> - %mmt4d = transform.structured.match ops{["linalg.mmt4d"]} in %func + %mmt4d = transform.structured.match ops{["linalg.mmt4d"]} in %func : (!transform.op<"func.func">) -> !transform.any_op // Step 1: Tile - : (!transform.op<"func.func">) -> !transform.any_op // Tile parallel dims %tiled_linalg_op_p, %loops:4 = transform.structured.tile_using_for %mmt4d[1, 1, 0, 8, 8, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op) From 124cd11d7f9d875b7d6dd441dc758ed9341ccec2 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 14 Feb 2024 11:32:06 +0100 Subject: [PATCH 119/240] Revert "[GitHub][workflows] Ask reviewers to merge PRs when author cannot (#81142)" This reverts commit 38c706e30f5f339bfb0bfb26fd7b5c2d5086064a. 
This workflow always fails in cases where it needs to create a comment, due to a permissions issue, see the discussion at: https://discourse.llvm.org/t/rfc-fyi-pull-request-greetings-for-new-contributors/75458/20 --- .github/workflows/approved-prs.yml | 39 ----------------- llvm/utils/git/github-automation.py | 65 ----------------------------- 2 files changed, 104 deletions(-) delete mode 100644 .github/workflows/approved-prs.yml diff --git a/.github/workflows/approved-prs.yml b/.github/workflows/approved-prs.yml deleted file mode 100644 index 309a9217e42d31..00000000000000 --- a/.github/workflows/approved-prs.yml +++ /dev/null @@ -1,39 +0,0 @@ -name: "Prompt reviewers to merge PRs on behalf of authors" - -permissions: - contents: read - -on: - pull_request_review: - types: - - submitted - -jobs: - merge-on-behalf-information-comment: - runs-on: ubuntu-latest - permissions: - pull-requests: write - if: >- - (github.repository == 'llvm/llvm-project') && - (github.event.review.state == 'APPROVED') - steps: - - name: Checkout Automation Script - uses: actions/checkout@v4 - with: - sparse-checkout: llvm/utils/git/ - ref: main - - - name: Setup Automation Script - working-directory: ./llvm/utils/git/ - run: | - pip install -r requirements.txt - - - name: Add Merge On Behalf Comment - working-directory: ./llvm/utils/git/ - run: | - python3 ./github-automation.py \ - --token '${{ secrets.GITHUB_TOKEN }}' \ - pr-merge-on-behalf-information \ - --issue-number "${{ github.event.pull_request.number }}" \ - --author "${{ github.event.pull_request.user.login }}" \ - --reviewer "${{ github.event.review.user.login }}" diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py index ccef274c4c1f7c..b475eff06fc3eb 100755 --- a/llvm/utils/git/github-automation.py +++ b/llvm/utils/git/github-automation.py @@ -298,55 +298,6 @@ def run(self) -> bool: return True -class PRMergeOnBehalfInformation: - COMMENT_TAG = "\n" - - def __init__( - self, token: str, 
repo: str, pr_number: int, author: str, reviewer: str - ): - self.repo = github.Github(token).get_repo(repo) - self.pr = self.repo.get_issue(pr_number).as_pull_request() - self.author = author - self.reviewer = reviewer - - def can_merge(self, user: str) -> bool: - try: - return self.repo.get_collaborator_permission(user) in ["admin", "write"] - # There is a UnknownObjectException for this scenario, but this method - # does not use it. - except github.GithubException as e: - # 404 means the author was not found in the collaborator list, so we - # know they don't have push permissions. Anything else is a real API - # issue, raise it so it is visible. - if e.status != 404: - raise e - return False - - def run(self) -> bool: - # Check this first because it only costs 1 API point. - if self.can_merge(self.author): - return - - # A review can be approved more than once, only comment the first time. - for comment in self.pr.as_issue().get_comments(): - if self.COMMENT_TAG in comment.body: - return - - # This text is using Markdown formatting. - if self.can_merge(self.reviewer): - comment = f"""\ -{self.COMMENT_TAG} -@{self.reviewer} the PR author does not have permission to merge their own PRs yet. Please merge on their behalf.""" - else: - comment = f"""\ -{self.COMMENT_TAG} -@{self.reviewer} the author of this PR does not have permission to merge and neither do you. -Please find someone who has merge permissions who can merge it on the author's behalf. 
This could be one of the other reviewers or you can ask on [Discord](https://discord.com/invite/xS7Z362).""" - - self.pr.as_issue().create_comment(comment) - return True - - def setup_llvmbot_git(git_dir="."): """ Configure the git repo in `git_dir` with the llvmbot account so @@ -714,17 +665,6 @@ def execute_command(self) -> bool: pr_buildbot_information_parser.add_argument("--issue-number", type=int, required=True) pr_buildbot_information_parser.add_argument("--author", type=str, required=True) -pr_merge_on_behalf_information_parser = subparsers.add_parser( - "pr-merge-on-behalf-information" -) -pr_merge_on_behalf_information_parser.add_argument( - "--issue-number", type=int, required=True -) -pr_merge_on_behalf_information_parser.add_argument("--author", type=str, required=True) -pr_merge_on_behalf_information_parser.add_argument( - "--reviewer", type=str, required=True -) - release_workflow_parser = subparsers.add_parser("release-workflow") release_workflow_parser.add_argument( "--llvm-project-dir", @@ -784,11 +724,6 @@ def execute_command(self) -> bool: args.token, args.repo, args.issue_number, args.author ) pr_buildbot_information.run() -elif args.command == "pr-merge-on-behalf-information": - pr_merge_on_behalf_information = PRMergeOnBehalfInformation( - args.token, args.repo, args.issue_number, args.author, args.reviewer - ) - pr_merge_on_behalf_information.run() elif args.command == "release-workflow": release_workflow = ReleaseWorkflow( args.token, From 786537e66e295002d15a8c3d48b8a463de5adbf4 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 13 Feb 2024 12:28:20 +0000 Subject: [PATCH 120/240] [X86] Use explicit const SDValue& to avoid implicit copy in for-range across op_values(). NFC. Fixes static analysis warning. 
--- llvm/lib/Target/X86/X86ISelLowering.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 18f9871b2bd0c3..067676703141a3 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -52816,7 +52816,7 @@ static SDValue getInvertedVectorForFMA(SDValue V, SelectionDAG &DAG) { SmallVector Ops; EVT VT = V.getValueType(); EVT EltVT = VT.getVectorElementType(); - for (auto Op : V->op_values()) { + for (const SDValue &Op : V->op_values()) { if (auto *Cst = dyn_cast(Op)) { Ops.push_back(DAG.getConstantFP(-Cst->getValueAPF(), SDLoc(Op), EltVT)); } else { @@ -52838,8 +52838,8 @@ static SDValue getInvertedVectorForFMA(SDValue V, SelectionDAG &DAG) { // prefer one of the values. We prefer a constant with a negative value on // the first place. // N.B. We need to skip undefs that may precede a value. - for (auto op : V->op_values()) { - if (auto *Cst = dyn_cast(op)) { + for (const SDValue &Op : V->op_values()) { + if (auto *Cst = dyn_cast(Op)) { if (Cst->isNegative()) return SDValue(); break; From f82e0809ba12170e2f648f8a1ac01e78ef06c958 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 13 Feb 2024 16:39:29 +0000 Subject: [PATCH 121/240] [X86] Add v8i64/v16i32/v16i64 ctpop reduction test coverage Add test coverage for types wider than legal --- llvm/test/CodeGen/X86/vector-reduce-ctpop.ll | 460 +++++++++++++++++++ 1 file changed, 460 insertions(+) diff --git a/llvm/test/CodeGen/X86/vector-reduce-ctpop.ll b/llvm/test/CodeGen/X86/vector-reduce-ctpop.ll index aced5e0290b0d9..e218d912d0b2f2 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-ctpop.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-ctpop.ll @@ -505,6 +505,466 @@ define i32 @reduce_ctpop_v8i32(<8 x i32> %a0) { ret i32 %r0 } +define i64 @reduce_ctpop_v8i64(<8 x i64> %a0) { +; SSE42-LABEL: reduce_ctpop_v8i64: +; SSE42: # %bb.0: +; SSE42-NEXT: movdqa {{.*#+}} xmm5 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; SSE42-NEXT: movdqa %xmm2, %xmm6 +; SSE42-NEXT: pand %xmm5, %xmm6 +; SSE42-NEXT: movdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; SSE42-NEXT: movdqa %xmm4, %xmm7 +; SSE42-NEXT: pshufb %xmm6, %xmm7 +; SSE42-NEXT: psrlw $4, %xmm2 +; SSE42-NEXT: pand %xmm5, %xmm2 +; SSE42-NEXT: movdqa %xmm4, %xmm8 +; SSE42-NEXT: pshufb %xmm2, %xmm8 +; SSE42-NEXT: paddb %xmm7, %xmm8 +; SSE42-NEXT: pxor %xmm2, %xmm2 +; SSE42-NEXT: psadbw %xmm2, %xmm8 +; SSE42-NEXT: movdqa %xmm0, %xmm6 +; SSE42-NEXT: pand %xmm5, %xmm6 +; SSE42-NEXT: movdqa %xmm4, %xmm7 +; SSE42-NEXT: pshufb %xmm6, %xmm7 +; SSE42-NEXT: psrlw $4, %xmm0 +; SSE42-NEXT: pand %xmm5, %xmm0 +; SSE42-NEXT: movdqa %xmm4, %xmm6 +; SSE42-NEXT: pshufb %xmm0, %xmm6 +; SSE42-NEXT: paddb %xmm7, %xmm6 +; SSE42-NEXT: psadbw %xmm2, %xmm6 +; SSE42-NEXT: paddq %xmm8, %xmm6 +; SSE42-NEXT: movdqa %xmm3, %xmm0 +; SSE42-NEXT: pand %xmm5, %xmm0 +; SSE42-NEXT: movdqa %xmm4, %xmm7 +; SSE42-NEXT: pshufb %xmm0, %xmm7 +; SSE42-NEXT: psrlw $4, %xmm3 +; SSE42-NEXT: pand %xmm5, %xmm3 +; SSE42-NEXT: movdqa %xmm4, %xmm0 +; SSE42-NEXT: pshufb %xmm3, %xmm0 +; SSE42-NEXT: paddb %xmm7, %xmm0 +; SSE42-NEXT: psadbw %xmm2, %xmm0 +; SSE42-NEXT: movdqa %xmm1, %xmm3 +; SSE42-NEXT: pand %xmm5, %xmm3 +; SSE42-NEXT: movdqa %xmm4, %xmm7 +; SSE42-NEXT: pshufb %xmm3, %xmm7 +; SSE42-NEXT: psrlw $4, %xmm1 +; SSE42-NEXT: pand %xmm5, %xmm1 +; SSE42-NEXT: pshufb %xmm1, %xmm4 +; SSE42-NEXT: paddb %xmm7, %xmm4 +; SSE42-NEXT: psadbw %xmm2, %xmm4 +; SSE42-NEXT: paddq %xmm0, %xmm4 +; SSE42-NEXT: paddq %xmm6, %xmm4 +; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3] +; SSE42-NEXT: paddq %xmm4, %xmm0 +; SSE42-NEXT: movq %xmm0, %rax +; SSE42-NEXT: retq +; +; AVX2-LABEL: reduce_ctpop_v8i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastb {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm3 +; AVX2-NEXT: vbroadcasti128 {{.*#+}} 
ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; AVX2-NEXT: # ymm4 = mem[0,1,0,1] +; AVX2-NEXT: vpshufb %ymm3, %ymm4, %ymm3 +; AVX2-NEXT: vpsrlw $4, %ymm1, %ymm1 +; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpshufb %ymm1, %ymm4, %ymm1 +; AVX2-NEXT: vpaddb %ymm3, %ymm1, %ymm1 +; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX2-NEXT: vpsadbw %ymm3, %ymm1, %ymm1 +; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm5 +; AVX2-NEXT: vpshufb %ymm5, %ymm4, %ymm5 +; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpshufb %ymm0, %ymm4, %ymm0 +; AVX2-NEXT: vpaddb %ymm5, %ymm0, %ymm0 +; AVX2-NEXT: vpsadbw %ymm3, %ymm0, %ymm0 +; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vmovq %xmm0, %rax +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512VL-LABEL: reduce_ctpop_v8i64: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpbroadcastb {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm2 +; AVX512VL-NEXT: vbroadcasti32x4 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; AVX512VL-NEXT: # zmm3 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] +; AVX512VL-NEXT: vpshufb %zmm2, %zmm3, %zmm2 +; AVX512VL-NEXT: vpsrlw $4, %zmm0, %zmm0 +; AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0 +; AVX512VL-NEXT: vpshufb %zmm0, %zmm3, %zmm0 +; AVX512VL-NEXT: vpaddb %zmm2, %zmm0, %zmm0 +; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512VL-NEXT: vpsadbw %zmm1, %zmm0, %zmm0 +; AVX512VL-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512VL-NEXT: vpsadbw %xmm1, 
%xmm0, %xmm0 +; AVX512VL-NEXT: vmovq %xmm0, %rax +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512VPOPCNT-LABEL: reduce_ctpop_v8i64: +; AVX512VPOPCNT: # %bb.0: +; AVX512VPOPCNT-NEXT: vpopcntq %zmm0, %zmm0 +; AVX512VPOPCNT-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512VPOPCNT-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512VPOPCNT-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; AVX512VPOPCNT-NEXT: vmovq %xmm0, %rax +; AVX512VPOPCNT-NEXT: vzeroupper +; AVX512VPOPCNT-NEXT: retq + %p0 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a0) + %r0 = tail call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %p0) + ret i64 %r0 +} + +define i32 @reduce_ctpop_v16i32(<16 x i32> %a0) { +; SSE42-LABEL: reduce_ctpop_v16i32: +; SSE42: # %bb.0: +; SSE42-NEXT: movdqa {{.*#+}} xmm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; SSE42-NEXT: movdqa %xmm2, %xmm6 +; SSE42-NEXT: pand %xmm5, %xmm6 +; SSE42-NEXT: movdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; SSE42-NEXT: movdqa %xmm4, %xmm7 +; SSE42-NEXT: pshufb %xmm6, %xmm7 +; SSE42-NEXT: psrlw $4, %xmm2 +; SSE42-NEXT: pand %xmm5, %xmm2 +; SSE42-NEXT: movdqa %xmm4, %xmm6 +; SSE42-NEXT: pshufb %xmm2, %xmm6 +; SSE42-NEXT: paddb %xmm7, %xmm6 +; SSE42-NEXT: pxor %xmm2, %xmm2 +; SSE42-NEXT: pmovzxdq {{.*#+}} xmm7 = xmm6[0],zero,xmm6[1],zero +; SSE42-NEXT: punpckhdq {{.*#+}} xmm6 = xmm6[2],xmm2[2],xmm6[3],xmm2[3] +; SSE42-NEXT: psadbw %xmm2, %xmm6 +; SSE42-NEXT: psadbw %xmm2, %xmm7 +; SSE42-NEXT: packuswb %xmm6, %xmm7 +; SSE42-NEXT: movdqa %xmm0, %xmm6 +; SSE42-NEXT: pand %xmm5, %xmm6 +; SSE42-NEXT: movdqa %xmm4, %xmm8 +; SSE42-NEXT: pshufb %xmm6, %xmm8 +; SSE42-NEXT: psrlw $4, %xmm0 +; SSE42-NEXT: pand %xmm5, %xmm0 +; SSE42-NEXT: movdqa %xmm4, %xmm6 +; SSE42-NEXT: pshufb %xmm0, %xmm6 +; SSE42-NEXT: paddb %xmm8, %xmm6 +; SSE42-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm6[0],zero,xmm6[1],zero +; SSE42-NEXT: punpckhdq {{.*#+}} xmm6 = xmm6[2],xmm2[2],xmm6[3],xmm2[3] +; SSE42-NEXT: psadbw %xmm2, %xmm6 +; SSE42-NEXT: psadbw %xmm2, %xmm0 +; SSE42-NEXT: 
packuswb %xmm6, %xmm0 +; SSE42-NEXT: paddd %xmm7, %xmm0 +; SSE42-NEXT: movdqa %xmm3, %xmm6 +; SSE42-NEXT: pand %xmm5, %xmm6 +; SSE42-NEXT: movdqa %xmm4, %xmm7 +; SSE42-NEXT: pshufb %xmm6, %xmm7 +; SSE42-NEXT: psrlw $4, %xmm3 +; SSE42-NEXT: pand %xmm5, %xmm3 +; SSE42-NEXT: movdqa %xmm4, %xmm6 +; SSE42-NEXT: pshufb %xmm3, %xmm6 +; SSE42-NEXT: paddb %xmm7, %xmm6 +; SSE42-NEXT: pmovzxdq {{.*#+}} xmm3 = xmm6[0],zero,xmm6[1],zero +; SSE42-NEXT: punpckhdq {{.*#+}} xmm6 = xmm6[2],xmm2[2],xmm6[3],xmm2[3] +; SSE42-NEXT: psadbw %xmm2, %xmm6 +; SSE42-NEXT: psadbw %xmm2, %xmm3 +; SSE42-NEXT: packuswb %xmm6, %xmm3 +; SSE42-NEXT: movdqa %xmm1, %xmm6 +; SSE42-NEXT: pand %xmm5, %xmm6 +; SSE42-NEXT: movdqa %xmm4, %xmm7 +; SSE42-NEXT: pshufb %xmm6, %xmm7 +; SSE42-NEXT: psrlw $4, %xmm1 +; SSE42-NEXT: pand %xmm5, %xmm1 +; SSE42-NEXT: pshufb %xmm1, %xmm4 +; SSE42-NEXT: paddb %xmm7, %xmm4 +; SSE42-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm4[0],zero,xmm4[1],zero +; SSE42-NEXT: punpckhdq {{.*#+}} xmm4 = xmm4[2],xmm2[2],xmm4[3],xmm2[3] +; SSE42-NEXT: psadbw %xmm2, %xmm4 +; SSE42-NEXT: psadbw %xmm2, %xmm1 +; SSE42-NEXT: packuswb %xmm4, %xmm1 +; SSE42-NEXT: paddd %xmm3, %xmm1 +; SSE42-NEXT: paddd %xmm0, %xmm1 +; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] +; SSE42-NEXT: paddd %xmm1, %xmm0 +; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; SSE42-NEXT: paddd %xmm0, %xmm1 +; SSE42-NEXT: movd %xmm1, %eax +; SSE42-NEXT: retq +; +; AVX2-LABEL: reduce_ctpop_v16i32: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastb {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm3 +; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; AVX2-NEXT: # ymm4 = mem[0,1,0,1] +; AVX2-NEXT: vpshufb %ymm3, %ymm4, %ymm3 +; AVX2-NEXT: vpsrlw $4, %ymm1, %ymm1 +; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpshufb %ymm1, %ymm4, %ymm1 +; AVX2-NEXT: vpaddb %ymm3, %ymm1, 
%ymm1 +; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm5 = ymm1[2],ymm3[2],ymm1[3],ymm3[3],ymm1[6],ymm3[6],ymm1[7],ymm3[7] +; AVX2-NEXT: vpsadbw %ymm3, %ymm5, %ymm5 +; AVX2-NEXT: vpunpckldq {{.*#+}} ymm1 = ymm1[0],ymm3[0],ymm1[1],ymm3[1],ymm1[4],ymm3[4],ymm1[5],ymm3[5] +; AVX2-NEXT: vpsadbw %ymm3, %ymm1, %ymm1 +; AVX2-NEXT: vpackuswb %ymm5, %ymm1, %ymm1 +; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm5 +; AVX2-NEXT: vpshufb %ymm5, %ymm4, %ymm5 +; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpshufb %ymm0, %ymm4, %ymm0 +; AVX2-NEXT: vpaddb %ymm5, %ymm0, %ymm0 +; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm3[2],ymm0[3],ymm3[3],ymm0[6],ymm3[6],ymm0[7],ymm3[7] +; AVX2-NEXT: vpsadbw %ymm3, %ymm2, %ymm2 +; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm3[0],ymm0[1],ymm3[1],ymm0[4],ymm3[4],ymm0[5],ymm3[5] +; AVX2-NEXT: vpsadbw %ymm3, %ymm0, %ymm0 +; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vmovd %xmm0, %eax +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512VL-LABEL: reduce_ctpop_v16i32: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpbroadcastb {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm2 +; AVX512VL-NEXT: vbroadcasti32x4 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; AVX512VL-NEXT: # zmm3 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] +; AVX512VL-NEXT: vpshufb %zmm2, %zmm3, %zmm2 +; 
AVX512VL-NEXT: vpsrlw $4, %zmm0, %zmm0 +; AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0 +; AVX512VL-NEXT: vpshufb %zmm0, %zmm3, %zmm0 +; AVX512VL-NEXT: vpaddb %zmm2, %zmm0, %zmm0 +; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512VL-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] +; AVX512VL-NEXT: vpsadbw %zmm1, %zmm2, %zmm2 +; AVX512VL-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] +; AVX512VL-NEXT: vpsadbw %zmm1, %zmm0, %zmm0 +; AVX512VL-NEXT: vpackuswb %zmm2, %zmm0, %zmm0 +; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512VL-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; AVX512VL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: vmovd %xmm0, %eax +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512VPOPCNT-LABEL: reduce_ctpop_v16i32: +; AVX512VPOPCNT: # %bb.0: +; AVX512VPOPCNT-NEXT: vpopcntd %zmm0, %zmm0 +; AVX512VPOPCNT-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512VPOPCNT-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512VPOPCNT-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; AVX512VPOPCNT-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; AVX512VPOPCNT-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; AVX512VPOPCNT-NEXT: vmovd %xmm0, %eax +; AVX512VPOPCNT-NEXT: vzeroupper +; AVX512VPOPCNT-NEXT: retq + %p0 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a0) + %r0 = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %p0) + ret i32 %r0 +} + +define i64 @reduce_ctpop_v16i64(<16 x i64> %a0) { +; SSE42-LABEL: reduce_ctpop_v16i64: +; SSE42: # %bb.0: +; SSE42-NEXT: movdqa {{.*#+}} xmm9 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; SSE42-NEXT: movdqa %xmm5, %xmm10 +; SSE42-NEXT: pand %xmm9, %xmm10 +; SSE42-NEXT: movdqa {{.*#+}} xmm8 = 
[0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; SSE42-NEXT: movdqa %xmm8, %xmm11 +; SSE42-NEXT: pshufb %xmm10, %xmm11 +; SSE42-NEXT: psrlw $4, %xmm5 +; SSE42-NEXT: pand %xmm9, %xmm5 +; SSE42-NEXT: movdqa %xmm8, %xmm12 +; SSE42-NEXT: pshufb %xmm5, %xmm12 +; SSE42-NEXT: paddb %xmm11, %xmm12 +; SSE42-NEXT: pxor %xmm5, %xmm5 +; SSE42-NEXT: psadbw %xmm5, %xmm12 +; SSE42-NEXT: movdqa %xmm1, %xmm10 +; SSE42-NEXT: pand %xmm9, %xmm10 +; SSE42-NEXT: movdqa %xmm8, %xmm11 +; SSE42-NEXT: pshufb %xmm10, %xmm11 +; SSE42-NEXT: psrlw $4, %xmm1 +; SSE42-NEXT: pand %xmm9, %xmm1 +; SSE42-NEXT: movdqa %xmm8, %xmm10 +; SSE42-NEXT: pshufb %xmm1, %xmm10 +; SSE42-NEXT: paddb %xmm11, %xmm10 +; SSE42-NEXT: psadbw %xmm5, %xmm10 +; SSE42-NEXT: paddq %xmm12, %xmm10 +; SSE42-NEXT: movdqa %xmm7, %xmm1 +; SSE42-NEXT: pand %xmm9, %xmm1 +; SSE42-NEXT: movdqa %xmm8, %xmm11 +; SSE42-NEXT: pshufb %xmm1, %xmm11 +; SSE42-NEXT: psrlw $4, %xmm7 +; SSE42-NEXT: pand %xmm9, %xmm7 +; SSE42-NEXT: movdqa %xmm8, %xmm12 +; SSE42-NEXT: pshufb %xmm7, %xmm12 +; SSE42-NEXT: paddb %xmm11, %xmm12 +; SSE42-NEXT: psadbw %xmm5, %xmm12 +; SSE42-NEXT: movdqa %xmm3, %xmm1 +; SSE42-NEXT: pand %xmm9, %xmm1 +; SSE42-NEXT: movdqa %xmm8, %xmm7 +; SSE42-NEXT: pshufb %xmm1, %xmm7 +; SSE42-NEXT: psrlw $4, %xmm3 +; SSE42-NEXT: pand %xmm9, %xmm3 +; SSE42-NEXT: movdqa %xmm8, %xmm1 +; SSE42-NEXT: pshufb %xmm3, %xmm1 +; SSE42-NEXT: paddb %xmm7, %xmm1 +; SSE42-NEXT: psadbw %xmm5, %xmm1 +; SSE42-NEXT: paddq %xmm12, %xmm1 +; SSE42-NEXT: paddq %xmm10, %xmm1 +; SSE42-NEXT: movdqa %xmm4, %xmm3 +; SSE42-NEXT: pand %xmm9, %xmm3 +; SSE42-NEXT: movdqa %xmm8, %xmm7 +; SSE42-NEXT: pshufb %xmm3, %xmm7 +; SSE42-NEXT: psrlw $4, %xmm4 +; SSE42-NEXT: pand %xmm9, %xmm4 +; SSE42-NEXT: movdqa %xmm8, %xmm10 +; SSE42-NEXT: pshufb %xmm4, %xmm10 +; SSE42-NEXT: paddb %xmm7, %xmm10 +; SSE42-NEXT: psadbw %xmm5, %xmm10 +; SSE42-NEXT: movdqa %xmm0, %xmm3 +; SSE42-NEXT: pand %xmm9, %xmm3 +; SSE42-NEXT: movdqa %xmm8, %xmm4 +; SSE42-NEXT: pshufb %xmm3, %xmm4 +; SSE42-NEXT: psrlw 
$4, %xmm0 +; SSE42-NEXT: pand %xmm9, %xmm0 +; SSE42-NEXT: movdqa %xmm8, %xmm3 +; SSE42-NEXT: pshufb %xmm0, %xmm3 +; SSE42-NEXT: paddb %xmm4, %xmm3 +; SSE42-NEXT: psadbw %xmm5, %xmm3 +; SSE42-NEXT: paddq %xmm10, %xmm3 +; SSE42-NEXT: movdqa %xmm6, %xmm0 +; SSE42-NEXT: pand %xmm9, %xmm0 +; SSE42-NEXT: movdqa %xmm8, %xmm4 +; SSE42-NEXT: pshufb %xmm0, %xmm4 +; SSE42-NEXT: psrlw $4, %xmm6 +; SSE42-NEXT: pand %xmm9, %xmm6 +; SSE42-NEXT: movdqa %xmm8, %xmm0 +; SSE42-NEXT: pshufb %xmm6, %xmm0 +; SSE42-NEXT: paddb %xmm4, %xmm0 +; SSE42-NEXT: psadbw %xmm5, %xmm0 +; SSE42-NEXT: movdqa %xmm2, %xmm4 +; SSE42-NEXT: pand %xmm9, %xmm4 +; SSE42-NEXT: movdqa %xmm8, %xmm6 +; SSE42-NEXT: pshufb %xmm4, %xmm6 +; SSE42-NEXT: psrlw $4, %xmm2 +; SSE42-NEXT: pand %xmm9, %xmm2 +; SSE42-NEXT: pshufb %xmm2, %xmm8 +; SSE42-NEXT: paddb %xmm6, %xmm8 +; SSE42-NEXT: psadbw %xmm5, %xmm8 +; SSE42-NEXT: paddq %xmm0, %xmm8 +; SSE42-NEXT: paddq %xmm3, %xmm8 +; SSE42-NEXT: paddq %xmm1, %xmm8 +; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm8[2,3,2,3] +; SSE42-NEXT: paddq %xmm8, %xmm0 +; SSE42-NEXT: movq %xmm0, %rax +; SSE42-NEXT: retq +; +; AVX2-LABEL: reduce_ctpop_v16i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastb {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX2-NEXT: vpand %ymm4, %ymm2, %ymm5 +; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; AVX2-NEXT: # ymm6 = mem[0,1,0,1] +; AVX2-NEXT: vpshufb %ymm5, %ymm6, %ymm5 +; AVX2-NEXT: vpsrlw $4, %ymm2, %ymm2 +; AVX2-NEXT: vpand %ymm4, %ymm2, %ymm2 +; AVX2-NEXT: vpshufb %ymm2, %ymm6, %ymm2 +; AVX2-NEXT: vpaddb %ymm5, %ymm2, %ymm2 +; AVX2-NEXT: vpxor %xmm5, %xmm5, %xmm5 +; AVX2-NEXT: vpsadbw %ymm5, %ymm2, %ymm2 +; AVX2-NEXT: vpand %ymm4, %ymm0, %ymm7 +; AVX2-NEXT: vpshufb %ymm7, %ymm6, %ymm7 +; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX2-NEXT: vpand %ymm4, %ymm0, %ymm0 +; AVX2-NEXT: vpshufb %ymm0, %ymm6, %ymm0 +; AVX2-NEXT: vpaddb 
%ymm7, %ymm0, %ymm0 +; AVX2-NEXT: vpsadbw %ymm5, %ymm0, %ymm0 +; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm2 +; AVX2-NEXT: vpshufb %ymm2, %ymm6, %ymm2 +; AVX2-NEXT: vpsrlw $4, %ymm3, %ymm3 +; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3 +; AVX2-NEXT: vpshufb %ymm3, %ymm6, %ymm3 +; AVX2-NEXT: vpaddb %ymm2, %ymm3, %ymm2 +; AVX2-NEXT: vpsadbw %ymm5, %ymm2, %ymm2 +; AVX2-NEXT: vpand %ymm4, %ymm1, %ymm3 +; AVX2-NEXT: vpshufb %ymm3, %ymm6, %ymm3 +; AVX2-NEXT: vpsrlw $4, %ymm1, %ymm1 +; AVX2-NEXT: vpand %ymm4, %ymm1, %ymm1 +; AVX2-NEXT: vpshufb %ymm1, %ymm6, %ymm1 +; AVX2-NEXT: vpaddb %ymm3, %ymm1, %ymm1 +; AVX2-NEXT: vpsadbw %ymm5, %ymm1, %ymm1 +; AVX2-NEXT: vpaddq %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vmovq %xmm0, %rax +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512VL-LABEL: reduce_ctpop_v16i64: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpbroadcastb {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512VL-NEXT: vpandq %zmm2, %zmm0, %zmm3 +; AVX512VL-NEXT: vbroadcasti32x4 {{.*#+}} zmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; AVX512VL-NEXT: # zmm4 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] +; AVX512VL-NEXT: vpshufb %zmm3, %zmm4, %zmm3 +; AVX512VL-NEXT: vpsrlw $4, %zmm0, %zmm0 +; AVX512VL-NEXT: vpandq %zmm2, %zmm0, %zmm0 +; AVX512VL-NEXT: vpshufb %zmm0, %zmm4, %zmm0 +; AVX512VL-NEXT: vpaddb %zmm3, %zmm0, %zmm0 +; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512VL-NEXT: vpsadbw %zmm3, %zmm0, %zmm0 +; AVX512VL-NEXT: vpandq %zmm2, %zmm1, %zmm5 +; AVX512VL-NEXT: vpshufb %zmm5, 
%zmm4, %zmm5 +; AVX512VL-NEXT: vpsrlw $4, %zmm1, %zmm1 +; AVX512VL-NEXT: vpandq %zmm2, %zmm1, %zmm1 +; AVX512VL-NEXT: vpshufb %zmm1, %zmm4, %zmm1 +; AVX512VL-NEXT: vpaddb %zmm5, %zmm1, %zmm1 +; AVX512VL-NEXT: vpsadbw %zmm3, %zmm1, %zmm1 +; AVX512VL-NEXT: vpmovqb %zmm1, %xmm1 +; AVX512VL-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512VL-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; AVX512VL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: vmovq %xmm0, %rax +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512VPOPCNT-LABEL: reduce_ctpop_v16i64: +; AVX512VPOPCNT: # %bb.0: +; AVX512VPOPCNT-NEXT: vpopcntq %zmm0, %zmm0 +; AVX512VPOPCNT-NEXT: vpopcntq %zmm1, %zmm1 +; AVX512VPOPCNT-NEXT: vpmovqb %zmm1, %xmm1 +; AVX512VPOPCNT-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512VPOPCNT-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512VPOPCNT-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512VPOPCNT-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; AVX512VPOPCNT-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; AVX512VPOPCNT-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; AVX512VPOPCNT-NEXT: vmovq %xmm0, %rax +; AVX512VPOPCNT-NEXT: vzeroupper +; AVX512VPOPCNT-NEXT: retq + %p0 = tail call <16 x i64> @llvm.ctpop.v16i64(<16 x i64> %a0) + %r0 = tail call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %p0) + ret i64 %r0 +} + ; ; Vector of reductions of per-element ctpop results (create vector of each count all bits in each vector) ; From ca5696668477750b4ddc31ea4cff25054dc8cf2a Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 14 Feb 2024 11:11:45 +0000 Subject: [PATCH 122/240] [VPlan] Properly retain flags when cloning VPReplicateRecipe. This makes sure the correct flags are used for the clone (i.e. the ones present on the recipe), instead of the ones on the original IR instruction. 
At the moment, this should not change anything, as flags of replicate recipe should not be dropped before they are cloned at the moment. But that will change in a follow-up patch. --- llvm/lib/Transforms/Vectorize/VPlan.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 162a3c4b195e53..13e1859ad6b250 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2085,8 +2085,11 @@ class VPReplicateRecipe : public VPRecipeWithIRFlags { ~VPReplicateRecipe() override = default; VPRecipeBase *clone() override { - return new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsUniform, - isPredicated() ? getMask() : nullptr); + auto *Copy = + new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsUniform, + isPredicated() ? getMask() : nullptr); + Copy->transferFlags(*this); + return Copy; } VP_CLASSOF_IMPL(VPDef::VPReplicateSC) From f1b2865b6a61e570d3c7d88c5a84bbd1a046a203 Mon Sep 17 00:00:00 2001 From: Ronan Keryell Date: Wed, 14 Feb 2024 03:26:48 -0800 Subject: [PATCH 123/240] [mlir][docs] Fix typos in TargetLLVMIR.md (#81549) --- mlir/docs/TargetLLVMIR.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mlir/docs/TargetLLVMIR.md b/mlir/docs/TargetLLVMIR.md index 27a399c520647c..df281f65768e8c 100644 --- a/mlir/docs/TargetLLVMIR.md +++ b/mlir/docs/TargetLLVMIR.md @@ -336,7 +336,7 @@ func.func @bar() { // is transformed into llvm.func @foo(%arg0: i32, %arg1: i64) -> !llvm.struct<(i32, i64)> { - // insert the vales into a structure + // insert the values into a structure %0 = llvm.mlir.undef : !llvm.struct<(i32, i64)> %1 = llvm.insertvalue %arg0, %0[0] : !llvm.struct<(i32, i64)> %2 = llvm.insertvalue %arg1, %1[1] : !llvm.struct<(i32, i64)> @@ -349,8 +349,8 @@ llvm.func @bar() { %1 = llvm.mlir.constant(17 : i64) : i64 // call and extract the values from the structure - %2 = llvm.call @bar(%0, %1) - : 
(i32, i32) -> !llvm.struct<(i32, i64)> + %2 = llvm.call @foo(%0, %1) + : (i32, i64) -> !llvm.struct<(i32, i64)> %3 = llvm.extractvalue %2[0] : !llvm.struct<(i32, i64)> %4 = llvm.extractvalue %2[1] : !llvm.struct<(i32, i64)> From 0c8b5942c7cfa53d9e2a57fb860a21b7bce3fb26 Mon Sep 17 00:00:00 2001 From: Jacek Caban Date: Wed, 14 Feb 2024 13:01:58 +0100 Subject: [PATCH 124/240] [llvm-dlltool][NFC] Factor out parseModuleDefinition helper. (#81620) In preparation for ARM64EC support. --- .../llvm-dlltool/DlltoolDriver.cpp | 90 +++++++++++-------- 1 file changed, 52 insertions(+), 38 deletions(-) diff --git a/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp b/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp index 834903857a88eb..98795c51ce1336 100644 --- a/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp +++ b/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp @@ -110,6 +110,46 @@ std::optional getPrefix(StringRef Argv0) { return ProgName.str(); } +bool parseModuleDefinition(StringRef DefFileName, MachineTypes Machine, + bool AddUnderscores, + std::vector &Exports, + std::string &OutputFile) { + std::unique_ptr MB = openFile(DefFileName); + if (!MB) + return false; + + if (!MB->getBufferSize()) { + llvm::errs() << "definition file empty\n"; + return false; + } + + Expected Def = parseCOFFModuleDefinition( + *MB, Machine, /*MingwDef=*/true, AddUnderscores); + if (!Def) { + llvm::errs() << "error parsing definition\n" + << errorToErrorCode(Def.takeError()).message() << "\n"; + return false; + } + + if (OutputFile.empty()) + OutputFile = std::move(Def->OutputFile); + + // If ExtName is set (if the "ExtName = Name" syntax was used), overwrite + // Name with ExtName and clear ExtName. When only creating an import + // library and not linking, the internal name is irrelevant. This avoids + // cases where writeImportLibrary tries to transplant decoration from + // symbol decoration onto ExtName. 
+ for (COFFShortExport &E : Def->Exports) { + if (!E.ExtName.empty()) { + E.Name = E.ExtName; + E.ExtName.clear(); + } + } + + Exports = std::move(Def->Exports); + return true; +} + } // namespace int llvm::dlltoolDriverMain(llvm::ArrayRef ArgsArr) { @@ -141,16 +181,6 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef ArgsArr) { return 1; } - std::unique_ptr MB = - openFile(Args.getLastArg(OPT_d)->getValue()); - if (!MB) - return 1; - - if (!MB->getBufferSize()) { - llvm::errs() << "definition file empty\n"; - return 1; - } - COFF::MachineTypes Machine = getDefaultMachine(); if (std::optional Prefix = getPrefix(ArgsArr[0])) { Triple T(*Prefix); @@ -166,40 +196,23 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef ArgsArr) { } bool AddUnderscores = !Args.hasArg(OPT_no_leading_underscore); - Expected Def = parseCOFFModuleDefinition( - *MB, Machine, /*MingwDef=*/true, AddUnderscores); - if (!Def) { - llvm::errs() << "error parsing definition\n" - << errorToErrorCode(Def.takeError()).message() << "\n"; - return 1; - } - - // Do this after the parser because parseCOFFModuleDefinition sets OutputFile. + std::string OutputFile; if (auto *Arg = Args.getLastArg(OPT_D)) - Def->OutputFile = Arg->getValue(); + OutputFile = Arg->getValue(); - if (Def->OutputFile.empty()) { - llvm::errs() << "no DLL name specified\n"; + std::vector Exports; + if (!parseModuleDefinition(Args.getLastArg(OPT_d)->getValue(), Machine, + AddUnderscores, Exports, OutputFile)) return 1; - } - std::string Path = std::string(Args.getLastArgValue(OPT_l)); - - // If ExtName is set (if the "ExtName = Name" syntax was used), overwrite - // Name with ExtName and clear ExtName. When only creating an import - // library and not linking, the internal name is irrelevant. This avoids - // cases where writeImportLibrary tries to transplant decoration from - // symbol decoration onto ExtName. 
- for (COFFShortExport& E : Def->Exports) { - if (!E.ExtName.empty()) { - E.Name = E.ExtName; - E.ExtName.clear(); - } + if (OutputFile.empty()) { + llvm::errs() << "no DLL name specified\n"; + return 1; } if (Machine == IMAGE_FILE_MACHINE_I386 && Args.hasArg(OPT_k)) { - for (COFFShortExport& E : Def->Exports) { + for (COFFShortExport &E : Exports) { if (!E.AliasTarget.empty() || (!E.Name.empty() && E.Name[0] == '?')) continue; E.SymbolName = E.Name; @@ -215,8 +228,9 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef ArgsArr) { } } - if (!Path.empty() && writeImportLibrary(Def->OutputFile, Path, Def->Exports, - Machine, /*MinGW=*/true)) + std::string Path = std::string(Args.getLastArgValue(OPT_l)); + if (!Path.empty() && writeImportLibrary(OutputFile, Path, Exports, Machine, + /*MinGW=*/true)) return 1; return 0; } From 82f3cbc860bfd9f1e10e3605ae1b162b1f423cc7 Mon Sep 17 00:00:00 2001 From: Sergei Lebedev <185856+superbobry@users.noreply.github.com> Date: Wed, 14 Feb 2024 12:02:49 +0000 Subject: [PATCH 125/240] [MLIR][Python] Added a base class to all builtin floating point types (#81720) This allows to * check if a given ir.Type is a floating point type via isinstance() or issubclass() * get the bitwidth of a floating point type See motivation and discussion in https://discourse.llvm.org/t/add-floattype-to-mlir-python-bindings/76959. 
--- mlir/include/mlir-c/BuiltinTypes.h | 6 ++++ mlir/lib/Bindings/Python/IRTypes.cpp | 38 +++++++++++++++++------- mlir/lib/CAPI/IR/BuiltinTypes.cpp | 8 +++++ mlir/python/mlir/_mlir_libs/_mlir/ir.pyi | 28 +++++++++++------ mlir/test/python/ir/builtin_types.py | 35 +++++++++++++++++++++- 5 files changed, 95 insertions(+), 20 deletions(-) diff --git a/mlir/include/mlir-c/BuiltinTypes.h b/mlir/include/mlir-c/BuiltinTypes.h index 881b6dad2b84d7..99c5e3f46b04c1 100644 --- a/mlir/include/mlir-c/BuiltinTypes.h +++ b/mlir/include/mlir-c/BuiltinTypes.h @@ -73,6 +73,12 @@ MLIR_CAPI_EXPORTED MlirType mlirIndexTypeGet(MlirContext ctx); // Floating-point types. //===----------------------------------------------------------------------===// +/// Checks whether the given type is a floating-point type. +MLIR_CAPI_EXPORTED bool mlirTypeIsAFloat(MlirType type); + +/// Returns the bitwidth of a floating-point type. +MLIR_CAPI_EXPORTED unsigned mlirFloatTypeGetWidth(MlirType type); + /// Returns the typeID of an Float8E5M2 type. MLIR_CAPI_EXPORTED MlirTypeID mlirFloat8E5M2TypeGetTypeID(void); diff --git a/mlir/lib/Bindings/Python/IRTypes.cpp b/mlir/lib/Bindings/Python/IRTypes.cpp index 820992de659068..e1e4eb999b3aa8 100644 --- a/mlir/lib/Bindings/Python/IRTypes.cpp +++ b/mlir/lib/Bindings/Python/IRTypes.cpp @@ -109,8 +109,22 @@ class PyIndexType : public PyConcreteType { } }; +class PyFloatType : public PyConcreteType { +public: + static constexpr IsAFunctionTy isaFunction = mlirTypeIsAFloat; + static constexpr const char *pyClassName = "FloatType"; + using PyConcreteType::PyConcreteType; + + static void bindDerived(ClassTy &c) { + c.def_property_readonly( + "width", [](PyFloatType &self) { return mlirFloatTypeGetWidth(self); }, + "Returns the width of the floating-point type"); + } +}; + /// Floating Point Type subclass - Float8E4M3FNType. 
-class PyFloat8E4M3FNType : public PyConcreteType { +class PyFloat8E4M3FNType + : public PyConcreteType { public: static constexpr IsAFunctionTy isaFunction = mlirTypeIsAFloat8E4M3FN; static constexpr GetTypeIDFunctionTy getTypeIdFunction = @@ -130,7 +144,7 @@ class PyFloat8E4M3FNType : public PyConcreteType { }; /// Floating Point Type subclass - Float8M5E2Type. -class PyFloat8E5M2Type : public PyConcreteType { +class PyFloat8E5M2Type : public PyConcreteType { public: static constexpr IsAFunctionTy isaFunction = mlirTypeIsAFloat8E5M2; static constexpr GetTypeIDFunctionTy getTypeIdFunction = @@ -150,7 +164,8 @@ class PyFloat8E5M2Type : public PyConcreteType { }; /// Floating Point Type subclass - Float8E4M3FNUZ. -class PyFloat8E4M3FNUZType : public PyConcreteType { +class PyFloat8E4M3FNUZType + : public PyConcreteType { public: static constexpr IsAFunctionTy isaFunction = mlirTypeIsAFloat8E4M3FNUZ; static constexpr GetTypeIDFunctionTy getTypeIdFunction = @@ -170,7 +185,8 @@ class PyFloat8E4M3FNUZType : public PyConcreteType { }; /// Floating Point Type subclass - Float8E4M3B11FNUZ. -class PyFloat8E4M3B11FNUZType : public PyConcreteType { +class PyFloat8E4M3B11FNUZType + : public PyConcreteType { public: static constexpr IsAFunctionTy isaFunction = mlirTypeIsAFloat8E4M3B11FNUZ; static constexpr GetTypeIDFunctionTy getTypeIdFunction = @@ -190,7 +206,8 @@ class PyFloat8E4M3B11FNUZType : public PyConcreteType { }; /// Floating Point Type subclass - Float8E5M2FNUZ. -class PyFloat8E5M2FNUZType : public PyConcreteType { +class PyFloat8E5M2FNUZType + : public PyConcreteType { public: static constexpr IsAFunctionTy isaFunction = mlirTypeIsAFloat8E5M2FNUZ; static constexpr GetTypeIDFunctionTy getTypeIdFunction = @@ -210,7 +227,7 @@ class PyFloat8E5M2FNUZType : public PyConcreteType { }; /// Floating Point Type subclass - BF16Type. 
-class PyBF16Type : public PyConcreteType { +class PyBF16Type : public PyConcreteType { public: static constexpr IsAFunctionTy isaFunction = mlirTypeIsABF16; static constexpr GetTypeIDFunctionTy getTypeIdFunction = @@ -230,7 +247,7 @@ class PyBF16Type : public PyConcreteType { }; /// Floating Point Type subclass - F16Type. -class PyF16Type : public PyConcreteType { +class PyF16Type : public PyConcreteType { public: static constexpr IsAFunctionTy isaFunction = mlirTypeIsAF16; static constexpr GetTypeIDFunctionTy getTypeIdFunction = @@ -250,7 +267,7 @@ class PyF16Type : public PyConcreteType { }; /// Floating Point Type subclass - TF32Type. -class PyTF32Type : public PyConcreteType { +class PyTF32Type : public PyConcreteType { public: static constexpr IsAFunctionTy isaFunction = mlirTypeIsATF32; static constexpr GetTypeIDFunctionTy getTypeIdFunction = @@ -270,7 +287,7 @@ class PyTF32Type : public PyConcreteType { }; /// Floating Point Type subclass - F32Type. -class PyF32Type : public PyConcreteType { +class PyF32Type : public PyConcreteType { public: static constexpr IsAFunctionTy isaFunction = mlirTypeIsAF32; static constexpr GetTypeIDFunctionTy getTypeIdFunction = @@ -290,7 +307,7 @@ class PyF32Type : public PyConcreteType { }; /// Floating Point Type subclass - F64Type. 
-class PyF64Type : public PyConcreteType { +class PyF64Type : public PyConcreteType { public: static constexpr IsAFunctionTy isaFunction = mlirTypeIsAF64; static constexpr GetTypeIDFunctionTy getTypeIdFunction = @@ -819,6 +836,7 @@ class PyOpaqueType : public PyConcreteType { void mlir::python::populateIRTypes(py::module &m) { PyIntegerType::bind(m); + PyFloatType::bind(m); PyIndexType::bind(m); PyFloat8E4M3FNType::bind(m); PyFloat8E5M2Type::bind(m); diff --git a/mlir/lib/CAPI/IR/BuiltinTypes.cpp b/mlir/lib/CAPI/IR/BuiltinTypes.cpp index 18c9414c5d0f34..e1a5d82587cf9e 100644 --- a/mlir/lib/CAPI/IR/BuiltinTypes.cpp +++ b/mlir/lib/CAPI/IR/BuiltinTypes.cpp @@ -78,6 +78,14 @@ MlirType mlirIndexTypeGet(MlirContext ctx) { // Floating-point types. //===----------------------------------------------------------------------===// +bool mlirTypeIsAFloat(MlirType type) { + return llvm::isa(unwrap(type)); +} + +unsigned mlirFloatTypeGetWidth(MlirType type) { + return llvm::cast(unwrap(type)).getWidth(); +} + MlirTypeID mlirFloat8E5M2TypeGetTypeID() { return wrap(Float8E5M2Type::getTypeID()); } diff --git a/mlir/python/mlir/_mlir_libs/_mlir/ir.pyi b/mlir/python/mlir/_mlir_libs/_mlir/ir.pyi index 344abb64a57d23..586bf7f8e93fba 100644 --- a/mlir/python/mlir/_mlir_libs/_mlir/ir.pyi +++ b/mlir/python/mlir/_mlir_libs/_mlir/ir.pyi @@ -1442,7 +1442,17 @@ class DictAttr(Attribute): @property def typeid(self) -> TypeID: ... -class F16Type(Type): +class FloatType(Type): + @staticmethod + def isinstance(other: Type) -> bool: ... + def __init__(self, cast_from_type: Type) -> None: ... + @property + def width(self) -> int: + """ + Returns the width of the floating-point type. + """ + +class F16Type(FloatType): static_typeid: ClassVar[TypeID] # value = @staticmethod def get(context: Optional[Context] = None) -> F16Type: @@ -1455,7 +1465,7 @@ class F16Type(Type): @property def typeid(self) -> TypeID: ... 
-class F32Type(Type): +class F32Type(FloatType): static_typeid: ClassVar[TypeID] # value = @staticmethod def get(context: Optional[Context] = None) -> F32Type: @@ -1468,7 +1478,7 @@ class F32Type(Type): @property def typeid(self) -> TypeID: ... -class F64Type(Type): +class F64Type(FloatType): static_typeid: ClassVar[TypeID] # value = @staticmethod def get(context: Optional[Context] = None) -> F64Type: @@ -1502,7 +1512,7 @@ class FlatSymbolRefAttr(Attribute): Returns the value of the FlatSymbolRef attribute as a string """ -class Float8E4M3B11FNUZType(Type): +class Float8E4M3B11FNUZType(FloatType): static_typeid: ClassVar[TypeID] # value = @staticmethod def get(context: Optional[Context] = None) -> Float8E4M3B11FNUZType: @@ -1515,7 +1525,7 @@ class Float8E4M3B11FNUZType(Type): @property def typeid(self) -> TypeID: ... -class Float8E4M3FNType(Type): +class Float8E4M3FNType(FloatType): static_typeid: ClassVar[TypeID] # value = @staticmethod def get(context: Optional[Context] = None) -> Float8E4M3FNType: @@ -1528,7 +1538,7 @@ class Float8E4M3FNType(Type): @property def typeid(self) -> TypeID: ... -class Float8E4M3FNUZType(Type): +class Float8E4M3FNUZType(FloatType): static_typeid: ClassVar[TypeID] # value = @staticmethod def get(context: Optional[Context] = None) -> Float8E4M3FNUZType: @@ -1541,7 +1551,7 @@ class Float8E4M3FNUZType(Type): @property def typeid(self) -> TypeID: ... -class Float8E5M2FNUZType(Type): +class Float8E5M2FNUZType(FloatType): static_typeid: ClassVar[TypeID] # value = @staticmethod def get(context: Optional[Context] = None) -> Float8E5M2FNUZType: @@ -1554,7 +1564,7 @@ class Float8E5M2FNUZType(Type): @property def typeid(self) -> TypeID: ... 
-class Float8E5M2Type(Type): +class Float8E5M2Type(FloatType): static_typeid: ClassVar[TypeID] # value = @staticmethod def get(context: Optional[Context] = None) -> Float8E5M2Type: @@ -1601,7 +1611,7 @@ class FloatAttr(Attribute): Returns the value of the float attribute """ -class FloatTF32Type(Type): +class FloatTF32Type(FloatType): static_typeid: ClassVar[TypeID] # value = @staticmethod def get(context: Optional[Context] = None) -> FloatTF32Type: diff --git a/mlir/test/python/ir/builtin_types.py b/mlir/test/python/ir/builtin_types.py index 30a5054ada91ac..4eea1a9c372ef7 100644 --- a/mlir/test/python/ir/builtin_types.py +++ b/mlir/test/python/ir/builtin_types.py @@ -100,8 +100,38 @@ def testTypeIsInstance(): print(IntegerType.isinstance(t1)) # CHECK: False print(F32Type.isinstance(t1)) + # CHECK: False + print(FloatType.isinstance(t1)) # CHECK: True print(F32Type.isinstance(t2)) + # CHECK: True + print(FloatType.isinstance(t2)) + + +# CHECK-LABEL: TEST: testFloatTypeSubclasses +@run +def testFloatTypeSubclasses(): + ctx = Context() + # CHECK: True + print(isinstance(Type.parse("f8E4M3FN", ctx), FloatType)) + # CHECK: True + print(isinstance(Type.parse("f8E5M2", ctx), FloatType)) + # CHECK: True + print(isinstance(Type.parse("f8E4M3FNUZ", ctx), FloatType)) + # CHECK: True + print(isinstance(Type.parse("f8E4M3B11FNUZ", ctx), FloatType)) + # CHECK: True + print(isinstance(Type.parse("f8E5M2FNUZ", ctx), FloatType)) + # CHECK: True + print(isinstance(Type.parse("f16", ctx), FloatType)) + # CHECK: True + print(isinstance(Type.parse("bf16", ctx), FloatType)) + # CHECK: True + print(isinstance(Type.parse("f32", ctx), FloatType)) + # CHECK: True + print(isinstance(Type.parse("tf32", ctx), FloatType)) + # CHECK: True + print(isinstance(Type.parse("f64", ctx), FloatType)) # CHECK-LABEL: TEST: testTypeEqDoesNotRaise @@ -218,7 +248,10 @@ def testFloatType(): # CHECK: float: f32 print("float:", F32Type.get()) # CHECK: float: f64 - print("float:", F64Type.get()) + f64 = 
F64Type.get() + print("float:", f64) + # CHECK: f64 width: 64 + print("f64 width:", f64.width) # CHECK-LABEL: TEST: testNoneType From 6cab375b4b3d33c18c5f8686105ea20458451a71 Mon Sep 17 00:00:00 2001 From: Philipp Tomsich Date: Wed, 14 Feb 2024 04:05:22 -0800 Subject: [PATCH 126/240] [AArch64] Add tests for fusion on Ampere1/1A/1B (#81725) As commented on the PR #81293, the Ampere1-family does not have test cases for the common fusion cases it implements. This adds the Ampere1 targets to the relevant misched-fusion testcases: * addadrp * addr * aes --- llvm/test/CodeGen/AArch64/misched-fusion-addadrp.ll | 3 +++ llvm/test/CodeGen/AArch64/misched-fusion-addr.ll | 3 +++ llvm/test/CodeGen/AArch64/misched-fusion-aes.ll | 3 +++ 3 files changed, 9 insertions(+) diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-addadrp.ll b/llvm/test/CodeGen/AArch64/misched-fusion-addadrp.ll index a75c30317f13c5..70b6b91d3cf663 100644 --- a/llvm/test/CodeGen/AArch64/misched-fusion-addadrp.ll +++ b/llvm/test/CodeGen/AArch64/misched-fusion-addadrp.ll @@ -15,6 +15,9 @@ ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=neoverse-v2 | FileCheck %s ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=apple-a16 -mattr=-fuse-literals | FileCheck %s ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=apple-a17 -mattr=-fuse-literals | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=ampere1 -mattr=-fuse-literals | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=ampere1a -mattr=-fuse-literals | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=ampere1b -mattr=-fuse-literals | FileCheck %s @g = common local_unnamed_addr global ptr null, align 8 diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-addr.ll b/llvm/test/CodeGen/AArch64/misched-fusion-addr.ll index 29349952ce8769..459b68853e57ff 100644 --- a/llvm/test/CodeGen/AArch64/misched-fusion-addr.ll +++ b/llvm/test/CodeGen/AArch64/misched-fusion-addr.ll @@ -3,6 +3,9 @@ ; RUN: llc %s -o - 
-mtriple=aarch64-unknown -mcpu=exynos-m3 | FileCheck %s ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m4 | FileCheck %s ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m5 | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=ampere1 | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=ampere1a | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=ampere1b | FileCheck %s target triple = "aarch64-unknown" diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-aes.ll b/llvm/test/CodeGen/AArch64/misched-fusion-aes.ll index ee3e808f9f9218..bf166954d80c98 100644 --- a/llvm/test/CodeGen/AArch64/misched-fusion-aes.ll +++ b/llvm/test/CodeGen/AArch64/misched-fusion-aes.ll @@ -17,6 +17,9 @@ ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m3 | FileCheck %s ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m4 | FileCheck %s ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m5 | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=ampere1 | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=ampere1a | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=ampere1b | FileCheck %s declare <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %d, <16 x i8> %k) declare <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %d) From debca7ee43522e1702ade36f4954517e35f82886 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 14 Feb 2024 12:28:58 +0000 Subject: [PATCH 127/240] [VPlan] Move dropping of poison flags to VPlanTransforms. (NFC) Move collectPoisonGeneratingFlags from InnerLoopVectorizer to VPlanTransforms and also update its name. collectPoisonGeneratingFlags already directly drops poison-generating flags, not only collecting it. This means it is more appropriate to integerate it directly into the VPlan transform pipeline. The current implementation still calls back to legal to check if a block needs predication, which should be improved in the future. 
--- .../Transforms/Vectorize/LoopVectorize.cpp | 102 +----------------- .../Transforms/Vectorize/VPlanTransforms.cpp | 83 ++++++++++++++ .../Transforms/Vectorize/VPlanTransforms.h | 15 +++ 3 files changed, 102 insertions(+), 98 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 1a7b301c35f2b8..b39bf21cacdbb5 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -670,17 +670,6 @@ class InnerLoopVectorizer { /// running the verifier. Return the preheader of the completed vector loop. BasicBlock *completeLoopSkeleton(); - /// Collect poison-generating recipes that may generate a poison value that is - /// used after vectorization, even when their operands are not poison. Those - /// recipes meet the following conditions: - /// * Contribute to the address computation of a recipe generating a widen - /// memory load/store (VPWidenMemoryInstructionRecipe or - /// VPInterleaveRecipe). - /// * Such a widen memory load/store has at least one underlying Instruction - /// that is in a basic block that needs predication and after vectorization - /// the generated instruction won't be predicated. - void collectPoisonGeneratingRecipes(VPTransformState &State); - /// Allow subclasses to override and print debug traces before/after vplan /// execution, when trace information is requested. virtual void printDebugTracesAtStart(){}; @@ -1069,91 +1058,6 @@ static std::string getDebugLocString(const Loop *L) { } #endif -void InnerLoopVectorizer::collectPoisonGeneratingRecipes( - VPTransformState &State) { - - // Collect recipes in the backward slice of `Root` that may generate a poison - // value that is used after vectorization. 
- SmallPtrSet Visited; - auto collectPoisonGeneratingInstrsInBackwardSlice([&](VPRecipeBase *Root) { - SmallVector Worklist; - Worklist.push_back(Root); - - // Traverse the backward slice of Root through its use-def chain. - while (!Worklist.empty()) { - VPRecipeBase *CurRec = Worklist.back(); - Worklist.pop_back(); - - if (!Visited.insert(CurRec).second) - continue; - - // Prune search if we find another recipe generating a widen memory - // instruction. Widen memory instructions involved in address computation - // will lead to gather/scatter instructions, which don't need to be - // handled. - if (isa(CurRec) || - isa(CurRec) || - isa(CurRec) || - isa(CurRec) || - isa(CurRec)) - continue; - - // This recipe contributes to the address computation of a widen - // load/store. If the underlying instruction has poison-generating flags, - // drop them directly. - if (auto *RecWithFlags = dyn_cast(CurRec)) { - RecWithFlags->dropPoisonGeneratingFlags(); - } else { - Instruction *Instr = dyn_cast_or_null( - CurRec->getVPSingleValue()->getUnderlyingValue()); - (void)Instr; - assert((!Instr || !Instr->hasPoisonGeneratingFlags()) && - "found instruction with poison generating flags not covered by " - "VPRecipeWithIRFlags"); - } - - // Add new definitions to the worklist. - for (VPValue *operand : CurRec->operands()) - if (VPRecipeBase *OpDef = operand->getDefiningRecipe()) - Worklist.push_back(OpDef); - } - }); - - // Traverse all the recipes in the VPlan and collect the poison-generating - // recipes in the backward slice starting at the address of a VPWidenRecipe or - // VPInterleaveRecipe. 
- auto Iter = vp_depth_first_deep(State.Plan->getEntry()); - for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly(Iter)) { - for (VPRecipeBase &Recipe : *VPBB) { - if (auto *WidenRec = dyn_cast(&Recipe)) { - Instruction &UnderlyingInstr = WidenRec->getIngredient(); - VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe(); - if (AddrDef && WidenRec->isConsecutive() && - Legal->blockNeedsPredication(UnderlyingInstr.getParent())) - collectPoisonGeneratingInstrsInBackwardSlice(AddrDef); - } else if (auto *InterleaveRec = dyn_cast(&Recipe)) { - VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe(); - if (AddrDef) { - // Check if any member of the interleave group needs predication. - const InterleaveGroup *InterGroup = - InterleaveRec->getInterleaveGroup(); - bool NeedPredication = false; - for (int I = 0, NumMembers = InterGroup->getNumMembers(); - I < NumMembers; ++I) { - Instruction *Member = InterGroup->getMember(I); - if (Member) - NeedPredication |= - Legal->blockNeedsPredication(Member->getParent()); - } - - if (NeedPredication) - collectPoisonGeneratingInstrsInBackwardSlice(AddrDef); - } - } - } - } -} - namespace llvm { // Loop vectorization cost-model hints how the scalar epilogue loop should be @@ -7591,8 +7495,6 @@ LoopVectorizationPlanner::executePlan( State.LVer->prepareNoAliasMetadata(); } - ILV.collectPoisonGeneratingRecipes(State); - ILV.printDebugTracesAtStart(); //===------------------------------------------------===// @@ -8869,6 +8771,10 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { // in ways that accessing values using original IR values is incorrect. Plan->disableValue2VPValue(); + VPlanTransforms::dropPoisonGeneratingRecipes(*Plan, [this](BasicBlock *BB) { + return Legal->blockNeedsPredication(BB); + }); + // Sink users of fixed-order recurrence past the recipe defining the previous // value and introduce FirstOrderRecurrenceSplice VPInstructions. 
if (!VPlanTransforms::adjustFixedOrderRecurrences(*Plan, Builder)) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 71f5285f90236b..16855a9227a186 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1201,3 +1201,86 @@ void VPlanTransforms::addActiveLaneMask( CompareToReplace->eraseFromParent(); } } + +void VPlanTransforms::dropPoisonGeneratingRecipes( + VPlan &Plan, function_ref BlockNeedsPredication) { + // Collect recipes in the backward slice of `Root` that may generate a poison + // value that is used after vectorization. + SmallPtrSet Visited; + auto collectPoisonGeneratingInstrsInBackwardSlice([&](VPRecipeBase *Root) { + SmallVector Worklist; + Worklist.push_back(Root); + + // Traverse the backward slice of Root through its use-def chain. + while (!Worklist.empty()) { + VPRecipeBase *CurRec = Worklist.back(); + Worklist.pop_back(); + + if (!Visited.insert(CurRec).second) + continue; + + // Prune search if we find another recipe generating a widen memory + // instruction. Widen memory instructions involved in address computation + // will lead to gather/scatter instructions, which don't need to be + // handled. + if (isa(CurRec) || + isa(CurRec) || + isa(CurRec) || + isa(CurRec) || + isa(CurRec)) + continue; + + // This recipe contributes to the address computation of a widen + // load/store. If the underlying instruction has poison-generating flags, + // drop them directly. + if (auto *RecWithFlags = dyn_cast(CurRec)) { + RecWithFlags->dropPoisonGeneratingFlags(); + } else { + Instruction *Instr = dyn_cast_or_null( + CurRec->getVPSingleValue()->getUnderlyingValue()); + (void)Instr; + assert((!Instr || !Instr->hasPoisonGeneratingFlags()) && + "found instruction with poison generating flags not covered by " + "VPRecipeWithIRFlags"); + } + + // Add new definitions to the worklist. 
+ for (VPValue *operand : CurRec->operands()) + if (VPRecipeBase *OpDef = operand->getDefiningRecipe()) + Worklist.push_back(OpDef); + } + }); + + // Traverse all the recipes in the VPlan and collect the poison-generating + // recipes in the backward slice starting at the address of a VPWidenRecipe or + // VPInterleaveRecipe. + auto Iter = vp_depth_first_deep(Plan.getEntry()); + for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly(Iter)) { + for (VPRecipeBase &Recipe : *VPBB) { + if (auto *WidenRec = dyn_cast(&Recipe)) { + Instruction &UnderlyingInstr = WidenRec->getIngredient(); + VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe(); + if (AddrDef && WidenRec->isConsecutive() && + BlockNeedsPredication(UnderlyingInstr.getParent())) + collectPoisonGeneratingInstrsInBackwardSlice(AddrDef); + } else if (auto *InterleaveRec = dyn_cast(&Recipe)) { + VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe(); + if (AddrDef) { + // Check if any member of the interleave group needs predication. + const InterleaveGroup *InterGroup = + InterleaveRec->getInterleaveGroup(); + bool NeedPredication = false; + for (int I = 0, NumMembers = InterGroup->getNumMembers(); + I < NumMembers; ++I) { + Instruction *Member = InterGroup->getMember(I); + if (Member) + NeedPredication |= BlockNeedsPredication(Member->getParent()); + } + + if (NeedPredication) + collectPoisonGeneratingInstrsInBackwardSlice(AddrDef); + } + } + } + } +} diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index 3bf91115debb7d..4a8e9f5793acfb 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -84,6 +84,21 @@ struct VPlanTransforms { const MapVector &MinBWs, LLVMContext &Ctx); + /// Drop poison flags from recipes that may generate a poison value that is + /// used after vectorization, even when their operands are not poison. 
Those + /// recipes meet the following conditions: + /// * Contribute to the address computation of a recipe generating a widen + /// memory load/store (VPWidenMemoryInstructionRecipe or + /// VPInterleaveRecipe). + /// * Such a widen memory load/store has at least one underlying Instruction + /// that is in a basic block that needs predication and after vectorization + /// the generated instruction won't be predicated. + /// Uses \p BlockNeedsPredication to check if a block needs predicating. + /// TODO: Replace BlockNeedsPredication callback with retrieving info from + /// VPlan directly. + static void dropPoisonGeneratingRecipes( + VPlan &Plan, function_ref BlockNeedsPredication); + private: /// Remove redundant VPBasicBlocks by merging them into their predecessor if /// the predecessor has a single successor. From 502756905c7de5f6217a071b73adda72c46ffd1c Mon Sep 17 00:00:00 2001 From: Vlad Serebrennikov Date: Wed, 14 Feb 2024 16:39:00 +0400 Subject: [PATCH 128/240] [clang][NFC] Use "notable" for "interesting" identifiers in `IdentifierInfo` (#81542) This patch expands the notion of "interesting" in `IdentifierInfo` to also cover ObjC keywords and builtins, which matches the notion of "interesting" in the serialization layer. What was previously "interesting" in `IdentifierInfo` is now called "notable". Beyond clearing up confusion between serialization and the rest of the compiler, it also resolves a naming problem: ObjC keywords, notable identifiers, and builtin IDs are all stored in the same bit-field. Now we can use "interesting" to name it and its corresponding type, instead of the `ObjCKeywordOrInterestingOrBuiltin` abomination.
--- clang/include/clang/Basic/IdentifierTable.h | 104 +++++++++----------- clang/include/clang/Basic/TokenKinds.def | 22 ++--- clang/include/clang/Basic/TokenKinds.h | 8 +- clang/lib/Basic/IdentifierTable.cpp | 18 ++-- clang/lib/Sema/SemaDecl.cpp | 14 +-- clang/lib/Serialization/ASTReader.cpp | 3 +- clang/lib/Serialization/ASTWriter.cpp | 4 +- 7 files changed, 82 insertions(+), 91 deletions(-) diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h index fa8969eb73ddbf..a091639bfa2542 100644 --- a/clang/include/clang/Basic/IdentifierTable.h +++ b/clang/include/clang/Basic/IdentifierTable.h @@ -84,28 +84,28 @@ using IdentifierLocPair = std::pair; /// of a pointer to one of these classes. enum { IdentifierInfoAlignment = 8 }; -static constexpr int ObjCOrBuiltinIDBits = 16; +static constexpr int InterestingIdentifierBits = 16; -/// The "layout" of ObjCOrBuiltinID is: +/// The "layout" of InterestingIdentifier is: /// - ObjCKeywordKind enumerators -/// - InterestingIdentifierKind enumerators +/// - NotableIdentifierKind enumerators /// - Builtin::ID enumerators -/// - NonSpecialIdentifier -enum class ObjCKeywordOrInterestingOrBuiltin { +/// - NotInterestingIdentifier +enum class InterestingIdentifier { #define OBJC_AT_KEYWORD(X) objc_##X, #include "clang/Basic/TokenKinds.def" NUM_OBJC_KEYWORDS, -#define INTERESTING_IDENTIFIER(X) X, +#define NOTABLE_IDENTIFIER(X) X, #include "clang/Basic/TokenKinds.def" - NUM_OBJC_KEYWORDS_AND_INTERESTING_IDENTIFIERS, + NUM_OBJC_KEYWORDS_AND_NOTABLE_IDENTIFIERS, NotBuiltin, #define BUILTIN(ID, TYPE, ATTRS) BI##ID, #include "clang/Basic/Builtins.inc" FirstTSBuiltin, - NonSpecialIdentifier = 65534 + NotInterestingIdentifier = 65534 }; /// One of these records is kept for each identifier that @@ -121,8 +121,8 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { LLVM_PREFERRED_TYPE(tok::TokenKind) unsigned TokenID : 9; - LLVM_PREFERRED_TYPE(ObjCKeywordOrInterestingOrBuiltin) - unsigned 
ObjCOrBuiltinID : ObjCOrBuiltinIDBits; + LLVM_PREFERRED_TYPE(InterestingIdentifier) + unsigned InterestingIdentifierID : InterestingIdentifierBits; // True if there is a #define for this. LLVM_PREFERRED_TYPE(bool) @@ -205,8 +205,8 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { IdentifierInfo() : TokenID(tok::identifier), - ObjCOrBuiltinID(llvm::to_underlying( - ObjCKeywordOrInterestingOrBuiltin::NonSpecialIdentifier)), + InterestingIdentifierID(llvm::to_underlying( + InterestingIdentifier::NotInterestingIdentifier)), HasMacro(false), HadMacro(false), IsExtension(false), IsFutureCompatKeyword(false), IsPoisoned(false), IsCPPOperatorKeyword(false), NeedsHandleIdentifier(false), @@ -341,71 +341,63 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { /// /// For example, 'class' will return tok::objc_class if ObjC is enabled. tok::ObjCKeywordKind getObjCKeywordID() const { - assert(0 == llvm::to_underlying( - ObjCKeywordOrInterestingOrBuiltin::objc_not_keyword)); - auto Value = - static_cast(ObjCOrBuiltinID); - if (Value < ObjCKeywordOrInterestingOrBuiltin::NUM_OBJC_KEYWORDS) - return static_cast(ObjCOrBuiltinID); + assert(0 == llvm::to_underlying(InterestingIdentifier::objc_not_keyword)); + auto Value = static_cast(InterestingIdentifierID); + if (Value < InterestingIdentifier::NUM_OBJC_KEYWORDS) + return static_cast(InterestingIdentifierID); return tok::objc_not_keyword; } void setObjCKeywordID(tok::ObjCKeywordKind ID) { - assert(0 == llvm::to_underlying( - ObjCKeywordOrInterestingOrBuiltin::objc_not_keyword)); - ObjCOrBuiltinID = ID; + assert(0 == llvm::to_underlying(InterestingIdentifier::objc_not_keyword)); + InterestingIdentifierID = ID; assert(getObjCKeywordID() == ID && "ID too large for field!"); } /// Return a value indicating whether this is a builtin function. 
unsigned getBuiltinID() const { - auto Value = - static_cast(ObjCOrBuiltinID); - if (Value > ObjCKeywordOrInterestingOrBuiltin:: - NUM_OBJC_KEYWORDS_AND_INTERESTING_IDENTIFIERS && - Value != ObjCKeywordOrInterestingOrBuiltin::NonSpecialIdentifier) { + auto Value = static_cast(InterestingIdentifierID); + if (Value > + InterestingIdentifier::NUM_OBJC_KEYWORDS_AND_NOTABLE_IDENTIFIERS && + Value != InterestingIdentifier::NotInterestingIdentifier) { auto FirstBuiltin = - llvm::to_underlying(ObjCKeywordOrInterestingOrBuiltin::NotBuiltin); - return static_cast(ObjCOrBuiltinID - FirstBuiltin); + llvm::to_underlying(InterestingIdentifier::NotBuiltin); + return static_cast(InterestingIdentifierID - FirstBuiltin); } return Builtin::ID::NotBuiltin; } void setBuiltinID(unsigned ID) { assert(ID != Builtin::ID::NotBuiltin); - auto FirstBuiltin = - llvm::to_underlying(ObjCKeywordOrInterestingOrBuiltin::NotBuiltin); - ObjCOrBuiltinID = ID + FirstBuiltin; + auto FirstBuiltin = llvm::to_underlying(InterestingIdentifier::NotBuiltin); + InterestingIdentifierID = ID + FirstBuiltin; assert(getBuiltinID() == ID && "ID too large for field!"); } void clearBuiltinID() { - ObjCOrBuiltinID = llvm::to_underlying( - ObjCKeywordOrInterestingOrBuiltin::NonSpecialIdentifier); - } - - tok::InterestingIdentifierKind getInterestingIdentifierID() const { - auto Value = - static_cast(ObjCOrBuiltinID); - if (Value > ObjCKeywordOrInterestingOrBuiltin::NUM_OBJC_KEYWORDS && - Value < ObjCKeywordOrInterestingOrBuiltin:: - NUM_OBJC_KEYWORDS_AND_INTERESTING_IDENTIFIERS) { - auto FirstInterestingIdentifier = - 1 + llvm::to_underlying( - ObjCKeywordOrInterestingOrBuiltin::NUM_OBJC_KEYWORDS); - return static_cast( - ObjCOrBuiltinID - FirstInterestingIdentifier); + InterestingIdentifierID = + llvm::to_underlying(InterestingIdentifier::NotInterestingIdentifier); + } + + tok::NotableIdentifierKind getNotableIdentifierID() const { + auto Value = static_cast(InterestingIdentifierID); + if (Value > 
InterestingIdentifier::NUM_OBJC_KEYWORDS && + Value < + InterestingIdentifier::NUM_OBJC_KEYWORDS_AND_NOTABLE_IDENTIFIERS) { + auto FirstNotableIdentifier = + 1 + llvm::to_underlying(InterestingIdentifier::NUM_OBJC_KEYWORDS); + return static_cast(InterestingIdentifierID - + FirstNotableIdentifier); } - return tok::not_interesting; + return tok::not_notable; } - void setInterestingIdentifierID(unsigned ID) { - assert(ID != tok::not_interesting); - auto FirstInterestingIdentifier = - 1 + llvm::to_underlying( - ObjCKeywordOrInterestingOrBuiltin::NUM_OBJC_KEYWORDS); - ObjCOrBuiltinID = ID + FirstInterestingIdentifier; - assert(getInterestingIdentifierID() == ID && "ID too large for field!"); + void setNotableIdentifierID(unsigned ID) { + assert(ID != tok::not_notable); + auto FirstNotableIdentifier = + 1 + llvm::to_underlying(InterestingIdentifier::NUM_OBJC_KEYWORDS); + InterestingIdentifierID = ID + FirstNotableIdentifier; + assert(getNotableIdentifierID() == ID && "ID too large for field!"); } - unsigned getObjCOrBuiltinID() const { return ObjCOrBuiltinID; } - void setObjCOrBuiltinID(unsigned ID) { ObjCOrBuiltinID = ID; } + unsigned getObjCOrBuiltinID() const { return InterestingIdentifierID; } + void setObjCOrBuiltinID(unsigned ID) { InterestingIdentifierID = ID; } /// get/setExtension - Initialize information about whether or not this /// language token is an extension. 
This controls extension warnings, and is diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def index 23817cde7a9354..2046ab9dc0198c 100644 --- a/clang/include/clang/Basic/TokenKinds.def +++ b/clang/include/clang/Basic/TokenKinds.def @@ -85,8 +85,8 @@ #ifndef PRAGMA_ANNOTATION #define PRAGMA_ANNOTATION(X) ANNOTATION(X) #endif -#ifndef INTERESTING_IDENTIFIER -#define INTERESTING_IDENTIFIER(X) +#ifndef NOTABLE_IDENTIFIER +#define NOTABLE_IDENTIFIER(X) #endif //===----------------------------------------------------------------------===// @@ -808,15 +808,15 @@ OBJC_AT_KEYWORD(import) OBJC_AT_KEYWORD(available) //===----------------------------------------------------------------------===// -// Interesting identifiers. +// Notable identifiers. //===----------------------------------------------------------------------===// -INTERESTING_IDENTIFIER(not_interesting) -INTERESTING_IDENTIFIER(FILE) -INTERESTING_IDENTIFIER(jmp_buf) -INTERESTING_IDENTIFIER(sigjmp_buf) -INTERESTING_IDENTIFIER(ucontext_t) -INTERESTING_IDENTIFIER(float_t) -INTERESTING_IDENTIFIER(double_t) +NOTABLE_IDENTIFIER(not_notable) +NOTABLE_IDENTIFIER(FILE) +NOTABLE_IDENTIFIER(jmp_buf) +NOTABLE_IDENTIFIER(sigjmp_buf) +NOTABLE_IDENTIFIER(ucontext_t) +NOTABLE_IDENTIFIER(float_t) +NOTABLE_IDENTIFIER(double_t) // TODO: What to do about context-sensitive keywords like: // bycopy/byref/in/inout/oneway/out? 
@@ -1011,4 +1011,4 @@ ANNOTATION(repl_input_end) #undef TOK #undef C99_KEYWORD #undef C23_KEYWORD -#undef INTERESTING_IDENTIFIER +#undef NOTABLE_IDENTIFIER diff --git a/clang/include/clang/Basic/TokenKinds.h b/clang/include/clang/Basic/TokenKinds.h index 7529b922619ada..e5183a27d2bc5f 100644 --- a/clang/include/clang/Basic/TokenKinds.h +++ b/clang/include/clang/Basic/TokenKinds.h @@ -44,12 +44,12 @@ enum ObjCKeywordKind { NUM_OBJC_KEYWORDS }; -/// Provides a namespace for interesting identifers such as float_t and +/// Provides a namespace for notable identifers such as float_t and /// double_t. -enum InterestingIdentifierKind { -#define INTERESTING_IDENTIFIER(X) X, +enum NotableIdentifierKind { +#define NOTABLE_IDENTIFIER(X) X, #include "clang/Basic/TokenKinds.def" - NUM_INTERESTING_IDENTIFIERS + NUM_NOTABLE_IDENTIFIERS }; /// Defines the possible values of an on-off-switch (C99 6.10.6p2). diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp index d0d8316385b452..a9b07aca65c052 100644 --- a/clang/lib/Basic/IdentifierTable.cpp +++ b/clang/lib/Basic/IdentifierTable.cpp @@ -36,7 +36,7 @@ using namespace clang; // A check to make sure the ObjCOrBuiltinID has sufficient room to store the // largest possible target/aux-target combination. If we exceed this, we likely // need to just change the ObjCOrBuiltinIDBits value in IdentifierTable.h. -static_assert(2 * LargestBuiltinID < (2 << (ObjCOrBuiltinIDBits - 1)), +static_assert(2 * LargestBuiltinID < (2 << (InterestingIdentifierBits - 1)), "Insufficient ObjCOrBuiltinID Bits"); //===----------------------------------------------------------------------===// @@ -280,13 +280,13 @@ static void AddObjCKeyword(StringRef Name, Table.get(Name).setObjCKeywordID(ObjCID); } -static void AddInterestingIdentifier(StringRef Name, - tok::InterestingIdentifierKind BTID, - IdentifierTable &Table) { - // Don't add 'not_interesting' identifier. 
- if (BTID != tok::not_interesting) { +static void AddNotableIdentifier(StringRef Name, + tok::NotableIdentifierKind BTID, + IdentifierTable &Table) { + // Don't add 'not_notable' identifier. + if (BTID != tok::not_notable) { IdentifierInfo &Info = Table.get(Name, tok::identifier); - Info.setInterestingIdentifierID(BTID); + Info.setNotableIdentifierID(BTID); } } @@ -306,8 +306,8 @@ void IdentifierTable::AddKeywords(const LangOptions &LangOpts) { #define OBJC_AT_KEYWORD(NAME) \ if (LangOpts.ObjC) \ AddObjCKeyword(StringRef(#NAME), tok::objc_##NAME, *this); -#define INTERESTING_IDENTIFIER(NAME) \ - AddInterestingIdentifier(StringRef(#NAME), tok::NAME, *this); +#define NOTABLE_IDENTIFIER(NAME) \ + AddNotableIdentifier(StringRef(#NAME), tok::NAME, *this); #define TESTING_KEYWORD(NAME, FLAGS) #include "clang/Basic/TokenKinds.def" diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index e95e675d0507ed..09a35fddba1954 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -6842,21 +6842,21 @@ Sema::ActOnTypedefNameDecl(Scope *S, DeclContext *DC, TypedefNameDecl *NewTD, if (IdentifierInfo *II = NewTD->getIdentifier()) if (!NewTD->isInvalidDecl() && NewTD->getDeclContext()->getRedeclContext()->isTranslationUnit()) { - switch (II->getInterestingIdentifierID()) { - case tok::InterestingIdentifierKind::FILE: + switch (II->getNotableIdentifierID()) { + case tok::NotableIdentifierKind::FILE: Context.setFILEDecl(NewTD); break; - case tok::InterestingIdentifierKind::jmp_buf: + case tok::NotableIdentifierKind::jmp_buf: Context.setjmp_bufDecl(NewTD); break; - case tok::InterestingIdentifierKind::sigjmp_buf: + case tok::NotableIdentifierKind::sigjmp_buf: Context.setsigjmp_bufDecl(NewTD); break; - case tok::InterestingIdentifierKind::ucontext_t: + case tok::NotableIdentifierKind::ucontext_t: Context.setucontext_tDecl(NewTD); break; - case tok::InterestingIdentifierKind::float_t: - case tok::InterestingIdentifierKind::double_t: + case 
tok::NotableIdentifierKind::float_t: + case tok::NotableIdentifierKind::double_t: NewTD->addAttr(AvailableOnlyInDefaultEvalMethodAttr::Create(Context)); break; default: diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index eea14a66fa1818..683a076e6bc399 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -988,8 +988,7 @@ ASTIdentifierLookupTraitBase::ReadKey(const unsigned char* d, unsigned n) { static bool isInterestingIdentifier(ASTReader &Reader, IdentifierInfo &II, bool IsModule) { bool IsInteresting = - II.getInterestingIdentifierID() != - tok::InterestingIdentifierKind::not_interesting || + II.getNotableIdentifierID() != tok::NotableIdentifierKind::not_notable || II.getBuiltinID() != Builtin::ID::NotBuiltin || II.getObjCKeywordID() != tok::ObjCKeywordKind::objc_not_keyword; return II.hadMacroDefinition() || II.isPoisoned() || diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 7966b3175ec9f1..740bec586a5e33 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -3599,8 +3599,8 @@ class ASTIdentifierTableTrait { bool isInterestingIdentifier(const IdentifierInfo *II, uint64_t MacroOffset) { II->getObjCOrBuiltinID(); bool IsInteresting = - II->getInterestingIdentifierID() != - tok::InterestingIdentifierKind::not_interesting || + II->getNotableIdentifierID() != + tok::NotableIdentifierKind::not_notable || II->getBuiltinID() != Builtin::ID::NotBuiltin || II->getObjCKeywordID() != tok::ObjCKeywordKind::objc_not_keyword; if (MacroOffset || II->isPoisoned() || (!IsModule && IsInteresting) || From c5e13840fdc20adce51673a63d5703bf1ed02aba Mon Sep 17 00:00:00 2001 From: David Spickett Date: Wed, 14 Feb 2024 12:50:29 +0000 Subject: [PATCH 129/240] [clang][docs] Remove trailing whitespace Which is causing CI checks to fail. 
clang/docs/LanguageExtensions.rst:2794:takes no arguments and produces an unsigned long long result. The builtin does clang/docs/LanguageExtensions.rst:2795:not guarantee any particular frequency, only that it is stable. Knowledge of the + echo '*** Trailing whitespace has been found in Clang source files as described above ***' --- clang/docs/LanguageExtensions.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index ee1d25396ca865..1db80262b8fb8e 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -2791,8 +2791,8 @@ frequency is fixed, making it suitable for measuring elapsed time. The ``__builtin_readsteadycounter()`` builtin returns the frequency counter value. When not supported by the target, the return value is always zero. This builtin -takes no arguments and produces an unsigned long long result. The builtin does -not guarantee any particular frequency, only that it is stable. Knowledge of the +takes no arguments and produces an unsigned long long result. The builtin does +not guarantee any particular frequency, only that it is stable. Knowledge of the counter's true frequency will need to be provided by the user. Query for this feature with ``__has_builtin(__builtin_readsteadycounter)``. From 16a0629e7c16cc1ec1a5066c57be3044a1e00395 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Wed, 14 Feb 2024 20:53:16 +0800 Subject: [PATCH 130/240] [ValueTracking] Compute known FPClass from signbit idiom (#80740) This patch improves `computeKnownFPClass` by using context-sensitive information from `DomConditionCache`. 
The motivation of this patch is to optimize the following case found in [fmt/format.h](https://github.com/fmtlib/fmt/blob/e17bc67547a66cdd378ca6a90c56b865d30d6168/include/fmt/format.h#L3555-L3566): ``` define float @test(float %x, i1 %cond) { %i32 = bitcast float %x to i32 %cmp = icmp slt i32 %i32, 0 br i1 %cmp, label %if.then1, label %if.else if.then1: %fneg = fneg float %x br label %if.end if.else: br i1 %cond, label %if.then2, label %if.end if.then2: br label %if.end if.end: %value = phi float [ %fneg, %if.then1 ], [ %x, %if.then2 ], [ %x, %if.else ] %ret = call float @llvm.fabs.f32(float %value) ret float %ret } ``` We can prove the sign bit of %value is always zero. Then the fabs can be eliminated. This pattern also exists in cpython/duckdb/oiio/openexr. Compile-time impact: https://llvm-compile-time-tracker.com/compare.php?from=f82e0809ba12170e2f648f8a1ac01e78ef06c958&to=041218bf5491996edd828cc15b3aec5a59ddc636&stat=instructions:u |stage1-O3|stage1-ReleaseThinLTO|stage1-ReleaseLTO-g|stage1-O0-g|stage2-O3|stage2-O0-g|stage2-clang| |--|--|--|--|--|--|--| |-0.00%|+0.01%|+0.00%|-0.03%|+0.00%|+0.00%|+0.02%| --- llvm/lib/Analysis/DomConditionCache.cpp | 5 + llvm/lib/Analysis/ValueTracking.cpp | 11 +- .../InstCombine/fpclass-from-dom-cond.ll | 115 ++++++++++++++++++ 3 files changed, 130 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Analysis/DomConditionCache.cpp b/llvm/lib/Analysis/DomConditionCache.cpp index c07a8a76f111df..274f3ff44b2a6f 100644 --- a/llvm/lib/Analysis/DomConditionCache.cpp +++ b/llvm/lib/Analysis/DomConditionCache.cpp @@ -66,6 +66,11 @@ static void findAffectedValues(Value *Cond, // A > C3 && A < C4. if (match(A, m_Add(m_Value(X), m_ConstantInt()))) AddAffected(X); + // Handle icmp slt/sgt (bitcast X to int), 0/-1, which is supported by + // computeKnownFPClass(). 
+ if ((Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGT) && + match(A, m_ElementWiseBitCast(m_Value(X)))) + Affected.push_back(X); } } else if (match(Cond, m_CombineOr(m_FCmp(Pred, m_Value(A), m_Constant()), m_Intrinsic( diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 6c42facea3b2b3..cc1d5b74dcfc53 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -4272,7 +4272,7 @@ static void computeKnownFPClassFromCond(const Value *V, Value *Cond, Value *LHS; uint64_t ClassVal = 0; const APFloat *CRHS; - // TODO: handle sign-bit check idiom + const APInt *RHS; if (match(Cond, m_FCmp(Pred, m_Value(LHS), m_APFloat(CRHS)))) { auto [CmpVal, MaskIfTrue, MaskIfFalse] = fcmpImpliesClass( Pred, *CxtI->getParent()->getParent(), LHS, *CRHS, LHS != V); @@ -4282,6 +4282,15 @@ static void computeKnownFPClassFromCond(const Value *V, Value *Cond, m_Value(LHS), m_ConstantInt(ClassVal)))) { FPClassTest Mask = static_cast(ClassVal); KnownFromContext.knownNot(CondIsTrue ? 
~Mask : Mask); + } else if (match(Cond, m_ICmp(Pred, m_ElementWiseBitCast(m_Value(LHS)), + m_APInt(RHS)))) { + bool TrueIfSigned; + if (!isSignBitCheck(Pred, *RHS, TrueIfSigned)) + return; + if (TrueIfSigned == CondIsTrue) + KnownFromContext.signBitMustBeOne(); + else + KnownFromContext.signBitMustBeZero(); } } diff --git a/llvm/test/Transforms/InstCombine/fpclass-from-dom-cond.ll b/llvm/test/Transforms/InstCombine/fpclass-from-dom-cond.ll index 5d4840159dc9a0..d40cd7fd503ecc 100644 --- a/llvm/test/Transforms/InstCombine/fpclass-from-dom-cond.ll +++ b/llvm/test/Transforms/InstCombine/fpclass-from-dom-cond.ll @@ -320,3 +320,118 @@ if.else: %ret = call i1 @llvm.is.fpclass.f32(float %x, i32 783) ret i1 %ret } + +define float @test_signbit_check(float %x, i1 %cond) { +; CHECK-LABEL: define float @test_signbit_check( +; CHECK-SAME: float [[X:%.*]], i1 [[COND:%.*]]) { +; CHECK-NEXT: [[I32:%.*]] = bitcast float [[X]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I32]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN1:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then1: +; CHECK-NEXT: [[FNEG:%.*]] = fneg float [[X]] +; CHECK-NEXT: br label [[IF_END:%.*]] +; CHECK: if.else: +; CHECK-NEXT: br i1 [[COND]], label [[IF_THEN2:%.*]], label [[IF_END]] +; CHECK: if.then2: +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[VALUE:%.*]] = phi float [ [[FNEG]], [[IF_THEN1]] ], [ [[X]], [[IF_THEN2]] ], [ [[X]], [[IF_ELSE]] ] +; CHECK-NEXT: ret float [[VALUE]] +; + %i32 = bitcast float %x to i32 + %cmp = icmp slt i32 %i32, 0 + br i1 %cmp, label %if.then1, label %if.else + +if.then1: + %fneg = fneg float %x + br label %if.end + +if.else: + br i1 %cond, label %if.then2, label %if.end + +if.then2: + br label %if.end + +if.end: + %value = phi float [ %fneg, %if.then1 ], [ %x, %if.then2 ], [ %x, %if.else ] + %ret = call float @llvm.fabs.f32(float %value) + ret float %ret +} + +define float @test_signbit_check_fail(float %x, i1 %cond) { +; CHECK-LABEL: define float 
@test_signbit_check_fail( +; CHECK-SAME: float [[X:%.*]], i1 [[COND:%.*]]) { +; CHECK-NEXT: [[I32:%.*]] = bitcast float [[X]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I32]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN1:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then1: +; CHECK-NEXT: [[FNEG:%.*]] = fneg float [[X]] +; CHECK-NEXT: br label [[IF_END:%.*]] +; CHECK: if.else: +; CHECK-NEXT: br i1 [[COND]], label [[IF_THEN2:%.*]], label [[IF_END]] +; CHECK: if.then2: +; CHECK-NEXT: [[FNEG2:%.*]] = fneg float [[X]] +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[VALUE:%.*]] = phi float [ [[FNEG]], [[IF_THEN1]] ], [ [[FNEG2]], [[IF_THEN2]] ], [ [[X]], [[IF_ELSE]] ] +; CHECK-NEXT: [[RET:%.*]] = call float @llvm.fabs.f32(float [[VALUE]]) +; CHECK-NEXT: ret float [[RET]] +; + %i32 = bitcast float %x to i32 + %cmp = icmp slt i32 %i32, 0 + br i1 %cmp, label %if.then1, label %if.else + +if.then1: + %fneg = fneg float %x + br label %if.end + +if.else: + br i1 %cond, label %if.then2, label %if.end + +if.then2: + %fneg2 = fneg float %x + br label %if.end + +if.end: + %value = phi float [ %fneg, %if.then1 ], [ %fneg2, %if.then2 ], [ %x, %if.else ] + %ret = call float @llvm.fabs.f32(float %value) + ret float %ret +} + +define <2 x float> @test_signbit_check_wrong_type(<2 x float> %x, i1 %cond) { +; CHECK-LABEL: define <2 x float> @test_signbit_check_wrong_type( +; CHECK-SAME: <2 x float> [[X:%.*]], i1 [[COND:%.*]]) { +; CHECK-NEXT: [[I32:%.*]] = bitcast <2 x float> [[X]] to i64 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[I32]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN1:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then1: +; CHECK-NEXT: [[FNEG:%.*]] = fneg <2 x float> [[X]] +; CHECK-NEXT: br label [[IF_END:%.*]] +; CHECK: if.else: +; CHECK-NEXT: br i1 [[COND]], label [[IF_THEN2:%.*]], label [[IF_END]] +; CHECK: if.then2: +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[VALUE:%.*]] = phi <2 x float> [ [[FNEG]], [[IF_THEN1]] ], 
[ [[X]], [[IF_THEN2]] ], [ [[X]], [[IF_ELSE]] ] +; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[VALUE]]) +; CHECK-NEXT: ret <2 x float> [[RET]] +; + %i32 = bitcast <2 x float> %x to i64 + %cmp = icmp slt i64 %i32, 0 + br i1 %cmp, label %if.then1, label %if.else + +if.then1: + %fneg = fneg <2 x float> %x + br label %if.end + +if.else: + br i1 %cond, label %if.then2, label %if.end + +if.then2: + br label %if.end + +if.end: + %value = phi <2 x float> [ %fneg, %if.then1 ], [ %x, %if.then2 ], [ %x, %if.else ] + %ret = call <2 x float> @llvm.fabs.v2f32(<2 x float> %value) + ret <2 x float> %ret +} From 03232350536655234a30eca17ea56c8cb636831c Mon Sep 17 00:00:00 2001 From: Guillaume Chatelet Date: Wed, 14 Feb 2024 14:08:09 +0100 Subject: [PATCH 131/240] [libc] Add user defined literals to initialize `BigInt` and `__uint128_t` constants (#81267) Adds user defined literal to construct unsigned integer constants. This is useful when constructing constants for non native C++ types like `__uint128_t` or our custom `BigInt` type. 
--- libc/src/__support/CMakeLists.txt | 9 + libc/src/__support/integer_literals.h | 173 ++++++++ libc/test/src/__support/CMakeLists.txt | 11 + libc/test/src/__support/FPUtil/CMakeLists.txt | 1 + .../test/src/__support/FPUtil/fpbits_test.cpp | 390 ++++++++---------- .../src/__support/integer_literals_test.cpp | 134 ++++++ .../llvm-project-overlay/libc/BUILD.bazel | 9 + .../libc/test/src/__support/BUILD.bazel | 8 + .../test/src/__support/FPUtil/BUILD.bazel | 1 + 9 files changed, 523 insertions(+), 213 deletions(-) create mode 100644 libc/src/__support/integer_literals.h create mode 100644 libc/test/src/__support/integer_literals_test.cpp diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt index bd814a080c4f87..013627788940d8 100644 --- a/libc/src/__support/CMakeLists.txt +++ b/libc/src/__support/CMakeLists.txt @@ -146,6 +146,15 @@ add_header_library( libc.src.errno.errno ) +add_header_library( + integer_literals + HDRS + integer_literals.h + DEPENDS + .uint128 + libc.src.__support.CPP.limits +) + add_header_library( integer_operations HDRS diff --git a/libc/src/__support/integer_literals.h b/libc/src/__support/integer_literals.h new file mode 100644 index 00000000000000..c8e965c1a03a05 --- /dev/null +++ b/libc/src/__support/integer_literals.h @@ -0,0 +1,173 @@ +//===-- User literal for unsigned integers ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// This set of user defined literals allows uniform constructions of constants +// up to 256 bits and also help with unit tests (EXPECT_EQ requires the same +// type for LHS and RHS). 
+//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_INTEGER_LITERALS_H +#define LLVM_LIBC_SRC___SUPPORT_INTEGER_LITERALS_H + +#include "src/__support/CPP/limits.h" // CHAR_BIT +#include "src/__support/UInt128.h" // UInt128 +#include "src/__support/macros/attributes.h" // LIBC_INLINE +#include // size_t +#include // uintxx_t + +namespace LIBC_NAMESPACE { + +LIBC_INLINE constexpr uint8_t operator""_u8(unsigned long long value) { + return value; +} + +LIBC_INLINE constexpr uint16_t operator""_u16(unsigned long long value) { + return value; +} + +LIBC_INLINE constexpr uint32_t operator""_u32(unsigned long long value) { + return value; +} + +LIBC_INLINE constexpr uint64_t operator""_u64(unsigned long long value) { + return value; +} + +namespace internal { + +// Creates a T by reading digits from an array. +template +LIBC_INLINE constexpr T accumulate(int base, const uint8_t *digits, + size_t size) { + T value{}; + for (; size; ++digits, --size) { + value *= base; + value += *digits; + } + return value; +} + +// A static buffer to hold the digits for a T. +template struct DigitBuffer { + static_assert(base == 2 || base == 10 || base == 16); + // One character provides log2(base) bits. + // Base 2 and 16 provide exactly one and four bits per character respectively. + // For base 10, a character provides log2(10) ≈ 3.32... which we round to 3 + // for the purpose of buffer allocation. + LIBC_INLINE_VAR static constexpr size_t BITS_PER_DIGIT = base == 2 ? 1 + : base == 10 ? 3 + : base == 16 ? 4 + : 0; + LIBC_INLINE_VAR static constexpr size_t MAX_DIGITS = + sizeof(T) * CHAR_BIT / BITS_PER_DIGIT; + + uint8_t digits[MAX_DIGITS] = {}; + size_t size = 0; + + constexpr DigitBuffer(const char *str) { + for (; *str != '\0'; ++str) + push(*str); + } + + // Returns the digit for a particular character. + // Returns 255 if the character is invalid. 
+ LIBC_INLINE static constexpr uint8_t get_digit_value(const char c) { + const auto to_lower = [](char c) { return c | 32; }; + const auto is_digit = [](char c) { return c >= '0' && c <= '9'; }; + const auto is_alpha = [](char c) { + return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'); + }; + if (is_digit(c)) + return c - '0'; + if (base > 10 && is_alpha(c)) + return to_lower(c) - 'a' + 10; + return 255; + } + + // Adds a single character to this buffer. + LIBC_INLINE constexpr void push(char c) { + if (c == '\'') + return; // ' is valid but not taken into account. + const uint8_t value = get_digit_value(c); + if (value == 255 || size >= MAX_DIGITS) { + // During constant evaluation `__builtin_unreachable` will halt the + // compiler as it is not executable. This is preferable over `assert` that + // will only trigger in debug mode. Also we can't use `static_assert` + // because `value` and `size` are not constant. + __builtin_unreachable(); // invalid or too many characters. + } + digits[size] = value; + ++size; + } +}; + +// Generic implementation for native types (including __uint128_t or ExtInt +// where available). +template struct Parser { + template LIBC_INLINE static constexpr T parse(const char *str) { + const DigitBuffer buffer(str); + return accumulate(base, buffer.digits, buffer.size); + } +}; + +// Specialization for cpp::BigInt. +// Because this code runs at compile time we try to make it efficient. For +// binary and hexadecimal formats we read digits by chunks of 64 bits and +// produce the BigInt internal representation directly. For decimal numbers we +// go the slow path and use slower BigInt arithmetic. +template +struct Parser> { + using UIntT = cpp::BigInt; + template static constexpr UIntT parse(const char *str) { + const DigitBuffer buffer(str); + if constexpr (base == 10) { + // Slow path, we sum and multiply BigInt for each digit.
+ return accumulate(base, buffer.digits, buffer.size); + } else { + // Fast path, we consume blocks of uint64_t and creates the BigInt's + // internal representation directly. + using U64ArrayT = cpp::array; + U64ArrayT array; + size_t size = buffer.size; + const uint8_t *digit_ptr = buffer.digits + size; + for (size_t i = 0; i < array.size(); ++i) { + constexpr size_t U64_DIGITS = DigitBuffer::MAX_DIGITS; + const size_t chunk = size > U64_DIGITS ? U64_DIGITS : size; + digit_ptr -= chunk; + size -= chunk; + array[i] = accumulate(base, digit_ptr, chunk); + } + return UIntT(array); + } + } +}; + +// Detects the base of the number and dispatches to the right implementation. +template +LIBC_INLINE constexpr T parse_with_prefix(const char *ptr) { + using P = Parser; + if (ptr[0] == '0' && ptr[1] == 'x') + return P::template parse<16>(ptr + 2); + else if (ptr[0] == '0' && ptr[1] == 'b') + return P::template parse<2>(ptr + 2); + else + return P::template parse<10>(ptr); +} + +} // namespace internal + +LIBC_INLINE constexpr UInt128 operator""_u128(const char *x) { + return internal::parse_with_prefix(x); +} + +LIBC_INLINE constexpr auto operator""_u256(const char *x) { + return internal::parse_with_prefix>(x); +} + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC___SUPPORT_INTEGER_LITERALS_H diff --git a/libc/test/src/__support/CMakeLists.txt b/libc/test/src/__support/CMakeLists.txt index a92e6da56096a1..231b01e0ee50a7 100644 --- a/libc/test/src/__support/CMakeLists.txt +++ b/libc/test/src/__support/CMakeLists.txt @@ -97,6 +97,17 @@ add_libc_test( libc.src.__support.CPP.optional ) +add_libc_test( + integer_literals_test + SUITE + libc-support-tests + SRCS + integer_literals_test.cpp + DEPENDS + libc.src.__support.integer_literals + libc.src.__support.CPP.optional +) + add_libc_test( fixedvector_test SUITE diff --git a/libc/test/src/__support/FPUtil/CMakeLists.txt b/libc/test/src/__support/FPUtil/CMakeLists.txt index 897434ceff6007..f1a027a514ba23 100644 --- 
a/libc/test/src/__support/FPUtil/CMakeLists.txt +++ b/libc/test/src/__support/FPUtil/CMakeLists.txt @@ -23,6 +23,7 @@ add_libc_test( DEPENDS libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.fpbits_str + libc.src.__support.integer_literals ) add_fp_unittest( diff --git a/libc/test/src/__support/FPUtil/fpbits_test.cpp b/libc/test/src/__support/FPUtil/fpbits_test.cpp index b1c4b6691b6eec..1c8a1c5b9d4cee 100644 --- a/libc/test/src/__support/FPUtil/fpbits_test.cpp +++ b/libc/test/src/__support/FPUtil/fpbits_test.cpp @@ -8,6 +8,7 @@ #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/fpbits_str.h" +#include "src/__support/integer_literals.h" #include "test/UnitTest/Test.h" using LIBC_NAMESPACE::fputil::FPBits; @@ -15,37 +16,42 @@ using LIBC_NAMESPACE::fputil::FPType; using LIBC_NAMESPACE::fputil::Sign; using LIBC_NAMESPACE::fputil::internal::FPRep; +using LIBC_NAMESPACE::operator""_u16; +using LIBC_NAMESPACE::operator""_u32; +using LIBC_NAMESPACE::operator""_u64; +using LIBC_NAMESPACE::operator""_u128; + TEST(LlvmLibcFPBitsTest, FPType_IEEE754_Binary16) { using Rep = FPRep; using u16 = typename Rep::StorageType; - EXPECT_EQ(u16(0b0'00000'0000000000), u16(Rep::zero())); - EXPECT_EQ(u16(0b0'01111'0000000000), u16(Rep::one())); - EXPECT_EQ(u16(0b0'00000'0000000001), u16(Rep::min_subnormal())); - EXPECT_EQ(u16(0b0'00000'1111111111), u16(Rep::max_subnormal())); - EXPECT_EQ(u16(0b0'00001'0000000000), u16(Rep::min_normal())); - EXPECT_EQ(u16(0b0'11110'1111111111), u16(Rep::max_normal())); - EXPECT_EQ(u16(0b0'11111'0000000000), u16(Rep::inf())); - EXPECT_EQ(u16(0b0'11111'0100000000), u16(Rep::signaling_nan())); - EXPECT_EQ(u16(0b0'11111'1000000000), u16(Rep::quiet_nan())); + EXPECT_EQ(0b0'00000'0000000000_u16, u16(Rep::zero())); + EXPECT_EQ(0b0'01111'0000000000_u16, u16(Rep::one())); + EXPECT_EQ(0b0'00000'0000000001_u16, u16(Rep::min_subnormal())); + EXPECT_EQ(0b0'00000'1111111111_u16, u16(Rep::max_subnormal())); + 
EXPECT_EQ(0b0'00001'0000000000_u16, u16(Rep::min_normal())); + EXPECT_EQ(0b0'11110'1111111111_u16, u16(Rep::max_normal())); + EXPECT_EQ(0b0'11111'0000000000_u16, u16(Rep::inf())); + EXPECT_EQ(0b0'11111'0100000000_u16, u16(Rep::signaling_nan())); + EXPECT_EQ(0b0'11111'1000000000_u16, u16(Rep::quiet_nan())); } TEST(LlvmLibcFPBitsTest, FPType_IEEE754_Binary32) { using Rep = FPRep; using u32 = typename Rep::StorageType; - EXPECT_EQ(u32(0b0'00000000'00000000000000000000000), u32(Rep::zero())); - EXPECT_EQ(u32(0b0'01111111'00000000000000000000000), u32(Rep::one())); - EXPECT_EQ(u32(0b0'00000000'00000000000000000000001), + EXPECT_EQ(0b0'00000000'00000000000000000000000_u32, u32(Rep::zero())); + EXPECT_EQ(0b0'01111111'00000000000000000000000_u32, u32(Rep::one())); + EXPECT_EQ(0b0'00000000'00000000000000000000001_u32, u32(Rep::min_subnormal())); - EXPECT_EQ(u32(0b0'00000000'11111111111111111111111), + EXPECT_EQ(0b0'00000000'11111111111111111111111_u32, u32(Rep::max_subnormal())); - EXPECT_EQ(u32(0b0'00000001'00000000000000000000000), u32(Rep::min_normal())); - EXPECT_EQ(u32(0b0'11111110'11111111111111111111111), u32(Rep::max_normal())); - EXPECT_EQ(u32(0b0'11111111'00000000000000000000000), u32(Rep::inf())); - EXPECT_EQ(u32(0b0'11111111'01000000000000000000000), + EXPECT_EQ(0b0'00000001'00000000000000000000000_u32, u32(Rep::min_normal())); + EXPECT_EQ(0b0'11111110'11111111111111111111111_u32, u32(Rep::max_normal())); + EXPECT_EQ(0b0'11111111'00000000000000000000000_u32, u32(Rep::inf())); + EXPECT_EQ(0b0'11111111'01000000000000000000000_u32, u32(Rep::signaling_nan())); - EXPECT_EQ(u32(0b0'11111111'10000000000000000000000), u32(Rep::quiet_nan())); + EXPECT_EQ(0b0'11111111'10000000000000000000000_u32, u32(Rep::quiet_nan())); } TEST(LlvmLibcFPBitsTest, FPType_IEEE754_Binary64) { @@ -53,80 +59,63 @@ TEST(LlvmLibcFPBitsTest, FPType_IEEE754_Binary64) { using u64 = typename Rep::StorageType; EXPECT_EQ( - u64(0b0'00000000000'0000000000000000000000000000000000000000000000000000), + 
0b0'00000000000'0000000000000000000000000000000000000000000000000000_u64, u64(Rep::zero())); EXPECT_EQ( - u64(0b0'01111111111'0000000000000000000000000000000000000000000000000000), + 0b0'01111111111'0000000000000000000000000000000000000000000000000000_u64, u64(Rep::one())); EXPECT_EQ( - u64(0b0'00000000000'0000000000000000000000000000000000000000000000000001), + 0b0'00000000000'0000000000000000000000000000000000000000000000000001_u64, u64(Rep::min_subnormal())); EXPECT_EQ( - u64(0b0'00000000000'1111111111111111111111111111111111111111111111111111), + 0b0'00000000000'1111111111111111111111111111111111111111111111111111_u64, u64(Rep::max_subnormal())); EXPECT_EQ( - u64(0b0'00000000001'0000000000000000000000000000000000000000000000000000), + 0b0'00000000001'0000000000000000000000000000000000000000000000000000_u64, u64(Rep::min_normal())); EXPECT_EQ( - u64(0b0'11111111110'1111111111111111111111111111111111111111111111111111), + 0b0'11111111110'1111111111111111111111111111111111111111111111111111_u64, u64(Rep::max_normal())); EXPECT_EQ( - u64(0b0'11111111111'0000000000000000000000000000000000000000000000000000), + 0b0'11111111111'0000000000000000000000000000000000000000000000000000_u64, u64(Rep::inf())); EXPECT_EQ( - u64(0b0'11111111111'0100000000000000000000000000000000000000000000000000), + 0b0'11111111111'0100000000000000000000000000000000000000000000000000_u64, u64(Rep::signaling_nan())); EXPECT_EQ( - u64(0b0'11111111111'1000000000000000000000000000000000000000000000000000), + 0b0'11111111111'1000000000000000000000000000000000000000000000000000_u64, u64(Rep::quiet_nan())); } -static constexpr UInt128 u128(uint64_t hi, uint64_t lo) { -#if defined(__SIZEOF_INT128__) - return __uint128_t(hi) << 64 | __uint128_t(lo); -#else - return UInt128({lo, hi}); -#endif -} - TEST(LlvmLibcFPBitsTest, FPType_IEEE754_Binary128) { using Rep = FPRep; EXPECT_EQ( - u128(0b0'000000000000000'000000000000000000000000000000000000000000000000, - 
0b0000000000000000000000000000000000000000000000000000000000000000), + 0b0'000000000000000'0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000_u128, UInt128(Rep::zero())); EXPECT_EQ( - u128(0b0'011111111111111'000000000000000000000000000000000000000000000000, - 0b0000000000000000000000000000000000000000000000000000000000000000), + 0b0'011111111111111'0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000_u128, UInt128(Rep::one())); EXPECT_EQ( - u128(0b0'000000000000000'000000000000000000000000000000000000000000000000, - 0b0000000000000000000000000000000000000000000000000000000000000001), + 0b0'000000000000000'0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001_u128, UInt128(Rep::min_subnormal())); EXPECT_EQ( - u128(0b0'000000000000000'111111111111111111111111111111111111111111111111, - 0b1111111111111111111111111111111111111111111111111111111111111111), + 0b0'000000000000000'1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111_u128, UInt128(Rep::max_subnormal())); EXPECT_EQ( - u128(0b0'000000000000001'000000000000000000000000000000000000000000000000, - 0b0000000000000000000000000000000000000000000000000000000000000000), + 0b0'000000000000001'0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000_u128, UInt128(Rep::min_normal())); EXPECT_EQ( - u128(0b0'111111111111110'111111111111111111111111111111111111111111111111, - 0b1111111111111111111111111111111111111111111111111111111111111111), + 0b0'111111111111110'1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111_u128, UInt128(Rep::max_normal())); EXPECT_EQ( - u128(0b0'111111111111111'000000000000000000000000000000000000000000000000, - 
0b0000000000000000000000000000000000000000000000000000000000000000), + 0b0'111111111111111'0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000_u128, UInt128(Rep::inf())); EXPECT_EQ( - u128(0b0'111111111111111'010000000000000000000000000000000000000000000000, - 0b0000000000000000000000000000000000000000000000000000000000000000), + 0b0'111111111111111'0100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000_u128, UInt128(Rep::signaling_nan())); EXPECT_EQ( - u128(0b0'111111111111111'100000000000000000000000000000000000000000000000, - 0b0000000000000000000000000000000000000000000000000000000000000000), + 0b0'111111111111111'1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000_u128, UInt128(Rep::quiet_nan())); } @@ -134,89 +123,73 @@ TEST(LlvmLibcFPBitsTest, FPType_X86_Binary80) { using Rep = FPRep; EXPECT_EQ( - u128(0b0'000000000000000, - 0b0000000000000000000000000000000000000000000000000000000000000000), + 0b0'0000000000000000000000000000000000000000000000000000000000000000000000000000000_u128, UInt128(Rep::zero())); EXPECT_EQ( - u128(0b0'011111111111111, - 0b1000000000000000000000000000000000000000000000000000000000000000), + 0b0'0111111111111111000000000000000000000000000000000000000000000000000000000000000_u128, UInt128(Rep::one())); EXPECT_EQ( - u128(0b0'000000000000000, - 0b0000000000000000000000000000000000000000000000000000000000000001), + 0b0'0000000000000000000000000000000000000000000000000000000000000000000000000000001_u128, UInt128(Rep::min_subnormal())); EXPECT_EQ( - u128(0b0'000000000000000, - 0b0111111111111111111111111111111111111111111111111111111111111111), + 0b0'0000000000000000111111111111111111111111111111111111111111111111111111111111111_u128, UInt128(Rep::max_subnormal())); EXPECT_EQ( - u128(0b0'000000000000001, - 
0b1000000000000000000000000000000000000000000000000000000000000000), + 0b0'0000000000000011000000000000000000000000000000000000000000000000000000000000000_u128, UInt128(Rep::min_normal())); EXPECT_EQ( - u128(0b0'111111111111110, - 0b1111111111111111111111111111111111111111111111111111111111111111), + 0b0'1111111111111101111111111111111111111111111111111111111111111111111111111111111_u128, UInt128(Rep::max_normal())); EXPECT_EQ( - u128(0b0'111111111111111, - 0b1000000000000000000000000000000000000000000000000000000000000000), + 0b0'1111111111111111000000000000000000000000000000000000000000000000000000000000000_u128, UInt128(Rep::inf())); EXPECT_EQ( - u128(0b0'111111111111111, - 0b1010000000000000000000000000000000000000000000000000000000000000), + 0b0'1111111111111111010000000000000000000000000000000000000000000000000000000000000_u128, UInt128(Rep::signaling_nan())); EXPECT_EQ( - u128(0b0'111111111111111, - 0b1100000000000000000000000000000000000000000000000000000000000000), + 0b0'1111111111111111100000000000000000000000000000000000000000000000000000000000000_u128, UInt128(Rep::quiet_nan())); } TEST(LlvmLibcFPBitsTest, FPType_X86_Binary80_IsNan) { using Rep = FPRep; - const auto is_nan = [](uint64_t hi, uint64_t lo) { - Rep rep; - rep.set_uintval(u128(hi, lo)); - return rep.is_nan(); - }; - - EXPECT_TRUE(is_nan( - 0b0'111111111111111, // NAN : Pseudo-Infinity - 0b0000000000000000000000000000000000000000000000000000000000000000)); - EXPECT_TRUE(is_nan( - 0b0'111111111111111, // NAN : Pseudo Not a Number - 0b0000000000000000000000000000000000000000000000000000000000000001)); - EXPECT_TRUE(is_nan( - 0b0'111111111111111, // NAN : Pseudo Not a Number - 0b0100000000000000000000000000000000000000000000000000000000000000)); - EXPECT_TRUE(is_nan( - 0b0'111111111111111, // NAN : Signalling Not a Number - 0b1000000000000000000000000000000000000000000000000000000000000001)); - EXPECT_TRUE(is_nan( - 0b0'111111111111111, // NAN : Floating-point Indefinite - 
0b1100000000000000000000000000000000000000000000000000000000000000)); - EXPECT_TRUE(is_nan( - 0b0'111111111111111, // NAN : Quiet Not a Number - 0b1100000000000000000000000000000000000000000000000000000000000001)); - EXPECT_TRUE(is_nan( - 0b0'111111111111110, // NAN : Unnormal - 0b0000000000000000000000000000000000000000000000000000000000000000)); - - EXPECT_FALSE(is_nan( - 0b0'000000000000000, // Zero - 0b0000000000000000000000000000000000000000000000000000000000000000)); - EXPECT_FALSE(is_nan( - 0b0'000000000000000, // Subnormal - 0b0000000000000000000000000000000000000000000000000000000000000001)); - EXPECT_FALSE(is_nan( - 0b0'000000000000000, // Pseudo Denormal - 0b1000000000000000000000000000000000000000000000000000000000000001)); - EXPECT_FALSE(is_nan( - 0b0'111111111111111, // Infinity - 0b1000000000000000000000000000000000000000000000000000000000000000)); - EXPECT_FALSE(is_nan( - 0b0'111111111111110, // Normalized - 0b1000000000000000000000000000000000000000000000000000000000000000)); + EXPECT_TRUE( // NAN : Pseudo-Infinity + Rep(0b0'111111111111111'0000000000000000000000000000000000000000000000000000000000000000_u128) + .is_nan()); + EXPECT_TRUE( // NAN : Pseudo Not a Number + Rep(0b0'111111111111111'0000000000000000000000000000000000000000000000000000000000000001_u128) + .is_nan()); + EXPECT_TRUE( // NAN : Pseudo Not a Number + Rep(0b0'111111111111111'0100000000000000000000000000000000000000000000000000000000000000_u128) + .is_nan()); + EXPECT_TRUE( // NAN : Signalling Not a Number + Rep(0b0'111111111111111'1000000000000000000000000000000000000000000000000000000000000001_u128) + .is_nan()); + EXPECT_TRUE( // NAN : Floating-point Indefinite + Rep(0b0'111111111111111'1100000000000000000000000000000000000000000000000000000000000000_u128) + .is_nan()); + EXPECT_TRUE( // NAN : Quiet Not a Number + Rep(0b0'111111111111111'1100000000000000000000000000000000000000000000000000000000000001_u128) + .is_nan()); + EXPECT_TRUE( // NAN : Unnormal + 
Rep(0b0'111111111111110'0000000000000000000000000000000000000000000000000000000000000000_u128) + .is_nan()); + EXPECT_FALSE( // Zero + Rep(0b0'000000000000000'0000000000000000000000000000000000000000000000000000000000000000_u128) + .is_nan()); + EXPECT_FALSE( // Subnormal + Rep(0b0'000000000000000'0000000000000000000000000000000000000000000000000000000000000001_u128) + .is_nan()); + EXPECT_FALSE( // Pseudo Denormal + Rep(0b0'000000000000000'1000000000000000000000000000000000000000000000000000000000000001_u128) + .is_nan()); + EXPECT_FALSE( // Infinity + Rep(0b0'111111111111111'1000000000000000000000000000000000000000000000000000000000000000_u128) + .is_nan()); + EXPECT_FALSE( // Normalized + Rep(0b0'111111111111110'1000000000000000000000000000000000000000000000000000000000000000_u128) + .is_nan()); } enum class FP { @@ -339,49 +312,49 @@ TEST(LlvmLibcFPBitsTest, FloatType) { FloatBits zero(0.0f); EXPECT_TRUE(zero.is_pos()); - EXPECT_EQ(zero.get_biased_exponent(), static_cast(0)); - EXPECT_EQ(zero.get_mantissa(), static_cast(0)); - EXPECT_EQ(zero.uintval(), static_cast(0x00000000)); + EXPECT_EQ(zero.get_biased_exponent(), 0_u16); + EXPECT_EQ(zero.get_mantissa(), 0_u32); + EXPECT_EQ(zero.uintval(), 0_u32); EXPECT_STREQ(LIBC_NAMESPACE::str(zero).c_str(), "0x00000000 = (S: 0, E: 0x0000, M: 0x00000000)"); FloatBits negzero(-0.0f); EXPECT_TRUE(negzero.is_neg()); - EXPECT_EQ(negzero.get_biased_exponent(), static_cast(0)); - EXPECT_EQ(negzero.get_mantissa(), static_cast(0)); - EXPECT_EQ(negzero.uintval(), static_cast(0x80000000)); + EXPECT_EQ(negzero.get_biased_exponent(), 0_u16); + EXPECT_EQ(negzero.get_mantissa(), 0_u32); + EXPECT_EQ(negzero.uintval(), 0x80000000_u32); EXPECT_STREQ(LIBC_NAMESPACE::str(negzero).c_str(), "0x80000000 = (S: 1, E: 0x0000, M: 0x00000000)"); FloatBits one(1.0f); EXPECT_TRUE(one.is_pos()); - EXPECT_EQ(one.get_biased_exponent(), static_cast(0x7F)); - EXPECT_EQ(one.get_mantissa(), static_cast(0)); - EXPECT_EQ(one.uintval(), 
static_cast(0x3F800000)); + EXPECT_EQ(one.get_biased_exponent(), 0x7F_u16); + EXPECT_EQ(one.get_mantissa(), 0_u32); + EXPECT_EQ(one.uintval(), 0x3F800000_u32); EXPECT_STREQ(LIBC_NAMESPACE::str(one).c_str(), "0x3F800000 = (S: 0, E: 0x007F, M: 0x00000000)"); FloatBits negone(-1.0f); EXPECT_TRUE(negone.is_neg()); - EXPECT_EQ(negone.get_biased_exponent(), static_cast(0x7F)); - EXPECT_EQ(negone.get_mantissa(), static_cast(0)); - EXPECT_EQ(negone.uintval(), static_cast(0xBF800000)); + EXPECT_EQ(negone.get_biased_exponent(), 0x7F_u16); + EXPECT_EQ(negone.get_mantissa(), 0_u32); + EXPECT_EQ(negone.uintval(), 0xBF800000_u32); EXPECT_STREQ(LIBC_NAMESPACE::str(negone).c_str(), "0xBF800000 = (S: 1, E: 0x007F, M: 0x00000000)"); FloatBits num(1.125f); EXPECT_TRUE(num.is_pos()); - EXPECT_EQ(num.get_biased_exponent(), static_cast(0x7F)); - EXPECT_EQ(num.get_mantissa(), static_cast(0x00100000)); - EXPECT_EQ(num.uintval(), static_cast(0x3F900000)); + EXPECT_EQ(num.get_biased_exponent(), 0x7F_u16); + EXPECT_EQ(num.get_mantissa(), 0x00100000_u32); + EXPECT_EQ(num.uintval(), 0x3F900000_u32); EXPECT_STREQ(LIBC_NAMESPACE::str(num).c_str(), "0x3F900000 = (S: 0, E: 0x007F, M: 0x00100000)"); FloatBits negnum(-1.125f); EXPECT_TRUE(negnum.is_neg()); - EXPECT_EQ(negnum.get_biased_exponent(), static_cast(0x7F)); - EXPECT_EQ(negnum.get_mantissa(), static_cast(0x00100000)); - EXPECT_EQ(negnum.uintval(), static_cast(0xBF900000)); + EXPECT_EQ(negnum.get_biased_exponent(), 0x7F_u16); + EXPECT_EQ(negnum.get_mantissa(), 0x00100000_u32); + EXPECT_EQ(negnum.uintval(), 0xBF900000_u32); EXPECT_STREQ(LIBC_NAMESPACE::str(negnum).c_str(), "0xBF900000 = (S: 1, E: 0x007F, M: 0x00100000)"); @@ -401,49 +374,49 @@ TEST(LlvmLibcFPBitsTest, DoubleType) { DoubleBits zero(0.0); EXPECT_TRUE(zero.is_pos()); - EXPECT_EQ(zero.get_biased_exponent(), static_cast(0x0000)); - EXPECT_EQ(zero.get_mantissa(), static_cast(0x0000000000000000)); - EXPECT_EQ(zero.uintval(), static_cast(0x0000000000000000)); + 
EXPECT_EQ(zero.get_biased_exponent(), 0_u16); + EXPECT_EQ(zero.get_mantissa(), 0_u64); + EXPECT_EQ(zero.uintval(), 0_u64); EXPECT_STREQ(LIBC_NAMESPACE::str(zero).c_str(), "0x0000000000000000 = (S: 0, E: 0x0000, M: 0x0000000000000000)"); DoubleBits negzero(-0.0); EXPECT_TRUE(negzero.is_neg()); - EXPECT_EQ(negzero.get_biased_exponent(), static_cast(0x0000)); - EXPECT_EQ(negzero.get_mantissa(), static_cast(0x0000000000000000)); - EXPECT_EQ(negzero.uintval(), static_cast(0x8000000000000000)); + EXPECT_EQ(negzero.get_biased_exponent(), 0_u16); + EXPECT_EQ(negzero.get_mantissa(), 0_u64); + EXPECT_EQ(negzero.uintval(), 0x8000000000000000_u64); EXPECT_STREQ(LIBC_NAMESPACE::str(negzero).c_str(), "0x8000000000000000 = (S: 1, E: 0x0000, M: 0x0000000000000000)"); DoubleBits one(1.0); EXPECT_TRUE(one.is_pos()); - EXPECT_EQ(one.get_biased_exponent(), static_cast(0x03FF)); - EXPECT_EQ(one.get_mantissa(), static_cast(0x0000000000000000)); - EXPECT_EQ(one.uintval(), static_cast(0x3FF0000000000000)); + EXPECT_EQ(one.get_biased_exponent(), 0x03FF_u16); + EXPECT_EQ(one.get_mantissa(), 0_u64); + EXPECT_EQ(one.uintval(), 0x3FF0000000000000_u64); EXPECT_STREQ(LIBC_NAMESPACE::str(one).c_str(), "0x3FF0000000000000 = (S: 0, E: 0x03FF, M: 0x0000000000000000)"); DoubleBits negone(-1.0); EXPECT_TRUE(negone.is_neg()); - EXPECT_EQ(negone.get_biased_exponent(), static_cast(0x03FF)); - EXPECT_EQ(negone.get_mantissa(), static_cast(0x0000000000000000)); - EXPECT_EQ(negone.uintval(), static_cast(0xBFF0000000000000)); + EXPECT_EQ(negone.get_biased_exponent(), 0x03FF_u16); + EXPECT_EQ(negone.get_mantissa(), 0_u64); + EXPECT_EQ(negone.uintval(), 0xBFF0000000000000_u64); EXPECT_STREQ(LIBC_NAMESPACE::str(negone).c_str(), "0xBFF0000000000000 = (S: 1, E: 0x03FF, M: 0x0000000000000000)"); DoubleBits num(1.125); EXPECT_TRUE(num.is_pos()); - EXPECT_EQ(num.get_biased_exponent(), static_cast(0x03FF)); - EXPECT_EQ(num.get_mantissa(), static_cast(0x0002000000000000)); - EXPECT_EQ(num.uintval(), 
static_cast(0x3FF2000000000000)); + EXPECT_EQ(num.get_biased_exponent(), 0x03FF_u16); + EXPECT_EQ(num.get_mantissa(), 0x0002000000000000_u64); + EXPECT_EQ(num.uintval(), 0x3FF2000000000000_u64); EXPECT_STREQ(LIBC_NAMESPACE::str(num).c_str(), "0x3FF2000000000000 = (S: 0, E: 0x03FF, M: 0x0002000000000000)"); DoubleBits negnum(-1.125); EXPECT_TRUE(negnum.is_neg()); - EXPECT_EQ(negnum.get_biased_exponent(), static_cast(0x03FF)); - EXPECT_EQ(negnum.get_mantissa(), static_cast(0x0002000000000000)); - EXPECT_EQ(negnum.uintval(), static_cast(0xBFF2000000000000)); + EXPECT_EQ(negnum.get_biased_exponent(), 0x03FF_u16); + EXPECT_EQ(negnum.get_mantissa(), 0x0002000000000000_u64); + EXPECT_EQ(negnum.uintval(), 0xBFF2000000000000_u64); EXPECT_STREQ(LIBC_NAMESPACE::str(negnum).c_str(), "0xBFF2000000000000 = (S: 1, E: 0x03FF, M: 0x0002000000000000)"); @@ -467,10 +440,9 @@ TEST(LlvmLibcFPBitsTest, X86LongDoubleType) { LongDoubleBits zero(0.0l); EXPECT_TRUE(zero.is_pos()); - EXPECT_EQ(zero.get_biased_exponent(), static_cast(0x0000)); - EXPECT_EQ(zero.get_mantissa(), static_cast(0x0000000000000000) - << 64); - EXPECT_EQ(zero.uintval(), static_cast(0x0000000000000000) << 64); + EXPECT_EQ(zero.get_biased_exponent(), 0_u16); + EXPECT_EQ(zero.get_mantissa(), 0_u128); + EXPECT_EQ(zero.uintval(), 0_u128); EXPECT_STREQ( LIBC_NAMESPACE::str(zero).c_str(), "0x00000000000000000000000000000000 = " @@ -478,10 +450,9 @@ TEST(LlvmLibcFPBitsTest, X86LongDoubleType) { LongDoubleBits negzero(-0.0l); EXPECT_TRUE(negzero.is_neg()); - EXPECT_EQ(negzero.get_biased_exponent(), static_cast(0x0000)); - EXPECT_EQ(negzero.get_mantissa(), static_cast(0x0000000000000000) - << 64); - EXPECT_EQ(negzero.uintval(), static_cast(0x1) << 79); + EXPECT_EQ(negzero.get_biased_exponent(), 0_u16); + EXPECT_EQ(negzero.get_mantissa(), 0_u128); + EXPECT_EQ(negzero.uintval(), 0x80000000000000000000_u128); EXPECT_STREQ( LIBC_NAMESPACE::str(negzero).c_str(), "0x00000000000080000000000000000000 = " @@ -489,9 +460,9 @@ 
TEST(LlvmLibcFPBitsTest, X86LongDoubleType) { LongDoubleBits one(1.0l); EXPECT_TRUE(one.is_pos()); - EXPECT_EQ(one.get_biased_exponent(), static_cast(0x3FFF)); - EXPECT_EQ(one.get_mantissa(), static_cast(0x0000000000000000) << 64); - EXPECT_EQ(one.uintval(), static_cast(0x3FFF8) << 60); + EXPECT_EQ(one.get_biased_exponent(), 0x3FFF_u16); + EXPECT_EQ(one.get_mantissa(), 0_u128); + EXPECT_EQ(one.uintval(), 0x3FFF8000000000000000_u128); EXPECT_STREQ( LIBC_NAMESPACE::str(one).c_str(), "0x0000000000003FFF8000000000000000 = " @@ -499,10 +470,9 @@ TEST(LlvmLibcFPBitsTest, X86LongDoubleType) { LongDoubleBits negone(-1.0l); EXPECT_TRUE(negone.is_neg()); - EXPECT_EQ(negone.get_biased_exponent(), static_cast(0x3FFF)); - EXPECT_EQ(negone.get_mantissa(), static_cast(0x0000000000000000) - << 64); - EXPECT_EQ(negone.uintval(), static_cast(0xBFFF8) << 60); + EXPECT_EQ(negone.get_biased_exponent(), 0x3FFF_u16); + EXPECT_EQ(negone.get_mantissa(), 0_u128); + EXPECT_EQ(negone.uintval(), 0xBFFF8000000000000000_u128); EXPECT_STREQ( LIBC_NAMESPACE::str(negone).c_str(), "0x000000000000BFFF8000000000000000 = " @@ -510,9 +480,9 @@ TEST(LlvmLibcFPBitsTest, X86LongDoubleType) { LongDoubleBits num(1.125l); EXPECT_TRUE(num.is_pos()); - EXPECT_EQ(num.get_biased_exponent(), static_cast(0x3FFF)); - EXPECT_EQ(num.get_mantissa(), static_cast(0x1) << 60); - EXPECT_EQ(num.uintval(), static_cast(0x3FFF9) << 60); + EXPECT_EQ(num.get_biased_exponent(), 0x3FFF_u16); + EXPECT_EQ(num.get_mantissa(), 0x1000000000000000_u128); + EXPECT_EQ(num.uintval(), 0x3FFF9000000000000000_u128); EXPECT_STREQ( LIBC_NAMESPACE::str(num).c_str(), "0x0000000000003FFF9000000000000000 = " @@ -520,9 +490,9 @@ TEST(LlvmLibcFPBitsTest, X86LongDoubleType) { LongDoubleBits negnum(-1.125l); EXPECT_TRUE(negnum.is_neg()); - EXPECT_EQ(negnum.get_biased_exponent(), static_cast(0x3FFF)); - EXPECT_EQ(negnum.get_mantissa(), static_cast(0x1) << 60); - EXPECT_EQ(negnum.uintval(), static_cast(0xBFFF9) << 60); + 
EXPECT_EQ(negnum.get_biased_exponent(), 0x3FFF_u16); + EXPECT_EQ(negnum.get_mantissa(), 0x1000000000000000_u128); + EXPECT_EQ(negnum.uintval(), 0xBFFF9000000000000000_u128); EXPECT_STREQ( LIBC_NAMESPACE::str(negnum).c_str(), "0x000000000000BFFF9000000000000000 = " @@ -547,57 +517,54 @@ TEST(LlvmLibcFPBitsTest, LongDoubleType) { LongDoubleBits zero(0.0l); EXPECT_TRUE(zero.is_pos()); - EXPECT_EQ(zero.get_biased_exponent(), static_cast(0x0000)); - EXPECT_EQ(zero.get_mantissa(), static_cast(0x0000000000000000) - << 64); - EXPECT_EQ(zero.uintval(), static_cast(0x0000000000000000) << 64); + EXPECT_EQ(zero.get_biased_exponent(), 0_u16); + EXPECT_EQ(zero.get_mantissa(), 0_u128); + EXPECT_EQ(zero.uintval(), 0_u128); EXPECT_STREQ(LIBC_NAMESPACE::str(zero).c_str(), "0x00000000000000000000000000000000 = " "(S: 0, E: 0x0000, M: 0x00000000000000000000000000000000)"); LongDoubleBits negzero(-0.0l); EXPECT_TRUE(negzero.is_neg()); - EXPECT_EQ(negzero.get_biased_exponent(), static_cast(0x0000)); - EXPECT_EQ(negzero.get_mantissa(), static_cast(0x0000000000000000) - << 64); - EXPECT_EQ(negzero.uintval(), static_cast(0x1) << 127); + EXPECT_EQ(negzero.get_biased_exponent(), 0_u16); + EXPECT_EQ(negzero.get_mantissa(), 0_u128); + EXPECT_EQ(negzero.uintval(), 0x80000000000000000000000000000000_u128); EXPECT_STREQ(LIBC_NAMESPACE::str(negzero).c_str(), "0x80000000000000000000000000000000 = " "(S: 1, E: 0x0000, M: 0x00000000000000000000000000000000)"); LongDoubleBits one(1.0l); EXPECT_TRUE(one.is_pos()); - EXPECT_EQ(one.get_biased_exponent(), static_cast(0x3FFF)); - EXPECT_EQ(one.get_mantissa(), static_cast(0x0000000000000000) << 64); - EXPECT_EQ(one.uintval(), static_cast(0x3FFF) << 112); + EXPECT_EQ(one.get_biased_exponent(), 0x3FFF_u16); + EXPECT_EQ(one.get_mantissa(), 0_u128); + EXPECT_EQ(one.uintval(), 0x3FFF0000000000000000000000000000_u128); EXPECT_STREQ(LIBC_NAMESPACE::str(one).c_str(), "0x3FFF0000000000000000000000000000 = " "(S: 0, E: 0x3FFF, M: 
0x00000000000000000000000000000000)"); LongDoubleBits negone(-1.0l); EXPECT_TRUE(negone.is_neg()); - EXPECT_EQ(negone.get_biased_exponent(), static_cast(0x3FFF)); - EXPECT_EQ(negone.get_mantissa(), static_cast(0x0000000000000000) - << 64); - EXPECT_EQ(negone.uintval(), static_cast(0xBFFF) << 112); + EXPECT_EQ(negone.get_biased_exponent(), 0x3FFF_u16); + EXPECT_EQ(negone.get_mantissa(), 0_u128); + EXPECT_EQ(negone.uintval(), 0xBFFF0000000000000000000000000000_u128); EXPECT_STREQ(LIBC_NAMESPACE::str(negone).c_str(), "0xBFFF0000000000000000000000000000 = " "(S: 1, E: 0x3FFF, M: 0x00000000000000000000000000000000)"); LongDoubleBits num(1.125l); EXPECT_TRUE(num.is_pos()); - EXPECT_EQ(num.get_biased_exponent(), static_cast(0x3FFF)); - EXPECT_EQ(num.get_mantissa(), static_cast(0x2) << 108); - EXPECT_EQ(num.uintval(), static_cast(0x3FFF2) << 108); + EXPECT_EQ(num.get_biased_exponent(), 0x3FFF_u16); + EXPECT_EQ(num.get_mantissa(), 0x2000000000000000000000000000_u128); + EXPECT_EQ(num.uintval(), 0x3FFF2000000000000000000000000000_u128); EXPECT_STREQ(LIBC_NAMESPACE::str(num).c_str(), "0x3FFF2000000000000000000000000000 = " "(S: 0, E: 0x3FFF, M: 0x00002000000000000000000000000000)"); LongDoubleBits negnum(-1.125l); EXPECT_TRUE(negnum.is_neg()); - EXPECT_EQ(negnum.get_biased_exponent(), static_cast(0x3FFF)); - EXPECT_EQ(negnum.get_mantissa(), static_cast(0x2) << 108); - EXPECT_EQ(negnum.uintval(), static_cast(0xBFFF2) << 108); + EXPECT_EQ(negnum.get_biased_exponent(), 0x3FFF_u16); + EXPECT_EQ(negnum.get_mantissa(), 0x2000000000000000000000000000_u128); + EXPECT_EQ(negnum.uintval(), 0xBFFF2000000000000000000000000000_u128); EXPECT_STREQ(LIBC_NAMESPACE::str(negnum).c_str(), "0xBFFF2000000000000000000000000000 = " "(S: 1, E: 0x3FFF, M: 0x00002000000000000000000000000000)"); @@ -621,57 +588,54 @@ TEST(LlvmLibcFPBitsTest, Float128Type) { Float128Bits zero = Float128Bits::zero(Sign::POS); EXPECT_TRUE(zero.is_pos()); - EXPECT_EQ(zero.get_biased_exponent(), static_cast(0x0000)); - 
EXPECT_EQ(zero.get_mantissa(), static_cast(0x0000000000000000) - << 64); - EXPECT_EQ(zero.uintval(), static_cast(0x0000000000000000) << 64); + EXPECT_EQ(zero.get_biased_exponent(), 0_u16); + EXPECT_EQ(zero.get_mantissa(), 0_u128); + EXPECT_EQ(zero.uintval(), 0_u128); EXPECT_STREQ(LIBC_NAMESPACE::str(zero).c_str(), "0x00000000000000000000000000000000 = " "(S: 0, E: 0x0000, M: 0x00000000000000000000000000000000)"); Float128Bits negzero = Float128Bits::zero(Sign::NEG); EXPECT_TRUE(negzero.is_neg()); - EXPECT_EQ(negzero.get_biased_exponent(), static_cast(0x0000)); - EXPECT_EQ(negzero.get_mantissa(), static_cast(0x0000000000000000) - << 64); - EXPECT_EQ(negzero.uintval(), static_cast(0x1) << 127); + EXPECT_EQ(negzero.get_biased_exponent(), 0_u16); + EXPECT_EQ(negzero.get_mantissa(), 0_u128); + EXPECT_EQ(negzero.uintval(), 0x80000000000000000000000000000000_u128); EXPECT_STREQ(LIBC_NAMESPACE::str(negzero).c_str(), "0x80000000000000000000000000000000 = " "(S: 1, E: 0x0000, M: 0x00000000000000000000000000000000)"); Float128Bits one(float128(1.0)); EXPECT_TRUE(one.is_pos()); - EXPECT_EQ(one.get_biased_exponent(), static_cast(0x3FFF)); - EXPECT_EQ(one.get_mantissa(), static_cast(0x0000000000000000) << 64); - EXPECT_EQ(one.uintval(), static_cast(0x3FFF) << 112); + EXPECT_EQ(one.get_biased_exponent(), 0x3FFF_u16); + EXPECT_EQ(one.get_mantissa(), 0_u128); + EXPECT_EQ(one.uintval(), 0x3FFF0000000000000000000000000000_u128); EXPECT_STREQ(LIBC_NAMESPACE::str(one).c_str(), "0x3FFF0000000000000000000000000000 = " "(S: 0, E: 0x3FFF, M: 0x00000000000000000000000000000000)"); Float128Bits negone(float128(-1.0)); EXPECT_TRUE(negone.is_neg()); - EXPECT_EQ(negone.get_biased_exponent(), static_cast(0x3FFF)); - EXPECT_EQ(negone.get_mantissa(), static_cast(0x0000000000000000) - << 64); - EXPECT_EQ(negone.uintval(), static_cast(0xBFFF) << 112); + EXPECT_EQ(negone.get_biased_exponent(), 0x3FFF_u16); + EXPECT_EQ(negone.get_mantissa(), 0_u128); + EXPECT_EQ(negone.uintval(), 
0xBFFF0000000000000000000000000000_u128); EXPECT_STREQ(LIBC_NAMESPACE::str(negone).c_str(), "0xBFFF0000000000000000000000000000 = " "(S: 1, E: 0x3FFF, M: 0x00000000000000000000000000000000)"); Float128Bits num(float128(1.125)); EXPECT_TRUE(num.is_pos()); - EXPECT_EQ(num.get_biased_exponent(), static_cast(0x3FFF)); - EXPECT_EQ(num.get_mantissa(), static_cast(0x2) << 108); - EXPECT_EQ(num.uintval(), static_cast(0x3FFF2) << 108); + EXPECT_EQ(num.get_biased_exponent(), 0x3FFF_u16); + EXPECT_EQ(num.get_mantissa(), 0x2000000000000000000000000000_u128); + EXPECT_EQ(num.uintval(), 0x3FFF2000000000000000000000000000_u128); EXPECT_STREQ(LIBC_NAMESPACE::str(num).c_str(), "0x3FFF2000000000000000000000000000 = " "(S: 0, E: 0x3FFF, M: 0x00002000000000000000000000000000)"); Float128Bits negnum(float128(-1.125)); EXPECT_TRUE(negnum.is_neg()); - EXPECT_EQ(negnum.get_biased_exponent(), static_cast(0x3FFF)); - EXPECT_EQ(negnum.get_mantissa(), static_cast(0x2) << 108); - EXPECT_EQ(negnum.uintval(), static_cast(0xBFFF2) << 108); + EXPECT_EQ(negnum.get_biased_exponent(), 0x3FFF_u16); + EXPECT_EQ(negnum.get_mantissa(), 0x2000000000000000000000000000_u128); + EXPECT_EQ(negnum.uintval(), 0xBFFF2000000000000000000000000000_u128); EXPECT_STREQ(LIBC_NAMESPACE::str(negnum).c_str(), "0xBFFF2000000000000000000000000000 = " "(S: 1, E: 0x3FFF, M: 0x00002000000000000000000000000000)"); diff --git a/libc/test/src/__support/integer_literals_test.cpp b/libc/test/src/__support/integer_literals_test.cpp new file mode 100644 index 00000000000000..10c3625a0e5a49 --- /dev/null +++ b/libc/test/src/__support/integer_literals_test.cpp @@ -0,0 +1,134 @@ + +//===-- Unittests for user defined integer literals -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/integer_literals.h" +#include "test/UnitTest/Test.h" + +using LIBC_NAMESPACE::operator""_u8; +using LIBC_NAMESPACE::operator""_u16; +using LIBC_NAMESPACE::operator""_u32; +using LIBC_NAMESPACE::operator""_u64; +using LIBC_NAMESPACE::operator""_u128; +using LIBC_NAMESPACE::operator""_u256; + +TEST(LlvmLibcIntegerLiteralTest, u8) { + EXPECT_EQ(uint8_t(0), 0_u8); + EXPECT_EQ(uint8_t(UINT8_MAX), 255_u8); + EXPECT_EQ(uint8_t(UINT8_MAX), 0xFF_u8); + EXPECT_EQ(uint8_t(UINT8_MAX), 0b11111111_u8); +} + +TEST(LlvmLibcIntegerLiteralTest, u16) { + EXPECT_EQ(uint16_t(0), 0_u16); + EXPECT_EQ(uint16_t(UINT8_MAX), 255_u16); + EXPECT_EQ(uint16_t(UINT8_MAX), 0xFF_u16); + EXPECT_EQ(uint16_t(UINT8_MAX), 0b11111111_u16); + EXPECT_EQ(uint16_t(UINT16_MAX), 65535_u16); + EXPECT_EQ(uint16_t(UINT16_MAX), 0xFFFF_u16); + EXPECT_EQ(uint16_t(UINT16_MAX), 0b11111111'11111111_u16); +} + +TEST(LlvmLibcIntegerLiteralTest, u32) { + EXPECT_EQ(uint32_t(0), 0_u32); + EXPECT_EQ(uint32_t(UINT8_MAX), 255_u32); + EXPECT_EQ(uint32_t(UINT8_MAX), 0xFF_u32); + EXPECT_EQ(uint32_t(UINT8_MAX), 0b11111111_u32); + EXPECT_EQ(uint32_t(UINT16_MAX), 65535_u32); + EXPECT_EQ(uint32_t(UINT16_MAX), 0xFFFF_u32); + EXPECT_EQ(uint32_t(UINT16_MAX), 0b11111111'11111111_u32); + EXPECT_EQ(uint32_t(UINT32_MAX), 4294967295_u32); + EXPECT_EQ(uint32_t(UINT32_MAX), 0xFFFFFFFF_u32); + EXPECT_EQ(uint32_t(UINT32_MAX), 0b1111111111111111'1111111111111111_u32); +} + +TEST(LlvmLibcIntegerLiteralTest, u64) { + EXPECT_EQ(uint64_t(0), 0_u64); + EXPECT_EQ(uint64_t(UINT8_MAX), 255_u64); + EXPECT_EQ(uint64_t(UINT8_MAX), 0xFF_u64); + EXPECT_EQ(uint64_t(UINT8_MAX), 0b11111111_u64); + EXPECT_EQ(uint64_t(UINT16_MAX), 65535_u64); + EXPECT_EQ(uint64_t(UINT16_MAX), 0xFFFF_u64); + EXPECT_EQ(uint64_t(UINT16_MAX), 0b11111111'11111111_u64); + EXPECT_EQ(uint64_t(UINT32_MAX), 
4294967295_u64); + EXPECT_EQ(uint64_t(UINT32_MAX), 0xFFFFFFFF_u64); + EXPECT_EQ(uint64_t(UINT32_MAX), 0b1111111111111111'1111111111111111_u64); + EXPECT_EQ(uint64_t(UINT64_MAX), 18446744073709551615_u64); + EXPECT_EQ(uint64_t(UINT64_MAX), 0xFFFFFFFF'FFFFFFFF_u64); + EXPECT_EQ( + uint64_t(UINT64_MAX), + 0b1111111111111111'1111111111111111'1111111111111111'1111111111111111_u64); +} + +TEST(LlvmLibcIntegerLiteralTest, u128) { +#if defined(__SIZEOF_INT128__) + const __uint128_t ZERO = 0; + const __uint128_t U8_MAX = UINT8_MAX; + const __uint128_t U16_MAX = UINT16_MAX; + const __uint128_t U32_MAX = UINT32_MAX; + const __uint128_t U64_MAX = UINT64_MAX; + const __uint128_t U128_MAX = (U64_MAX << 64) | U64_MAX; +#else + const UInt128 ZERO = 0; + const UInt128 U8_MAX = UINT8_MAX; + const UInt128 U16_MAX = UINT16_MAX; + const UInt128 U32_MAX = UINT32_MAX; + const UInt128 U64_MAX = UINT64_MAX; + const UInt128 U128_MAX = (U64_MAX << 64) | U64_MAX; +#endif + EXPECT_EQ(ZERO, 0_u128); + EXPECT_EQ(U8_MAX, 255_u128); + EXPECT_EQ(U8_MAX, 0xFF_u128); + EXPECT_EQ(U8_MAX, 0b11111111_u128); + EXPECT_EQ(U16_MAX, 65535_u128); + EXPECT_EQ(U16_MAX, 0xFFFF_u128); + EXPECT_EQ(U16_MAX, 0b11111111'11111111_u128); + EXPECT_EQ(U32_MAX, 4294967295_u128); + EXPECT_EQ(U32_MAX, 0xFFFFFFFF_u128); + EXPECT_EQ(U32_MAX, 0b1111111111111111'1111111111111111_u128); + EXPECT_EQ(U64_MAX, 18446744073709551615_u128); + EXPECT_EQ(U64_MAX, 0xFFFFFFFF'FFFFFFFF_u128); + EXPECT_EQ( + U64_MAX, + 0b1111111111111111'1111111111111111'1111111111111111'1111111111111111_u128); + EXPECT_EQ(U128_MAX, 340282366920938463463374607431768211455_u128); + EXPECT_EQ(U128_MAX, 0xFFFFFFFF'FFFFFFFF'FFFFFFFF'FFFFFFFF_u128); + EXPECT_EQ( + U128_MAX, + 0b1111111111111111'1111111111111111'1111111111111111'1111111111111111'1111111111111111'1111111111111111'1111111111111111'1111111111111111_u128); +} + +TEST(LlvmLibcIntegerLiteralTest, u256) { + using UInt256 = LIBC_NAMESPACE::cpp::UInt<256>; + const UInt256 ZERO = 0; + const UInt256 U8_MAX 
= UINT8_MAX; + const UInt256 U16_MAX = UINT16_MAX; + const UInt256 U32_MAX = UINT32_MAX; + const UInt256 U64_MAX = UINT64_MAX; + const UInt256 U128_MAX = (U64_MAX << 64) | U64_MAX; + const UInt256 U256_MAX = (U128_MAX << 128) | U128_MAX; + EXPECT_EQ(ZERO, 0_u256); + EXPECT_EQ(U8_MAX, 255_u256); + EXPECT_EQ(U8_MAX, 0xFF_u256); + EXPECT_EQ(U8_MAX, 0b11111111_u256); + EXPECT_EQ(U16_MAX, 65535_u256); + EXPECT_EQ(U16_MAX, 0xFFFF_u256); + EXPECT_EQ(U16_MAX, 0b11111111'11111111_u256); + EXPECT_EQ(U32_MAX, 4294967295_u256); + EXPECT_EQ(U32_MAX, 0xFFFFFFFF_u256); + EXPECT_EQ(U32_MAX, 0b1111111111111111'1111111111111111_u256); + EXPECT_EQ(U64_MAX, 18446744073709551615_u256); + EXPECT_EQ(U64_MAX, 0xFFFFFFFF'FFFFFFFF_u256); + EXPECT_EQ( + U64_MAX, + 0b1111111111111111'1111111111111111'1111111111111111'1111111111111111_u256); + EXPECT_EQ(U128_MAX, 0xFFFFFFFF'FFFFFFFF'FFFFFFFF'FFFFFFFF_u256); + EXPECT_EQ( + U256_MAX, + 0xFFFFFFFF'FFFFFFFF'FFFFFFFF'FFFFFFFF'FFFFFFFF'FFFFFFFF'FFFFFFFF'FFFFFFFF_u256); +} diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 30c180b5a8b473..fde2bac746f4f8 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -472,6 +472,15 @@ libc_support_library( ], ) +libc_support_library( + name = "__support_integer_literals", + hdrs = ["src/__support/integer_literals.h"], + deps = [ + ":__support_cpp_limits", + ":__support_uint128", + ], +) + libc_support_library( name = "__support_str_to_num_result", hdrs = ["src/__support/str_to_num_result.h"], diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel index 22f4d03ee900b6..e691d3c3d2ebdd 100644 --- a/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel @@ -99,3 +99,11 @@ libc_test( 
"//libc:__support_char_vector", ], ) + +libc_test( + name = "integer_literals_test", + srcs = ["integer_literals_test.cpp"], + deps = [ + "//libc:__support_integer_literals", + ], +) diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/__support/FPUtil/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/__support/FPUtil/BUILD.bazel index 461d5127a42a74..76443fc5d9f85b 100644 --- a/utils/bazel/llvm-project-overlay/libc/test/src/__support/FPUtil/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/test/src/__support/FPUtil/BUILD.bazel @@ -16,6 +16,7 @@ libc_test( deps = [ "//libc:__support_fputil_fp_bits", "//libc:__support_fputil_fpbits_str", + "//libc:__support_integer_literals", ], ) From f723260a80baee08394093fa06777dcc55b3ee4a Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Wed, 14 Feb 2024 13:16:20 +0000 Subject: [PATCH 132/240] [TableGen] Stop using make_pair and make_tuple. NFC. (#81730) These are unnecessary since C++17. --- llvm/utils/TableGen/AsmMatcherEmitter.cpp | 22 ++++++------ llvm/utils/TableGen/AsmWriterEmitter.cpp | 15 ++++---- llvm/utils/TableGen/CTagsEmitter.cpp | 4 +-- llvm/utils/TableGen/CodeEmitterGen.cpp | 4 +-- llvm/utils/TableGen/CodeGenDAGPatterns.cpp | 34 +++++++++---------- llvm/utils/TableGen/CodeGenHwModes.cpp | 6 ++-- llvm/utils/TableGen/CodeGenInstAlias.cpp | 6 ++-- llvm/utils/TableGen/CodeGenInstruction.cpp | 8 ++--- llvm/utils/TableGen/CodeGenInstruction.h | 2 +- llvm/utils/TableGen/CodeGenRegisters.cpp | 34 +++++++++---------- llvm/utils/TableGen/CodeGenRegisters.h | 5 ++- llvm/utils/TableGen/CodeGenSchedule.cpp | 4 +-- llvm/utils/TableGen/CodeGenTarget.cpp | 4 +-- llvm/utils/TableGen/DAGISelMatcherEmitter.cpp | 2 +- llvm/utils/TableGen/DAGISelMatcherGen.cpp | 6 ++-- llvm/utils/TableGen/DAGISelMatcherOpt.cpp | 4 +-- llvm/utils/TableGen/DFAEmitter.cpp | 4 +-- llvm/utils/TableGen/DXILEmitter.cpp | 2 +- llvm/utils/TableGen/DecoderEmitter.cpp | 20 +++++------ llvm/utils/TableGen/FastISelEmitter.cpp | 2 +- 
.../TableGen/GlobalISelCombinerEmitter.cpp | 4 +-- llvm/utils/TableGen/GlobalISelEmitter.cpp | 4 +-- llvm/utils/TableGen/GlobalISelMatchTable.cpp | 22 ++++++------ llvm/utils/TableGen/GlobalISelMatchTable.h | 2 +- llvm/utils/TableGen/InfoByHwMode.cpp | 4 +-- llvm/utils/TableGen/InfoByHwMode.h | 4 +-- llvm/utils/TableGen/IntrinsicEmitter.cpp | 2 +- llvm/utils/TableGen/OptParserEmitter.cpp | 4 +-- .../utils/TableGen/SearchableTableEmitter.cpp | 6 ++-- llvm/utils/TableGen/SequenceToOffsetTable.h | 2 +- .../WebAssemblyDisassemblerEmitter.cpp | 2 +- .../TableGen/X86CompressEVEXTablesEmitter.cpp | 13 ++++--- llvm/utils/TableGen/X86DisassemblerTables.cpp | 4 +-- llvm/utils/TableGen/X86FoldTablesEmitter.cpp | 34 +++++++++---------- 34 files changed, 144 insertions(+), 151 deletions(-) diff --git a/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/llvm/utils/TableGen/AsmMatcherEmitter.cpp index 2c2c39a3ef54ff..3fcf2d87cc36ae 100644 --- a/llvm/utils/TableGen/AsmMatcherEmitter.cpp +++ b/llvm/utils/TableGen/AsmMatcherEmitter.cpp @@ -1277,7 +1277,7 @@ void AsmMatcherInfo::buildRegisterClasses( if (!ContainingSet.empty()) { RegisterSets.insert(ContainingSet); - RegisterMap.insert(std::make_pair(CGR.TheDef, ContainingSet)); + RegisterMap.insert(std::pair(CGR.TheDef, ContainingSet)); } } @@ -1298,7 +1298,7 @@ void AsmMatcherInfo::buildRegisterClasses( CI->DiagnosticType = ""; CI->IsOptional = false; CI->DefaultMethod = ""; // unused - RegisterSetClasses.insert(std::make_pair(RS, CI)); + RegisterSetClasses.insert(std::pair(RS, CI)); ++Index; } @@ -1340,7 +1340,7 @@ void AsmMatcherInfo::buildRegisterClasses( if (!CI->DiagnosticString.empty() && CI->DiagnosticType.empty()) CI->DiagnosticType = RC.getName(); - RegisterClassClasses.insert(std::make_pair(Def, CI)); + RegisterClassClasses.insert(std::pair(Def, CI)); } // Populate the map for individual registers. 
@@ -2193,7 +2193,7 @@ emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName, ConversionRow.push_back(SrcOp2); // Also create an 'enum' for this combination of tied operands. - auto Key = std::make_tuple(TiedOp, SrcOp1, SrcOp2); + auto Key = std::tuple(TiedOp, SrcOp1, SrcOp2); TiedOperandsEnumMap.emplace(Key, TiedTupleName); break; } @@ -2342,9 +2342,9 @@ emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName, // For a tied operand, emit a reference to the TiedAsmOperandTable // that contains the operand to copy, and the parsed operands to // check for their tied constraints. - auto Key = std::make_tuple((uint8_t)ConversionTable[Row][i + 1], - (uint8_t)ConversionTable[Row][i + 2], - (uint8_t)ConversionTable[Row][i + 3]); + auto Key = std::tuple((uint8_t)ConversionTable[Row][i + 1], + (uint8_t)ConversionTable[Row][i + 2], + (uint8_t)ConversionTable[Row][i + 3]); auto TiedOpndEnum = TiedOperandsEnumMap.find(Key); assert(TiedOpndEnum != TiedOperandsEnumMap.end() && "No record for tied operand pair"); @@ -2812,7 +2812,7 @@ emitMnemonicAliasVariant(raw_ostream &OS, const AsmMatcherInfo &Info, MatchCode += "return;"; - Cases.push_back(std::make_pair(AliasEntry.first, MatchCode)); + Cases.push_back(std::pair(AliasEntry.first, MatchCode)); } StringMatcher("Mnemonic", Cases, OS).Emit(Indent); } @@ -2979,7 +2979,7 @@ emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target, "std::end(OperandMatchTable),\n"; OS << " Mnemonic, LessOpcodeOperand());\n\n"; } else { - OS << " auto MnemonicRange = std::make_pair(std::begin(OperandMatchTable)," + OS << " auto MnemonicRange = std::pair(std::begin(OperandMatchTable)," " std::end(OperandMatchTable));\n"; OS << " if (!Mnemonic.empty())\n"; OS << " MnemonicRange =\n"; @@ -3154,7 +3154,7 @@ static void emitMnemonicChecker(raw_ostream &OS, CodeGenTarget &Target, OS << " auto MnemonicRange = " "std::equal_range(Start, End, Mnemonic, LessOpcode());\n\n"; } else { - OS << " auto MnemonicRange = std::make_pair(Start, 
End);\n"; + OS << " auto MnemonicRange = std::pair(Start, End);\n"; OS << " unsigned SIndex = Mnemonic.empty() ? 0 : 1;\n"; OS << " if (!Mnemonic.empty())\n"; OS << " MnemonicRange = " @@ -3629,7 +3629,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { OS << " auto MnemonicRange = " "std::equal_range(Start, End, Mnemonic, LessOpcode());\n\n"; } else { - OS << " auto MnemonicRange = std::make_pair(Start, End);\n"; + OS << " auto MnemonicRange = std::pair(Start, End);\n"; OS << " unsigned SIndex = Mnemonic.empty() ? 0 : 1;\n"; OS << " if (!Mnemonic.empty())\n"; OS << " MnemonicRange = " diff --git a/llvm/utils/TableGen/AsmWriterEmitter.cpp b/llvm/utils/TableGen/AsmWriterEmitter.cpp index c05991fdbb1667..a27061ee585a59 100644 --- a/llvm/utils/TableGen/AsmWriterEmitter.cpp +++ b/llvm/utils/TableGen/AsmWriterEmitter.cpp @@ -144,13 +144,12 @@ static void EmitInstructions(std::vector &Insts, raw_ostream &O, O << " switch (MI->getOpcode()) {\n"; O << " default: llvm_unreachable(\"Unexpected opcode.\");\n"; std::vector> OpsToPrint; - OpsToPrint.push_back( - std::make_pair(FirstInst.CGI->Namespace.str() + - "::" + FirstInst.CGI->TheDef->getName().str(), - FirstInst.Operands[i])); + OpsToPrint.push_back(std::pair(FirstInst.CGI->Namespace.str() + "::" + + FirstInst.CGI->TheDef->getName().str(), + FirstInst.Operands[i])); for (const AsmWriterInst &AWI : SimilarInsts) { - OpsToPrint.push_back(std::make_pair( + OpsToPrint.push_back(std::pair( AWI.CGI->Namespace.str() + "::" + AWI.CGI->TheDef->getName().str(), AWI.Operands[i])); } @@ -722,7 +721,7 @@ class IAPrinter { void addOperand(StringRef Op, int OpIdx, int PrintMethodIdx = -1) { assert(OpIdx >= 0 && OpIdx < 0xFE && "Idx out of range"); assert(PrintMethodIdx >= -1 && PrintMethodIdx < 0xFF && "Idx out of range"); - OpMap[Op] = std::make_pair(OpIdx, PrintMethodIdx); + OpMap[Op] = std::pair(OpIdx, PrintMethodIdx); } unsigned getNumMIOps() { return NumMIOps; } @@ -753,7 +752,7 @@ class IAPrinter { Next = I; } - return 
std::make_pair(StringRef(Start, I - Start), Next); + return std::pair(StringRef(Start, I - Start), Next); } std::string formatAliasString(uint32_t &UnescapedSize) { @@ -858,7 +857,7 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) { const DagInit *DI = R->getValueAsDag("ResultInst"); AliasMap[getQualifiedName(DI->getOperatorAsDef(R->getLoc()))].insert( - std::make_pair(CodeGenInstAlias(R, Target), Priority)); + std::pair(CodeGenInstAlias(R, Target), Priority)); } // A map of which conditions need to be met for each instruction operand diff --git a/llvm/utils/TableGen/CTagsEmitter.cpp b/llvm/utils/TableGen/CTagsEmitter.cpp index bda18936dc771d..e21dc36a6e959a 100644 --- a/llvm/utils/TableGen/CTagsEmitter.cpp +++ b/llvm/utils/TableGen/CTagsEmitter.cpp @@ -40,8 +40,8 @@ class Tag { Line = LineAndColumn.first; } int operator<(const Tag &B) const { - return std::make_tuple(Id, BufferIdentifier, Line) < - std::make_tuple(B.Id, B.BufferIdentifier, B.Line); + return std::tuple(Id, BufferIdentifier, Line) < + std::tuple(B.Id, B.BufferIdentifier, B.Line); } void emit(raw_ostream &OS) const { OS << Id << "\t" << BufferIdentifier << "\t" << Line << "\n"; diff --git a/llvm/utils/TableGen/CodeEmitterGen.cpp b/llvm/utils/TableGen/CodeEmitterGen.cpp index d7020d1d78b0aa..d80761d5fe35d2 100644 --- a/llvm/utils/TableGen/CodeEmitterGen.cpp +++ b/llvm/utils/TableGen/CodeEmitterGen.cpp @@ -300,11 +300,11 @@ CodeEmitterGen::getInstructionCases(Record *R, CodeGenTarget &Target) { append(" }\n"); } append(" }\n"); - return std::make_pair(std::move(Case), std::move(BitOffsetCase)); + return std::pair(std::move(Case), std::move(BitOffsetCase)); } } addInstructionCasesForEncoding(R, R, Target, Case, BitOffsetCase); - return std::make_pair(std::move(Case), std::move(BitOffsetCase)); + return std::pair(std::move(Case), std::move(BitOffsetCase)); } void CodeEmitterGen::addInstructionCasesForEncoding( diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp 
b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp index a9046e09a62976..ca6aa4b251e9b2 100644 --- a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp +++ b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp @@ -530,24 +530,24 @@ bool TypeInfer::EnforceSmallerThan(TypeSetByHwMode &Small, TypeSetByHwMode &Big, auto LT = [](MVT A, MVT B) -> bool { // Always treat non-scalable MVTs as smaller than scalable MVTs for the // purposes of ordering. - auto ASize = std::make_tuple(A.isScalableVector(), A.getScalarSizeInBits(), - A.getSizeInBits().getKnownMinValue()); - auto BSize = std::make_tuple(B.isScalableVector(), B.getScalarSizeInBits(), - B.getSizeInBits().getKnownMinValue()); + auto ASize = std::tuple(A.isScalableVector(), A.getScalarSizeInBits(), + A.getSizeInBits().getKnownMinValue()); + auto BSize = std::tuple(B.isScalableVector(), B.getScalarSizeInBits(), + B.getSizeInBits().getKnownMinValue()); return ASize < BSize; }; auto SameKindLE = [](MVT A, MVT B) -> bool { // This function is used when removing elements: when a vector is compared // to a non-vector or a scalable vector to any non-scalable MVT, it should // return false (to avoid removal). 
- if (std::make_tuple(A.isVector(), A.isScalableVector()) != - std::make_tuple(B.isVector(), B.isScalableVector())) + if (std::tuple(A.isVector(), A.isScalableVector()) != + std::tuple(B.isVector(), B.isScalableVector())) return false; - return std::make_tuple(A.getScalarSizeInBits(), - A.getSizeInBits().getKnownMinValue()) <= - std::make_tuple(B.getScalarSizeInBits(), - B.getSizeInBits().getKnownMinValue()); + return std::tuple(A.getScalarSizeInBits(), + A.getSizeInBits().getKnownMinValue()) <= + std::tuple(B.getScalarSizeInBits(), + B.getSizeInBits().getKnownMinValue()); }; for (unsigned M : Modes) { @@ -751,8 +751,8 @@ bool TypeInfer::EnforceSameNumElts(TypeSetByHwMode &V, TypeSetByHwMode &W) { namespace { struct TypeSizeComparator { bool operator()(const TypeSize &LHS, const TypeSize &RHS) const { - return std::make_tuple(LHS.isScalable(), LHS.getKnownMinValue()) < - std::make_tuple(RHS.isScalable(), RHS.getKnownMinValue()); + return std::tuple(LHS.isScalable(), LHS.getKnownMinValue()) < + std::tuple(RHS.isScalable(), RHS.getKnownMinValue()); } }; } // end anonymous namespace @@ -2988,7 +2988,7 @@ TreePatternNodePtr TreePattern::ParseTreePattern(Init *TheInit, // Check that the ComplexPattern uses are consistent: "(MY_PAT $a, $b)" // and "(MY_PAT $b, $a)" should not be allowed in the same pattern; // neither should "(MY_PAT_1 $a, $b)" and "(MY_PAT_2 $a, $b)". 
- auto OperandId = std::make_pair(Operator, i); + auto OperandId = std::pair(Operator, i); auto PrevOp = ComplexPatternOperands.find(Child->getName()); if (PrevOp != ComplexPatternOperands.end()) { if (PrevOp->getValue() != OperandId) @@ -3197,7 +3197,7 @@ void CodeGenDAGPatterns::ParseNodeInfo() { while (!Nodes.empty()) { Record *R = Nodes.back(); - SDNodes.insert(std::make_pair(R, SDNodeInfo(R, CGH))); + SDNodes.insert(std::pair(R, SDNodeInfo(R, CGH))); Nodes.pop_back(); } @@ -3217,7 +3217,7 @@ void CodeGenDAGPatterns::ParseNodeTransforms() { Record *SDNode = XFormNode->getValueAsDef("Opcode"); StringRef Code = XFormNode->getValueAsString("XFormFunction"); SDNodeXForms.insert( - std::make_pair(XFormNode, NodeXForm(SDNode, std::string(Code)))); + std::pair(XFormNode, NodeXForm(SDNode, std::string(Code)))); Xforms.pop_back(); } @@ -3227,7 +3227,7 @@ void CodeGenDAGPatterns::ParseComplexPatterns() { std::vector AMs = Records.getAllDerivedDefinitions("ComplexPattern"); while (!AMs.empty()) { - ComplexPatterns.insert(std::make_pair(AMs.back(), AMs.back())); + ComplexPatterns.insert(std::pair(AMs.back(), AMs.back())); AMs.pop_back(); } } @@ -3340,7 +3340,7 @@ void CodeGenDAGPatterns::ParseDefaultOperands() { std::vector> Ops; for (unsigned op = 0, e = DefaultInfo->getNumArgs(); op != e; ++op) Ops.push_back( - std::make_pair(DefaultInfo->getArg(op), DefaultInfo->getArgName(op))); + std::pair(DefaultInfo->getArg(op), DefaultInfo->getArgName(op))); DagInit *DI = DagInit::get(SomeSDNode, nullptr, Ops); // Create a TreePattern to parse this. 
diff --git a/llvm/utils/TableGen/CodeGenHwModes.cpp b/llvm/utils/TableGen/CodeGenHwModes.cpp index 7c08c7516054ec..fec74d29c8bbbd 100644 --- a/llvm/utils/TableGen/CodeGenHwModes.cpp +++ b/llvm/utils/TableGen/CodeGenHwModes.cpp @@ -52,7 +52,7 @@ HwModeSelect::HwModeSelect(Record *R, CodeGenHwModes &CGH) { } for (unsigned i = 0, e = Modes.size(); i != e; ++i) { unsigned ModeId = CGH.getHwModeId(Modes[i]); - Items.push_back(std::make_pair(ModeId, Objects[i])); + Items.push_back(std::pair(ModeId, Objects[i])); } } @@ -71,11 +71,11 @@ CodeGenHwModes::CodeGenHwModes(RecordKeeper &RK) : Records(RK) { if (R->getName() == DefaultModeName) continue; Modes.emplace_back(R); - ModeIds.insert(std::make_pair(R, Modes.size())); + ModeIds.insert(std::pair(R, Modes.size())); } for (Record *R : Records.getAllDerivedDefinitions("HwModeSelect")) { - auto P = ModeSelects.emplace(std::make_pair(R, HwModeSelect(R, *this))); + auto P = ModeSelects.emplace(std::pair(R, HwModeSelect(R, *this))); assert(P.second); (void)P; } diff --git a/llvm/utils/TableGen/CodeGenInstAlias.cpp b/llvm/utils/TableGen/CodeGenInstAlias.cpp index 8634d45eafc707..d217059542b1ad 100644 --- a/llvm/utils/TableGen/CodeGenInstAlias.cpp +++ b/llvm/utils/TableGen/CodeGenInstAlias.cpp @@ -227,7 +227,7 @@ CodeGenInstAlias::CodeGenInstAlias(Record *R, CodeGenTarget &T) : TheDef(R) { InstOpRec->getValueAsDef("ParserMatchClass") ->getValueAsString("Name") != "Imm")) { ResultOperands.push_back(ResOp); - ResultInstOperandIndex.push_back(std::make_pair(i, -1)); + ResultInstOperandIndex.push_back(std::pair(i, -1)); ++AliasOpNo; // Otherwise, we need to match each of the suboperands individually. @@ -242,7 +242,7 @@ CodeGenInstAlias::CodeGenInstAlias(Record *R, CodeGenTarget &T) : TheDef(R) { Result->getArgName(AliasOpNo)->getAsUnquotedString() + "." 
+ MIOI->getArgName(SubOp)->getAsUnquotedString(), SubRec); - ResultInstOperandIndex.push_back(std::make_pair(i, SubOp)); + ResultInstOperandIndex.push_back(std::pair(i, SubOp)); } ++AliasOpNo; } @@ -260,7 +260,7 @@ CodeGenInstAlias::CodeGenInstAlias(Record *R, CodeGenTarget &T) : TheDef(R) { if (tryAliasOpMatch(Result, AliasOpNo, SubRec, false, R->getLoc(), T, ResOp)) { ResultOperands.push_back(ResOp); - ResultInstOperandIndex.push_back(std::make_pair(i, SubOp)); + ResultInstOperandIndex.push_back(std::pair(i, SubOp)); ++AliasOpNo; } else { PrintFatalError( diff --git a/llvm/utils/TableGen/CodeGenInstruction.cpp b/llvm/utils/TableGen/CodeGenInstruction.cpp index a569194ef23cbc..b00b95da5fc276 100644 --- a/llvm/utils/TableGen/CodeGenInstruction.cpp +++ b/llvm/utils/TableGen/CodeGenInstruction.cpp @@ -175,7 +175,7 @@ CGIOperandList::CGIOperandList(Record *R) : TheDef(R) { } OpInfo.SubOpNames[j] = SubArgName; - SubOpAliases[SubArgName] = std::make_pair(i, j); + SubOpAliases[SubArgName] = std::pair(i, j); } } else if (!EncoderMethod.empty()) { // If we have no explicit sub-op dag, but have an top-level encoder @@ -276,7 +276,7 @@ CGIOperandList::ParseOperandName(StringRef Op, bool AllowWholeOp) { Op + "'"); // Otherwise, return the operand. - return std::make_pair(OpIdx, 0U); + return std::pair(OpIdx, 0U); } // Find the suboperand number involved. @@ -289,13 +289,13 @@ CGIOperandList::ParseOperandName(StringRef Op, bool AllowWholeOp) { // Find the operand with the right name. for (unsigned i = 0, e = MIOpInfo->getNumArgs(); i != e; ++i) if (MIOpInfo->getArgNameStr(i) == SubOpName) - return std::make_pair(OpIdx, i); + return std::pair(OpIdx, i); // Otherwise, didn't find it! 
PrintFatalError(TheDef->getLoc(), TheDef->getName() + ": unknown suboperand name in '" + Op + "'"); - return std::make_pair(0U, 0U); + return std::pair(0U, 0U); } static void ParseConstraint(StringRef CStr, CGIOperandList &Ops, Record *Rec) { diff --git a/llvm/utils/TableGen/CodeGenInstruction.h b/llvm/utils/TableGen/CodeGenInstruction.h index ca7b1e9f7f4a4f..11a3acd8e72335 100644 --- a/llvm/utils/TableGen/CodeGenInstruction.h +++ b/llvm/utils/TableGen/CodeGenInstruction.h @@ -205,7 +205,7 @@ class CGIOperandList { for (unsigned i = 0;; ++i) { assert(i < OperandList.size() && "Invalid flat operand #"); if (OperandList[i].MIOperandNo + OperandList[i].MINumOperands > Op) - return std::make_pair(i, Op - OperandList[i].MIOperandNo); + return std::pair(i, Op - OperandList[i].MIOperandNo); } } diff --git a/llvm/utils/TableGen/CodeGenRegisters.cpp b/llvm/utils/TableGen/CodeGenRegisters.cpp index 7d266c8896d8e3..5890f0f40e4059 100644 --- a/llvm/utils/TableGen/CodeGenRegisters.cpp +++ b/llvm/utils/TableGen/CodeGenRegisters.cpp @@ -281,13 +281,13 @@ CodeGenRegister::computeSubRegs(CodeGenRegBank &RegBank) { CodeGenSubRegIndex *Idx = ExplicitSubRegIndices[i]; if (!SR->Artificial) Idx->Artificial = false; - if (!SubRegs.insert(std::make_pair(Idx, SR)).second) + if (!SubRegs.insert(std::pair(Idx, SR)).second) PrintFatalError(TheDef->getLoc(), "SubRegIndex " + Idx->getName() + " appears twice in Register " + getName()); // Map explicit sub-registers first, so the names take precedence. // The inherited sub-registers are mapped below. - SubReg2Idx.insert(std::make_pair(SR, Idx)); + SubReg2Idx.insert(std::pair(SR, Idx)); } // Keep track of inherited subregs and how they can be reached. @@ -327,7 +327,7 @@ CodeGenRegister::computeSubRegs(CodeGenRegBank &RegBank) { if (SubRegs.count(Comp.second) || !Orphans.erase(SRI->second)) continue; // We found a new name for the orphaned sub-register. 
- SubRegs.insert(std::make_pair(Comp.second, SRI->second)); + SubRegs.insert(std::pair(Comp.second, SRI->second)); Indices.push_back(Comp.second); } } @@ -374,7 +374,7 @@ CodeGenRegister::computeSubRegs(CodeGenRegBank &RegBank) { // Ensure that every sub-register has a unique name. DenseMap::iterator Ins = - SubReg2Idx.insert(std::make_pair(SubReg.second, SubReg.first)).first; + SubReg2Idx.insert(std::pair(SubReg.second, SubReg.first)).first; if (Ins->second == SubReg.first) continue; // Trouble: Two different names for SubReg.second. @@ -520,7 +520,7 @@ void CodeGenRegister::computeSecondarySubRegs(CodeGenRegBank &RegBank) { // a sub-register with a concatenated sub-register index. CodeGenSubRegIndex *Concat = RegBank.getConcatSubRegIndex(Parts); std::pair NewSubReg = - std::make_pair(Concat, Cand); + std::pair(Concat, Cand); if (!SubRegs.insert(NewSubReg).second) continue; @@ -528,7 +528,7 @@ void CodeGenRegister::computeSecondarySubRegs(CodeGenRegBank &RegBank) { // We inserted a new subregister. NewSubRegs.push_back(NewSubReg); SubRegQueue.push(NewSubReg); - SubReg2Idx.insert(std::make_pair(Cand, Concat)); + SubReg2Idx.insert(std::pair(Cand, Concat)); } } @@ -1074,7 +1074,7 @@ CodeGenRegisterClass::getMatchingSubClassWithSubRegs( BitVector SuperRegClassesBV(RegClasses.size()); RC.getSuperRegClasses(SubIdx, SuperRegClassesBV); if (SuperRegClassesBV.any()) - SuperRegClasses.push_back(std::make_pair(&RC, SuperRegClassesBV)); + SuperRegClasses.push_back(std::pair(&RC, SuperRegClassesBV)); } llvm::stable_sort(SuperRegClasses, [&](const std::pair &A, @@ -1110,14 +1110,14 @@ CodeGenRegisterClass::getMatchingSubClassWithSubRegs( // aren't subregisters of SuperRegRC whereas GR32 has a direct 1:1 // mapping. 
if (SuperRegRC->getMembers().size() >= SubRegRC->getMembers().size()) - return std::make_pair(ChosenSuperRegClass, SubRegRC); + return std::pair(ChosenSuperRegClass, SubRegRC); } } // If we found a fit but it wasn't quite ideal because SubRegRC had excess // registers, then we're done. if (ChosenSuperRegClass) - return std::make_pair(ChosenSuperRegClass, SubRegRC); + return std::pair(ChosenSuperRegClass, SubRegRC); } return std::nullopt; @@ -1230,7 +1230,7 @@ CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records, // entries? (or maybe there's a reason for it - I don't know much about this // code, just drive-by refactoring) RegistersByName.insert( - std::make_pair(Reg.TheDef->getValueAsString("AsmName"), &Reg)); + std::pair(Reg.TheDef->getValueAsString("AsmName"), &Reg)); // Precompute all sub-register maps. // This will create Composite entries for all inferred sub-register indices. @@ -1242,10 +1242,10 @@ CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records, for (CodeGenSubRegIndex &SRI : SubRegIndices) { SRI.computeConcatTransitiveClosure(); if (!SRI.ConcatenationOf.empty()) - ConcatIdx.insert(std::make_pair( - SmallVector(SRI.ConcatenationOf.begin(), - SRI.ConcatenationOf.end()), - &SRI)); + ConcatIdx.insert( + std::pair(SmallVector( + SRI.ConcatenationOf.begin(), SRI.ConcatenationOf.end()), + &SRI)); } // Infer even more sub-registers by combining leading super-registers. @@ -1336,12 +1336,12 @@ CodeGenRegister *CodeGenRegBank::getReg(Record *Def) { void CodeGenRegBank::addToMaps(CodeGenRegisterClass *RC) { if (Record *Def = RC->getDef()) - Def2RC.insert(std::make_pair(Def, RC)); + Def2RC.insert(std::pair(Def, RC)); // Duplicate classes are rejected by insert(). // That's OK, we only care about the properties handled by CGRC::Key. CodeGenRegisterClass::Key K(*RC); - Key2RC.insert(std::make_pair(K, RC)); + Key2RC.insert(std::pair(K, RC)); } // Create a synthetic sub-class if it is missing. 
@@ -1472,7 +1472,7 @@ void CodeGenRegBank::computeComposites() { SmallSet UserDefined; for (const CodeGenSubRegIndex &Idx : SubRegIndices) for (auto P : Idx.getComposites()) - UserDefined.insert(std::make_pair(&Idx, P.first)); + UserDefined.insert(std::pair(&Idx, P.first)); // Keep track of TopoSigs visited. We only need to visit each TopoSig once, // and many registers will share TopoSigs on regular architectures. diff --git a/llvm/utils/TableGen/CodeGenRegisters.h b/llvm/utils/TableGen/CodeGenRegisters.h index fc5cd67e2d553d..61e8e7c857e921 100644 --- a/llvm/utils/TableGen/CodeGenRegisters.h +++ b/llvm/utils/TableGen/CodeGenRegisters.h @@ -111,8 +111,7 @@ class CodeGenSubRegIndex { CodeGenSubRegIndex *addComposite(CodeGenSubRegIndex *A, CodeGenSubRegIndex *B) { assert(A && B); - std::pair Ins = - Composed.insert(std::make_pair(A, B)); + std::pair Ins = Composed.insert(std::pair(A, B)); // Synthetic subreg indices that aren't contiguous (for instance ARM // register tuples) don't have a bit range, so it's OK to let // B->Offset == -1. For the other cases, accumulate the offset and set @@ -706,7 +705,7 @@ class CodeGenRegBank { // This function is only for use by CodeGenRegister::computeSuperRegs(). // Others should simply use Reg->getTopoSig(). 
unsigned getTopoSig(const TopoSigId &Id) { - return TopoSigs.insert(std::make_pair(Id, TopoSigs.size())).first->second; + return TopoSigs.insert(std::pair(Id, TopoSigs.size())).first->second; } // Create a native register unit that is associated with one or two root diff --git a/llvm/utils/TableGen/CodeGenSchedule.cpp b/llvm/utils/TableGen/CodeGenSchedule.cpp index e56bf5bdee634b..b4c624703626c3 100644 --- a/llvm/utils/TableGen/CodeGenSchedule.cpp +++ b/llvm/utils/TableGen/CodeGenSchedule.cpp @@ -338,7 +338,7 @@ static void processSTIPredicate(STIPredicateFunction &Fn, APInt DefaultProcMask(ProcModelMap.size(), 0); APInt DefaultPredMask(NumUniquePredicates, 0); for (std::pair &MaskPair : OpcodeMasks) - MaskPair = std::make_pair(DefaultProcMask, DefaultPredMask); + MaskPair = std::pair(DefaultProcMask, DefaultPredMask); // Construct a OpcodeInfo object for every unique opcode declared by an // InstructionEquivalenceClass definition. @@ -564,7 +564,7 @@ void CodeGenSchedModels::collectProcModels() { /// ProcessorItineraries. 
void CodeGenSchedModels::addProcModel(Record *ProcDef) { Record *ModelKey = getModelOrItinDef(ProcDef); - if (!ProcModelMap.insert(std::make_pair(ModelKey, ProcModels.size())).second) + if (!ProcModelMap.insert(std::pair(ModelKey, ProcModels.size())).second) return; std::string Name = std::string(ModelKey->getName()); diff --git a/llvm/utils/TableGen/CodeGenTarget.cpp b/llvm/utils/TableGen/CodeGenTarget.cpp index 8e2957efe85406..f26815c2f184fa 100644 --- a/llvm/utils/TableGen/CodeGenTarget.cpp +++ b/llvm/utils/TableGen/CodeGenTarget.cpp @@ -534,8 +534,8 @@ void CodeGenTarget::ComputeInstrsByEnum() const { [](const CodeGenInstruction *Rec1, const CodeGenInstruction *Rec2) { const auto &D1 = *Rec1->TheDef; const auto &D2 = *Rec2->TheDef; - return std::make_tuple(!D1.getValueAsBit("isPseudo"), D1.getName()) < - std::make_tuple(!D2.getValueAsBit("isPseudo"), D2.getName()); + return std::tuple(!D1.getValueAsBit("isPseudo"), D1.getName()) < + std::tuple(!D2.getValueAsBit("isPseudo"), D2.getName()); }); // Assign an enum value to each instruction according to the sorted order. 
diff --git a/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp b/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp index b475c98d30a6e0..533b8c42369029 100644 --- a/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp +++ b/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp @@ -73,7 +73,7 @@ class MatcherTableEmitter { unsigned getPatternIdxFromTable(std::string &&P, std::string &&include_loc) { const auto It = VecPatterns.find(P); if (It == VecPatterns.end()) { - VecPatterns.insert(make_pair(std::move(P), VecPatterns.size())); + VecPatterns.insert(std::pair(std::move(P), VecPatterns.size())); VecIncludeStrings.push_back(std::move(include_loc)); return VecIncludeStrings.size() - 1; } diff --git a/llvm/utils/TableGen/DAGISelMatcherGen.cpp b/llvm/utils/TableGen/DAGISelMatcherGen.cpp index 956cb5e4a65aad..e8bdabaa0c7e89 100644 --- a/llvm/utils/TableGen/DAGISelMatcherGen.cpp +++ b/llvm/utils/TableGen/DAGISelMatcherGen.cpp @@ -252,7 +252,7 @@ void MatcherGen::EmitLeafMatchCode(const TreePatternNode &N) { if (LeafRec->isSubClassOf("Register")) { AddMatcher(new RecordMatcher("physreg input " + LeafRec->getName().str(), NextRecordedOperandNo)); - PhysRegInputs.push_back(std::make_pair(LeafRec, NextRecordedOperandNo++)); + PhysRegInputs.push_back(std::pair(LeafRec, NextRecordedOperandNo++)); return; } @@ -272,7 +272,7 @@ void MatcherGen::EmitLeafMatchCode(const TreePatternNode &N) { // Remember this ComplexPattern so that we can emit it after all the other // structural matches are done. 
unsigned InputOperand = VariableMap[N.getName()] - 1; - MatchedComplexPatterns.push_back(std::make_pair(&N, InputOperand)); + MatchedComplexPatterns.push_back(std::pair(&N, InputOperand)); return; } @@ -313,7 +313,7 @@ void MatcherGen::EmitOperatorMatchCode(const TreePatternNode &N, } if (recordUniqueNode(PatternName)) { - auto NodeAndOpNum = std::make_pair(&N, NextRecordedOperandNo - 1); + auto NodeAndOpNum = std::pair(&N, NextRecordedOperandNo - 1); MatchedComplexPatterns.push_back(NodeAndOpNum); } diff --git a/llvm/utils/TableGen/DAGISelMatcherOpt.cpp b/llvm/utils/TableGen/DAGISelMatcherOpt.cpp index f786d41c88d7ce..047d285f9914e5 100644 --- a/llvm/utils/TableGen/DAGISelMatcherOpt.cpp +++ b/llvm/utils/TableGen/DAGISelMatcherOpt.cpp @@ -425,7 +425,7 @@ static void FactorNodes(std::unique_ptr &InputMatcherPtr) { CheckOpcodeMatcher *COM = cast(OptionsToMatch[i]); assert(Opcodes.insert(COM->getOpcode().getEnumName()).second && "Duplicate opcodes not factored?"); - Cases.push_back(std::make_pair(&COM->getOpcode(), COM->takeNext())); + Cases.push_back(std::pair(&COM->getOpcode(), COM->takeNext())); delete COM; } @@ -462,7 +462,7 @@ static void FactorNodes(std::unique_ptr &InputMatcherPtr) { } Entry = Cases.size() + 1; - Cases.push_back(std::make_pair(CTMTy, MatcherWithoutCTM)); + Cases.push_back(std::pair(CTMTy, MatcherWithoutCTM)); } // Make sure we recursively factor any scopes we may have created. 
diff --git a/llvm/utils/TableGen/DFAEmitter.cpp b/llvm/utils/TableGen/DFAEmitter.cpp index 0d22ad251fba41..ce8cc2a078d717 100644 --- a/llvm/utils/TableGen/DFAEmitter.cpp +++ b/llvm/utils/TableGen/DFAEmitter.cpp @@ -81,7 +81,7 @@ void DfaEmitter::visitDfaState(const DfaState &DS) { sort(TI); TI.erase(std::unique(TI.begin(), TI.end()), TI.end()); unsigned ToId = DfaStates.insert(NewStates); - DfaTransitions.emplace(std::make_pair(FromId, A), std::make_pair(ToId, TI)); + DfaTransitions.emplace(std::pair(FromId, A), std::pair(ToId, TI)); } } @@ -353,7 +353,7 @@ void CustomDfaEmitter::printActionType(raw_ostream &OS) { OS << TypeName; } void CustomDfaEmitter::printActionValue(action_type A, raw_ostream &OS) { const ActionTuple &AT = Actions[A]; if (AT.size() > 1) - OS << "std::make_tuple("; + OS << "std::tuple("; ListSeparator LS; for (const auto &SingleAction : AT) { OS << LS; diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp index 3378a904ac404d..768e8052975b7f 100644 --- a/llvm/utils/TableGen/DXILEmitter.cpp +++ b/llvm/utils/TableGen/DXILEmitter.cpp @@ -245,7 +245,7 @@ static void emitDXILEnums(std::vector &Ops, std::vector> ClassVec; for (auto &It : ClassMap) { ClassVec.emplace_back( - std::make_pair(It.getKey().str(), buildCategoryStr(It.second))); + std::pair(It.getKey().str(), buildCategoryStr(It.second))); } // Sort by Category + ClassName. llvm::sort(ClassVec, [](std::pair &A, diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp index 2f28ccbe6d6ceb..02d9527fd9ed91 100644 --- a/llvm/utils/TableGen/DecoderEmitter.cpp +++ b/llvm/utils/TableGen/DecoderEmitter.cpp @@ -614,7 +614,7 @@ void Filter::recurse() { // Delegates to an inferior filter chooser for further processing on this // group of instructions whose segment values are variable. 
- FilterChooserMap.insert(std::make_pair( + FilterChooserMap.insert(std::pair( NO_FIXED_SEGMENTS_SENTINEL, std::make_unique(Owner->AllInstructions, VariableInstructions, Owner->Operands, @@ -641,10 +641,10 @@ void Filter::recurse() { // Delegates to an inferior filter chooser for further processing on this // category of instructions. - FilterChooserMap.insert(std::make_pair( - Inst.first, std::make_unique( - Owner->AllInstructions, Inst.second, Owner->Operands, - BitValueArray, *Owner))); + FilterChooserMap.insert( + std::pair(Inst.first, std::make_unique( + Owner->AllInstructions, Inst.second, + Owner->Operands, BitValueArray, *Owner))); } } @@ -1908,7 +1908,7 @@ void parseVarLenInstOperand(const Record &Def, int TiedReg = TiedTo[OpSubOpPair.first]; if (TiedReg != -1) { unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber( - std::make_pair(TiedReg, OpSubOpPair.second)); + std::pair(TiedReg, OpSubOpPair.second)); Operands[OpIdx].addField(CurrBitPos, EncodingSegment.BitWidth, Offset); } } @@ -2005,11 +2005,9 @@ populateInstruction(CodeGenTarget &Target, const Record &EncodingDef, DagInit *Out = Def.getValueAsDag("OutOperandList"); DagInit *In = Def.getValueAsDag("InOperandList"); for (unsigned i = 0; i < Out->getNumArgs(); ++i) - InOutOperands.push_back( - std::make_pair(Out->getArg(i), Out->getArgNameStr(i))); + InOutOperands.push_back(std::pair(Out->getArg(i), Out->getArgNameStr(i))); for (unsigned i = 0; i < In->getNumArgs(); ++i) - InOutOperands.push_back( - std::make_pair(In->getArg(i), In->getArgNameStr(i))); + InOutOperands.push_back(std::pair(In->getArg(i), In->getArgNameStr(i))); // Search for tied operands, so that we can correctly instantiate // operands that are not explicitly represented in the encoding. 
@@ -2545,7 +2543,7 @@ void DecoderEmitter::run(raw_ostream &o) { if (!NumberedEncodings[i].HwModeName.empty()) DecoderNamespace += std::string("_") + NumberedEncodings[i].HwModeName.str(); - OpcMap[std::make_pair(DecoderNamespace, Size)].emplace_back( + OpcMap[std::pair(DecoderNamespace, Size)].emplace_back( i, IndexOfInstruction.find(Def)->second); } else { NumEncodingsOmitted++; diff --git a/llvm/utils/TableGen/FastISelEmitter.cpp b/llvm/utils/TableGen/FastISelEmitter.cpp index 00a165030d36bf..f04c6e3b3bf05d 100644 --- a/llvm/utils/TableGen/FastISelEmitter.cpp +++ b/llvm/utils/TableGen/FastISelEmitter.cpp @@ -595,7 +595,7 @@ void FastISelMap::collectPatterns(CodeGenDAGPatterns &CGP) { int complexity = Pattern.getPatternComplexity(CGP); auto inserted_simple_pattern = SimplePatternsCheck.insert( - std::make_tuple(Operands, OpcodeName, VT, RetVT, PredicateCheck)); + std::tuple(Operands, OpcodeName, VT, RetVT, PredicateCheck)); if (!inserted_simple_pattern.second) { PrintFatalError(Pattern.getSrcRecord()->getLoc(), "Duplicate predicate in FastISel table!"); diff --git a/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp b/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp index d9249cf627f21e..dee3cb4d71a403 100644 --- a/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp +++ b/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp @@ -2956,8 +2956,8 @@ GICombinerEmitter::buildMatchTable(MutableArrayRef Rules) { const Matcher *B) { auto *L = static_cast(A); auto *R = static_cast(B); - return std::make_tuple(OpcodeOrder[L->getOpcode()], L->getNumOperands()) < - std::make_tuple(OpcodeOrder[R->getOpcode()], R->getNumOperands()); + return std::tuple(OpcodeOrder[L->getOpcode()], L->getNumOperands()) < + std::tuple(OpcodeOrder[R->getOpcode()], R->getNumOperands()); }); for (Matcher *Rule : InputRules) diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp index 13f2384efc9ce0..618cb2fedb9292 100644 --- 
a/llvm/utils/TableGen/GlobalISelEmitter.cpp +++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp @@ -2215,8 +2215,8 @@ GlobalISelEmitter::buildMatchTable(MutableArrayRef Rules, const Matcher *B) { auto *L = static_cast(A); auto *R = static_cast(B); - return std::make_tuple(OpcodeOrder[L->getOpcode()], L->getNumOperands()) < - std::make_tuple(OpcodeOrder[R->getOpcode()], R->getNumOperands()); + return std::tuple(OpcodeOrder[L->getOpcode()], L->getNumOperands()) < + std::tuple(OpcodeOrder[R->getOpcode()], R->getNumOperands()); }); for (Matcher *Rule : InputRules) diff --git a/llvm/utils/TableGen/GlobalISelMatchTable.cpp b/llvm/utils/TableGen/GlobalISelMatchTable.cpp index d1bdc30849a7f6..45fb41b89f2787 100644 --- a/llvm/utils/TableGen/GlobalISelMatchTable.cpp +++ b/llvm/utils/TableGen/GlobalISelMatchTable.cpp @@ -408,18 +408,18 @@ bool LLTCodeGen::operator<(const LLTCodeGen &Other) const { return Ty.getAddressSpace() < Other.Ty.getAddressSpace(); if (Ty.isVector() && Ty.getElementCount() != Other.Ty.getElementCount()) - return std::make_tuple(Ty.isScalable(), - Ty.getElementCount().getKnownMinValue()) < - std::make_tuple(Other.Ty.isScalable(), - Other.Ty.getElementCount().getKnownMinValue()); + return std::tuple(Ty.isScalable(), + Ty.getElementCount().getKnownMinValue()) < + std::tuple(Other.Ty.isScalable(), + Other.Ty.getElementCount().getKnownMinValue()); assert((!Ty.isVector() || Ty.isScalable() == Other.Ty.isScalable()) && "Unexpected mismatch of scalable property"); return Ty.isVector() - ? std::make_tuple(Ty.isScalable(), - Ty.getSizeInBits().getKnownMinValue()) < - std::make_tuple(Other.Ty.isScalable(), - Other.Ty.getSizeInBits().getKnownMinValue()) + ? 
std::tuple(Ty.isScalable(), + Ty.getSizeInBits().getKnownMinValue()) < + std::tuple(Other.Ty.isScalable(), + Other.Ty.getSizeInBits().getKnownMinValue()) : Ty.getSizeInBits().getFixedValue() < Other.Ty.getSizeInBits().getFixedValue(); } @@ -720,8 +720,8 @@ void RuleMatcher::optimize() { } llvm::sort(EpilogueMatchers, [](const std::unique_ptr &L, const std::unique_ptr &R) { - return std::make_tuple(L->getKind(), L->getInsnVarID(), L->getOpIdx()) < - std::make_tuple(R->getKind(), R->getInsnVarID(), R->getOpIdx()); + return std::tuple(L->getKind(), L->getInsnVarID(), L->getOpIdx()) < + std::tuple(R->getKind(), R->getInsnVarID(), R->getOpIdx()); }); } @@ -822,7 +822,7 @@ Error RuleMatcher::defineComplexSubOperand(StringRef SymbolicName, } ComplexSubOperands[SymbolicName] = - std::make_tuple(ComplexPattern, RendererID, SubOperandID); + std::tuple(ComplexPattern, RendererID, SubOperandID); ComplexSubOperandsParentName[SymbolicName] = ParentName; return Error::success(); diff --git a/llvm/utils/TableGen/GlobalISelMatchTable.h b/llvm/utils/TableGen/GlobalISelMatchTable.h index 635552fc890406..b1ab7da8db4461 100644 --- a/llvm/utils/TableGen/GlobalISelMatchTable.h +++ b/llvm/utils/TableGen/GlobalISelMatchTable.h @@ -232,7 +232,7 @@ class MatchTable { unsigned allocateLabelID() { return CurrentLabelID++; } void defineLabel(unsigned LabelID) { - LabelMap.insert(std::make_pair(LabelID, CurrentSize)); + LabelMap.insert(std::pair(LabelID, CurrentSize)); } unsigned getLabelIndex(unsigned LabelID) const { diff --git a/llvm/utils/TableGen/InfoByHwMode.cpp b/llvm/utils/TableGen/InfoByHwMode.cpp index 6d9a35a68e138e..4a64421c013c5f 100644 --- a/llvm/utils/TableGen/InfoByHwMode.cpp +++ b/llvm/utils/TableGen/InfoByHwMode.cpp @@ -69,9 +69,9 @@ MVT &ValueTypeByHwMode::getOrCreateTypeForMode(unsigned Mode, MVT Type) { // make a copy of it for Mode and return it. 
auto D = Map.begin(); if (D != Map.end() && D->first == DefaultMode) - return Map.insert(std::make_pair(Mode, D->second)).first->second; + return Map.insert(std::pair(Mode, D->second)).first->second; // If default mode is not present either, use provided Type. - return Map.insert(std::make_pair(Mode, Type)).first->second; + return Map.insert(std::pair(Mode, Type)).first->second; } StringRef ValueTypeByHwMode::getMVTName(MVT T) { diff --git a/llvm/utils/TableGen/InfoByHwMode.h b/llvm/utils/TableGen/InfoByHwMode.h index 5f532958414888..001509e5317f95 100644 --- a/llvm/utils/TableGen/InfoByHwMode.h +++ b/llvm/utils/TableGen/InfoByHwMode.h @@ -144,7 +144,7 @@ template struct InfoByHwMode { assert(hasMode(Mode) || hasDefault()); InfoT I = get(Mode); Map.clear(); - Map.insert(std::make_pair(DefaultMode, I)); + Map.insert(std::pair(DefaultMode, I)); } protected: @@ -209,7 +209,7 @@ struct RegSizeInfoByHwMode : public InfoByHwMode { void writeToStream(raw_ostream &OS) const; void insertRegSizeForMode(unsigned Mode, RegSizeInfo Info) { - Map.insert(std::make_pair(Mode, Info)); + Map.insert(std::pair(Mode, Info)); } }; diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp index f7ae5ed8a6b3cb..50a34eac7ca35a 100644 --- a/llvm/utils/TableGen/IntrinsicEmitter.cpp +++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp @@ -637,7 +637,7 @@ void IntrinsicEmitter::EmitIntrinsicToBuiltinMap( std::map &BIM = BuiltinMap[Ints[i].TargetPrefix]; - if (!BIM.insert(std::make_pair(BuiltinName, Ints[i].EnumName)).second) + if (!BIM.insert(std::pair(BuiltinName, Ints[i].EnumName)).second) PrintFatalError(Ints[i].TheDef->getLoc(), "Intrinsic '" + Ints[i].TheDef->getName() + "': duplicate " + CompilerName + " builtin name!"); diff --git a/llvm/utils/TableGen/OptParserEmitter.cpp b/llvm/utils/TableGen/OptParserEmitter.cpp index 0f081197b5b826..c25f6c59cab383 100644 --- a/llvm/utils/TableGen/OptParserEmitter.cpp +++ b/llvm/utils/TableGen/OptParserEmitter.cpp @@ 
-207,14 +207,14 @@ static void EmitOptParser(RecordKeeper &Records, raw_ostream &OS) { typedef SmallVector, 2> PrefixKeyT; typedef std::map PrefixesT; PrefixesT Prefixes; - Prefixes.insert(std::make_pair(PrefixKeyT(), "prefix_0")); + Prefixes.insert(std::pair(PrefixKeyT(), "prefix_0")); unsigned CurPrefix = 0; for (const Record &R : llvm::make_pointee_range(Opts)) { std::vector RPrefixes = R.getValueAsListOfStrings("Prefixes"); PrefixKeyT PrefixKey(RPrefixes.begin(), RPrefixes.end()); unsigned NewPrefix = CurPrefix + 1; std::string Prefix = (Twine("prefix_") + Twine(NewPrefix)).str(); - if (Prefixes.insert(std::make_pair(PrefixKey, Prefix)).second) + if (Prefixes.insert(std::pair(PrefixKey, Prefix)).second) CurPrefix = NewPrefix; } diff --git a/llvm/utils/TableGen/SearchableTableEmitter.cpp b/llvm/utils/TableGen/SearchableTableEmitter.cpp index 0cce798a27b4ab..5bab4ff188e8ed 100644 --- a/llvm/utils/TableGen/SearchableTableEmitter.cpp +++ b/llvm/utils/TableGen/SearchableTableEmitter.cpp @@ -608,7 +608,7 @@ void SearchableTableEmitter::collectEnumEntries( Value = getInt(EntryRec, ValueField); Enum.Entries.push_back(std::make_unique(Name, Value)); - Enum.EntryMap.insert(std::make_pair(EntryRec, Enum.Entries.back().get())); + Enum.EntryMap.insert(std::pair(EntryRec, Enum.Entries.back().get())); } if (ValueField.empty()) { @@ -708,7 +708,7 @@ void SearchableTableEmitter::run(raw_ostream &OS) { collectEnumEntries(*Enum, NameField, ValueField, Records.getAllDerivedDefinitions(FilterClass)); - EnumMap.insert(std::make_pair(EnumRec, Enum.get())); + EnumMap.insert(std::pair(EnumRec, Enum.get())); Enums.emplace_back(std::move(Enum)); } @@ -773,7 +773,7 @@ void SearchableTableEmitter::run(raw_ostream &OS) { }); } - TableMap.insert(std::make_pair(TableRec, Table.get())); + TableMap.insert(std::pair(TableRec, Table.get())); Tables.emplace_back(std::move(Table)); } diff --git a/llvm/utils/TableGen/SequenceToOffsetTable.h b/llvm/utils/TableGen/SequenceToOffsetTable.h index 
7db39a9af72092..5766b682b58a38 100644 --- a/llvm/utils/TableGen/SequenceToOffsetTable.h +++ b/llvm/utils/TableGen/SequenceToOffsetTable.h @@ -87,7 +87,7 @@ class SequenceToOffsetTable { if (I != Seqs.end() && isSuffix(Seq, I->first)) return; - I = Seqs.insert(I, std::make_pair(Seq, 0u)); + I = Seqs.insert(I, std::pair(Seq, 0u)); // The entry before I may be a suffix of Seq that can now be erased. if (I != Seqs.begin() && isSuffix((--I)->first, Seq)) diff --git a/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp b/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp index 2cf86d38a2120b..928129f24fcb49 100644 --- a/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp +++ b/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp @@ -80,7 +80,7 @@ void emitWebAssemblyDisassemblerTables( } } // Set this instruction as the one to use. - CGIP = std::make_pair(I, &CGI); + CGIP = std::pair(I, &CGI); } OS << "#include \"MCTargetDesc/WebAssemblyMCTargetDesc.h\"\n"; OS << "\n"; diff --git a/llvm/utils/TableGen/X86CompressEVEXTablesEmitter.cpp b/llvm/utils/TableGen/X86CompressEVEXTablesEmitter.cpp index e4db995d47f7e6..b96d16b9797cf3 100644 --- a/llvm/utils/TableGen/X86CompressEVEXTablesEmitter.cpp +++ b/llvm/utils/TableGen/X86CompressEVEXTablesEmitter.cpp @@ -120,12 +120,11 @@ class IsMatch { RecognizableInstrBase OldRI(*OldInst); // Return false if any of the following fields of does not match. 
- if (std::make_tuple(OldRI.IsCodeGenOnly, OldRI.OpMap, NewRI.OpPrefix, - OldRI.HasVEX_4V, OldRI.HasVEX_L, OldRI.HasREX_W, - OldRI.Form) != - std::make_tuple(NewRI.IsCodeGenOnly, NewRI.OpMap, OldRI.OpPrefix, - NewRI.HasVEX_4V, NewRI.HasVEX_L, NewRI.HasREX_W, - NewRI.Form)) + if (std::tuple(OldRI.IsCodeGenOnly, OldRI.OpMap, NewRI.OpPrefix, + OldRI.HasVEX_4V, OldRI.HasVEX_L, OldRI.HasREX_W, + OldRI.Form) != + std::tuple(NewRI.IsCodeGenOnly, NewRI.OpMap, OldRI.OpPrefix, + NewRI.HasVEX_4V, NewRI.HasVEX_L, NewRI.HasREX_W, NewRI.Form)) return false; for (unsigned I = 0, E = OldInst->Operands.size(); I < E; ++I) { @@ -219,7 +218,7 @@ void X86CompressEVEXTablesEmitter::run(raw_ostream &OS) { if (!NewInst) continue; - Table.push_back(std::make_pair(Inst, NewInst)); + Table.push_back(std::pair(Inst, NewInst)); auto Predicates = NewInst->TheDef->getValueAsListOfDefs("Predicates"); auto It = llvm::find_if(Predicates, [](const Record *R) { StringRef Name = R->getName(); diff --git a/llvm/utils/TableGen/X86DisassemblerTables.cpp b/llvm/utils/TableGen/X86DisassemblerTables.cpp index 588d9b22a772a3..a48b9cfe42e37f 100644 --- a/llvm/utils/TableGen/X86DisassemblerTables.cpp +++ b/llvm/utils/TableGen/X86DisassemblerTables.cpp @@ -845,7 +845,7 @@ void DisassemblerTables::emitInstructionInfo(raw_ostream &o, for (auto Operand : InstructionSpecifiers[Index].operands) { OperandEncoding Encoding = (OperandEncoding)Operand.encoding; OperandType Type = (OperandType)Operand.type; - OperandList.push_back(std::make_pair(Encoding, Type)); + OperandList.push_back(std::pair(Encoding, Type)); } unsigned &N = OperandSets[OperandList]; if (N != 0) @@ -877,7 +877,7 @@ void DisassemblerTables::emitInstructionInfo(raw_ostream &o, for (auto Operand : InstructionSpecifiers[index].operands) { OperandEncoding Encoding = (OperandEncoding)Operand.encoding; OperandType Type = (OperandType)Operand.type; - OperandList.push_back(std::make_pair(Encoding, Type)); + OperandList.push_back(std::pair(Encoding, Type)); 
} o.indent(i * 2) << (OperandSets[OperandList] - 1) << ",\n"; diff --git a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp index 44c2817e1b5d68..1319042e48d060 100644 --- a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp +++ b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp @@ -171,8 +171,8 @@ class X86FoldTablesEmitter { assert(LHS && RHS && "LHS and RHS shouldn't be nullptr"); const auto &D1 = *LHS->TheDef; const auto &D2 = *RHS->TheDef; - return std::make_tuple(!D1.getValueAsBit("isPseudo"), D1.getName()) < - std::make_tuple(!D2.getValueAsBit("isPseudo"), D2.getName()); + return std::tuple(!D1.getValueAsBit("isPseudo"), D1.getName()) < + std::tuple(!D2.getValueAsBit("isPseudo"), D2.getName()); } }; @@ -372,22 +372,20 @@ class IsMatch { return false; // Return false if any of the following fields of does not match. - if (std::make_tuple(RegRI.Encoding, RegRI.Opcode, RegRI.OpPrefix, - RegRI.OpMap, RegRI.OpSize, RegRI.AdSize, RegRI.HasREX_W, - RegRI.HasVEX_4V, RegRI.HasVEX_L, RegRI.IgnoresVEX_L, - RegRI.IgnoresW, RegRI.HasEVEX_K, RegRI.HasEVEX_KZ, - RegRI.HasEVEX_L2, RegRI.HasEVEX_NF, - RegRec->getValueAsBit("hasEVEX_RC"), - RegRec->getValueAsBit("hasLockPrefix"), - RegRec->getValueAsBit("hasNoTrackPrefix")) != - std::make_tuple(MemRI.Encoding, MemRI.Opcode, MemRI.OpPrefix, - MemRI.OpMap, MemRI.OpSize, MemRI.AdSize, MemRI.HasREX_W, - MemRI.HasVEX_4V, MemRI.HasVEX_L, MemRI.IgnoresVEX_L, - MemRI.IgnoresW, MemRI.HasEVEX_K, MemRI.HasEVEX_KZ, - MemRI.HasEVEX_L2, MemRI.HasEVEX_NF, - MemRec->getValueAsBit("hasEVEX_RC"), - MemRec->getValueAsBit("hasLockPrefix"), - MemRec->getValueAsBit("hasNoTrackPrefix"))) + if (std::tuple(RegRI.Encoding, RegRI.Opcode, RegRI.OpPrefix, RegRI.OpMap, + RegRI.OpSize, RegRI.AdSize, RegRI.HasREX_W, RegRI.HasVEX_4V, + RegRI.HasVEX_L, RegRI.IgnoresVEX_L, RegRI.IgnoresW, + RegRI.HasEVEX_K, RegRI.HasEVEX_KZ, RegRI.HasEVEX_L2, + RegRI.HasEVEX_NF, RegRec->getValueAsBit("hasEVEX_RC"), + 
RegRec->getValueAsBit("hasLockPrefix"), + RegRec->getValueAsBit("hasNoTrackPrefix")) != + std::tuple(MemRI.Encoding, MemRI.Opcode, MemRI.OpPrefix, MemRI.OpMap, + MemRI.OpSize, MemRI.AdSize, MemRI.HasREX_W, MemRI.HasVEX_4V, + MemRI.HasVEX_L, MemRI.IgnoresVEX_L, MemRI.IgnoresW, + MemRI.HasEVEX_K, MemRI.HasEVEX_KZ, MemRI.HasEVEX_L2, + MemRI.HasEVEX_NF, MemRec->getValueAsBit("hasEVEX_RC"), + MemRec->getValueAsBit("hasLockPrefix"), + MemRec->getValueAsBit("hasNoTrackPrefix"))) return false; // Make sure the sizes of the operands of both instructions suit each other. From 6c84709eff20460a75fb58d2face54432c133967 Mon Sep 17 00:00:00 2001 From: David Green Date: Wed, 14 Feb 2024 13:55:51 +0000 Subject: [PATCH 133/240] [AArch64] Materialize constants via fneg. (#80641) This is something that is already done as a special case for copysign, this patch extends it to be more generally applied. If we are trying to materialize a negative constant (notably -0.0, 0x80000000), then there may be no movi encoding that creates the immediate, but a fneg(movi) might. Some of the existing patterns for RADDHN needed to be adjusted to keep them in line with the new immediates. 
--- .../Target/AArch64/AArch64ISelLowering.cpp | 101 ++++++++++++------ llvm/lib/Target/AArch64/AArch64InstrInfo.td | 16 +-- .../CodeGen/AArch64/arm64-build-vector.ll | 4 +- llvm/test/CodeGen/AArch64/fabs-combine.ll | 4 +- llvm/test/CodeGen/AArch64/fcvt_combine.ll | 8 +- llvm/test/CodeGen/AArch64/neon-mov.ll | 50 ++++++--- .../AArch64/srem-seteq-vec-nonsplat.ll | 60 +++++------ .../CodeGen/AArch64/urem-seteq-vec-nonzero.ll | 5 +- llvm/test/CodeGen/AArch64/urem-vector-lkk.ll | 14 +-- 9 files changed, 156 insertions(+), 106 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index a3b7e3128ac1a4..8c5a4cdae11634 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -12935,42 +12935,74 @@ static SDValue NormalizeBuildVector(SDValue Op, return DAG.getBuildVector(VT, dl, Ops); } -static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG) { +static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG, + const AArch64Subtarget *ST) { EVT VT = Op.getValueType(); + assert((VT.getSizeInBits() == 64 || VT.getSizeInBits() == 128) && + "Expected a legal NEON vector"); APInt DefBits(VT.getSizeInBits(), 0); APInt UndefBits(VT.getSizeInBits(), 0); BuildVectorSDNode *BVN = cast(Op.getNode()); if (resolveBuildVector(BVN, DefBits, UndefBits)) { - SDValue NewOp; - if ((NewOp = tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) || - (NewOp = tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) || - (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) || - (NewOp = tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) || - (NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) || - (NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits))) - return NewOp; - - DefBits = ~DefBits; - if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG, DefBits)) || - (NewOp = 
tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG, DefBits)) || - (NewOp = tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, DefBits))) - return NewOp; - - DefBits = UndefBits; - if ((NewOp = tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) || - (NewOp = tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) || - (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) || - (NewOp = tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) || - (NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) || - (NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits))) - return NewOp; + auto TryMOVIWithBits = [&](APInt DefBits) { + SDValue NewOp; + if ((NewOp = + tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) || + (NewOp = + tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) || + (NewOp = + tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) || + (NewOp = + tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) || + (NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) || + (NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits))) + return NewOp; + + APInt NotDefBits = ~DefBits; + if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG, + NotDefBits)) || + (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG, + NotDefBits)) || + (NewOp = + tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, NotDefBits))) + return NewOp; + return SDValue(); + }; + if (SDValue R = TryMOVIWithBits(DefBits)) + return R; + if (SDValue R = TryMOVIWithBits(UndefBits)) + return R; - DefBits = ~UndefBits; - if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG, DefBits)) || - (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG, DefBits)) || - (NewOp = tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, DefBits))) - return NewOp; + // See if a fneg of the constant can be materialized with a MOVI, etc + auto TryWithFNeg = [&](APInt DefBits, MVT FVT) { + // FNegate each 
sub-element of the constant + assert(VT.getSizeInBits() % FVT.getScalarSizeInBits() == 0); + APInt Neg = APInt::getHighBitsSet(FVT.getSizeInBits(), 1) + .zext(VT.getSizeInBits()); + APInt NegBits(VT.getSizeInBits(), 0); + unsigned NumElts = VT.getSizeInBits() / FVT.getScalarSizeInBits(); + for (unsigned i = 0; i < NumElts; i++) + NegBits |= Neg << (FVT.getScalarSizeInBits() * i); + NegBits = DefBits ^ NegBits; + + // Try to create the new constants with MOVI, and if so generate a fneg + // for it. + if (SDValue NewOp = TryMOVIWithBits(NegBits)) { + SDLoc DL(Op); + MVT VFVT = NumElts == 1 ? FVT : MVT::getVectorVT(FVT, NumElts); + return DAG.getNode( + AArch64ISD::NVCAST, DL, VT, + DAG.getNode(ISD::FNEG, DL, VFVT, + DAG.getNode(AArch64ISD::NVCAST, DL, VFVT, NewOp))); + } + return SDValue(); + }; + SDValue R; + if ((R = TryWithFNeg(DefBits, MVT::f32)) || + (R = TryWithFNeg(DefBits, MVT::f64)) || + (ST->hasFullFP16() && (R = TryWithFNeg(DefBits, MVT::f16)))) + return R; } return SDValue(); @@ -13019,7 +13051,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, return Op; } - if (SDValue V = ConstantBuildVector(Op, DAG)) + if (SDValue V = ConstantBuildVector(Op, DAG, Subtarget)) return V; // Scan through the operands to find some interesting properties we can @@ -13244,7 +13276,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, ConstantValueAPInt = C->getAPIntValue().zextOrTrunc(BitSize); if (!isNullConstant(ConstantValue) && !isNullFPConstant(ConstantValue) && !ConstantValueAPInt.isAllOnes()) { - Val = ConstantBuildVector(Val, DAG); + Val = ConstantBuildVector(Val, DAG, Subtarget); if (!Val) // Otherwise, materialize the constant and splat it. Val = DAG.getNode(AArch64ISD::DUP, dl, VT, ConstantValue); @@ -23145,9 +23177,12 @@ static SDValue performDUPCombine(SDNode *N, } /// Get rid of unnecessary NVCASTs (that don't change the type). 
-static SDValue performNVCASTCombine(SDNode *N) { +static SDValue performNVCASTCombine(SDNode *N, SelectionDAG &DAG) { if (N->getValueType(0) == N->getOperand(0).getValueType()) return N->getOperand(0); + if (N->getOperand(0).getOpcode() == AArch64ISD::NVCAST) + return DAG.getNode(AArch64ISD::NVCAST, SDLoc(N), N->getValueType(0), + N->getOperand(0).getOperand(0)); return SDValue(); } @@ -24141,7 +24176,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, case AArch64ISD::DUPLANE128: return performDupLane128Combine(N, DAG); case AArch64ISD::NVCAST: - return performNVCASTCombine(N); + return performNVCASTCombine(N, DAG); case AArch64ISD::SPLICE: return performSpliceCombine(N, DAG); case AArch64ISD::UUNPKLO: diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 9c3a6927d043ba..8c2a852850320f 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -7595,13 +7595,17 @@ defm USHR : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>; defm USRA : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra", TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >; +def VImm0080: PatLeaf<(AArch64movi_shift (i32 128), (i32 0))>; +def VImm00008000: PatLeaf<(AArch64movi_shift (i32 128), (i32 8))>; +def VImm0000000080000000: PatLeaf<(AArch64NvCast (v2f64 (fneg (AArch64NvCast (v4i32 (AArch64movi_shift (i32 128), (i32 24)))))))>; + // RADDHN patterns for when RSHRN shifts by half the size of the vector element -def : Pat<(v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), (AArch64movi_shift (i32 128), (i32 0))), (i32 8)))), +def : Pat<(v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), VImm0080), (i32 8)))), (RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>; -def : Pat<(v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), (AArch64movi_shift (i32 128), (i32 8))), (i32 16)))), +def : Pat<(v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), 
VImm00008000), (i32 16)))), (RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>; let AddedComplexity = 5 in -def : Pat<(v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), (AArch64dup (i64 2147483648))), (i32 32)))), +def : Pat<(v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), VImm0000000080000000), (i32 32)))), (RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>; def : Pat<(v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))), (RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>; @@ -7613,20 +7617,20 @@ def : Pat<(v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))), // RADDHN2 patterns for when RSHRN shifts by half the size of the vector element def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Vd), - (v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), (AArch64movi_shift (i32 128), (i32 0))), (i32 8)))))), + (v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), VImm0080), (i32 8)))))), (RADDHNv8i16_v16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>; def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Vd), - (v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), (AArch64movi_shift (i32 128), (i32 8))), (i32 16)))))), + (v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), VImm00008000), (i32 16)))))), (RADDHNv4i32_v8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>; let AddedComplexity = 5 in def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Vd), - (v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), (AArch64dup (i64 2147483648))), (i32 32)))))), + (v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), VImm0000000080000000), (i32 32)))))), (RADDHNv2i64_v4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>; diff --git a/llvm/test/CodeGen/AArch64/arm64-build-vector.ll b/llvm/test/CodeGen/AArch64/arm64-build-vector.ll index e4fbf0765dcd2c..82802c79c70858 100644 --- a/llvm/test/CodeGen/AArch64/arm64-build-vector.ll +++ 
b/llvm/test/CodeGen/AArch64/arm64-build-vector.ll @@ -120,8 +120,8 @@ define <2 x double> @poszero_v2f64(<2 x double> %a) { define <2 x double> @negzero_v2f64(<2 x double> %a) { ; CHECK-LABEL: negzero_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 -; CHECK-NEXT: dup v1.2d, x8 +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: fneg v1.2d, v1.2d ; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d ; CHECK-NEXT: ret %b = fmul <2 x double> %a, diff --git a/llvm/test/CodeGen/AArch64/fabs-combine.ll b/llvm/test/CodeGen/AArch64/fabs-combine.ll index 23bf7a699195f7..d083f2006575b1 100644 --- a/llvm/test/CodeGen/AArch64/fabs-combine.ll +++ b/llvm/test/CodeGen/AArch64/fabs-combine.ll @@ -71,8 +71,8 @@ define <4 x float> @nabsv4f32(<4 x float> %a) { define <2 x double> @nabsv2d64(<2 x double> %a) { ; CHECK-LABEL: nabsv2d64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-9223372036854775808 -; CHECK-NEXT: dup v1.2d, x8 +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: fneg v1.2d, v1.2d ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %conv = bitcast <2 x double> %a to <2 x i64> diff --git a/llvm/test/CodeGen/AArch64/fcvt_combine.ll b/llvm/test/CodeGen/AArch64/fcvt_combine.ll index b5b9055fbc02f8..37133cf0aa1df9 100644 --- a/llvm/test/CodeGen/AArch64/fcvt_combine.ll +++ b/llvm/test/CodeGen/AArch64/fcvt_combine.ll @@ -110,8 +110,8 @@ define <2 x i32> @test9(<2 x float> %f) { define <2 x i32> @test10(<2 x float> %f) { ; CHECK-LABEL: test10: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #2143289344 // =0x7fc00000 -; CHECK-NEXT: dup v0.2s, w8 +; CHECK-NEXT: mvni v0.2s, #63, msl #16 +; CHECK-NEXT: fneg v0.2s, v0.2s ; CHECK-NEXT: fcvtzu v0.2s, v0.2s ; CHECK-NEXT: ret %mul.i = fmul <2 x float> %f, @@ -426,8 +426,8 @@ define <2 x i32> @test9_sat(<2 x float> %f) { define <2 x i32> @test10_sat(<2 x float> %f) { ; CHECK-LABEL: test10_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #2143289344 // =0x7fc00000 -; CHECK-NEXT: dup v0.2s, w8 
+; CHECK-NEXT: mvni v0.2s, #63, msl #16 +; CHECK-NEXT: fneg v0.2s, v0.2s ; CHECK-NEXT: fcvtzu v0.2s, v0.2s ; CHECK-NEXT: ret %mul.i = fmul <2 x float> %f, diff --git a/llvm/test/CodeGen/AArch64/neon-mov.ll b/llvm/test/CodeGen/AArch64/neon-mov.ll index 219c8b53243e60..7effdc97993c18 100644 --- a/llvm/test/CodeGen/AArch64/neon-mov.ll +++ b/llvm/test/CodeGen/AArch64/neon-mov.ll @@ -111,16 +111,14 @@ define <4 x i32> @movi4s_lsl16() { define <4 x i32> @movi4s_fneg() { ; CHECK-NOFP16-SD-LABEL: movi4s_fneg: ; CHECK-NOFP16-SD: // %bb.0: -; CHECK-NOFP16-SD-NEXT: mov w8, #61440 // =0xf000 -; CHECK-NOFP16-SD-NEXT: movk w8, #32768, lsl #16 -; CHECK-NOFP16-SD-NEXT: dup v0.4s, w8 +; CHECK-NOFP16-SD-NEXT: movi v0.4s, #240, lsl #8 +; CHECK-NOFP16-SD-NEXT: fneg v0.4s, v0.4s ; CHECK-NOFP16-SD-NEXT: ret ; ; CHECK-FP16-SD-LABEL: movi4s_fneg: ; CHECK-FP16-SD: // %bb.0: -; CHECK-FP16-SD-NEXT: mov w8, #61440 // =0xf000 -; CHECK-FP16-SD-NEXT: movk w8, #32768, lsl #16 -; CHECK-FP16-SD-NEXT: dup v0.4s, w8 +; CHECK-FP16-SD-NEXT: movi v0.4s, #240, lsl #8 +; CHECK-FP16-SD-NEXT: fneg v0.4s, v0.4s ; CHECK-FP16-SD-NEXT: ret ; ; CHECK-NOFP16-GI-LABEL: movi4s_fneg: @@ -178,11 +176,29 @@ define <8 x i16> @movi8h_lsl8() { } define <8 x i16> @movi8h_fneg() { -; CHECK-LABEL: movi8h_fneg: -; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI19_0 -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI19_0] -; CHECK-NEXT: ret +; CHECK-NOFP16-SD-LABEL: movi8h_fneg: +; CHECK-NOFP16-SD: // %bb.0: +; CHECK-NOFP16-SD-NEXT: movi v0.8h, #127, lsl #8 +; CHECK-NOFP16-SD-NEXT: fneg v0.4s, v0.4s +; CHECK-NOFP16-SD-NEXT: ret +; +; CHECK-FP16-SD-LABEL: movi8h_fneg: +; CHECK-FP16-SD: // %bb.0: +; CHECK-FP16-SD-NEXT: movi v0.8h, #127, lsl #8 +; CHECK-FP16-SD-NEXT: fneg v0.4s, v0.4s +; CHECK-FP16-SD-NEXT: ret +; +; CHECK-NOFP16-GI-LABEL: movi8h_fneg: +; CHECK-NOFP16-GI: // %bb.0: +; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI19_0 +; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI19_0] +; CHECK-NOFP16-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: 
movi8h_fneg: +; CHECK-FP16-GI: // %bb.0: +; CHECK-FP16-GI-NEXT: adrp x8, .LCPI19_0 +; CHECK-FP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI19_0] +; CHECK-FP16-GI-NEXT: ret ret <8 x i16> } @@ -294,8 +310,8 @@ define <8 x i16> @mvni8h_neg() { ; ; CHECK-FP16-SD-LABEL: mvni8h_neg: ; CHECK-FP16-SD: // %bb.0: -; CHECK-FP16-SD-NEXT: mov w8, #33008 // =0x80f0 -; CHECK-FP16-SD-NEXT: dup v0.8h, w8 +; CHECK-FP16-SD-NEXT: movi v0.8h, #240 +; CHECK-FP16-SD-NEXT: fneg v0.8h, v0.8h ; CHECK-FP16-SD-NEXT: ret ; ; CHECK-NOFP16-GI-LABEL: mvni8h_neg: @@ -480,14 +496,14 @@ define <2 x double> @fmov2d() { define <2 x double> @fmov2d_neg0() { ; CHECK-NOFP16-SD-LABEL: fmov2d_neg0: ; CHECK-NOFP16-SD: // %bb.0: -; CHECK-NOFP16-SD-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 -; CHECK-NOFP16-SD-NEXT: dup v0.2d, x8 +; CHECK-NOFP16-SD-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NOFP16-SD-NEXT: fneg v0.2d, v0.2d ; CHECK-NOFP16-SD-NEXT: ret ; ; CHECK-FP16-SD-LABEL: fmov2d_neg0: ; CHECK-FP16-SD: // %bb.0: -; CHECK-FP16-SD-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 -; CHECK-FP16-SD-NEXT: dup v0.2d, x8 +; CHECK-FP16-SD-NEXT: movi v0.2d, #0000000000000000 +; CHECK-FP16-SD-NEXT: fneg v0.2d, v0.2d ; CHECK-FP16-SD-NEXT: ret ; ; CHECK-NOFP16-GI-LABEL: fmov2d_neg0: diff --git a/llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll b/llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll index f8c6f4193959d2..1ebfe308e9af94 100644 --- a/llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll +++ b/llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll @@ -35,18 +35,17 @@ define <4 x i32> @test_srem_odd_even(<4 x i32> %X) nounwind { define <4 x i32> @test_srem_odd_allones_eq(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_srem_odd_allones_eq: ; CHECK: // %bb.0: +; CHECK-NEXT: movi v1.16b, #153 ; CHECK-NEXT: mov w8, #52429 // =0xcccd -; CHECK-NEXT: mov w9, #39321 // =0x9999 ; CHECK-NEXT: movk w8, #52428, lsl #16 -; CHECK-NEXT: movk w9, #6553, lsl #16 -; CHECK-NEXT: dup v1.4s, w8 -; CHECK-NEXT: dup 
v2.4s, w9 +; CHECK-NEXT: dup v2.4s, w8 ; CHECK-NEXT: adrp x8, .LCPI1_0 -; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s +; CHECK-NEXT: fneg v1.4s, v1.4s +; CHECK-NEXT: mla v1.4s, v0.4s, v2.4s ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI1_0] -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: cmhs v0.4s, v0.4s, v2.4s -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: movi v2.4s, #1 +; CHECK-NEXT: cmhs v0.4s, v0.4s, v1.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -56,18 +55,17 @@ define <4 x i32> @test_srem_odd_allones_eq(<4 x i32> %X) nounwind { define <4 x i32> @test_srem_odd_allones_ne(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_srem_odd_allones_ne: ; CHECK: // %bb.0: +; CHECK-NEXT: movi v1.16b, #153 ; CHECK-NEXT: mov w8, #52429 // =0xcccd -; CHECK-NEXT: mov w9, #39321 // =0x9999 ; CHECK-NEXT: movk w8, #52428, lsl #16 -; CHECK-NEXT: movk w9, #6553, lsl #16 -; CHECK-NEXT: dup v1.4s, w8 -; CHECK-NEXT: dup v2.4s, w9 +; CHECK-NEXT: dup v2.4s, w8 ; CHECK-NEXT: adrp x8, .LCPI2_0 -; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s +; CHECK-NEXT: fneg v1.4s, v1.4s +; CHECK-NEXT: mla v1.4s, v0.4s, v2.4s ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI2_0] -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: cmhi v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: movi v2.4s, #1 +; CHECK-NEXT: cmhi v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp ne <4 x i32> %srem, @@ -269,18 +267,17 @@ define <4 x i32> @test_srem_odd_even_poweroftwo(<4 x i32> %X) nounwind { define <4 x i32> @test_srem_odd_one(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_srem_odd_one: ; CHECK: // %bb.0: +; CHECK-NEXT: movi v1.16b, #153 ; CHECK-NEXT: mov w8, #52429 // =0xcccd -; CHECK-NEXT: mov w9, #39321 // =0x9999 ; CHECK-NEXT: movk w8, #52428, lsl #16 -; CHECK-NEXT: movk w9, #6553, lsl #16 -; CHECK-NEXT: dup v1.4s, w8 -; CHECK-NEXT: dup v2.4s, w9 +; CHECK-NEXT: dup v2.4s, w8 ; 
CHECK-NEXT: adrp x8, .LCPI10_0 -; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s +; CHECK-NEXT: fneg v1.4s, v1.4s +; CHECK-NEXT: mla v1.4s, v0.4s, v2.4s ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI10_0] -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: cmhs v0.4s, v0.4s, v2.4s -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: movi v2.4s, #1 +; CHECK-NEXT: cmhs v0.4s, v0.4s, v1.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -522,18 +519,17 @@ define <4 x i32> @test_srem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwi define <4 x i32> @test_srem_odd_allones_and_one(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_srem_odd_allones_and_one: ; CHECK: // %bb.0: +; CHECK-NEXT: movi v1.16b, #153 ; CHECK-NEXT: mov w8, #52429 // =0xcccd -; CHECK-NEXT: mov w9, #39321 // =0x9999 ; CHECK-NEXT: movk w8, #52428, lsl #16 -; CHECK-NEXT: movk w9, #6553, lsl #16 -; CHECK-NEXT: dup v1.4s, w8 -; CHECK-NEXT: dup v2.4s, w9 +; CHECK-NEXT: dup v2.4s, w8 ; CHECK-NEXT: adrp x8, .LCPI19_0 -; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s +; CHECK-NEXT: fneg v1.4s, v1.4s +; CHECK-NEXT: mla v1.4s, v0.4s, v2.4s ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI19_0] -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: cmhs v0.4s, v0.4s, v2.4s -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: movi v2.4s, #1 +; CHECK-NEXT: cmhs v0.4s, v0.4s, v1.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll b/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll index 478a34cf2a2b97..b31ce94cdaaea1 100644 --- a/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll +++ b/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll @@ -51,12 +51,11 @@ define <4 x i1> @t32_6_part0(<4 x i32> %X) nounwind { ; CHECK-NEXT: movk w8, #43690, lsl #16 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: dup v1.4s, w8 -; CHECK-NEXT: mov w8, #43690 // =0xaaaa -; 
CHECK-NEXT: movk w8, #10922, lsl #16 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: shl v1.4s, v0.4s, #31 ; CHECK-NEXT: usra v1.4s, v0.4s, #1 -; CHECK-NEXT: dup v0.4s, w8 +; CHECK-NEXT: movi v0.16b, #170 +; CHECK-NEXT: fneg v0.4s, v0.4s ; CHECK-NEXT: cmhs v0.4s, v0.4s, v1.4s ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll b/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll index dc021bc3bfcc74..468a33ce5bfcf6 100644 --- a/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll +++ b/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll @@ -10,18 +10,18 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) { ; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_1] ; CHECK-NEXT: adrp x8, .LCPI0_2 ; CHECK-NEXT: ushl v1.4h, v0.4h, v1.4h -; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI0_2] -; CHECK-NEXT: adrp x8, .LCPI0_3 ; CHECK-NEXT: umull v1.4s, v1.4h, v2.4h +; CHECK-NEXT: movi d2, #0000000000000000 ; CHECK-NEXT: shrn v1.4h, v1.4s, #16 -; CHECK-NEXT: sub v2.4h, v0.4h, v1.4h -; CHECK-NEXT: umull v2.4s, v2.4h, v3.4h +; CHECK-NEXT: fneg d2, d2 +; CHECK-NEXT: sub v3.4h, v0.4h, v1.4h +; CHECK-NEXT: umull v2.4s, v3.4h, v2.4h ; CHECK-NEXT: shrn v2.4h, v2.4s, #16 ; CHECK-NEXT: add v1.4h, v2.4h, v1.4h -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_3] -; CHECK-NEXT: adrp x8, .LCPI0_4 +; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_2] +; CHECK-NEXT: adrp x8, .LCPI0_3 ; CHECK-NEXT: ushl v1.4h, v1.4h, v2.4h -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_4] +; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_3] ; CHECK-NEXT: mls v0.4h, v1.4h, v2.4h ; CHECK-NEXT: ret %1 = urem <4 x i16> %x, From bd8fcf75df11406527de423daa63e21c3ec8609b Mon Sep 17 00:00:00 2001 From: "Oleksandr \"Alex\" Zinenko" Date: Wed, 14 Feb 2024 15:03:04 +0100 Subject: [PATCH 134/240] [mlir][python] expose LLVMStructType API (#81672) Expose the API for constructing and inspecting StructTypes from the LLVM dialect. Separate constructor methods are used instead of overloads for better readability, similarly to IntegerType. 
--- mlir/include/mlir-c/Dialect/LLVM.h | 61 ++++++++- mlir/lib/Bindings/Python/DialectLLVM.cpp | 145 +++++++++++++++++++++ mlir/lib/CAPI/Dialect/LLVM.cpp | 68 +++++++++- mlir/python/CMakeLists.txt | 13 ++ mlir/python/mlir/dialects/llvm.py | 1 + mlir/test/CAPI/llvm.c | 156 ++++++++++++++++++++++- mlir/test/python/dialects/llvm.py | 84 ++++++++++++ 7 files changed, 525 insertions(+), 3 deletions(-) create mode 100644 mlir/lib/Bindings/Python/DialectLLVM.cpp diff --git a/mlir/include/mlir-c/Dialect/LLVM.h b/mlir/include/mlir-c/Dialect/LLVM.h index 72701a82225436..ac216b01f364d4 100644 --- a/mlir/include/mlir-c/Dialect/LLVM.h +++ b/mlir/include/mlir-c/Dialect/LLVM.h @@ -34,11 +34,70 @@ MLIR_CAPI_EXPORTED MlirType mlirLLVMFunctionTypeGet(MlirType resultType, intptr_t nArgumentTypes, MlirType const *argumentTypes, bool isVarArg); -/// Creates an LLVM literal (unnamed) struct type. +/// Returns `true` if the type is an LLVM dialect struct type. +MLIR_CAPI_EXPORTED bool mlirTypeIsALLVMStructType(MlirType type); + +/// Returns `true` if the type is a literal (unnamed) LLVM struct type. +MLIR_CAPI_EXPORTED bool mlirLLVMStructTypeIsLiteral(MlirType type); + +/// Returns the number of fields in the struct. Asserts if the struct is opaque +/// or not yet initialized. +MLIR_CAPI_EXPORTED intptr_t mlirLLVMStructTypeGetNumElementTypes(MlirType type); + +/// Returns the `position`-th field of the struct. Asserts if the struct is +/// opaque, not yet initialized or if the position is out of range. +MLIR_CAPI_EXPORTED MlirType mlirLLVMStructTypeGetElementType(MlirType type, + intptr_t position); + +/// Returns `true` if the struct is packed. +MLIR_CAPI_EXPORTED bool mlirLLVMStructTypeIsPacked(MlirType type); + +/// Returns the identifier of the identified struct. Asserts that the struct is +/// identified, i.e., not literal.
+MLIR_CAPI_EXPORTED MlirStringRef mlirLLVMStructTypeGetIdentifier(MlirType type); + +/// Returns `true` if the struct is explicitly opaque (will not have a body) or +/// uninitialized (will eventually have a body). +MLIR_CAPI_EXPORTED bool mlirLLVMStructTypeIsOpaque(MlirType type); + +/// Creates an LLVM literal (unnamed) struct type. This may assert if the fields +/// have types not compatible with the LLVM dialect. For a graceful failure, use +/// the checked version. MLIR_CAPI_EXPORTED MlirType mlirLLVMStructTypeLiteralGet(MlirContext ctx, intptr_t nFieldTypes, MlirType const *fieldTypes, bool isPacked); +/// Creates an LLVM literal (unnamed) struct type if possible. Emits a +/// diagnostic at the given location and returns null otherwise. +MLIR_CAPI_EXPORTED MlirType +mlirLLVMStructTypeLiteralGetChecked(MlirLocation loc, intptr_t nFieldTypes, + MlirType const *fieldTypes, bool isPacked); + +/// Creates an LLVM identified struct type with no body. If a struct type with +/// this name already exists in the context, returns that type. Use +/// mlirLLVMStructTypeIdentifiedNewGet to create a fresh struct type, +/// potentially renaming it. The body should be set separately by calling +/// mlirLLVMStructTypeSetBody, if it isn't set already. +MLIR_CAPI_EXPORTED MlirType mlirLLVMStructTypeIdentifiedGet(MlirContext ctx, + MlirStringRef name); + +/// Creates an LLVM identified struct type with no body and a name starting with +/// the given prefix. If a struct with the exact name as the given prefix +/// already exists, appends an unspecified suffix to the name so that the name +/// is unique in context. +MLIR_CAPI_EXPORTED MlirType mlirLLVMStructTypeIdentifiedNewGet( + MlirContext ctx, MlirStringRef name, intptr_t nFieldTypes, + MlirType const *fieldTypes, bool isPacked); + +MLIR_CAPI_EXPORTED MlirType mlirLLVMStructTypeOpaqueGet(MlirContext ctx, + MlirStringRef name); + +/// Sets the body of the identified struct if it hasn't been set yet.
Returns +/// whether the operation was successful. +MLIR_CAPI_EXPORTED MlirLogicalResult +mlirLLVMStructTypeSetBody(MlirType structType, intptr_t nFieldTypes, + MlirType const *fieldTypes, bool isPacked); + #ifdef __cplusplus } #endif diff --git a/mlir/lib/Bindings/Python/DialectLLVM.cpp b/mlir/lib/Bindings/Python/DialectLLVM.cpp new file mode 100644 index 00000000000000..780f5eacf0b8e5 --- /dev/null +++ b/mlir/lib/Bindings/Python/DialectLLVM.cpp @@ -0,0 +1,145 @@ +//===- DialectLLVM.cpp - Pybind module for LLVM dialect API support -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir-c/Diagnostics.h" +#include "mlir-c/Dialect/LLVM.h" +#include "mlir-c/IR.h" +#include "mlir-c/Support.h" +#include "mlir/Bindings/Python/PybindAdaptors.h" +#include + +namespace py = pybind11; +using namespace llvm; +using namespace mlir; +using namespace mlir::python; +using namespace mlir::python::adaptors; + +/// RAII scope intercepting all diagnostics into a string. The message must be +/// checked before this goes out of scope. 
+class CollectDiagnosticsToStringScope { +public: + explicit CollectDiagnosticsToStringScope(MlirContext ctx) : context(ctx) { + handlerID = mlirContextAttachDiagnosticHandler(ctx, &handler, &errorMessage, + /*deleteUserData=*/nullptr); + } + ~CollectDiagnosticsToStringScope() { + assert(errorMessage.empty() && "unchecked error message"); + mlirContextDetachDiagnosticHandler(context, handlerID); + } + + [[nodiscard]] std::string takeMessage() { return std::move(errorMessage); } + +private: + static MlirLogicalResult handler(MlirDiagnostic diag, void *data) { + auto printer = +[](MlirStringRef message, void *data) { + *static_cast(data) += + StringRef(message.data, message.length); + }; + mlirDiagnosticPrint(diag, printer, data); + return mlirLogicalResultSuccess(); + } + + MlirContext context; + MlirDiagnosticHandlerID handlerID; + std::string errorMessage = ""; +}; + +void populateDialectLLVMSubmodule(const pybind11::module &m) { + auto llvmStructType = + mlir_type_subclass(m, "StructType", mlirTypeIsALLVMStructType); + + llvmStructType.def_classmethod( + "get_literal", + [](py::object cls, const std::vector &elements, bool packed, + MlirLocation loc) { + CollectDiagnosticsToStringScope scope(mlirLocationGetContext(loc)); + + MlirType type = mlirLLVMStructTypeLiteralGetChecked( + loc, elements.size(), elements.data(), packed); + if (mlirTypeIsNull(type)) { + throw py::value_error(scope.takeMessage()); + } + return cls(type); + }, + py::arg("cls"), py::arg("elements"), py::kw_only(), + py::arg("packed") = false, py::arg("loc") = py::none()); + + llvmStructType.def_classmethod( + "get_identified", + [](py::object cls, const std::string &name, MlirContext context) { + return cls(mlirLLVMStructTypeIdentifiedGet( + context, mlirStringRefCreate(name.data(), name.size()))); + }, + py::arg("cls"), py::arg("name"), py::kw_only(), + py::arg("context") = py::none()); + + llvmStructType.def_classmethod( + "get_opaque", + [](py::object cls, const std::string &name, MlirContext 
context) { + return cls(mlirLLVMStructTypeOpaqueGet( + context, mlirStringRefCreate(name.data(), name.size()))); + }, + py::arg("cls"), py::arg("name"), py::arg("context") = py::none()); + + llvmStructType.def( + "set_body", + [](MlirType self, const std::vector &elements, bool packed) { + MlirLogicalResult result = mlirLLVMStructTypeSetBody( + self, elements.size(), elements.data(), packed); + if (!mlirLogicalResultIsSuccess(result)) { + throw py::value_error( + "Struct body already set to different content."); + } + }, + py::arg("elements"), py::kw_only(), py::arg("packed") = false); + + llvmStructType.def_classmethod( + "new_identified", + [](py::object cls, const std::string &name, + const std::vector &elements, bool packed, MlirContext ctx) { + return cls(mlirLLVMStructTypeIdentifiedNewGet( + ctx, mlirStringRefCreate(name.data(), name.length()), + elements.size(), elements.data(), packed)); + }, + py::arg("cls"), py::arg("name"), py::arg("elements"), py::kw_only(), + py::arg("packed") = false, py::arg("context") = py::none()); + + llvmStructType.def_property_readonly( + "name", [](MlirType type) -> std::optional { + if (mlirLLVMStructTypeIsLiteral(type)) + return std::nullopt; + + MlirStringRef stringRef = mlirLLVMStructTypeGetIdentifier(type); + return StringRef(stringRef.data, stringRef.length).str(); + }); + + llvmStructType.def_property_readonly("body", [](MlirType type) -> py::object { + // Don't crash in absence of a body. 
+ if (mlirLLVMStructTypeIsOpaque(type)) + return py::none(); + + py::list body; + for (intptr_t i = 0, e = mlirLLVMStructTypeGetNumElementTypes(type); i < e; + ++i) { + body.append(mlirLLVMStructTypeGetElementType(type, i)); + } + return body; + }); + + llvmStructType.def_property_readonly( + "packed", [](MlirType type) { return mlirLLVMStructTypeIsPacked(type); }); + + llvmStructType.def_property_readonly( + "opaque", [](MlirType type) { return mlirLLVMStructTypeIsOpaque(type); }); +} + +PYBIND11_MODULE(_mlirDialectsLLVM, m) { + m.doc() = "MLIR LLVM Dialect"; + + populateDialectLLVMSubmodule(m); +} diff --git a/mlir/lib/CAPI/Dialect/LLVM.cpp b/mlir/lib/CAPI/Dialect/LLVM.cpp index b4405f7aac8ab2..642018a814ca12 100644 --- a/mlir/lib/CAPI/Dialect/LLVM.cpp +++ b/mlir/lib/CAPI/Dialect/LLVM.cpp @@ -36,11 +36,77 @@ MlirType mlirLLVMFunctionTypeGet(MlirType resultType, intptr_t nArgumentTypes, unwrapList(nArgumentTypes, argumentTypes, argumentStorage), isVarArg)); } +bool mlirTypeIsALLVMStructType(MlirType type) { + return isa(unwrap(type)); +} + +bool mlirLLVMStructTypeIsLiteral(MlirType type) { + return !cast(unwrap(type)).isIdentified(); +} + +intptr_t mlirLLVMStructTypeGetNumElementTypes(MlirType type) { + return cast(unwrap(type)).getBody().size(); +} + +MlirType mlirLLVMStructTypeGetElementType(MlirType type, intptr_t position) { + return wrap(cast(unwrap(type)).getBody()[position]); +} + +bool mlirLLVMStructTypeIsPacked(MlirType type) { + return cast(unwrap(type)).isPacked(); +} + +MlirStringRef mlirLLVMStructTypeGetIdentifier(MlirType type) { + return wrap(cast(unwrap(type)).getName()); +} + +bool mlirLLVMStructTypeIsOpaque(MlirType type) { + return cast(unwrap(type)).isOpaque(); +} + MlirType mlirLLVMStructTypeLiteralGet(MlirContext ctx, intptr_t nFieldTypes, MlirType const *fieldTypes, bool isPacked) { - SmallVector fieldStorage; + SmallVector fieldStorage; return wrap(LLVMStructType::getLiteral( unwrap(ctx), unwrapList(nFieldTypes, fieldTypes, fieldStorage), 
isPacked)); } + +MlirType mlirLLVMStructTypeLiteralGetChecked(MlirLocation loc, + intptr_t nFieldTypes, + MlirType const *fieldTypes, + bool isPacked) { + SmallVector fieldStorage; + return wrap(LLVMStructType::getLiteralChecked( + [loc]() { return emitError(unwrap(loc)); }, unwrap(loc)->getContext(), + unwrapList(nFieldTypes, fieldTypes, fieldStorage), isPacked)); +} + +MlirType mlirLLVMStructTypeOpaqueGet(MlirContext ctx, MlirStringRef name) { + return wrap(LLVMStructType::getOpaque(unwrap(name), unwrap(ctx))); +} + +MlirType mlirLLVMStructTypeIdentifiedGet(MlirContext ctx, MlirStringRef name) { + return wrap(LLVMStructType::getIdentified(unwrap(ctx), unwrap(name))); +} + +MlirType mlirLLVMStructTypeIdentifiedNewGet(MlirContext ctx, MlirStringRef name, + intptr_t nFieldTypes, + MlirType const *fieldTypes, + bool isPacked) { + SmallVector fields; + return wrap(LLVMStructType::getNewIdentified( + unwrap(ctx), unwrap(name), unwrapList(nFieldTypes, fieldTypes, fields), + isPacked)); +} + +MlirLogicalResult mlirLLVMStructTypeSetBody(MlirType structType, + intptr_t nFieldTypes, + MlirType const *fieldTypes, + bool isPacked) { + SmallVector fields; + return wrap( + cast(unwrap(structType)) + .setBody(unwrapList(nFieldTypes, fieldTypes, fields), isPacked)); +} diff --git a/mlir/python/CMakeLists.txt b/mlir/python/CMakeLists.txt index 266b86090fe174..ed167afeb69a62 100644 --- a/mlir/python/CMakeLists.txt +++ b/mlir/python/CMakeLists.txt @@ -482,6 +482,19 @@ declare_mlir_python_extension(MLIRPythonExtension.Dialects.Linalg.Pybind MLIRCAPILinalg ) +declare_mlir_python_extension(MLIRPythonExtension.Dialects.LLVM.Pybind + MODULE_NAME _mlirDialectsLLVM + ADD_TO_PARENT MLIRPythonSources.Dialects.llvm + ROOT_DIR "${PYTHON_SOURCE_DIR}" + SOURCES + DialectLLVM.cpp + PRIVATE_LINK_LIBS + LLVMSupport + EMBED_CAPI_LINK_LIBS + MLIRCAPIIR + MLIRCAPILLVM +) + declare_mlir_python_extension(MLIRPythonExtension.Dialects.Quant.Pybind MODULE_NAME _mlirDialectsQuant ADD_TO_PARENT 
MLIRPythonSources.Dialects.quant diff --git a/mlir/python/mlir/dialects/llvm.py b/mlir/python/mlir/dialects/llvm.py index 77025438c37a4f..8aa16e4a256030 100644 --- a/mlir/python/mlir/dialects/llvm.py +++ b/mlir/python/mlir/dialects/llvm.py @@ -4,3 +4,4 @@ from ._llvm_ops_gen import * from ._llvm_enum_gen import * +from .._mlir_libs._mlirDialectsLLVM import * diff --git a/mlir/test/CAPI/llvm.c b/mlir/test/CAPI/llvm.c index aaec7b113f0a97..5a78fac91a5097 100644 --- a/mlir/test/CAPI/llvm.c +++ b/mlir/test/CAPI/llvm.c @@ -12,6 +12,7 @@ #include "mlir-c/Dialect/LLVM.h" #include "mlir-c/BuiltinTypes.h" #include "mlir-c/IR.h" +#include "mlir-c/Support.h" #include #include @@ -73,11 +74,164 @@ static void testTypeCreation(MlirContext ctx) { mlirTypeEqual(i32_i64_s, i32_i64_s_ref)); } +// CHECK-LABEL: testStructTypeCreation +static int testStructTypeCreation(MlirContext ctx) { + fprintf(stderr, "testStructTypeCreation"); + + // CHECK: !llvm.struct<()> + mlirTypeDump(mlirLLVMStructTypeLiteralGet(ctx, /*nFieldTypes=*/0, + /*fieldTypes=*/NULL, + /*isPacked=*/false)); + + MlirType i8 = mlirIntegerTypeGet(ctx, 8); + MlirType i32 = mlirIntegerTypeGet(ctx, 32); + MlirType i64 = mlirIntegerTypeGet(ctx, 64); + MlirType i8_i32_i64[] = {i8, i32, i64}; + // CHECK: !llvm.struct<(i8, i32, i64)> + mlirTypeDump( + mlirLLVMStructTypeLiteralGet(ctx, sizeof(i8_i32_i64) / sizeof(MlirType), + i8_i32_i64, /*isPacked=*/false)); + // CHECK: !llvm.struct<(i32)> + mlirTypeDump(mlirLLVMStructTypeLiteralGet(ctx, 1, &i32, /*isPacked=*/false)); + MlirType i32_i32[] = {i32, i32}; + // CHECK: !llvm.struct + mlirTypeDump(mlirLLVMStructTypeLiteralGet( + ctx, sizeof(i32_i32) / sizeof(MlirType), i32_i32, /*isPacked=*/true)); + + MlirType literal = + mlirLLVMStructTypeLiteralGet(ctx, sizeof(i8_i32_i64) / sizeof(MlirType), + i8_i32_i64, /*isPacked=*/false); + // CHECK: num elements: 3 + // CHECK: i8 + // CHECK: i32 + // CHECK: i64 + fprintf(stderr, "num elements: %ld\n", + 
mlirLLVMStructTypeGetNumElementTypes(literal)); + for (intptr_t i = 0; i < 3; ++i) { + mlirTypeDump(mlirLLVMStructTypeGetElementType(literal, i)); + } + + if (!mlirTypeEqual( + mlirLLVMStructTypeLiteralGet(ctx, 1, &i32, /*isPacked=*/false), + mlirLLVMStructTypeLiteralGet(ctx, 1, &i32, /*isPacked=*/false))) { + return 1; + } + if (mlirTypeEqual( + mlirLLVMStructTypeLiteralGet(ctx, 1, &i32, /*isPacked=*/false), + mlirLLVMStructTypeLiteralGet(ctx, 1, &i64, /*isPacked=*/false))) { + return 2; + } + + // CHECK: !llvm.struct<"foo", opaque> + // CHECK: !llvm.struct<"bar", opaque> + mlirTypeDump(mlirLLVMStructTypeIdentifiedGet( + ctx, mlirStringRefCreateFromCString("foo"))); + mlirTypeDump(mlirLLVMStructTypeIdentifiedGet( + ctx, mlirStringRefCreateFromCString("bar"))); + + if (!mlirTypeEqual(mlirLLVMStructTypeIdentifiedGet( + ctx, mlirStringRefCreateFromCString("foo")), + mlirLLVMStructTypeIdentifiedGet( + ctx, mlirStringRefCreateFromCString("foo")))) { + return 3; + } + if (mlirTypeEqual(mlirLLVMStructTypeIdentifiedGet( + ctx, mlirStringRefCreateFromCString("foo")), + mlirLLVMStructTypeIdentifiedGet( + ctx, mlirStringRefCreateFromCString("bar")))) { + return 4; + } + + MlirType fooStruct = mlirLLVMStructTypeIdentifiedGet( + ctx, mlirStringRefCreateFromCString("foo")); + MlirStringRef name = mlirLLVMStructTypeGetIdentifier(fooStruct); + if (memcmp(name.data, "foo", name.length)) + return 5; + if (!mlirLLVMStructTypeIsOpaque(fooStruct)) + return 6; + + MlirType i32_i64[] = {i32, i64}; + MlirLogicalResult result = + mlirLLVMStructTypeSetBody(fooStruct, sizeof(i32_i64) / sizeof(MlirType), + i32_i64, /*isPacked=*/false); + if (!mlirLogicalResultIsSuccess(result)) + return 7; + + // CHECK: !llvm.struct<"foo", (i32, i64)> + mlirTypeDump(fooStruct); + if (mlirLLVMStructTypeIsOpaque(fooStruct)) + return 8; + if (mlirLLVMStructTypeIsPacked(fooStruct)) + return 9; + if (!mlirTypeEqual(mlirLLVMStructTypeIdentifiedGet( + ctx, mlirStringRefCreateFromCString("foo")), + fooStruct)) { + 
return 10; + } + + MlirType barStruct = mlirLLVMStructTypeIdentifiedGet( + ctx, mlirStringRefCreateFromCString("bar")); + result = mlirLLVMStructTypeSetBody(barStruct, 1, &i32, /*isPacked=*/true); + if (!mlirLogicalResultIsSuccess(result)) + return 11; + + // CHECK: !llvm.struct<"bar", packed (i32)> + mlirTypeDump(barStruct); + if (!mlirLLVMStructTypeIsPacked(barStruct)) + return 12; + + // Same body, should succeed. + result = + mlirLLVMStructTypeSetBody(fooStruct, sizeof(i32_i64) / sizeof(MlirType), + i32_i64, /*isPacked=*/false); + if (!mlirLogicalResultIsSuccess(result)) + return 13; + + // Different body, should fail. + result = mlirLLVMStructTypeSetBody(fooStruct, 1, &i32, /*isPacked=*/false); + if (mlirLogicalResultIsSuccess(result)) + return 14; + + // Packed flag differs, should fail. + result = mlirLLVMStructTypeSetBody(barStruct, 1, &i32, /*isPacked=*/false); + if (mlirLogicalResultIsSuccess(result)) + return 15; + + // Should have a different name. + // CHECK: !llvm.struct<"foo{{[^"]+}} + mlirTypeDump(mlirLLVMStructTypeIdentifiedNewGet( + ctx, mlirStringRefCreateFromCString("foo"), /*nFieldTypes=*/0, + /*fieldTypes=*/NULL, /*isPacked=*/false)); + + // Two freshly created "new" types must differ. 
+ if (mlirTypeEqual( + mlirLLVMStructTypeIdentifiedNewGet( + ctx, mlirStringRefCreateFromCString("foo"), /*nFieldTypes=*/0, + /*fieldTypes=*/NULL, /*isPacked=*/false), + mlirLLVMStructTypeIdentifiedNewGet( + ctx, mlirStringRefCreateFromCString("foo"), /*nFieldTypes=*/0, + /*fieldTypes=*/NULL, /*isPacked=*/false))) { + return 16; + } + + MlirType opaque = mlirLLVMStructTypeOpaqueGet( + ctx, mlirStringRefCreateFromCString("opaque")); + // CHECK: !llvm.struct<"opaque", opaque> + mlirTypeDump(opaque); + if (!mlirLLVMStructTypeIsOpaque(opaque)) + return 17; + + return 0; +} + int main(void) { MlirContext ctx = mlirContextCreate(); mlirDialectHandleRegisterDialect(mlirGetDialectHandle__llvm__(), ctx); mlirContextGetOrLoadDialect(ctx, mlirStringRefCreateFromCString("llvm")); testTypeCreation(ctx); + int result = testStructTypeCreation(ctx); mlirContextDestroy(ctx); - return 0; + if (result) + fprintf(stderr, "FAILED: code %d", result); + return result; } diff --git a/mlir/test/python/dialects/llvm.py b/mlir/test/python/dialects/llvm.py index 2d207ae14eecd2..fb4b343b170bae 100644 --- a/mlir/test/python/dialects/llvm.py +++ b/mlir/test/python/dialects/llvm.py @@ -15,6 +15,90 @@ def constructAndPrintInModule(f): return f +# CHECK-LABEL: testStructType +@constructAndPrintInModule +def testStructType(): + print(llvm.StructType.get_literal([])) + # CHECK: !llvm.struct<()> + + i8, i32, i64 = tuple(map(lambda x: IntegerType.get_signless(x), [8, 32, 64])) + print(llvm.StructType.get_literal([i8, i32, i64])) + print(llvm.StructType.get_literal([i32])) + print(llvm.StructType.get_literal([i32, i32], packed=True)) + literal = llvm.StructType.get_literal([i8, i32, i64]) + assert len(literal.body) == 3 + print(*tuple(literal.body)) + assert literal.name is None + # CHECK: !llvm.struct<(i8, i32, i64)> + # CHECK: !llvm.struct<(i32)> + # CHECK: !llvm.struct + # CHECK: i8 i32 i64 + + assert llvm.StructType.get_literal([i32]) == llvm.StructType.get_literal([i32]) + assert 
llvm.StructType.get_literal([i32]) != llvm.StructType.get_literal([i64]) + + print(llvm.StructType.get_identified("foo")) + print(llvm.StructType.get_identified("bar")) + # CHECK: !llvm.struct<"foo", opaque> + # CHECK: !llvm.struct<"bar", opaque> + + assert llvm.StructType.get_identified("foo") == llvm.StructType.get_identified( + "foo" + ) + assert llvm.StructType.get_identified("foo") != llvm.StructType.get_identified( + "bar" + ) + + foo_struct = llvm.StructType.get_identified("foo") + print(foo_struct.name) + print(foo_struct.body) + assert foo_struct.opaque + foo_struct.set_body([i32, i64]) + print(*tuple(foo_struct.body)) + print(foo_struct) + assert not foo_struct.packed + assert not foo_struct.opaque + assert llvm.StructType.get_identified("foo") == foo_struct + # CHECK: foo + # CHECK: None + # CHECK: i32 i64 + # CHECK: !llvm.struct<"foo", (i32, i64)> + + bar_struct = llvm.StructType.get_identified("bar") + bar_struct.set_body([i32], packed=True) + print(bar_struct) + assert bar_struct.packed + # CHECK: !llvm.struct<"bar", packed (i32)> + + # Same body, should not raise. 
+ foo_struct.set_body([i32, i64]) + + try: + foo_struct.set_body([]) + except ValueError as e: + pass + else: + assert False, "expected exception not raised" + + try: + bar_struct.set_body([i32]) + except ValueError as e: + pass + else: + assert False, "expected exception not raised" + + print(llvm.StructType.new_identified("foo", [])) + assert llvm.StructType.new_identified("foo", []) != llvm.StructType.new_identified( + "foo", [] + ) + # CHECK: !llvm.struct<"foo{{[^"]+}} + + opaque = llvm.StructType.get_opaque("opaque") + print(opaque) + assert opaque.opaque + # CHECK: !llvm.struct<"opaque", opaque> + + # CHECK-LABEL: testSmoke @constructAndPrintInModule def testSmoke(): From 8e24bc096dcd0013d802e59a45803c51796dec0a Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Wed, 14 Feb 2024 09:08:28 -0500 Subject: [PATCH 135/240] [C23] Do not diagnose binary literals as an extension (#81658) We previously would diagnose them as a GNU extension in C mode, but they are now a feature of C23. The -Wgnu-binary-literal warning group no longer controls any diagnostics as this is no longer a GNU extension. The warning group is retained as a noop to help avoid "unknown warning" diagnostics. This also adds the companion compatibility warning which existed for C++ but not for C. 
Fixes https://github.com/llvm/llvm-project/issues/72017 --- clang/docs/ReleaseNotes.rst | 8 ++++++++ clang/include/clang/Basic/DiagnosticGroups.td | 11 ++++++----- clang/include/clang/Basic/DiagnosticLexKinds.td | 5 ++++- clang/lib/Lex/LiteralSupport.cpp | 16 +++++++++++----- clang/test/C/C2x/n2549.c | 14 ++++++++++++++ clang/test/Lexer/gnu-flags.c | 14 +++----------- 6 files changed, 46 insertions(+), 22 deletions(-) create mode 100644 clang/test/C/C2x/n2549.c diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index e12a802e2e9ede..6cf48d63dd512e 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -56,6 +56,12 @@ Clang Frontend Potentially Breaking Changes ``ArrayRef`` reduces AST memory usage by 0.4% when compiling clang, and is expected to show similar improvements on other workloads. +- The ``-Wgnu-binary-literal`` diagnostic group no longer controls any + diagnostics. Binary literals are no longer a GNU extension, they're now a C23 + extension which is controlled via ``-pedantic`` or ``-Wc23-extensions``. Use + of ``-Wno-gnu-binary-literal`` will no longer silence this pedantic warning, + which may break existing uses with ``-Werror``. + Target OS macros extension ^^^^^^^^^^^^^^^^^^^^^^^^^^ A new Clang extension (see :ref:`here `) is enabled for @@ -113,6 +119,8 @@ C Language Changes C23 Feature Support ^^^^^^^^^^^^^^^^^^^ +- No longer diagnose use of binary literals as an extension in C23 mode. Fixes + `#72017 `_. 
Non-comprehensive list of changes in this release ------------------------------------------------- diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 975eca0ad9b642..7679b8528a4197 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -44,10 +44,8 @@ def DeprecatedModuleDotMap : DiagGroup<"deprecated-module-dot-map">; def FrameworkHdrAtImport : DiagGroup<"atimport-in-framework-header">; def CXX14BinaryLiteral : DiagGroup<"c++14-binary-literal">; def CXXPre14CompatBinaryLiteral : DiagGroup<"c++98-c++11-compat-binary-literal">; -def GNUBinaryLiteral : DiagGroup<"gnu-binary-literal">; def BinaryLiteral : DiagGroup<"binary-literal", [CXX14BinaryLiteral, - CXXPre14CompatBinaryLiteral, - GNUBinaryLiteral]>; + CXXPre14CompatBinaryLiteral]>; def GNUCompoundLiteralInitializer : DiagGroup<"gnu-compound-literal-initializer">; def SingleBitBitFieldConstantConversion : DiagGroup<"single-bit-bitfield-constant-conversion">; @@ -1176,10 +1174,13 @@ def C23 : DiagGroup<"c23-extensions">; def : DiagGroup<"c2x-extensions", [C23]>; +// Previously supported warning group which is no longer pertinent as binary +// literals are a C++14 and C23 extension now instead of a GNU extension. +def GNUBinaryLiteral : DiagGroup<"gnu-binary-literal">; + // A warning group for warnings about GCC extensions. 
def GNU : DiagGroup<"gnu", [GNUAlignofExpression, GNUAnonymousStruct, - GNUAutoType, - GNUBinaryLiteral, GNUCaseRange, + GNUAutoType, GNUBinaryLiteral, GNUCaseRange, GNUComplexInteger, GNUCompoundLiteralInitializer, GNUConditionalOmittedOperand, GNUDesignator, GNUEmptyStruct, diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index 75ca2fa16d3485..1354543612b9fb 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -246,7 +246,10 @@ def warn_cxx17_hex_literal : Warning< "C++ standards before C++17">, InGroup, DefaultIgnore; def ext_binary_literal : Extension< - "binary integer literals are a GNU extension">, InGroup; + "binary integer literals are a C23 extension">, InGroup; +def warn_c23_compat_binary_literal : Warning< + "binary integer literals are incompatible with C standards before C23">, + InGroup, DefaultIgnore; def ext_binary_literal_cxx14 : Extension< "binary integer literals are a C++14 extension">, InGroup; def warn_cxx11_compat_binary_literal : Warning< diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp index 0a78638f680511..571a984884029d 100644 --- a/clang/lib/Lex/LiteralSupport.cpp +++ b/clang/lib/Lex/LiteralSupport.cpp @@ -1358,11 +1358,17 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) { // Handle simple binary numbers 0b01010 if ((c1 == 'b' || c1 == 'B') && (s[1] == '0' || s[1] == '1')) { - // 0b101010 is a C++1y / GCC extension. - Diags.Report(TokLoc, LangOpts.CPlusPlus14 - ? diag::warn_cxx11_compat_binary_literal - : LangOpts.CPlusPlus ? diag::ext_binary_literal_cxx14 - : diag::ext_binary_literal); + // 0b101010 is a C++14 and C23 extension. 
+ unsigned DiagId; + if (LangOpts.CPlusPlus14) + DiagId = diag::warn_cxx11_compat_binary_literal; + else if (LangOpts.C23) + DiagId = diag::warn_c23_compat_binary_literal; + else if (LangOpts.CPlusPlus) + DiagId = diag::ext_binary_literal_cxx14; + else + DiagId = diag::ext_binary_literal; + Diags.Report(TokLoc, DiagId); ++s; assert(s < ThisTokEnd && "didn't maximally munch?"); radix = 2; diff --git a/clang/test/C/C2x/n2549.c b/clang/test/C/C2x/n2549.c new file mode 100644 index 00000000000000..817338bcdacc39 --- /dev/null +++ b/clang/test/C/C2x/n2549.c @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -verify -std=c23 %s +// RUN: %clang_cc1 -verify=pedantic -std=c17 -pedantic %s +// RUN: %clang_cc1 -verify=compat -std=c23 -Wpre-c23-compat %s + +// expected-no-diagnostics + +/* WG14 N2549: Clang 9 + * Binary literals + */ + +int i = 0b01; /* pedantic-warning {{binary integer literals are a C23 extension}} + compat-warning {{binary integer literals are incompatible with C standards before C23}} + */ + diff --git a/clang/test/Lexer/gnu-flags.c b/clang/test/Lexer/gnu-flags.c index 6e47547b009d52..384339fc859429 100644 --- a/clang/test/Lexer/gnu-flags.c +++ b/clang/test/Lexer/gnu-flags.c @@ -1,15 +1,14 @@ // RUN: %clang_cc1 -fsyntax-only -verify %s -DNONE -// RUN: %clang_cc1 -fsyntax-only -verify %s -DALL -Wgnu +// RUN: %clang_cc1 -fsyntax-only -verify %s -DALL -Wgnu // RUN: %clang_cc1 -fsyntax-only -verify %s -DALL \ // RUN: -Wgnu-zero-variadic-macro-arguments \ -// RUN: -Wgnu-imaginary-constant -Wgnu-binary-literal -Wgnu-zero-line-directive +// RUN: -Wgnu-imaginary-constant -Wgnu-zero-line-directive // RUN: %clang_cc1 -fsyntax-only -verify %s -DNONE -Wgnu \ // RUN: -Wno-gnu-zero-variadic-macro-arguments \ -// RUN: -Wno-gnu-imaginary-constant -Wno-gnu-binary-literal -Wno-gnu-zero-line-directive +// RUN: -Wno-gnu-imaginary-constant -Wno-gnu-zero-line-directive // Additional disabled tests: // %clang_cc1 -fsyntax-only -verify %s -DZEROARGS -Wgnu-zero-variadic-macro-arguments // 
%clang_cc1 -fsyntax-only -verify %s -DIMAGINARYCONST -Wgnu-imaginary-constant -// %clang_cc1 -fsyntax-only -verify %s -DBINARYLITERAL -Wgnu-binary-literal // %clang_cc1 -fsyntax-only -verify %s -DLINE0 -Wgnu-zero-line-directive #if NONE @@ -38,13 +37,6 @@ void foo( const char* c ) float _Complex c = 1.if; -#if ALL || BINARYLITERAL -// expected-warning@+3 {{binary integer literals are a GNU extension}} -#endif - -int b = 0b0101; - - // This case is handled differently because lit has a bug whereby #line 0 is reported to be on line 4294967295 // http://llvm.org/bugs/show_bug.cgi?id=16952 #if ALL || LINE0 From 1a1fcacbce805e3c409d9d41de61413e3fd8aa36 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Wed, 14 Feb 2024 23:17:00 +0900 Subject: [PATCH 136/240] [MC/DC] Refactor: Introduce `ConditionIDs` as `std::array<2>` (#81221) Its 0th element corresponds to `FalseID` and 1st to `TrueID`. CoverageMappingGen.cpp: `DecisionIDPair` is replaced with `ConditionIDs` --- clang/lib/CodeGen/CoverageMappingGen.cpp | 35 +++++-------- .../ProfileData/Coverage/CoverageMapping.h | 1 - .../llvm/ProfileData/Coverage/MCDCTypes.h | 9 ++-- .../ProfileData/Coverage/CoverageMapping.cpp | 50 +++++++++---------- .../Coverage/CoverageMappingReader.cpp | 6 +-- .../Coverage/CoverageMappingWriter.cpp | 4 +- .../ProfileData/CoverageMappingTest.cpp | 19 ++++--- 7 files changed, 56 insertions(+), 68 deletions(-) diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp index 3b711c05e92754..93c3c31e71fa83 100644 --- a/clang/lib/CodeGen/CoverageMappingGen.cpp +++ b/clang/lib/CodeGen/CoverageMappingGen.cpp @@ -593,11 +593,6 @@ struct EmptyCoverageMappingBuilder : public CoverageMappingBuilder { /// creation. 
struct MCDCCoverageBuilder { - struct DecisionIDPair { - mcdc::ConditionID TrueID = 0; - mcdc::ConditionID FalseID = 0; - }; - /// The AST walk recursively visits nested logical-AND or logical-OR binary /// operator nodes and then visits their LHS and RHS children nodes. As this /// happens, the algorithm will assign IDs to each operator's LHS and RHS side @@ -688,14 +683,14 @@ struct MCDCCoverageBuilder { private: CodeGenModule &CGM; - llvm::SmallVector DecisionStack; + llvm::SmallVector DecisionStack; MCDC::State &MCDCState; llvm::DenseMap &CondIDs; mcdc::ConditionID NextID = 1; bool NotMapped = false; /// Represent a sentinel value of [0,0] for the bottom of DecisionStack. - static constexpr DecisionIDPair DecisionStackSentinel{0, 0}; + static constexpr mcdc::ConditionIDs DecisionStackSentinel{0, 0}; /// Is this a logical-AND operation? bool isLAnd(const BinaryOperator *E) const { @@ -732,7 +727,7 @@ struct MCDCCoverageBuilder { } /// Return the LHS Decision ([0,0] if not set). - const DecisionIDPair &back() const { return DecisionStack.back(); } + const mcdc::ConditionIDs &back() const { return DecisionStack.back(); } /// Push the binary operator statement to track the nest level and assign IDs /// to the operator's LHS and RHS. The RHS may be a larger subtree that is @@ -750,7 +745,7 @@ struct MCDCCoverageBuilder { if (NotMapped) return; - const DecisionIDPair &ParentDecision = DecisionStack.back(); + const mcdc::ConditionIDs &ParentDecision = DecisionStack.back(); // If the operator itself has an assigned ID, this means it represents a // larger subtree. In this case, assign that ID to its LHS node. Its RHS @@ -766,18 +761,18 @@ struct MCDCCoverageBuilder { // Push the LHS decision IDs onto the DecisionStack. 
if (isLAnd(E)) - DecisionStack.push_back({RHSid, ParentDecision.FalseID}); + DecisionStack.push_back({ParentDecision[false], RHSid}); else - DecisionStack.push_back({ParentDecision.TrueID, RHSid}); + DecisionStack.push_back({RHSid, ParentDecision[true]}); } /// Pop and return the LHS Decision ([0,0] if not set). - DecisionIDPair pop() { + mcdc::ConditionIDs pop() { if (!CGM.getCodeGenOpts().MCDCCoverage || NotMapped) return DecisionStack.front(); assert(DecisionStack.size() > 1); - DecisionIDPair D = DecisionStack.back(); + mcdc::ConditionIDs D = DecisionStack.back(); DecisionStack.pop_back(); return D; } @@ -1026,15 +1021,12 @@ struct CounterCoverageMappingBuilder return (Cond->EvaluateAsInt(Result, CVM.getCodeGenModule().getContext())); } - using MCDCDecisionIDPair = MCDCCoverageBuilder::DecisionIDPair; - /// Create a Branch Region around an instrumentable condition for coverage /// and add it to the function's SourceRegions. A branch region tracks a /// "True" counter and a "False" counter for boolean expressions that /// result in the generation of a branch. - void - createBranchRegion(const Expr *C, Counter TrueCnt, Counter FalseCnt, - const MCDCDecisionIDPair &IDPair = MCDCDecisionIDPair()) { + void createBranchRegion(const Expr *C, Counter TrueCnt, Counter FalseCnt, + const mcdc::ConditionIDs &Conds = {}) { // Check for NULL conditions. if (!C) return; @@ -1047,8 +1039,7 @@ struct CounterCoverageMappingBuilder mcdc::Parameters BranchParams; mcdc::ConditionID ID = MCDCBuilder.getCondID(C); if (ID > 0) - BranchParams = - mcdc::BranchParameters{ID, IDPair.TrueID, IDPair.FalseID}; + BranchParams = mcdc::BranchParameters{ID, Conds}; // If a condition can fold to true or false, the corresponding branch // will be removed. 
Create a region with both counters hard-coded to @@ -2134,8 +2125,8 @@ static void dump(llvm::raw_ostream &OS, StringRef FunctionName, if (const auto *BranchParams = std::get_if(&R.MCDCParams)) { - OS << " [" << BranchParams->ID << "," << BranchParams->TrueID; - OS << "," << BranchParams->FalseID << "] "; + OS << " [" << BranchParams->ID << "," << BranchParams->Conds[true]; + OS << "," << BranchParams->Conds[false] << "] "; } if (R.Kind == CounterMappingRegion::ExpansionRegion) diff --git a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h index c6fbdb512b807e..e3b394287f3352 100644 --- a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h +++ b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h @@ -38,7 +38,6 @@ #include #include #include -#include #include #include diff --git a/llvm/include/llvm/ProfileData/Coverage/MCDCTypes.h b/llvm/include/llvm/ProfileData/Coverage/MCDCTypes.h index d7520fa0824243..61272174fef827 100644 --- a/llvm/include/llvm/ProfileData/Coverage/MCDCTypes.h +++ b/llvm/include/llvm/ProfileData/Coverage/MCDCTypes.h @@ -13,12 +13,14 @@ #ifndef LLVM_PROFILEDATA_COVERAGE_MCDCTYPES_H #define LLVM_PROFILEDATA_COVERAGE_MCDCTYPES_H +#include #include namespace llvm::coverage::mcdc { /// The ID for MCDCBranch. using ConditionID = unsigned int; +using ConditionIDs = std::array; struct DecisionParameters { /// Byte Index of Bitmap Coverage Object for a Decision Region. @@ -35,11 +37,12 @@ struct DecisionParameters { struct BranchParameters { /// IDs used to represent a branch region and other branch regions /// evaluated based on True and False branches. 
- ConditionID ID, TrueID, FalseID; + ConditionID ID; + ConditionIDs Conds; BranchParameters() = delete; - BranchParameters(ConditionID ID, ConditionID TrueID, ConditionID FalseID) - : ID(ID), TrueID(TrueID), FalseID(FalseID) {} + BranchParameters(ConditionID ID, const ConditionIDs &Conds) + : ID(ID), Conds(Conds) {} }; /// The type of MC/DC-specific parameters. diff --git a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp index 80b80f7a26f454..9adeceb1faee2b 100644 --- a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp +++ b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp @@ -246,7 +246,7 @@ class MCDCRecordProcessor { unsigned BitmapIdx; /// Mapping of a condition ID to its corresponding branch params. - llvm::DenseMap BranchParamsMap; + llvm::DenseMap CondsMap; /// Vector used to track whether a condition is constant folded. MCDCRecord::BoolVector Folded; @@ -269,38 +269,34 @@ class MCDCRecordProcessor { Folded(NumConditions, false), IndependencePairs(NumConditions) {} private: - void recordTestVector(MCDCRecord::TestVector &TV, unsigned Index, - MCDCRecord::CondState Result) { - if (!Bitmap[BitmapIdx + Index]) - return; - - // Copy the completed test vector to the vector of testvectors. - ExecVectors.push_back(TV); - - // The final value (T,F) is equal to the last non-dontcare state on the - // path (in a short-circuiting system). - ExecVectors.back().push_back(Result); - } - // Walk the binary decision diagram and try assigning both false and true to // each node. When a terminal node (ID == 0) is reached, fill in the value in // the truth table. 
void buildTestVector(MCDCRecord::TestVector &TV, unsigned ID, unsigned Index) { - auto [UnusedID, TrueID, FalseID] = *BranchParamsMap[ID]; + assert((Index & (1 << (ID - 1))) == 0); + + for (auto MCDCCond : {MCDCRecord::MCDC_False, MCDCRecord::MCDC_True}) { + static_assert(MCDCRecord::MCDC_False == 0); + static_assert(MCDCRecord::MCDC_True == 1); + Index |= MCDCCond << (ID - 1); + TV[ID - 1] = MCDCCond; + auto NextID = CondsMap[ID][MCDCCond]; + if (NextID > 0) { + buildTestVector(TV, NextID, Index); + continue; + } - TV[ID - 1] = MCDCRecord::MCDC_False; - if (FalseID > 0) - buildTestVector(TV, FalseID, Index); - else - recordTestVector(TV, Index, MCDCRecord::MCDC_False); + if (!Bitmap[BitmapIdx + Index]) + continue; - Index |= 1 << (ID - 1); - TV[ID - 1] = MCDCRecord::MCDC_True; - if (TrueID > 0) - buildTestVector(TV, TrueID, Index); - else - recordTestVector(TV, Index, MCDCRecord::MCDC_True); + // Copy the completed test vector to the vector of testvectors. + ExecVectors.push_back(TV); + + // The final value (T,F) is equal to the last non-dontcare state on the + // path (in a short-circuiting system). + ExecVectors.back().push_back(MCDCCond); + } // Reset back to DontCare. TV[ID - 1] = MCDCRecord::MCDC_DontCare; @@ -374,7 +370,7 @@ class MCDCRecordProcessor { // from being measured. 
for (const auto *B : Branches) { const auto &BranchParams = B->getBranchParams(); - BranchParamsMap[BranchParams.ID] = &BranchParams; + CondsMap[BranchParams.ID] = BranchParams.Conds; PosToID[I] = BranchParams.ID - 1; CondLoc[I] = B->startLoc(); Folded[I++] = (B->Count.isZero() && B->FalseCount.isZero()); diff --git a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp index d528d9aa95648f..de7be523ef33ca 100644 --- a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp +++ b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp @@ -313,9 +313,9 @@ Error RawCoverageMappingReader::readMappingRegionsSubArray( return make_error( coveragemap_error::malformed, "MCDCConditionID shouldn't be zero"); - Params = mcdc::BranchParameters{static_cast(ID), - static_cast(TID), - static_cast(FID)}; + Params = mcdc::BranchParameters{ + static_cast(ID), + {static_cast(FID), static_cast(TID)}}; break; case CounterMappingRegion::MCDCDecisionRegion: Kind = CounterMappingRegion::MCDCDecisionRegion; diff --git a/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp b/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp index 3267afdbe15c28..6125cce0fa4cd9 100644 --- a/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp +++ b/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp @@ -257,8 +257,8 @@ void CoverageMappingWriter::write(raw_ostream &OS) { ParamsShouldBeNull = false; assert(BranchParams.ID > 0); encodeULEB128(static_cast(BranchParams.ID), OS); - encodeULEB128(static_cast(BranchParams.TrueID), OS); - encodeULEB128(static_cast(BranchParams.FalseID), OS); + encodeULEB128(static_cast(BranchParams.Conds[true]), OS); + encodeULEB128(static_cast(BranchParams.Conds[false]), OS); } break; case CounterMappingRegion::MCDCDecisionRegion: diff --git a/llvm/unittests/ProfileData/CoverageMappingTest.cpp b/llvm/unittests/ProfileData/CoverageMappingTest.cpp index 6f6718fbd94591..db6689bc58839c 100644 --- 
a/llvm/unittests/ProfileData/CoverageMappingTest.cpp +++ b/llvm/unittests/ProfileData/CoverageMappingTest.cpp @@ -200,14 +200,13 @@ struct CoverageMappingTest : ::testing::TestWithParam> { mcdc::DecisionParameters{Mask, NC}, FileID, LS, CS, LE, CE)); } - void addMCDCBranchCMR(Counter C1, Counter C2, unsigned ID, unsigned TrueID, - unsigned FalseID, StringRef File, unsigned LS, + void addMCDCBranchCMR(Counter C1, Counter C2, mcdc::ConditionID ID, + mcdc::ConditionIDs Conds, StringRef File, unsigned LS, unsigned CS, unsigned LE, unsigned CE) { auto &Regions = InputFunctions.back().Regions; unsigned FileID = getFileIndexForFunction(File); Regions.push_back(CounterMappingRegion::makeBranchRegion( - C1, C2, FileID, LS, CS, LE, CE, - mcdc::BranchParameters{ID, TrueID, FalseID})); + C1, C2, FileID, LS, CS, LE, CE, mcdc::BranchParameters{ID, Conds})); } void addExpansionCMR(StringRef File, StringRef ExpandedFile, unsigned LS, @@ -873,9 +872,9 @@ TEST_P(CoverageMappingTest, non_code_region_bitmask) { addCMR(Counter::getCounter(3), "file", 1, 1, 5, 5); addMCDCDecisionCMR(0, 2, "file", 7, 1, 7, 6); - addMCDCBranchCMR(Counter::getCounter(0), Counter::getCounter(1), 1, 2, 0, + addMCDCBranchCMR(Counter::getCounter(0), Counter::getCounter(1), 1, {0, 2}, "file", 7, 2, 7, 3); - addMCDCBranchCMR(Counter::getCounter(2), Counter::getCounter(3), 2, 0, 0, + addMCDCBranchCMR(Counter::getCounter(2), Counter::getCounter(3), 2, {0, 0}, "file", 7, 4, 7, 5); EXPECT_THAT_ERROR(loadCoverageMapping(), Succeeded()); @@ -901,11 +900,11 @@ TEST_P(CoverageMappingTest, decision_before_expansion) { addExpansionCMR("foo", "B", 4, 19, 4, 20); addCMR(Counter::getCounter(0), "A", 1, 14, 1, 17); addCMR(Counter::getCounter(0), "A", 1, 14, 1, 17); - addMCDCBranchCMR(Counter::getCounter(0), Counter::getCounter(1), 1, 2, 0, "A", - 1, 14, 1, 17); + addMCDCBranchCMR(Counter::getCounter(0), Counter::getCounter(1), 1, {0, 2}, + "A", 1, 14, 1, 17); addCMR(Counter::getCounter(1), "B", 1, 14, 1, 17); - 
addMCDCBranchCMR(Counter::getCounter(1), Counter::getCounter(2), 2, 0, 0, "B", - 1, 14, 1, 17); + addMCDCBranchCMR(Counter::getCounter(1), Counter::getCounter(2), 2, {0, 0}, + "B", 1, 14, 1, 17); // InputFunctionCoverageData::Regions is rewritten after the write. auto InputRegions = InputFunctions.back().Regions; From 43c7eb5d7b237bc18385f0a5529f1e4b8bf4d6a3 Mon Sep 17 00:00:00 2001 From: Pierre van Houtryve Date: Wed, 14 Feb 2024 15:19:04 +0100 Subject: [PATCH 137/240] [AMDGPU] Replace '.' with '-' in generic target names (#81718) The dot is too confusing for tools. Output temporaries would have '10.3-generic' so tools could parse it as an extension, device libs & the associated clang driver logic are also confused by the dot. After discussions, we decided it's better to just remove the '.' from the target name than fix each issue one by one. --- clang/lib/Basic/Targets/AMDGPU.cpp | 3 +-- clang/test/Driver/amdgpu-macros.cl | 4 ++-- clang/test/Driver/amdgpu-mcpu.cl | 8 ++++---- clang/test/Misc/target-invalid-cpu-note.c | 2 +- llvm/docs/AMDGPUUsage.rst | 8 ++++---- llvm/lib/Object/ELFObjectFile.cpp | 4 ++-- llvm/lib/Target/AMDGPU/GCNProcessors.td | 4 ++-- llvm/lib/TargetParser/TargetParser.cpp | 4 ++-- .../CodeGen/AMDGPU/directive-amdgcn-target.ll | 12 ++++++------ .../test/CodeGen/AMDGPU/elf-header-flags-mach.ll | 4 ++-- .../CodeGen/AMDGPU/generic-targets-require-v6.ll | 12 ++++++------ .../AMDGPU/hsa-generic-target-features.ll | 16 ++++++++-------- .../AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll | 2 +- .../AMDGPU/llvm.amdgcn.image.sample.dim.ll | 2 +- .../tools/llvm-objdump/ELF/AMDGPU/subtarget.ll | 8 ++++---- llvm/tools/llvm-readobj/ELFDumper.cpp | 4 ++-- 16 files changed, 48 insertions(+), 49 deletions(-) diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 10cba6b7eac5cc..5742885df0461b 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -285,10 +285,9 @@ void 
AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, : getArchNameR600(GPUKind)); // Sanitize the name of generic targets. - // e.g. gfx10.1-generic -> gfx10_1_generic + // e.g. gfx10-1-generic -> gfx10_1_generic if (GPUKind >= llvm::AMDGPU::GK_AMDGCN_GENERIC_FIRST && GPUKind <= llvm::AMDGPU::GK_AMDGCN_GENERIC_LAST) { - std::replace(CanonName.begin(), CanonName.end(), '.', '_'); std::replace(CanonName.begin(), CanonName.end(), '-', '_'); } diff --git a/clang/test/Driver/amdgpu-macros.cl b/clang/test/Driver/amdgpu-macros.cl index 3b10444ef71d36..004619321b271f 100644 --- a/clang/test/Driver/amdgpu-macros.cl +++ b/clang/test/Driver/amdgpu-macros.cl @@ -132,8 +132,8 @@ // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1201 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1201 -DFAMILY=GFX12 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx9-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx9_generic -DFAMILY=GFX9 -// RUN: %clang -E -dM -target amdgcn -mcpu=gfx10.1-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx10_1_generic -DFAMILY=GFX10 -// RUN: %clang -E -dM -target amdgcn -mcpu=gfx10.3-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx10_3_generic -DFAMILY=GFX10 +// RUN: %clang -E -dM -target amdgcn -mcpu=gfx10-1-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx10_1_generic -DFAMILY=GFX10 +// RUN: %clang -E -dM -target amdgcn -mcpu=gfx10-3-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx10_3_generic -DFAMILY=GFX10 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx11-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx11_generic -DFAMILY=GFX11 // ARCH-GCN-DAG: #define FP_FAST_FMA 1 diff --git a/clang/test/Driver/amdgpu-mcpu.cl 
b/clang/test/Driver/amdgpu-mcpu.cl index 6f18ea0615cb69..915fa6473ac07f 100644 --- a/clang/test/Driver/amdgpu-mcpu.cl +++ b/clang/test/Driver/amdgpu-mcpu.cl @@ -116,8 +116,8 @@ // RUN: %clang -### -target amdgcn -mcpu=gfx1201 %s 2>&1 | FileCheck --check-prefix=GFX1201 %s // RUN: %clang -### -target amdgcn -mcpu=gfx9-generic %s 2>&1 | FileCheck --check-prefix=GFX9_GENERIC %s -// RUN: %clang -### -target amdgcn -mcpu=gfx10.1-generic %s 2>&1 | FileCheck --check-prefix=GFX10_1_GENERIC %s -// RUN: %clang -### -target amdgcn -mcpu=gfx10.3-generic %s 2>&1 | FileCheck --check-prefix=GFX10_3_GENERIC %s +// RUN: %clang -### -target amdgcn -mcpu=gfx10-1-generic %s 2>&1 | FileCheck --check-prefix=GFX10_1_GENERIC %s +// RUN: %clang -### -target amdgcn -mcpu=gfx10-3-generic %s 2>&1 | FileCheck --check-prefix=GFX10_3_GENERIC %s // RUN: %clang -### -target amdgcn -mcpu=gfx11-generic %s 2>&1 | FileCheck --check-prefix=GFX11_GENERIC %s // GCNDEFAULT-NOT: -target-cpu @@ -167,6 +167,6 @@ // GFX1201: "-target-cpu" "gfx1201" // GFX9_GENERIC: "-target-cpu" "gfx9-generic" -// GFX10_1_GENERIC: "-target-cpu" "gfx10.1-generic" -// GFX10_3_GENERIC: "-target-cpu" "gfx10.3-generic" +// GFX10_1_GENERIC: "-target-cpu" "gfx10-1-generic" +// GFX10_3_GENERIC: "-target-cpu" "gfx10-3-generic" // GFX11_GENERIC: "-target-cpu" "gfx11-generic" diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c index 123b203af3e9ca..ef2c1c0cfd3d29 100644 --- a/clang/test/Misc/target-invalid-cpu-note.c +++ b/clang/test/Misc/target-invalid-cpu-note.c @@ -37,7 +37,7 @@ // RUN: not %clang_cc1 -triple amdgcn--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AMDGCN // AMDGCN: error: unknown target CPU 'not-a-cpu' -// AMDGCN-NEXT: note: valid target CPU values are: gfx600, tahiti, gfx601, pitcairn, verde, gfx602, hainan, oland, gfx700, kaveri, gfx701, hawaii, gfx702, gfx703, kabini, mullins, gfx704, bonaire, gfx705, gfx801, carrizo, gfx802, iceland, tonga, 
gfx803, fiji, polaris10, polaris11, gfx805, tongapro, gfx810, stoney, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1200, gfx1201, gfx9-generic, gfx10.1-generic, gfx10.3-generic, gfx11-generic{{$}} +// AMDGCN-NEXT: note: valid target CPU values are: gfx600, tahiti, gfx601, pitcairn, verde, gfx602, hainan, oland, gfx700, kaveri, gfx701, hawaii, gfx702, gfx703, kabini, mullins, gfx704, bonaire, gfx705, gfx801, carrizo, gfx802, iceland, tonga, gfx803, fiji, polaris10, polaris11, gfx805, tongapro, gfx810, stoney, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1200, gfx1201, gfx9-generic, gfx10-1-generic, gfx10-3-generic, gfx11-generic{{$}} // RUN: not %clang_cc1 -triple wasm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix WEBASM // WEBASM: error: unknown target CPU 'not-a-cpu' diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 8c126906fbfd51..0c588c84958624 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -567,7 +567,7 @@ greater than or equal to the version in which the processor was added to the gen - ``v_dot2_f32_f16`` - ``gfx10.1-generic`` ``amdgcn`` - ``gfx1010`` - xnack - Absolute flat - The following instructions are + ``gfx10-1-generic`` ``amdgcn`` - ``gfx1010`` - xnack - Absolute flat - The following instructions are - ``gfx1011`` - wavefrontsize64 scratch not available on ``gfx1011`` - ``gfx1012`` - cumode and ``gfx1012`` - ``gfx1013`` @@ -586,7 +586,7 @@ greater than or equal to the version in which the processor was added to the gen ``gfx1013`` - ``gfx10.3-generic`` ``amdgcn`` - ``gfx1030`` - 
wavefrontsize64 - Absolute flat No restrictions. + ``gfx10-3-generic`` ``amdgcn`` - ``gfx1030`` - wavefrontsize64 - Absolute flat No restrictions. - ``gfx1031`` - cumode scratch - ``gfx1032`` - ``gfx1033`` @@ -1860,8 +1860,8 @@ The AMDGPU backend uses the following ELF header: *reserved* 0x04f Reserved. *reserved* 0x050 Reserved. ``EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC`` 0x051 ``gfx9-generic`` - ``EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC`` 0x052 ``gfx10.1-generic`` - ``EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC`` 0x053 ``gfx10.3-generic`` + ``EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC`` 0x052 ``gfx10-1-generic`` + ``EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC`` 0x053 ``gfx10-3-generic`` ``EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC`` 0x054 ``gfx11-generic`` *reserved* 0x055 Reserved. ========================================== ========== ============================= diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp index 01949c6dad81fc..33be48196ae7d2 100644 --- a/llvm/lib/Object/ELFObjectFile.cpp +++ b/llvm/lib/Object/ELFObjectFile.cpp @@ -519,9 +519,9 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const { case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC: return "gfx9-generic"; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC: - return "gfx10.1-generic"; + return "gfx10-1-generic"; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC: - return "gfx10.3-generic"; + return "gfx10-3-generic"; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC: return "gfx11-generic"; default: diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td index 4671e03d43b3ab..f7e091904bad36 100644 --- a/llvm/lib/Target/AMDGPU/GCNProcessors.td +++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td @@ -258,12 +258,12 @@ def : ProcessorModel<"gfx1036", GFX10SpeedModel, >; // [gfx1010, gfx1011, gfx1012, gfx1013] -def : ProcessorModel<"gfx10.1-generic", GFX10SpeedModel, +def : ProcessorModel<"gfx10-1-generic", GFX10SpeedModel, FeatureISAVersion10_1_Generic.Features >; // 
[gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036] -def : ProcessorModel<"gfx10.3-generic", GFX10SpeedModel, +def : ProcessorModel<"gfx10-3-generic", GFX10SpeedModel, FeatureISAVersion10_3_Generic.Features >; diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp index 684d698521e595..a31027c59ee9d7 100644 --- a/llvm/lib/TargetParser/TargetParser.cpp +++ b/llvm/lib/TargetParser/TargetParser.cpp @@ -128,8 +128,8 @@ constexpr GPUInfo AMDGCNGPUs[] = { {{"gfx1201"}, {"gfx1201"}, GK_GFX1201, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, {{"gfx9-generic"}, {"gfx9-generic"}, GK_GFX9_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, - {{"gfx10.1-generic"}, {"gfx10.1-generic"}, GK_GFX10_1_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP}, - {{"gfx10.3-generic"}, {"gfx10.3-generic"}, GK_GFX10_3_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, + {{"gfx10-1-generic"}, {"gfx10-1-generic"}, GK_GFX10_1_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP}, + {{"gfx10-3-generic"}, {"gfx10-3-generic"}, GK_GFX10_3_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, {{"gfx11-generic"}, {"gfx11-generic"}, GK_GFX11_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, // clang-format on }; diff --git a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll index 038219fc374047..2979b52d00e2ed 100644 --- a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll +++ b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll @@ -110,9 +110,9 @@ ; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-generic -mattr=-xnack < %s | FileCheck --check-prefixes=GFX9_GENERIC_NOXNACK %s ; RUN: llc --amdhsa-code-object-version=6 
-mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-generic -mattr=+xnack < %s | FileCheck --check-prefixes=GFX9_GENERIC_XNACK %s -; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic -mattr=-xnack < %s | FileCheck --check-prefixes=GFX10_1_GENERIC_NOXNACK %s -; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic -mattr=+xnack < %s | FileCheck --check-prefixes=GFX10_1_GENERIC_XNACK %s -; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.3-generic < %s | FileCheck --check-prefixes=GFX10_3_GENERIC %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10-1-generic -mattr=-xnack < %s | FileCheck --check-prefixes=GFX10_1_GENERIC_NOXNACK %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10-1-generic -mattr=+xnack < %s | FileCheck --check-prefixes=GFX10_1_GENERIC_XNACK %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10-3-generic < %s | FileCheck --check-prefixes=GFX10_3_GENERIC %s ; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx11-generic < %s | FileCheck --check-prefixes=GFX11_GENERIC %s ; GFX600: .amdgcn_target "amdgcn-amd-amdhsa--gfx600" @@ -205,9 +205,9 @@ ; GFX9_GENERIC_NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx9-generic:xnack-" ; GFX9_GENERIC_XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx9-generic:xnack+" -; GFX10_1_GENERIC_NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx10.1-generic:xnack-" -; GFX10_1_GENERIC_XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx10.1-generic:xnack+" -; GFX10_3_GENERIC: .amdgcn_target "amdgcn-amd-amdhsa--gfx10.3-generic" +; GFX10_1_GENERIC_NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx10-1-generic:xnack-" +; GFX10_1_GENERIC_XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx10-1-generic:xnack+" +; GFX10_3_GENERIC: .amdgcn_target "amdgcn-amd-amdhsa--gfx10-3-generic" ; GFX11_GENERIC: .amdgcn_target "amdgcn-amd-amdhsa--gfx11-generic" 
define amdgpu_kernel void @directive_amdgcn_target() { diff --git a/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll b/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll index 9ba8176947174c..5f4bfe7ea9d5f7 100644 --- a/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll +++ b/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll @@ -78,8 +78,8 @@ ; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1201 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1201 %s ; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx9-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX9_GENERIC %s -; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx10.1-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX10_1_GENERIC %s -; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx10.3-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX10_3_GENERIC %s +; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx10-1-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX10_1_GENERIC %s +; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx10-3-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX10_3_GENERIC %s ; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx11-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX11_GENERIC %s ; FIXME: With the default attributes the eflags are not accurate for diff --git a/llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll b/llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll index e3f4b14bac0c16..482f61624ec7df 100644 --- a/llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll +++ b/llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll @@ -1,16 
+1,16 @@ ; RUN: not llc -march=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX9-V5 %s -; RUN: not llc -march=amdgcn -mcpu=gfx10.1-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX101-V5 %s -; RUN: not llc -march=amdgcn -mcpu=gfx10.3-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX103-V5 %s +; RUN: not llc -march=amdgcn -mcpu=gfx10-1-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX101-V5 %s +; RUN: not llc -march=amdgcn -mcpu=gfx10-3-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX103-V5 %s ; RUN: not llc -march=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX11-V5 %s ; RUN: llc -march=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -o - %s -; RUN: llc -march=amdgcn -mcpu=gfx10.1-generic --amdhsa-code-object-version=6 -o - %s -; RUN: llc -march=amdgcn -mcpu=gfx10.3-generic --amdhsa-code-object-version=6 -o - %s +; RUN: llc -march=amdgcn -mcpu=gfx10-1-generic --amdhsa-code-object-version=6 -o - %s +; RUN: llc -march=amdgcn -mcpu=gfx10-3-generic --amdhsa-code-object-version=6 -o - %s ; RUN: llc -march=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=6 -o - %s ; GFX9-V5: gfx9-generic is only available on code object version 6 or better -; GFX101-V5: gfx10.1-generic is only available on code object version 6 or better -; GFX103-V5: gfx10.3-generic is only available on code object version 6 or better +; GFX101-V5: gfx10-1-generic is only available on code object version 6 or better +; GFX103-V5: gfx10-3-generic is only available on code object version 6 or better ; GFX11-V5: gfx11-generic is only available on code object version 6 or better define void @foo() { diff --git a/llvm/test/CodeGen/AMDGPU/hsa-generic-target-features.ll b/llvm/test/CodeGen/AMDGPU/hsa-generic-target-features.ll index 
4fee563d1cc93b..a2d9bbf575b456 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-generic-target-features.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-generic-target-features.ll @@ -1,14 +1,14 @@ -; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic -mattr=+cumode < %s | FileCheck -check-prefix=NOCU %s -; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic < %s | FileCheck -check-prefix=CU %s -; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.3-generic -mattr=+cumode < %s | FileCheck -check-prefix=NOCU %s -; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.3-generic < %s | FileCheck -check-prefix=CU %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10-1-generic -mattr=+cumode < %s | FileCheck -check-prefix=NOCU %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10-1-generic < %s | FileCheck -check-prefix=CU %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10-3-generic -mattr=+cumode < %s | FileCheck -check-prefix=NOCU %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10-3-generic < %s | FileCheck -check-prefix=CU %s ; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx11-generic -mattr=+cumode < %s | FileCheck -check-prefix=NOCU %s ; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx11-generic < %s | FileCheck -check-prefix=CU %s -; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefix=W32 %s -; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefix=W64 %s -; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.3-generic 
-mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefix=W32 %s -; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.3-generic -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefix=W64 %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10-1-generic -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefix=W32 %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10-1-generic -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefix=W64 %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10-3-generic -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefix=W32 %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10-3-generic -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefix=W64 %s ; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx11-generic -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefix=W32 %s ; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx11-generic -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefix=W64 %s diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll index cf324d62e1de1e..3a5a60896ee245 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll @@ -3,7 +3,7 @@ ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX9 %s ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX9 %s ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s -; RUN: llc < %s -mtriple=amdgcn 
-mcpu=gfx10.1-generic --amdhsa-code-object-version=6 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s +; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx10-1-generic --amdhsa-code-object-version=6 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=6 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX12 %s diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll index 10e1ae3ecfcbd4..c8421c66f97c38 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll @@ -3,7 +3,7 @@ ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx10.1-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx10-1-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=6 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 
-amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll index ca136a6a0d5ba2..bcec6ce9ed376e 100644 --- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll @@ -54,8 +54,8 @@ define amdgpu_kernel void @test_kernel() { ; RUN: diff %t-specify.txt %t-detect.txt ; ----------------------------------GFX10-------------------------------------- -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx10.3-generic -filetype=obj -O0 -o %t.o %s -; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx10.3-generic %t.o > %t-specify.txt +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx10-3-generic -filetype=obj -O0 -o %t.o %s +; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx10-3-generic %t.o > %t-specify.txt ; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt ; RUN: diff %t-specify.txt %t-detect.txt @@ -94,8 +94,8 @@ define amdgpu_kernel void @test_kernel() { ; RUN: llvm-objdump -D %t.o > %t-detect.txt ; RUN: diff %t-specify.txt %t-detect.txt -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx10.1-generic -filetype=obj -O0 -o %t.o %s -; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx10.1-generic %t.o > %t-specify.txt +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx10-1-generic -filetype=obj -O0 -o %t.o %s +; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx10-1-generic %t.o > %t-specify.txt ; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt ; RUN: diff %t-specify.txt %t-detect.txt diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp 
b/llvm/tools/llvm-readobj/ELFDumper.cpp index 8e68f08c3fa9ab..50ea63e87a43be 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -1622,8 +1622,8 @@ const EnumEntry ElfHeaderMipsFlags[] = { ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1200, "gfx1200"), \ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1201, "gfx1201"), \ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC, "gfx9-generic"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC, "gfx10.1-generic"), \ - ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC, "gfx10.3-generic"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC, "gfx10-1-generic"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC, "gfx10-3-generic"), \ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC, "gfx11-generic") // clang-format on From dd1897c6cb028bda7d4d541d1bb33965eccf0a68 Mon Sep 17 00:00:00 2001 From: Philipp Tomsich Date: Wed, 14 Feb 2024 06:23:14 -0800 Subject: [PATCH 138/240] [AArch64] Initial Ampere1B scheduling model (#81341) The Ampere1B core is enabled with a new scheduling/pipeline model, as it provides significant updates over the Ampere1 core; it reduces latencies on many instructions, has some micro-ops reassigned between the XY and X units, and provides modelling for the instructions added since Ampere1 and Ampere1A. As this is the first model implementing the CSSC instructions, we update the UnsupportedFeatures on all other models (that have CompleteModel set). Testcases are added under llvm-mca: these showed the FullFP16 feature missing, so we are adding it in as part of this commit. This *adds tests and additional fixes* compared to the reverted #81338. 
--- llvm/lib/Target/AArch64/AArch64.td | 5 +- llvm/lib/Target/AArch64/AArch64SchedA53.td | 2 +- llvm/lib/Target/AArch64/AArch64SchedA57.td | 2 +- llvm/lib/Target/AArch64/AArch64SchedA64FX.td | 3 +- .../Target/AArch64/AArch64SchedAmpere1B.td | 1149 +++++ .../lib/Target/AArch64/AArch64SchedCyclone.td | 2 +- .../Target/AArch64/AArch64SchedExynosM3.td | 2 +- .../Target/AArch64/AArch64SchedExynosM4.td | 2 +- .../Target/AArch64/AArch64SchedExynosM5.td | 2 +- llvm/lib/Target/AArch64/AArch64SchedFalkor.td | 2 +- llvm/lib/Target/AArch64/AArch64SchedKryo.td | 2 +- .../Target/AArch64/AArch64SchedNeoverseN1.td | 2 +- .../Target/AArch64/AArch64SchedNeoverseN2.td | 2 +- .../Target/AArch64/AArch64SchedNeoverseV1.td | 3 +- .../Target/AArch64/AArch64SchedNeoverseV2.td | 3 +- llvm/lib/Target/AArch64/AArch64SchedTSV110.td | 2 +- .../Target/AArch64/AArch64SchedThunderX.td | 2 +- .../AArch64/AArch64SchedThunderX2T99.td | 2 +- .../AArch64/AArch64SchedThunderX3T110.td | 2 +- .../Ampere/Ampere1B/basic-instructions.s | 3724 +++++++++++++++++ .../Ampere/Ampere1B/cssc-instructions.s | 76 + .../Ampere/Ampere1B/mte-instructions.s | 349 ++ .../Ampere/Ampere1B/neon-instructions.s | 3235 ++++++++++++++ .../Ampere/Ampere1B/shifted-register.s | 31 + 24 files changed, 8587 insertions(+), 19 deletions(-) create mode 100644 llvm/lib/Target/AArch64/AArch64SchedAmpere1B.td create mode 100644 llvm/test/tools/llvm-mca/AArch64/Ampere/Ampere1B/basic-instructions.s create mode 100644 llvm/test/tools/llvm-mca/AArch64/Ampere/Ampere1B/cssc-instructions.s create mode 100644 llvm/test/tools/llvm-mca/AArch64/Ampere/Ampere1B/mte-instructions.s create mode 100644 llvm/test/tools/llvm-mca/AArch64/Ampere/Ampere1B/neon-instructions.s create mode 100644 llvm/test/tools/llvm-mca/AArch64/Ampere/Ampere1B/shifted-register.s diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index 3377fcf1447282..169b00e5ebc989 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ 
b/llvm/lib/Target/AArch64/AArch64.td @@ -837,6 +837,7 @@ include "AArch64SchedA64FX.td" include "AArch64SchedThunderX3T110.td" include "AArch64SchedTSV110.td" include "AArch64SchedAmpere1.td" +include "AArch64SchedAmpere1B.td" include "AArch64SchedNeoverseN1.td" include "AArch64SchedNeoverseN2.td" include "AArch64SchedNeoverseV1.td" @@ -1555,7 +1556,7 @@ def ProcessorFeatures { FeatureMTE, FeatureSSBS, FeatureRandGen, FeatureSB, FeatureSM4, FeatureSHA2, FeatureSHA3, FeatureAES, FeatureCSSC, - FeatureWFxT]; + FeatureWFxT, FeatureFullFP16]; // ETE and TRBE are future architecture extensions. We temporarily enable them // by default for users targeting generic AArch64. The extensions do not @@ -1723,7 +1724,7 @@ def : ProcessorModel<"ampere1", Ampere1Model, ProcessorFeatures.Ampere1, def : ProcessorModel<"ampere1a", Ampere1Model, ProcessorFeatures.Ampere1A, [TuneAmpere1A]>; -def : ProcessorModel<"ampere1b", Ampere1Model, ProcessorFeatures.Ampere1B, +def : ProcessorModel<"ampere1b", Ampere1BModel, ProcessorFeatures.Ampere1B, [TuneAmpere1B]>; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AArch64SchedA53.td b/llvm/lib/Target/AArch64/AArch64SchedA53.td index 3e4168f5f445f5..c714bad92b7fbb 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedA53.td +++ b/llvm/lib/Target/AArch64/AArch64SchedA53.td @@ -29,7 +29,7 @@ def CortexA53Model : SchedMachineModel { list UnsupportedFeatures = !listconcat(SVEUnsupported.F, PAUnsupported.F, SMEUnsupported.F, - [HasMTE]); + [HasMTE, HasCSSC]); } diff --git a/llvm/lib/Target/AArch64/AArch64SchedA57.td b/llvm/lib/Target/AArch64/AArch64SchedA57.td index 277ec772cf0f10..ebbc3b72b50609 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedA57.td +++ b/llvm/lib/Target/AArch64/AArch64SchedA57.td @@ -34,7 +34,7 @@ def CortexA57Model : SchedMachineModel { list UnsupportedFeatures = !listconcat(SVEUnsupported.F, PAUnsupported.F, SMEUnsupported.F, - [HasMTE]); + [HasMTE, HasCSSC]); } 
//===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AArch64SchedA64FX.td b/llvm/lib/Target/AArch64/AArch64SchedA64FX.td index 7edce4b61605d2..d6fe84a2c9c9b4 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedA64FX.td +++ b/llvm/lib/Target/AArch64/AArch64SchedA64FX.td @@ -22,7 +22,8 @@ def A64FXModel : SchedMachineModel { list UnsupportedFeatures = !listconcat(SMEUnsupported.F, SVEUnsupported.F, [HasMTE, HasMatMulInt8, HasBF16, - HasPAuth, HasPAuthLR, HasCPA]); + HasPAuth, HasPAuthLR, HasCPA, + HasCSSC]); let FullInstRWOverlapCheck = 0; } diff --git a/llvm/lib/Target/AArch64/AArch64SchedAmpere1B.td b/llvm/lib/Target/AArch64/AArch64SchedAmpere1B.td new file mode 100644 index 00000000000000..9c4f000cf351b2 --- /dev/null +++ b/llvm/lib/Target/AArch64/AArch64SchedAmpere1B.td @@ -0,0 +1,1149 @@ +//=- AArch64SchedAmpere1B.td - Ampere-1B scheduling def -----*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for the Ampere Computing Ampere-1B to +// support instruction scheduling and other instruction cost heuristics. +// +//===----------------------------------------------------------------------===// + +// The Ampere-1B core is an out-of-order micro-architecture. The front +// end has branch prediction, with a 10-cycle recovery time from a +// mispredicted branch. Instructions coming out of the front end are +// decoded into internal micro-ops (uops). + +def Ampere1BModel : SchedMachineModel { + let IssueWidth = 12; // Maximum micro-ops dispatch rate. 
+ let MicroOpBufferSize = 192; // micro-op re-order buffer size + let LoadLatency = 3; // Optimistic load latency + let MispredictPenalty = 10; // Branch mispredict penalty + let LoopMicroOpBufferSize = 32; // Instruction queue size + let CompleteModel = 1; + + list UnsupportedFeatures = !listconcat(SVEUnsupported.F, + SMEUnsupported.F, + PAUnsupported.F); +} + +let SchedModel = Ampere1BModel in { + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available on Ampere-1B. + +def Ampere1BUnitA : ProcResource<2>; // integer single-cycle, branch, and flags r/w +def Ampere1BUnitB : ProcResource<2>; // integer single-cycle, and complex shifts +def Ampere1BUnitBS : ProcResource<1>; // integer multi-cycle +def Ampere1BUnitL : ProcResource<2>; // load +def Ampere1BUnitS : ProcResource<2>; // store address calculation +def Ampere1BUnitX : ProcResource<1>; // FP and vector operations, and flag write +def Ampere1BUnitY : ProcResource<1>; // FP and vector operations, and crypto +def Ampere1BUnitZ : ProcResource<1>; // FP store data and FP-to-integer moves + +def Ampere1BUnitAB : ProcResGroup<[Ampere1BUnitA, Ampere1BUnitB]>; +def Ampere1BUnitXY : ProcResGroup<[Ampere1BUnitX, Ampere1BUnitY]>; + +//===----------------------------------------------------------------------===// +// Define customized scheduler read/write types specific to the Ampere-1B. 
+ +def Ampere1BWrite_1cyc_1A : SchedWriteRes<[Ampere1BUnitA]> { + let Latency = 1; + let NumMicroOps = 1; +} + +def Ampere1BWrite_1cyc_2A : SchedWriteRes<[Ampere1BUnitA, Ampere1BUnitA]> { + let Latency = 1; + let NumMicroOps = 2; +} + +def Ampere1BWrite_1cyc_1B : SchedWriteRes<[Ampere1BUnitB]> { + let Latency = 1; + let NumMicroOps = 1; +} + +def Ampere1BWrite_1cyc_1BS : SchedWriteRes<[Ampere1BUnitBS]> { + let Latency = 1; + let NumMicroOps = 1; +} + +def Ampere1BWrite_1cyc_1BS_1B : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitB]> { + let Latency = 1; + let NumMicroOps = 2; +} + +def Ampere1BWrite_1cyc_1AB : SchedWriteRes<[Ampere1BUnitAB]> { + let Latency = 1; + let NumMicroOps = 1; +} + +def Ampere1BWrite_1cyc_1AB_1A : SchedWriteRes<[Ampere1BUnitAB, Ampere1BUnitA]> { + let Latency = 1; + let NumMicroOps = 2; +} + +def Ampere1BWrite_1cyc_1L : SchedWriteRes<[Ampere1BUnitL]> { + let Latency = 1; + let NumMicroOps = 1; +} + +def Ampere1BWrite_1cyc_1S : SchedWriteRes<[Ampere1BUnitS]> { + let Latency = 1; + let NumMicroOps = 1; +} + +def Ampere1BWrite_1cyc_2S : SchedWriteRes<[Ampere1BUnitS, Ampere1BUnitS]> { + let Latency = 1; + let NumMicroOps = 2; +} + +def Ampere1BWrite_2cyc_1Y : SchedWriteRes<[Ampere1BUnitY]> { + let Latency = 2; + let NumMicroOps = 1; +} + +def Ampere1BWrite_2cyc_2AB : SchedWriteRes<[Ampere1BUnitAB, Ampere1BUnitAB]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def Ampere1BWrite_2cyc_1B_1AB : SchedWriteRes<[Ampere1BUnitB, Ampere1BUnitAB]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def Ampere1BWrite_2cyc_1B_1S : SchedWriteRes<[Ampere1BUnitB, Ampere1BUnitS]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def Ampere1BWrite_2cyc_1B_1S_1AB : SchedWriteRes<[Ampere1BUnitB, + Ampere1BUnitS, + Ampere1BUnitAB]> { + let Latency = 2; + let NumMicroOps = 3; +} + +def Ampere1BWrite_2cyc_1S_2Z : SchedWriteRes<[Ampere1BUnitS, + Ampere1BUnitZ, + Ampere1BUnitZ]> { + let Latency = 2; + let NumMicroOps = 3; +} + +def Ampere1BWrite_2cyc_1XY : 
SchedWriteRes<[Ampere1BUnitXY]> { + let Latency = 2; + let NumMicroOps = 1; +} + +def Ampere1BWrite_2cyc_1S_1Z : SchedWriteRes<[Ampere1BUnitS, Ampere1BUnitZ]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def Ampere1BWrite_3cyc_1BS : SchedWriteRes<[Ampere1BUnitBS]> { + let Latency = 3; + let NumMicroOps = 1; +} + +def Ampere1BWrite_3cyc_1L : SchedWriteRes<[Ampere1BUnitL]> { + let Latency = 3; + let NumMicroOps = 1; +} + +def Ampere1BWrite_3cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { + let Latency = 3; + let NumMicroOps = 1; +} + +def Ampere1BWrite_3cyc_1XY : SchedWriteRes<[Ampere1BUnitXY]> { + let Latency = 3; + let NumMicroOps = 1; +} + +def Ampere1BWrite_3cyc_1Z : SchedWriteRes<[Ampere1BUnitZ]> { + let Latency = 3; + let NumMicroOps = 1; +} + +def Ampere1BWrite_3cyc_1S_1Z : SchedWriteRes<[Ampere1BUnitS, + Ampere1BUnitZ]> { + let Latency = 3; + let NumMicroOps = 2; +} + +def Ampere1BWrite_3cyc_1S_2Z : SchedWriteRes<[Ampere1BUnitS, + Ampere1BUnitZ, Ampere1BUnitZ]> { + let Latency = 3; + let NumMicroOps = 3; +} + +def Ampere1BWrite_3cyc_2S_2Z : SchedWriteRes<[Ampere1BUnitS, Ampere1BUnitS, + Ampere1BUnitZ, Ampere1BUnitZ]> { + let Latency = 3; + let NumMicroOps = 4; +} + +def Ampere1BWrite_4cyc_1BS_1AB : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitAB]> { + let Latency = 4; + let NumMicroOps = 2; +} + +def Ampere1BWrite_4cyc_1L : SchedWriteRes<[Ampere1BUnitL]> { + let Latency = 4; + let NumMicroOps = 1; +} + +def Ampere1BWrite_4cyc_2L : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL]> { + let Latency = 4; + let NumMicroOps = 2; +} + +def Ampere1BWrite_4cyc_1L_1B : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitB]> { + let Latency = 4; + let NumMicroOps = 2; +} + +def Ampere1BWrite_4cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { + let Latency = 4; + let NumMicroOps = 1; +} + +def Ampere1BWrite_4cyc_1XY : SchedWriteRes<[Ampere1BUnitXY]> { + let Latency = 4; + let NumMicroOps = 1; +} + +def Ampere1BWrite_4cyc_2XY : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 4; 
+ let NumMicroOps = 2; +} + +def Ampere1BWrite_5cyc_1BS : SchedWriteRes<[Ampere1BUnitBS]> { + let Latency = 5; + let NumMicroOps = 1; +} + +def Ampere1BWrite_4cyc_1XY_1S_1Z : SchedWriteRes<[Ampere1BUnitXY, + Ampere1BUnitS, + Ampere1BUnitZ]> { + let Latency = 4; + let NumMicroOps = 3; +} + +def Ampere1BWrite_4cyc_3S_3Z : SchedWriteRes<[Ampere1BUnitS, Ampere1BUnitS, + Ampere1BUnitS, Ampere1BUnitZ, + Ampere1BUnitZ, Ampere1BUnitZ]> { + let Latency = 4; + let NumMicroOps = 6; +} + +def Ampere1BWrite_5cyc_4S_4Z : SchedWriteRes<[Ampere1BUnitS, Ampere1BUnitS, + Ampere1BUnitS, Ampere1BUnitS, + Ampere1BUnitZ, Ampere1BUnitZ, + Ampere1BUnitZ, Ampere1BUnitZ]> { + let Latency = 5; + let NumMicroOps = 8; +} + +def Ampere1BWrite_5cyc_1L_1BS : SchedWriteRes<[Ampere1BUnitL, + Ampere1BUnitBS]> { + let Latency = 5; + let NumMicroOps = 2; +} + +def Ampere1BWrite_5cyc_3L : SchedWriteRes<[Ampere1BUnitL, + Ampere1BUnitL, + Ampere1BUnitL]> { + let Latency = 5; + let NumMicroOps = 3; +} + +def Ampere1BWrite_5cyc_4L : SchedWriteRes<[Ampere1BUnitL, + Ampere1BUnitL, + Ampere1BUnitL, + Ampere1BUnitL]> { + let Latency = 5; + let NumMicroOps = 4; +} + +def Ampere1BWrite_5cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { + let Latency = 5; + let NumMicroOps = 1; +} + +def Ampere1BWrite_5cyc_2XY_2S_2Z : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitS, Ampere1BUnitS, + Ampere1BUnitZ, Ampere1BUnitZ]> { + let Latency = 5; + let NumMicroOps = 6; +} + +def Ampere1BWrite_6cyc_1BS_1A : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitA]> { + let Latency = 6; + let NumMicroOps = 2; +} + +def Ampere1BWrite_6cyc_1BS_2A : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitA, + Ampere1BUnitA]> { + let Latency = 6; + let NumMicroOps = 3; +} + +def Ampere1BWrite_6cyc_1L_1XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitXY]> { + let Latency = 6; + let NumMicroOps = 2; +} + +def Ampere1BWrite_6cyc_2L_2XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 6; + let NumMicroOps 
= 4; +} + +def Ampere1BWrite_6cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { + let Latency = 6; + let NumMicroOps = 2; +} + +def Ampere1BWrite_6cyc_2XY : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 6; + let NumMicroOps = 2; +} + +def Ampere1BWrite_6cyc_3XY : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitXY]> { + let Latency = 6; + let NumMicroOps = 3; +} + +def Ampere1BWrite_6cyc_2XY_2S_2Z : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitS, Ampere1BUnitS, + Ampere1BUnitZ, Ampere1BUnitZ]> { + let Latency = 6; + let NumMicroOps = 6; +} + +def Ampere1BWrite_6cyc_3XY_3S_3Z : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitS, Ampere1BUnitS, Ampere1BUnitS, + Ampere1BUnitZ, Ampere1BUnitZ, Ampere1BUnitZ]> { + let Latency = 6; + let NumMicroOps = 9; +} + +def Ampere1BWrite_7cyc_1BS_1XY : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitXY]> { + let Latency = 7; + let NumMicroOps = 2; +} + +def Ampere1BWrite_7cyc_1XY_1Z : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitZ]> { + let Latency = 7; + let NumMicroOps = 2; +} + +def Ampere1BWrite_7cyc_1X_1Z : SchedWriteRes<[Ampere1BUnitX, Ampere1BUnitZ]> { + let Latency = 7; + let NumMicroOps = 2; +} + +def Ampere1BWrite_7cyc_3L_3XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitL, Ampere1BUnitXY, + Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 7; + let NumMicroOps = 6; +} + +def Ampere1BWrite_7cyc_4L_4XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 7; + let NumMicroOps = 8; +} + +def Ampere1BWrite_7cyc_4XY_4S_4Z : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitS, Ampere1BUnitS, + Ampere1BUnitS, Ampere1BUnitS, + Ampere1BUnitZ, Ampere1BUnitZ, + Ampere1BUnitZ, Ampere1BUnitZ]> { + let Latency = 7; + let NumMicroOps = 12; +} + +def Ampere1BWrite_8cyc_1BS_1L : 
SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitL]> { + let Latency = 8; + let NumMicroOps = 2; +} + +def Ampere1BWrite_8cyc_1BS_1XY : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitXY]> { + let Latency = 8; + let NumMicroOps = 2; +} + +def Ampere1BWrite_8cyc_2L_3XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitXY]> { + let Latency = 8; + let NumMicroOps = 5; +} + +def Ampere1BWrite_8cyc_3L_3XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitL, Ampere1BUnitXY, + Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 8; + let NumMicroOps = 6; +} + +def Ampere1BWrite_8cyc_4L_4XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 8; + let NumMicroOps = 8; +} + +def Ampere1BWrite_8cyc_2XY : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 8; + let NumMicroOps = 2; +} + +def Ampere1BWrite_8cyc_4XY : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 8; + let NumMicroOps = 4; +} + +def Ampere1BWrite_9cyc_6XY_4S_4Z : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitS, Ampere1BUnitS, + Ampere1BUnitS, Ampere1BUnitS, + Ampere1BUnitZ, Ampere1BUnitZ, + Ampere1BUnitZ, Ampere1BUnitZ]> { + let Latency = 9; + let NumMicroOps = 14; +} + +def Ampere1BWrite_9cyc_1A_1BS_1X : SchedWriteRes<[Ampere1BUnitA, Ampere1BUnitBS, Ampere1BUnitX]> { + let Latency = 9; + let NumMicroOps = 3; +} + +def Ampere1BWrite_9cyc_1A_1BS_1XY : SchedWriteRes<[Ampere1BUnitA, Ampere1BUnitBS, Ampere1BUnitXY]> { + let Latency = 9; + let NumMicroOps = 3; +} + +def Ampere1BWrite_9cyc_3L_3XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitL, Ampere1BUnitXY, + Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 9; + let NumMicroOps = 6; +} + +def Ampere1BWrite_9cyc_1X : SchedWriteRes<[Ampere1BUnitX]> 
{ + let Latency = 9; + let NumMicroOps = 1; +} + +def Ampere1BWrite_9cyc_3XY : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 9; + let NumMicroOps = 3; +} + +def Ampere1BWrite_10cyc_4L_8XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 10; + let NumMicroOps = 12; +} + +def Ampere1BWrite_11cyc_1BS_2XY : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 11; + let NumMicroOps = 3; +} + +def Ampere1BWrite_11cyc_4L_8XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 11; + let NumMicroOps = 12; +} + +def Ampere1BWrite_12cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { + let Latency = 12; + let NumMicroOps = 1; +} + +def Ampere1BWrite_13cyc_1BS_1X : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitX]> { + let Latency = 13; + let NumMicroOps = 2; +} + +def Ampere1BWrite_17cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { + let Latency = 17; + let NumMicroOps = 1; +} + +def Ampere1BWrite_19cyc_2BS_1X : SchedWriteRes<[Ampere1BUnitBS, + Ampere1BUnitBS, + Ampere1BUnitX]> { + let Latency = 19; + let NumMicroOps = 3; +} + +def Ampere1BWrite_19cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { + let Latency = 19; + let NumMicroOps = 1; +} + +def Ampere1BWrite_21cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { + let Latency = 21; + let NumMicroOps = 1; +} + +def Ampere1BWrite_33cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { + let Latency = 33; + let NumMicroOps = 1; +} + +def Ampere1BWrite_39cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { + let Latency = 39; + let NumMicroOps = 1; +} + +def Ampere1BWrite_63cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { + let Latency = 63; + let NumMicroOps = 1; +} + +// For basic arithmetic, we have more flexibility for short shifts (LSL shift <= 4), +// which are a single uop, and for extended 
registers, which have full flexibility +// across Unit A or B for both uops. +def Ampere1BWrite_Arith : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar]>; + +def Ampere1BWrite_ArithFlagsetting : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar]>; + +//===----------------------------------------------------------------------===// +// Map the target-defined scheduler read/write resources and latencies for Ampere-1B. +// This provides a coarse model, which is then specialised below. + +def : WriteRes; // MOVN, MOVZ +def : WriteRes; // ALU +def : WriteRes { + let Latency = 2; + let NumMicroOps = 2; +} // ALU of Shifted-Reg +def : WriteRes { + let Latency = 2; + let NumMicroOps = 2; +} // ALU of Extended-Reg +def : WriteRes; // EXTR shifts a reg pair +def : WriteRes; // Shift/Scale +def : WriteRes { + let Latency = 13; +} // 32-bit Divide +def : WriteRes { + let Latency = 19; +} // 64-bit Divide +def : WriteRes { + let Latency = 3; +} // 32-bit Multiply +def : WriteRes { + let Latency = 3; +} // 64-bit Multiply +def : WriteRes; +def : WriteRes; +def : WriteRes { + let Latency = 3; +} // Load from base addr plus immediate offset +def : WriteRes { + let Latency = 1; +} // Store to base addr plus immediate offset +def : WriteRes { + let Latency = 1; + let NumMicroOps = 1; +} // Store a register pair. +def : WriteRes; +def : WriteRes { + let Latency = 3; + let NumMicroOps = 1; +} // Load from a register index (maybe scaled). +def : WriteRes { + let Latency = 1; + let NumMicroOps = 2; +} // Store to a register index (maybe scaled). +def : WriteRes { + let Latency = 2; +} // General floating-point ops. +def : WriteRes { + let Latency = 3; +} // Floating-point compare. +def : WriteRes { + let Latency = 3; +} // Float conversion. +def : WriteRes { +} // Float-int register copy. +def : WriteRes { + let Latency = 2; +} // Float-int register copy. +def : WriteRes { + let Latency = 4; +} // Floating-point multiply. 
+def : WriteRes { + let Latency = 19; +} // Floating-point division. +def : WriteRes { + let Latency = 3; +} // 64bit Vector D ops. +def : WriteRes { + let Latency = 3; +} // 128bit Vector Q ops. +def : WriteRes { + let Latency = 4; +} // Vector loads. +def : WriteRes { + let Latency = 2; +} // Vector stores. + +def : WriteRes { let Unsupported = 1; } + +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } + +def : WriteRes { + let Latency = 3; +} // The second register of a load-pair: LDP,LDPSW,LDNP,LDXP,LDAXP + +// Forwarding logic. +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +//===----------------------------------------------------------------------===// +// Specialising the scheduling model further for Ampere-1B. + +def : InstRW<[Ampere1BWrite_1cyc_1AB], (instrs COPY)>; + +// Branch instructions +def : InstRW<[Ampere1BWrite_1cyc_1A], (instrs Bcc, BL, RET)>; +def : InstRW<[Ampere1BWrite_1cyc_1A], + (instrs CBZW, CBZX, CBNZW, CBNZX, TBZW, TBZX, TBNZW, TBNZX)>; +def : InstRW<[Ampere1BWrite_1cyc_2A], (instrs BLR)>; + +// Common Short Sequence Compression (CSSC) +def : InstRW<[Ampere1BWrite_1cyc_1AB], (instregex "^ABS[WX]")>; +def : InstRW<[Ampere1BWrite_3cyc_1BS], (instregex "^CNT[WX]")>; +def : InstRW<[Ampere1BWrite_1cyc_1B], (instregex "^CTZ[WX]")>; +def : InstRW<[Ampere1BWrite_1cyc_1AB_1A], (instregex "^[SU](MAX|MIN)[WX]")>; + +// Cryptography instructions +// -- AES encryption/decryption +def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^AES[DE]")>; +def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^AESI?MC")>; +// -- Polynomial multiplication +def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^PMUL", "^PMULL")>; +// -- SHA-256 hash +def : InstRW<[Ampere1BWrite_4cyc_1X], (instregex "^SHA256(H|H2)")>; +// -- SHA-256 schedule update +def : 
InstRW<[Ampere1BWrite_2cyc_1Y], (instregex "^SHA256SU[01]")>; +// -- SHA-3 instructions +def : InstRW<[Ampere1BWrite_2cyc_1XY], + (instregex "^BCAX", "^EOR3", "^RAX1", "^XAR")>; +// -- SHA-512 hash +def : InstRW<[Ampere1BWrite_4cyc_1X], (instregex "^SHA512(H|H2)")>; +// -- SHA-512 schedule update +def : InstRW<[Ampere1BWrite_2cyc_1Y], (instregex "^SHA512SU[01]")>; +// -- SHA1 choose/majority/parity +def : InstRW<[Ampere1BWrite_4cyc_1X], (instregex "^SHA1[CMP]")>; +// -- SHA1 hash/schedule update +def : InstRW<[Ampere1BWrite_2cyc_1Y], (instregex "^SHA1SU[01]")>; +def : InstRW<[Ampere1BWrite_2cyc_1Y], (instregex "^SHA1H")>; +// -- SM3 hash +def : InstRW<[Ampere1BWrite_2cyc_1XY], + (instregex "^SM3PARTW[12]$", "^SM3SS1$", "^SM3TT[12][AB]$")>; +def : InstRW<[Ampere1BWrite_4cyc_1X], (instrs SM4E, SM4ENCKEY)>; + +// FP and vector load instructions +// -- Load 1-element structure to one/all lanes +// ---- all lanes +def : InstRW<[Ampere1BWrite_6cyc_1L_1XY], + (instregex "^LD1Rv(8b|4h|2s|16b|8h|4s|2d)")>; +// ---- one lane +def : InstRW<[Ampere1BWrite_6cyc_1L_1XY], + (instregex "^LD1i(8|16|32|64)")>; +// -- Load 1-element structure to one/all lanes, 1D size +def : InstRW<[Ampere1BWrite_4cyc_1L], + (instregex "^LD1Rv1d")>; +// -- Load 1-element structures to 1 register +def : InstRW<[Ampere1BWrite_4cyc_1L], + (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)")>; +// -- Load 1-element structures to 2 registers +def : InstRW<[Ampere1BWrite_4cyc_2L], + (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)")>; +// -- Load 1-element structures to 3 registers +def : InstRW<[Ampere1BWrite_5cyc_3L], + (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>; +// -- Load 1-element structures to 4 registers +def : InstRW<[Ampere1BWrite_5cyc_4L], + (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)")>; +// -- Load 2-element structure to all lanes of 2 registers, 1D size +def : InstRW<[Ampere1BWrite_4cyc_2L], + (instregex "^LD2Rv1d")>; +// -- Load 2-element structure to all lanes of 2 registers, other 
sizes +def : InstRW<[Ampere1BWrite_6cyc_2L_2XY], + (instregex "^LD2Rv(8b|4h|2s|16b|8h|4s|2d)")>; +// -- Load 2-element structure to one lane of 2 registers +def : InstRW<[Ampere1BWrite_6cyc_2L_2XY], + (instregex "^LD2i(8|16|32|64)")>; +// -- Load 2-element structures to 2 registers, 16B/8H/4S/2D size +def : InstRW<[Ampere1BWrite_6cyc_2L_2XY], + (instregex "^LD2Twov(16b|8h|4s|2d)")>; +// -- Load 2-element structures to 2 registers, 8B/4H/2S size +def : InstRW<[Ampere1BWrite_8cyc_2L_3XY], + (instregex "^LD2Twov(8b|4h|2s)")>; +// -- Load 3-element structure to all lanes of 3 registers, 1D size +def : InstRW<[Ampere1BWrite_5cyc_3L], + (instregex "^LD3Rv1d")>; +// -- Load 3-element structure to all lanes of 3 registers, other sizes +def : InstRW<[Ampere1BWrite_7cyc_3L_3XY], + (instregex "^LD3Rv(8b|4h|2s|16b|8h|4s|2d)")>; +// -- Load 3-element structure to one lane of 3 registers +def : InstRW<[Ampere1BWrite_7cyc_3L_3XY], + (instregex "^LD3i(8|16|32|64)")>; +// -- Load 3-element structures to 3 registers, 16B/8H/4S sizes +def : InstRW<[Ampere1BWrite_8cyc_3L_3XY], + (instregex "^LD3Threev(16b|8h|4s)")>; +// -- Load 3-element structures to 3 registers, 2D size +def : InstRW<[Ampere1BWrite_7cyc_3L_3XY], + (instregex "^LD3Threev2d")>; +// -- Load 3-element structures to 3 registers, 8B/4H/2S sizes +def : InstRW<[Ampere1BWrite_9cyc_3L_3XY], + (instregex "^LD3Threev(8b|4h|2s)")>; +// -- Load 4-element structure to all lanes of 4 registers, 1D size +def : InstRW<[Ampere1BWrite_5cyc_4L], + (instregex "^LD4Rv1d")>; +// -- Load 4-element structure to all lanes of 4 registers, other sizes +def : InstRW<[Ampere1BWrite_7cyc_4L_4XY], + (instregex "^LD4Rv(8b|4h|2s|16b|8h|4s|2d)")>; +// -- Load 4-element structure to one lane of 4 registers +def : InstRW<[Ampere1BWrite_7cyc_4L_4XY], + (instregex "^LD4i(8|16|32|64)")>; +// -- Load 4-element structures to 4 registers, 2D size +def : InstRW<[Ampere1BWrite_8cyc_4L_4XY], + (instregex "^LD4Fourv2d")>; +// -- Load 4-element structures to 4 
registers, 2S size +def : InstRW<[Ampere1BWrite_11cyc_4L_8XY], + (instregex "^LD4Fourv2s")>; +// -- Load 4-element structures to 4 registers, other sizes +def : InstRW<[Ampere1BWrite_10cyc_4L_8XY], + (instregex "^LD4Fourv(8b|4h|16b|8h|4s)")>; +// -- Load pair, Q-form +def : InstRW<[Ampere1BWrite_4cyc_2L], (instregex "LDN?PQ")>; +// -- Load pair, S/D-form +def : InstRW<[Ampere1BWrite_5cyc_1L_1BS], (instregex "LDN?P(S|D)")>; +// -- Load register +def : InstRW<[Ampere1BWrite_4cyc_1L], (instregex "LDU?R[BHSDQ]i")>; +// -- Load register, sign-extended register +def : InstRW<[Ampere1BWrite_4cyc_1L], (instregex "LDR[BHSDQ]ro(W|X)")>; + +// FP and vector store instructions +// -- Store 1-element structure from one lane of 1 register +def : InstRW<[Ampere1BWrite_4cyc_1XY_1S_1Z], + (instregex "^ST1i(8|16|32|64)")>; +// -- Store 1-element structures from 1 register +def : InstRW<[Ampere1BWrite_2cyc_1S_1Z], + (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)")>; +// -- Store 1-element structures from 2 registers +def : InstRW<[Ampere1BWrite_3cyc_2S_2Z], + (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)")>; +// -- Store 1-element structures from 3 registers +def : InstRW<[Ampere1BWrite_4cyc_3S_3Z], + (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>; +// -- Store 1-element structures from 4 registers +def : InstRW<[Ampere1BWrite_5cyc_4S_4Z], + (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)")>; +// -- Store 2-element structure from one lane of 2 registers +def : InstRW<[Ampere1BWrite_5cyc_2XY_2S_2Z], + (instregex "^ST2i(8|16|32|64)")>; +// -- Store 2-element structures from 2 registers, 16B/8H/4S/2D sizes +def : InstRW<[Ampere1BWrite_5cyc_2XY_2S_2Z], + (instregex "^ST2Twov(16b|8h|4s|2d)")>; +// -- Store 2-element structures from 2 registers, 8B/4H/2S sizes +def : InstRW<[Ampere1BWrite_6cyc_2XY_2S_2Z], + (instregex "^ST2Twov(8b|4h|2s)")>; +// -- Store 3-element structure from one lane of 3 registers +def : InstRW<[Ampere1BWrite_6cyc_3XY_3S_3Z], + (instregex "^ST3i(8|16|32|64)")>; 
+// -- Store 3-element structures from 3 registers +def : InstRW<[Ampere1BWrite_6cyc_3XY_3S_3Z], + (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>; +// -- Store 4-element structure from one lane of 4 registers +def : InstRW<[Ampere1BWrite_7cyc_4XY_4S_4Z], + (instregex "^ST4i(8|16|32|64)")>; +// -- Store 4-element structures from 4 registers, 16B/8H/4S sizes +def : InstRW<[Ampere1BWrite_7cyc_4XY_4S_4Z], + (instregex "^ST4Fourv(16b|8h|4s)")>; +// -- Store 4-element structures from 4 registers, 2D sizes +def : InstRW<[Ampere1BWrite_7cyc_4XY_4S_4Z], + (instregex "^ST4Fourv2d")>; +// -- Store 4-element structures from 4 registers, 8B/4H/2S sizes +def : InstRW<[Ampere1BWrite_9cyc_6XY_4S_4Z], + (instregex "^ST4Fourv(8b|4h|2s)")>; +// -- Store pair, Q-form +def : InstRW<[Ampere1BWrite_3cyc_2S_2Z], (instregex "^STN?PQ")>; +// -- Store pair, S/D-form +def : InstRW<[Ampere1BWrite_3cyc_2S_2Z], (instregex "^STN?P[SD]")>; +// -- Store register +def : InstRW<[Ampere1BWrite_2cyc_1S_1Z], (instregex "^STU?R[BHSDQ](ui|i)")>; +// -- Store register, sign-extended register offset +def : InstRW<[Ampere1BWrite_2cyc_1S_1Z], (instregex "^STR[BHSDQ]ro[XW]")>; + +// FP data processing, bfloat16 format +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instrs BFCVT)>; +def : InstRW<[Ampere1BWrite_8cyc_2XY], (instrs BFCVTN, BFCVTN2)>; +def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^BFDOTv", "^BF16DOT")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instrs BFMMLA)>; +def : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^BFMLAL")>; + +// FP data processing, scalar/vector, half precision +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^F(ABD|ABS)v.[fi]16")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], + (instregex "^F(ADD|ADDP|CADD|NEG|NMUL|SUB)v.[fi]16")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], + (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v.[fi]16")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], + (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)16")>; +def : InstRW<[Ampere1BWrite_3cyc_1X], + (instregex "^FCMPE?H")>; +def : 
InstRW<[Ampere1BWrite_9cyc_1A_1BS_1X], + (instregex "^FCCMPE?H")>; +def : InstRW<[Ampere1BWrite_9cyc_1A_1BS_1XY], + (instregex "^FCSELH")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FCVT[AMNPZ][SU]v.[if]16")>; +// Convert FP to integer, H-form +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^[SUd]CVTFv.[fi]16")>; +// Convert to FP from GPR, H-form +def : InstRW<[Ampere1BWrite_8cyc_1BS_1XY], (instregex "^[SU]CVTF_ZPmZ_[DSH]toH$")>; +// Convert to FP from GPR, fixed-point, H-form +def : InstRW<[Ampere1BWrite_11cyc_1BS_2XY], (instregex "^[SU]CVTF[SU][WX]Hri$")>; +def : InstRW<[Ampere1BWrite_9cyc_1X], (instrs FDIVHrr)>; +def : InstRW<[Ampere1BWrite_17cyc_1X], (instregex "^FDIVv.[if]16")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^F(MAX|MIN)(NM)?P?v.[if]16")>; +def : InstRW<[Ampere1BWrite_6cyc_2XY], (instregex "^F(MAX|MIN)(NM)?Vv4[if]16")>; +def : InstRW<[Ampere1BWrite_9cyc_3XY], (instregex "^F(MAX|MIN)(NM)?Vv8[if]16")>; +def : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^FMULX?v.[if]16")>; +def : InstRW<[Ampere1BWrite_4cyc_1XY], (instrs FMULX16)>; +def : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^FN?M(ADD|SUB)[H]rrr")>; +def : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^FML[AS]v.[if]16")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FRECPXv.[if]16")>; +def : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^F(RECP|RSQRT)S16")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FRINT[AIMNPXZ]v.[if]16")>; +// FP square root, H-form +def : InstRW<[Ampere1BWrite_21cyc_1X], (instrs FSQRTHr)>; +// FP square root, vector-form, F16 +def : InstRW<[Ampere1BWrite_39cyc_1X], (instregex "^FSQRTv.f16")>; + +// FP data processing, scalar/vector, single/double precision +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^F(ABD|ABS)v.[fi](32|64)")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], + (instregex "^F(ADD|ADDP|CADD|NEG|NMUL|SUB)v.[fi](32|64)")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], + (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v.[fi](32|64)")>; 
+def : InstRW<[Ampere1BWrite_3cyc_1XY], + (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)(32|64)")>; +def : InstRW<[Ampere1BWrite_3cyc_1X], + (instregex "^FCMPE?(S|D)")>; +def : InstRW<[Ampere1BWrite_9cyc_1A_1BS_1X], + (instregex "^FCCMPE?(S|D)")>; +def : InstRW<[Ampere1BWrite_9cyc_1A_1BS_1XY], + (instregex "^FCSEL(S|D)")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FCVT[AMNPZ][SU]v.[if](32|64)")>; +// Convert FP to integer, S/D-form +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^[SUd]CVTFv.[fi](32|64)")>; +// Convert to FP from GPR, S/D-form +def : InstRW<[Ampere1BWrite_8cyc_1BS_1XY], (instregex "^[SU]CVTF_ZPmZ_[DSH]to[DS]$")>; +// Convert to FP from GPR, fixed-point, S/D-form +def : InstRW<[Ampere1BWrite_11cyc_1BS_2XY], (instregex "^[SU]CVTF[SU][WX][SD]ri$")>; +def : InstRW<[Ampere1BWrite_19cyc_1X], (instregex "^FDIVv.[if](64)", "FDIVD")>; +def : InstRW<[Ampere1BWrite_12cyc_1X], (instregex "^FDIVv.[if](32)", "FDIVS")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^F(MAX|MIN)(NM)?P?v.[if](32|64)")>; +def : InstRW<[Ampere1BWrite_6cyc_2XY], (instregex "^F(MAX|MIN)(NM)?Vv.[if](32|64)")>; +def : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^FMULX?v.[if](32|64)")>; +def : InstRW<[Ampere1BWrite_4cyc_1XY], (instrs FMULX32, FMULX64)>; +def : InstRW<[Ampere1BWrite_4cyc_1XY], (instrs FMULSrr, FNMULSrr)>; +def : InstRW<[Ampere1BWrite_4cyc_1XY], (instrs FMULDrr, FNMULDrr)>; +def : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^FN?M(ADD|SUB)[SD]rrr")>; +def : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^FML[AS]v.[if](32|64)")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FRECPXv.[if](32|64)")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^F(RECP|RSQRT)S(32|64)")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FRINT[AIMNPXZ]v.[if](32|64)")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FRINT(32|64)")>; +def : InstRW<[Ampere1BWrite_63cyc_1X], (instregex "^FSQRTv.f64", "^FSQRTDr")>; +def : InstRW<[Ampere1BWrite_33cyc_1X], (instregex 
"^FSQRTv.f32", "^FSQRTSr")>; + +// FP miscellaneous instructions +def : InstRW<[Ampere1BWrite_7cyc_1XY_1Z], (instregex "^FCVT[AMNPZ][SU][SU][XW][HSD]r")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FCVT[HSD]Hr")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FCVT[HSD][SD]r")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FCVTLv")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FCVT(N|XN)v")>; +def : InstRW<[Ampere1BWrite_7cyc_1X_1Z], (instrs FJCVTZS)>; +def : InstRW<[Ampere1BWrite_5cyc_1BS], (instregex "^FMOV[HSD][WX]r")>; +def : InstRW<[Ampere1BWrite_7cyc_1BS_1XY], (instregex "^FMOVDXHighr")>; +def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^FMOV[HSD][ri]")>; +def : InstRW<[Ampere1BWrite_5cyc_1X], (instregex "^FMOVXDHighr")>; +def : InstRW<[Ampere1BWrite_3cyc_1Z], (instregex "^FMOV[WX][HSD]r")>; + +// Integer arithmetic and logical instructions +def : InstRW<[Ampere1BWrite_1cyc_1A], + (instregex "ADC(W|X)r", "SBC(W|X)r")>; +def : InstRW<[Ampere1BWrite_Arith], + (instregex "(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)[WX]r[sx]")>; +def : InstRW<[Ampere1BWrite_1cyc_1AB], + (instregex "(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)[WX]r[ri]")>; +def : InstRW<[Ampere1BWrite_ArithFlagsetting], + (instregex "(ADD|AND|BIC|SUB)S[WX]r[sx]")>; +def : InstRW<[Ampere1BWrite_1cyc_1A], + (instregex "(ADD|AND|BIC|SUB)S[WX]r[ri]")>; +def : InstRW<[Ampere1BWrite_1cyc_1A], + (instregex "(ADC|SBC)S[WX]r")>; +def : InstRW<[Ampere1BWrite_1cyc_1A], (instrs RMIF)>; +def : InstRW<[Ampere1BWrite_1cyc_1A], + (instregex "(CCMN|CCMP)(X|W)")>; +def : InstRW<[Ampere1BWrite_1cyc_1A], + (instregex "(CSEL|CSINC|CSINV|CSNEG)(X|W)")>; +def : InstRW<[Ampere1BWrite_13cyc_1BS_1X], (instrs SDIVWr, UDIVWr)>; +def : InstRW<[Ampere1BWrite_19cyc_2BS_1X], (instrs SDIVXr, UDIVXr)>; +def : InstRW<[Ampere1BWrite_3cyc_1BS], + (instregex "(S|U)MULHr")>; +def : InstRW<[Ampere1BWrite_4cyc_1BS_1AB], + (instregex "(S|U)?M(ADD|SUB)L?r")>; + +// Integer load instructions +def : 
InstRW<[Ampere1BWrite_3cyc_1L], + (instregex "(LDNP|LDP|LDPSW)(X|W)")>; +def : InstRW<[Ampere1BWrite_3cyc_1L], + (instregex "LDR(B|D|H|Q|S)ui")>; +def : InstRW<[Ampere1BWrite_3cyc_1L], + (instregex "LDR(D|Q|W|X)l")>; +def : InstRW<[Ampere1BWrite_3cyc_1L], + (instregex "LDTR(B|H|W|X)i")>; +def : InstRW<[Ampere1BWrite_3cyc_1L], + (instregex "LDTRS(BW|BX|HW|HX|W)i")>; +def : InstRW<[Ampere1BWrite_3cyc_1L], + (instregex "LDUR(BB|HH|X|W)i")>; +def : InstRW<[Ampere1BWrite_3cyc_1L], + (instregex "LDURS(BW|BX|HW|HX|W)i")>; +def : InstRW<[Ampere1BWrite_3cyc_1L], + (instregex "LDR(HH|SHW|SHX|W|X)ro(W|X)")>; +def : InstRW<[Ampere1BWrite_1cyc_1L], + (instrs PRFMl, PRFMui, PRFUMi)>; +def : InstRW<[Ampere1BWrite_1cyc_1L], + (instrs PRFMroW, PRFMroX)>; + +// Integer miscellaneous instructions +def : InstRW<[Ampere1BWrite_1cyc_1A], (instrs ADR, ADRP)>; +def : InstRW<[Ampere1BWrite_1cyc_1B], (instregex "EXTR(W|X)")>; +def : InstRW<[Ampere1BWrite_1cyc_1B], (instregex "(S|U)?BFM(W|X)")>; +def : InstRW<[Ampere1BWrite_3cyc_1BS], (instregex "^CRC32C?[BHWX]")>; +def : InstRW<[Ampere1BWrite_1cyc_1B], (instregex "CLS(W|X)")>; +def : InstRW<[Ampere1BWrite_1cyc_1A], (instrs SETF8, SETF16)>; +def : InstRW<[Ampere1BWrite_1cyc_1AB], + (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>; +def : InstRW<[Ampere1BWrite_1cyc_1B], + (instregex "(RBIT|REV|REV16)(W|X)r", "REV32Xr")>; +def : InstRW<[Ampere1BWrite_1cyc_1B], + (instregex "(ASR|LSL|LSR|ROR)V(W|X)r")>; + +// Integer store instructions +def : InstRW<[Ampere1BWrite_1cyc_2S], (instregex "STNP(X|W)i")>; +def : InstRW<[Ampere1BWrite_1cyc_2S], (instrs STPXi)>; +def : InstRW<[Ampere1BWrite_2cyc_1B_1S], (instrs STPWi)>; +def : InstRW<[Ampere1BWrite_2cyc_1B_1S_1AB], (instregex "STP(W|X)(pre|post)")>; +def : InstRW<[Ampere1BWrite_1cyc_1S], (instrs STTRBi, STTRHi, STTRWi, STTRXi)>; +def : InstRW<[Ampere1BWrite_1cyc_1S], (instregex "STUR(BB|HH|X|W)i", + "STR(X|W)ui", + "STUR(BB|HH|X|W)i")>; +def : InstRW<[Ampere1BWrite_1cyc_2S], (instrs STRWroX, 
STRXroX)>; +def : InstRW<[Ampere1BWrite_1cyc_2S], (instrs STRWroW, STRXroW)>; + +// Memory tagging + +// Insert Random Tags +def : InstRW<[Ampere1BWrite_1cyc_1BS_1B], (instrs IRG, IRGstack)>; +// Load allocation tag +def : InstRW<[Ampere1BWrite_4cyc_1L_1B], (instrs LDG, LDGM)>; +// Store allocation tags +def : InstRW<[Ampere1BWrite_1cyc_1S], + (instrs STGi, STGM, STGPreIndex, STGPostIndex)>; +// Store allocation tags and pair of registers +def : InstRW<[Ampere1BWrite_1cyc_2S], + (instrs STGPi, STGPpre, STGPpost)>; +// Store allocation tags and zero data +def : InstRW<[Ampere1BWrite_1cyc_1S], + (instrs STZGi, STZGM, STZGPreIndex, STZGPostIndex)>; +// Store two tags +def : InstRW<[Ampere1BWrite_1cyc_2S], + (instrs ST2Gi, ST2GPreIndex, ST2GPostIndex)>; +// Store two tags and zero data +def : InstRW<[Ampere1BWrite_1cyc_2S], + (instrs STZ2Gi, STZ2GPreIndex, STZ2GPostIndex)>; +// Subtract Pointer +def : InstRW<[Ampere1BWrite_1cyc_1AB], (instrs SUBP)>; +// Subtract Pointer, flagset +def : InstRW<[Ampere1BWrite_1cyc_1AB], (instrs SUBPS)>; +// Insert Tag Mask +def : InstRW<[Ampere1BWrite_1cyc_1AB], (instrs GMI)>; +// Arithmetic, immediate to logical address tag +def : InstRW<[Ampere1BWrite_1cyc_1B], (instrs ADDG, SUBG)>; + +// Pointer authentication +def : InstRW<[Ampere1BWrite_5cyc_1BS], (instregex "^AUT")>; +def : InstRW<[Ampere1BWrite_6cyc_1BS_1A], + (instregex "BRA(A|AZ|B|BZ)", "RETA(A|B)", "ERETA(A|B)")>; +def : InstRW<[Ampere1BWrite_6cyc_1BS_2A], + (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ)>; +def : InstRW<[Ampere1BWrite_5cyc_1BS], (instregex "^PAC")>; +def : InstRW<[Ampere1BWrite_8cyc_1BS_1L], (instregex "^LDRA(A|B)")>; +def : InstRW<[Ampere1BWrite_1cyc_1B], (instrs XPACD, XPACI)>; + +// Vector integer instructions +// -- absolute difference +def : InstRW<[Ampere1BWrite_2cyc_1XY], + (instregex "^SABAv", "^SABALv", "^SABDv", "^SABDLv", + "^UABAv", "^UABALv", "^UABDv", "^UABDLv")>; +// -- arithmetic +def : InstRW<[Ampere1BWrite_2cyc_1XY], + (instregex "^ABSv", 
"^(ADD|SUB)v", "^SADDLv", "^SADDW", "SHADD", + "SHSUB", "^SRHADD", "^URHADD", "SSUBL", "SSUBW", + "^UADDLv", "^UADDW", "UHADD", "UHSUB", "USUBL", "USUBW")>; +// -- arithmetic, horizontal, 16B +def : InstRW<[Ampere1BWrite_8cyc_4XY], + (instregex "^ADDVv16i8v", "^SADDLVv16i8v", "^UADDLVv16i8v")>; +def : InstRW<[Ampere1BWrite_8cyc_4XY], + (instregex "^[SU](MIN|MAX)Vv16i8v")>; +// -- arithmetic, horizontal, 4H/4S +def : InstRW<[Ampere1BWrite_4cyc_2XY], + (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v")>; +def : InstRW<[Ampere1BWrite_4cyc_2XY], + (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v")>; +// -- arithmetic, horizontal, 8B/8H +def : InstRW<[Ampere1BWrite_6cyc_3XY], + (instregex "^[SU]?ADDL?V(v8i16|v4i32)v")>; +def : InstRW<[Ampere1BWrite_6cyc_3XY], + (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v")>; +// -- arithmetic, narrowing +def : InstRW<[Ampere1BWrite_6cyc_2XY], (instregex "(ADD|SUB)HNv.*")>; +def : InstRW<[Ampere1BWrite_6cyc_2XY], (instregex "(RADD|RSUB)HNv.*")>; +// -- arithmetic, pairwise +def : InstRW<[Ampere1BWrite_2cyc_1XY], + (instregex "^ADDPv", "^SADALP", "^UADALP", "^SADDLPv", "^UADDLPv")>; +// -- arithmetic, saturating +def : InstRW<[Ampere1BWrite_2cyc_1XY], + (instregex "^SQADD", "^SQSUB", "^SUQADD", "^UQADD", "^UQSUB", "^USQADD")>; +// -- bit count +def : InstRW<[Ampere1BWrite_2cyc_1XY], + (instregex "^(CLS|CLZ|CNT)v")>; +// -- compare +def : InstRW<[Ampere1BWrite_2cyc_1XY], + (instregex "^CMEQv", "^CMGEv", "^CMGTv", "^CMLEv", "^CMLTv", + "^CMHIv", "^CMHSv")>; +// -- compare non-zero +def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^CMTSTv")>; +// -- dot product +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^(S|SU|U|US)DOTv")>; +// -- fp reciprocal estimate +def : InstRW<[Ampere1BWrite_6cyc_1X], (instregex "^FRECPEv", "^FRSQRTEv")>; +// -- integer reciprocal estimate +def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^URECPEv", "^URSQRTEv")>; +// -- logical +def : InstRW<[Ampere1BWrite_2cyc_1XY], + (instregex "^ANDv", "^BICv", "^EORv", "^ORRv", 
"^ORNv", "^NOTv")>; +// -- logical, narrowing +def : InstRW<[Ampere1BWrite_6cyc_2XY], + (instregex "RSHRNv", + "SHRNv", "SQSHRNv", "SQSHRUNv", + "UQXTNv")>; +// -- matrix multiply +def : InstRW<[Ampere1BWrite_3cyc_1XY], + (instrs SMMLA, UMMLA, USMMLA)>; +// -- max/min +def : InstRW<[Ampere1BWrite_2cyc_1XY], + (instregex "^SMAXv", "^SMINv", "^UMAXv", "^UMINv")>; +def : InstRW<[Ampere1BWrite_2cyc_1XY], + (instregex "^SMAXPv", "^SMINPv", "^UMAXPv", "^UMINPv")>; +// -- move immediate +def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^MOVIv", "^MVNIv")>; +// -- multiply +def : InstRW<[Ampere1BWrite_3cyc_1XY], + (instregex "MULv", "SMULLv", "UMULLv", "SQDMUL(H|L)v", "SQRDMULHv")>; +// -- multiply accumulate +def : InstRW<[Ampere1BWrite_3cyc_1XY], + (instregex "MLAv", "MLSv", "(S|U|SQD)(MLAL|MLSL)v", "SQRDML(A|S)Hv")>; +// -- negation, saturating +def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^SQABS", "^SQNEG")>; +// -- reverse bits/bytes +def : InstRW<[Ampere1BWrite_2cyc_1XY], + (instregex "^RBITv", "^REV16v", "^REV32v", "^REV64v")>; +// -- shift +def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>; +// -- shift and accumulate +def : InstRW<[Ampere1BWrite_2cyc_1XY], + (instregex "SRSRAv", "SSRAv", "URSRAv", "USRAv")>; +// -- shift, saturating +def : InstRW<[Ampere1BWrite_2cyc_1XY], + (instregex "^SQRSHLv", "^SQRSHRNv", "^SQRSHRUNv", "^SQSHL", "^SQSHLU", + "^SQXTNv", "^SQXTUNv", "^UQSHRNv", "UQRSHRNv", "^UQRSHL", + "^UQSHL")>; + +// Vector miscellaneous instructions +// -- duplicate element +def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^DUPv.+lane")>; +// -- duplicate from GPR +def : InstRW<[Ampere1BWrite_5cyc_1BS], (instregex "^DUPv.+gpr")>; +// -- extract narrow +def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^XTNv")>; +// -- insert/extract element +def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^EXTv", "^INSv.+lane")>; +// -- move FP immediate +def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^FMOVv")>; +// -- 
move element to GPR +def : InstRW<[Ampere1BWrite_5cyc_1X], (instregex "(S|U)MOVv")>; +// -- move from GPR to any element +def : InstRW<[Ampere1BWrite_7cyc_1BS_1XY], (instregex "^INSv.+gpr")>; +// -- table lookup +def : InstRW<[Ampere1BWrite_2cyc_1XY], + (instrs TBLv8i8One, TBLv16i8One, TBXv8i8One, TBXv16i8One)>; +def : InstRW<[Ampere1BWrite_4cyc_2XY], + (instrs TBLv8i8Two, TBLv16i8Two, TBXv8i8Two, TBXv16i8Two)>; +def : InstRW<[Ampere1BWrite_6cyc_3XY], + (instrs TBLv8i8Three, TBLv16i8Three, TBXv8i8Three, TBXv16i8Three)>; +def : InstRW<[Ampere1BWrite_8cyc_4XY], + (instrs TBLv8i8Four, TBLv16i8Four, TBXv8i8Four, TBXv16i8Four)>; +// -- transpose +def : InstRW<[Ampere1BWrite_2cyc_1XY], + (instregex "^TRN1v", "^TRN2v", "^UZP1v", "^UZP2v")>; +// -- zip/unzip +def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^ZIP1v", "^ZIP2v")>; + +} // SchedModel = Ampere1BModel diff --git a/llvm/lib/Target/AArch64/AArch64SchedCyclone.td b/llvm/lib/Target/AArch64/AArch64SchedCyclone.td index 1ef3a2a063382d..48324654949c06 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedCyclone.td +++ b/llvm/lib/Target/AArch64/AArch64SchedCyclone.td @@ -21,7 +21,7 @@ def CycloneModel : SchedMachineModel { list UnsupportedFeatures = !listconcat(SVEUnsupported.F, PAUnsupported.F, SMEUnsupported.F, - [HasMTE]); + [HasMTE, HasCSSC]); } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td b/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td index 2127a34a58d513..6fc4ec3ae41b77 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td +++ b/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td @@ -27,7 +27,7 @@ def ExynosM3Model : SchedMachineModel { list UnsupportedFeatures = !listconcat(SVEUnsupported.F, PAUnsupported.F, SMEUnsupported.F, - [HasMTE]); + [HasMTE, HasCSSC]); } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td 
b/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td index 83cf56088d4ced..5163de280f2e4f 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td +++ b/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td @@ -27,7 +27,7 @@ def ExynosM4Model : SchedMachineModel { list UnsupportedFeatures = !listconcat(SVEUnsupported.F, PAUnsupported.F, SMEUnsupported.F, - [HasMTE]); + [HasMTE, HasCSSC]); } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td b/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td index 85058af86decb5..2ccbe1614dcd79 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td +++ b/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td @@ -27,7 +27,7 @@ def ExynosM5Model : SchedMachineModel { list UnsupportedFeatures = !listconcat(SVEUnsupported.F, PAUnsupported.F, SMEUnsupported.F, - [HasMTE]); + [HasMTE, HasCSSC]); } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AArch64SchedFalkor.td b/llvm/lib/Target/AArch64/AArch64SchedFalkor.td index a765cd1cdfe347..e9172e82b099d1 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedFalkor.td +++ b/llvm/lib/Target/AArch64/AArch64SchedFalkor.td @@ -26,7 +26,7 @@ def FalkorModel : SchedMachineModel { list UnsupportedFeatures = !listconcat(SVEUnsupported.F, PAUnsupported.F, SMEUnsupported.F, - [HasMTE]); + [HasMTE, HasCSSC]); // FIXME: Remove when all errors have been fixed. 
let FullInstRWOverlapCheck = 0; } diff --git a/llvm/lib/Target/AArch64/AArch64SchedKryo.td b/llvm/lib/Target/AArch64/AArch64SchedKryo.td index 3551066ee7c35d..258b34c38898cd 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedKryo.td +++ b/llvm/lib/Target/AArch64/AArch64SchedKryo.td @@ -30,7 +30,7 @@ def KryoModel : SchedMachineModel { list UnsupportedFeatures = !listconcat(SVEUnsupported.F, PAUnsupported.F, SMEUnsupported.F, - [HasMTE]); + [HasMTE, HasCSSC]); // FIXME: Remove when all errors have been fixed. let FullInstRWOverlapCheck = 0; } diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN1.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN1.td index 2ec9600f84f7e5..524fa33f498bb0 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN1.td +++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN1.td @@ -25,7 +25,7 @@ def NeoverseN1Model : SchedMachineModel { list UnsupportedFeatures = !listconcat(PAUnsupported.F, SMEUnsupported.F, SVEUnsupported.F, - [HasMTE]); + [HasMTE, HasCSSC]); } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td index a6fab5e6245f80..8ec124954362f8 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td +++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td @@ -19,7 +19,7 @@ def NeoverseN2Model : SchedMachineModel { let CompleteModel = 1; list UnsupportedFeatures = !listconcat(SMEUnsupported.F, - [HasSVE2p1, HasPAuthLR, HasCPA]); + [HasSVE2p1, HasPAuthLR, HasCPA, HasCSSC]); } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td index 75fbb85dce9d14..7e041dbd2abaea 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td +++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td @@ -28,7 +28,8 @@ def NeoverseV1Model : SchedMachineModel { list 
UnsupportedFeatures = !listconcat(SVE2Unsupported.F, SMEUnsupported.F, - [HasMTE, HasCPA]); + [HasMTE, HasCPA, + HasCSSC]); } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td index 658d7cdd23a63b..e7de40fdf1deb0 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td +++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td @@ -22,7 +22,8 @@ def NeoverseV2Model : SchedMachineModel { let CompleteModel = 1; list UnsupportedFeatures = !listconcat(SMEUnsupported.F, - [HasSVE2p1, HasCPA]); + [HasSVE2p1, HasCPA, + HasCSSC]); } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AArch64SchedTSV110.td b/llvm/lib/Target/AArch64/AArch64SchedTSV110.td index 9e5060f1f36496..0ae9a69fd48265 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedTSV110.td +++ b/llvm/lib/Target/AArch64/AArch64SchedTSV110.td @@ -27,7 +27,7 @@ def TSV110Model : SchedMachineModel { list UnsupportedFeatures = !listconcat(SVEUnsupported.F, PAUnsupported.F, SMEUnsupported.F, - [HasMTE]); + [HasMTE, HasCSSC]); } // Define each kind of processor resource and number available on the TSV110, diff --git a/llvm/lib/Target/AArch64/AArch64SchedThunderX.td b/llvm/lib/Target/AArch64/AArch64SchedThunderX.td index e1536f208e448a..8df3f56e45738c 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedThunderX.td +++ b/llvm/lib/Target/AArch64/AArch64SchedThunderX.td @@ -28,7 +28,7 @@ def ThunderXT8XModel : SchedMachineModel { list UnsupportedFeatures = !listconcat(SVEUnsupported.F, PAUnsupported.F, SMEUnsupported.F, - [HasMTE]); + [HasMTE, HasCSSC]); // FIXME: Remove when all errors have been fixed. 
let FullInstRWOverlapCheck = 0; } diff --git a/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td b/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td index 89faa92155e00d..ef4baa3dedff93 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td +++ b/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td @@ -28,7 +28,7 @@ def ThunderX2T99Model : SchedMachineModel { list UnsupportedFeatures = !listconcat(SVEUnsupported.F, PAUnsupported.F, SMEUnsupported.F, - [HasMTE]); + [HasMTE, HasCSSC]); // FIXME: Remove when all errors have been fixed. let FullInstRWOverlapCheck = 0; } diff --git a/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td b/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td index 8685554b00d76d..796bd4b8b5c9ae 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td +++ b/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td @@ -27,7 +27,7 @@ def ThunderX3T110Model : SchedMachineModel { list UnsupportedFeatures = !listconcat(SVEUnsupported.F, PAUnsupported.F, SMEUnsupported.F, - [HasMTE]); + [HasMTE, HasCSSC]); // FIXME: Remove when all errors have been fixed. 
let FullInstRWOverlapCheck = 0; } diff --git a/llvm/test/tools/llvm-mca/AArch64/Ampere/Ampere1B/basic-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Ampere/Ampere1B/basic-instructions.s new file mode 100644 index 00000000000000..7dd05eb50085c8 --- /dev/null +++ b/llvm/test/tools/llvm-mca/AArch64/Ampere/Ampere1B/basic-instructions.s @@ -0,0 +1,3724 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=aarch64 -mcpu=ampere1b -instruction-tables < %s | FileCheck %s + +#------------------------------------------------------------------------------ +# Add/sub (immediate) +#------------------------------------------------------------------------------ + +add w2, w3, #4095 +add w30, w29, #1, lsl #12 +add w13, w5, #4095, lsl #12 +add x5, x7, #1638 +add w20, wsp, #801 +add wsp, wsp, #1104 +add wsp, w30, #4084 +add x0, x24, #291 +add x3, x24, #4095, lsl #12 +add x8, sp, #1074 +add sp, x29, #3816 +sub w0, wsp, #4077 +sub w4, w20, #546, lsl #12 +sub sp, sp, #288 +sub wsp, w19, #16 +adds w13, w23, #291, lsl #12 +cmn w2, #4095 +adds w20, wsp, #0 +cmn x3, #1, lsl #12 +cmp sp, #20, lsl #12 +cmp x30, #4095 +subs x4, sp, #3822 +cmn w3, #291, lsl #12 +cmn wsp, #1365 +cmn sp, #1092, lsl #12 +mov sp, x30 +mov wsp, w20 +mov x11, sp +mov w24, wsp + +#------------------------------------------------------------------------------ +# Add-subtract (shifted register) +#------------------------------------------------------------------------------ + +add w3, w5, w7 +add wzr, w3, w5 +add w20, wzr, w4 +add w4, w6, wzr +add w11, w13, w15 +add w9, w3, wzr, lsl #10 +add w17, w29, w20, lsl #31 +add w21, w22, w23, lsr #0 +add w24, w25, w26, lsr #18 +add w27, w28, w29, lsr #31 +add w2, w3, w4, asr #0 +add w5, w6, w7, asr #21 +add w8, w9, w10, asr #31 +add x3, x5, x7 +add xzr, x3, x5 +add x20, xzr, x4 +add x4, x6, xzr +add x11, x13, x15 +add x9, x3, xzr, lsl #10 +add x17, x29, x20, lsl #63 +add x21, x22, x23, lsr #0 +add x24, x25, x26, lsr #18 
+add x27, x28, x29, lsr #63 +add x2, x3, x4, asr #0 +add x5, x6, x7, asr #21 +add x8, x9, x10, asr #63 +adds w3, w5, w7 +cmn w3, w5 +adds w20, wzr, w4 +adds w4, w6, wzr +adds w11, w13, w15 +adds w9, w3, wzr, lsl #10 +adds w17, w29, w20, lsl #31 +adds w21, w22, w23, lsr #0 +adds w24, w25, w26, lsr #18 +adds w27, w28, w29, lsr #31 +adds w2, w3, w4, asr #0 +adds w5, w6, w7, asr #21 +adds w8, w9, w10, asr #31 +adds x3, x5, x7 +cmn x3, x5 +adds x20, xzr, x4 +adds x4, x6, xzr +adds x11, x13, x15 +adds x9, x3, xzr, lsl #10 +adds x17, x29, x20, lsl #63 +adds x21, x22, x23, lsr #0 +adds x24, x25, x26, lsr #18 +adds x27, x28, x29, lsr #63 +adds x2, x3, x4, asr #0 +adds x5, x6, x7, asr #21 +adds x8, x9, x10, asr #63 +sub w3, w5, w7 +sub wzr, w3, w5 +sub w4, w6, wzr +sub w11, w13, w15 +sub w9, w3, wzr, lsl #10 +sub w17, w29, w20, lsl #31 +sub w21, w22, w23, lsr #0 +sub w24, w25, w26, lsr #18 +sub w27, w28, w29, lsr #31 +sub w2, w3, w4, asr #0 +sub w5, w6, w7, asr #21 +sub w8, w9, w10, asr #31 +sub x3, x5, x7 +sub xzr, x3, x5 +sub x4, x6, xzr +sub x11, x13, x15 +sub x9, x3, xzr, lsl #10 +sub x17, x29, x20, lsl #63 +sub x21, x22, x23, lsr #0 +sub x24, x25, x26, lsr #18 +sub x27, x28, x29, lsr #63 +sub x2, x3, x4, asr #0 +sub x5, x6, x7, asr #21 +sub x8, x9, x10, asr #63 +subs w3, w5, w7 +cmp w3, w5 +subs w4, w6, wzr +subs w11, w13, w15 +subs w9, w3, wzr, lsl #10 +subs w17, w29, w20, lsl #31 +subs w21, w22, w23, lsr #0 +subs w24, w25, w26, lsr #18 +subs w27, w28, w29, lsr #31 +subs w2, w3, w4, asr #0 +subs w5, w6, w7, asr #21 +subs w8, w9, w10, asr #31 +subs x3, x5, x7 +cmp x3, x5 +subs x4, x6, xzr +subs x11, x13, x15 +subs x9, x3, xzr, lsl #10 +subs x17, x29, x20, lsl #63 +subs x21, x22, x23, lsr #0 +subs x24, x25, x26, lsr #18 +subs x27, x28, x29, lsr #63 +subs x2, x3, x4, asr #0 +subs x5, x6, x7, asr #21 +subs x8, x9, x10, asr #63 +cmn wzr, w4 +cmn w5, wzr +cmn w6, w7 +cmn w8, w9, lsl #15 +cmn w10, w11, lsl #31 +cmn w12, w13, lsr #0 +cmn w14, w15, lsr #21 +cmn w16, w17, lsr 
#31 +cmn w18, w19, asr #0 +cmn w20, w21, asr #22 +cmn w22, w23, asr #31 +cmn x0, x3 +cmn xzr, x4 +cmn x5, xzr +cmn x6, x7 +cmn x8, x9, lsl #15 +cmn x10, x11, lsl #63 +cmn x12, x13, lsr #0 +cmn x14, x15, lsr #41 +cmn x16, x17, lsr #63 +cmn x18, x19, asr #0 +cmn x20, x21, asr #55 +cmn x22, x23, asr #63 +cmp w0, w3 +cmp wzr, w4 +cmp w5, wzr +cmp w6, w7 +cmp w8, w9, lsl #15 +cmp w10, w11, lsl #31 +cmp w12, w13, lsr #0 +cmp w14, w15, lsr #21 +cmp w18, w19, asr #0 +cmp w20, w21, asr #22 +cmp w22, w23, asr #31 +cmp x0, x3 +cmp xzr, x4 +cmp x5, xzr +cmp x6, x7 +cmp x8, x9, lsl #15 +cmp x10, x11, lsl #63 +cmp x12, x13, lsr #0 +cmp x14, x15, lsr #41 +cmp x16, x17, lsr #63 +cmp x18, x19, asr #0 +cmp x20, x21, asr #55 +cmp x22, x23, asr #63 +cmp wzr, w0 +cmp xzr, x0 + +#------------------------------------------------------------------------------ +# Add-subtract (with carry) +#------------------------------------------------------------------------------ + +adc w29, w27, w25 +adc wzr, w3, w4 +adc w9, wzr, w10 +adc w20, w0, wzr +adc x29, x27, x25 +adc xzr, x3, x4 +adc x9, xzr, x10 +adc x20, x0, xzr +adcs w29, w27, w25 +adcs wzr, w3, w4 +adcs w9, wzr, w10 +adcs w20, w0, wzr +adcs x29, x27, x25 +adcs xzr, x3, x4 +adcs x9, xzr, x10 +adcs x20, x0, xzr +sbc w29, w27, w25 +sbc wzr, w3, w4 +ngc w9, w10 +sbc w20, w0, wzr +sbc x29, x27, x25 +sbc xzr, x3, x4 +ngc x9, x10 +sbc x20, x0, xzr +sbcs w29, w27, w25 +sbcs wzr, w3, w4 +ngcs w9, w10 +sbcs w20, w0, wzr +sbcs x29, x27, x25 +sbcs xzr, x3, x4 +ngcs x9, x10 +sbcs x20, x0, xzr +ngc w3, w12 +ngc wzr, w9 +ngc w23, wzr +ngc x29, x30 +ngc xzr, x0 +ngc x0, xzr +ngcs w3, w12 +ngcs wzr, w9 +ngcs w23, wzr +ngcs x29, x30 +ngcs xzr, x0 +ngcs x0, xzr + +#------------------------------------------------------------------------------ +# Bitfield +#------------------------------------------------------------------------------ + +sbfx x1, x2, #3, #2 +asr x3, x4, #63 +asr wzr, wzr, #31 +sbfx w12, w9, #0, #1 +ubfiz x4, x5, 
#52, #11 +ubfx xzr, x4, #0, #1 +ubfiz x4, xzr, #1, #6 +lsr x5, x6, #12 +bfi x4, x5, #52, #11 +bfxil xzr, x4, #0, #1 +bfi x4, xzr, #1, #6 +bfxil x5, x6, #12, #52 +sxtb w1, w2 +sxtb xzr, w3 +sxth w9, w10 +sxth x0, w1 +sxtw x3, w30 +uxtb w1, w2 +uxth w9, w10 +ubfx x3, x30, #0, #32 +asr w3, w2, #0 +asr w9, w10, #31 +asr x20, x21, #63 +asr w1, wzr, #3 +lsr w3, w2, #0 +lsr w9, w10, #31 +lsr x20, x21, #63 +lsr wzr, wzr, #3 +lsr w3, w2, #0 +lsl w9, w10, #31 +lsl x20, x21, #63 +lsl w1, wzr, #3 +sbfx w9, w10, #0, #1 +sbfiz x2, x3, #63, #1 +asr x19, x20, #0 +sbfiz x9, x10, #5, #59 +asr w9, w10, #0 +sbfiz w11, w12, #31, #1 +sbfiz w13, w14, #29, #3 +sbfiz xzr, xzr, #10, #11 +sbfx w9, w10, #0, #1 +asr x2, x3, #63 +asr x19, x20, #0 +asr x9, x10, #5 +asr w9, w10, #0 +asr w11, w12, #31 +asr w13, w14, #29 +sbfx xzr, xzr, #10, #11 +bfxil w9, w10, #0, #1 +bfi x2, x3, #63, #1 +bfxil x19, x20, #0, #64 +bfi x9, x10, #5, #59 +bfxil w9, w10, #0, #32 +bfi w11, w12, #31, #1 +bfi w13, w14, #29, #3 +bfi xzr, xzr, #10, #11 +bfxil w9, w10, #0, #1 +bfxil x2, x3, #63, #1 +bfxil x19, x20, #0, #64 +bfxil x9, x10, #5, #59 +bfxil w9, w10, #0, #32 +bfxil w11, w12, #31, #1 +bfxil w13, w14, #29, #3 +bfxil xzr, xzr, #10, #11 +ubfx w9, w10, #0, #1 +lsl x2, x3, #63 +lsr x19, x20, #0 +lsl x9, x10, #5 +lsr w9, w10, #0 +lsl w11, w12, #31 +lsl w13, w14, #29 +ubfiz xzr, xzr, #10, #11 +ubfx w9, w10, #0, #1 +lsr x2, x3, #63 +lsr x19, x20, #0 +lsr x9, x10, #5 +lsr w9, w10, #0 +lsr w11, w12, #31 +lsr w13, w14, #29 +ubfx xzr, xzr, #10, #11 + +#------------------------------------------------------------------------------ +# Compare and branch (immediate) +#------------------------------------------------------------------------------ + +cbz w5, #4 +cbz x5, #0 +cbnz x2, #-4 +cbnz x26, #1048572 +cbz wzr, #0 +cbnz xzr, #0 + +#------------------------------------------------------------------------------ +# Conditional branch (immediate) +#------------------------------------------------------------------------------ + 
+b.ne #4 +b.ge #1048572 +b.ge #-4 + +#------------------------------------------------------------------------------ +# Conditional compare (immediate) +#------------------------------------------------------------------------------ + +ccmp w1, #31, #0, eq +ccmp w3, #0, #15, hs +ccmp wzr, #15, #13, hs +ccmp x9, #31, #0, le +ccmp x3, #0, #15, gt +ccmp xzr, #5, #7, ne +ccmn w1, #31, #0, eq +ccmn w3, #0, #15, hs +ccmn wzr, #15, #13, hs +ccmn x9, #31, #0, le +ccmn x3, #0, #15, gt +ccmn xzr, #5, #7, ne + +#------------------------------------------------------------------------------ +# Conditional compare (register) +#------------------------------------------------------------------------------ + +ccmp w1, wzr, #0, eq +ccmp w3, w0, #15, hs +ccmp wzr, w15, #13, hs +ccmp x9, xzr, #0, le +ccmp x3, x0, #15, gt +ccmp xzr, x5, #7, ne +ccmn w1, wzr, #0, eq +ccmn w3, w0, #15, hs +ccmn wzr, w15, #13, hs +ccmn x9, xzr, #0, le +ccmn x3, x0, #15, gt +ccmn xzr, x5, #7, ne + +#------------------------------------------------------------------------------ +# Conditional select +#------------------------------------------------------------------------------ + +csel w1, w0, w19, ne +csel wzr, w5, w9, eq +csel w9, wzr, w30, gt +csel w1, w28, wzr, mi +csel x19, x23, x29, lt +csel xzr, x3, x4, ge +csel x5, xzr, x6, hs +csel x7, x8, xzr, lo +csinc w1, w0, w19, ne +csinc wzr, w5, w9, eq +csinc w9, wzr, w30, gt +csinc w1, w28, wzr, mi +csinc x19, x23, x29, lt +csinc xzr, x3, x4, ge +csinc x5, xzr, x6, hs +csinc x7, x8, xzr, lo +csinv w1, w0, w19, ne +csinv wzr, w5, w9, eq +csinv w9, wzr, w30, gt +csinv w1, w28, wzr, mi +csinv x19, x23, x29, lt +csinv xzr, x3, x4, ge +csinv x5, xzr, x6, hs +csinv x7, x8, xzr, lo +csneg w1, w0, w19, ne +csneg wzr, w5, w9, eq +csneg w9, wzr, w30, gt +csneg w1, w28, wzr, mi +csneg x19, x23, x29, lt +csneg xzr, x3, x4, ge +csneg x5, xzr, x6, hs +csneg x7, x8, xzr, lo +cset w3, eq +cset x9, pl +csetm w20, ne +csetm x30, ge +csinc w2, wzr, wzr, al 
+csinv x3, xzr, xzr, nv +cinc w3, w5, gt +cinc wzr, w4, le +cset w9, lt +cinc x3, x5, gt +cinc xzr, x4, le +cset x9, lt +csinc w5, w6, w6, nv +csinc x1, x2, x2, al +cinv w3, w5, gt +cinv wzr, w4, le +csetm w9, lt +cinv x3, x5, gt +cinv xzr, x4, le +csetm x9, lt +csinv x1, x0, x0, al +csinv w9, w8, w8, nv +cneg w3, w5, gt +cneg wzr, w4, le +cneg w9, wzr, lt +cneg x3, x5, gt +cneg xzr, x4, le +cneg x9, xzr, lt +csneg x4, x8, x8, al +csinv w9, w8, w8, nv + +#------------------------------------------------------------------------------ +# Data-processing (1 source) +#------------------------------------------------------------------------------ + +rbit w0, w7 +rbit x18, x3 +rev16 w17, w1 +rev16 x5, x2 +rev w18, w0 +rev32 x20, x1 +rev x22, x2 +clz w24, w3 +clz x26, x4 +cls w3, w5 +cls x20, x5 + +#------------------------------------------------------------------------------ +# Data-processing (2 source) +#------------------------------------------------------------------------------ + +udiv w0, w7, w10 +udiv x9, x22, x4 +sdiv w12, w21, w0 +sdiv x13, x2, x1 +lsl w11, w12, w13 +lsl x14, x15, x16 +lsr w17, w18, w19 +lsr x20, x21, x22 +asr w23, w24, w25 +asr x26, x27, x28 +ror w0, w1, w2 +ror x3, x4, x5 +lsl w6, w7, w8 +lsl x9, x10, x11 +lsr w12, w13, w14 +lsr x15, x16, x17 +asr w18, w19, w20 +asr x21, x22, x23 +ror w24, w25, w26 +ror x27, x28, x29 + +#------------------------------------------------------------------------------ +# Data-processing (3 sources) +#------------------------------------------------------------------------------ + +smulh x30, x29, x28 +smulh xzr, x27, x26 +umulh x30, x29, x28 +umulh x23, x30, xzr +madd w1, w3, w7, w4 +madd wzr, w0, w9, w11 +madd w13, wzr, w4, w4 +madd w19, w30, wzr, w29 +mul w4, w5, w6 +madd x1, x3, x7, x4 +madd xzr, x0, x9, x11 +madd x13, xzr, x4, x4 +madd x19, x30, xzr, x29 +mul x4, x5, x6 +msub w1, w3, w7, w4 +msub wzr, w0, w9, w11 +msub w13, wzr, w4, w4 +msub w19, w30, wzr, w29 +mneg w4, w5, w6 +msub x1, x3, x7, x4 +msub 
xzr, x0, x9, x11 +msub x13, xzr, x4, x4 +msub x19, x30, xzr, x29 +mneg x4, x5, x6 +smaddl x3, w5, w2, x9 +smaddl xzr, w10, w11, x12 +smaddl x13, wzr, w14, x15 +smaddl x16, w17, wzr, x18 +smull x19, w20, w21 +smsubl x3, w5, w2, x9 +smsubl xzr, w10, w11, x12 +smsubl x13, wzr, w14, x15 +smsubl x16, w17, wzr, x18 +smnegl x19, w20, w21 +umaddl x3, w5, w2, x9 +umaddl xzr, w10, w11, x12 +umaddl x13, wzr, w14, x15 +umaddl x16, w17, wzr, x18 +umull x19, w20, w21 +umsubl x3, w5, w2, x9 +umsubl x16, w17, wzr, x18 +umnegl x19, w20, w21 +smulh x30, x29, x28 +smulh x23, x22, xzr +umulh x23, x22, xzr +mul x19, x20, xzr +mneg w21, w22, w23 +smull x11, w13, w17 +umull x11, w13, w17 +smnegl x11, w13, w17 +umnegl x11, w13, w17 + +#------------------------------------------------------------------------------ +# Extract (immediate) +#------------------------------------------------------------------------------ + +extr w3, w5, w7, #0 +extr w11, w13, w17, #31 +extr x3, x5, x7, #15 +extr x11, x13, x17, #63 +ror x19, x23, #24 +ror x29, xzr, #63 +ror w9, w13, #31 + +#------------------------------------------------------------------------------ +# Floating-point compare +#------------------------------------------------------------------------------ + +fcmp s3, s5 +fcmp s31, #0.0 +fcmp s31, #0.0 +fcmpe s29, s30 +fcmpe s15, #0.0 +fcmpe s15, #0.0 +fcmp d4, d12 +fcmp d23, #0.0 +fcmp d23, #0.0 +fcmpe d26, d22 +fcmpe d29, #0.0 +fcmpe d29, #0.0 + +#------------------------------------------------------------------------------ +# Floating-point conditional compare +#------------------------------------------------------------------------------ + +fccmp s1, s31, #0, eq +fccmp s3, s0, #15, hs +fccmp s31, s15, #13, hs +fccmp d9, d31, #0, le +fccmp d3, d0, #15, gt +fccmp d31, d5, #7, ne +fccmpe s1, s31, #0, eq +fccmpe s3, s0, #15, hs +fccmpe s31, s15, #13, hs +fccmpe d9, d31, #0, le +fccmpe d3, d0, #15, gt +fccmpe d31, d5, #7, ne + 
+#------------------------------------------------------------------------------- +# Floating-point conditional select +#------------------------------------------------------------------------------- + +fcsel s3, s20, s9, pl +fcsel d9, d10, d11, mi + +#------------------------------------------------------------------------------ +# Floating-point data-processing (1 source) +#------------------------------------------------------------------------------ + +fmov s0, s1 +fabs s2, s3 +fneg s4, s5 +fsqrt s6, s7 +fcvt d8, s9 +fcvt h10, s11 +frintn s12, s13 +frintp s14, s15 +frintm s16, s17 +frintz s18, s19 +frinta s20, s21 +frintx s22, s23 +frinti s24, s25 +fmov d0, d1 +fabs d2, d3 +fneg d4, d5 +fsqrt d6, d7 +fcvt s8, d9 +fcvt h10, d11 +frintn d12, d13 +frintp d14, d15 +frintm d16, d17 +frintz d18, d19 +frinta d20, d21 +frintx d22, d23 +frinti d24, d25 +fcvt s26, h27 +fcvt d28, h29 + +#------------------------------------------------------------------------------ +# Floating-point data-processing (2 sources) +#------------------------------------------------------------------------------ + +fmul s20, s19, s17 +fdiv s1, s2, s3 +fadd s4, s5, s6 +fsub s7, s8, s9 +fmax s10, s11, s12 +fmin s13, s14, s15 +fmaxnm s16, s17, s18 +fminnm s19, s20, s21 +fnmul s22, s23, s2 +fmul d20, d19, d17 +fdiv d1, d2, d3 +fadd d4, d5, d6 +fsub d7, d8, d9 +fmax d10, d11, d12 +fmin d13, d14, d15 +fmaxnm d16, d17, d18 +fminnm d19, d20, d21 +fnmul d22, d23, d24 + +#------------------------------------------------------------------------------ +# Floating-point data-processing (3 sources) +#------------------------------------------------------------------------------ + +fmadd s3, s5, s6, s31 +fmadd d3, d13, d0, d23 +fmsub s3, s5, s6, s31 +fmsub d3, d13, d0, d23 +fnmadd s3, s5, s6, s31 +fnmadd d3, d13, d0, d23 +fnmsub s3, s5, s6, s31 +fnmsub d3, d13, d0, d23 + +#------------------------------------------------------------------------------ +# Floating-point <-> fixed-point conversion 
+#------------------------------------------------------------------------------ + +fcvtzs w3, h5, #1 +fcvtzs wzr, h20, #13 +fcvtzs w19, h0, #32 +fcvtzs x3, h5, #1 +fcvtzs x12, h30, #45 +fcvtzs x19, h0, #64 +fcvtzs w3, s5, #1 +fcvtzs wzr, s20, #13 +fcvtzs w19, s0, #32 +fcvtzs x3, s5, #1 +fcvtzs x12, s30, #45 +fcvtzs x19, s0, #64 +fcvtzs w3, d5, #1 +fcvtzs wzr, d20, #13 +fcvtzs w19, d0, #32 +fcvtzs x3, d5, #1 +fcvtzs x12, d30, #45 +fcvtzs x19, d0, #64 +fcvtzu w3, h5, #1 +fcvtzu wzr, h20, #13 +fcvtzu w19, h0, #32 +fcvtzu x3, h5, #1 +fcvtzu x12, h30, #45 +fcvtzu x19, h0, #64 +fcvtzu w3, s5, #1 +fcvtzu wzr, s20, #13 +fcvtzu w19, s0, #32 +fcvtzu x3, s5, #1 +fcvtzu x12, s30, #45 +fcvtzu x19, s0, #64 +fcvtzu w3, d5, #1 +fcvtzu wzr, d20, #13 +fcvtzu w19, d0, #32 +fcvtzu x3, d5, #1 +fcvtzu x12, d30, #45 +fcvtzu x19, d0, #64 +scvtf h23, w19, #1 +scvtf h31, wzr, #20 +scvtf h14, w0, #32 +scvtf h23, x19, #1 +scvtf h31, xzr, #20 +scvtf h14, x0, #64 +scvtf s23, w19, #1 +scvtf s31, wzr, #20 +scvtf s14, w0, #32 +scvtf s23, x19, #1 +scvtf s31, xzr, #20 +scvtf s14, x0, #64 +scvtf d23, w19, #1 +scvtf d31, wzr, #20 +scvtf d14, w0, #32 +scvtf d23, x19, #1 +scvtf d31, xzr, #20 +scvtf d14, x0, #64 +ucvtf h23, w19, #1 +ucvtf h31, wzr, #20 +ucvtf h14, w0, #32 +ucvtf h23, x19, #1 +ucvtf h31, xzr, #20 +ucvtf h14, x0, #64 +ucvtf s23, w19, #1 +ucvtf s31, wzr, #20 +ucvtf s14, w0, #32 +ucvtf s23, x19, #1 +ucvtf s31, xzr, #20 +ucvtf s14, x0, #64 +ucvtf d23, w19, #1 +ucvtf d31, wzr, #20 +ucvtf d14, w0, #32 +ucvtf d23, x19, #1 +ucvtf d31, xzr, #20 +ucvtf d14, x0, #64 + +#------------------------------------------------------------------------------ +# Floating-point <-> integer conversion +#------------------------------------------------------------------------------ + +fcvtns w3, h31 +fcvtns xzr, h12 +fcvtnu wzr, h12 +fcvtnu x0, h0 +fcvtps wzr, h9 +fcvtps x12, h20 +fcvtpu w30, h23 +fcvtpu x29, h3 +fcvtms w2, h3 +fcvtms x4, h5 +fcvtmu w6, h7 +fcvtmu x8, h9 +fcvtzs w10, h11 +fcvtzs x12, h13 +fcvtzu 
w14, h15 +fcvtzu x15, h16 +scvtf h17, w18 +scvtf h19, x20 +ucvtf h21, w22 +scvtf h23, x24 +fcvtas w25, h26 +fcvtas x27, h28 +fcvtau w29, h30 +fcvtau xzr, h0 +fcvtns w3, s31 +fcvtns xzr, s12 +fcvtnu wzr, s12 +fcvtnu x0, s0 +fcvtps wzr, s9 +fcvtps x12, s20 +fcvtpu w30, s23 +fcvtpu x29, s3 +fcvtms w2, s3 +fcvtms x4, s5 +fcvtmu w6, s7 +fcvtmu x8, s9 +fcvtzs w10, s11 +fcvtzs x12, s13 +fcvtzu w14, s15 +fcvtzu x15, s16 +scvtf s17, w18 +scvtf s19, x20 +ucvtf s21, w22 +scvtf s23, x24 +fcvtas w25, s26 +fcvtas x27, s28 +fcvtau w29, s30 +fcvtau xzr, s0 +fcvtns w3, d31 +fcvtns xzr, d12 +fcvtnu wzr, d12 +fcvtnu x0, d0 +fcvtps wzr, d9 +fcvtps x12, d20 +fcvtpu w30, d23 +fcvtpu x29, d3 +fcvtms w2, d3 +fcvtms x4, d5 +fcvtmu w6, d7 +fcvtmu x8, d9 +fcvtzs w10, d11 +fcvtzs x12, d13 +fcvtzu w14, d15 +fcvtzu x15, d16 +scvtf d17, w18 +scvtf d19, x20 +ucvtf d21, w22 +ucvtf d23, x24 +fcvtas w25, d26 +fcvtas x27, d28 +fcvtau w29, d30 +fcvtau xzr, d0 +fmov w3, s9 +fmov s9, w3 +fmov x20, d31 +fmov d1, x15 +fmov x3, v12.d[1] +fmov v1.d[1], x19 + +#------------------------------------------------------------------------------ +# Floating-point immediate +#------------------------------------------------------------------------------ + +fmov s2, #0.12500000 +fmov s3, #1.00000000 +fmov d30, #16.00000000 +fmov s4, #1.06250000 +fmov d10, #1.93750000 +fmov s12, #-1.00000000 +fmov d16, #8.50000000 + +#------------------------------------------------------------------------------ +# Load-register (literal) +#------------------------------------------------------------------------------ + +ldr w3, #0 +ldr x29, #4 +ldrsw xzr, #-4 +ldr s0, #8 +ldr d0, #1048572 +ldr q0, #-1048576 +prfm pldl1strm, #0 +prfm #22, #0 + +#------------------------------------------------------------------------------ +# Load/store exclusive +#------------------------------------------------------------------------------ + +stxrb w18, w8, [sp] +stxrh w24, w15, [x16] +stxr w5, w6, [x17] +stxr w1, x10, [x21] +ldxrb w30, [x0] +ldxrh 
w17, [x4] +ldxr w22, [sp] +ldxr x11, [x29] +ldxr x11, [x29] +ldxr x11, [x29] +stxp w12, w11, w10, [sp] +stxp wzr, x27, x9, [x12] +ldxp w0, wzr, [sp] +ldxp x17, x0, [x18] +ldxp x17, x0, [x18] +stlxrb w12, w22, [x0] +stlxrh w10, w1, [x1] +stlxr w9, w2, [x2] +stlxr w9, x3, [sp] +ldaxrb w8, [x4] +ldaxrh w7, [x5] +ldaxr w6, [sp] +ldaxr x5, [x6] +ldaxr x5, [x6] +ldaxr x5, [x6] +stlxp w4, w5, w6, [sp] +stlxp wzr, x6, x7, [x1] +ldaxp w5, w18, [sp] +ldaxp x6, x19, [x22] +ldaxp x6, x19, [x22] +stlrb w24, [sp] +stlrh w25, [x30] +stlr w26, [x29] +stlr x27, [x28] +stlr x27, [x28] +stlr x27, [x28] +ldarb w23, [sp] +ldarh w22, [x30] +ldar wzr, [x29] +ldar x21, [x28] +ldar x21, [x28] +ldar x21, [x28] + +#------------------------------------------------------------------------------ +# Load/store (unscaled immediate) +#------------------------------------------------------------------------------ + +sturb w9, [sp] +sturh wzr, [x12, #255] +stur w16, [x0, #-256] +stur x28, [x14, #1] +ldurb w1, [x20, #255] +ldurh w20, [x1, #255] +ldur w12, [sp, #255] +ldur xzr, [x12, #255] +ldursb x9, [x7, #-256] +ldursh x17, [x19, #-256] +ldursw x20, [x15, #-256] +prfum pldl2keep, [sp, #-256] +ldursb w19, [x1, #-256] +ldursh w15, [x21, #-256] +stur b0, [sp, #1] +stur h12, [x12, #-1] +stur s15, [x0, #255] +stur d31, [x5, #25] +stur q9, [x5] +ldur b3, [sp] +ldur h5, [x4, #-256] +ldur s7, [x12, #-1] +ldur d11, [x19, #4] +ldur q13, [x1, #2] + +#------------------------------------------------------------------------------ +# Load/store (immediate post-indexed) +#------------------------------------------------------------------------------ + +strb w9, [x2], #255 +strb w10, [x3], #1 +strb w10, [x3], #-256 +strh w9, [x2], #255 +strh w9, [x2], #1 +strh w10, [x3], #-256 +str w19, [sp], #255 +str w20, [x30], #1 +str w21, [x12], #-256 +str xzr, [x9], #255 +str x2, [x3], #1 +str x19, [x12], #-256 +ldrb w9, [x2], #255 +ldrb w10, [x3], #1 +ldrb w10, [x3], #-256 +ldrh w9, [x2], #255 +ldrh w9, [x2], #1 +ldrh w10, 
[x3], #-256 +ldr w19, [sp], #255 +ldr w20, [x30], #1 +ldr w21, [x12], #-256 +ldr xzr, [x9], #255 +ldr x2, [x3], #1 +ldr x19, [x12], #-256 +ldrsb xzr, [x9], #255 +ldrsb x2, [x3], #1 +ldrsb x19, [x12], #-256 +ldrsh xzr, [x9], #255 +ldrsh x2, [x3], #1 +ldrsh x19, [x12], #-256 +ldrsw xzr, [x9], #255 +ldrsw x2, [x3], #1 +ldrsw x19, [x12], #-256 +ldrsb wzr, [x9], #255 +ldrsb w2, [x3], #1 +ldrsb w19, [x12], #-256 +ldrsh wzr, [x9], #255 +ldrsh w2, [x3], #1 +ldrsh w19, [x12], #-256 +str b0, [x0], #255 +str b3, [x3], #1 +str b5, [sp], #-256 +str h10, [x10], #255 +str h13, [x23], #1 +str h15, [sp], #-256 +str s20, [x20], #255 +str s23, [x23], #1 +str s25, [x0], #-256 +str d20, [x20], #255 +str d23, [x23], #1 +str d25, [x0], #-256 +ldr b0, [x0], #255 +ldr b3, [x3], #1 +ldr b5, [sp], #-256 +ldr h10, [x10], #255 +ldr h13, [x23], #1 +ldr h15, [sp], #-256 +ldr s20, [x20], #255 +ldr s23, [x23], #1 +ldr s25, [x0], #-256 +ldr d20, [x20], #255 +ldr d23, [x23], #1 +ldr d25, [x0], #-256 +ldr q20, [x1], #255 +ldr q23, [x9], #1 +ldr q25, [x20], #-256 +str q10, [x1], #255 +str q22, [sp], #1 +str q21, [x20], #-256 + +#------------------------------------------------------------------------------- +# Load-store register (immediate pre-indexed) +#------------------------------------------------------------------------------- + +ldr x3, [x4, #0]! +strb w9, [x2, #255]! +strb w10, [x3, #1]! +strb w10, [x3, #-256]! +strh w9, [x2, #255]! +strh w9, [x2, #1]! +strh w10, [x3, #-256]! +str w19, [sp, #255]! +str w20, [x30, #1]! +str w21, [x12, #-256]! +str xzr, [x9, #255]! +str x2, [x3, #1]! +str x19, [x12, #-256]! +ldrb w9, [x2, #255]! +ldrb w10, [x3, #1]! +ldrb w10, [x3, #-256]! +ldrh w9, [x2, #255]! +ldrh w9, [x2, #1]! +ldrh w10, [x3, #-256]! +ldr w19, [sp, #255]! +ldr w20, [x30, #1]! +ldr w21, [x12, #-256]! +ldr xzr, [x9, #255]! +ldr x2, [x3, #1]! +ldr x19, [x12, #-256]! +ldrsb xzr, [x9, #255]! +ldrsb x2, [x3, #1]! +ldrsb x19, [x12, #-256]! +ldrsh xzr, [x9, #255]! +ldrsh x2, [x3, #1]! 
+ldrsh x19, [x12, #-256]! +ldrsw xzr, [x9, #255]! +ldrsw x2, [x3, #1]! +ldrsw x19, [x12, #-256]! +ldrsb wzr, [x9, #255]! +ldrsb w2, [x3, #1]! +ldrsb w19, [x12, #-256]! +ldrsh wzr, [x9, #255]! +ldrsh w2, [x3, #1]! +ldrsh w19, [x12, #-256]! +str b0, [x0, #255]! +str b3, [x3, #1]! +str b5, [sp, #-256]! +str h10, [x10, #255]! +str h13, [x23, #1]! +str h15, [sp, #-256]! +str s20, [x20, #255]! +str s23, [x23, #1]! +str s25, [x0, #-256]! +str d20, [x20, #255]! +str d23, [x23, #1]! +str d25, [x0, #-256]! +ldr b0, [x0, #255]! +ldr b3, [x3, #1]! +ldr b5, [sp, #-256]! +ldr h10, [x10, #255]! +ldr h13, [x23, #1]! +ldr h15, [sp, #-256]! +ldr s20, [x20, #255]! +ldr s23, [x23, #1]! +ldr s25, [x0, #-256]! +ldr d20, [x20, #255]! +ldr d23, [x23, #1]! +ldr d25, [x0, #-256]! +ldr q20, [x1, #255]! +ldr q23, [x9, #1]! +ldr q25, [x20, #-256]! +str q10, [x1, #255]! +str q22, [sp, #1]! +str q21, [x20, #-256]! + +#------------------------------------------------------------------------------ +# Load/store (unprivileged) +#------------------------------------------------------------------------------ + +sttrb w9, [sp] +sttrh wzr, [x12, #255] +sttr w16, [x0, #-256] +sttr x28, [x14, #1] +ldtrb w1, [x20, #255] +ldtrh w20, [x1, #255] +ldtr w12, [sp, #255] +ldtr xzr, [x12, #255] +ldtrsb x9, [x7, #-256] +ldtrsh x17, [x19, #-256] +ldtrsw x20, [x15, #-256] +ldtrsb w19, [x1, #-256] +ldtrsh w15, [x21, #-256] + +#------------------------------------------------------------------------------ +# Load/store (unsigned immediate) +#------------------------------------------------------------------------------ + +ldr x4, [x29] +ldr x30, [x12, #32760] +ldr x20, [sp, #8] +ldr xzr, [sp] +ldr w2, [sp] +ldr w17, [sp, #16380] +ldr w13, [x2, #4] +ldrsw x2, [x5, #4] +ldrsw x23, [sp, #16380] +ldrh w2, [x4] +ldrsh w23, [x6, #8190] +ldrsh wzr, [sp, #2] +ldrsh x29, [x2, #2] +ldrb w26, [x3, #121] +ldrb w12, [x2] +ldrsb w27, [sp, #4095] +ldrsb xzr, [x15] +str x30, [sp] +str w20, [x4, #16380] +strh w17, [sp, #8190] +strb 
w23, [x3, #4095] +strb wzr, [x2] +ldr b31, [sp, #4095] +ldr h20, [x2, #8190] +ldr s10, [x19, #16380] +ldr d3, [x10, #32760] +str q12, [sp, #65520] + +#------------------------------------------------------------------------------ +# Load/store (register offset) +#------------------------------------------------------------------------------ + +ldrb w3, [sp, x5] +ldrb w9, [x27, x6] +ldrsb w10, [x30, x7] +ldrb w11, [x29, x3, sxtx] +strb w12, [x28, xzr, sxtx] +ldrb w14, [x26, w6, uxtw] +ldrsb w15, [x25, w7, uxtw] +ldrb w17, [x23, w9, sxtw] +ldrsb x18, [x22, w10, sxtw] +ldrsh w3, [sp, x5] +ldrsh w9, [x27, x6] +ldrh w10, [x30, x7, lsl #1] +strh w11, [x29, x3, sxtx] +ldrh w12, [x28, xzr, sxtx] +ldrsh x13, [x27, x5, sxtx #1] +ldrh w14, [x26, w6, uxtw] +ldrh w15, [x25, w7, uxtw] +ldrsh w16, [x24, w8, uxtw #1] +ldrh w17, [x23, w9, sxtw] +ldrh w18, [x22, w10, sxtw] +strh w19, [x21, wzr, sxtw #1] +ldr w3, [sp, x5] +ldr s9, [x27, x6] +ldr w10, [x30, x7, lsl #2] +ldr w11, [x29, x3, sxtx] +str s12, [x28, xzr, sxtx] +str w13, [x27, x5, sxtx #2] +str w14, [x26, w6, uxtw] +ldr w15, [x25, w7, uxtw] +ldr w16, [x24, w8, uxtw #2] +ldrsw x17, [x23, w9, sxtw] +ldr w18, [x22, w10, sxtw] +ldrsw x19, [x21, wzr, sxtw #2] +ldr x3, [sp, x5] +str x9, [x27, x6] +ldr d10, [x30, x7, lsl #3] +str x11, [x29, x3, sxtx] +ldr x12, [x28, xzr, sxtx] +ldr x13, [x27, x5, sxtx #3] +prfm pldl1keep, [x26, w6, uxtw] +ldr x15, [x25, w7, uxtw] +ldr x16, [x24, w8, uxtw #3] +ldr x17, [x23, w9, sxtw] +ldr x18, [x22, w10, sxtw] +str d19, [x21, wzr, sxtw #3] +ldr q3, [sp, x5] +ldr q9, [x27, x6] +ldr q10, [x30, x7, lsl #4] +str q11, [x29, x3, sxtx] +str q12, [x28, xzr, sxtx] +str q13, [x27, x5, sxtx #4] +ldr q14, [x26, w6, uxtw] +ldr q15, [x25, w7, uxtw] +ldr q16, [x24, w8, uxtw #4] +ldr q17, [x23, w9, sxtw] +str q18, [x22, w10, sxtw] +ldr q19, [x21, wzr, sxtw #4] + +#------------------------------------------------------------------------------ +# Load/store register pair (offset) 
+#------------------------------------------------------------------------------ + +ldp w3, w5, [sp] +stp wzr, w9, [sp, #252] +ldp w2, wzr, [sp, #-256] +ldp w9, w10, [sp, #4] +ldpsw x9, x10, [sp, #4] +ldpsw x9, x10, [x2, #-256] +ldpsw x20, x30, [sp, #252] +ldp x21, x29, [x2, #504] +ldp x22, x23, [x3, #-512] +ldp x24, x25, [x4, #8] +ldp s29, s28, [sp, #252] +stp s27, s26, [sp, #-256] +ldp s1, s2, [x3, #44] +stp d3, d5, [x9, #504] +stp d7, d11, [x10, #-512] +ldp d2, d3, [x30, #-8] +stp q3, q5, [sp] +stp q17, q19, [sp, #1008] +ldp q23, q29, [x1, #-1024] + +#------------------------------------------------------------------------------ +# Load/store register pair (post-indexed) +#------------------------------------------------------------------------------ + +ldp w3, w5, [sp], #0 +stp wzr, w9, [sp], #252 +ldp w2, wzr, [sp], #-256 +ldp w9, w10, [sp], #4 +ldpsw x9, x10, [sp], #4 +ldpsw x9, x10, [x2], #-256 +ldpsw x20, x30, [sp], #252 +ldp x21, x29, [x2], #504 +ldp x22, x23, [x3], #-512 +ldp x24, x25, [x4], #8 +ldp s29, s28, [sp], #252 +stp s27, s26, [sp], #-256 +ldp s1, s2, [x3], #44 +stp d3, d5, [x9], #504 +stp d7, d11, [x10], #-512 +ldp d2, d3, [x30], #-8 +stp q3, q5, [sp], #0 +stp q17, q19, [sp], #1008 +ldp q23, q29, [x1], #-1024 + +#------------------------------------------------------------------------------ +# Load/store register pair (pre-indexed) +#------------------------------------------------------------------------------ + +ldp w3, w5, [sp, #0]! +stp wzr, w9, [sp, #252]! +ldp w2, wzr, [sp, #-256]! +ldp w9, w10, [sp, #4]! +ldpsw x9, x10, [sp, #4]! +ldpsw x9, x10, [x2, #-256]! +ldpsw x20, x30, [sp, #252]! +ldp x21, x29, [x2, #504]! +ldp x22, x23, [x3, #-512]! +ldp x24, x25, [x4, #8]! +ldp s29, s28, [sp, #252]! +stp s27, s26, [sp, #-256]! +ldp s1, s2, [x3, #44]! +stp d3, d5, [x9, #504]! +stp d7, d11, [x10, #-512]! +ldp d2, d3, [x30, #-8]! +stp q3, q5, [sp, #0]! +stp q17, q19, [sp, #1008]! +ldp q23, q29, [x1, #-1024]! 
+ +#------------------------------------------------------------------------------ +# Load/store non-temporal register pair (offset) +#------------------------------------------------------------------------------ + +ldnp w3, w5, [sp] +stnp wzr, w9, [sp, #252] +ldnp w2, wzr, [sp, #-256] +ldnp w9, w10, [sp, #4] +ldnp x21, x29, [x2, #504] +ldnp x22, x23, [x3, #-512] +ldnp x24, x25, [x4, #8] +ldnp s29, s28, [sp, #252] +stnp s27, s26, [sp, #-256] +ldnp s1, s2, [x3, #44] +stnp d3, d5, [x9, #504] +stnp d7, d11, [x10, #-512] +ldnp d2, d3, [x30, #-8] +stnp q3, q5, [sp] +stnp q17, q19, [sp, #1008] +ldnp q23, q29, [x1, #-1024] + +#------------------------------------------------------------------------------ +# Logical (immediate) +#------------------------------------------------------------------------------ + +mov w3, #983055 +mov x10, #-6148914691236517206 + +#------------------------------------------------------------------------------ +# Logical (shifted register) +#------------------------------------------------------------------------------ + +and w12, w23, w21 +and w16, w15, w1, lsl #1 +and w9, w4, w10, lsl #31 +and w3, w30, w11 +and x3, x5, x7, lsl #63 +and x5, x14, x19, asr #4 +and w3, w17, w19, ror #31 +and w0, w2, wzr, lsr #17 +and w3, w30, w11, asr #2 +and xzr, x4, x26 +and w3, wzr, w20, ror #2 +and x7, x20, xzr, asr #63 +bic x13, x20, x14, lsl #47 +bic w2, w7, w9 +orr w2, w7, w0, asr #31 +orr x8, x9, x10, lsl #12 +orn x3, x5, x7, asr #2 +orn w2, w5, w29 +ands w7, wzr, w9, lsl #1 +ands x3, x5, x20, ror #63 +bics w3, w5, w7 +bics x3, xzr, x3, lsl #1 +tst w3, w7, lsl #31 +tst x2, x20, asr #2 +mov x3, x6 +mov x3, xzr +mov wzr, w2 +mov w3, w5 + +#------------------------------------------------------------------------------ +# Move wide (immediate) +#------------------------------------------------------------------------------ + +movz w2, #0, lsl #16 +mov w2, #-1235 +mov x2, #5299989643264 +mov x2, #0 +movk w3, #0 +movz x4, #0, lsl #16 +movk w5, #0, lsl #16 +movz x6, #0, lsl 
#32 +movk x7, #0, lsl #32 +movz x8, #0, lsl #48 +movk x9, #0, lsl #48 + +#------------------------------------------------------------------------------ +# PC-relative addressing +#------------------------------------------------------------------------------ + +adr x2, #1600 +adrp x21, #6553600 +adr x0, #262144 + +#------------------------------------------------------------------------------ +# Test and branch (immediate) +#------------------------------------------------------------------------------ + +tbz x12, #62, #0 +tbz x12, #62, #4 +tbz x12, #62, #-32768 +tbnz x12, #60, #32764 + +#------------------------------------------------------------------------------ +# Unconditional branch (immediate) +#------------------------------------------------------------------------------ + +b #4 +b #-4 +b #134217724 + +#------------------------------------------------------------------------------ +# Unconditional branch (register) +#------------------------------------------------------------------------------ + +br x20 +blr xzr +ret x10 +ret +eret +drps + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 0.25 add w2, w3, #4095 +# CHECK-NEXT: 1 1 0.25 add w30, w29, #1, lsl #12 +# CHECK-NEXT: 1 1 0.25 add w13, w5, #4095, lsl #12 +# CHECK-NEXT: 1 1 0.25 add x5, x7, #1638 +# CHECK-NEXT: 1 1 0.25 add w20, wsp, #801 +# CHECK-NEXT: 1 1 0.25 add wsp, wsp, #1104 +# CHECK-NEXT: 1 1 0.25 add wsp, w30, #4084 +# CHECK-NEXT: 1 1 0.25 add x0, x24, #291 +# CHECK-NEXT: 1 1 0.25 add x3, x24, #4095, lsl #12 +# CHECK-NEXT: 1 1 0.25 add x8, sp, #1074 +# CHECK-NEXT: 1 1 0.25 add sp, x29, #3816 +# CHECK-NEXT: 1 1 0.25 sub w0, wsp, #4077 +# CHECK-NEXT: 1 1 0.25 sub w4, w20, #546, lsl #12 +# CHECK-NEXT: 1 1 0.25 sub sp, sp, #288 +# CHECK-NEXT: 1 1 0.25 sub wsp, w19, 
#16 +# CHECK-NEXT: 1 1 0.50 adds w13, w23, #291, lsl #12 +# CHECK-NEXT: 1 1 0.50 cmn w2, #4095 +# CHECK-NEXT: 1 1 0.50 adds w20, wsp, #0 +# CHECK-NEXT: 1 1 0.50 cmn x3, #1, lsl #12 +# CHECK-NEXT: 1 1 0.50 cmp sp, #20, lsl #12 +# CHECK-NEXT: 1 1 0.50 cmp x30, #4095 +# CHECK-NEXT: 1 1 0.50 subs x4, sp, #3822 +# CHECK-NEXT: 1 1 0.50 cmn w3, #291, lsl #12 +# CHECK-NEXT: 1 1 0.50 cmn wsp, #1365 +# CHECK-NEXT: 1 1 0.50 cmn sp, #1092, lsl #12 +# CHECK-NEXT: 1 1 0.25 mov sp, x30 +# CHECK-NEXT: 1 1 0.25 mov wsp, w20 +# CHECK-NEXT: 1 1 0.25 mov x11, sp +# CHECK-NEXT: 1 1 0.25 mov w24, wsp +# CHECK-NEXT: 1 1 0.25 add w3, w5, w7 +# CHECK-NEXT: 1 1 0.25 add wzr, w3, w5 +# CHECK-NEXT: 1 1 0.25 add w20, wzr, w4 +# CHECK-NEXT: 1 1 0.25 add w4, w6, wzr +# CHECK-NEXT: 1 1 0.25 add w11, w13, w15 +# CHECK-NEXT: 2 2 0.50 add w9, w3, wzr, lsl #10 +# CHECK-NEXT: 2 2 0.50 add w17, w29, w20, lsl #31 +# CHECK-NEXT: 2 2 0.50 add w21, w22, w23, lsr #0 +# CHECK-NEXT: 2 2 0.50 add w24, w25, w26, lsr #18 +# CHECK-NEXT: 2 2 0.50 add w27, w28, w29, lsr #31 +# CHECK-NEXT: 2 2 0.50 add w2, w3, w4, asr #0 +# CHECK-NEXT: 2 2 0.50 add w5, w6, w7, asr #21 +# CHECK-NEXT: 2 2 0.50 add w8, w9, w10, asr #31 +# CHECK-NEXT: 1 1 0.25 add x3, x5, x7 +# CHECK-NEXT: 1 1 0.25 add xzr, x3, x5 +# CHECK-NEXT: 1 1 0.25 add x20, xzr, x4 +# CHECK-NEXT: 1 1 0.25 add x4, x6, xzr +# CHECK-NEXT: 1 1 0.25 add x11, x13, x15 +# CHECK-NEXT: 2 2 0.50 add x9, x3, xzr, lsl #10 +# CHECK-NEXT: 2 2 0.50 add x17, x29, x20, lsl #63 +# CHECK-NEXT: 2 2 0.50 add x21, x22, x23, lsr #0 +# CHECK-NEXT: 2 2 0.50 add x24, x25, x26, lsr #18 +# CHECK-NEXT: 2 2 0.50 add x27, x28, x29, lsr #63 +# CHECK-NEXT: 2 2 0.50 add x2, x3, x4, asr #0 +# CHECK-NEXT: 2 2 0.50 add x5, x6, x7, asr #21 +# CHECK-NEXT: 2 2 0.50 add x8, x9, x10, asr #63 +# CHECK-NEXT: 1 1 0.25 adds w3, w5, w7 +# CHECK-NEXT: 1 1 0.25 cmn w3, w5 +# CHECK-NEXT: 1 1 0.25 adds w20, wzr, w4 +# CHECK-NEXT: 1 1 0.25 adds w4, w6, wzr +# CHECK-NEXT: 1 1 0.25 adds w11, w13, w15 +# CHECK-NEXT: 2 
2 0.50 adds w9, w3, wzr, lsl #10 +# CHECK-NEXT: 2 2 0.50 adds w17, w29, w20, lsl #31 +# CHECK-NEXT: 2 2 0.50 adds w21, w22, w23, lsr #0 +# CHECK-NEXT: 2 2 0.50 adds w24, w25, w26, lsr #18 +# CHECK-NEXT: 2 2 0.50 adds w27, w28, w29, lsr #31 +# CHECK-NEXT: 2 2 0.50 adds w2, w3, w4, asr #0 +# CHECK-NEXT: 2 2 0.50 adds w5, w6, w7, asr #21 +# CHECK-NEXT: 2 2 0.50 adds w8, w9, w10, asr #31 +# CHECK-NEXT: 1 1 0.25 adds x3, x5, x7 +# CHECK-NEXT: 1 1 0.25 cmn x3, x5 +# CHECK-NEXT: 1 1 0.25 adds x20, xzr, x4 +# CHECK-NEXT: 1 1 0.25 adds x4, x6, xzr +# CHECK-NEXT: 1 1 0.25 adds x11, x13, x15 +# CHECK-NEXT: 2 2 0.50 adds x9, x3, xzr, lsl #10 +# CHECK-NEXT: 2 2 0.50 adds x17, x29, x20, lsl #63 +# CHECK-NEXT: 2 2 0.50 adds x21, x22, x23, lsr #0 +# CHECK-NEXT: 2 2 0.50 adds x24, x25, x26, lsr #18 +# CHECK-NEXT: 2 2 0.50 adds x27, x28, x29, lsr #63 +# CHECK-NEXT: 2 2 0.50 adds x2, x3, x4, asr #0 +# CHECK-NEXT: 2 2 0.50 adds x5, x6, x7, asr #21 +# CHECK-NEXT: 2 2 0.50 adds x8, x9, x10, asr #63 +# CHECK-NEXT: 1 1 0.25 sub w3, w5, w7 +# CHECK-NEXT: 1 1 0.25 sub wzr, w3, w5 +# CHECK-NEXT: 1 1 0.25 sub w4, w6, wzr +# CHECK-NEXT: 1 1 0.25 sub w11, w13, w15 +# CHECK-NEXT: 2 2 0.50 sub w9, w3, wzr, lsl #10 +# CHECK-NEXT: 2 2 0.50 sub w17, w29, w20, lsl #31 +# CHECK-NEXT: 2 2 0.50 sub w21, w22, w23, lsr #0 +# CHECK-NEXT: 2 2 0.50 sub w24, w25, w26, lsr #18 +# CHECK-NEXT: 2 2 0.50 sub w27, w28, w29, lsr #31 +# CHECK-NEXT: 2 2 0.50 sub w2, w3, w4, asr #0 +# CHECK-NEXT: 2 2 0.50 sub w5, w6, w7, asr #21 +# CHECK-NEXT: 2 2 0.50 sub w8, w9, w10, asr #31 +# CHECK-NEXT: 1 1 0.25 sub x3, x5, x7 +# CHECK-NEXT: 1 1 0.25 sub xzr, x3, x5 +# CHECK-NEXT: 1 1 0.25 sub x4, x6, xzr +# CHECK-NEXT: 1 1 0.25 sub x11, x13, x15 +# CHECK-NEXT: 2 2 0.50 sub x9, x3, xzr, lsl #10 +# CHECK-NEXT: 2 2 0.50 sub x17, x29, x20, lsl #63 +# CHECK-NEXT: 2 2 0.50 sub x21, x22, x23, lsr #0 +# CHECK-NEXT: 2 2 0.50 sub x24, x25, x26, lsr #18 +# CHECK-NEXT: 2 2 0.50 sub x27, x28, x29, lsr #63 +# CHECK-NEXT: 2 2 0.50 sub x2, x3, 
x4, asr #0 +# CHECK-NEXT: 2 2 0.50 sub x5, x6, x7, asr #21 +# CHECK-NEXT: 2 2 0.50 sub x8, x9, x10, asr #63 +# CHECK-NEXT: 1 1 0.25 subs w3, w5, w7 +# CHECK-NEXT: 1 1 0.25 cmp w3, w5 +# CHECK-NEXT: 1 1 0.25 subs w4, w6, wzr +# CHECK-NEXT: 1 1 0.25 subs w11, w13, w15 +# CHECK-NEXT: 2 2 0.50 subs w9, w3, wzr, lsl #10 +# CHECK-NEXT: 2 2 0.50 subs w17, w29, w20, lsl #31 +# CHECK-NEXT: 2 2 0.50 subs w21, w22, w23, lsr #0 +# CHECK-NEXT: 2 2 0.50 subs w24, w25, w26, lsr #18 +# CHECK-NEXT: 2 2 0.50 subs w27, w28, w29, lsr #31 +# CHECK-NEXT: 2 2 0.50 subs w2, w3, w4, asr #0 +# CHECK-NEXT: 2 2 0.50 subs w5, w6, w7, asr #21 +# CHECK-NEXT: 2 2 0.50 subs w8, w9, w10, asr #31 +# CHECK-NEXT: 1 1 0.25 subs x3, x5, x7 +# CHECK-NEXT: 1 1 0.25 cmp x3, x5 +# CHECK-NEXT: 1 1 0.25 subs x4, x6, xzr +# CHECK-NEXT: 1 1 0.25 subs x11, x13, x15 +# CHECK-NEXT: 2 2 0.50 subs x9, x3, xzr, lsl #10 +# CHECK-NEXT: 2 2 0.50 subs x17, x29, x20, lsl #63 +# CHECK-NEXT: 2 2 0.50 subs x21, x22, x23, lsr #0 +# CHECK-NEXT: 2 2 0.50 subs x24, x25, x26, lsr #18 +# CHECK-NEXT: 2 2 0.50 subs x27, x28, x29, lsr #63 +# CHECK-NEXT: 2 2 0.50 subs x2, x3, x4, asr #0 +# CHECK-NEXT: 2 2 0.50 subs x5, x6, x7, asr #21 +# CHECK-NEXT: 2 2 0.50 subs x8, x9, x10, asr #63 +# CHECK-NEXT: 1 1 0.25 cmn wzr, w4 +# CHECK-NEXT: 1 1 0.25 cmn w5, wzr +# CHECK-NEXT: 1 1 0.25 cmn w6, w7 +# CHECK-NEXT: 2 2 0.50 cmn w8, w9, lsl #15 +# CHECK-NEXT: 2 2 0.50 cmn w10, w11, lsl #31 +# CHECK-NEXT: 2 2 0.50 cmn w12, w13, lsr #0 +# CHECK-NEXT: 2 2 0.50 cmn w14, w15, lsr #21 +# CHECK-NEXT: 2 2 0.50 cmn w16, w17, lsr #31 +# CHECK-NEXT: 2 2 0.50 cmn w18, w19, asr #0 +# CHECK-NEXT: 2 2 0.50 cmn w20, w21, asr #22 +# CHECK-NEXT: 2 2 0.50 cmn w22, w23, asr #31 +# CHECK-NEXT: 1 1 0.25 cmn x0, x3 +# CHECK-NEXT: 1 1 0.25 cmn xzr, x4 +# CHECK-NEXT: 1 1 0.25 cmn x5, xzr +# CHECK-NEXT: 1 1 0.25 cmn x6, x7 +# CHECK-NEXT: 2 2 0.50 cmn x8, x9, lsl #15 +# CHECK-NEXT: 2 2 0.50 cmn x10, x11, lsl #63 +# CHECK-NEXT: 2 2 0.50 cmn x12, x13, lsr #0 +# CHECK-NEXT: 2 
2 0.50 cmn x14, x15, lsr #41 +# CHECK-NEXT: 2 2 0.50 cmn x16, x17, lsr #63 +# CHECK-NEXT: 2 2 0.50 cmn x18, x19, asr #0 +# CHECK-NEXT: 2 2 0.50 cmn x20, x21, asr #55 +# CHECK-NEXT: 2 2 0.50 cmn x22, x23, asr #63 +# CHECK-NEXT: 1 1 0.25 cmp w0, w3 +# CHECK-NEXT: 1 1 0.25 cmp wzr, w4 +# CHECK-NEXT: 1 1 0.25 cmp w5, wzr +# CHECK-NEXT: 1 1 0.25 cmp w6, w7 +# CHECK-NEXT: 2 2 0.50 cmp w8, w9, lsl #15 +# CHECK-NEXT: 2 2 0.50 cmp w10, w11, lsl #31 +# CHECK-NEXT: 2 2 0.50 cmp w12, w13, lsr #0 +# CHECK-NEXT: 2 2 0.50 cmp w14, w15, lsr #21 +# CHECK-NEXT: 2 2 0.50 cmp w18, w19, asr #0 +# CHECK-NEXT: 2 2 0.50 cmp w20, w21, asr #22 +# CHECK-NEXT: 2 2 0.50 cmp w22, w23, asr #31 +# CHECK-NEXT: 1 1 0.25 cmp x0, x3 +# CHECK-NEXT: 1 1 0.25 cmp xzr, x4 +# CHECK-NEXT: 1 1 0.25 cmp x5, xzr +# CHECK-NEXT: 1 1 0.25 cmp x6, x7 +# CHECK-NEXT: 2 2 0.50 cmp x8, x9, lsl #15 +# CHECK-NEXT: 2 2 0.50 cmp x10, x11, lsl #63 +# CHECK-NEXT: 2 2 0.50 cmp x12, x13, lsr #0 +# CHECK-NEXT: 2 2 0.50 cmp x14, x15, lsr #41 +# CHECK-NEXT: 2 2 0.50 cmp x16, x17, lsr #63 +# CHECK-NEXT: 2 2 0.50 cmp x18, x19, asr #0 +# CHECK-NEXT: 2 2 0.50 cmp x20, x21, asr #55 +# CHECK-NEXT: 2 2 0.50 cmp x22, x23, asr #63 +# CHECK-NEXT: 1 1 0.25 cmp wzr, w0 +# CHECK-NEXT: 1 1 0.25 cmp xzr, x0 +# CHECK-NEXT: 1 1 0.50 adc w29, w27, w25 +# CHECK-NEXT: 1 1 0.50 adc wzr, w3, w4 +# CHECK-NEXT: 1 1 0.50 adc w9, wzr, w10 +# CHECK-NEXT: 1 1 0.50 adc w20, w0, wzr +# CHECK-NEXT: 1 1 0.50 adc x29, x27, x25 +# CHECK-NEXT: 1 1 0.50 adc xzr, x3, x4 +# CHECK-NEXT: 1 1 0.50 adc x9, xzr, x10 +# CHECK-NEXT: 1 1 0.50 adc x20, x0, xzr +# CHECK-NEXT: 1 1 0.50 adcs w29, w27, w25 +# CHECK-NEXT: 1 1 0.50 adcs wzr, w3, w4 +# CHECK-NEXT: 1 1 0.50 adcs w9, wzr, w10 +# CHECK-NEXT: 1 1 0.50 adcs w20, w0, wzr +# CHECK-NEXT: 1 1 0.50 adcs x29, x27, x25 +# CHECK-NEXT: 1 1 0.50 adcs xzr, x3, x4 +# CHECK-NEXT: 1 1 0.50 adcs x9, xzr, x10 +# CHECK-NEXT: 1 1 0.50 adcs x20, x0, xzr +# CHECK-NEXT: 1 1 0.50 sbc w29, w27, w25 +# CHECK-NEXT: 1 1 0.50 sbc wzr, w3, w4 +# 
CHECK-NEXT: 1 1 0.50 ngc w9, w10 +# CHECK-NEXT: 1 1 0.50 sbc w20, w0, wzr +# CHECK-NEXT: 1 1 0.50 sbc x29, x27, x25 +# CHECK-NEXT: 1 1 0.50 sbc xzr, x3, x4 +# CHECK-NEXT: 1 1 0.50 ngc x9, x10 +# CHECK-NEXT: 1 1 0.50 sbc x20, x0, xzr +# CHECK-NEXT: 1 1 0.50 sbcs w29, w27, w25 +# CHECK-NEXT: 1 1 0.50 sbcs wzr, w3, w4 +# CHECK-NEXT: 1 1 0.50 ngcs w9, w10 +# CHECK-NEXT: 1 1 0.50 sbcs w20, w0, wzr +# CHECK-NEXT: 1 1 0.50 sbcs x29, x27, x25 +# CHECK-NEXT: 1 1 0.50 sbcs xzr, x3, x4 +# CHECK-NEXT: 1 1 0.50 ngcs x9, x10 +# CHECK-NEXT: 1 1 0.50 sbcs x20, x0, xzr +# CHECK-NEXT: 1 1 0.50 ngc w3, w12 +# CHECK-NEXT: 1 1 0.50 ngc wzr, w9 +# CHECK-NEXT: 1 1 0.50 ngc w23, wzr +# CHECK-NEXT: 1 1 0.50 ngc x29, x30 +# CHECK-NEXT: 1 1 0.50 ngc xzr, x0 +# CHECK-NEXT: 1 1 0.50 ngc x0, xzr +# CHECK-NEXT: 1 1 0.50 ngcs w3, w12 +# CHECK-NEXT: 1 1 0.50 ngcs wzr, w9 +# CHECK-NEXT: 1 1 0.50 ngcs w23, wzr +# CHECK-NEXT: 1 1 0.50 ngcs x29, x30 +# CHECK-NEXT: 1 1 0.50 ngcs xzr, x0 +# CHECK-NEXT: 1 1 0.50 ngcs x0, xzr +# CHECK-NEXT: 1 1 0.50 sbfx x1, x2, #3, #2 +# CHECK-NEXT: 1 1 0.50 asr x3, x4, #63 +# CHECK-NEXT: 1 1 0.50 asr wzr, wzr, #31 +# CHECK-NEXT: 1 1 0.50 sbfx w12, w9, #0, #1 +# CHECK-NEXT: 1 1 0.50 ubfiz x4, x5, #52, #11 +# CHECK-NEXT: 1 1 0.50 ubfx xzr, x4, #0, #1 +# CHECK-NEXT: 1 1 0.50 ubfiz x4, xzr, #1, #6 +# CHECK-NEXT: 1 1 0.50 lsr x5, x6, #12 +# CHECK-NEXT: 1 1 0.50 bfi x4, x5, #52, #11 +# CHECK-NEXT: 1 1 0.50 bfxil xzr, x4, #0, #1 +# CHECK-NEXT: 1 1 0.50 bfc x4, #1, #6 +# CHECK-NEXT: 1 1 0.50 bfxil x5, x6, #12, #52 +# CHECK-NEXT: 1 1 0.50 sxtb w1, w2 +# CHECK-NEXT: 1 1 0.50 sxtb xzr, w3 +# CHECK-NEXT: 1 1 0.50 sxth w9, w10 +# CHECK-NEXT: 1 1 0.50 sxth x0, w1 +# CHECK-NEXT: 1 1 0.50 sxtw x3, w30 +# CHECK-NEXT: 1 1 0.50 uxtb w1, w2 +# CHECK-NEXT: 1 1 0.50 uxth w9, w10 +# CHECK-NEXT: 1 1 0.50 ubfx x3, x30, #0, #32 +# CHECK-NEXT: 1 1 0.50 asr w3, w2, #0 +# CHECK-NEXT: 1 1 0.50 asr w9, w10, #31 +# CHECK-NEXT: 1 1 0.50 asr x20, x21, #63 +# CHECK-NEXT: 1 1 0.50 asr w1, wzr, #3 +# 
CHECK-NEXT: 1 1 0.50 lsr w3, w2, #0 +# CHECK-NEXT: 1 1 0.50 lsr w9, w10, #31 +# CHECK-NEXT: 1 1 0.50 lsr x20, x21, #63 +# CHECK-NEXT: 1 1 0.50 lsr wzr, wzr, #3 +# CHECK-NEXT: 1 1 0.50 lsr w3, w2, #0 +# CHECK-NEXT: 1 1 0.50 lsl w9, w10, #31 +# CHECK-NEXT: 1 1 0.50 lsl x20, x21, #63 +# CHECK-NEXT: 1 1 0.50 lsl w1, wzr, #3 +# CHECK-NEXT: 1 1 0.50 sbfx w9, w10, #0, #1 +# CHECK-NEXT: 1 1 0.50 sbfiz x2, x3, #63, #1 +# CHECK-NEXT: 1 1 0.50 asr x19, x20, #0 +# CHECK-NEXT: 1 1 0.50 sbfiz x9, x10, #5, #59 +# CHECK-NEXT: 1 1 0.50 asr w9, w10, #0 +# CHECK-NEXT: 1 1 0.50 sbfiz w11, w12, #31, #1 +# CHECK-NEXT: 1 1 0.50 sbfiz w13, w14, #29, #3 +# CHECK-NEXT: 1 1 0.50 sbfiz xzr, xzr, #10, #11 +# CHECK-NEXT: 1 1 0.50 sbfx w9, w10, #0, #1 +# CHECK-NEXT: 1 1 0.50 asr x2, x3, #63 +# CHECK-NEXT: 1 1 0.50 asr x19, x20, #0 +# CHECK-NEXT: 1 1 0.50 asr x9, x10, #5 +# CHECK-NEXT: 1 1 0.50 asr w9, w10, #0 +# CHECK-NEXT: 1 1 0.50 asr w11, w12, #31 +# CHECK-NEXT: 1 1 0.50 asr w13, w14, #29 +# CHECK-NEXT: 1 1 0.50 sbfx xzr, xzr, #10, #11 +# CHECK-NEXT: 1 1 0.50 bfxil w9, w10, #0, #1 +# CHECK-NEXT: 1 1 0.50 bfi x2, x3, #63, #1 +# CHECK-NEXT: 1 1 0.50 bfxil x19, x20, #0, #64 +# CHECK-NEXT: 1 1 0.50 bfi x9, x10, #5, #59 +# CHECK-NEXT: 1 1 0.50 bfxil w9, w10, #0, #32 +# CHECK-NEXT: 1 1 0.50 bfi w11, w12, #31, #1 +# CHECK-NEXT: 1 1 0.50 bfi w13, w14, #29, #3 +# CHECK-NEXT: 1 1 0.50 bfc xzr, #10, #11 +# CHECK-NEXT: 1 1 0.50 bfxil w9, w10, #0, #1 +# CHECK-NEXT: 1 1 0.50 bfxil x2, x3, #63, #1 +# CHECK-NEXT: 1 1 0.50 bfxil x19, x20, #0, #64 +# CHECK-NEXT: 1 1 0.50 bfxil x9, x10, #5, #59 +# CHECK-NEXT: 1 1 0.50 bfxil w9, w10, #0, #32 +# CHECK-NEXT: 1 1 0.50 bfxil w11, w12, #31, #1 +# CHECK-NEXT: 1 1 0.50 bfxil w13, w14, #29, #3 +# CHECK-NEXT: 1 1 0.50 bfxil xzr, xzr, #10, #11 +# CHECK-NEXT: 1 1 0.50 ubfx w9, w10, #0, #1 +# CHECK-NEXT: 1 1 0.50 lsl x2, x3, #63 +# CHECK-NEXT: 1 1 0.50 lsr x19, x20, #0 +# CHECK-NEXT: 1 1 0.50 lsl x9, x10, #5 +# CHECK-NEXT: 1 1 0.50 lsr w9, w10, #0 +# CHECK-NEXT: 1 1 0.50 
lsl w11, w12, #31 +# CHECK-NEXT: 1 1 0.50 lsl w13, w14, #29 +# CHECK-NEXT: 1 1 0.50 ubfiz xzr, xzr, #10, #11 +# CHECK-NEXT: 1 1 0.50 ubfx w9, w10, #0, #1 +# CHECK-NEXT: 1 1 0.50 lsr x2, x3, #63 +# CHECK-NEXT: 1 1 0.50 lsr x19, x20, #0 +# CHECK-NEXT: 1 1 0.50 lsr x9, x10, #5 +# CHECK-NEXT: 1 1 0.50 lsr w9, w10, #0 +# CHECK-NEXT: 1 1 0.50 lsr w11, w12, #31 +# CHECK-NEXT: 1 1 0.50 lsr w13, w14, #29 +# CHECK-NEXT: 1 1 0.50 ubfx xzr, xzr, #10, #11 +# CHECK-NEXT: 1 1 0.50 cbz w5, #4 +# CHECK-NEXT: 1 1 0.50 cbz x5, #0 +# CHECK-NEXT: 1 1 0.50 cbnz x2, #-4 +# CHECK-NEXT: 1 1 0.50 cbnz x26, #1048572 +# CHECK-NEXT: 1 1 0.50 cbz wzr, #0 +# CHECK-NEXT: 1 1 0.50 cbnz xzr, #0 +# CHECK-NEXT: 1 1 0.50 b.ne #4 +# CHECK-NEXT: 1 1 0.50 b.ge #1048572 +# CHECK-NEXT: 1 1 0.50 b.ge #-4 +# CHECK-NEXT: 1 1 0.50 ccmp w1, #31, #0, eq +# CHECK-NEXT: 1 1 0.50 ccmp w3, #0, #15, hs +# CHECK-NEXT: 1 1 0.50 ccmp wzr, #15, #13, hs +# CHECK-NEXT: 1 1 0.50 ccmp x9, #31, #0, le +# CHECK-NEXT: 1 1 0.50 ccmp x3, #0, #15, gt +# CHECK-NEXT: 1 1 0.50 ccmp xzr, #5, #7, ne +# CHECK-NEXT: 1 1 0.50 ccmn w1, #31, #0, eq +# CHECK-NEXT: 1 1 0.50 ccmn w3, #0, #15, hs +# CHECK-NEXT: 1 1 0.50 ccmn wzr, #15, #13, hs +# CHECK-NEXT: 1 1 0.50 ccmn x9, #31, #0, le +# CHECK-NEXT: 1 1 0.50 ccmn x3, #0, #15, gt +# CHECK-NEXT: 1 1 0.50 ccmn xzr, #5, #7, ne +# CHECK-NEXT: 1 1 0.50 ccmp w1, wzr, #0, eq +# CHECK-NEXT: 1 1 0.50 ccmp w3, w0, #15, hs +# CHECK-NEXT: 1 1 0.50 ccmp wzr, w15, #13, hs +# CHECK-NEXT: 1 1 0.50 ccmp x9, xzr, #0, le +# CHECK-NEXT: 1 1 0.50 ccmp x3, x0, #15, gt +# CHECK-NEXT: 1 1 0.50 ccmp xzr, x5, #7, ne +# CHECK-NEXT: 1 1 0.50 ccmn w1, wzr, #0, eq +# CHECK-NEXT: 1 1 0.50 ccmn w3, w0, #15, hs +# CHECK-NEXT: 1 1 0.50 ccmn wzr, w15, #13, hs +# CHECK-NEXT: 1 1 0.50 ccmn x9, xzr, #0, le +# CHECK-NEXT: 1 1 0.50 ccmn x3, x0, #15, gt +# CHECK-NEXT: 1 1 0.50 ccmn xzr, x5, #7, ne +# CHECK-NEXT: 1 1 0.50 csel w1, w0, w19, ne +# CHECK-NEXT: 1 1 0.50 csel wzr, w5, w9, eq +# CHECK-NEXT: 1 1 0.50 csel w9, wzr, w30, gt +# 
CHECK-NEXT: 1 1 0.50 csel w1, w28, wzr, mi +# CHECK-NEXT: 1 1 0.50 csel x19, x23, x29, lt +# CHECK-NEXT: 1 1 0.50 csel xzr, x3, x4, ge +# CHECK-NEXT: 1 1 0.50 csel x5, xzr, x6, hs +# CHECK-NEXT: 1 1 0.50 csel x7, x8, xzr, lo +# CHECK-NEXT: 1 1 0.50 csinc w1, w0, w19, ne +# CHECK-NEXT: 1 1 0.50 csinc wzr, w5, w9, eq +# CHECK-NEXT: 1 1 0.50 csinc w9, wzr, w30, gt +# CHECK-NEXT: 1 1 0.50 csinc w1, w28, wzr, mi +# CHECK-NEXT: 1 1 0.50 csinc x19, x23, x29, lt +# CHECK-NEXT: 1 1 0.50 csinc xzr, x3, x4, ge +# CHECK-NEXT: 1 1 0.50 csinc x5, xzr, x6, hs +# CHECK-NEXT: 1 1 0.50 csinc x7, x8, xzr, lo +# CHECK-NEXT: 1 1 0.50 csinv w1, w0, w19, ne +# CHECK-NEXT: 1 1 0.50 csinv wzr, w5, w9, eq +# CHECK-NEXT: 1 1 0.50 csinv w9, wzr, w30, gt +# CHECK-NEXT: 1 1 0.50 csinv w1, w28, wzr, mi +# CHECK-NEXT: 1 1 0.50 csinv x19, x23, x29, lt +# CHECK-NEXT: 1 1 0.50 csinv xzr, x3, x4, ge +# CHECK-NEXT: 1 1 0.50 csinv x5, xzr, x6, hs +# CHECK-NEXT: 1 1 0.50 csinv x7, x8, xzr, lo +# CHECK-NEXT: 1 1 0.50 csneg w1, w0, w19, ne +# CHECK-NEXT: 1 1 0.50 csneg wzr, w5, w9, eq +# CHECK-NEXT: 1 1 0.50 csneg w9, wzr, w30, gt +# CHECK-NEXT: 1 1 0.50 csneg w1, w28, wzr, mi +# CHECK-NEXT: 1 1 0.50 csneg x19, x23, x29, lt +# CHECK-NEXT: 1 1 0.50 csneg xzr, x3, x4, ge +# CHECK-NEXT: 1 1 0.50 csneg x5, xzr, x6, hs +# CHECK-NEXT: 1 1 0.50 csneg x7, x8, xzr, lo +# CHECK-NEXT: 1 1 0.50 cset w3, eq +# CHECK-NEXT: 1 1 0.50 cset x9, pl +# CHECK-NEXT: 1 1 0.50 csetm w20, ne +# CHECK-NEXT: 1 1 0.50 csetm x30, ge +# CHECK-NEXT: 1 1 0.50 csinc w2, wzr, wzr, al +# CHECK-NEXT: 1 1 0.50 csinv x3, xzr, xzr, nv +# CHECK-NEXT: 1 1 0.50 cinc w3, w5, gt +# CHECK-NEXT: 1 1 0.50 cinc wzr, w4, le +# CHECK-NEXT: 1 1 0.50 cset w9, lt +# CHECK-NEXT: 1 1 0.50 cinc x3, x5, gt +# CHECK-NEXT: 1 1 0.50 cinc xzr, x4, le +# CHECK-NEXT: 1 1 0.50 cset x9, lt +# CHECK-NEXT: 1 1 0.50 csinc w5, w6, w6, nv +# CHECK-NEXT: 1 1 0.50 csinc x1, x2, x2, al +# CHECK-NEXT: 1 1 0.50 cinv w3, w5, gt +# CHECK-NEXT: 1 1 0.50 cinv wzr, w4, le +# 
CHECK-NEXT: 1 1 0.50 csetm w9, lt +# CHECK-NEXT: 1 1 0.50 cinv x3, x5, gt +# CHECK-NEXT: 1 1 0.50 cinv xzr, x4, le +# CHECK-NEXT: 1 1 0.50 csetm x9, lt +# CHECK-NEXT: 1 1 0.50 csinv x1, x0, x0, al +# CHECK-NEXT: 1 1 0.50 csinv w9, w8, w8, nv +# CHECK-NEXT: 1 1 0.50 cneg w3, w5, gt +# CHECK-NEXT: 1 1 0.50 cneg wzr, w4, le +# CHECK-NEXT: 1 1 0.50 cneg w9, wzr, lt +# CHECK-NEXT: 1 1 0.50 cneg x3, x5, gt +# CHECK-NEXT: 1 1 0.50 cneg xzr, x4, le +# CHECK-NEXT: 1 1 0.50 cneg x9, xzr, lt +# CHECK-NEXT: 1 1 0.50 csneg x4, x8, x8, al +# CHECK-NEXT: 1 1 0.50 csinv w9, w8, w8, nv +# CHECK-NEXT: 1 1 0.50 rbit w0, w7 +# CHECK-NEXT: 1 1 0.50 rbit x18, x3 +# CHECK-NEXT: 1 1 0.50 rev16 w17, w1 +# CHECK-NEXT: 1 1 0.50 rev16 x5, x2 +# CHECK-NEXT: 1 1 0.50 rev w18, w0 +# CHECK-NEXT: 1 1 0.50 rev32 x20, x1 +# CHECK-NEXT: 1 1 0.50 rev x22, x2 +# CHECK-NEXT: 1 1 0.25 clz w24, w3 +# CHECK-NEXT: 1 1 0.25 clz x26, x4 +# CHECK-NEXT: 1 1 0.50 cls w3, w5 +# CHECK-NEXT: 1 1 0.50 cls x20, x5 +# CHECK-NEXT: 2 13 1.00 udiv w0, w7, w10 +# CHECK-NEXT: 3 13 2.00 udiv x9, x22, x4 +# CHECK-NEXT: 2 13 1.00 sdiv w12, w21, w0 +# CHECK-NEXT: 3 13 2.00 sdiv x13, x2, x1 +# CHECK-NEXT: 1 1 0.50 lsl w11, w12, w13 +# CHECK-NEXT: 1 1 0.50 lsl x14, x15, x16 +# CHECK-NEXT: 1 1 0.50 lsr w17, w18, w19 +# CHECK-NEXT: 1 1 0.50 lsr x20, x21, x22 +# CHECK-NEXT: 1 1 0.50 asr w23, w24, w25 +# CHECK-NEXT: 1 1 0.50 asr x26, x27, x28 +# CHECK-NEXT: 1 1 0.50 ror w0, w1, w2 +# CHECK-NEXT: 1 1 0.50 ror x3, x4, x5 +# CHECK-NEXT: 1 1 0.50 lsl w6, w7, w8 +# CHECK-NEXT: 1 1 0.50 lsl x9, x10, x11 +# CHECK-NEXT: 1 1 0.50 lsr w12, w13, w14 +# CHECK-NEXT: 1 1 0.50 lsr x15, x16, x17 +# CHECK-NEXT: 1 1 0.50 asr w18, w19, w20 +# CHECK-NEXT: 1 1 0.50 asr x21, x22, x23 +# CHECK-NEXT: 1 1 0.50 ror w24, w25, w26 +# CHECK-NEXT: 1 1 0.50 ror x27, x28, x29 +# CHECK-NEXT: 1 3 1.00 smulh x30, x29, x28 +# CHECK-NEXT: 1 3 1.00 smulh xzr, x27, x26 +# CHECK-NEXT: 1 3 1.00 umulh x30, x29, x28 +# CHECK-NEXT: 1 3 1.00 umulh x23, x30, xzr +# CHECK-NEXT: 
1 3 1.00 madd w1, w3, w7, w4 +# CHECK-NEXT: 1 3 1.00 madd wzr, w0, w9, w11 +# CHECK-NEXT: 1 3 1.00 madd w13, wzr, w4, w4 +# CHECK-NEXT: 1 3 1.00 madd w19, w30, wzr, w29 +# CHECK-NEXT: 1 3 1.00 mul w4, w5, w6 +# CHECK-NEXT: 1 3 1.00 madd x1, x3, x7, x4 +# CHECK-NEXT: 1 3 1.00 madd xzr, x0, x9, x11 +# CHECK-NEXT: 1 3 1.00 madd x13, xzr, x4, x4 +# CHECK-NEXT: 1 3 1.00 madd x19, x30, xzr, x29 +# CHECK-NEXT: 1 3 1.00 mul x4, x5, x6 +# CHECK-NEXT: 1 3 1.00 msub w1, w3, w7, w4 +# CHECK-NEXT: 1 3 1.00 msub wzr, w0, w9, w11 +# CHECK-NEXT: 1 3 1.00 msub w13, wzr, w4, w4 +# CHECK-NEXT: 1 3 1.00 msub w19, w30, wzr, w29 +# CHECK-NEXT: 1 3 1.00 mneg w4, w5, w6 +# CHECK-NEXT: 1 3 1.00 msub x1, x3, x7, x4 +# CHECK-NEXT: 1 3 1.00 msub xzr, x0, x9, x11 +# CHECK-NEXT: 1 3 1.00 msub x13, xzr, x4, x4 +# CHECK-NEXT: 1 3 1.00 msub x19, x30, xzr, x29 +# CHECK-NEXT: 1 3 1.00 mneg x4, x5, x6 +# CHECK-NEXT: 2 4 1.00 smaddl x3, w5, w2, x9 +# CHECK-NEXT: 2 4 1.00 smaddl xzr, w10, w11, x12 +# CHECK-NEXT: 2 4 1.00 smaddl x13, wzr, w14, x15 +# CHECK-NEXT: 2 4 1.00 smaddl x16, w17, wzr, x18 +# CHECK-NEXT: 2 4 1.00 smull x19, w20, w21 +# CHECK-NEXT: 2 4 1.00 smsubl x3, w5, w2, x9 +# CHECK-NEXT: 2 4 1.00 smsubl xzr, w10, w11, x12 +# CHECK-NEXT: 2 4 1.00 smsubl x13, wzr, w14, x15 +# CHECK-NEXT: 2 4 1.00 smsubl x16, w17, wzr, x18 +# CHECK-NEXT: 2 4 1.00 smnegl x19, w20, w21 +# CHECK-NEXT: 2 4 1.00 umaddl x3, w5, w2, x9 +# CHECK-NEXT: 2 4 1.00 umaddl xzr, w10, w11, x12 +# CHECK-NEXT: 2 4 1.00 umaddl x13, wzr, w14, x15 +# CHECK-NEXT: 2 4 1.00 umaddl x16, w17, wzr, x18 +# CHECK-NEXT: 2 4 1.00 umull x19, w20, w21 +# CHECK-NEXT: 2 4 1.00 umsubl x3, w5, w2, x9 +# CHECK-NEXT: 2 4 1.00 umsubl x16, w17, wzr, x18 +# CHECK-NEXT: 2 4 1.00 umnegl x19, w20, w21 +# CHECK-NEXT: 1 3 1.00 smulh x30, x29, x28 +# CHECK-NEXT: 1 3 1.00 smulh x23, x22, xzr +# CHECK-NEXT: 1 3 1.00 umulh x23, x22, xzr +# CHECK-NEXT: 1 3 1.00 mul x19, x20, xzr +# CHECK-NEXT: 1 3 1.00 mneg w21, w22, w23 +# CHECK-NEXT: 2 4 1.00 smull x11, w13, 
w17 +# CHECK-NEXT: 2 4 1.00 umull x11, w13, w17 +# CHECK-NEXT: 2 4 1.00 smnegl x11, w13, w17 +# CHECK-NEXT: 2 4 1.00 umnegl x11, w13, w17 +# CHECK-NEXT: 1 1 0.50 extr w3, w5, w7, #0 +# CHECK-NEXT: 1 1 0.50 extr w11, w13, w17, #31 +# CHECK-NEXT: 1 1 0.50 extr x3, x5, x7, #15 +# CHECK-NEXT: 1 1 0.50 extr x11, x13, x17, #63 +# CHECK-NEXT: 1 1 0.50 ror x19, x23, #24 +# CHECK-NEXT: 1 1 0.50 ror x29, xzr, #63 +# CHECK-NEXT: 1 1 0.50 ror w9, w13, #31 +# CHECK-NEXT: 1 3 1.00 fcmp s3, s5 +# CHECK-NEXT: 1 3 1.00 fcmp s31, #0.0 +# CHECK-NEXT: 1 3 1.00 fcmp s31, #0.0 +# CHECK-NEXT: 1 3 1.00 fcmpe s29, s30 +# CHECK-NEXT: 1 3 1.00 fcmpe s15, #0.0 +# CHECK-NEXT: 1 3 1.00 fcmpe s15, #0.0 +# CHECK-NEXT: 1 3 1.00 fcmp d4, d12 +# CHECK-NEXT: 1 3 1.00 fcmp d23, #0.0 +# CHECK-NEXT: 1 3 1.00 fcmp d23, #0.0 +# CHECK-NEXT: 1 3 1.00 fcmpe d26, d22 +# CHECK-NEXT: 1 3 1.00 fcmpe d29, #0.0 +# CHECK-NEXT: 1 3 1.00 fcmpe d29, #0.0 +# CHECK-NEXT: 3 9 1.00 fccmp s1, s31, #0, eq +# CHECK-NEXT: 3 9 1.00 fccmp s3, s0, #15, hs +# CHECK-NEXT: 3 9 1.00 fccmp s31, s15, #13, hs +# CHECK-NEXT: 3 9 1.00 fccmp d9, d31, #0, le +# CHECK-NEXT: 3 9 1.00 fccmp d3, d0, #15, gt +# CHECK-NEXT: 3 9 1.00 fccmp d31, d5, #7, ne +# CHECK-NEXT: 3 9 1.00 fccmpe s1, s31, #0, eq +# CHECK-NEXT: 3 9 1.00 fccmpe s3, s0, #15, hs +# CHECK-NEXT: 3 9 1.00 fccmpe s31, s15, #13, hs +# CHECK-NEXT: 3 9 1.00 fccmpe d9, d31, #0, le +# CHECK-NEXT: 3 9 1.00 fccmpe d3, d0, #15, gt +# CHECK-NEXT: 3 9 1.00 fccmpe d31, d5, #7, ne +# CHECK-NEXT: 3 9 1.00 fcsel s3, s20, s9, pl +# CHECK-NEXT: 3 9 1.00 fcsel d9, d10, d11, mi +# CHECK-NEXT: 1 2 0.50 fmov s0, s1 +# CHECK-NEXT: 1 2 0.50 fabs s2, s3 +# CHECK-NEXT: 1 2 0.50 fneg s4, s5 +# CHECK-NEXT: 1 33 1.00 fsqrt s6, s7 +# CHECK-NEXT: 1 3 0.50 fcvt d8, s9 +# CHECK-NEXT: 1 3 0.50 fcvt h10, s11 +# CHECK-NEXT: 1 2 0.50 frintn s12, s13 +# CHECK-NEXT: 1 2 0.50 frintp s14, s15 +# CHECK-NEXT: 1 2 0.50 frintm s16, s17 +# CHECK-NEXT: 1 2 0.50 frintz s18, s19 +# CHECK-NEXT: 1 2 0.50 frinta s20, s21 +# 
CHECK-NEXT: 1 2 0.50 frintx s22, s23 +# CHECK-NEXT: 1 2 0.50 frinti s24, s25 +# CHECK-NEXT: 1 2 0.50 fmov d0, d1 +# CHECK-NEXT: 1 2 0.50 fabs d2, d3 +# CHECK-NEXT: 1 2 0.50 fneg d4, d5 +# CHECK-NEXT: 1 63 1.00 fsqrt d6, d7 +# CHECK-NEXT: 1 3 0.50 fcvt s8, d9 +# CHECK-NEXT: 1 3 0.50 fcvt h10, d11 +# CHECK-NEXT: 1 2 0.50 frintn d12, d13 +# CHECK-NEXT: 1 2 0.50 frintp d14, d15 +# CHECK-NEXT: 1 2 0.50 frintm d16, d17 +# CHECK-NEXT: 1 2 0.50 frintz d18, d19 +# CHECK-NEXT: 1 2 0.50 frinta d20, d21 +# CHECK-NEXT: 1 2 0.50 frintx d22, d23 +# CHECK-NEXT: 1 2 0.50 frinti d24, d25 +# CHECK-NEXT: 1 3 0.50 fcvt s26, h27 +# CHECK-NEXT: 1 3 0.50 fcvt d28, h29 +# CHECK-NEXT: 1 4 0.50 fmul s20, s19, s17 +# CHECK-NEXT: 1 12 1.00 fdiv s1, s2, s3 +# CHECK-NEXT: 1 2 0.50 fadd s4, s5, s6 +# CHECK-NEXT: 1 2 0.50 fsub s7, s8, s9 +# CHECK-NEXT: 1 2 0.50 fmax s10, s11, s12 +# CHECK-NEXT: 1 2 0.50 fmin s13, s14, s15 +# CHECK-NEXT: 1 2 0.50 fmaxnm s16, s17, s18 +# CHECK-NEXT: 1 2 0.50 fminnm s19, s20, s21 +# CHECK-NEXT: 1 4 0.50 fnmul s22, s23, s2 +# CHECK-NEXT: 1 4 0.50 fmul d20, d19, d17 +# CHECK-NEXT: 1 19 1.00 fdiv d1, d2, d3 +# CHECK-NEXT: 1 2 0.50 fadd d4, d5, d6 +# CHECK-NEXT: 1 2 0.50 fsub d7, d8, d9 +# CHECK-NEXT: 1 2 0.50 fmax d10, d11, d12 +# CHECK-NEXT: 1 2 0.50 fmin d13, d14, d15 +# CHECK-NEXT: 1 2 0.50 fmaxnm d16, d17, d18 +# CHECK-NEXT: 1 2 0.50 fminnm d19, d20, d21 +# CHECK-NEXT: 1 4 0.50 fnmul d22, d23, d24 +# CHECK-NEXT: 1 4 0.50 fmadd s3, s5, s6, s31 +# CHECK-NEXT: 1 4 0.50 fmadd d3, d13, d0, d23 +# CHECK-NEXT: 1 4 0.50 fmsub s3, s5, s6, s31 +# CHECK-NEXT: 1 4 0.50 fmsub d3, d13, d0, d23 +# CHECK-NEXT: 1 4 0.50 fnmadd s3, s5, s6, s31 +# CHECK-NEXT: 1 4 0.50 fnmadd d3, d13, d0, d23 +# CHECK-NEXT: 1 4 0.50 fnmsub s3, s5, s6, s31 +# CHECK-NEXT: 1 4 0.50 fnmsub d3, d13, d0, d23 +# CHECK-NEXT: 2 7 1.00 fcvtzs w3, h5, #1 +# CHECK-NEXT: 2 7 1.00 fcvtzs wzr, h20, #13 +# CHECK-NEXT: 2 7 1.00 fcvtzs w19, h0, #32 +# CHECK-NEXT: 2 7 1.00 fcvtzs x3, h5, #1 +# CHECK-NEXT: 2 7 1.00 fcvtzs 
x12, h30, #45 +# CHECK-NEXT: 2 7 1.00 fcvtzs x19, h0, #64 +# CHECK-NEXT: 2 7 1.00 fcvtzs w3, s5, #1 +# CHECK-NEXT: 2 7 1.00 fcvtzs wzr, s20, #13 +# CHECK-NEXT: 2 7 1.00 fcvtzs w19, s0, #32 +# CHECK-NEXT: 2 7 1.00 fcvtzs x3, s5, #1 +# CHECK-NEXT: 2 7 1.00 fcvtzs x12, s30, #45 +# CHECK-NEXT: 2 7 1.00 fcvtzs x19, s0, #64 +# CHECK-NEXT: 2 7 1.00 fcvtzs w3, d5, #1 +# CHECK-NEXT: 2 7 1.00 fcvtzs wzr, d20, #13 +# CHECK-NEXT: 2 7 1.00 fcvtzs w19, d0, #32 +# CHECK-NEXT: 2 7 1.00 fcvtzs x3, d5, #1 +# CHECK-NEXT: 2 7 1.00 fcvtzs x12, d30, #45 +# CHECK-NEXT: 2 7 1.00 fcvtzs x19, d0, #64 +# CHECK-NEXT: 2 7 1.00 fcvtzu w3, h5, #1 +# CHECK-NEXT: 2 7 1.00 fcvtzu wzr, h20, #13 +# CHECK-NEXT: 2 7 1.00 fcvtzu w19, h0, #32 +# CHECK-NEXT: 2 7 1.00 fcvtzu x3, h5, #1 +# CHECK-NEXT: 2 7 1.00 fcvtzu x12, h30, #45 +# CHECK-NEXT: 2 7 1.00 fcvtzu x19, h0, #64 +# CHECK-NEXT: 2 7 1.00 fcvtzu w3, s5, #1 +# CHECK-NEXT: 2 7 1.00 fcvtzu wzr, s20, #13 +# CHECK-NEXT: 2 7 1.00 fcvtzu w19, s0, #32 +# CHECK-NEXT: 2 7 1.00 fcvtzu x3, s5, #1 +# CHECK-NEXT: 2 7 1.00 fcvtzu x12, s30, #45 +# CHECK-NEXT: 2 7 1.00 fcvtzu x19, s0, #64 +# CHECK-NEXT: 2 7 1.00 fcvtzu w3, d5, #1 +# CHECK-NEXT: 2 7 1.00 fcvtzu wzr, d20, #13 +# CHECK-NEXT: 2 7 1.00 fcvtzu w19, d0, #32 +# CHECK-NEXT: 2 7 1.00 fcvtzu x3, d5, #1 +# CHECK-NEXT: 2 7 1.00 fcvtzu x12, d30, #45 +# CHECK-NEXT: 2 7 1.00 fcvtzu x19, d0, #64 +# CHECK-NEXT: 3 11 1.00 scvtf h23, w19, #1 +# CHECK-NEXT: 3 11 1.00 scvtf h31, wzr, #20 +# CHECK-NEXT: 3 11 1.00 scvtf h14, w0, #32 +# CHECK-NEXT: 3 11 1.00 scvtf h23, x19, #1 +# CHECK-NEXT: 3 11 1.00 scvtf h31, xzr, #20 +# CHECK-NEXT: 3 11 1.00 scvtf h14, x0, #64 +# CHECK-NEXT: 3 11 1.00 scvtf s23, w19, #1 +# CHECK-NEXT: 3 11 1.00 scvtf s31, wzr, #20 +# CHECK-NEXT: 3 11 1.00 scvtf s14, w0, #32 +# CHECK-NEXT: 3 11 1.00 scvtf s23, x19, #1 +# CHECK-NEXT: 3 11 1.00 scvtf s31, xzr, #20 +# CHECK-NEXT: 3 11 1.00 scvtf s14, x0, #64 +# CHECK-NEXT: 3 11 1.00 scvtf d23, w19, #1 +# CHECK-NEXT: 3 11 1.00 scvtf d31, wzr, #20 +# 
CHECK-NEXT: 3 11 1.00 scvtf d14, w0, #32 +# CHECK-NEXT: 3 11 1.00 scvtf d23, x19, #1 +# CHECK-NEXT: 3 11 1.00 scvtf d31, xzr, #20 +# CHECK-NEXT: 3 11 1.00 scvtf d14, x0, #64 +# CHECK-NEXT: 3 11 1.00 ucvtf h23, w19, #1 +# CHECK-NEXT: 3 11 1.00 ucvtf h31, wzr, #20 +# CHECK-NEXT: 3 11 1.00 ucvtf h14, w0, #32 +# CHECK-NEXT: 3 11 1.00 ucvtf h23, x19, #1 +# CHECK-NEXT: 3 11 1.00 ucvtf h31, xzr, #20 +# CHECK-NEXT: 3 11 1.00 ucvtf h14, x0, #64 +# CHECK-NEXT: 3 11 1.00 ucvtf s23, w19, #1 +# CHECK-NEXT: 3 11 1.00 ucvtf s31, wzr, #20 +# CHECK-NEXT: 3 11 1.00 ucvtf s14, w0, #32 +# CHECK-NEXT: 3 11 1.00 ucvtf s23, x19, #1 +# CHECK-NEXT: 3 11 1.00 ucvtf s31, xzr, #20 +# CHECK-NEXT: 3 11 1.00 ucvtf s14, x0, #64 +# CHECK-NEXT: 3 11 1.00 ucvtf d23, w19, #1 +# CHECK-NEXT: 3 11 1.00 ucvtf d31, wzr, #20 +# CHECK-NEXT: 3 11 1.00 ucvtf d14, w0, #32 +# CHECK-NEXT: 3 11 1.00 ucvtf d23, x19, #1 +# CHECK-NEXT: 3 11 1.00 ucvtf d31, xzr, #20 +# CHECK-NEXT: 3 11 1.00 ucvtf d14, x0, #64 +# CHECK-NEXT: 2 7 1.00 fcvtns w3, h31 +# CHECK-NEXT: 2 7 1.00 fcvtns xzr, h12 +# CHECK-NEXT: 2 7 1.00 fcvtnu wzr, h12 +# CHECK-NEXT: 2 7 1.00 fcvtnu x0, h0 +# CHECK-NEXT: 2 7 1.00 fcvtps wzr, h9 +# CHECK-NEXT: 2 7 1.00 fcvtps x12, h20 +# CHECK-NEXT: 2 7 1.00 fcvtpu w30, h23 +# CHECK-NEXT: 2 7 1.00 fcvtpu x29, h3 +# CHECK-NEXT: 2 7 1.00 fcvtms w2, h3 +# CHECK-NEXT: 2 7 1.00 fcvtms x4, h5 +# CHECK-NEXT: 2 7 1.00 fcvtmu w6, h7 +# CHECK-NEXT: 2 7 1.00 fcvtmu x8, h9 +# CHECK-NEXT: 2 7 1.00 fcvtzs w10, h11 +# CHECK-NEXT: 2 7 1.00 fcvtzs x12, h13 +# CHECK-NEXT: 2 7 1.00 fcvtzu w14, h15 +# CHECK-NEXT: 2 7 1.00 fcvtzu x15, h16 +# CHECK-NEXT: 3 11 1.00 scvtf h17, w18 +# CHECK-NEXT: 3 11 1.00 scvtf h19, x20 +# CHECK-NEXT: 3 11 1.00 ucvtf h21, w22 +# CHECK-NEXT: 3 11 1.00 scvtf h23, x24 +# CHECK-NEXT: 2 7 1.00 fcvtas w25, h26 +# CHECK-NEXT: 2 7 1.00 fcvtas x27, h28 +# CHECK-NEXT: 2 7 1.00 fcvtau w29, h30 +# CHECK-NEXT: 2 7 1.00 fcvtau xzr, h0 +# CHECK-NEXT: 2 7 1.00 fcvtns w3, s31 +# CHECK-NEXT: 2 7 1.00 fcvtns xzr, s12 +# 
CHECK-NEXT: 2 7 1.00 fcvtnu wzr, s12 +# CHECK-NEXT: 2 7 1.00 fcvtnu x0, s0 +# CHECK-NEXT: 2 7 1.00 fcvtps wzr, s9 +# CHECK-NEXT: 2 7 1.00 fcvtps x12, s20 +# CHECK-NEXT: 2 7 1.00 fcvtpu w30, s23 +# CHECK-NEXT: 2 7 1.00 fcvtpu x29, s3 +# CHECK-NEXT: 2 7 1.00 fcvtms w2, s3 +# CHECK-NEXT: 2 7 1.00 fcvtms x4, s5 +# CHECK-NEXT: 2 7 1.00 fcvtmu w6, s7 +# CHECK-NEXT: 2 7 1.00 fcvtmu x8, s9 +# CHECK-NEXT: 2 7 1.00 fcvtzs w10, s11 +# CHECK-NEXT: 2 7 1.00 fcvtzs x12, s13 +# CHECK-NEXT: 2 7 1.00 fcvtzu w14, s15 +# CHECK-NEXT: 2 7 1.00 fcvtzu x15, s16 +# CHECK-NEXT: 3 11 1.00 scvtf s17, w18 +# CHECK-NEXT: 3 11 1.00 scvtf s19, x20 +# CHECK-NEXT: 3 11 1.00 ucvtf s21, w22 +# CHECK-NEXT: 3 11 1.00 scvtf s23, x24 +# CHECK-NEXT: 2 7 1.00 fcvtas w25, s26 +# CHECK-NEXT: 2 7 1.00 fcvtas x27, s28 +# CHECK-NEXT: 2 7 1.00 fcvtau w29, s30 +# CHECK-NEXT: 2 7 1.00 fcvtau xzr, s0 +# CHECK-NEXT: 2 7 1.00 fcvtns w3, d31 +# CHECK-NEXT: 2 7 1.00 fcvtns xzr, d12 +# CHECK-NEXT: 2 7 1.00 fcvtnu wzr, d12 +# CHECK-NEXT: 2 7 1.00 fcvtnu x0, d0 +# CHECK-NEXT: 2 7 1.00 fcvtps wzr, d9 +# CHECK-NEXT: 2 7 1.00 fcvtps x12, d20 +# CHECK-NEXT: 2 7 1.00 fcvtpu w30, d23 +# CHECK-NEXT: 2 7 1.00 fcvtpu x29, d3 +# CHECK-NEXT: 2 7 1.00 fcvtms w2, d3 +# CHECK-NEXT: 2 7 1.00 fcvtms x4, d5 +# CHECK-NEXT: 2 7 1.00 fcvtmu w6, d7 +# CHECK-NEXT: 2 7 1.00 fcvtmu x8, d9 +# CHECK-NEXT: 2 7 1.00 fcvtzs w10, d11 +# CHECK-NEXT: 2 7 1.00 fcvtzs x12, d13 +# CHECK-NEXT: 2 7 1.00 fcvtzu w14, d15 +# CHECK-NEXT: 2 7 1.00 fcvtzu x15, d16 +# CHECK-NEXT: 3 11 1.00 scvtf d17, w18 +# CHECK-NEXT: 3 11 1.00 scvtf d19, x20 +# CHECK-NEXT: 3 11 1.00 ucvtf d21, w22 +# CHECK-NEXT: 3 11 1.00 ucvtf d23, x24 +# CHECK-NEXT: 2 7 1.00 fcvtas w25, d26 +# CHECK-NEXT: 2 7 1.00 fcvtas x27, d28 +# CHECK-NEXT: 2 7 1.00 fcvtau w29, d30 +# CHECK-NEXT: 2 7 1.00 fcvtau xzr, d0 +# CHECK-NEXT: 1 5 1.00 fmov w3, s9 +# CHECK-NEXT: 1 3 1.00 fmov s9, w3 +# CHECK-NEXT: 1 5 1.00 fmov x20, d31 +# CHECK-NEXT: 1 3 1.00 fmov d1, x15 +# CHECK-NEXT: 2 7 1.00 fmov x3, v12.d[1] 
+# CHECK-NEXT: 1 5 1.00 fmov v1.d[1], x19 +# CHECK-NEXT: 1 2 0.50 fmov s2, #0.12500000 +# CHECK-NEXT: 1 2 0.50 fmov s3, #1.00000000 +# CHECK-NEXT: 1 2 0.50 fmov d30, #16.00000000 +# CHECK-NEXT: 1 2 0.50 fmov s4, #1.06250000 +# CHECK-NEXT: 1 2 0.50 fmov d10, #1.93750000 +# CHECK-NEXT: 1 2 0.50 fmov s12, #-1.00000000 +# CHECK-NEXT: 1 2 0.50 fmov d16, #8.50000000 +# CHECK-NEXT: 1 3 0.50 * ldr w3, #0 +# CHECK-NEXT: 1 3 0.50 * ldr x29, #4 +# CHECK-NEXT: 1 3 0.50 * ldrsw xzr, #-4 +# CHECK-NEXT: 1 3 0.50 * ldr s0, #8 +# CHECK-NEXT: 1 3 0.50 * ldr d0, #1048572 +# CHECK-NEXT: 1 3 0.50 * ldr q0, #-1048576 +# CHECK-NEXT: 1 1 0.50 U prfm pldl1strm, #0 +# CHECK-NEXT: 1 1 0.50 U prfm #22, #0 +# CHECK-NEXT: 2 4 0.50 * * U stxrb w18, w8, [sp] +# CHECK-NEXT: 2 4 0.50 * * U stxrh w24, w15, [x16] +# CHECK-NEXT: 2 4 0.50 * * U stxr w5, w6, [x17] +# CHECK-NEXT: 2 4 0.50 * * U stxr w1, x10, [x21] +# CHECK-NEXT: 1 3 0.50 * * U ldxrb w30, [x0] +# CHECK-NEXT: 1 3 0.50 * * U ldxrh w17, [x4] +# CHECK-NEXT: 1 3 0.50 * * U ldxr w22, [sp] +# CHECK-NEXT: 1 3 0.50 * * U ldxr x11, [x29] +# CHECK-NEXT: 1 3 0.50 * * U ldxr x11, [x29] +# CHECK-NEXT: 1 3 0.50 * * U ldxr x11, [x29] +# CHECK-NEXT: 2 4 0.50 * * U stxp w12, w11, w10, [sp] +# CHECK-NEXT: 2 4 0.50 * * U stxp wzr, x27, x9, [x12] +# CHECK-NEXT: 2 3 0.50 * * U ldxp w0, wzr, [sp] +# CHECK-NEXT: 2 3 0.50 * * U ldxp x17, x0, [x18] +# CHECK-NEXT: 2 3 0.50 * * U ldxp x17, x0, [x18] +# CHECK-NEXT: 2 4 0.50 * * U stlxrb w12, w22, [x0] +# CHECK-NEXT: 2 4 0.50 * * U stlxrh w10, w1, [x1] +# CHECK-NEXT: 2 4 0.50 * * U stlxr w9, w2, [x2] +# CHECK-NEXT: 2 4 0.50 * * U stlxr w9, x3, [sp] +# CHECK-NEXT: 1 3 0.50 * * U ldaxrb w8, [x4] +# CHECK-NEXT: 1 3 0.50 * * U ldaxrh w7, [x5] +# CHECK-NEXT: 1 3 0.50 * * U ldaxr w6, [sp] +# CHECK-NEXT: 1 3 0.50 * * U ldaxr x5, [x6] +# CHECK-NEXT: 1 3 0.50 * * U ldaxr x5, [x6] +# CHECK-NEXT: 1 3 0.50 * * U ldaxr x5, [x6] +# CHECK-NEXT: 2 4 0.50 * * U stlxp w4, w5, w6, [sp] +# CHECK-NEXT: 2 4 0.50 * * U stlxp wzr, x6, x7, 
[x1] +# CHECK-NEXT: 2 3 0.50 * * U ldaxp w5, w18, [sp] +# CHECK-NEXT: 2 3 0.50 * * U ldaxp x6, x19, [x22] +# CHECK-NEXT: 2 3 0.50 * * U ldaxp x6, x19, [x22] +# CHECK-NEXT: 1 1 0.50 * U stlrb w24, [sp] +# CHECK-NEXT: 1 1 0.50 * U stlrh w25, [x30] +# CHECK-NEXT: 1 1 0.50 * U stlr w26, [x29] +# CHECK-NEXT: 1 1 0.50 * U stlr x27, [x28] +# CHECK-NEXT: 1 1 0.50 * U stlr x27, [x28] +# CHECK-NEXT: 1 1 0.50 * U stlr x27, [x28] +# CHECK-NEXT: 1 3 0.50 * U ldarb w23, [sp] +# CHECK-NEXT: 1 3 0.50 * U ldarh w22, [x30] +# CHECK-NEXT: 1 3 0.50 * U ldar wzr, [x29] +# CHECK-NEXT: 1 3 0.50 * U ldar x21, [x28] +# CHECK-NEXT: 1 3 0.50 * U ldar x21, [x28] +# CHECK-NEXT: 1 3 0.50 * U ldar x21, [x28] +# CHECK-NEXT: 1 1 0.50 * sturb w9, [sp] +# CHECK-NEXT: 1 1 0.50 * sturh wzr, [x12, #255] +# CHECK-NEXT: 1 1 0.50 * stur w16, [x0, #-256] +# CHECK-NEXT: 1 1 0.50 * stur x28, [x14, #1] +# CHECK-NEXT: 1 3 0.50 * ldurb w1, [x20, #255] +# CHECK-NEXT: 1 3 0.50 * ldurh w20, [x1, #255] +# CHECK-NEXT: 1 3 0.50 * ldur w12, [sp, #255] +# CHECK-NEXT: 1 3 0.50 * ldur xzr, [x12, #255] +# CHECK-NEXT: 1 3 0.50 * ldursb x9, [x7, #-256] +# CHECK-NEXT: 1 3 0.50 * ldursh x17, [x19, #-256] +# CHECK-NEXT: 1 3 0.50 * ldursw x20, [x15, #-256] +# CHECK-NEXT: 1 1 0.50 U prfum pldl2keep, [sp, #-256] +# CHECK-NEXT: 1 3 0.50 * ldursb w19, [x1, #-256] +# CHECK-NEXT: 1 3 0.50 * ldursh w15, [x21, #-256] +# CHECK-NEXT: 2 2 1.00 * stur b0, [sp, #1] +# CHECK-NEXT: 2 2 1.00 * stur h12, [x12, #-1] +# CHECK-NEXT: 2 2 1.00 * stur s15, [x0, #255] +# CHECK-NEXT: 2 2 1.00 * stur d31, [x5, #25] +# CHECK-NEXT: 2 2 1.00 * stur q9, [x5] +# CHECK-NEXT: 1 4 0.50 * ldur b3, [sp] +# CHECK-NEXT: 1 4 0.50 * ldur h5, [x4, #-256] +# CHECK-NEXT: 1 4 0.50 * ldur s7, [x12, #-1] +# CHECK-NEXT: 1 4 0.50 * ldur d11, [x19, #4] +# CHECK-NEXT: 1 4 0.50 * ldur q13, [x1, #2] +# CHECK-NEXT: 2 1 0.50 * strb w9, [x2], #255 +# CHECK-NEXT: 2 1 0.50 * strb w10, [x3], #1 +# CHECK-NEXT: 2 1 0.50 * strb w10, [x3], #-256 +# CHECK-NEXT: 2 1 0.50 * strh w9, [x2], 
#255 +# CHECK-NEXT: 2 1 0.50 * strh w9, [x2], #1 +# CHECK-NEXT: 2 1 0.50 * strh w10, [x3], #-256 +# CHECK-NEXT: 2 1 0.50 * str w19, [sp], #255 +# CHECK-NEXT: 2 1 0.50 * str w20, [x30], #1 +# CHECK-NEXT: 2 1 0.50 * str w21, [x12], #-256 +# CHECK-NEXT: 2 1 0.50 * str xzr, [x9], #255 +# CHECK-NEXT: 2 1 0.50 * str x2, [x3], #1 +# CHECK-NEXT: 2 1 0.50 * str x19, [x12], #-256 +# CHECK-NEXT: 2 3 0.50 * ldrb w9, [x2], #255 +# CHECK-NEXT: 2 3 0.50 * ldrb w10, [x3], #1 +# CHECK-NEXT: 2 3 0.50 * ldrb w10, [x3], #-256 +# CHECK-NEXT: 2 3 0.50 * ldrh w9, [x2], #255 +# CHECK-NEXT: 2 3 0.50 * ldrh w9, [x2], #1 +# CHECK-NEXT: 2 3 0.50 * ldrh w10, [x3], #-256 +# CHECK-NEXT: 2 3 0.50 * ldr w19, [sp], #255 +# CHECK-NEXT: 2 3 0.50 * ldr w20, [x30], #1 +# CHECK-NEXT: 2 3 0.50 * ldr w21, [x12], #-256 +# CHECK-NEXT: 2 3 0.50 * ldr xzr, [x9], #255 +# CHECK-NEXT: 2 3 0.50 * ldr x2, [x3], #1 +# CHECK-NEXT: 2 3 0.50 * ldr x19, [x12], #-256 +# CHECK-NEXT: 2 3 0.50 * ldrsb xzr, [x9], #255 +# CHECK-NEXT: 2 3 0.50 * ldrsb x2, [x3], #1 +# CHECK-NEXT: 2 3 0.50 * ldrsb x19, [x12], #-256 +# CHECK-NEXT: 2 3 0.50 * ldrsh xzr, [x9], #255 +# CHECK-NEXT: 2 3 0.50 * ldrsh x2, [x3], #1 +# CHECK-NEXT: 2 3 0.50 * ldrsh x19, [x12], #-256 +# CHECK-NEXT: 2 3 0.50 * ldrsw xzr, [x9], #255 +# CHECK-NEXT: 2 3 0.50 * ldrsw x2, [x3], #1 +# CHECK-NEXT: 2 3 0.50 * ldrsw x19, [x12], #-256 +# CHECK-NEXT: 2 3 0.50 * ldrsb wzr, [x9], #255 +# CHECK-NEXT: 2 3 0.50 * ldrsb w2, [x3], #1 +# CHECK-NEXT: 2 3 0.50 * ldrsb w19, [x12], #-256 +# CHECK-NEXT: 2 3 0.50 * ldrsh wzr, [x9], #255 +# CHECK-NEXT: 2 3 0.50 * ldrsh w2, [x3], #1 +# CHECK-NEXT: 2 3 0.50 * ldrsh w19, [x12], #-256 +# CHECK-NEXT: 2 1 0.50 * str b0, [x0], #255 +# CHECK-NEXT: 2 1 0.50 * str b3, [x3], #1 +# CHECK-NEXT: 2 1 0.50 * str b5, [sp], #-256 +# CHECK-NEXT: 2 1 0.50 * str h10, [x10], #255 +# CHECK-NEXT: 2 1 0.50 * str h13, [x23], #1 +# CHECK-NEXT: 2 1 0.50 * str h15, [sp], #-256 +# CHECK-NEXT: 2 1 0.50 * str s20, [x20], #255 +# CHECK-NEXT: 2 1 0.50 * str s23, 
[x23], #1 +# CHECK-NEXT: 2 1 0.50 * str s25, [x0], #-256 +# CHECK-NEXT: 2 1 0.50 * str d20, [x20], #255 +# CHECK-NEXT: 2 1 0.50 * str d23, [x23], #1 +# CHECK-NEXT: 2 1 0.50 * str d25, [x0], #-256 +# CHECK-NEXT: 2 3 0.50 * ldr b0, [x0], #255 +# CHECK-NEXT: 2 3 0.50 * ldr b3, [x3], #1 +# CHECK-NEXT: 2 3 0.50 * ldr b5, [sp], #-256 +# CHECK-NEXT: 2 3 0.50 * ldr h10, [x10], #255 +# CHECK-NEXT: 2 3 0.50 * ldr h13, [x23], #1 +# CHECK-NEXT: 2 3 0.50 * ldr h15, [sp], #-256 +# CHECK-NEXT: 2 3 0.50 * ldr s20, [x20], #255 +# CHECK-NEXT: 2 3 0.50 * ldr s23, [x23], #1 +# CHECK-NEXT: 2 3 0.50 * ldr s25, [x0], #-256 +# CHECK-NEXT: 2 3 0.50 * ldr d20, [x20], #255 +# CHECK-NEXT: 2 3 0.50 * ldr d23, [x23], #1 +# CHECK-NEXT: 2 3 0.50 * ldr d25, [x0], #-256 +# CHECK-NEXT: 2 3 0.50 * ldr q20, [x1], #255 +# CHECK-NEXT: 2 3 0.50 * ldr q23, [x9], #1 +# CHECK-NEXT: 2 3 0.50 * ldr q25, [x20], #-256 +# CHECK-NEXT: 2 1 0.50 * str q10, [x1], #255 +# CHECK-NEXT: 2 1 0.50 * str q22, [sp], #1 +# CHECK-NEXT: 2 1 0.50 * str q21, [x20], #-256 +# CHECK-NEXT: 2 3 0.50 * ldr x3, [x4, #0]! +# CHECK-NEXT: 2 1 0.50 * strb w9, [x2, #255]! +# CHECK-NEXT: 2 1 0.50 * strb w10, [x3, #1]! +# CHECK-NEXT: 2 1 0.50 * strb w10, [x3, #-256]! +# CHECK-NEXT: 2 1 0.50 * strh w9, [x2, #255]! +# CHECK-NEXT: 2 1 0.50 * strh w9, [x2, #1]! +# CHECK-NEXT: 2 1 0.50 * strh w10, [x3, #-256]! +# CHECK-NEXT: 2 1 0.50 * str w19, [sp, #255]! +# CHECK-NEXT: 2 1 0.50 * str w20, [x30, #1]! +# CHECK-NEXT: 2 1 0.50 * str w21, [x12, #-256]! +# CHECK-NEXT: 2 1 0.50 * str xzr, [x9, #255]! +# CHECK-NEXT: 2 1 0.50 * str x2, [x3, #1]! +# CHECK-NEXT: 2 1 0.50 * str x19, [x12, #-256]! +# CHECK-NEXT: 2 3 0.50 * ldrb w9, [x2, #255]! +# CHECK-NEXT: 2 3 0.50 * ldrb w10, [x3, #1]! +# CHECK-NEXT: 2 3 0.50 * ldrb w10, [x3, #-256]! +# CHECK-NEXT: 2 3 0.50 * ldrh w9, [x2, #255]! +# CHECK-NEXT: 2 3 0.50 * ldrh w9, [x2, #1]! +# CHECK-NEXT: 2 3 0.50 * ldrh w10, [x3, #-256]! +# CHECK-NEXT: 2 3 0.50 * ldr w19, [sp, #255]! 
+# CHECK-NEXT: 2 3 0.50 * ldr w20, [x30, #1]! +# CHECK-NEXT: 2 3 0.50 * ldr w21, [x12, #-256]! +# CHECK-NEXT: 2 3 0.50 * ldr xzr, [x9, #255]! +# CHECK-NEXT: 2 3 0.50 * ldr x2, [x3, #1]! +# CHECK-NEXT: 2 3 0.50 * ldr x19, [x12, #-256]! +# CHECK-NEXT: 2 3 0.50 * ldrsb xzr, [x9, #255]! +# CHECK-NEXT: 2 3 0.50 * ldrsb x2, [x3, #1]! +# CHECK-NEXT: 2 3 0.50 * ldrsb x19, [x12, #-256]! +# CHECK-NEXT: 2 3 0.50 * ldrsh xzr, [x9, #255]! +# CHECK-NEXT: 2 3 0.50 * ldrsh x2, [x3, #1]! +# CHECK-NEXT: 2 3 0.50 * ldrsh x19, [x12, #-256]! +# CHECK-NEXT: 2 3 0.50 * ldrsw xzr, [x9, #255]! +# CHECK-NEXT: 2 3 0.50 * ldrsw x2, [x3, #1]! +# CHECK-NEXT: 2 3 0.50 * ldrsw x19, [x12, #-256]! +# CHECK-NEXT: 2 3 0.50 * ldrsb wzr, [x9, #255]! +# CHECK-NEXT: 2 3 0.50 * ldrsb w2, [x3, #1]! +# CHECK-NEXT: 2 3 0.50 * ldrsb w19, [x12, #-256]! +# CHECK-NEXT: 2 3 0.50 * ldrsh wzr, [x9, #255]! +# CHECK-NEXT: 2 3 0.50 * ldrsh w2, [x3, #1]! +# CHECK-NEXT: 2 3 0.50 * ldrsh w19, [x12, #-256]! +# CHECK-NEXT: 2 1 0.50 * str b0, [x0, #255]! +# CHECK-NEXT: 2 1 0.50 * str b3, [x3, #1]! +# CHECK-NEXT: 2 1 0.50 * str b5, [sp, #-256]! +# CHECK-NEXT: 2 1 0.50 * str h10, [x10, #255]! +# CHECK-NEXT: 2 1 0.50 * str h13, [x23, #1]! +# CHECK-NEXT: 2 1 0.50 * str h15, [sp, #-256]! +# CHECK-NEXT: 2 1 0.50 * str s20, [x20, #255]! +# CHECK-NEXT: 2 1 0.50 * str s23, [x23, #1]! +# CHECK-NEXT: 2 1 0.50 * str s25, [x0, #-256]! +# CHECK-NEXT: 2 1 0.50 * str d20, [x20, #255]! +# CHECK-NEXT: 2 1 0.50 * str d23, [x23, #1]! +# CHECK-NEXT: 2 1 0.50 * str d25, [x0, #-256]! +# CHECK-NEXT: 2 3 0.50 * ldr b0, [x0, #255]! +# CHECK-NEXT: 2 3 0.50 * ldr b3, [x3, #1]! +# CHECK-NEXT: 2 3 0.50 * ldr b5, [sp, #-256]! +# CHECK-NEXT: 2 3 0.50 * ldr h10, [x10, #255]! +# CHECK-NEXT: 2 3 0.50 * ldr h13, [x23, #1]! +# CHECK-NEXT: 2 3 0.50 * ldr h15, [sp, #-256]! +# CHECK-NEXT: 2 3 0.50 * ldr s20, [x20, #255]! +# CHECK-NEXT: 2 3 0.50 * ldr s23, [x23, #1]! +# CHECK-NEXT: 2 3 0.50 * ldr s25, [x0, #-256]! +# CHECK-NEXT: 2 3 0.50 * ldr d20, [x20, #255]! 
+# CHECK-NEXT: 2 3 0.50 * ldr d23, [x23, #1]! +# CHECK-NEXT: 2 3 0.50 * ldr d25, [x0, #-256]! +# CHECK-NEXT: 2 3 0.50 * ldr q20, [x1, #255]! +# CHECK-NEXT: 2 3 0.50 * ldr q23, [x9, #1]! +# CHECK-NEXT: 2 3 0.50 * ldr q25, [x20, #-256]! +# CHECK-NEXT: 2 1 0.50 * str q10, [x1, #255]! +# CHECK-NEXT: 2 1 0.50 * str q22, [sp, #1]! +# CHECK-NEXT: 2 1 0.50 * str q21, [x20, #-256]! +# CHECK-NEXT: 1 1 0.50 * sttrb w9, [sp] +# CHECK-NEXT: 1 1 0.50 * sttrh wzr, [x12, #255] +# CHECK-NEXT: 1 1 0.50 * sttr w16, [x0, #-256] +# CHECK-NEXT: 1 1 0.50 * sttr x28, [x14, #1] +# CHECK-NEXT: 1 3 0.50 * ldtrb w1, [x20, #255] +# CHECK-NEXT: 1 3 0.50 * ldtrh w20, [x1, #255] +# CHECK-NEXT: 1 3 0.50 * ldtr w12, [sp, #255] +# CHECK-NEXT: 1 3 0.50 * ldtr xzr, [x12, #255] +# CHECK-NEXT: 1 3 0.50 * ldtrsb x9, [x7, #-256] +# CHECK-NEXT: 1 3 0.50 * ldtrsh x17, [x19, #-256] +# CHECK-NEXT: 1 3 0.50 * ldtrsw x20, [x15, #-256] +# CHECK-NEXT: 1 3 0.50 * ldtrsb w19, [x1, #-256] +# CHECK-NEXT: 1 3 0.50 * ldtrsh w15, [x21, #-256] +# CHECK-NEXT: 1 3 0.50 * ldr x4, [x29] +# CHECK-NEXT: 1 3 0.50 * ldr x30, [x12, #32760] +# CHECK-NEXT: 1 3 0.50 * ldr x20, [sp, #8] +# CHECK-NEXT: 1 3 0.50 * ldr xzr, [sp] +# CHECK-NEXT: 1 3 0.50 * ldr w2, [sp] +# CHECK-NEXT: 1 3 0.50 * ldr w17, [sp, #16380] +# CHECK-NEXT: 1 3 0.50 * ldr w13, [x2, #4] +# CHECK-NEXT: 1 3 0.50 * ldrsw x2, [x5, #4] +# CHECK-NEXT: 1 3 0.50 * ldrsw x23, [sp, #16380] +# CHECK-NEXT: 1 3 0.50 * ldrh w2, [x4] +# CHECK-NEXT: 1 3 0.50 * ldrsh w23, [x6, #8190] +# CHECK-NEXT: 1 3 0.50 * ldrsh wzr, [sp, #2] +# CHECK-NEXT: 1 3 0.50 * ldrsh x29, [x2, #2] +# CHECK-NEXT: 1 3 0.50 * ldrb w26, [x3, #121] +# CHECK-NEXT: 1 3 0.50 * ldrb w12, [x2] +# CHECK-NEXT: 1 3 0.50 * ldrsb w27, [sp, #4095] +# CHECK-NEXT: 1 3 0.50 * ldrsb xzr, [x15] +# CHECK-NEXT: 1 1 0.50 * str x30, [sp] +# CHECK-NEXT: 1 1 0.50 * str w20, [x4, #16380] +# CHECK-NEXT: 1 1 0.50 * strh w17, [sp, #8190] +# CHECK-NEXT: 1 1 0.50 * strb w23, [x3, #4095] +# CHECK-NEXT: 1 1 0.50 * strb wzr, [x2] +# 
CHECK-NEXT: 1 3 0.50 * ldr b31, [sp, #4095] +# CHECK-NEXT: 1 3 0.50 * ldr h20, [x2, #8190] +# CHECK-NEXT: 1 3 0.50 * ldr s10, [x19, #16380] +# CHECK-NEXT: 1 3 0.50 * ldr d3, [x10, #32760] +# CHECK-NEXT: 2 2 1.00 * str q12, [sp, #65520] +# CHECK-NEXT: 1 3 0.50 * ldrb w3, [sp, x5] +# CHECK-NEXT: 1 3 0.50 * ldrb w9, [x27, x6] +# CHECK-NEXT: 1 3 0.50 * ldrsb w10, [x30, x7] +# CHECK-NEXT: 1 3 0.50 * ldrb w11, [x29, x3, sxtx] +# CHECK-NEXT: 2 1 1.00 * strb w12, [x28, xzr, sxtx] +# CHECK-NEXT: 1 3 0.50 * ldrb w14, [x26, w6, uxtw] +# CHECK-NEXT: 1 3 0.50 * ldrsb w15, [x25, w7, uxtw] +# CHECK-NEXT: 1 3 0.50 * ldrb w17, [x23, w9, sxtw] +# CHECK-NEXT: 1 3 0.50 * ldrsb x18, [x22, w10, sxtw] +# CHECK-NEXT: 1 3 0.50 * ldrsh w3, [sp, x5] +# CHECK-NEXT: 1 3 0.50 * ldrsh w9, [x27, x6] +# CHECK-NEXT: 1 3 0.50 * ldrh w10, [x30, x7, lsl #1] +# CHECK-NEXT: 2 1 1.00 * strh w11, [x29, x3, sxtx] +# CHECK-NEXT: 1 3 0.50 * ldrh w12, [x28, xzr, sxtx] +# CHECK-NEXT: 1 3 0.50 * ldrsh x13, [x27, x5, sxtx #1] +# CHECK-NEXT: 1 3 0.50 * ldrh w14, [x26, w6, uxtw] +# CHECK-NEXT: 1 3 0.50 * ldrh w15, [x25, w7, uxtw] +# CHECK-NEXT: 1 3 0.50 * ldrsh w16, [x24, w8, uxtw #1] +# CHECK-NEXT: 1 3 0.50 * ldrh w17, [x23, w9, sxtw] +# CHECK-NEXT: 1 3 0.50 * ldrh w18, [x22, w10, sxtw] +# CHECK-NEXT: 2 1 1.00 * strh w19, [x21, wzr, sxtw #1] +# CHECK-NEXT: 1 3 0.50 * ldr w3, [sp, x5] +# CHECK-NEXT: 1 4 0.50 * ldr s9, [x27, x6] +# CHECK-NEXT: 1 3 0.50 * ldr w10, [x30, x7, lsl #2] +# CHECK-NEXT: 1 3 0.50 * ldr w11, [x29, x3, sxtx] +# CHECK-NEXT: 2 2 1.00 * str s12, [x28, xzr, sxtx] +# CHECK-NEXT: 2 1 1.00 * str w13, [x27, x5, sxtx #2] +# CHECK-NEXT: 2 1 1.00 * str w14, [x26, w6, uxtw] +# CHECK-NEXT: 1 3 0.50 * ldr w15, [x25, w7, uxtw] +# CHECK-NEXT: 1 3 0.50 * ldr w16, [x24, w8, uxtw #2] +# CHECK-NEXT: 1 3 0.50 * ldrsw x17, [x23, w9, sxtw] +# CHECK-NEXT: 1 3 0.50 * ldr w18, [x22, w10, sxtw] +# CHECK-NEXT: 1 3 0.50 * ldrsw x19, [x21, wzr, sxtw #2] +# CHECK-NEXT: 1 3 0.50 * ldr x3, [sp, x5] +# CHECK-NEXT: 2 1 1.00 * 
str x9, [x27, x6] +# CHECK-NEXT: 1 4 0.50 * ldr d10, [x30, x7, lsl #3] +# CHECK-NEXT: 2 1 1.00 * str x11, [x29, x3, sxtx] +# CHECK-NEXT: 1 3 0.50 * ldr x12, [x28, xzr, sxtx] +# CHECK-NEXT: 1 3 0.50 * ldr x13, [x27, x5, sxtx #3] +# CHECK-NEXT: 1 1 0.50 U prfm pldl1keep, [x26, w6, uxtw] +# CHECK-NEXT: 1 3 0.50 * ldr x15, [x25, w7, uxtw] +# CHECK-NEXT: 1 3 0.50 * ldr x16, [x24, w8, uxtw #3] +# CHECK-NEXT: 1 3 0.50 * ldr x17, [x23, w9, sxtw] +# CHECK-NEXT: 1 3 0.50 * ldr x18, [x22, w10, sxtw] +# CHECK-NEXT: 2 2 1.00 * str d19, [x21, wzr, sxtw #3] +# CHECK-NEXT: 1 4 0.50 * ldr q3, [sp, x5] +# CHECK-NEXT: 1 4 0.50 * ldr q9, [x27, x6] +# CHECK-NEXT: 1 4 0.50 * ldr q10, [x30, x7, lsl #4] +# CHECK-NEXT: 2 2 1.00 * str q11, [x29, x3, sxtx] +# CHECK-NEXT: 2 2 1.00 * str q12, [x28, xzr, sxtx] +# CHECK-NEXT: 2 2 1.00 * str q13, [x27, x5, sxtx #4] +# CHECK-NEXT: 1 4 0.50 * ldr q14, [x26, w6, uxtw] +# CHECK-NEXT: 1 4 0.50 * ldr q15, [x25, w7, uxtw] +# CHECK-NEXT: 1 4 0.50 * ldr q16, [x24, w8, uxtw #4] +# CHECK-NEXT: 1 4 0.50 * ldr q17, [x23, w9, sxtw] +# CHECK-NEXT: 2 2 1.00 * str q18, [x22, w10, sxtw] +# CHECK-NEXT: 1 4 0.50 * ldr q19, [x21, wzr, sxtw #4] +# CHECK-NEXT: 1 3 0.50 * ldp w3, w5, [sp] +# CHECK-NEXT: 2 2 0.50 * stp wzr, w9, [sp, #252] +# CHECK-NEXT: 1 3 0.50 * ldp w2, wzr, [sp, #-256] +# CHECK-NEXT: 1 3 0.50 * ldp w9, w10, [sp, #4] +# CHECK-NEXT: 2 5 1.00 * ldpsw x9, x10, [sp, #4] +# CHECK-NEXT: 2 5 1.00 * ldpsw x9, x10, [x2, #-256] +# CHECK-NEXT: 2 5 1.00 * ldpsw x20, x30, [sp, #252] +# CHECK-NEXT: 1 3 0.50 * ldp x21, x29, [x2, #504] +# CHECK-NEXT: 1 3 0.50 * ldp x22, x23, [x3, #-512] +# CHECK-NEXT: 1 3 0.50 * ldp x24, x25, [x4, #8] +# CHECK-NEXT: 2 5 1.00 * ldp s29, s28, [sp, #252] +# CHECK-NEXT: 4 3 2.00 * stp s27, s26, [sp, #-256] +# CHECK-NEXT: 2 5 1.00 * ldp s1, s2, [x3, #44] +# CHECK-NEXT: 4 3 2.00 * stp d3, d5, [x9, #504] +# CHECK-NEXT: 4 3 2.00 * stp d7, d11, [x10, #-512] +# CHECK-NEXT: 2 5 1.00 * ldp d2, d3, [x30, #-8] +# CHECK-NEXT: 4 3 2.00 * stp q3, q5, 
[sp] +# CHECK-NEXT: 4 3 2.00 * stp q17, q19, [sp, #1008] +# CHECK-NEXT: 2 4 1.00 * ldp q23, q29, [x1, #-1024] +# CHECK-NEXT: 1 3 0.50 * ldp w3, w5, [sp], #0 +# CHECK-NEXT: 3 2 0.50 * stp wzr, w9, [sp], #252 +# CHECK-NEXT: 1 3 0.50 * ldp w2, wzr, [sp], #-256 +# CHECK-NEXT: 1 3 0.50 * ldp w9, w10, [sp], #4 +# CHECK-NEXT: 2 5 1.00 * ldpsw x9, x10, [sp], #4 +# CHECK-NEXT: 2 5 1.00 * ldpsw x9, x10, [x2], #-256 +# CHECK-NEXT: 2 5 1.00 * ldpsw x20, x30, [sp], #252 +# CHECK-NEXT: 1 3 0.50 * ldp x21, x29, [x2], #504 +# CHECK-NEXT: 1 3 0.50 * ldp x22, x23, [x3], #-512 +# CHECK-NEXT: 1 3 0.50 * ldp x24, x25, [x4], #8 +# CHECK-NEXT: 2 5 1.00 * ldp s29, s28, [sp], #252 +# CHECK-NEXT: 4 3 2.00 * stp s27, s26, [sp], #-256 +# CHECK-NEXT: 2 5 1.00 * ldp s1, s2, [x3], #44 +# CHECK-NEXT: 4 3 2.00 * stp d3, d5, [x9], #504 +# CHECK-NEXT: 4 3 2.00 * stp d7, d11, [x10], #-512 +# CHECK-NEXT: 2 5 1.00 * ldp d2, d3, [x30], #-8 +# CHECK-NEXT: 4 3 2.00 * stp q3, q5, [sp], #0 +# CHECK-NEXT: 4 3 2.00 * stp q17, q19, [sp], #1008 +# CHECK-NEXT: 2 4 1.00 * ldp q23, q29, [x1], #-1024 +# CHECK-NEXT: 1 3 0.50 * ldp w3, w5, [sp, #0]! +# CHECK-NEXT: 3 2 0.50 * stp wzr, w9, [sp, #252]! +# CHECK-NEXT: 1 3 0.50 * ldp w2, wzr, [sp, #-256]! +# CHECK-NEXT: 1 3 0.50 * ldp w9, w10, [sp, #4]! +# CHECK-NEXT: 2 5 1.00 * ldpsw x9, x10, [sp, #4]! +# CHECK-NEXT: 2 5 1.00 * ldpsw x9, x10, [x2, #-256]! +# CHECK-NEXT: 2 5 1.00 * ldpsw x20, x30, [sp, #252]! +# CHECK-NEXT: 1 3 0.50 * ldp x21, x29, [x2, #504]! +# CHECK-NEXT: 1 3 0.50 * ldp x22, x23, [x3, #-512]! +# CHECK-NEXT: 1 3 0.50 * ldp x24, x25, [x4, #8]! +# CHECK-NEXT: 2 5 1.00 * ldp s29, s28, [sp, #252]! +# CHECK-NEXT: 4 3 2.00 * stp s27, s26, [sp, #-256]! +# CHECK-NEXT: 2 5 1.00 * ldp s1, s2, [x3, #44]! +# CHECK-NEXT: 4 3 2.00 * stp d3, d5, [x9, #504]! +# CHECK-NEXT: 4 3 2.00 * stp d7, d11, [x10, #-512]! +# CHECK-NEXT: 2 5 1.00 * ldp d2, d3, [x30, #-8]! +# CHECK-NEXT: 4 3 2.00 * stp q3, q5, [sp, #0]! +# CHECK-NEXT: 4 3 2.00 * stp q17, q19, [sp, #1008]! 
+# CHECK-NEXT: 2 4 1.00 * ldp q23, q29, [x1, #-1024]! +# CHECK-NEXT: 1 3 0.50 * ldnp w3, w5, [sp] +# CHECK-NEXT: 2 1 1.00 * stnp wzr, w9, [sp, #252] +# CHECK-NEXT: 1 3 0.50 * ldnp w2, wzr, [sp, #-256] +# CHECK-NEXT: 1 3 0.50 * ldnp w9, w10, [sp, #4] +# CHECK-NEXT: 1 3 0.50 * ldnp x21, x29, [x2, #504] +# CHECK-NEXT: 1 3 0.50 * ldnp x22, x23, [x3, #-512] +# CHECK-NEXT: 1 3 0.50 * ldnp x24, x25, [x4, #8] +# CHECK-NEXT: 2 5 1.00 * ldnp s29, s28, [sp, #252] +# CHECK-NEXT: 4 3 2.00 * stnp s27, s26, [sp, #-256] +# CHECK-NEXT: 2 5 1.00 * ldnp s1, s2, [x3, #44] +# CHECK-NEXT: 4 3 2.00 * stnp d3, d5, [x9, #504] +# CHECK-NEXT: 4 3 2.00 * stnp d7, d11, [x10, #-512] +# CHECK-NEXT: 2 5 1.00 * ldnp d2, d3, [x30, #-8] +# CHECK-NEXT: 4 3 2.00 * stnp q3, q5, [sp] +# CHECK-NEXT: 4 3 2.00 * stnp q17, q19, [sp, #1008] +# CHECK-NEXT: 2 4 1.00 * ldnp q23, q29, [x1, #-1024] +# CHECK-NEXT: 1 1 0.25 mov w3, #983055 +# CHECK-NEXT: 1 1 0.25 mov x10, #-6148914691236517206 +# CHECK-NEXT: 1 1 0.25 and w12, w23, w21 +# CHECK-NEXT: 1 1 0.25 and w16, w15, w1, lsl #1 +# CHECK-NEXT: 2 2 0.50 and w9, w4, w10, lsl #31 +# CHECK-NEXT: 1 1 0.25 and w3, w30, w11 +# CHECK-NEXT: 2 2 0.50 and x3, x5, x7, lsl #63 +# CHECK-NEXT: 2 2 0.50 and x5, x14, x19, asr #4 +# CHECK-NEXT: 2 2 0.50 and w3, w17, w19, ror #31 +# CHECK-NEXT: 2 2 0.50 and w0, w2, wzr, lsr #17 +# CHECK-NEXT: 2 2 0.50 and w3, w30, w11, asr #2 +# CHECK-NEXT: 1 1 0.25 and xzr, x4, x26 +# CHECK-NEXT: 2 2 0.50 and w3, wzr, w20, ror #2 +# CHECK-NEXT: 2 2 0.50 and x7, x20, xzr, asr #63 +# CHECK-NEXT: 2 2 0.50 bic x13, x20, x14, lsl #47 +# CHECK-NEXT: 1 1 0.25 bic w2, w7, w9 +# CHECK-NEXT: 2 2 0.50 orr w2, w7, w0, asr #31 +# CHECK-NEXT: 2 2 0.50 orr x8, x9, x10, lsl #12 +# CHECK-NEXT: 2 2 0.50 orn x3, x5, x7, asr #2 +# CHECK-NEXT: 1 1 0.25 orn w2, w5, w29 +# CHECK-NEXT: 1 1 0.25 ands w7, wzr, w9, lsl #1 +# CHECK-NEXT: 2 2 0.50 ands x3, x5, x20, ror #63 +# CHECK-NEXT: 1 1 0.25 bics w3, w5, w7 +# CHECK-NEXT: 1 1 0.25 bics x3, xzr, x3, lsl #1 +# 
CHECK-NEXT: 2 2 0.50 tst w3, w7, lsl #31 +# CHECK-NEXT: 2 2 0.50 tst x2, x20, asr #2 +# CHECK-NEXT: 1 1 0.25 mov x3, x6 +# CHECK-NEXT: 1 1 0.25 mov x3, xzr +# CHECK-NEXT: 1 1 0.25 mov wzr, w2 +# CHECK-NEXT: 1 1 0.25 mov w3, w5 +# CHECK-NEXT: 1 1 0.25 movz w2, #0, lsl #16 +# CHECK-NEXT: 1 1 0.25 mov w2, #-1235 +# CHECK-NEXT: 1 1 0.25 mov x2, #5299989643264 +# CHECK-NEXT: 1 1 0.25 mov x2, #0 +# CHECK-NEXT: 1 1 0.25 movk w3, #0 +# CHECK-NEXT: 1 1 0.25 movz x4, #0, lsl #16 +# CHECK-NEXT: 1 1 0.25 movk w5, #0, lsl #16 +# CHECK-NEXT: 1 1 0.25 movz x6, #0, lsl #32 +# CHECK-NEXT: 1 1 0.25 movk x7, #0, lsl #32 +# CHECK-NEXT: 1 1 0.25 movz x8, #0, lsl #48 +# CHECK-NEXT: 1 1 0.25 movk x9, #0, lsl #48 +# CHECK-NEXT: 1 1 0.50 adr x2, #1600 +# CHECK-NEXT: 1 1 0.50 adrp x21, #6553600 +# CHECK-NEXT: 1 1 0.50 adr x0, #262144 +# CHECK-NEXT: 1 1 0.50 tbz x12, #62, #0 +# CHECK-NEXT: 1 1 0.50 tbz x12, #62, #4 +# CHECK-NEXT: 1 1 0.50 tbz x12, #62, #-32768 +# CHECK-NEXT: 1 1 0.50 tbnz x12, #60, #32764 +# CHECK-NEXT: 1 1 0.50 b #4 +# CHECK-NEXT: 1 1 0.50 b #-4 +# CHECK-NEXT: 1 1 0.50 b #134217724 +# CHECK-NEXT: 1 1 1.00 br x20 +# CHECK-NEXT: 2 1 1.00 blr xzr +# CHECK-NEXT: 1 1 0.50 U ret x10 +# CHECK-NEXT: 1 1 0.50 U ret +# CHECK-NEXT: 1 1 1.00 U eret +# CHECK-NEXT: 1 1 1.00 U drps + +# CHECK: Resources: +# CHECK-NEXT: [0.0] - Ampere1BUnitA +# CHECK-NEXT: [0.1] - Ampere1BUnitA +# CHECK-NEXT: [1.0] - Ampere1BUnitB +# CHECK-NEXT: [1.1] - Ampere1BUnitB +# CHECK-NEXT: [2] - Ampere1BUnitBS +# CHECK-NEXT: [3.0] - Ampere1BUnitL +# CHECK-NEXT: [3.1] - Ampere1BUnitL +# CHECK-NEXT: [4.0] - Ampere1BUnitS +# CHECK-NEXT: [4.1] - Ampere1BUnitS +# CHECK-NEXT: [5] - Ampere1BUnitX +# CHECK-NEXT: [6] - Ampere1BUnitY +# CHECK-NEXT: [7] - Ampere1BUnitZ + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4.0] [4.1] [5] [6] [7] +# CHECK-NEXT: 190.00 190.00 211.00 211.00 143.00 130.50 130.50 83.00 83.00 159.00 126.00 150.00 + +# CHECK: Resource pressure by 
instruction: +# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4.0] [4.1] [5] [6] [7] Instructions: +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - add w2, w3, #4095 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - add w30, w29, #1, lsl #12 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - add w13, w5, #4095, lsl #12 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - add x5, x7, #1638 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - add w20, wsp, #801 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - add wsp, wsp, #1104 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - add wsp, w30, #4084 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - add x0, x24, #291 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - add x3, x24, #4095, lsl #12 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - add x8, sp, #1074 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - add sp, x29, #3816 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - sub w0, wsp, #4077 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - sub w4, w20, #546, lsl #12 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - sub sp, sp, #288 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - sub wsp, w19, #16 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - adds w13, w23, #291, lsl #12 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - cmn w2, #4095 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - adds w20, wsp, #0 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - cmn x3, #1, lsl #12 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - cmp sp, #20, lsl #12 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - cmp x30, #4095 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - subs x4, sp, #3822 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - cmn w3, #291, lsl #12 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - cmn wsp, #1365 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - cmn sp, #1092, lsl #12 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - mov sp, x30 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - mov wsp, w20 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - 
- - - - mov x11, sp +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - mov w24, wsp +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - add w3, w5, w7 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - add wzr, w3, w5 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - add w20, wzr, w4 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - add w4, w6, wzr +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - add w11, w13, w15 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - add w9, w3, wzr, lsl #10 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - add w17, w29, w20, lsl #31 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - add w21, w22, w23, lsr #0 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - add w24, w25, w26, lsr #18 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - add w27, w28, w29, lsr #31 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - add w2, w3, w4, asr #0 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - add w5, w6, w7, asr #21 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - add w8, w9, w10, asr #31 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - add x3, x5, x7 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - add xzr, x3, x5 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - add x20, xzr, x4 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - add x4, x6, xzr +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - add x11, x13, x15 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - add x9, x3, xzr, lsl #10 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - add x17, x29, x20, lsl #63 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - add x21, x22, x23, lsr #0 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - add x24, x25, x26, lsr #18 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - add x27, x28, x29, lsr #63 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - add x2, x3, x4, asr #0 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - add x5, x6, x7, asr #21 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - add x8, x9, x10, asr #63 +# CHECK-NEXT: 
0.25 0.25 0.25 0.25 - - - - - - - - adds w3, w5, w7 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - cmn w3, w5 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - adds w20, wzr, w4 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - adds w4, w6, wzr +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - adds w11, w13, w15 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - adds w9, w3, wzr, lsl #10 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - adds w17, w29, w20, lsl #31 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - adds w21, w22, w23, lsr #0 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - adds w24, w25, w26, lsr #18 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - adds w27, w28, w29, lsr #31 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - adds w2, w3, w4, asr #0 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - adds w5, w6, w7, asr #21 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - adds w8, w9, w10, asr #31 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - adds x3, x5, x7 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - cmn x3, x5 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - adds x20, xzr, x4 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - adds x4, x6, xzr +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - adds x11, x13, x15 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - adds x9, x3, xzr, lsl #10 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - adds x17, x29, x20, lsl #63 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - adds x21, x22, x23, lsr #0 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - adds x24, x25, x26, lsr #18 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - adds x27, x28, x29, lsr #63 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - adds x2, x3, x4, asr #0 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - adds x5, x6, x7, asr #21 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - adds x8, x9, x10, asr #63 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - sub w3, w5, w7 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 
- - - - - - - sub wzr, w3, w5 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - sub w4, w6, wzr +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - sub w11, w13, w15 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - sub w9, w3, wzr, lsl #10 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - sub w17, w29, w20, lsl #31 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - sub w21, w22, w23, lsr #0 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - sub w24, w25, w26, lsr #18 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - sub w27, w28, w29, lsr #31 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - sub w2, w3, w4, asr #0 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - sub w5, w6, w7, asr #21 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - sub w8, w9, w10, asr #31 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - sub x3, x5, x7 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - sub xzr, x3, x5 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - sub x4, x6, xzr +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - sub x11, x13, x15 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - sub x9, x3, xzr, lsl #10 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - sub x17, x29, x20, lsl #63 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - sub x21, x22, x23, lsr #0 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - sub x24, x25, x26, lsr #18 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - sub x27, x28, x29, lsr #63 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - sub x2, x3, x4, asr #0 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - sub x5, x6, x7, asr #21 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - sub x8, x9, x10, asr #63 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - subs w3, w5, w7 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - cmp w3, w5 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - subs w4, w6, wzr +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - subs w11, w13, w15 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - subs w9, w3, wzr, lsl 
#10 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - subs w17, w29, w20, lsl #31 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - subs w21, w22, w23, lsr #0 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - subs w24, w25, w26, lsr #18 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - subs w27, w28, w29, lsr #31 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - subs w2, w3, w4, asr #0 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - subs w5, w6, w7, asr #21 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - subs w8, w9, w10, asr #31 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - subs x3, x5, x7 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - cmp x3, x5 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - subs x4, x6, xzr +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - subs x11, x13, x15 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - subs x9, x3, xzr, lsl #10 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - subs x17, x29, x20, lsl #63 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - subs x21, x22, x23, lsr #0 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - subs x24, x25, x26, lsr #18 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - subs x27, x28, x29, lsr #63 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - subs x2, x3, x4, asr #0 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - subs x5, x6, x7, asr #21 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - subs x8, x9, x10, asr #63 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - cmn wzr, w4 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - cmn w5, wzr +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - cmn w6, w7 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - cmn w8, w9, lsl #15 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - cmn w10, w11, lsl #31 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - cmn w12, w13, lsr #0 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - cmn w14, w15, lsr #21 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - cmn w16, w17, lsr #31 +# CHECK-NEXT: 
0.25 0.25 0.75 0.75 - - - - - - - - cmn w18, w19, asr #0 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - cmn w20, w21, asr #22 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - cmn w22, w23, asr #31 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - cmn x0, x3 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - cmn xzr, x4 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - cmn x5, xzr +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - cmn x6, x7 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - cmn x8, x9, lsl #15 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - cmn x10, x11, lsl #63 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - cmn x12, x13, lsr #0 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - cmn x14, x15, lsr #41 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - cmn x16, x17, lsr #63 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - cmn x18, x19, asr #0 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - cmn x20, x21, asr #55 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - cmn x22, x23, asr #63 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - cmp w0, w3 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - cmp wzr, w4 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - cmp w5, wzr +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - cmp w6, w7 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - cmp w8, w9, lsl #15 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - cmp w10, w11, lsl #31 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - cmp w12, w13, lsr #0 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - cmp w14, w15, lsr #21 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - cmp w18, w19, asr #0 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - cmp w20, w21, asr #22 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - cmp w22, w23, asr #31 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - cmp x0, x3 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - cmp xzr, x4 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - cmp x5, xzr +# CHECK-NEXT: 0.25 
0.25 0.25 0.25 - - - - - - - - cmp x6, x7 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - cmp x8, x9, lsl #15 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - cmp x10, x11, lsl #63 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - cmp x12, x13, lsr #0 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - cmp x14, x15, lsr #41 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - cmp x16, x17, lsr #63 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - cmp x18, x19, asr #0 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - cmp x20, x21, asr #55 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - cmp x22, x23, asr #63 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - cmp wzr, w0 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - cmp xzr, x0 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - adc w29, w27, w25 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - adc wzr, w3, w4 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - adc w9, wzr, w10 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - adc w20, w0, wzr +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - adc x29, x27, x25 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - adc xzr, x3, x4 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - adc x9, xzr, x10 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - adc x20, x0, xzr +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - adcs w29, w27, w25 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - adcs wzr, w3, w4 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - adcs w9, wzr, w10 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - adcs w20, w0, wzr +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - adcs x29, x27, x25 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - adcs xzr, x3, x4 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - adcs x9, xzr, x10 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - adcs x20, x0, xzr +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - sbc w29, w27, w25 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - sbc wzr, w3, w4 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ngc w9, w10 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - sbc w20, w0, wzr +# 
CHECK-NEXT: 0.50 0.50 - - - - - - - - - - sbc x29, x27, x25 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - sbc xzr, x3, x4 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ngc x9, x10 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - sbc x20, x0, xzr +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - sbcs w29, w27, w25 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - sbcs wzr, w3, w4 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ngcs w9, w10 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - sbcs w20, w0, wzr +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - sbcs x29, x27, x25 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - sbcs xzr, x3, x4 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ngcs x9, x10 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - sbcs x20, x0, xzr +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ngc w3, w12 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ngc wzr, w9 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ngc w23, wzr +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ngc x29, x30 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ngc xzr, x0 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ngc x0, xzr +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ngcs w3, w12 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ngcs wzr, w9 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ngcs w23, wzr +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ngcs x29, x30 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ngcs xzr, x0 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ngcs x0, xzr +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - sbfx x1, x2, #3, #2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - asr x3, x4, #63 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - asr wzr, wzr, #31 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - sbfx w12, w9, #0, #1 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - ubfiz x4, x5, #52, #11 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - ubfx xzr, x4, #0, #1 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - ubfiz x4, xzr, #1, #6 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - lsr x5, x6, #12 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - bfi x4, 
x5, #52, #11 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - bfxil xzr, x4, #0, #1 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - bfc x4, #1, #6 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - bfxil x5, x6, #12, #52 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - sxtb w1, w2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - sxtb xzr, w3 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - sxth w9, w10 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - sxth x0, w1 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - sxtw x3, w30 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - uxtb w1, w2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - uxth w9, w10 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - ubfx x3, x30, #0, #32 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - asr w3, w2, #0 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - asr w9, w10, #31 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - asr x20, x21, #63 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - asr w1, wzr, #3 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - lsr w3, w2, #0 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - lsr w9, w10, #31 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - lsr x20, x21, #63 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - lsr wzr, wzr, #3 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - lsr w3, w2, #0 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - lsl w9, w10, #31 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - lsl x20, x21, #63 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - lsl w1, wzr, #3 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - sbfx w9, w10, #0, #1 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - sbfiz x2, x3, #63, #1 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - asr x19, x20, #0 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - sbfiz x9, x10, #5, #59 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - asr w9, w10, #0 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - sbfiz w11, w12, #31, #1 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - sbfiz w13, w14, #29, #3 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - sbfiz xzr, xzr, #10, #11 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - sbfx 
w9, w10, #0, #1 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - asr x2, x3, #63 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - asr x19, x20, #0 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - asr x9, x10, #5 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - asr w9, w10, #0 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - asr w11, w12, #31 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - asr w13, w14, #29 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - sbfx xzr, xzr, #10, #11 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - bfxil w9, w10, #0, #1 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - bfi x2, x3, #63, #1 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - bfxil x19, x20, #0, #64 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - bfi x9, x10, #5, #59 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - bfxil w9, w10, #0, #32 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - bfi w11, w12, #31, #1 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - bfi w13, w14, #29, #3 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - bfc xzr, #10, #11 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - bfxil w9, w10, #0, #1 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - bfxil x2, x3, #63, #1 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - bfxil x19, x20, #0, #64 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - bfxil x9, x10, #5, #59 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - bfxil w9, w10, #0, #32 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - bfxil w11, w12, #31, #1 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - bfxil w13, w14, #29, #3 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - bfxil xzr, xzr, #10, #11 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - ubfx w9, w10, #0, #1 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - lsl x2, x3, #63 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - lsr x19, x20, #0 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - lsl x9, x10, #5 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - lsr w9, w10, #0 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - lsl w11, w12, #31 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - lsl w13, w14, #29 +# CHECK-NEXT: - - 0.50 
0.50 - - - - - - - - ubfiz xzr, xzr, #10, #11 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - ubfx w9, w10, #0, #1 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - lsr x2, x3, #63 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - lsr x19, x20, #0 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - lsr x9, x10, #5 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - lsr w9, w10, #0 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - lsr w11, w12, #31 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - lsr w13, w14, #29 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - ubfx xzr, xzr, #10, #11 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - cbz w5, #4 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - cbz x5, #0 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - cbnz x2, #-4 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - cbnz x26, #1048572 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - cbz wzr, #0 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - cbnz xzr, #0 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - b.ne #4 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - b.ge #1048572 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - b.ge #-4 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ccmp w1, #31, #0, eq +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ccmp w3, #0, #15, hs +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ccmp wzr, #15, #13, hs +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ccmp x9, #31, #0, le +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ccmp x3, #0, #15, gt +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ccmp xzr, #5, #7, ne +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ccmn w1, #31, #0, eq +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ccmn w3, #0, #15, hs +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ccmn wzr, #15, #13, hs +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ccmn x9, #31, #0, le +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ccmn x3, #0, #15, gt +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ccmn xzr, #5, #7, ne +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ccmp w1, wzr, #0, eq +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ccmp w3, w0, #15, hs +# 
CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ccmp wzr, w15, #13, hs +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ccmp x9, xzr, #0, le +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ccmp x3, x0, #15, gt +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ccmp xzr, x5, #7, ne +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ccmn w1, wzr, #0, eq +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ccmn w3, w0, #15, hs +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ccmn wzr, w15, #13, hs +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ccmn x9, xzr, #0, le +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ccmn x3, x0, #15, gt +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ccmn xzr, x5, #7, ne +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csel w1, w0, w19, ne +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csel wzr, w5, w9, eq +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csel w9, wzr, w30, gt +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csel w1, w28, wzr, mi +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csel x19, x23, x29, lt +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csel xzr, x3, x4, ge +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csel x5, xzr, x6, hs +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csel x7, x8, xzr, lo +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csinc w1, w0, w19, ne +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csinc wzr, w5, w9, eq +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csinc w9, wzr, w30, gt +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csinc w1, w28, wzr, mi +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csinc x19, x23, x29, lt +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csinc xzr, x3, x4, ge +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csinc x5, xzr, x6, hs +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csinc x7, x8, xzr, lo +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csinv w1, w0, w19, ne +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csinv wzr, w5, w9, eq +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csinv w9, wzr, w30, gt +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csinv w1, w28, wzr, 
mi +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csinv x19, x23, x29, lt +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csinv xzr, x3, x4, ge +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csinv x5, xzr, x6, hs +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csinv x7, x8, xzr, lo +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csneg w1, w0, w19, ne +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csneg wzr, w5, w9, eq +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csneg w9, wzr, w30, gt +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csneg w1, w28, wzr, mi +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csneg x19, x23, x29, lt +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csneg xzr, x3, x4, ge +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csneg x5, xzr, x6, hs +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csneg x7, x8, xzr, lo +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - cset w3, eq +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - cset x9, pl +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csetm w20, ne +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csetm x30, ge +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csinc w2, wzr, wzr, al +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csinv x3, xzr, xzr, nv +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - cinc w3, w5, gt +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - cinc wzr, w4, le +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - cset w9, lt +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - cinc x3, x5, gt +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - cinc xzr, x4, le +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - cset x9, lt +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csinc w5, w6, w6, nv +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csinc x1, x2, x2, al +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - cinv w3, w5, gt +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - cinv wzr, w4, le +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csetm w9, lt +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - cinv x3, x5, gt +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - cinv xzr, x4, le +# CHECK-NEXT: 0.50 
0.50 - - - - - - - - - - csetm x9, lt +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csinv x1, x0, x0, al +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csinv w9, w8, w8, nv +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - cneg w3, w5, gt +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - cneg wzr, w4, le +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - cneg w9, wzr, lt +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - cneg x3, x5, gt +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - cneg xzr, x4, le +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - cneg x9, xzr, lt +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csneg x4, x8, x8, al +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - csinv w9, w8, w8, nv +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - rbit w0, w7 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - rbit x18, x3 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - rev16 w17, w1 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - rev16 x5, x2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - rev w18, w0 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - rev32 x20, x1 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - rev x22, x2 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - clz w24, w3 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - clz x26, x4 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - cls w3, w5 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - cls x20, x5 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 - - udiv w0, w7, w10 +# CHECK-NEXT: - - - - 2.00 - - - - 1.00 - - udiv x9, x22, x4 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 - - sdiv w12, w21, w0 +# CHECK-NEXT: - - - - 2.00 - - - - 1.00 - - sdiv x13, x2, x1 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - lsl w11, w12, w13 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - lsl x14, x15, x16 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - lsr w17, w18, w19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - lsr x20, x21, x22 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - asr w23, w24, w25 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - asr x26, x27, x28 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - ror w0, w1, 
w2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - ror x3, x4, x5 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - lsl w6, w7, w8 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - lsl x9, x10, x11 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - lsr w12, w13, w14 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - lsr x15, x16, x17 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - asr w18, w19, w20 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - asr x21, x22, x23 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - ror w24, w25, w26 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - ror x27, x28, x29 +# CHECK-NEXT: - - - - 1.00 - - - - - - - smulh x30, x29, x28 +# CHECK-NEXT: - - - - 1.00 - - - - - - - smulh xzr, x27, x26 +# CHECK-NEXT: - - - - 1.00 - - - - - - - umulh x30, x29, x28 +# CHECK-NEXT: - - - - 1.00 - - - - - - - umulh x23, x30, xzr +# CHECK-NEXT: - - - - 1.00 - - - - - - - madd w1, w3, w7, w4 +# CHECK-NEXT: - - - - 1.00 - - - - - - - madd wzr, w0, w9, w11 +# CHECK-NEXT: - - - - 1.00 - - - - - - - madd w13, wzr, w4, w4 +# CHECK-NEXT: - - - - 1.00 - - - - - - - madd w19, w30, wzr, w29 +# CHECK-NEXT: - - - - 1.00 - - - - - - - mul w4, w5, w6 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 1.00 - - - - - - - madd x1, x3, x7, x4 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 1.00 - - - - - - - madd xzr, x0, x9, x11 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 1.00 - - - - - - - madd x13, xzr, x4, x4 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 1.00 - - - - - - - madd x19, x30, xzr, x29 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 1.00 - - - - - - - mul x4, x5, x6 +# CHECK-NEXT: - - - - 1.00 - - - - - - - msub w1, w3, w7, w4 +# CHECK-NEXT: - - - - 1.00 - - - - - - - msub wzr, w0, w9, w11 +# CHECK-NEXT: - - - - 1.00 - - - - - - - msub w13, wzr, w4, w4 +# CHECK-NEXT: - - - - 1.00 - - - - - - - msub w19, w30, wzr, w29 +# CHECK-NEXT: - - - - 1.00 - - - - - - - mneg w4, w5, w6 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 1.00 - - - - - - - msub x1, x3, x7, x4 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 1.00 - - - - - - - msub xzr, x0, x9, x11 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 
1.00 - - - - - - - msub x13, xzr, x4, x4 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 1.00 - - - - - - - msub x19, x30, xzr, x29 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 1.00 - - - - - - - mneg x4, x5, x6 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 1.00 - - - - - - - smaddl x3, w5, w2, x9 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 1.00 - - - - - - - smaddl xzr, w10, w11, x12 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 1.00 - - - - - - - smaddl x13, wzr, w14, x15 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 1.00 - - - - - - - smaddl x16, w17, wzr, x18 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 1.00 - - - - - - - smull x19, w20, w21 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 1.00 - - - - - - - smsubl x3, w5, w2, x9 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 1.00 - - - - - - - smsubl xzr, w10, w11, x12 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 1.00 - - - - - - - smsubl x13, wzr, w14, x15 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 1.00 - - - - - - - smsubl x16, w17, wzr, x18 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 1.00 - - - - - - - smnegl x19, w20, w21 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 1.00 - - - - - - - umaddl x3, w5, w2, x9 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 1.00 - - - - - - - umaddl xzr, w10, w11, x12 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 1.00 - - - - - - - umaddl x13, wzr, w14, x15 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 1.00 - - - - - - - umaddl x16, w17, wzr, x18 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 1.00 - - - - - - - umull x19, w20, w21 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 1.00 - - - - - - - umsubl x3, w5, w2, x9 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 1.00 - - - - - - - umsubl x16, w17, wzr, x18 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 1.00 - - - - - - - umnegl x19, w20, w21 +# CHECK-NEXT: - - - - 1.00 - - - - - - - smulh x30, x29, x28 +# CHECK-NEXT: - - - - 1.00 - - - - - - - smulh x23, x22, xzr +# CHECK-NEXT: - - - - 1.00 - - - - - - - umulh x23, x22, xzr +# CHECK-NEXT: 0.25 0.25 0.25 0.25 1.00 - - - - - - - mul x19, x20, xzr +# CHECK-NEXT: - - - - 1.00 - - - - - - - mneg w21, w22, w23 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 1.00 - - - - - - - smull x11, w13, w17 +# CHECK-NEXT: 
0.25 0.25 0.25 0.25 1.00 - - - - - - - umull x11, w13, w17 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 1.00 - - - - - - - smnegl x11, w13, w17 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 1.00 - - - - - - - umnegl x11, w13, w17 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - extr w3, w5, w7, #0 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - extr w11, w13, w17, #31 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - extr x3, x5, x7, #15 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - extr x11, x13, x17, #63 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - ror x19, x23, #24 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - ror x29, xzr, #63 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - ror w9, w13, #31 +# CHECK-NEXT: - - - - - - - - - 1.00 - - fcmp s3, s5 +# CHECK-NEXT: - - - - - - - - - 1.00 - - fcmp s31, #0.0 +# CHECK-NEXT: - - - - - - - - - 1.00 - - fcmp s31, #0.0 +# CHECK-NEXT: - - - - - - - - - 1.00 - - fcmpe s29, s30 +# CHECK-NEXT: - - - - - - - - - 1.00 - - fcmpe s15, #0.0 +# CHECK-NEXT: - - - - - - - - - 1.00 - - fcmpe s15, #0.0 +# CHECK-NEXT: - - - - - - - - - 1.00 - - fcmp d4, d12 +# CHECK-NEXT: - - - - - - - - - 1.00 - - fcmp d23, #0.0 +# CHECK-NEXT: - - - - - - - - - 1.00 - - fcmp d23, #0.0 +# CHECK-NEXT: - - - - - - - - - 1.00 - - fcmpe d26, d22 +# CHECK-NEXT: - - - - - - - - - 1.00 - - fcmpe d29, #0.0 +# CHECK-NEXT: - - - - - - - - - 1.00 - - fcmpe d29, #0.0 +# CHECK-NEXT: 0.50 0.50 - - 1.00 - - - - 1.00 - - fccmp s1, s31, #0, eq +# CHECK-NEXT: 0.50 0.50 - - 1.00 - - - - 1.00 - - fccmp s3, s0, #15, hs +# CHECK-NEXT: 0.50 0.50 - - 1.00 - - - - 1.00 - - fccmp s31, s15, #13, hs +# CHECK-NEXT: 0.50 0.50 - - 1.00 - - - - 1.00 - - fccmp d9, d31, #0, le +# CHECK-NEXT: 0.50 0.50 - - 1.00 - - - - 1.00 - - fccmp d3, d0, #15, gt +# CHECK-NEXT: 0.50 0.50 - - 1.00 - - - - 1.00 - - fccmp d31, d5, #7, ne +# CHECK-NEXT: 0.50 0.50 - - 1.00 - - - - 1.00 - - fccmpe s1, s31, #0, eq +# CHECK-NEXT: 0.50 0.50 - - 1.00 - - - - 1.00 - - fccmpe s3, s0, #15, hs +# CHECK-NEXT: 0.50 0.50 - - 1.00 - - - - 1.00 - - fccmpe s31, 
s15, #13, hs +# CHECK-NEXT: 0.50 0.50 - - 1.00 - - - - 1.00 - - fccmpe d9, d31, #0, le +# CHECK-NEXT: 0.50 0.50 - - 1.00 - - - - 1.00 - - fccmpe d3, d0, #15, gt +# CHECK-NEXT: 0.50 0.50 - - 1.00 - - - - 1.00 - - fccmpe d31, d5, #7, ne +# CHECK-NEXT: 0.50 0.50 - - 1.00 - - - - 0.50 0.50 - fcsel s3, s20, s9, pl +# CHECK-NEXT: 0.50 0.50 - - 1.00 - - - - 0.50 0.50 - fcsel d9, d10, d11, mi +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmov s0, s1 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fabs s2, s3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fneg s4, s5 +# CHECK-NEXT: - - - - - - - - - 1.00 - - fsqrt s6, s7 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvt d8, s9 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvt h10, s11 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frintn s12, s13 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frintp s14, s15 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frintm s16, s17 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frintz s18, s19 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frinta s20, s21 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frintx s22, s23 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frinti s24, s25 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmov d0, d1 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fabs d2, d3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fneg d4, d5 +# CHECK-NEXT: - - - - - - - - - 1.00 - - fsqrt d6, d7 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvt s8, d9 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvt h10, d11 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frintn d12, d13 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frintp d14, d15 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frintm d16, d17 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frintz d18, d19 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frinta d20, d21 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frintx d22, d23 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frinti d24, d25 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvt s26, h27 +# CHECK-NEXT: 
- - - - - - - - - 0.50 0.50 - fcvt d28, h29 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmul s20, s19, s17 +# CHECK-NEXT: - - - - - - - - - 1.00 - - fdiv s1, s2, s3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fadd s4, s5, s6 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fsub s7, s8, s9 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmax s10, s11, s12 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmin s13, s14, s15 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmaxnm s16, s17, s18 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fminnm s19, s20, s21 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fnmul s22, s23, s2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmul d20, d19, d17 +# CHECK-NEXT: - - - - - - - - - 1.00 - - fdiv d1, d2, d3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fadd d4, d5, d6 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fsub d7, d8, d9 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmax d10, d11, d12 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmin d13, d14, d15 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmaxnm d16, d17, d18 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fminnm d19, d20, d21 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fnmul d22, d23, d24 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmadd s3, s5, s6, s31 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmadd d3, d13, d0, d23 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmsub s3, s5, s6, s31 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmsub d3, d13, d0, d23 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fnmadd s3, s5, s6, s31 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fnmadd d3, d13, d0, d23 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fnmsub s3, s5, s6, s31 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fnmsub d3, d13, d0, d23 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzs w3, h5, #1 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzs wzr, h20, #13 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzs w19, h0, #32 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzs x3, h5, #1 +# 
CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzs x12, h30, #45 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzs x19, h0, #64 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzs w3, s5, #1 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzs wzr, s20, #13 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzs w19, s0, #32 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzs x3, s5, #1 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzs x12, s30, #45 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzs x19, s0, #64 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzs w3, d5, #1 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzs wzr, d20, #13 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzs w19, d0, #32 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzs x3, d5, #1 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzs x12, d30, #45 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzs x19, d0, #64 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzu w3, h5, #1 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzu wzr, h20, #13 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzu w19, h0, #32 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzu x3, h5, #1 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzu x12, h30, #45 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzu x19, h0, #64 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzu w3, s5, #1 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzu wzr, s20, #13 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzu w19, s0, #32 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzu x3, s5, #1 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzu x12, s30, #45 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzu x19, s0, #64 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzu w3, d5, #1 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzu wzr, d20, #13 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzu w19, d0, #32 +# CHECK-NEXT: - - - - - - - - - 0.50 
0.50 1.00 fcvtzu x3, d5, #1 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzu x12, d30, #45 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzu x19, d0, #64 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - scvtf h23, w19, #1 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - scvtf h31, wzr, #20 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - scvtf h14, w0, #32 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - scvtf h23, x19, #1 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - scvtf h31, xzr, #20 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - scvtf h14, x0, #64 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - scvtf s23, w19, #1 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - scvtf s31, wzr, #20 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - scvtf s14, w0, #32 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - scvtf s23, x19, #1 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - scvtf s31, xzr, #20 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - scvtf s14, x0, #64 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - scvtf d23, w19, #1 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - scvtf d31, wzr, #20 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - scvtf d14, w0, #32 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - scvtf d23, x19, #1 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - scvtf d31, xzr, #20 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - scvtf d14, x0, #64 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - ucvtf h23, w19, #1 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - ucvtf h31, wzr, #20 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - ucvtf h14, w0, #32 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - ucvtf h23, x19, #1 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - ucvtf h31, xzr, #20 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - ucvtf h14, x0, #64 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - ucvtf s23, w19, #1 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - ucvtf s31, wzr, #20 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - ucvtf s14, w0, #32 +# CHECK-NEXT: - 
- - - 1.00 - - - - 1.00 1.00 - ucvtf s23, x19, #1 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - ucvtf s31, xzr, #20 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - ucvtf s14, x0, #64 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - ucvtf d23, w19, #1 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - ucvtf d31, wzr, #20 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - ucvtf d14, w0, #32 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - ucvtf d23, x19, #1 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - ucvtf d31, xzr, #20 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - ucvtf d14, x0, #64 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtns w3, h31 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtns xzr, h12 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtnu wzr, h12 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtnu x0, h0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtps wzr, h9 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtps x12, h20 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtpu w30, h23 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtpu x29, h3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtms w2, h3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtms x4, h5 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtmu w6, h7 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtmu x8, h9 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzs w10, h11 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzs x12, h13 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzu w14, h15 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzu x15, h16 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - scvtf h17, w18 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - scvtf h19, x20 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - ucvtf h21, w22 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - scvtf h23, x24 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtas w25, h26 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtas x27, h28 +# CHECK-NEXT: - - 
- - - - - - - 0.50 0.50 1.00 fcvtau w29, h30 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtau xzr, h0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtns w3, s31 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtns xzr, s12 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtnu wzr, s12 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtnu x0, s0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtps wzr, s9 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtps x12, s20 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtpu w30, s23 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtpu x29, s3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtms w2, s3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtms x4, s5 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtmu w6, s7 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtmu x8, s9 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzs w10, s11 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzs x12, s13 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzu w14, s15 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzu x15, s16 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - scvtf s17, w18 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - scvtf s19, x20 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - ucvtf s21, w22 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - scvtf s23, x24 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtas w25, s26 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtas x27, s28 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtau w29, s30 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtau xzr, s0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtns w3, d31 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtns xzr, d12 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtnu wzr, d12 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtnu x0, d0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtps wzr, d9 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtps 
x12, d20 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtpu w30, d23 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtpu x29, d3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtms w2, d3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtms x4, d5 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtmu w6, d7 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtmu x8, d9 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzs w10, d11 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzs x12, d13 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzu w14, d15 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtzu x15, d16 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - scvtf d17, w18 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - scvtf d19, x20 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - ucvtf d21, w22 +# CHECK-NEXT: - - - - 1.00 - - - - 1.00 1.00 - ucvtf d23, x24 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtas w25, d26 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtas x27, d28 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtau w29, d30 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 fcvtau xzr, d0 +# CHECK-NEXT: - - - - 1.00 - - - - - - - fmov w3, s9 +# CHECK-NEXT: - - - - - - - - - - - 1.00 fmov s9, w3 +# CHECK-NEXT: - - - - 1.00 - - - - - - - fmov x20, d31 +# CHECK-NEXT: - - - - - - - - - - - 1.00 fmov d1, x15 +# CHECK-NEXT: - - - - 1.00 - - - - 0.50 0.50 - fmov x3, v12.d[1] +# CHECK-NEXT: - - - - - - - - - 1.00 - - fmov v1.d[1], x19 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmov s2, #0.12500000 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmov s3, #1.00000000 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmov d30, #16.00000000 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmov s4, #1.06250000 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmov d10, #1.93750000 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmov s12, #-1.00000000 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmov d16, #8.50000000 +# CHECK-NEXT: - - - - - 0.50 0.50 - - 
- - - ldr w3, #0 +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldr x29, #4 +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldrsw xzr, #-4 +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldr s0, #8 +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldr d0, #1048572 +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldr q0, #-1048576 +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - prfm pldl1strm, #0 +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - prfm #22, #0 +# CHECK-NEXT: - - - - - 0.50 0.50 0.50 0.50 - - - stxrb w18, w8, [sp] +# CHECK-NEXT: - - - - - 0.50 0.50 0.50 0.50 - - - stxrh w24, w15, [x16] +# CHECK-NEXT: - - - - - 0.50 0.50 0.50 0.50 - - - stxr w5, w6, [x17] +# CHECK-NEXT: - - - - - 0.50 0.50 0.50 0.50 - - - stxr w1, x10, [x21] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldxrb w30, [x0] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldxrh w17, [x4] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldxr w22, [sp] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldxr x11, [x29] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldxr x11, [x29] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldxr x11, [x29] +# CHECK-NEXT: - - - - - 0.50 0.50 0.50 0.50 - - - stxp w12, w11, w10, [sp] +# CHECK-NEXT: - - - - - 0.50 0.50 0.50 0.50 - - - stxp wzr, x27, x9, [x12] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldxp w0, wzr, [sp] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldxp x17, x0, [x18] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldxp x17, x0, [x18] +# CHECK-NEXT: - - - - - 0.50 0.50 0.50 0.50 - - - stlxrb w12, w22, [x0] +# CHECK-NEXT: - - - - - 0.50 0.50 0.50 0.50 - - - stlxrh w10, w1, [x1] +# CHECK-NEXT: - - - - - 0.50 0.50 0.50 0.50 - - - stlxr w9, w2, [x2] +# CHECK-NEXT: - - - - - 0.50 0.50 0.50 0.50 - - - stlxr w9, x3, [sp] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldaxrb w8, [x4] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldaxrh w7, [x5] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldaxr w6, [sp] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldaxr x5, [x6] +# CHECK-NEXT: - - - - - 0.50 0.50 - - 
- - - ldaxr x5, [x6] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldaxr x5, [x6] +# CHECK-NEXT: - - - - - 0.50 0.50 0.50 0.50 - - - stlxp w4, w5, w6, [sp] +# CHECK-NEXT: - - - - - 0.50 0.50 0.50 0.50 - - - stlxp wzr, x6, x7, [x1] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldaxp w5, w18, [sp] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldaxp x6, x19, [x22] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldaxp x6, x19, [x22] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stlrb w24, [sp] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stlrh w25, [x30] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stlr w26, [x29] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stlr x27, [x28] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stlr x27, [x28] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stlr x27, [x28] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldarb w23, [sp] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldarh w22, [x30] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldar wzr, [x29] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldar x21, [x28] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldar x21, [x28] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldar x21, [x28] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - sturb w9, [sp] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - sturh wzr, [x12, #255] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stur w16, [x0, #-256] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stur x28, [x14, #1] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldurb w1, [x20, #255] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldurh w20, [x1, #255] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldur w12, [sp, #255] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldur xzr, [x12, #255] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldursb x9, [x7, #-256] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldursh x17, [x19, #-256] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldursw x20, [x15, #-256] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - prfum pldl2keep, [sp, #-256] +# CHECK-NEXT: - - - - - 0.50 
0.50 - - - - - ldursb w19, [x1, #-256] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldursh w15, [x21, #-256] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - 1.00 stur b0, [sp, #1] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - 1.00 stur h12, [x12, #-1] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - 1.00 stur s15, [x0, #255] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - 1.00 stur d31, [x5, #25] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - 1.00 stur q9, [x5] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldur b3, [sp] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldur h5, [x4, #-256] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldur s7, [x12, #-1] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldur d11, [x19, #4] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldur q13, [x1, #2] +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - strb w9, [x2], #255 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - strb w10, [x3], #1 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - strb w10, [x3], #-256 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - strh w9, [x2], #255 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - strh w9, [x2], #1 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - strh w10, [x3], #-256 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str w19, [sp], #255 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str w20, [x30], #1 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str w21, [x12], #-256 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str xzr, [x9], #255 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str x2, [x3], #1 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str x19, [x12], #-256 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrb w9, [x2], #255 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrb w10, [x3], #1 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrb w10, [x3], #-256 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrh w9, [x2], #255 +# 
CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrh w9, [x2], #1 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrh w10, [x3], #-256 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr w19, [sp], #255 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr w20, [x30], #1 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr w21, [x12], #-256 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr xzr, [x9], #255 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr x2, [x3], #1 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr x19, [x12], #-256 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrsb xzr, [x9], #255 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrsb x2, [x3], #1 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrsb x19, [x12], #-256 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrsh xzr, [x9], #255 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrsh x2, [x3], #1 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrsh x19, [x12], #-256 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrsw xzr, [x9], #255 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrsw x2, [x3], #1 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrsw x19, [x12], #-256 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrsb wzr, [x9], #255 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrsb w2, [x3], #1 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrsb w19, [x12], #-256 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrsh wzr, [x9], #255 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrsh w2, [x3], #1 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrsh w19, [x12], #-256 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str b0, [x0], #255 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str b3, [x3], #1 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - 
str b5, [sp], #-256 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str h10, [x10], #255 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str h13, [x23], #1 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str h15, [sp], #-256 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str s20, [x20], #255 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str s23, [x23], #1 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str s25, [x0], #-256 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str d20, [x20], #255 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str d23, [x23], #1 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str d25, [x0], #-256 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr b0, [x0], #255 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr b3, [x3], #1 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr b5, [sp], #-256 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr h10, [x10], #255 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr h13, [x23], #1 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr h15, [sp], #-256 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr s20, [x20], #255 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr s23, [x23], #1 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr s25, [x0], #-256 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr d20, [x20], #255 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr d23, [x23], #1 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr d25, [x0], #-256 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr q20, [x1], #255 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr q23, [x9], #1 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr q25, [x20], #-256 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str q10, [x1], #255 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - 
- - str q22, [sp], #1 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str q21, [x20], #-256 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr x3, [x4, #0]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - strb w9, [x2, #255]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - strb w10, [x3, #1]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - strb w10, [x3, #-256]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - strh w9, [x2, #255]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - strh w9, [x2, #1]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - strh w10, [x3, #-256]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str w19, [sp, #255]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str w20, [x30, #1]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str w21, [x12, #-256]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str xzr, [x9, #255]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str x2, [x3, #1]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str x19, [x12, #-256]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrb w9, [x2, #255]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrb w10, [x3, #1]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrb w10, [x3, #-256]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrh w9, [x2, #255]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrh w9, [x2, #1]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrh w10, [x3, #-256]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr w19, [sp, #255]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr w20, [x30, #1]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr w21, [x12, #-256]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr xzr, [x9, #255]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr x2, [x3, #1]! 
+# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr x19, [x12, #-256]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrsb xzr, [x9, #255]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrsb x2, [x3, #1]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrsb x19, [x12, #-256]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrsh xzr, [x9, #255]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrsh x2, [x3, #1]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrsh x19, [x12, #-256]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrsw xzr, [x9, #255]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrsw x2, [x3, #1]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrsw x19, [x12, #-256]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrsb wzr, [x9, #255]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrsb w2, [x3, #1]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrsb w19, [x12, #-256]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrsh wzr, [x9, #255]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrsh w2, [x3, #1]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldrsh w19, [x12, #-256]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str b0, [x0, #255]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str b3, [x3, #1]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str b5, [sp, #-256]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str h10, [x10, #255]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str h13, [x23, #1]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str h15, [sp, #-256]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str s20, [x20, #255]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str s23, [x23, #1]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str s25, [x0, #-256]! 
+# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str d20, [x20, #255]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str d23, [x23, #1]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str d25, [x0, #-256]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr b0, [x0, #255]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr b3, [x3, #1]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr b5, [sp, #-256]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr h10, [x10, #255]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr h13, [x23, #1]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr h15, [sp, #-256]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr s20, [x20, #255]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr s23, [x23, #1]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr s25, [x0, #-256]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr d20, [x20, #255]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr d23, [x23, #1]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr d25, [x0, #-256]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr q20, [x1, #255]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr q23, [x9, #1]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - 0.50 0.50 - - - - - ldr q25, [x20, #-256]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str q10, [x1, #255]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str q22, [sp, #1]! +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - str q21, [x20, #-256]! 
+# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - sttrb w9, [sp] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - sttrh wzr, [x12, #255] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - sttr w16, [x0, #-256] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - sttr x28, [x14, #1] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldtrb w1, [x20, #255] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldtrh w20, [x1, #255] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldtr w12, [sp, #255] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldtr xzr, [x12, #255] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldtrsb x9, [x7, #-256] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldtrsh x17, [x19, #-256] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldtrsw x20, [x15, #-256] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldtrsb w19, [x1, #-256] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldtrsh w15, [x21, #-256] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldr x4, [x29] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldr x30, [x12, #32760] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldr x20, [sp, #8] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldr xzr, [sp] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldr w2, [sp] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldr w17, [sp, #16380] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldr w13, [x2, #4] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldrsw x2, [x5, #4] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldrsw x23, [sp, #16380] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldrh w2, [x4] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldrsh w23, [x6, #8190] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldrsh wzr, [sp, #2] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldrsh x29, [x2, #2] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldrb w26, [x3, #121] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldrb w12, [x2] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldrsb w27, [sp, #4095] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldrsb xzr, [x15] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - 
- str x30, [sp] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - str w20, [x4, #16380] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - strh w17, [sp, #8190] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - strb w23, [x3, #4095] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - strb wzr, [x2] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldr b31, [sp, #4095] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldr h20, [x2, #8190] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldr s10, [x19, #16380] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldr d3, [x10, #32760] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - 1.00 str q12, [sp, #65520] +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - ldrb w3, [sp, x5] +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - ldrb w9, [x27, x6] +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - ldrsb w10, [x30, x7] +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - ldrb w11, [x29, x3, sxtx] +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - strb w12, [x28, xzr, sxtx] +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - ldrb w14, [x26, w6, uxtw] +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - ldrsb w15, [x25, w7, uxtw] +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - ldrb w17, [x23, w9, sxtw] +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - ldrsb x18, [x22, w10, sxtw] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldrsh w3, [sp, x5] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldrsh w9, [x27, x6] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldrh w10, [x30, x7, lsl #1] +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - strh w11, [x29, x3, sxtx] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldrh w12, [x28, xzr, sxtx] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldrsh x13, [x27, x5, sxtx #1] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldrh w14, [x26, w6, uxtw] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldrh w15, [x25, w7, uxtw] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldrsh w16, [x24, w8, uxtw #1] +# CHECK-NEXT: - - - - - 0.50 
0.50 - - - - - ldrh w17, [x23, w9, sxtw] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldrh w18, [x22, w10, sxtw] +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - strh w19, [x21, wzr, sxtw #1] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldr w3, [sp, x5] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldr s9, [x27, x6] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldr w10, [x30, x7, lsl #2] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldr w11, [x29, x3, sxtx] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - 1.00 str s12, [x28, xzr, sxtx] +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - str w13, [x27, x5, sxtx #2] +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - str w14, [x26, w6, uxtw] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldr w15, [x25, w7, uxtw] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldr w16, [x24, w8, uxtw #2] +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - ldrsw x17, [x23, w9, sxtw] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldr w18, [x22, w10, sxtw] +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - ldrsw x19, [x21, wzr, sxtw #2] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldr x3, [sp, x5] +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - str x9, [x27, x6] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldr d10, [x30, x7, lsl #3] +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - str x11, [x29, x3, sxtx] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldr x12, [x28, xzr, sxtx] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldr x13, [x27, x5, sxtx #3] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - prfm pldl1keep, [x26, w6, uxtw] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldr x15, [x25, w7, uxtw] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldr x16, [x24, w8, uxtw #3] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldr x17, [x23, w9, sxtw] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldr x18, [x22, w10, sxtw] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - 1.00 str d19, [x21, wzr, sxtw #3] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldr q3, [sp, x5] +# CHECK-NEXT: - - - - - 0.50 
0.50 - - - - - ldr q9, [x27, x6] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldr q10, [x30, x7, lsl #4] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - 1.00 str q11, [x29, x3, sxtx] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - 1.00 str q12, [x28, xzr, sxtx] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - 1.00 str q13, [x27, x5, sxtx #4] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldr q14, [x26, w6, uxtw] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldr q15, [x25, w7, uxtw] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldr q16, [x24, w8, uxtw #4] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldr q17, [x23, w9, sxtw] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - 1.00 str q18, [x22, w10, sxtw] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldr q19, [x21, wzr, sxtw #4] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldp w3, w5, [sp] +# CHECK-NEXT: - - 0.50 0.50 - - - 0.50 0.50 - - - stp wzr, w9, [sp, #252] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldp w2, wzr, [sp, #-256] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldp w9, w10, [sp, #4] +# CHECK-NEXT: - - - - 1.00 0.50 0.50 - - - - - ldpsw x9, x10, [sp, #4] +# CHECK-NEXT: - - - - 1.00 0.50 0.50 - - - - - ldpsw x9, x10, [x2, #-256] +# CHECK-NEXT: - - - - 1.00 0.50 0.50 - - - - - ldpsw x20, x30, [sp, #252] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldp x21, x29, [x2, #504] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldp x22, x23, [x3, #-512] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldp x24, x25, [x4, #8] +# CHECK-NEXT: - - - - 1.00 0.50 0.50 - - - - - ldp s29, s28, [sp, #252] +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - 2.00 stp s27, s26, [sp, #-256] +# CHECK-NEXT: - - - - 1.00 0.50 0.50 - - - - - ldp s1, s2, [x3, #44] +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - 2.00 stp d3, d5, [x9, #504] +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - 2.00 stp d7, d11, [x10, #-512] +# CHECK-NEXT: - - - - 1.00 0.50 0.50 - - - - - ldp d2, d3, [x30, #-8] +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - 2.00 stp q3, q5, [sp] +# CHECK-NEXT: - - - - - - - 1.00 
1.00 - - 2.00 stp q17, q19, [sp, #1008] +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - ldp q23, q29, [x1, #-1024] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldp w3, w5, [sp], #0 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - 0.50 0.50 - - - stp wzr, w9, [sp], #252 +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldp w2, wzr, [sp], #-256 +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldp w9, w10, [sp], #4 +# CHECK-NEXT: - - - - 1.00 0.50 0.50 - - - - - ldpsw x9, x10, [sp], #4 +# CHECK-NEXT: - - - - 1.00 0.50 0.50 - - - - - ldpsw x9, x10, [x2], #-256 +# CHECK-NEXT: - - - - 1.00 0.50 0.50 - - - - - ldpsw x20, x30, [sp], #252 +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldp x21, x29, [x2], #504 +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldp x22, x23, [x3], #-512 +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldp x24, x25, [x4], #8 +# CHECK-NEXT: - - - - 1.00 0.50 0.50 - - - - - ldp s29, s28, [sp], #252 +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - 2.00 stp s27, s26, [sp], #-256 +# CHECK-NEXT: - - - - 1.00 0.50 0.50 - - - - - ldp s1, s2, [x3], #44 +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - 2.00 stp d3, d5, [x9], #504 +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - 2.00 stp d7, d11, [x10], #-512 +# CHECK-NEXT: - - - - 1.00 0.50 0.50 - - - - - ldp d2, d3, [x30], #-8 +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - 2.00 stp q3, q5, [sp], #0 +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - 2.00 stp q17, q19, [sp], #1008 +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - ldp q23, q29, [x1], #-1024 +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldp w3, w5, [sp, #0]! +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - 0.50 0.50 - - - stp wzr, w9, [sp, #252]! +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldp w2, wzr, [sp, #-256]! +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldp w9, w10, [sp, #4]! +# CHECK-NEXT: - - - - 1.00 0.50 0.50 - - - - - ldpsw x9, x10, [sp, #4]! +# CHECK-NEXT: - - - - 1.00 0.50 0.50 - - - - - ldpsw x9, x10, [x2, #-256]! +# CHECK-NEXT: - - - - 1.00 0.50 0.50 - - - - - ldpsw x20, x30, [sp, #252]! 
+# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldp x21, x29, [x2, #504]! +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldp x22, x23, [x3, #-512]! +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldp x24, x25, [x4, #8]! +# CHECK-NEXT: - - - - 1.00 0.50 0.50 - - - - - ldp s29, s28, [sp, #252]! +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - 2.00 stp s27, s26, [sp, #-256]! +# CHECK-NEXT: - - - - 1.00 0.50 0.50 - - - - - ldp s1, s2, [x3, #44]! +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - 2.00 stp d3, d5, [x9, #504]! +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - 2.00 stp d7, d11, [x10, #-512]! +# CHECK-NEXT: - - - - 1.00 0.50 0.50 - - - - - ldp d2, d3, [x30, #-8]! +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - 2.00 stp q3, q5, [sp, #0]! +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - 2.00 stp q17, q19, [sp, #1008]! +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - ldp q23, q29, [x1, #-1024]! +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldnp w3, w5, [sp] +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stnp wzr, w9, [sp, #252] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldnp w2, wzr, [sp, #-256] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldnp w9, w10, [sp, #4] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldnp x21, x29, [x2, #504] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldnp x22, x23, [x3, #-512] +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ldnp x24, x25, [x4, #8] +# CHECK-NEXT: - - - - 1.00 0.50 0.50 - - - - - ldnp s29, s28, [sp, #252] +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - 2.00 stnp s27, s26, [sp, #-256] +# CHECK-NEXT: - - - - 1.00 0.50 0.50 - - - - - ldnp s1, s2, [x3, #44] +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - 2.00 stnp d3, d5, [x9, #504] +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - 2.00 stnp d7, d11, [x10, #-512] +# CHECK-NEXT: - - - - 1.00 0.50 0.50 - - - - - ldnp d2, d3, [x30, #-8] +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - 2.00 stnp q3, q5, [sp] +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - 2.00 stnp q17, q19, [sp, #1008] +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - ldnp q23, q29, [x1, 
#-1024] +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - mov w3, #983055 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - mov x10, #-6148914691236517206 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - and w12, w23, w21 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - and w16, w15, w1, lsl #1 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - and w9, w4, w10, lsl #31 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - and w3, w30, w11 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - and x3, x5, x7, lsl #63 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - and x5, x14, x19, asr #4 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - and w3, w17, w19, ror #31 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - and w0, w2, wzr, lsr #17 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - and w3, w30, w11, asr #2 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - and xzr, x4, x26 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - and w3, wzr, w20, ror #2 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - and x7, x20, xzr, asr #63 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - bic x13, x20, x14, lsl #47 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - bic w2, w7, w9 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - orr w2, w7, w0, asr #31 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - orr x8, x9, x10, lsl #12 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - orn x3, x5, x7, asr #2 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - orn w2, w5, w29 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - ands w7, wzr, w9, lsl #1 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - ands x3, x5, x20, ror #63 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - bics w3, w5, w7 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - bics x3, xzr, x3, lsl #1 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - tst w3, w7, lsl #31 +# CHECK-NEXT: 0.25 0.25 0.75 0.75 - - - - - - - - tst x2, x20, asr #2 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - mov x3, x6 +# CHECK-NEXT: 
0.25 0.25 0.25 0.25 - - - - - - - - mov x3, xzr +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - mov wzr, w2 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - mov w3, w5 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - movz w2, #0, lsl #16 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - mov w2, #-1235 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - mov x2, #5299989643264 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - mov x2, #0 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - movk w3, #0 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - movz x4, #0, lsl #16 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - movk w5, #0, lsl #16 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - movz x6, #0, lsl #32 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - movk x7, #0, lsl #32 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - movz x8, #0, lsl #48 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - movk x9, #0, lsl #48 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - adr x2, #1600 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - adrp x21, #6553600 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - adr x0, #262144 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - tbz x12, #62, #0 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - tbz x12, #62, #4 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - tbz x12, #62, #-32768 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - tbnz x12, #60, #32764 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - b #4 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - b #-4 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - b #134217724 +# CHECK-NEXT: 1.00 1.00 - - - - - - - - - - br x20 +# CHECK-NEXT: 1.00 1.00 - - - - - - - - - - blr xzr +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ret x10 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - ret +# CHECK-NEXT: 1.00 1.00 - - - - - - - - - - eret +# CHECK-NEXT: 1.00 1.00 - - - - - - - - - - drps diff --git a/llvm/test/tools/llvm-mca/AArch64/Ampere/Ampere1B/cssc-instructions.s 
b/llvm/test/tools/llvm-mca/AArch64/Ampere/Ampere1B/cssc-instructions.s new file mode 100644 index 00000000000000..a19a106f4b47ec --- /dev/null +++ b/llvm/test/tools/llvm-mca/AArch64/Ampere/Ampere1B/cssc-instructions.s @@ -0,0 +1,76 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=aarch64 -mcpu=ampere1b -instruction-tables < %s | FileCheck %s + +abs w1, w2 +abs x2, x3 +cnt w3, w4 +cnt x4, x5 +ctz w5, w6 +ctz x6, x7 +smax w7, w8, w9 +smax x8, x9, x10 +umax w9, w10, w11 +umax x10, x11, x12 +smin w11, w12, w13 +smin w12, w13, w14 +umin w13, w14, w15 +umin x14, x15, x16 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 0.25 abs w1, w2 +# CHECK-NEXT: 1 1 0.25 abs x2, x3 +# CHECK-NEXT: 1 3 1.00 cnt w3, w4 +# CHECK-NEXT: 1 3 1.00 cnt x4, x5 +# CHECK-NEXT: 1 1 0.50 ctz w5, w6 +# CHECK-NEXT: 1 1 0.50 ctz x6, x7 +# CHECK-NEXT: 2 1 0.50 smax w7, w8, w9 +# CHECK-NEXT: 2 1 0.50 smax x8, x9, x10 +# CHECK-NEXT: 2 1 0.50 umax w9, w10, w11 +# CHECK-NEXT: 2 1 0.50 umax x10, x11, x12 +# CHECK-NEXT: 2 1 0.50 smin w11, w12, w13 +# CHECK-NEXT: 2 1 0.50 smin w12, w13, w14 +# CHECK-NEXT: 2 1 0.50 umin w13, w14, w15 +# CHECK-NEXT: 2 1 0.50 umin x14, x15, x16 + +# CHECK: Resources: +# CHECK-NEXT: [0.0] - Ampere1BUnitA +# CHECK-NEXT: [0.1] - Ampere1BUnitA +# CHECK-NEXT: [1.0] - Ampere1BUnitB +# CHECK-NEXT: [1.1] - Ampere1BUnitB +# CHECK-NEXT: [2] - Ampere1BUnitBS +# CHECK-NEXT: [3.0] - Ampere1BUnitL +# CHECK-NEXT: [3.1] - Ampere1BUnitL +# CHECK-NEXT: [4.0] - Ampere1BUnitS +# CHECK-NEXT: [4.1] - Ampere1BUnitS +# CHECK-NEXT: [5] - Ampere1BUnitX +# CHECK-NEXT: [6] - Ampere1BUnitY +# CHECK-NEXT: [7] - Ampere1BUnitZ + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] 
[3.1] [4.0] [4.1] [5] [6] [7] +# CHECK-NEXT: 6.50 6.50 3.50 3.50 2.00 - - - - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4.0] [4.1] [5] [6] [7] Instructions: +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - abs w1, w2 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - abs x2, x3 +# CHECK-NEXT: - - - - 1.00 - - - - - - - cnt w3, w4 +# CHECK-NEXT: - - - - 1.00 - - - - - - - cnt x4, x5 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - ctz w5, w6 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - ctz x6, x7 +# CHECK-NEXT: 0.75 0.75 0.25 0.25 - - - - - - - - smax w7, w8, w9 +# CHECK-NEXT: 0.75 0.75 0.25 0.25 - - - - - - - - smax x8, x9, x10 +# CHECK-NEXT: 0.75 0.75 0.25 0.25 - - - - - - - - umax w9, w10, w11 +# CHECK-NEXT: 0.75 0.75 0.25 0.25 - - - - - - - - umax x10, x11, x12 +# CHECK-NEXT: 0.75 0.75 0.25 0.25 - - - - - - - - smin w11, w12, w13 +# CHECK-NEXT: 0.75 0.75 0.25 0.25 - - - - - - - - smin w12, w13, w14 +# CHECK-NEXT: 0.75 0.75 0.25 0.25 - - - - - - - - umin w13, w14, w15 +# CHECK-NEXT: 0.75 0.75 0.25 0.25 - - - - - - - - umin x14, x15, x16 diff --git a/llvm/test/tools/llvm-mca/AArch64/Ampere/Ampere1B/mte-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Ampere/Ampere1B/mte-instructions.s new file mode 100644 index 00000000000000..5148522431edbf --- /dev/null +++ b/llvm/test/tools/llvm-mca/AArch64/Ampere/Ampere1B/mte-instructions.s @@ -0,0 +1,349 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=aarch64 -mcpu=ampere1b -instruction-tables < %s | FileCheck %s + +irg x0, x1 +irg sp, x1 +irg x0, sp +irg x0, x1, x2 +irg sp, x1, x2 +addg x0, x1, #0, #1 +addg sp, x2, #32, #3 +addg x0, sp, #64, #5 +addg x3, x4, #1008, #6 +addg x5, x6, #112, #15 +subg x0, x1, #0, #1 +subg sp, x2, #32, #3 +subg x0, sp, #64, #5 +subg x3, x4, #1008, #6 +subg x5, x6, #112, #15 +gmi x0, x1, x2 +gmi x3, sp, x4 +gmi xzr, x0, x30 +gmi x30, x0, xzr +subp x0, x1, x2 +subps x0, x1, 
x2 +subp x0, sp, sp +subps x0, sp, sp +subps xzr, x0, x1 +subps xzr, sp, sp +stg x0, [x1, #-4096] +stg x1, [x2, #4080] +stg x2, [sp, #16] +stg x3, [x1] +stg sp, [x1] +stzg x0, [x1, #-4096] +stzg x1, [x2, #4080] +stzg x2, [sp, #16] +stzg x3, [x1] +stzg sp, [x1] +stg x0, [x1, #-4096]! +stg x1, [x2, #4080]! +stg x2, [sp, #16]! +stg sp, [sp, #16]! +stzg x0, [x1, #-4096]! +stzg x1, [x2, #4080]! +stzg x2, [sp, #16]! +stzg sp, [sp, #16]! +stg x0, [x1], #-4096 +stg x1, [x2], #4080 +stg x2, [sp], #16 +stg sp, [sp], #16 +stzg x0, [x1], #-4096 +stzg x1, [x2], #4080 +stzg x2, [sp], #16 +stzg sp, [sp], #16 +st2g x0, [x1, #-4096] +st2g x1, [x2, #4080] +st2g x2, [sp, #16] +st2g x3, [x1] +st2g sp, [x1] +stz2g x0, [x1, #-4096] +stz2g x1, [x2, #4080] +stz2g x2, [sp, #16] +stz2g x3, [x1] +stz2g sp, [x1] +st2g x0, [x1, #-4096]! +st2g x1, [x2, #4080]! +st2g x2, [sp, #16]! +st2g sp, [sp, #16]! +stz2g x0, [x1, #-4096]! +stz2g x1, [x2, #4080]! +stz2g x2, [sp, #16]! +stz2g sp, [sp, #16]! +st2g x0, [x1], #-4096 +st2g x1, [x2], #4080 +st2g x2, [sp], #16 +st2g sp, [sp], #16 +stz2g x0, [x1], #-4096 +stz2g x1, [x2], #4080 +stz2g x2, [sp], #16 +stz2g sp, [sp], #16 +stgp x0, x1, [x2, #-1024] +stgp x0, x1, [x2, #1008] +stgp x0, x1, [sp, #16] +stgp xzr, x1, [x2, #16] +stgp x0, xzr, [x2, #16] +stgp x0, xzr, [x2] +stgp x0, x1, [x2, #-1024]! +stgp x0, x1, [x2, #1008]! +stgp x0, x1, [sp, #16]! +stgp xzr, x1, [x2, #16]! +stgp x0, xzr, [x2, #16]! 
+stgp x0, x1, [x2], #-1024 +stgp x0, x1, [x2], #1008 +stgp x0, x1, [sp], #16 +stgp xzr, x1, [x2], #16 +stgp x0, xzr, [x2], #16 +ldg x0, [x1] +ldg x2, [sp, #-4096] +ldg x3, [x4, #4080] +ldgm x0, [x1] +ldgm x1, [sp] +ldgm xzr, [x2] +stgm x0, [x1] +stgm x1, [sp] +stgm xzr, [x2] +stzgm x0, [x1] +stzgm x1, [sp] +stzgm xzr, [x2] + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 2 1 1.00 U irg x0, x1 +# CHECK-NEXT: 2 1 1.00 U irg sp, x1 +# CHECK-NEXT: 2 1 1.00 U irg x0, sp +# CHECK-NEXT: 2 1 1.00 U irg x0, x1, x2 +# CHECK-NEXT: 2 1 1.00 U irg sp, x1, x2 +# CHECK-NEXT: 1 1 0.50 addg x0, x1, #0, #1 +# CHECK-NEXT: 1 1 0.50 addg sp, x2, #32, #3 +# CHECK-NEXT: 1 1 0.50 addg x0, sp, #64, #5 +# CHECK-NEXT: 1 1 0.50 addg x3, x4, #1008, #6 +# CHECK-NEXT: 1 1 0.50 addg x5, x6, #112, #15 +# CHECK-NEXT: 1 1 0.50 U subg x0, x1, #0, #1 +# CHECK-NEXT: 1 1 0.50 U subg sp, x2, #32, #3 +# CHECK-NEXT: 1 1 0.50 U subg x0, sp, #64, #5 +# CHECK-NEXT: 1 1 0.50 U subg x3, x4, #1008, #6 +# CHECK-NEXT: 1 1 0.50 U subg x5, x6, #112, #15 +# CHECK-NEXT: 1 1 0.25 gmi x0, x1, x2 +# CHECK-NEXT: 1 1 0.25 gmi x3, sp, x4 +# CHECK-NEXT: 1 1 0.25 gmi xzr, x0, x30 +# CHECK-NEXT: 1 1 0.25 gmi x30, x0, xzr +# CHECK-NEXT: 1 1 0.25 subp x0, x1, x2 +# CHECK-NEXT: 1 1 0.25 U subps x0, x1, x2 +# CHECK-NEXT: 1 1 0.25 subp x0, sp, sp +# CHECK-NEXT: 1 1 0.25 U subps x0, sp, sp +# CHECK-NEXT: 1 1 0.25 U subps xzr, x0, x1 +# CHECK-NEXT: 1 1 0.25 U subps xzr, sp, sp +# CHECK-NEXT: 1 1 0.50 * stg x0, [x1, #-4096] +# CHECK-NEXT: 1 1 0.50 * stg x1, [x2, #4080] +# CHECK-NEXT: 1 1 0.50 * stg x2, [sp, #16] +# CHECK-NEXT: 1 1 0.50 * stg x3, [x1] +# CHECK-NEXT: 1 1 0.50 * stg sp, [x1] +# CHECK-NEXT: 1 1 0.50 * stzg x0, [x1, #-4096] +# CHECK-NEXT: 1 1 0.50 * stzg x1, [x2, #4080] +# CHECK-NEXT: 1 1 0.50 * 
stzg x2, [sp, #16] +# CHECK-NEXT: 1 1 0.50 * stzg x3, [x1] +# CHECK-NEXT: 1 1 0.50 * stzg sp, [x1] +# CHECK-NEXT: 1 1 0.50 * U stg x0, [x1, #-4096]! +# CHECK-NEXT: 1 1 0.50 * U stg x1, [x2, #4080]! +# CHECK-NEXT: 1 1 0.50 * U stg x2, [sp, #16]! +# CHECK-NEXT: 1 1 0.50 * U stg sp, [sp, #16]! +# CHECK-NEXT: 1 1 0.50 * U stzg x0, [x1, #-4096]! +# CHECK-NEXT: 1 1 0.50 * U stzg x1, [x2, #4080]! +# CHECK-NEXT: 1 1 0.50 * U stzg x2, [sp, #16]! +# CHECK-NEXT: 1 1 0.50 * U stzg sp, [sp, #16]! +# CHECK-NEXT: 1 1 0.50 * U stg x0, [x1], #-4096 +# CHECK-NEXT: 1 1 0.50 * U stg x1, [x2], #4080 +# CHECK-NEXT: 1 1 0.50 * U stg x2, [sp], #16 +# CHECK-NEXT: 1 1 0.50 * U stg sp, [sp], #16 +# CHECK-NEXT: 1 1 0.50 * U stzg x0, [x1], #-4096 +# CHECK-NEXT: 1 1 0.50 * U stzg x1, [x2], #4080 +# CHECK-NEXT: 1 1 0.50 * U stzg x2, [sp], #16 +# CHECK-NEXT: 1 1 0.50 * U stzg sp, [sp], #16 +# CHECK-NEXT: 2 1 1.00 * st2g x0, [x1, #-4096] +# CHECK-NEXT: 2 1 1.00 * st2g x1, [x2, #4080] +# CHECK-NEXT: 2 1 1.00 * st2g x2, [sp, #16] +# CHECK-NEXT: 2 1 1.00 * st2g x3, [x1] +# CHECK-NEXT: 2 1 1.00 * st2g sp, [x1] +# CHECK-NEXT: 2 1 1.00 * stz2g x0, [x1, #-4096] +# CHECK-NEXT: 2 1 1.00 * stz2g x1, [x2, #4080] +# CHECK-NEXT: 2 1 1.00 * stz2g x2, [sp, #16] +# CHECK-NEXT: 2 1 1.00 * stz2g x3, [x1] +# CHECK-NEXT: 2 1 1.00 * stz2g sp, [x1] +# CHECK-NEXT: 2 1 1.00 * U st2g x0, [x1, #-4096]! +# CHECK-NEXT: 2 1 1.00 * U st2g x1, [x2, #4080]! +# CHECK-NEXT: 2 1 1.00 * U st2g x2, [sp, #16]! +# CHECK-NEXT: 2 1 1.00 * U st2g sp, [sp, #16]! +# CHECK-NEXT: 2 1 1.00 * U stz2g x0, [x1, #-4096]! +# CHECK-NEXT: 2 1 1.00 * U stz2g x1, [x2, #4080]! +# CHECK-NEXT: 2 1 1.00 * U stz2g x2, [sp, #16]! +# CHECK-NEXT: 2 1 1.00 * U stz2g sp, [sp, #16]! 
+# CHECK-NEXT: 2 1 1.00 * U st2g x0, [x1], #-4096 +# CHECK-NEXT: 2 1 1.00 * U st2g x1, [x2], #4080 +# CHECK-NEXT: 2 1 1.00 * U st2g x2, [sp], #16 +# CHECK-NEXT: 2 1 1.00 * U st2g sp, [sp], #16 +# CHECK-NEXT: 2 1 1.00 * U stz2g x0, [x1], #-4096 +# CHECK-NEXT: 2 1 1.00 * U stz2g x1, [x2], #4080 +# CHECK-NEXT: 2 1 1.00 * U stz2g x2, [sp], #16 +# CHECK-NEXT: 2 1 1.00 * U stz2g sp, [sp], #16 +# CHECK-NEXT: 2 1 1.00 * stgp x0, x1, [x2, #-1024] +# CHECK-NEXT: 2 1 1.00 * stgp x0, x1, [x2, #1008] +# CHECK-NEXT: 2 1 1.00 * stgp x0, x1, [sp, #16] +# CHECK-NEXT: 2 1 1.00 * stgp xzr, x1, [x2, #16] +# CHECK-NEXT: 2 1 1.00 * stgp x0, xzr, [x2, #16] +# CHECK-NEXT: 2 1 1.00 * stgp x0, xzr, [x2] +# CHECK-NEXT: 2 1 1.00 * stgp x0, x1, [x2, #-1024]! +# CHECK-NEXT: 2 1 1.00 * stgp x0, x1, [x2, #1008]! +# CHECK-NEXT: 2 1 1.00 * stgp x0, x1, [sp, #16]! +# CHECK-NEXT: 2 1 1.00 * stgp xzr, x1, [x2, #16]! +# CHECK-NEXT: 2 1 1.00 * stgp x0, xzr, [x2, #16]! +# CHECK-NEXT: 2 1 1.00 * stgp x0, x1, [x2], #-1024 +# CHECK-NEXT: 2 1 1.00 * stgp x0, x1, [x2], #1008 +# CHECK-NEXT: 2 1 1.00 * stgp x0, x1, [sp], #16 +# CHECK-NEXT: 2 1 1.00 * stgp xzr, x1, [x2], #16 +# CHECK-NEXT: 2 1 1.00 * stgp x0, xzr, [x2], #16 +# CHECK-NEXT: 2 4 0.50 * ldg x0, [x1] +# CHECK-NEXT: 2 4 0.50 * ldg x2, [sp, #-4096] +# CHECK-NEXT: 2 4 0.50 * ldg x3, [x4, #4080] +# CHECK-NEXT: 2 4 0.50 * U ldgm x0, [x1] +# CHECK-NEXT: 2 4 0.50 * U ldgm x1, [sp] +# CHECK-NEXT: 2 4 0.50 * U ldgm xzr, [x2] +# CHECK-NEXT: 1 1 0.50 U stgm x0, [x1] +# CHECK-NEXT: 1 1 0.50 U stgm x1, [sp] +# CHECK-NEXT: 1 1 0.50 U stgm xzr, [x2] +# CHECK-NEXT: 1 1 0.50 U stzgm x0, [x1] +# CHECK-NEXT: 1 1 0.50 U stzgm x1, [sp] +# CHECK-NEXT: 1 1 0.50 U stzgm xzr, [x2] + +# CHECK: Resources: +# CHECK-NEXT: [0.0] - Ampere1BUnitA +# CHECK-NEXT: [0.1] - Ampere1BUnitA +# CHECK-NEXT: [1.0] - Ampere1BUnitB +# CHECK-NEXT: [1.1] - Ampere1BUnitB +# CHECK-NEXT: [2] - Ampere1BUnitBS +# CHECK-NEXT: [3.0] - Ampere1BUnitL +# CHECK-NEXT: [3.1] - Ampere1BUnitL +# CHECK-NEXT: 
[4.0] - Ampere1BUnitS +# CHECK-NEXT: [4.1] - Ampere1BUnitS +# CHECK-NEXT: [5] - Ampere1BUnitX +# CHECK-NEXT: [6] - Ampere1BUnitY +# CHECK-NEXT: [7] - Ampere1BUnitZ + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4.0] [4.1] [5] [6] [7] +# CHECK-NEXT: 2.50 2.50 13.00 13.00 5.00 3.00 3.00 58.00 58.00 - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4.0] [4.1] [5] [6] [7] Instructions: +# CHECK-NEXT: - - 0.50 0.50 1.00 - - - - - - - irg x0, x1 +# CHECK-NEXT: - - 0.50 0.50 1.00 - - - - - - - irg sp, x1 +# CHECK-NEXT: - - 0.50 0.50 1.00 - - - - - - - irg x0, sp +# CHECK-NEXT: - - 0.50 0.50 1.00 - - - - - - - irg x0, x1, x2 +# CHECK-NEXT: - - 0.50 0.50 1.00 - - - - - - - irg sp, x1, x2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - addg x0, x1, #0, #1 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - addg sp, x2, #32, #3 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - addg x0, sp, #64, #5 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - addg x3, x4, #1008, #6 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - addg x5, x6, #112, #15 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - subg x0, x1, #0, #1 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - subg sp, x2, #32, #3 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - subg x0, sp, #64, #5 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - subg x3, x4, #1008, #6 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - subg x5, x6, #112, #15 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - gmi x0, x1, x2 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - gmi x3, sp, x4 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - gmi xzr, x0, x30 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - gmi x30, x0, xzr +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - subp x0, x1, x2 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - subps x0, x1, x2 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - subp x0, sp, sp +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - subps x0, 
sp, sp +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - subps xzr, x0, x1 +# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - subps xzr, sp, sp +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg x0, [x1, #-4096] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg x1, [x2, #4080] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg x2, [sp, #16] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg x3, [x1] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg sp, [x1] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg x0, [x1, #-4096] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg x1, [x2, #4080] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg x2, [sp, #16] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg x3, [x1] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg sp, [x1] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg x0, [x1, #-4096]! +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg x1, [x2, #4080]! +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg x2, [sp, #16]! +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg sp, [sp, #16]! +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg x0, [x1, #-4096]! +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg x1, [x2, #4080]! +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg x2, [sp, #16]! +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg sp, [sp, #16]! 
+# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg x0, [x1], #-4096 +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg x1, [x2], #4080 +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg x2, [sp], #16 +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stg sp, [sp], #16 +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg x0, [x1], #-4096 +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg x1, [x2], #4080 +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg x2, [sp], #16 +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzg sp, [sp], #16 +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g x0, [x1, #-4096] +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g x1, [x2, #4080] +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g x2, [sp, #16] +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g x3, [x1] +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g sp, [x1] +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g x0, [x1, #-4096] +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g x1, [x2, #4080] +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g x2, [sp, #16] +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g x3, [x1] +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g sp, [x1] +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g x0, [x1, #-4096]! +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g x1, [x2, #4080]! +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g x2, [sp, #16]! +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g sp, [sp, #16]! +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g x0, [x1, #-4096]! +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g x1, [x2, #4080]! +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g x2, [sp, #16]! +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g sp, [sp, #16]! 
+# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g x0, [x1], #-4096 +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g x1, [x2], #4080 +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g x2, [sp], #16 +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - st2g sp, [sp], #16 +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g x0, [x1], #-4096 +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g x1, [x2], #4080 +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g x2, [sp], #16 +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stz2g sp, [sp], #16 +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, x1, [x2, #-1024] +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, x1, [x2, #1008] +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, x1, [sp, #16] +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp xzr, x1, [x2, #16] +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, xzr, [x2, #16] +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, xzr, [x2] +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, x1, [x2, #-1024]! +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, x1, [x2, #1008]! +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, x1, [sp, #16]! +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp xzr, x1, [x2, #16]! +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, xzr, [x2, #16]! 
+# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, x1, [x2], #-1024 +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, x1, [x2], #1008 +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, x1, [sp], #16 +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp xzr, x1, [x2], #16 +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - stgp x0, xzr, [x2], #16 +# CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - ldg x0, [x1] +# CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - ldg x2, [sp, #-4096] +# CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - ldg x3, [x4, #4080] +# CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - ldgm x0, [x1] +# CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - ldgm x1, [sp] +# CHECK-NEXT: - - 0.50 0.50 - 0.50 0.50 - - - - - ldgm xzr, [x2] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stgm x0, [x1] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stgm x1, [sp] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stgm xzr, [x2] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzgm x0, [x1] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzgm x1, [sp] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - stzgm xzr, [x2] diff --git a/llvm/test/tools/llvm-mca/AArch64/Ampere/Ampere1B/neon-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Ampere/Ampere1B/neon-instructions.s new file mode 100644 index 00000000000000..827c13a24763de --- /dev/null +++ b/llvm/test/tools/llvm-mca/AArch64/Ampere/Ampere1B/neon-instructions.s @@ -0,0 +1,3235 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=aarch64 -mcpu=ampere1b -instruction-tables < %s | FileCheck %s + +abs d29, d24 +abs v0.16b, v0.16b +abs v0.2d, v0.2d +abs v0.2s, v0.2s +abs v0.4h, v0.4h +abs v0.4s, v0.4s +abs v0.8b, v0.8b +abs v0.8h, v0.8h +add d17, d31, d29 +add v0.8b, v0.8b, v0.8b +addhn v0.2s, v0.2d, v0.2d +addhn v0.4h, v0.4s, v0.4s +addhn v0.8b, v0.8h, v0.8h +addhn2 v0.16b, v0.8h, v0.8h +addhn2 v0.4s, v0.2d, v0.2d +addhn2 v0.8h, v0.4s, v0.4s +addp v0.2d, v0.2d, v0.2d +addp v0.8b, v0.8b, 
v0.8b +and v0.8b, v0.8b, v0.8b +bic v0.4h, #15, lsl #8 +bic v0.8b, v0.8b, v0.8b +bif v0.16b, v0.16b, v0.16b +bit v0.16b, v0.16b, v0.16b +bsl v0.8b, v0.8b, v0.8b +cls v0.16b, v0.16b +cls v0.2s, v0.2s +cls v0.4h, v0.4h +cls v0.4s, v0.4s +cls v0.8b, v0.8b +cls v0.8h, v0.8h +clz v0.16b, v0.16b +clz v0.2s, v0.2s +clz v0.4h, v0.4h +clz v0.4s, v0.4s +clz v0.8b, v0.8b +clz v0.8h, v0.8h +cmeq d20, d21, 0 +cmeq d20, d21, d22 +cmeq v0.16b, v0.16b, 0 +cmeq v0.16b, v0.16b, v0.16b +cmge d20, d21, 0 +cmge d20, d21, d22 +cmge v0.4h, v0.4h, v0.4h +cmge v0.8b, v0.8b, 0 +cmgt d20, d21, 0 +cmgt d20, d21, d22 +cmgt v0.2s, v0.2s, 0 +cmgt v0.4s, v0.4s, v0.4s +cmhi d20, d21, d22 +cmhi v0.8h, v0.8h, v0.8h +cmhs d20, d21, d22 +cmhs v0.8b, v0.8b, v0.8b +cmle d20, d21, 0 +cmle v0.2d, v0.2d, 0 +cmlt d20, d21, 0 +cmlt v0.8h, v0.8h, 0 +cmtst d20, d21, d22 +cmtst v0.2s, v0.2s, v0.2s +cnt v0.16b, v0.16b +cnt v0.8b, v0.8b +dup v0.16b,w28 +dup v0.2d,x28 +dup v0.2s,w28 +dup v0.4h,w28 +dup v0.4s,w28 +dup v0.8b,w28 +dup v0.8h,w28 +eor v0.16b, v0.16b, v0.16b +ext v0.16b, v0.16b, v0.16b, #3 +ext v0.8b, v0.8b, v0.8b, #3 +fabd d29, d24, d20 +fabd s29, s24, s20 +fabd v0.4s, v0.4s, v0.4s +fabs v0.2d, v0.2d +fabs v0.2s, v0.2s +fabs v0.4h, v0.4h +fabs v0.4s, v0.4s +fabs v0.8h, v0.8h +facge d20, d21, d22 +facge s10, s11, s12 +facge v0.4s, v0.4s, v0.4s +facgt d20, d21, d22 +facgt s10, s11, s12 +facgt v0.2d, v0.2d, v0.2d +fadd v0.4s, v0.4s, v0.4s +faddp v0.2s, v0.2s, v0.2s +faddp v0.4s, v0.4s, v0.4s +fcmeq d20, d21, #0.0 +fcmeq d20, d21, d22 +fcmeq s10, s11, #0.0 +fcmeq s10, s11, s12 +fcmeq v0.2s, v0.2s, #0.0 +fcmeq v0.2s, v0.2s, v0.2s +fcmge d20, d21, #0.0 +fcmge d20, d21, d22 +fcmge s10, s11, #0.0 +fcmge s10, s11, s12 +fcmge v0.2d, v0.2d, #0.0 +fcmge v0.4s, v0.4s, v0.4s +fcmgt d20, d21, #0.0 +fcmgt d20, d21, d22 +fcmgt s10, s11, #0.0 +fcmgt s10, s11, s12 +fcmgt v0.4s, v0.4s, #0.0 +fcmgt v0.4s, v0.4s, v0.4s +fcmle d20, d21, #0.0 +fcmle s10, s11, #0.0 +fcmle v0.2d, v0.2d, #0.0 +fcmlt d20, d21, #0.0 +fcmlt s10, 
s11, #0.0 +fcmlt v0.4s, v0.4s, #0.0 +fcvtas d21, d14 +fcvtas s12, s13 +fcvtas v0.2d, v0.2d +fcvtas v0.2s, v0.2s +fcvtas v0.4h, v0.4h +fcvtas v0.4s, v0.4s +fcvtas v0.8h, v0.8h +fcvtau d21, d14 +fcvtau s12, s13 +fcvtau v0.2d, v0.2d +fcvtau v0.2s, v0.2s +fcvtau v0.4h, v0.4h +fcvtau v0.4s, v0.4s +fcvtau v0.8h, v0.8h +fcvtl v0.2d, v0.2s +fcvtl v0.4s, v0.4h +fcvtl2 v0.2d, v0.4s +fcvtl2 v0.4s, v0.8h +fcvtms d21, d14 +fcvtms s22, s13 +fcvtms v0.2d, v0.2d +fcvtms v0.2s, v0.2s +fcvtms v0.4h, v0.4h +fcvtms v0.4s, v0.4s +fcvtms v0.8h, v0.8h +fcvtmu d21, d14 +fcvtmu s12, s13 +fcvtmu v0.2d, v0.2d +fcvtmu v0.2s, v0.2s +fcvtmu v0.4h, v0.4h +fcvtmu v0.4s, v0.4s +fcvtmu v0.8h, v0.8h +fcvtn v0.2s, v0.2d +fcvtn v0.4h, v0.4s +fcvtn2 v0.4s, v0.2d +fcvtn2 v0.8h, v0.4s +fcvtns d21, d14 +fcvtns s22, s13 +fcvtns v0.2d, v0.2d +fcvtns v0.2s, v0.2s +fcvtns v0.4h, v0.4h +fcvtns v0.4s, v0.4s +fcvtns v0.8h, v0.8h +fcvtnu d21, d14 +fcvtnu s12, s13 +fcvtnu v0.2d, v0.2d +fcvtnu v0.2s, v0.2s +fcvtnu v0.4h, v0.4h +fcvtnu v0.4s, v0.4s +fcvtnu v0.8h, v0.8h +fcvtps d21, d14 +fcvtps s22, s13 +fcvtps v0.2d, v0.2d +fcvtps v0.2s, v0.2s +fcvtps v0.4h, v0.4h +fcvtps v0.4s, v0.4s +fcvtps v0.8h, v0.8h +fcvtpu d21, d14 +fcvtpu s12, s13 +fcvtpu v0.2d, v0.2d +fcvtpu v0.2s, v0.2s +fcvtpu v0.4h, v0.4h +fcvtpu v0.4s, v0.4s +fcvtpu v0.8h, v0.8h +fcvtxn s22, d13 +fcvtxn v0.2s, v0.2d +fcvtxn2 v0.4s, v0.2d +fcvtzs d21, d12, #1 +fcvtzs d21, d14 +fcvtzs s12, s13 +fcvtzs s21, s12, #1 +fcvtzs v0.2d, v0.2d +fcvtzs v0.2d, v0.2d, #3 +fcvtzs v0.2s, v0.2s +fcvtzs v0.2s, v0.2s, #3 +fcvtzs v0.4h, v0.4h +fcvtzs v0.4s, v0.4s +fcvtzs v0.4s, v0.4s, #3 +fcvtzs v0.8h, v0.8h +fcvtzu d21, d12, #1 +fcvtzu d21, d14 +fcvtzu s12, s13 +fcvtzu s21, s12, #1 +fcvtzu v0.2d, v0.2d +fcvtzu v0.2d, v0.2d, #3 +fcvtzu v0.2s, v0.2s +fcvtzu v0.2s, v0.2s, #3 +fcvtzu v0.4h, v0.4h +fcvtzu v0.4s, v0.4s +fcvtzu v0.4s, v0.4s, #3 +fcvtzu v0.8h, v0.8h +fdiv v0.2s, v0.2s, v0.2s +fmax v0.2d, v0.2d, v0.2d +fmax v0.2s, v0.2s, v0.2s +fmax v0.4s, v0.4s, v0.4s +fmaxnm 
v0.2d, v0.2d, v0.2d +fmaxnm v0.2s, v0.2s, v0.2s +fmaxnm v0.4s, v0.4s, v0.4s +fmaxnmp v0.2d, v0.2d, v0.2d +fmaxnmp v0.2s, v0.2s, v0.2s +fmaxnmp v0.4s, v0.4s, v0.4s +fmaxp v0.2d, v0.2d, v0.2d +fmaxp v0.2s, v0.2s, v0.2s +fmaxp v0.4s, v0.4s, v0.4s +fmin v0.2d, v0.2d, v0.2d +fmin v0.2s, v0.2s, v0.2s +fmin v0.4s, v0.4s, v0.4s +fminnm v0.2d, v0.2d, v0.2d +fminnm v0.2s, v0.2s, v0.2s +fminnm v0.4s, v0.4s, v0.4s +fminnmp v0.2d, v0.2d, v0.2d +fminnmp v0.2s, v0.2s, v0.2s +fminnmp v0.4s, v0.4s, v0.4s +fminp v0.2d, v0.2d, v0.2d +fminp v0.2s, v0.2s, v0.2s +fminp v0.4s, v0.4s, v0.4s +fmla d0, d1, v0.d[1] +fmla s0, s1, v0.s[3] +fmla v0.2s, v0.2s, v0.2s +fmls d0, d4, v0.d[1] +fmls s3, s5, v0.s[3] +fmls v0.2s, v0.2s, v0.2s +fmov v0.2d, #-1.25 +fmov v0.2s, #13.0 +fmov v0.4s, #1.0 +fmul d0, d1, v0.d[1] +fmul s0, s1, v0.s[3] +fmul v0.2s, v0.2s, v0.2s +fmulx d0, d4, v0.d[1] +fmulx d23, d11, d1 +fmulx s20, s22, s15 +fmulx s3, s5, v0.s[3] +fmulx v0.2d, v0.2d, v0.2d +fmulx v0.2s, v0.2s, v0.2s +fmulx v0.4s, v0.4s, v0.4s +fneg v0.2d, v0.2d +fneg v0.2s, v0.2s +fneg v0.4h, v0.4h +fneg v0.4s, v0.4s +fneg v0.8h, v0.8h +frecpe d13, d13 +frecpe s19, s14 +frecpe v0.2d, v0.2d +frecpe v0.2s, v0.2s +frecpe v0.4h, v0.4h +frecpe v0.4s, v0.4s +frecpe v0.8h, v0.8h +frecps v0.4s, v0.4s, v0.4s +frecps d22, d30, d21 +frecps s21, s16, s13 +frecpx d16, d19 +frecpx s18, s10 +frinta v0.2d, v0.2d +frinta v0.2s, v0.2s +frinta v0.4h, v0.4h +frinta v0.4s, v0.4s +frinta v0.8h, v0.8h +frinti v0.2d, v0.2d +frinti v0.2s, v0.2s +frinti v0.4h, v0.4h +frinti v0.4s, v0.4s +frinti v0.8h, v0.8h +frintm v0.2d, v0.2d +frintm v0.2s, v0.2s +frintm v0.4h, v0.4h +frintm v0.4s, v0.4s +frintm v0.8h, v0.8h +frintn v0.2d, v0.2d +frintn v0.2s, v0.2s +frintn v0.4h, v0.4h +frintn v0.4s, v0.4s +frintn v0.8h, v0.8h +frintp v0.2d, v0.2d +frintp v0.2s, v0.2s +frintp v0.4h, v0.4h +frintp v0.4s, v0.4s +frintp v0.8h, v0.8h +frintx v0.2d, v0.2d +frintx v0.2s, v0.2s +frintx v0.4h, v0.4h +frintx v0.4s, v0.4s +frintx v0.8h, v0.8h +frintz v0.2d, v0.2d 
+frintz v0.2s, v0.2s +frintz v0.4h, v0.4h +frintz v0.4s, v0.4s +frintz v0.8h, v0.8h +frsqrte d21, d12 +frsqrte s22, s13 +frsqrte v0.2d, v0.2d +frsqrte v0.2s, v0.2s +frsqrte v0.4h, v0.4h +frsqrte v0.4s, v0.4s +frsqrte v0.8h, v0.8h +frsqrts d8, d22, d18 +frsqrts s21, s5, s12 +frsqrts v0.2d, v0.2d, v0.2d +fsqrt v0.2d, v0.2d +fsqrt v0.2s, v0.2s +fsqrt v0.4h, v0.4h +fsqrt v0.4s, v0.4s +fsqrt v0.8h, v0.8h +fsub v0.2s, v0.2s, v0.2s +ld1 { v0.16b }, [x0] +ld1 { v0.2d, v1.2d, v2.2d }, [x0], #48 +ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] +ld1 { v0.4s, v1.4s }, [sp], #32 +ld1 { v0.4s, v1.4s, v2.4s }, [sp] +ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3 +ld1 { v0.8h }, [x15], x2 +ld1 { v0.8h, v1.8h }, [x15] +ld1 { v0.b }[9], [x0] +ld1 { v0.b }[9], [x0], #1 +ld1r { v0.16b }, [x0] +ld1r { v0.16b }, [x0], #1 +ld1r { v0.8h }, [x15] +ld1r { v0.8h }, [x15], #2 +ld2 { v0.16b, v1.16b }, [x0], x1 +ld2 { v0.8b, v1.8b }, [x0] +ld2 { v0.h, v1.h }[7], [x15] +ld2 { v0.h, v1.h }[7], [x15], #4 +ld2r { v0.2d, v1.2d }, [x0] +ld2r { v0.2d, v1.2d }, [x0], #16 +ld2r { v0.4s, v1.4s }, [sp] +ld2r { v0.4s, v1.4s }, [sp], #8 +ld3 { v0.4h, v1.4h, v2.4h }, [x15] +ld3 { v0.8h, v1.8h, v2.8h }, [x15], x2 +ld3 { v0.s, v1.s, v2.s }[3], [sp] +ld3 { v0.s, v1.s, v2.s }[3], [sp], x3 +ld3r { v0.4h, v1.4h, v2.4h }, [x15] +ld3r { v0.4h, v1.4h, v2.4h }, [x15], #6 +ld3r { v0.8b, v1.8b, v2.8b }, [x0] +ld3r { v0.8b, v1.8b, v2.8b }, [x0], #3 +ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] +ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64 +ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0] +ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0], #32 +ld4 { v0.h, v1.h, v2.h, v3.h }[7], [x0], x0 +ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp] +ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp], x7 +ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] +ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x30 +mla v0.8b, v0.8b, v0.8b +mls v0.4h, v0.4h, v0.4h +mov b0, v0.b[15] +mov d6, v0.d[1] +mov h2, v0.h[5] +mov s17, v0.s[2] +mov v2.b[0], v0.b[0] +mov v2.h[1], v0.h[1] +mov v2.s[2], v0.s[2] +mov 
v2.d[1], v0.d[1] +mov v0.b[0], w8 +mov v0.h[1], w8 +mov v0.s[2], w8 +mov v0.d[1], x8 +mov v0.16b, v0.16b +mov v0.8b, v0.8b +movi d15, #0xff00ff00ff00ff +movi v0.16b, #31 +movi v0.2d, #0xff0000ff0000ffff +movi v0.2s, #8, msl #8 +movi v0.4s, #255, lsl #24 +movi v0.8b, #255 +mul v0.8b, v0.8b, v0.8b +mvni v0.2s, 0 +mvni v0.4s, #16, msl #16 +neg d29, d24 +neg v0.16b, v0.16b +neg v0.2d, v0.2d +neg v0.2s, v0.2s +neg v0.4h, v0.4h +neg v0.4s, v0.4s +neg v0.8b, v0.8b +neg v0.8h, v0.8h +not v0.16b, v0.16b +not v0.8b, v0.8b +orn v0.16b, v0.16b, v0.16b +orr v0.16b, v0.16b, v0.16b +orr v0.8h, #31 +pmul v0.16b, v0.16b, v0.16b +pmul v0.8b, v0.8b, v0.8b +pmull v0.8h, v0.8b, v0.8b +pmull2 v0.8h, v0.16b, v0.16b +raddhn v0.2s, v0.2d, v0.2d +raddhn v0.4h, v0.4s, v0.4s +raddhn v0.8b, v0.8h, v0.8h +raddhn2 v0.16b, v0.8h, v0.8h +raddhn2 v0.4s, v0.2d, v0.2d +raddhn2 v0.8h, v0.4s, v0.4s +rbit v0.16b, v0.16b +rbit v0.8b, v0.8b +rev16 v21.8b, v1.8b +rev16 v30.16b, v31.16b +rev32 v0.4h, v9.4h +rev32 v21.8b, v1.8b +rev32 v30.16b, v31.16b +rev32 v4.8h, v7.8h +rev64 v0.16b, v31.16b +rev64 v1.8b, v9.8b +rev64 v13.4h, v21.4h +rev64 v2.8h, v4.8h +rev64 v4.2s, v0.2s +rev64 v6.4s, v8.4s +rshrn v0.2s, v0.2d, #3 +rshrn v0.4h, v0.4s, #3 +rshrn v0.8b, v0.8h, #3 +rshrn2 v0.16b, v0.8h, #3 +rshrn2 v0.4s, v0.2d, #3 +rshrn2 v0.8h, v0.4s, #3 +rsubhn v0.2s, v0.2d, v0.2d +rsubhn v0.4h, v0.4s, v0.4s +rsubhn v0.8b, v0.8h, v0.8h +rsubhn2 v0.16b, v0.8h, v0.8h +rsubhn2 v0.4s, v0.2d, v0.2d +rsubhn2 v0.8h, v0.4s, v0.4s +saba v0.16b, v0.16b, v0.16b +sabal v0.2d, v0.2s, v0.2s +sabal v0.4s, v0.4h, v0.4h +sabal v0.8h, v0.8b, v0.8b +sabal2 v0.2d, v0.4s, v0.4s +sabal2 v0.4s, v0.8h, v0.8h +sabal2 v0.8h, v0.16b, v0.16b +sabd v0.4h, v0.4h, v0.4h +sabdl v0.2d, v0.2s, v0.2s +sabdl v0.4s, v0.4h, v0.4h +sabdl v0.8h, v0.8b, v0.8b +sabdl2 v0.2d, v0.4s, v0.4s +sabdl2 v0.4s, v0.8h, v0.8h +sabdl2 v0.8h, v0.16b, v0.16b +sadalp v0.1d, v0.2s +sadalp v0.2d, v0.4s +sadalp v0.2s, v0.4h +sadalp v0.4h, v0.8b +sadalp v0.4s, v0.8h +sadalp v0.8h, 
v0.16b +saddl v0.2d, v0.2s, v0.2s +saddl v0.4s, v0.4h, v0.4h +saddl v0.8h, v0.8b, v0.8b +saddl2 v0.2d, v0.4s, v0.4s +saddl2 v0.4s, v0.8h, v0.8h +saddl2 v0.8h, v0.16b, v0.16b +saddlp v0.1d, v0.2s +saddlp v0.2d, v0.4s +saddlp v0.2s, v0.4h +saddlp v0.4h, v0.8b +saddlp v0.4s, v0.8h +saddlp v0.8h, v0.16b +saddw v0.2d, v0.2d, v0.2s +saddw v0.4s, v0.4s, v0.4h +saddw v0.8h, v0.8h, v0.8b +saddw2 v0.2d, v0.2d, v0.4s +saddw2 v0.4s, v0.4s, v0.8h +saddw2 v0.8h, v0.8h, v0.16b +scvtf d21, d12 +scvtf d21, d12, #64 +scvtf s22, s13 +scvtf s22, s13, #32 +scvtf v0.2d, v0.2d +scvtf v0.2d, v0.2d, #3 +scvtf v0.2s, v0.2s +scvtf v0.2s, v0.2s, #3 +scvtf v0.4h, v0.4h +scvtf v0.4s, v0.4s +scvtf v0.4s, v0.4s, #3 +scvtf v0.8h, v0.8h +shadd v0.8b, v0.8b, v0.8b +shl d7, d10, #12 +shl v0.16b, v0.16b, #3 +shl v0.2d, v0.2d, #3 +shl v0.4h, v0.4h, #3 +shl v0.4s, v0.4s, #3 +shll v0.2d, v0.2s, #32 +shll v0.4s, v0.4h, #16 +shll v0.8h, v0.8b, #8 +shll v0.2d, v0.2s, #32 +shll v0.4s, v0.4h, #16 +shll v0.8h, v0.8b, #8 +shll2 v0.2d, v0.4s, #32 +shll2 v0.4s, v0.8h, #16 +shll2 v0.8h, v0.16b, #8 +shll2 v0.2d, v0.4s, #32 +shll2 v0.4s, v0.8h, #16 +shll2 v0.8h, v0.16b, #8 +shrn v0.2s, v0.2d, #3 +shrn v0.4h, v0.4s, #3 +shrn v0.8b, v0.8h, #3 +shrn2 v0.16b, v0.8h, #3 +shrn2 v0.4s, v0.2d, #3 +shrn2 v0.8h, v0.4s, #3 +shsub v0.2s, v0.2s, v0.2s +shsub v0.4h, v0.4h, v0.4h +sli d10, d14, #12 +sli v0.16b, v0.16b, #3 +sli v0.2d, v0.2d, #3 +sli v0.2s, v0.2s, #3 +sli v0.4h, v0.4h, #3 +sli v0.4s, v0.4s, #3 +sli v0.8b, v0.8b, #3 +sli v0.8h, v0.8h, #3 +smax v0.2s, v0.2s, v0.2s +smax v0.4h, v0.4h, v0.4h +smax v0.8b, v0.8b, v0.8b +smaxp v0.2s, v0.2s, v0.2s +smaxp v0.4h, v0.4h, v0.4h +smaxp v0.8b, v0.8b, v0.8b +smin v0.16b, v0.16b, v0.16b +smin v0.4s, v0.4s, v0.4s +smin v0.8h, v0.8h, v0.8h +sminp v0.16b, v0.16b, v0.16b +sminp v0.4s, v0.4s, v0.4s +sminp v0.8h, v0.8h, v0.8h +smlal v0.2d, v0.2s, v0.2s +smlal v0.4s, v0.4h, v0.4h +smlal v0.8h, v0.8b, v0.8b +smlal2 v0.2d, v0.4s, v0.4s +smlal2 v0.4s, v0.8h, v0.8h +smlal2 v0.8h, v0.16b, 
v0.16b +smlsl v0.2d, v0.2s, v0.2s +smlsl v0.4s, v0.4h, v0.4h +smlsl v0.8h, v0.8b, v0.8b +smlsl2 v0.2d, v0.4s, v0.4s +smlsl2 v0.4s, v0.8h, v0.8h +smlsl2 v0.8h, v0.16b, v0.16b +smull v0.2d, v0.2s, v0.2s +smull v0.4s, v0.4h, v0.4h +smull v0.8h, v0.8b, v0.8b +smull2 v0.2d, v0.4s, v0.4s +smull2 v0.4s, v0.8h, v0.8h +smull2 v0.8h, v0.16b, v0.16b +sqabs b19, b14 +sqabs d18, d12 +sqabs h21, h15 +sqabs s20, s12 +sqabs v0.16b, v0.16b +sqabs v0.2d, v0.2d +sqabs v0.2s, v0.2s +sqabs v0.4h, v0.4h +sqabs v0.4s, v0.4s +sqabs v0.8b, v0.8b +sqabs v0.8h, v0.8h +sqadd b20, b11, b15 +sqadd v0.16b, v0.16b, v0.16b +sqadd v0.2s, v0.2s, v0.2s +sqdmlal d19, s24, s12 +sqdmlal d8, s9, v0.s[1] +sqdmlal s0, h0, v0.h[3] +sqdmlal s17, h27, h12 +sqdmlal v0.2d, v0.2s, v0.2s +sqdmlal v0.4s, v0.4h, v0.4h +sqdmlal2 v0.2d, v0.4s, v0.4s +sqdmlal2 v0.4s, v0.8h, v0.8h +sqdmlsl d12, s23, s13 +sqdmlsl d8, s9, v0.s[1] +sqdmlsl s0, h0, v0.h[3] +sqdmlsl s14, h12, h25 +sqdmlsl v0.2d, v0.2s, v0.2s +sqdmlsl v0.4s, v0.4h, v0.4h +sqdmlsl2 v0.2d, v0.4s, v0.4s +sqdmlsl2 v0.4s, v0.8h, v0.8h +sqdmulh h10, h11, h12 +sqdmulh h7, h15, v0.h[3] +sqdmulh s15, s14, v0.s[1] +sqdmulh s20, s21, s2 +sqdmulh v0.2s, v0.2s, v0.2s +sqdmulh v0.4s, v0.4s, v0.4s +sqdmull d1, s1, v0.s[1] +sqdmull d15, s22, s12 +sqdmull s1, h1, v0.h[3] +sqdmull s12, h22, h12 +sqdmull v0.2d, v0.2s, v0.2s +sqdmull v0.4s, v0.4h, v0.4h +sqdmull2 v0.2d, v0.4s, v0.4s +sqdmull2 v0.4s, v0.8h, v0.8h +sqneg b19, b14 +sqneg d18, d12 +sqneg h21, h15 +sqneg s20, s12 +sqneg v0.16b, v0.16b +sqneg v0.2d, v0.2d +sqneg v0.2s, v0.2s +sqneg v0.4h, v0.4h +sqneg v0.4s, v0.4s +sqneg v0.8b, v0.8b +sqneg v0.8h, v0.8h +sqrdmulh h10, h11, h12 +sqrdmulh h7, h15, v0.h[3] +sqrdmulh s15, s14, v0.s[1] +sqrdmulh s20, s21, s2 +sqrdmulh v0.4h, v0.4h, v0.4h +sqrdmulh v0.8h, v0.8h, v0.8h +sqrshl d31, d31, d31 +sqrshl h3, h4, h15 +sqrshl v0.2s, v0.2s, v0.2s +sqrshl v0.4h, v0.4h, v0.4h +sqrshl v0.8b, v0.8b, v0.8b +sqrshrn b10, h13, #2 +sqrshrn h15, s10, #6 +sqrshrn s15, d12, #9 +sqrshrn v0.2s, 
v0.2d, #3 +sqrshrn v0.4h, v0.4s, #3 +sqrshrn v0.8b, v0.8h, #3 +sqrshrn2 v0.16b, v0.8h, #3 +sqrshrn2 v0.4s, v0.2d, #3 +sqrshrn2 v0.8h, v0.4s, #3 +sqrshrun b17, h10, #6 +sqrshrun h10, s13, #15 +sqrshrun s22, d16, #31 +sqrshrun v0.2s, v0.2d, #3 +sqrshrun v0.4h, v0.4s, #3 +sqrshrun v0.8b, v0.8h, #3 +sqrshrun2 v0.16b, v0.8h, #3 +sqrshrun2 v0.4s, v0.2d, #3 +sqrshrun2 v0.8h, v0.4s, #3 +sqshl b11, b19, #7 +sqshl d15, d16, #51 +sqshl d31, d31, d31 +sqshl h13, h18, #11 +sqshl h3, h4, h15 +sqshl s14, s17, #22 +sqshl v0.16b, v0.16b, #3 +sqshl v0.2d, v0.2d, #3 +sqshl v0.2s, v0.2s, #3 +sqshl v0.2s, v0.2s, v0.2s +sqshl v0.4h, v0.4h, #3 +sqshl v0.4h, v0.4h, v0.4h +sqshl v0.4s, v0.4s, #3 +sqshl v0.8b, v0.8b, #3 +sqshl v0.8b, v0.8b, v0.8b +sqshl v0.8h, v0.8h, #3 +sqshlu b15, b18, #6 +sqshlu d11, d13, #32 +sqshlu h19, h17, #6 +sqshlu s16, s14, #25 +sqshlu v0.16b, v0.16b, #3 +sqshlu v0.2d, v0.2d, #3 +sqshlu v0.2s, v0.2s, #3 +sqshlu v0.4h, v0.4h, #3 +sqshlu v0.4s, v0.4s, #3 +sqshlu v0.8b, v0.8b, #3 +sqshlu v0.8h, v0.8h, #3 +sqshrn b10, h15, #5 +sqshrn h17, s10, #4 +sqshrn s18, d10, #31 +sqshrn v0.2s, v0.2d, #3 +sqshrn v0.4h, v0.4s, #3 +sqshrn v0.8b, v0.8h, #3 +sqshrn2 v0.16b, v0.8h, #3 +sqshrn2 v0.4s, v0.2d, #3 +sqshrn2 v0.8h, v0.4s, #3 +sqshrun b15, h10, #7 +sqshrun h20, s14, #3 +sqshrun s10, d15, #15 +sqshrun v0.2s, v0.2d, #3 +sqshrun v0.4h, v0.4s, #3 +sqshrun v0.8b, v0.8h, #3 +sqshrun2 v0.16b, v0.8h, #3 +sqshrun2 v0.4s, v0.2d, #3 +sqshrun2 v0.8h, v0.4s, #3 +sqsub s20, s10, s7 +sqsub v0.2d, v0.2d, v0.2d +sqsub v0.4s, v0.4s, v0.4s +sqsub v0.8b, v0.8b, v0.8b +sqxtn b18, h18 +sqxtn h20, s17 +sqxtn s19, d14 +sqxtn v0.2s, v0.2d +sqxtn v0.4h, v0.4s +sqxtn v0.8b, v0.8h +sqxtn2 v0.16b, v0.8h +sqxtn2 v0.4s, v0.2d +sqxtn2 v0.8h, v0.4s +sqxtun b19, h14 +sqxtun h21, s15 +sqxtun s20, d12 +sqxtun v0.2s, v0.2d +sqxtun v0.4h, v0.4s +sqxtun v0.8b, v0.8h +sqxtun2 v0.16b, v0.8h +sqxtun2 v0.4s, v0.2d +sqxtun2 v0.8h, v0.4s +srhadd v0.2s, v0.2s, v0.2s +srhadd v0.4h, v0.4h, v0.4h +srhadd v0.8b, v0.8b, 
v0.8b +sri d10, d12, #14 +sri v0.16b, v0.16b, #3 +sri v0.2d, v0.2d, #3 +sri v0.2s, v0.2s, #3 +sri v0.4h, v0.4h, #3 +sri v0.4s, v0.4s, #3 +sri v0.8b, v0.8b, #3 +sri v0.8h, v0.8h, #3 +srshl d16, d16, d16 +srshl v0.2s, v0.2s, v0.2s +srshl v0.4h, v0.4h, v0.4h +srshl v0.8b, v0.8b, v0.8b +srshr d19, d18, #7 +srshr v0.16b, v0.16b, #3 +srshr v0.2d, v0.2d, #3 +srshr v0.2s, v0.2s, #3 +srshr v0.4h, v0.4h, #3 +srshr v0.4s, v0.4s, #3 +srshr v0.8b, v0.8b, #3 +srshr v0.8h, v0.8h, #3 +srsra d15, d11, #19 +srsra v0.16b, v0.16b, #3 +srsra v0.2d, v0.2d, #3 +srsra v0.2s, v0.2s, #3 +srsra v0.4h, v0.4h, #3 +srsra v0.4s, v0.4s, #3 +srsra v0.8b, v0.8b, #3 +srsra v0.8h, v0.8h, #3 +sshl d31, d31, d31 +sshl v0.2d, v0.2d, v0.2d +sshl v0.2s, v0.2s, v0.2s +sshl v0.4h, v0.4h, v0.4h +sshl v0.8b, v0.8b, v0.8b +sshll v0.2d, v0.2s, #3 +sshll2 v0.4s, v0.8h, #3 +sshr d15, d16, #12 +sshr v0.16b, v0.16b, #3 +sshr v0.2d, v0.2d, #3 +sshr v0.2s, v0.2s, #3 +sshr v0.4h, v0.4h, #3 +sshr v0.4s, v0.4s, #3 +sshr v0.8b, v0.8b, #3 +sshr v0.8h, v0.8h, #3 +ssra d18, d12, #21 +ssra v0.16b, v0.16b, #3 +ssra v0.2d, v0.2d, #3 +ssra v0.2s, v0.2s, #3 +ssra v0.4h, v0.4h, #3 +ssra v0.4s, v0.4s, #3 +ssra v0.8b, v0.8b, #3 +ssra v0.8h, v0.8h, #3 +ssubl v0.2d, v0.2s, v0.2s +ssubl v0.4s, v0.4h, v0.4h +ssubl v0.8h, v0.8b, v0.8b +ssubl2 v0.2d, v0.4s, v0.4s +ssubl2 v0.4s, v0.8h, v0.8h +ssubl2 v0.8h, v0.16b, v0.16b +ssubw v0.2d, v0.2d, v0.2s +ssubw v0.4s, v0.4s, v0.4h +ssubw v0.8h, v0.8h, v0.8b +ssubw2 v0.2d, v0.2d, v0.4s +ssubw2 v0.4s, v0.4s, v0.8h +ssubw2 v0.8h, v0.8h, v0.16b +st1 { v0.16b }, [x0] +st1 { v0.2d, v1.2d, v2.2d }, [x0], #48 +st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] +st1 { v0.4s, v1.4s }, [sp], #32 +st1 { v0.4s, v1.4s, v2.4s }, [sp] +st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3 +st1 { v0.8h }, [x15], x2 +st1 { v0.8h, v1.8h }, [x15] +st1 { v0.d }[1], [x0] +st1 { v0.d }[1], [x0], #8 +st2 { v0.16b, v1.16b }, [x0], x1 +st2 { v0.8b, v1.8b }, [x0] +st2 { v0.s, v1.s }[3], [sp] +st2 { v0.s, v1.s }[3], [sp], #8 +st3 { v0.4h, 
v1.4h, v2.4h }, [x15] +st3 { v0.8h, v1.8h, v2.8h }, [x15], x2 +st3 { v0.h, v1.h, v2.h }[7], [x15] +st3 { v0.h, v1.h, v2.h }[7], [x15], #6 +st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] +st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64 +st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0] +st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0], x5 +sub d15, d5, d16 +sub v0.2d, v0.2d, v0.2d +suqadd b19, b14 +suqadd d18, d22 +suqadd h20, h15 +suqadd s21, s12 +suqadd v0.16b, v0.16b +suqadd v0.2d, v0.2d +suqadd v0.2s, v0.2s +suqadd v0.4h, v0.4h +suqadd v0.4s, v0.4s +suqadd v0.8b, v0.8b +suqadd v0.8h, v0.8h +tbl v0.16b, { v0.16b }, v0.16b +tbl v0.16b, { v0.16b, v1.16b }, v0.16b +tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b +tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b +tbl v0.8b, { v0.16b }, v0.8b +tbl v0.8b, { v0.16b, v1.16b }, v0.8b +tbl v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b +tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b +tbx v0.16b, { v0.16b }, v0.16b +tbx v0.16b, { v0.16b, v1.16b }, v0.16b +tbx v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b +tbx v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b +tbx v0.8b, { v0.16b }, v0.8b +tbx v0.8b, { v0.16b, v1.16b }, v0.8b +tbx v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b +tbx v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b +trn1 v0.16b, v0.16b, v0.16b +trn1 v0.2d, v0.2d, v0.2d +trn1 v0.2s, v0.2s, v0.2s +trn1 v0.4h, v0.4h, v0.4h +trn1 v0.4s, v0.4s, v0.4s +trn1 v0.8b, v0.8b, v0.8b +trn1 v0.8h, v0.8h, v0.8h +trn2 v0.16b, v0.16b, v0.16b +trn2 v0.2d, v0.2d, v0.2d +trn2 v0.2s, v0.2s, v0.2s +trn2 v0.4h, v0.4h, v0.4h +trn2 v0.4s, v0.4s, v0.4s +trn2 v0.8b, v0.8b, v0.8b +trn2 v0.8h, v0.8h, v0.8h +uaba v0.8b, v0.8b, v0.8b +uabal v0.2d, v0.2s, v0.2s +uabal v0.4s, v0.4h, v0.4h +uabal v0.8h, v0.8b, v0.8b +uabal2 v0.2d, v0.4s, v0.4s +uabal2 v0.4s, v0.8h, v0.8h +uabal2 v0.8h, v0.16b, v0.16b +uabd v0.4h, v0.4h, v0.4h +uabdl v0.2d, v0.2s, v0.2s +uabdl v0.4s, v0.4h, v0.4h +uabdl v0.8h, v0.8b, v0.8b +uabdl2 v0.2d, v0.4s, v0.4s +uabdl2 v0.4s, v0.8h, v0.8h +uabdl2 
v0.8h, v0.16b, v0.16b +uadalp v0.1d, v0.2s +uadalp v0.2d, v0.4s +uadalp v0.2s, v0.4h +uadalp v0.4h, v0.8b +uadalp v0.4s, v0.8h +uadalp v0.8h, v0.16b +uaddl v0.2d, v0.2s, v0.2s +uaddl v0.4s, v0.4h, v0.4h +uaddl v0.8h, v0.8b, v0.8b +uaddl2 v0.2d, v0.4s, v0.4s +uaddl2 v0.4s, v0.8h, v0.8h +uaddl2 v0.8h, v0.16b, v0.16b +uaddlp v0.1d, v0.2s +uaddlp v0.2d, v0.4s +uaddlp v0.2s, v0.4h +uaddlp v0.4h, v0.8b +uaddlp v0.4s, v0.8h +uaddlp v0.8h, v0.16b +uaddw v0.2d, v0.2d, v0.2s +uaddw v0.4s, v0.4s, v0.4h +uaddw v0.8h, v0.8h, v0.8b +uaddw2 v0.2d, v0.2d, v0.4s +uaddw2 v0.4s, v0.4s, v0.8h +uaddw2 v0.8h, v0.8h, v0.16b +ucvtf d21, d14 +ucvtf d21, d14, #64 +ucvtf s22, s13 +ucvtf s22, s13, #32 +ucvtf v0.2d, v0.2d +ucvtf v0.2d, v0.2d, #3 +ucvtf v0.2s, v0.2s +ucvtf v0.2s, v0.2s, #3 +ucvtf v0.4h, v0.4h +ucvtf v0.4s, v0.4s +ucvtf v0.4s, v0.4s, #3 +ucvtf v0.8h, v0.8h +uhadd v0.16b, v0.16b, v0.16b +uhadd v0.8h, v0.8h, v0.8h +uhsub v0.4s, v0.4s, v0.4s +umax v0.16b, v0.16b, v0.16b +umax v0.4s, v0.4s, v0.4s +umax v0.8h, v0.8h, v0.8h +umaxp v0.16b, v0.16b, v0.16b +umaxp v0.4s, v0.4s, v0.4s +umaxp v0.8h, v0.8h, v0.8h +umin v0.2s, v0.2s, v0.2s +umin v0.4h, v0.4h, v0.4h +umin v0.8b, v0.8b, v0.8b +uminp v0.2s, v0.2s, v0.2s +uminp v0.4h, v0.4h, v0.4h +uminp v0.8b, v0.8b, v0.8b +umlal v0.2d, v0.2s, v0.2s +umlal v0.4s, v0.4h, v0.4h +umlal v0.8h, v0.8b, v0.8b +umlal2 v0.2d, v0.4s, v0.4s +umlal2 v0.4s, v0.8h, v0.8h +umlal2 v0.8h, v0.16b, v0.16b +umlsl v0.2d, v0.2s, v0.2s +umlsl v0.4s, v0.4h, v0.4h +umlsl v0.8h, v0.8b, v0.8b +umlsl2 v0.2d, v0.4s, v0.4s +umlsl2 v0.4s, v0.8h, v0.8h +umlsl2 v0.8h, v0.16b, v0.16b +umull v0.2d, v0.2s, v0.2s +umull v0.4s, v0.4h, v0.4h +umull v0.8h, v0.8b, v0.8b +umull2 v0.2d, v0.4s, v0.4s +umull2 v0.4s, v0.8h, v0.8h +umull2 v0.8h, v0.16b, v0.16b +uqadd h0, h1, h5 +uqadd v0.8h, v0.8h, v0.8h +uqrshl b11, b20, b30 +uqrshl s23, s20, s16 +uqrshl v0.16b, v0.16b, v0.16b +uqrshl v0.4s, v0.4s, v0.4s +uqrshl v0.4s, v0.4s, v0.4s +uqrshl v0.8h, v0.8h, v0.8h +uqrshrn b10, h12, #5 +uqrshrn 
h12, s10, #14 +uqrshrn s10, d10, #25 +uqrshrn v0.2s, v0.2d, #3 +uqrshrn v0.4h, v0.4s, #3 +uqrshrn v0.8b, v0.8h, #3 +uqrshrn2 v0.16b, v0.8h, #3 +uqrshrn2 v0.4s, v0.2d, #3 +uqrshrn2 v0.8h, v0.4s, #3 +uqshl b11, b20, b30 +uqshl b18, b15, #6 +uqshl d15, d12, #19 +uqshl h11, h18, #7 +uqshl s14, s19, #18 +uqshl s23, s20, s16 +uqshl v0.16b, v0.16b, #3 +uqshl v0.16b, v0.16b, v0.16b +uqshl v0.2d, v0.2d, #3 +uqshl v0.2d, v0.2d, v0.2d +uqshl v0.2s, v0.2s, #3 +uqshl v0.4h, v0.4h, #3 +uqshl v0.4s, v0.4s, #3 +uqshl v0.4s, v0.4s, v0.4s +uqshl v0.8b, v0.8b, #3 +uqshl v0.8h, v0.8h, #3 +uqshl v0.8h, v0.8h, v0.8h +uqshrn b12, h10, #7 +uqshrn h10, s14, #5 +uqshrn s10, d12, #13 +uqshrn v0.2s, v0.2d, #3 +uqshrn v0.4h, v0.4s, #3 +uqshrn v0.8b, v0.8h, #3 +uqshrn2 v0.16b, v0.8h, #3 +uqshrn2 v0.4s, v0.2d, #3 +uqshrn2 v0.8h, v0.4s, #3 +uqsub d16, d16, d16 +uqsub v0.4h, v0.4h, v0.4h +uqxtn b18, h18 +uqxtn h20, s17 +uqxtn s19, d14 +uqxtn v0.2s, v0.2d +uqxtn v0.4h, v0.4s +uqxtn v0.8b, v0.8h +uqxtn2 v0.16b, v0.8h +uqxtn2 v0.4s, v0.2d +uqxtn2 v0.8h, v0.4s +urecpe v0.2s, v0.2s +urecpe v0.4s, v0.4s +urhadd v0.16b, v0.16b, v0.16b +urhadd v0.4s, v0.4s, v0.4s +urhadd v0.8h, v0.8h, v0.8h +urshl d8, d7, d4 +urshl v0.16b, v0.16b, v0.16b +urshl v0.2d, v0.2d, v0.2d +urshl v0.4s, v0.4s, v0.4s +urshl v0.8h, v0.8h, v0.8h +urshr d20, d23, #31 +urshr v0.16b, v0.16b, #3 +urshr v0.2d, v0.2d, #3 +urshr v0.2s, v0.2s, #3 +urshr v0.4h, v0.4h, #3 +urshr v0.4s, v0.4s, #3 +urshr v0.8b, v0.8b, #3 +urshr v0.8h, v0.8h, #3 +ursqrte v0.2s, v0.2s +ursqrte v0.4s, v0.4s +ursra d18, d10, #13 +ursra v0.16b, v0.16b, #3 +ursra v0.2d, v0.2d, #3 +ursra v0.2s, v0.2s, #3 +ursra v0.4h, v0.4h, #3 +ursra v0.4s, v0.4s, #3 +ursra v0.8b, v0.8b, #3 +ursra v0.8h, v0.8h, #3 +ushl d0, d0, d0 +ushl v0.16b, v0.16b, v0.16b +ushl v0.4s, v0.4s, v0.4s +ushl v0.8h, v0.8h, v0.8h +ushll v0.4s, v0.4h, #3 +ushll2 v0.8h, v0.16b, #3 +ushr d10, d17, #18 +ushr v0.16b, v0.16b, #3 +ushr v0.2d, v0.2d, #3 +ushr v0.2s, v0.2s, #3 +ushr v0.4h, v0.4h, #3 +ushr v0.4s, 
v0.4s, #3 +ushr v0.8b, v0.8b, #3 +ushr v0.8h, v0.8h, #3 +usqadd b19, b14 +usqadd d18, d22 +usqadd h20, h15 +usqadd s21, s12 +usqadd v0.16b, v0.16b +usqadd v0.2d, v0.2d +usqadd v0.2s, v0.2s +usqadd v0.4h, v0.4h +usqadd v0.4s, v0.4s +usqadd v0.8b, v0.8b +usqadd v0.8h, v0.8h +usra d20, d13, #61 +usra v0.16b, v0.16b, #3 +usra v0.2d, v0.2d, #3 +usra v0.2s, v0.2s, #3 +usra v0.4h, v0.4h, #3 +usra v0.4s, v0.4s, #3 +usra v0.8b, v0.8b, #3 +usra v0.8h, v0.8h, #3 +usubl v0.2d, v0.2s, v0.2s +usubl v0.4s, v0.4h, v0.4h +usubl v0.8h, v0.8b, v0.8b +usubl2 v0.2d, v0.4s, v0.4s +usubl2 v0.4s, v0.8h, v0.8h +usubl2 v0.8h, v0.16b, v0.16b +usubw v0.2d, v0.2d, v0.2s +usubw v0.4s, v0.4s, v0.4h +usubw v0.8h, v0.8h, v0.8b +usubw2 v0.2d, v0.2d, v0.4s +usubw2 v0.4s, v0.4s, v0.8h +usubw2 v0.8h, v0.8h, v0.16b +uzp1 v0.16b, v0.16b, v0.16b +uzp1 v0.2d, v0.2d, v0.2d +uzp1 v0.2s, v0.2s, v0.2s +uzp1 v0.4h, v0.4h, v0.4h +uzp1 v0.4s, v0.4s, v0.4s +uzp1 v0.8b, v0.8b, v0.8b +uzp1 v0.8h, v0.8h, v0.8h +uzp2 v0.16b, v0.16b, v0.16b +uzp2 v0.2d, v0.2d, v0.2d +uzp2 v0.2s, v0.2s, v0.2s +uzp2 v0.4h, v0.4h, v0.4h +uzp2 v0.4s, v0.4s, v0.4s +uzp2 v0.8b, v0.8b, v0.8b +uzp2 v0.8h, v0.8h, v0.8h +xtn v0.2s, v0.2d +xtn v0.4h, v0.4s +xtn v0.8b, v0.8h +xtn2 v0.16b, v0.8h +xtn2 v0.4s, v0.2d +xtn2 v0.8h, v0.4s +zip1 v0.16b, v0.16b, v0.16b +zip1 v0.2d, v0.2d, v0.2d +zip1 v0.2s, v0.2s, v0.2s +zip1 v0.4h, v0.4h, v0.4h +zip1 v0.4s, v0.4s, v0.4s +zip1 v0.8b, v0.8b, v0.8b +zip1 v0.8h, v0.8h, v0.8h +zip2 v0.16b, v0.16b, v0.16b +zip2 v0.2d, v0.2d, v0.2d +zip2 v0.2s, v0.2s, v0.2s +zip2 v0.4h, v0.4h, v0.4h +zip2 v0.4s, v0.4s, v0.4s +zip2 v0.8b, v0.8b, v0.8b +zip2 v0.8h, v0.8h, v0.8h + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 2 0.50 abs d29, d24 +# CHECK-NEXT: 1 2 0.50 abs v0.16b, v0.16b +# CHECK-NEXT: 1 
2 0.50 abs v0.2d, v0.2d +# CHECK-NEXT: 1 2 0.50 abs v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 abs v0.4h, v0.4h +# CHECK-NEXT: 1 2 0.50 abs v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 abs v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 abs v0.8h, v0.8h +# CHECK-NEXT: 1 2 0.50 add d17, d31, d29 +# CHECK-NEXT: 1 2 0.50 add v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 2 6 1.00 addhn v0.2s, v0.2d, v0.2d +# CHECK-NEXT: 2 6 1.00 addhn v0.4h, v0.4s, v0.4s +# CHECK-NEXT: 2 6 1.00 addhn v0.8b, v0.8h, v0.8h +# CHECK-NEXT: 2 6 1.00 addhn2 v0.16b, v0.8h, v0.8h +# CHECK-NEXT: 2 6 1.00 addhn2 v0.4s, v0.2d, v0.2d +# CHECK-NEXT: 2 6 1.00 addhn2 v0.8h, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 addp v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 2 0.50 addp v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 and v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 bic v0.4h, #15, lsl #8 +# CHECK-NEXT: 1 2 0.50 bic v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 3 0.50 bif v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 3 0.50 bit v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 3 0.50 bsl v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 cls v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 cls v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 cls v0.4h, v0.4h +# CHECK-NEXT: 1 2 0.50 cls v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 cls v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 cls v0.8h, v0.8h +# CHECK-NEXT: 1 2 0.50 clz v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 clz v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 clz v0.4h, v0.4h +# CHECK-NEXT: 1 2 0.50 clz v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 clz v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 clz v0.8h, v0.8h +# CHECK-NEXT: 1 2 0.50 cmeq d20, d21, #0 +# CHECK-NEXT: 1 2 0.50 cmeq d20, d21, d22 +# CHECK-NEXT: 1 2 0.50 cmeq v0.16b, v0.16b, #0 +# CHECK-NEXT: 1 2 0.50 cmeq v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 cmge d20, d21, #0 +# CHECK-NEXT: 1 2 0.50 cmge d20, d21, d22 +# CHECK-NEXT: 1 2 0.50 cmge v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 2 0.50 cmge v0.8b, v0.8b, #0 +# CHECK-NEXT: 1 2 0.50 cmgt d20, d21, #0 +# CHECK-NEXT: 1 2 0.50 cmgt d20, d21, d22 +# CHECK-NEXT: 1 2 0.50 cmgt v0.2s, 
v0.2s, #0 +# CHECK-NEXT: 1 2 0.50 cmgt v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 cmhi d20, d21, d22 +# CHECK-NEXT: 1 2 0.50 cmhi v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 2 0.50 cmhs d20, d21, d22 +# CHECK-NEXT: 1 2 0.50 cmhs v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 cmle d20, d21, #0 +# CHECK-NEXT: 1 2 0.50 cmle v0.2d, v0.2d, #0 +# CHECK-NEXT: 1 2 0.50 cmlt d20, d21, #0 +# CHECK-NEXT: 1 2 0.50 cmlt v0.8h, v0.8h, #0 +# CHECK-NEXT: 1 2 0.50 cmtst d20, d21, d22 +# CHECK-NEXT: 1 2 0.50 cmtst v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 cnt v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 cnt v0.8b, v0.8b +# CHECK-NEXT: 1 5 1.00 dup v0.16b, w28 +# CHECK-NEXT: 1 5 1.00 dup v0.2d, x28 +# CHECK-NEXT: 1 5 1.00 dup v0.2s, w28 +# CHECK-NEXT: 1 5 1.00 dup v0.4h, w28 +# CHECK-NEXT: 1 5 1.00 dup v0.4s, w28 +# CHECK-NEXT: 1 5 1.00 dup v0.8b, w28 +# CHECK-NEXT: 1 5 1.00 dup v0.8h, w28 +# CHECK-NEXT: 1 2 0.50 eor v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 ext v0.16b, v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 2 0.50 ext v0.8b, v0.8b, v0.8b, #3 +# CHECK-NEXT: 1 3 0.50 fabd d29, d24, d20 +# CHECK-NEXT: 1 3 0.50 fabd s29, s24, s20 +# CHECK-NEXT: 1 3 0.50 fabd v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 fabs v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 fabs v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 fabs v0.4h, v0.4h +# CHECK-NEXT: 1 3 0.50 fabs v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 fabs v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 facge d20, d21, d22 +# CHECK-NEXT: 1 3 0.50 facge s10, s11, s12 +# CHECK-NEXT: 1 3 0.50 facge v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 facgt d20, d21, d22 +# CHECK-NEXT: 1 3 0.50 facgt s10, s11, s12 +# CHECK-NEXT: 1 3 0.50 facgt v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 fadd v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 faddp v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 faddp v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 fcmeq d20, d21, #0.0 +# CHECK-NEXT: 1 3 0.50 fcmeq d20, d21, d22 +# CHECK-NEXT: 1 3 0.50 fcmeq s10, s11, #0.0 +# CHECK-NEXT: 1 3 0.50 fcmeq s10, s11, s12 +# CHECK-NEXT: 1 3 
0.50 fcmeq v0.2s, v0.2s, #0.0 +# CHECK-NEXT: 1 3 0.50 fcmeq v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 fcmge d20, d21, #0.0 +# CHECK-NEXT: 1 3 0.50 fcmge d20, d21, d22 +# CHECK-NEXT: 1 3 0.50 fcmge s10, s11, #0.0 +# CHECK-NEXT: 1 3 0.50 fcmge s10, s11, s12 +# CHECK-NEXT: 1 3 0.50 fcmge v0.2d, v0.2d, #0.0 +# CHECK-NEXT: 1 3 0.50 fcmge v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 fcmgt d20, d21, #0.0 +# CHECK-NEXT: 1 3 0.50 fcmgt d20, d21, d22 +# CHECK-NEXT: 1 3 0.50 fcmgt s10, s11, #0.0 +# CHECK-NEXT: 1 3 0.50 fcmgt s10, s11, s12 +# CHECK-NEXT: 1 3 0.50 fcmgt v0.4s, v0.4s, #0.0 +# CHECK-NEXT: 1 3 0.50 fcmgt v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 fcmle d20, d21, #0.0 +# CHECK-NEXT: 1 3 0.50 fcmle s10, s11, #0.0 +# CHECK-NEXT: 1 3 0.50 fcmle v0.2d, v0.2d, #0.0 +# CHECK-NEXT: 1 3 0.50 fcmlt d20, d21, #0.0 +# CHECK-NEXT: 1 3 0.50 fcmlt s10, s11, #0.0 +# CHECK-NEXT: 1 3 0.50 fcmlt v0.4s, v0.4s, #0.0 +# CHECK-NEXT: 1 3 0.50 fcvtas d21, d14 +# CHECK-NEXT: 1 3 0.50 fcvtas s12, s13 +# CHECK-NEXT: 1 3 0.50 fcvtas v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 fcvtas v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 fcvtas v0.4h, v0.4h +# CHECK-NEXT: 1 3 0.50 fcvtas v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 fcvtas v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 fcvtau d21, d14 +# CHECK-NEXT: 1 3 0.50 fcvtau s12, s13 +# CHECK-NEXT: 1 3 0.50 fcvtau v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 fcvtau v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 fcvtau v0.4h, v0.4h +# CHECK-NEXT: 1 3 0.50 fcvtau v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 fcvtau v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 fcvtl v0.2d, v0.2s +# CHECK-NEXT: 1 3 0.50 fcvtl v0.4s, v0.4h +# CHECK-NEXT: 1 3 0.50 fcvtl2 v0.2d, v0.4s +# CHECK-NEXT: 1 3 0.50 fcvtl2 v0.4s, v0.8h +# CHECK-NEXT: 1 3 0.50 fcvtms d21, d14 +# CHECK-NEXT: 1 3 0.50 fcvtms s22, s13 +# CHECK-NEXT: 1 3 0.50 fcvtms v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 fcvtms v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 fcvtms v0.4h, v0.4h +# CHECK-NEXT: 1 3 0.50 fcvtms v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 fcvtms v0.8h, v0.8h +# 
CHECK-NEXT: 1 3 0.50 fcvtmu d21, d14 +# CHECK-NEXT: 1 3 0.50 fcvtmu s12, s13 +# CHECK-NEXT: 1 3 0.50 fcvtmu v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 fcvtmu v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 fcvtmu v0.4h, v0.4h +# CHECK-NEXT: 1 3 0.50 fcvtmu v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 fcvtmu v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 fcvtn v0.2s, v0.2d +# CHECK-NEXT: 1 3 0.50 fcvtn v0.4h, v0.4s +# CHECK-NEXT: 1 3 0.50 fcvtn2 v0.4s, v0.2d +# CHECK-NEXT: 1 3 0.50 fcvtn2 v0.8h, v0.4s +# CHECK-NEXT: 1 3 0.50 fcvtns d21, d14 +# CHECK-NEXT: 1 3 0.50 fcvtns s22, s13 +# CHECK-NEXT: 1 3 0.50 fcvtns v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 fcvtns v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 fcvtns v0.4h, v0.4h +# CHECK-NEXT: 1 3 0.50 fcvtns v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 fcvtns v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 fcvtnu d21, d14 +# CHECK-NEXT: 1 3 0.50 fcvtnu s12, s13 +# CHECK-NEXT: 1 3 0.50 fcvtnu v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 fcvtnu v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 fcvtnu v0.4h, v0.4h +# CHECK-NEXT: 1 3 0.50 fcvtnu v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 fcvtnu v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 fcvtps d21, d14 +# CHECK-NEXT: 1 3 0.50 fcvtps s22, s13 +# CHECK-NEXT: 1 3 0.50 fcvtps v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 fcvtps v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 fcvtps v0.4h, v0.4h +# CHECK-NEXT: 1 3 0.50 fcvtps v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 fcvtps v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 fcvtpu d21, d14 +# CHECK-NEXT: 1 3 0.50 fcvtpu s12, s13 +# CHECK-NEXT: 1 3 0.50 fcvtpu v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 fcvtpu v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 fcvtpu v0.4h, v0.4h +# CHECK-NEXT: 1 3 0.50 fcvtpu v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 fcvtpu v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 fcvtxn s22, d13 +# CHECK-NEXT: 1 3 0.50 fcvtxn v0.2s, v0.2d +# CHECK-NEXT: 1 3 0.50 fcvtxn2 v0.4s, v0.2d +# CHECK-NEXT: 1 3 0.50 fcvtzs d21, d12, #1 +# CHECK-NEXT: 1 3 0.50 fcvtzs d21, d14 +# CHECK-NEXT: 1 3 0.50 fcvtzs s12, s13 +# CHECK-NEXT: 1 3 0.50 fcvtzs s21, s12, #1 +# CHECK-NEXT: 1 3 0.50 fcvtzs 
v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 fcvtzs v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 3 0.50 fcvtzs v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 fcvtzs v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 3 0.50 fcvtzs v0.4h, v0.4h +# CHECK-NEXT: 1 3 0.50 fcvtzs v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 fcvtzs v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 3 0.50 fcvtzs v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 fcvtzu d21, d12, #1 +# CHECK-NEXT: 1 3 0.50 fcvtzu d21, d14 +# CHECK-NEXT: 1 3 0.50 fcvtzu s12, s13 +# CHECK-NEXT: 1 3 0.50 fcvtzu s21, s12, #1 +# CHECK-NEXT: 1 3 0.50 fcvtzu v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 fcvtzu v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 3 0.50 fcvtzu v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 fcvtzu v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 3 0.50 fcvtzu v0.4h, v0.4h +# CHECK-NEXT: 1 3 0.50 fcvtzu v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 fcvtzu v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 3 0.50 fcvtzu v0.8h, v0.8h +# CHECK-NEXT: 1 12 1.00 fdiv v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 fmax v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 fmax v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 fmax v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 fmaxnm v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 fmaxnm v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 fmaxnm v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 fmaxnmp v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 fmaxnmp v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 fmaxnmp v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 fmaxp v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 fmaxp v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 fmaxp v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 fmin v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 fmin v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 fmin v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 fminnm v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 fminnm v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 fminnm v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 fminnmp v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 fminnmp v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 fminnmp v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 3 
0.50 fminp v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 fminp v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 fminp v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 4 0.50 fmla d0, d1, v0.d[1] +# CHECK-NEXT: 1 4 0.50 fmla s0, s1, v0.s[3] +# CHECK-NEXT: 1 4 0.50 fmla v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 4 0.50 fmls d0, d4, v0.d[1] +# CHECK-NEXT: 1 4 0.50 fmls s3, s5, v0.s[3] +# CHECK-NEXT: 1 4 0.50 fmls v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 fmov v0.2d, #-1.25000000 +# CHECK-NEXT: 1 2 0.50 fmov v0.2s, #13.00000000 +# CHECK-NEXT: 1 2 0.50 fmov v0.4s, #1.00000000 +# CHECK-NEXT: 1 4 0.50 fmul d0, d1, v0.d[1] +# CHECK-NEXT: 1 4 0.50 fmul s0, s1, v0.s[3] +# CHECK-NEXT: 1 4 0.50 fmul v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 4 0.50 fmulx d0, d4, v0.d[1] +# CHECK-NEXT: 1 4 0.50 fmulx d23, d11, d1 +# CHECK-NEXT: 1 4 0.50 fmulx s20, s22, s15 +# CHECK-NEXT: 1 4 0.50 fmulx s3, s5, v0.s[3] +# CHECK-NEXT: 1 4 0.50 fmulx v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 4 0.50 fmulx v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 4 0.50 fmulx v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 fneg v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 fneg v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 fneg v0.4h, v0.4h +# CHECK-NEXT: 1 3 0.50 fneg v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 fneg v0.8h, v0.8h +# CHECK-NEXT: 2 6 1.00 frecpe d13, d13 +# CHECK-NEXT: 2 6 1.00 frecpe s19, s14 +# CHECK-NEXT: 2 6 1.00 frecpe v0.2d, v0.2d +# CHECK-NEXT: 2 6 1.00 frecpe v0.2s, v0.2s +# CHECK-NEXT: 2 6 1.00 frecpe v0.4h, v0.4h +# CHECK-NEXT: 2 6 1.00 frecpe v0.4s, v0.4s +# CHECK-NEXT: 2 6 1.00 frecpe v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 frecps v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 frecps d22, d30, d21 +# CHECK-NEXT: 1 3 0.50 frecps s21, s16, s13 +# CHECK-NEXT: 1 3 0.50 frecpx d16, d19 +# CHECK-NEXT: 1 3 0.50 frecpx s18, s10 +# CHECK-NEXT: 1 3 0.50 frinta v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 frinta v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 frinta v0.4h, v0.4h +# CHECK-NEXT: 1 3 0.50 frinta v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 frinta v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 
frinti v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 frinti v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 frinti v0.4h, v0.4h +# CHECK-NEXT: 1 3 0.50 frinti v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 frinti v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 frintm v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 frintm v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 frintm v0.4h, v0.4h +# CHECK-NEXT: 1 3 0.50 frintm v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 frintm v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 frintn v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 frintn v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 frintn v0.4h, v0.4h +# CHECK-NEXT: 1 3 0.50 frintn v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 frintn v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 frintp v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 frintp v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 frintp v0.4h, v0.4h +# CHECK-NEXT: 1 3 0.50 frintp v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 frintp v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 frintx v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 frintx v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 frintx v0.4h, v0.4h +# CHECK-NEXT: 1 3 0.50 frintx v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 frintx v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 frintz v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 frintz v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 frintz v0.4h, v0.4h +# CHECK-NEXT: 1 3 0.50 frintz v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 frintz v0.8h, v0.8h +# CHECK-NEXT: 2 6 1.00 frsqrte d21, d12 +# CHECK-NEXT: 2 6 1.00 frsqrte s22, s13 +# CHECK-NEXT: 2 6 1.00 frsqrte v0.2d, v0.2d +# CHECK-NEXT: 2 6 1.00 frsqrte v0.2s, v0.2s +# CHECK-NEXT: 2 6 1.00 frsqrte v0.4h, v0.4h +# CHECK-NEXT: 2 6 1.00 frsqrte v0.4s, v0.4s +# CHECK-NEXT: 2 6 1.00 frsqrte v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 frsqrts d8, d22, d18 +# CHECK-NEXT: 1 3 0.50 frsqrts s21, s5, s12 +# CHECK-NEXT: 1 3 0.50 frsqrts v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 63 1.00 fsqrt v0.2d, v0.2d +# CHECK-NEXT: 1 33 1.00 fsqrt v0.2s, v0.2s +# CHECK-NEXT: 1 39 1.00 fsqrt v0.4h, v0.4h +# CHECK-NEXT: 1 33 1.00 fsqrt v0.4s, v0.4s +# CHECK-NEXT: 1 39 1.00 fsqrt v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 fsub 
v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 4 0.50 * ld1 { v0.16b }, [x0] +# CHECK-NEXT: 3 5 1.50 * ld1 { v0.2d, v1.2d, v2.2d }, [x0], #48 +# CHECK-NEXT: 4 5 2.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] +# CHECK-NEXT: 2 4 1.00 * ld1 { v0.4s, v1.4s }, [sp], #32 +# CHECK-NEXT: 3 5 1.50 * ld1 { v0.4s, v1.4s, v2.4s }, [sp] +# CHECK-NEXT: 4 5 2.00 * ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3 +# CHECK-NEXT: 1 4 0.50 * ld1 { v0.8h }, [x15], x2 +# CHECK-NEXT: 2 4 1.00 * ld1 { v0.8h, v1.8h }, [x15] +# CHECK-NEXT: 2 6 0.50 * ld1 { v0.b }[9], [x0] +# CHECK-NEXT: 2 6 0.50 * ld1 { v0.b }[9], [x0], #1 +# CHECK-NEXT: 2 6 0.50 * ld1r { v0.16b }, [x0] +# CHECK-NEXT: 2 6 0.50 * ld1r { v0.16b }, [x0], #1 +# CHECK-NEXT: 2 6 0.50 * ld1r { v0.8h }, [x15] +# CHECK-NEXT: 2 6 0.50 * ld1r { v0.8h }, [x15], #2 +# CHECK-NEXT: 4 6 1.00 * ld2 { v0.16b, v1.16b }, [x0], x1 +# CHECK-NEXT: 5 8 1.50 * ld2 { v0.8b, v1.8b }, [x0] +# CHECK-NEXT: 4 6 1.00 * ld2 { v0.h, v1.h }[7], [x15] +# CHECK-NEXT: 4 6 1.00 * ld2 { v0.h, v1.h }[7], [x15], #4 +# CHECK-NEXT: 4 6 1.00 * ld2r { v0.2d, v1.2d }, [x0] +# CHECK-NEXT: 4 6 1.00 * ld2r { v0.2d, v1.2d }, [x0], #16 +# CHECK-NEXT: 4 6 1.00 * ld2r { v0.4s, v1.4s }, [sp] +# CHECK-NEXT: 4 6 1.00 * ld2r { v0.4s, v1.4s }, [sp], #8 +# CHECK-NEXT: 6 9 1.50 * ld3 { v0.4h, v1.4h, v2.4h }, [x15] +# CHECK-NEXT: 6 8 1.50 * ld3 { v0.8h, v1.8h, v2.8h }, [x15], x2 +# CHECK-NEXT: 6 7 1.50 * ld3 { v0.s, v1.s, v2.s }[3], [sp] +# CHECK-NEXT: 6 7 1.50 * ld3 { v0.s, v1.s, v2.s }[3], [sp], x3 +# CHECK-NEXT: 6 7 1.50 * ld3r { v0.4h, v1.4h, v2.4h }, [x15] +# CHECK-NEXT: 6 7 1.50 * ld3r { v0.4h, v1.4h, v2.4h }, [x15], #6 +# CHECK-NEXT: 6 7 1.50 * ld3r { v0.8b, v1.8b, v2.8b }, [x0] +# CHECK-NEXT: 6 7 1.50 * ld3r { v0.8b, v1.8b, v2.8b }, [x0], #3 +# CHECK-NEXT: 12 11 2.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] +# CHECK-NEXT: 12 10 2.00 * ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64 +# CHECK-NEXT: 8 7 2.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0] +# CHECK-NEXT: 8 7 2.00 * ld4 { v0.d, 
v1.d, v2.d, v3.d }[1], [x0], #32 +# CHECK-NEXT: 8 7 2.00 * ld4 { v0.h, v1.h, v2.h, v3.h }[7], [x0], x0 +# CHECK-NEXT: 4 5 2.00 * ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp] +# CHECK-NEXT: 4 5 2.00 * ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp], x7 +# CHECK-NEXT: 8 7 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] +# CHECK-NEXT: 8 7 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x30 +# CHECK-NEXT: 1 3 0.50 mla v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 3 0.50 mls v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 3 0.50 mov b0, v0.b[15] +# CHECK-NEXT: 1 3 0.50 mov d6, v0.d[1] +# CHECK-NEXT: 1 3 0.50 mov h2, v0.h[5] +# CHECK-NEXT: 1 3 0.50 mov s17, v0.s[2] +# CHECK-NEXT: 1 2 0.50 mov v2.b[0], v0.b[0] +# CHECK-NEXT: 1 2 0.50 mov v2.h[1], v0.h[1] +# CHECK-NEXT: 1 2 0.50 mov v2.s[2], v0.s[2] +# CHECK-NEXT: 1 2 0.50 mov v2.d[1], v0.d[1] +# CHECK-NEXT: 2 7 1.00 mov v0.b[0], w8 +# CHECK-NEXT: 2 7 1.00 mov v0.h[1], w8 +# CHECK-NEXT: 2 7 1.00 mov v0.s[2], w8 +# CHECK-NEXT: 2 7 1.00 mov v0.d[1], x8 +# CHECK-NEXT: 1 2 0.50 mov v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 mov v0.8b, v0.8b +# CHECK-NEXT: 1 3 0.50 movi d15, #0xff00ff00ff00ff +# CHECK-NEXT: 1 2 0.50 movi v0.16b, #31 +# CHECK-NEXT: 1 2 0.50 movi v0.2d, #0xff0000ff0000ffff +# CHECK-NEXT: 1 2 0.50 movi v0.2s, #8, msl #8 +# CHECK-NEXT: 1 2 0.50 movi v0.4s, #255, lsl #24 +# CHECK-NEXT: 1 2 0.50 movi v0.8b, #255 +# CHECK-NEXT: 1 3 0.50 mul v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 mvni v0.2s, #0 +# CHECK-NEXT: 1 2 0.50 mvni v0.4s, #16, msl #16 +# CHECK-NEXT: 1 3 0.50 neg d29, d24 +# CHECK-NEXT: 1 3 0.50 neg v0.16b, v0.16b +# CHECK-NEXT: 1 3 0.50 neg v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 neg v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 neg v0.4h, v0.4h +# CHECK-NEXT: 1 3 0.50 neg v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 neg v0.8b, v0.8b +# CHECK-NEXT: 1 3 0.50 neg v0.8h, v0.8h +# CHECK-NEXT: 1 2 0.50 mvn v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 mvn v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 orn v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 mov v0.16b, v0.16b +# 
CHECK-NEXT: 1 2 0.50 orr v0.8h, #31 +# CHECK-NEXT: 1 2 0.50 pmul v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 pmul v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 pmull v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 pmull2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 2 6 1.00 raddhn v0.2s, v0.2d, v0.2d +# CHECK-NEXT: 2 6 1.00 raddhn v0.4h, v0.4s, v0.4s +# CHECK-NEXT: 2 6 1.00 raddhn v0.8b, v0.8h, v0.8h +# CHECK-NEXT: 2 6 1.00 raddhn2 v0.16b, v0.8h, v0.8h +# CHECK-NEXT: 2 6 1.00 raddhn2 v0.4s, v0.2d, v0.2d +# CHECK-NEXT: 2 6 1.00 raddhn2 v0.8h, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 rbit v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 rbit v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 rev16 v21.8b, v1.8b +# CHECK-NEXT: 1 2 0.50 rev16 v30.16b, v31.16b +# CHECK-NEXT: 1 2 0.50 rev32 v0.4h, v9.4h +# CHECK-NEXT: 1 2 0.50 rev32 v21.8b, v1.8b +# CHECK-NEXT: 1 2 0.50 rev32 v30.16b, v31.16b +# CHECK-NEXT: 1 2 0.50 rev32 v4.8h, v7.8h +# CHECK-NEXT: 1 2 0.50 rev64 v0.16b, v31.16b +# CHECK-NEXT: 1 2 0.50 rev64 v1.8b, v9.8b +# CHECK-NEXT: 1 2 0.50 rev64 v13.4h, v21.4h +# CHECK-NEXT: 1 2 0.50 rev64 v2.8h, v4.8h +# CHECK-NEXT: 1 2 0.50 rev64 v4.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 rev64 v6.4s, v8.4s +# CHECK-NEXT: 2 6 1.00 rshrn v0.2s, v0.2d, #3 +# CHECK-NEXT: 2 6 1.00 rshrn v0.4h, v0.4s, #3 +# CHECK-NEXT: 2 6 1.00 rshrn v0.8b, v0.8h, #3 +# CHECK-NEXT: 2 6 1.00 rshrn2 v0.16b, v0.8h, #3 +# CHECK-NEXT: 2 6 1.00 rshrn2 v0.4s, v0.2d, #3 +# CHECK-NEXT: 2 6 1.00 rshrn2 v0.8h, v0.4s, #3 +# CHECK-NEXT: 2 6 1.00 rsubhn v0.2s, v0.2d, v0.2d +# CHECK-NEXT: 2 6 1.00 rsubhn v0.4h, v0.4s, v0.4s +# CHECK-NEXT: 2 6 1.00 rsubhn v0.8b, v0.8h, v0.8h +# CHECK-NEXT: 2 6 1.00 rsubhn2 v0.16b, v0.8h, v0.8h +# CHECK-NEXT: 2 6 1.00 rsubhn2 v0.4s, v0.2d, v0.2d +# CHECK-NEXT: 2 6 1.00 rsubhn2 v0.8h, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 saba v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 sabal v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 sabal v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 2 0.50 sabal v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 sabal2 
v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 sabal2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 2 0.50 sabal2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 sabd v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 2 0.50 sabdl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 sabdl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 2 0.50 sabdl v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 sabdl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 sabdl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 2 0.50 sabdl2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 sadalp v0.1d, v0.2s +# CHECK-NEXT: 1 2 0.50 sadalp v0.2d, v0.4s +# CHECK-NEXT: 1 2 0.50 sadalp v0.2s, v0.4h +# CHECK-NEXT: 1 2 0.50 sadalp v0.4h, v0.8b +# CHECK-NEXT: 1 2 0.50 sadalp v0.4s, v0.8h +# CHECK-NEXT: 1 2 0.50 sadalp v0.8h, v0.16b +# CHECK-NEXT: 1 2 0.50 saddl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 saddl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 2 0.50 saddl v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 saddl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 saddl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 2 0.50 saddl2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 saddlp v0.1d, v0.2s +# CHECK-NEXT: 1 2 0.50 saddlp v0.2d, v0.4s +# CHECK-NEXT: 1 2 0.50 saddlp v0.2s, v0.4h +# CHECK-NEXT: 1 2 0.50 saddlp v0.4h, v0.8b +# CHECK-NEXT: 1 2 0.50 saddlp v0.4s, v0.8h +# CHECK-NEXT: 1 2 0.50 saddlp v0.8h, v0.16b +# CHECK-NEXT: 1 2 0.50 saddw v0.2d, v0.2d, v0.2s +# CHECK-NEXT: 1 2 0.50 saddw v0.4s, v0.4s, v0.4h +# CHECK-NEXT: 1 2 0.50 saddw v0.8h, v0.8h, v0.8b +# CHECK-NEXT: 1 2 0.50 saddw2 v0.2d, v0.2d, v0.4s +# CHECK-NEXT: 1 2 0.50 saddw2 v0.4s, v0.4s, v0.8h +# CHECK-NEXT: 1 2 0.50 saddw2 v0.8h, v0.8h, v0.16b +# CHECK-NEXT: 1 3 0.50 scvtf d21, d12 +# CHECK-NEXT: 1 3 0.50 scvtf d21, d12, #64 +# CHECK-NEXT: 1 3 0.50 scvtf s22, s13 +# CHECK-NEXT: 1 3 0.50 scvtf s22, s13, #32 +# CHECK-NEXT: 1 3 0.50 scvtf v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 scvtf v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 3 0.50 scvtf v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 scvtf v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 3 0.50 scvtf v0.4h, 
v0.4h +# CHECK-NEXT: 1 3 0.50 scvtf v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 scvtf v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 3 0.50 scvtf v0.8h, v0.8h +# CHECK-NEXT: 1 2 0.50 shadd v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 3 0.50 shl d7, d10, #12 +# CHECK-NEXT: 1 3 0.50 shl v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 3 0.50 shl v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 3 0.50 shl v0.4h, v0.4h, #3 +# CHECK-NEXT: 1 3 0.50 shl v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 3 0.50 shll v0.2d, v0.2s, #32 +# CHECK-NEXT: 1 3 0.50 shll v0.4s, v0.4h, #16 +# CHECK-NEXT: 1 3 0.50 shll v0.8h, v0.8b, #8 +# CHECK-NEXT: 1 3 0.50 shll v0.2d, v0.2s, #32 +# CHECK-NEXT: 1 3 0.50 shll v0.4s, v0.4h, #16 +# CHECK-NEXT: 1 3 0.50 shll v0.8h, v0.8b, #8 +# CHECK-NEXT: 1 3 0.50 shll2 v0.2d, v0.4s, #32 +# CHECK-NEXT: 1 3 0.50 shll2 v0.4s, v0.8h, #16 +# CHECK-NEXT: 1 3 0.50 shll2 v0.8h, v0.16b, #8 +# CHECK-NEXT: 1 3 0.50 shll2 v0.2d, v0.4s, #32 +# CHECK-NEXT: 1 3 0.50 shll2 v0.4s, v0.8h, #16 +# CHECK-NEXT: 1 3 0.50 shll2 v0.8h, v0.16b, #8 +# CHECK-NEXT: 2 6 1.00 shrn v0.2s, v0.2d, #3 +# CHECK-NEXT: 2 6 1.00 shrn v0.4h, v0.4s, #3 +# CHECK-NEXT: 2 6 1.00 shrn v0.8b, v0.8h, #3 +# CHECK-NEXT: 2 6 1.00 shrn2 v0.16b, v0.8h, #3 +# CHECK-NEXT: 2 6 1.00 shrn2 v0.4s, v0.2d, #3 +# CHECK-NEXT: 2 6 1.00 shrn2 v0.8h, v0.4s, #3 +# CHECK-NEXT: 1 2 0.50 shsub v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 shsub v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 3 0.50 sli d10, d14, #12 +# CHECK-NEXT: 1 3 0.50 sli v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 3 0.50 sli v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 3 0.50 sli v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 3 0.50 sli v0.4h, v0.4h, #3 +# CHECK-NEXT: 1 3 0.50 sli v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 3 0.50 sli v0.8b, v0.8b, #3 +# CHECK-NEXT: 1 3 0.50 sli v0.8h, v0.8h, #3 +# CHECK-NEXT: 1 2 0.50 smax v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 smax v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 2 0.50 smax v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 smaxp v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 smaxp v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 2 0.50 smaxp 
v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 smin v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 smin v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 smin v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 2 0.50 sminp v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 sminp v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 sminp v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 smlal v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 smlal v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 3 0.50 smlal v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 3 0.50 smlal2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 smlal2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 smlal2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 3 0.50 smlsl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 smlsl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 3 0.50 smlsl v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 3 0.50 smlsl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 smlsl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 smlsl2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 3 0.50 smull v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 smull v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 3 0.50 smull v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 3 0.50 smull2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 smull2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 smull2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 sqabs b19, b14 +# CHECK-NEXT: 1 2 0.50 sqabs d18, d12 +# CHECK-NEXT: 1 2 0.50 sqabs h21, h15 +# CHECK-NEXT: 1 2 0.50 sqabs s20, s12 +# CHECK-NEXT: 1 2 0.50 sqabs v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 sqabs v0.2d, v0.2d +# CHECK-NEXT: 1 2 0.50 sqabs v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 sqabs v0.4h, v0.4h +# CHECK-NEXT: 1 2 0.50 sqabs v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 sqabs v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 sqabs v0.8h, v0.8h +# CHECK-NEXT: 1 2 0.50 sqadd b20, b11, b15 +# CHECK-NEXT: 1 2 0.50 sqadd v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 sqadd v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 sqdmlal d19, s24, s12 +# CHECK-NEXT: 1 3 0.50 sqdmlal d8, s9, v0.s[1] +# CHECK-NEXT: 1 3 0.50 sqdmlal s0, h0, v0.h[3] +# 
CHECK-NEXT: 1 3 0.50 sqdmlal s17, h27, h12 +# CHECK-NEXT: 1 3 0.50 sqdmlal v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 sqdmlal v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 3 0.50 sqdmlal2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 sqdmlal2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 sqdmlsl d12, s23, s13 +# CHECK-NEXT: 1 3 0.50 sqdmlsl d8, s9, v0.s[1] +# CHECK-NEXT: 1 3 0.50 sqdmlsl s0, h0, v0.h[3] +# CHECK-NEXT: 1 3 0.50 sqdmlsl s14, h12, h25 +# CHECK-NEXT: 1 3 0.50 sqdmlsl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 sqdmlsl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 3 0.50 sqdmlsl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 sqdmlsl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 sqdmulh h10, h11, h12 +# CHECK-NEXT: 1 3 0.50 sqdmulh h7, h15, v0.h[3] +# CHECK-NEXT: 1 3 0.50 sqdmulh s15, s14, v0.s[1] +# CHECK-NEXT: 1 3 0.50 sqdmulh s20, s21, s2 +# CHECK-NEXT: 1 3 0.50 sqdmulh v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 sqdmulh v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 sqdmull d1, s1, v0.s[1] +# CHECK-NEXT: 1 3 0.50 sqdmull d15, s22, s12 +# CHECK-NEXT: 1 3 0.50 sqdmull s1, h1, v0.h[3] +# CHECK-NEXT: 1 3 0.50 sqdmull s12, h22, h12 +# CHECK-NEXT: 1 3 0.50 sqdmull v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 sqdmull v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 3 0.50 sqdmull2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 sqdmull2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 2 0.50 sqneg b19, b14 +# CHECK-NEXT: 1 2 0.50 sqneg d18, d12 +# CHECK-NEXT: 1 2 0.50 sqneg h21, h15 +# CHECK-NEXT: 1 2 0.50 sqneg s20, s12 +# CHECK-NEXT: 1 2 0.50 sqneg v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 sqneg v0.2d, v0.2d +# CHECK-NEXT: 1 2 0.50 sqneg v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 sqneg v0.4h, v0.4h +# CHECK-NEXT: 1 2 0.50 sqneg v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 sqneg v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 sqneg v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 sqrdmulh h10, h11, h12 +# CHECK-NEXT: 1 3 0.50 sqrdmulh h7, h15, v0.h[3] +# CHECK-NEXT: 1 3 0.50 sqrdmulh s15, s14, v0.s[1] +# CHECK-NEXT: 1 3 0.50 sqrdmulh s20, s21, s2 +# 
CHECK-NEXT: 1 3 0.50 sqrdmulh v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 3 0.50 sqrdmulh v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 2 0.50 sqrshl d31, d31, d31 +# CHECK-NEXT: 1 2 0.50 sqrshl h3, h4, h15 +# CHECK-NEXT: 1 2 0.50 sqrshl v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 sqrshl v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 2 0.50 sqrshl v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 3 0.50 sqrshrn b10, h13, #2 +# CHECK-NEXT: 1 3 0.50 sqrshrn h15, s10, #6 +# CHECK-NEXT: 1 3 0.50 sqrshrn s15, d12, #9 +# CHECK-NEXT: 1 2 0.50 sqrshrn v0.2s, v0.2d, #3 +# CHECK-NEXT: 1 2 0.50 sqrshrn v0.4h, v0.4s, #3 +# CHECK-NEXT: 1 2 0.50 sqrshrn v0.8b, v0.8h, #3 +# CHECK-NEXT: 1 2 0.50 sqrshrn2 v0.16b, v0.8h, #3 +# CHECK-NEXT: 1 2 0.50 sqrshrn2 v0.4s, v0.2d, #3 +# CHECK-NEXT: 1 2 0.50 sqrshrn2 v0.8h, v0.4s, #3 +# CHECK-NEXT: 1 3 0.50 sqrshrun b17, h10, #6 +# CHECK-NEXT: 1 3 0.50 sqrshrun h10, s13, #15 +# CHECK-NEXT: 1 3 0.50 sqrshrun s22, d16, #31 +# CHECK-NEXT: 1 2 0.50 sqrshrun v0.2s, v0.2d, #3 +# CHECK-NEXT: 1 2 0.50 sqrshrun v0.4h, v0.4s, #3 +# CHECK-NEXT: 1 2 0.50 sqrshrun v0.8b, v0.8h, #3 +# CHECK-NEXT: 1 2 0.50 sqrshrun2 v0.16b, v0.8h, #3 +# CHECK-NEXT: 1 2 0.50 sqrshrun2 v0.4s, v0.2d, #3 +# CHECK-NEXT: 1 2 0.50 sqrshrun2 v0.8h, v0.4s, #3 +# CHECK-NEXT: 1 2 0.50 sqshl b11, b19, #7 +# CHECK-NEXT: 1 2 0.50 sqshl d15, d16, #51 +# CHECK-NEXT: 1 2 0.50 sqshl d31, d31, d31 +# CHECK-NEXT: 1 2 0.50 sqshl h13, h18, #11 +# CHECK-NEXT: 1 2 0.50 sqshl h3, h4, h15 +# CHECK-NEXT: 1 2 0.50 sqshl s14, s17, #22 +# CHECK-NEXT: 1 2 0.50 sqshl v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 2 0.50 sqshl v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 2 0.50 sqshl v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 2 0.50 sqshl v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 sqshl v0.4h, v0.4h, #3 +# CHECK-NEXT: 1 2 0.50 sqshl v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 2 0.50 sqshl v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 2 0.50 sqshl v0.8b, v0.8b, #3 +# CHECK-NEXT: 1 2 0.50 sqshl v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 sqshl v0.8h, v0.8h, #3 +# CHECK-NEXT: 1 2 0.50 sqshlu b15, 
b18, #6 +# CHECK-NEXT: 1 2 0.50 sqshlu d11, d13, #32 +# CHECK-NEXT: 1 2 0.50 sqshlu h19, h17, #6 +# CHECK-NEXT: 1 2 0.50 sqshlu s16, s14, #25 +# CHECK-NEXT: 1 2 0.50 sqshlu v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 2 0.50 sqshlu v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 2 0.50 sqshlu v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 2 0.50 sqshlu v0.4h, v0.4h, #3 +# CHECK-NEXT: 1 2 0.50 sqshlu v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 2 0.50 sqshlu v0.8b, v0.8b, #3 +# CHECK-NEXT: 1 2 0.50 sqshlu v0.8h, v0.8h, #3 +# CHECK-NEXT: 1 3 0.50 sqshrn b10, h15, #5 +# CHECK-NEXT: 1 3 0.50 sqshrn h17, s10, #4 +# CHECK-NEXT: 1 3 0.50 sqshrn s18, d10, #31 +# CHECK-NEXT: 2 6 1.00 sqshrn v0.2s, v0.2d, #3 +# CHECK-NEXT: 2 6 1.00 sqshrn v0.4h, v0.4s, #3 +# CHECK-NEXT: 2 6 1.00 sqshrn v0.8b, v0.8h, #3 +# CHECK-NEXT: 2 6 1.00 sqshrn2 v0.16b, v0.8h, #3 +# CHECK-NEXT: 2 6 1.00 sqshrn2 v0.4s, v0.2d, #3 +# CHECK-NEXT: 2 6 1.00 sqshrn2 v0.8h, v0.4s, #3 +# CHECK-NEXT: 1 3 0.50 sqshrun b15, h10, #7 +# CHECK-NEXT: 1 3 0.50 sqshrun h20, s14, #3 +# CHECK-NEXT: 1 3 0.50 sqshrun s10, d15, #15 +# CHECK-NEXT: 2 6 1.00 sqshrun v0.2s, v0.2d, #3 +# CHECK-NEXT: 2 6 1.00 sqshrun v0.4h, v0.4s, #3 +# CHECK-NEXT: 2 6 1.00 sqshrun v0.8b, v0.8h, #3 +# CHECK-NEXT: 2 6 1.00 sqshrun2 v0.16b, v0.8h, #3 +# CHECK-NEXT: 2 6 1.00 sqshrun2 v0.4s, v0.2d, #3 +# CHECK-NEXT: 2 6 1.00 sqshrun2 v0.8h, v0.4s, #3 +# CHECK-NEXT: 1 2 0.50 sqsub s20, s10, s7 +# CHECK-NEXT: 1 2 0.50 sqsub v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 2 0.50 sqsub v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 sqsub v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 sqxtn b18, h18 +# CHECK-NEXT: 1 2 0.50 sqxtn h20, s17 +# CHECK-NEXT: 1 2 0.50 sqxtn s19, d14 +# CHECK-NEXT: 1 2 0.50 sqxtn v0.2s, v0.2d +# CHECK-NEXT: 1 2 0.50 sqxtn v0.4h, v0.4s +# CHECK-NEXT: 1 2 0.50 sqxtn v0.8b, v0.8h +# CHECK-NEXT: 1 2 0.50 sqxtn2 v0.16b, v0.8h +# CHECK-NEXT: 1 2 0.50 sqxtn2 v0.4s, v0.2d +# CHECK-NEXT: 1 2 0.50 sqxtn2 v0.8h, v0.4s +# CHECK-NEXT: 1 2 0.50 sqxtun b19, h14 +# CHECK-NEXT: 1 2 0.50 sqxtun h21, s15 +# 
CHECK-NEXT: 1 2 0.50 sqxtun s20, d12 +# CHECK-NEXT: 1 2 0.50 sqxtun v0.2s, v0.2d +# CHECK-NEXT: 1 2 0.50 sqxtun v0.4h, v0.4s +# CHECK-NEXT: 1 2 0.50 sqxtun v0.8b, v0.8h +# CHECK-NEXT: 1 2 0.50 sqxtun2 v0.16b, v0.8h +# CHECK-NEXT: 1 2 0.50 sqxtun2 v0.4s, v0.2d +# CHECK-NEXT: 1 2 0.50 sqxtun2 v0.8h, v0.4s +# CHECK-NEXT: 1 2 0.50 srhadd v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 srhadd v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 2 0.50 srhadd v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 3 0.50 sri d10, d12, #14 +# CHECK-NEXT: 1 3 0.50 sri v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 3 0.50 sri v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 3 0.50 sri v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 3 0.50 sri v0.4h, v0.4h, #3 +# CHECK-NEXT: 1 3 0.50 sri v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 3 0.50 sri v0.8b, v0.8b, #3 +# CHECK-NEXT: 1 3 0.50 sri v0.8h, v0.8h, #3 +# CHECK-NEXT: 1 3 0.50 srshl d16, d16, d16 +# CHECK-NEXT: 1 3 0.50 srshl v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 srshl v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 3 0.50 srshl v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 3 0.50 srshr d19, d18, #7 +# CHECK-NEXT: 1 3 0.50 srshr v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 3 0.50 srshr v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 3 0.50 srshr v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 3 0.50 srshr v0.4h, v0.4h, #3 +# CHECK-NEXT: 1 3 0.50 srshr v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 3 0.50 srshr v0.8b, v0.8b, #3 +# CHECK-NEXT: 1 3 0.50 srshr v0.8h, v0.8h, #3 +# CHECK-NEXT: 1 3 0.50 srsra d15, d11, #19 +# CHECK-NEXT: 1 2 0.50 srsra v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 2 0.50 srsra v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 2 0.50 srsra v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 2 0.50 srsra v0.4h, v0.4h, #3 +# CHECK-NEXT: 1 2 0.50 srsra v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 2 0.50 srsra v0.8b, v0.8b, #3 +# CHECK-NEXT: 1 2 0.50 srsra v0.8h, v0.8h, #3 +# CHECK-NEXT: 1 3 0.50 sshl d31, d31, d31 +# CHECK-NEXT: 1 2 0.50 sshl v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 sshl v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 sshl v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 3 0.50 sshl v0.8b, v0.8b, v0.8b 
+# CHECK-NEXT: 1 3 0.50 sshll v0.2d, v0.2s, #3 +# CHECK-NEXT: 1 3 0.50 sshll2 v0.4s, v0.8h, #3 +# CHECK-NEXT: 1 3 0.50 sshr d15, d16, #12 +# CHECK-NEXT: 1 3 0.50 sshr v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 3 0.50 sshr v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 3 0.50 sshr v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 3 0.50 sshr v0.4h, v0.4h, #3 +# CHECK-NEXT: 1 3 0.50 sshr v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 3 0.50 sshr v0.8b, v0.8b, #3 +# CHECK-NEXT: 1 3 0.50 sshr v0.8h, v0.8h, #3 +# CHECK-NEXT: 1 3 0.50 ssra d18, d12, #21 +# CHECK-NEXT: 1 2 0.50 ssra v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 2 0.50 ssra v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 2 0.50 ssra v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 2 0.50 ssra v0.4h, v0.4h, #3 +# CHECK-NEXT: 1 2 0.50 ssra v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 2 0.50 ssra v0.8b, v0.8b, #3 +# CHECK-NEXT: 1 2 0.50 ssra v0.8h, v0.8h, #3 +# CHECK-NEXT: 1 2 0.50 ssubl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 ssubl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 2 0.50 ssubl v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 ssubl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 ssubl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 2 0.50 ssubl2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 ssubw v0.2d, v0.2d, v0.2s +# CHECK-NEXT: 1 2 0.50 ssubw v0.4s, v0.4s, v0.4h +# CHECK-NEXT: 1 2 0.50 ssubw v0.8h, v0.8h, v0.8b +# CHECK-NEXT: 1 2 0.50 ssubw2 v0.2d, v0.2d, v0.4s +# CHECK-NEXT: 1 2 0.50 ssubw2 v0.4s, v0.4s, v0.8h +# CHECK-NEXT: 1 2 0.50 ssubw2 v0.8h, v0.8h, v0.16b +# CHECK-NEXT: 2 2 1.00 * st1 { v0.16b }, [x0] +# CHECK-NEXT: 6 4 3.00 * st1 { v0.2d, v1.2d, v2.2d }, [x0], #48 +# CHECK-NEXT: 8 5 4.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] +# CHECK-NEXT: 4 3 2.00 * st1 { v0.4s, v1.4s }, [sp], #32 +# CHECK-NEXT: 6 4 3.00 * st1 { v0.4s, v1.4s, v2.4s }, [sp] +# CHECK-NEXT: 8 5 4.00 * st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3 +# CHECK-NEXT: 2 2 1.00 * st1 { v0.8h }, [x15], x2 +# CHECK-NEXT: 4 3 2.00 * st1 { v0.8h, v1.8h }, [x15] +# CHECK-NEXT: 3 4 1.00 * st1 { v0.d }[1], [x0] +# CHECK-NEXT: 3 4 1.00 * st1 { v0.d 
}[1], [x0], #8 +# CHECK-NEXT: 6 5 2.00 * st2 { v0.16b, v1.16b }, [x0], x1 +# CHECK-NEXT: 6 6 2.00 * st2 { v0.8b, v1.8b }, [x0] +# CHECK-NEXT: 6 5 2.00 * st2 { v0.s, v1.s }[3], [sp] +# CHECK-NEXT: 6 5 2.00 * st2 { v0.s, v1.s }[3], [sp], #8 +# CHECK-NEXT: 9 6 3.00 * st3 { v0.4h, v1.4h, v2.4h }, [x15] +# CHECK-NEXT: 9 6 3.00 * st3 { v0.8h, v1.8h, v2.8h }, [x15], x2 +# CHECK-NEXT: 9 6 3.00 * st3 { v0.h, v1.h, v2.h }[7], [x15] +# CHECK-NEXT: 9 6 3.00 * st3 { v0.h, v1.h, v2.h }[7], [x15], #6 +# CHECK-NEXT: 14 9 4.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] +# CHECK-NEXT: 12 7 4.00 * st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64 +# CHECK-NEXT: 12 7 4.00 * st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0] +# CHECK-NEXT: 12 7 4.00 * st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0], x5 +# CHECK-NEXT: 1 2 0.50 sub d15, d5, d16 +# CHECK-NEXT: 1 2 0.50 sub v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 2 0.50 suqadd b19, b14 +# CHECK-NEXT: 1 2 0.50 suqadd d18, d22 +# CHECK-NEXT: 1 2 0.50 suqadd h20, h15 +# CHECK-NEXT: 1 2 0.50 suqadd s21, s12 +# CHECK-NEXT: 1 2 0.50 suqadd v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 suqadd v0.2d, v0.2d +# CHECK-NEXT: 1 2 0.50 suqadd v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 suqadd v0.4h, v0.4h +# CHECK-NEXT: 1 2 0.50 suqadd v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 suqadd v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 suqadd v0.8h, v0.8h +# CHECK-NEXT: 1 2 0.50 tbl v0.16b, { v0.16b }, v0.16b +# CHECK-NEXT: 2 4 1.00 tbl v0.16b, { v0.16b, v1.16b }, v0.16b +# CHECK-NEXT: 3 6 1.50 tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b +# CHECK-NEXT: 4 8 2.00 tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b +# CHECK-NEXT: 1 2 0.50 tbl v0.8b, { v0.16b }, v0.8b +# CHECK-NEXT: 2 4 1.00 tbl v0.8b, { v0.16b, v1.16b }, v0.8b +# CHECK-NEXT: 3 6 1.50 tbl v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b +# CHECK-NEXT: 4 8 2.00 tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b +# CHECK-NEXT: 1 2 0.50 tbx v0.16b, { v0.16b }, v0.16b +# CHECK-NEXT: 2 4 1.00 tbx v0.16b, { v0.16b, v1.16b }, v0.16b +# CHECK-NEXT: 3 6 1.50 
tbx v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b +# CHECK-NEXT: 4 8 2.00 tbx v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b +# CHECK-NEXT: 1 2 0.50 tbx v0.8b, { v0.16b }, v0.8b +# CHECK-NEXT: 2 4 1.00 tbx v0.8b, { v0.16b, v1.16b }, v0.8b +# CHECK-NEXT: 3 6 1.50 tbx v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b +# CHECK-NEXT: 4 8 2.00 tbx v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b +# CHECK-NEXT: 1 2 0.50 trn1 v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 trn1 v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 2 0.50 trn1 v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 trn1 v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 2 0.50 trn1 v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 trn1 v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 trn1 v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 2 0.50 trn2 v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 trn2 v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 2 0.50 trn2 v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 trn2 v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 2 0.50 trn2 v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 trn2 v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 trn2 v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 2 0.50 uaba v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 uabal v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 uabal v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 2 0.50 uabal v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 uabal2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 uabal2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 2 0.50 uabal2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 uabd v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 2 0.50 uabdl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 uabdl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 2 0.50 uabdl v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 uabdl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 uabdl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 2 0.50 uabdl2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 uadalp v0.1d, v0.2s +# CHECK-NEXT: 1 2 0.50 uadalp v0.2d, v0.4s +# CHECK-NEXT: 1 2 0.50 uadalp v0.2s, v0.4h +# CHECK-NEXT: 1 2 0.50 uadalp v0.4h, v0.8b +# CHECK-NEXT: 1 2 0.50 uadalp v0.4s, 
v0.8h +# CHECK-NEXT: 1 2 0.50 uadalp v0.8h, v0.16b +# CHECK-NEXT: 1 2 0.50 uaddl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 uaddl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 2 0.50 uaddl v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 uaddl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 uaddl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 2 0.50 uaddl2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 uaddlp v0.1d, v0.2s +# CHECK-NEXT: 1 2 0.50 uaddlp v0.2d, v0.4s +# CHECK-NEXT: 1 2 0.50 uaddlp v0.2s, v0.4h +# CHECK-NEXT: 1 2 0.50 uaddlp v0.4h, v0.8b +# CHECK-NEXT: 1 2 0.50 uaddlp v0.4s, v0.8h +# CHECK-NEXT: 1 2 0.50 uaddlp v0.8h, v0.16b +# CHECK-NEXT: 1 2 0.50 uaddw v0.2d, v0.2d, v0.2s +# CHECK-NEXT: 1 2 0.50 uaddw v0.4s, v0.4s, v0.4h +# CHECK-NEXT: 1 2 0.50 uaddw v0.8h, v0.8h, v0.8b +# CHECK-NEXT: 1 2 0.50 uaddw2 v0.2d, v0.2d, v0.4s +# CHECK-NEXT: 1 2 0.50 uaddw2 v0.4s, v0.4s, v0.8h +# CHECK-NEXT: 1 2 0.50 uaddw2 v0.8h, v0.8h, v0.16b +# CHECK-NEXT: 1 3 0.50 ucvtf d21, d14 +# CHECK-NEXT: 1 3 0.50 ucvtf d21, d14, #64 +# CHECK-NEXT: 1 3 0.50 ucvtf s22, s13 +# CHECK-NEXT: 1 3 0.50 ucvtf s22, s13, #32 +# CHECK-NEXT: 1 3 0.50 ucvtf v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 ucvtf v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 3 0.50 ucvtf v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 ucvtf v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 3 0.50 ucvtf v0.4h, v0.4h +# CHECK-NEXT: 1 3 0.50 ucvtf v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 ucvtf v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 3 0.50 ucvtf v0.8h, v0.8h +# CHECK-NEXT: 1 2 0.50 uhadd v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 uhadd v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 2 0.50 uhsub v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 umax v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 umax v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 umax v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 2 0.50 umaxp v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 umaxp v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 umaxp v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 2 0.50 umin v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 umin v0.4h, v0.4h, 
v0.4h +# CHECK-NEXT: 1 2 0.50 umin v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 uminp v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 uminp v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 2 0.50 uminp v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 3 0.50 umlal v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 umlal v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 3 0.50 umlal v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 3 0.50 umlal2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 umlal2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 umlal2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 3 0.50 umlsl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 umlsl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 3 0.50 umlsl v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 3 0.50 umlsl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 umlsl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 umlsl2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 3 0.50 umull v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 umull v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 3 0.50 umull v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 3 0.50 umull2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 umull2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 umull2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 uqadd h0, h1, h5 +# CHECK-NEXT: 1 2 0.50 uqadd v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 2 0.50 uqrshl b11, b20, b30 +# CHECK-NEXT: 1 2 0.50 uqrshl s23, s20, s16 +# CHECK-NEXT: 1 2 0.50 uqrshl v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 uqrshl v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 uqrshl v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 uqrshl v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 uqrshrn b10, h12, #5 +# CHECK-NEXT: 1 3 0.50 uqrshrn h12, s10, #14 +# CHECK-NEXT: 1 3 0.50 uqrshrn s10, d10, #25 +# CHECK-NEXT: 1 2 0.50 uqrshrn v0.2s, v0.2d, #3 +# CHECK-NEXT: 1 2 0.50 uqrshrn v0.4h, v0.4s, #3 +# CHECK-NEXT: 1 2 0.50 uqrshrn v0.8b, v0.8h, #3 +# CHECK-NEXT: 1 2 0.50 uqrshrn2 v0.16b, v0.8h, #3 +# CHECK-NEXT: 1 2 0.50 uqrshrn2 v0.4s, v0.2d, #3 +# CHECK-NEXT: 1 2 0.50 uqrshrn2 v0.8h, v0.4s, #3 +# CHECK-NEXT: 1 2 0.50 uqshl b11, b20, b30 +# 
CHECK-NEXT: 1 2 0.50 uqshl b18, b15, #6 +# CHECK-NEXT: 1 2 0.50 uqshl d15, d12, #19 +# CHECK-NEXT: 1 2 0.50 uqshl h11, h18, #7 +# CHECK-NEXT: 1 2 0.50 uqshl s14, s19, #18 +# CHECK-NEXT: 1 2 0.50 uqshl s23, s20, s16 +# CHECK-NEXT: 1 2 0.50 uqshl v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 2 0.50 uqshl v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 uqshl v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 2 0.50 uqshl v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 2 0.50 uqshl v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 2 0.50 uqshl v0.4h, v0.4h, #3 +# CHECK-NEXT: 1 2 0.50 uqshl v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 2 0.50 uqshl v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 uqshl v0.8b, v0.8b, #3 +# CHECK-NEXT: 1 2 0.50 uqshl v0.8h, v0.8h, #3 +# CHECK-NEXT: 1 2 0.50 uqshl v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 uqshrn b12, h10, #7 +# CHECK-NEXT: 1 3 0.50 uqshrn h10, s14, #5 +# CHECK-NEXT: 1 3 0.50 uqshrn s10, d12, #13 +# CHECK-NEXT: 1 2 0.50 uqshrn v0.2s, v0.2d, #3 +# CHECK-NEXT: 1 2 0.50 uqshrn v0.4h, v0.4s, #3 +# CHECK-NEXT: 1 2 0.50 uqshrn v0.8b, v0.8h, #3 +# CHECK-NEXT: 1 2 0.50 uqshrn2 v0.16b, v0.8h, #3 +# CHECK-NEXT: 1 2 0.50 uqshrn2 v0.4s, v0.2d, #3 +# CHECK-NEXT: 1 2 0.50 uqshrn2 v0.8h, v0.4s, #3 +# CHECK-NEXT: 1 2 0.50 uqsub d16, d16, d16 +# CHECK-NEXT: 1 2 0.50 uqsub v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 2 6 1.00 uqxtn b18, h18 +# CHECK-NEXT: 2 6 1.00 uqxtn h20, s17 +# CHECK-NEXT: 2 6 1.00 uqxtn s19, d14 +# CHECK-NEXT: 2 6 1.00 uqxtn v0.2s, v0.2d +# CHECK-NEXT: 2 6 1.00 uqxtn v0.4h, v0.4s +# CHECK-NEXT: 2 6 1.00 uqxtn v0.8b, v0.8h +# CHECK-NEXT: 2 6 1.00 uqxtn2 v0.16b, v0.8h +# CHECK-NEXT: 2 6 1.00 uqxtn2 v0.4s, v0.2d +# CHECK-NEXT: 2 6 1.00 uqxtn2 v0.8h, v0.4s +# CHECK-NEXT: 1 2 0.50 urecpe v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 urecpe v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 urhadd v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 urhadd v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 urhadd v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 urshl d8, d7, d4 +# CHECK-NEXT: 1 3 0.50 urshl v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 
1 3 0.50 urshl v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 urshl v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 urshl v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 urshr d20, d23, #31 +# CHECK-NEXT: 1 3 0.50 urshr v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 3 0.50 urshr v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 3 0.50 urshr v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 3 0.50 urshr v0.4h, v0.4h, #3 +# CHECK-NEXT: 1 3 0.50 urshr v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 3 0.50 urshr v0.8b, v0.8b, #3 +# CHECK-NEXT: 1 3 0.50 urshr v0.8h, v0.8h, #3 +# CHECK-NEXT: 1 2 0.50 ursqrte v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 ursqrte v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 ursra d18, d10, #13 +# CHECK-NEXT: 1 2 0.50 ursra v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 2 0.50 ursra v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 2 0.50 ursra v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 2 0.50 ursra v0.4h, v0.4h, #3 +# CHECK-NEXT: 1 2 0.50 ursra v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 2 0.50 ursra v0.8b, v0.8b, #3 +# CHECK-NEXT: 1 2 0.50 ursra v0.8h, v0.8h, #3 +# CHECK-NEXT: 1 3 0.50 ushl d0, d0, d0 +# CHECK-NEXT: 1 2 0.50 ushl v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 ushl v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 ushl v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 ushll v0.4s, v0.4h, #3 +# CHECK-NEXT: 1 3 0.50 ushll2 v0.8h, v0.16b, #3 +# CHECK-NEXT: 1 3 0.50 ushr d10, d17, #18 +# CHECK-NEXT: 1 3 0.50 ushr v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 3 0.50 ushr v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 3 0.50 ushr v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 3 0.50 ushr v0.4h, v0.4h, #3 +# CHECK-NEXT: 1 3 0.50 ushr v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 3 0.50 ushr v0.8b, v0.8b, #3 +# CHECK-NEXT: 1 3 0.50 ushr v0.8h, v0.8h, #3 +# CHECK-NEXT: 1 2 0.50 usqadd b19, b14 +# CHECK-NEXT: 1 2 0.50 usqadd d18, d22 +# CHECK-NEXT: 1 2 0.50 usqadd h20, h15 +# CHECK-NEXT: 1 2 0.50 usqadd s21, s12 +# CHECK-NEXT: 1 2 0.50 usqadd v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 usqadd v0.2d, v0.2d +# CHECK-NEXT: 1 2 0.50 usqadd v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 usqadd v0.4h, v0.4h +# CHECK-NEXT: 1 2 0.50 usqadd 
v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 usqadd v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 usqadd v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 usra d20, d13, #61 +# CHECK-NEXT: 1 2 0.50 usra v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 2 0.50 usra v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 2 0.50 usra v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 2 0.50 usra v0.4h, v0.4h, #3 +# CHECK-NEXT: 1 2 0.50 usra v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 2 0.50 usra v0.8b, v0.8b, #3 +# CHECK-NEXT: 1 2 0.50 usra v0.8h, v0.8h, #3 +# CHECK-NEXT: 1 2 0.50 usubl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 usubl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 2 0.50 usubl v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 usubl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 usubl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 2 0.50 usubl2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 usubw v0.2d, v0.2d, v0.2s +# CHECK-NEXT: 1 2 0.50 usubw v0.4s, v0.4s, v0.4h +# CHECK-NEXT: 1 2 0.50 usubw v0.8h, v0.8h, v0.8b +# CHECK-NEXT: 1 2 0.50 usubw2 v0.2d, v0.2d, v0.4s +# CHECK-NEXT: 1 2 0.50 usubw2 v0.4s, v0.4s, v0.8h +# CHECK-NEXT: 1 2 0.50 usubw2 v0.8h, v0.8h, v0.16b +# CHECK-NEXT: 1 2 0.50 uzp1 v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 uzp1 v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 2 0.50 uzp1 v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 uzp1 v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 2 0.50 uzp1 v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 uzp1 v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 uzp1 v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 2 0.50 uzp2 v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 uzp2 v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 2 0.50 uzp2 v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 uzp2 v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 2 0.50 uzp2 v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 uzp2 v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 uzp2 v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 2 0.50 xtn v0.2s, v0.2d +# CHECK-NEXT: 1 2 0.50 xtn v0.4h, v0.4s +# CHECK-NEXT: 1 2 0.50 xtn v0.8b, v0.8h +# CHECK-NEXT: 1 2 0.50 xtn2 v0.16b, v0.8h +# CHECK-NEXT: 1 2 0.50 xtn2 v0.4s, v0.2d +# CHECK-NEXT: 1 2 
0.50 xtn2 v0.8h, v0.4s +# CHECK-NEXT: 1 2 0.50 zip1 v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 zip1 v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 2 0.50 zip1 v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 zip1 v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 2 0.50 zip1 v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 zip1 v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 zip1 v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 2 0.50 zip2 v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 zip2 v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 2 0.50 zip2 v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 zip2 v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 2 0.50 zip2 v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 zip2 v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 zip2 v0.8h, v0.8h, v0.8h + +# CHECK: Resources: +# CHECK-NEXT: [0.0] - Ampere1BUnitA +# CHECK-NEXT: [0.1] - Ampere1BUnitA +# CHECK-NEXT: [1.0] - Ampere1BUnitB +# CHECK-NEXT: [1.1] - Ampere1BUnitB +# CHECK-NEXT: [2] - Ampere1BUnitBS +# CHECK-NEXT: [3.0] - Ampere1BUnitL +# CHECK-NEXT: [3.1] - Ampere1BUnitL +# CHECK-NEXT: [4.0] - Ampere1BUnitS +# CHECK-NEXT: [4.1] - Ampere1BUnitS +# CHECK-NEXT: [5] - Ampere1BUnitX +# CHECK-NEXT: [6] - Ampere1BUnitY +# CHECK-NEXT: [7] - Ampere1BUnitZ + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4.0] [4.1] [5] [6] [7] +# CHECK-NEXT: - - - - 11.00 51.00 51.00 29.00 29.00 604.50 584.50 58.00 + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4.0] [4.1] [5] [6] [7] Instructions: +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - abs d29, d24 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - abs v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - abs v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - abs v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - abs v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - abs v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - abs v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - abs v0.8h, 
v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - add d17, d31, d29 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - add v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - addhn v0.2s, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - addhn v0.4h, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - addhn v0.8b, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - addhn2 v0.16b, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - addhn2 v0.4s, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - addhn2 v0.8h, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - addp v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - addp v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - and v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - bic v0.4h, #15, lsl #8 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - bic v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - bif v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - bit v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - bsl v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - cls v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - cls v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - cls v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - cls v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - cls v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - cls v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - clz v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - clz v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - clz v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - clz v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - clz v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - clz v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - cmeq d20, d21, #0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - cmeq d20, d21, 
d22 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - cmeq v0.16b, v0.16b, #0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - cmeq v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - cmge d20, d21, #0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - cmge d20, d21, d22 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - cmge v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - cmge v0.8b, v0.8b, #0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - cmgt d20, d21, #0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - cmgt d20, d21, d22 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - cmgt v0.2s, v0.2s, #0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - cmgt v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - cmhi d20, d21, d22 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - cmhi v0.8h, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - cmhs d20, d21, d22 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - cmhs v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - cmle d20, d21, #0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - cmle v0.2d, v0.2d, #0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - cmlt d20, d21, #0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - cmlt v0.8h, v0.8h, #0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - cmtst d20, d21, d22 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - cmtst v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - cnt v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - cnt v0.8b, v0.8b +# CHECK-NEXT: - - - - 1.00 - - - - - - - dup v0.16b, w28 +# CHECK-NEXT: - - - - 1.00 - - - - - - - dup v0.2d, x28 +# CHECK-NEXT: - - - - 1.00 - - - - - - - dup v0.2s, w28 +# CHECK-NEXT: - - - - 1.00 - - - - - - - dup v0.4h, w28 +# CHECK-NEXT: - - - - 1.00 - - - - - - - dup v0.4s, w28 +# CHECK-NEXT: - - - - 1.00 - - - - - - - dup v0.8b, w28 +# CHECK-NEXT: - - - - 1.00 - - - - - - - dup v0.8h, w28 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - eor v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ext v0.16b, 
v0.16b, v0.16b, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ext v0.8b, v0.8b, v0.8b, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fabd d29, d24, d20 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fabd s29, s24, s20 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fabd v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fabs v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fabs v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fabs v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fabs v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fabs v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - facge d20, d21, d22 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - facge s10, s11, s12 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - facge v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - facgt d20, d21, d22 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - facgt s10, s11, s12 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - facgt v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fadd v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - faddp v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - faddp v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcmeq d20, d21, #0.0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcmeq d20, d21, d22 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcmeq s10, s11, #0.0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcmeq s10, s11, s12 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcmeq v0.2s, v0.2s, #0.0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcmeq v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcmge d20, d21, #0.0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcmge d20, d21, d22 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcmge s10, s11, #0.0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcmge s10, s11, s12 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcmge v0.2d, v0.2d, #0.0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 
fcmge v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcmgt d20, d21, #0.0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcmgt d20, d21, d22 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcmgt s10, s11, #0.0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcmgt s10, s11, s12 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcmgt v0.4s, v0.4s, #0.0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcmgt v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcmle d20, d21, #0.0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcmle s10, s11, #0.0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcmle v0.2d, v0.2d, #0.0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcmlt d20, d21, #0.0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcmlt s10, s11, #0.0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcmlt v0.4s, v0.4s, #0.0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtas d21, d14 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtas s12, s13 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtas v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtas v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtas v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtas v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtas v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtau d21, d14 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtau s12, s13 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtau v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtau v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtau v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtau v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtau v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtl v0.2d, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtl v0.4s, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtl2 v0.2d, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtl2 v0.4s, v0.8h +# CHECK-NEXT: 
- - - - - - - - - 0.50 0.50 - fcvtms d21, d14 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtms s22, s13 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtms v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtms v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtms v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtms v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtms v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtmu d21, d14 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtmu s12, s13 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtmu v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtmu v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtmu v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtmu v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtmu v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtn v0.2s, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtn v0.4h, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtn2 v0.4s, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtn2 v0.8h, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtns d21, d14 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtns s22, s13 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtns v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtns v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtns v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtns v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtns v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtnu d21, d14 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtnu s12, s13 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtnu v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtnu v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtnu v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtnu v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 
0.50 0.50 - fcvtnu v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtps d21, d14 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtps s22, s13 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtps v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtps v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtps v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtps v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtps v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtpu d21, d14 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtpu s12, s13 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtpu v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtpu v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtpu v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtpu v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtpu v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtxn s22, d13 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtxn v0.2s, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtxn2 v0.4s, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtzs d21, d12, #1 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtzs d21, d14 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtzs s12, s13 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtzs s21, s12, #1 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtzs v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtzs v0.2d, v0.2d, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtzs v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtzs v0.2s, v0.2s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtzs v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtzs v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtzs v0.4s, v0.4s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtzs v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtzu d21, d12, #1 +# CHECK-NEXT: - - - - - - - - - 
0.50 0.50 - fcvtzu d21, d14 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtzu s12, s13 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtzu s21, s12, #1 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtzu v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtzu v0.2d, v0.2d, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtzu v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtzu v0.2s, v0.2s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtzu v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtzu v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtzu v0.4s, v0.4s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fcvtzu v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 1.00 - - fdiv v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmax v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmax v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmax v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmaxnm v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmaxnm v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmaxnm v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmaxnmp v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmaxnmp v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmaxnmp v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmaxp v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmaxp v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmaxp v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmin v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmin v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmin v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fminnm v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fminnm v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 
0.50 0.50 - fminnm v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fminnmp v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fminnmp v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fminnmp v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fminp v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fminp v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fminp v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmla d0, d1, v0.d[1] +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmla s0, s1, v0.s[3] +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmla v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmls d0, d4, v0.d[1] +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmls s3, s5, v0.s[3] +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmls v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmov v0.2d, #-1.25000000 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmov v0.2s, #13.00000000 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmov v0.4s, #1.00000000 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmul d0, d1, v0.d[1] +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmul s0, s1, v0.s[3] +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmul v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmulx d0, d4, v0.d[1] +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmulx d23, d11, d1 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmulx s20, s22, s15 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmulx s3, s5, v0.s[3] +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmulx v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmulx v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fmulx v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fneg v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fneg v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fneg v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 
fneg v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fneg v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 1.00 - - frecpe d13, d13 +# CHECK-NEXT: - - - - - - - - - 1.00 - - frecpe s19, s14 +# CHECK-NEXT: - - - - - - - - - 1.00 - - frecpe v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 1.00 - - frecpe v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 1.00 - - frecpe v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 1.00 - - frecpe v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 1.00 - - frecpe v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frecps v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frecps d22, d30, d21 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frecps s21, s16, s13 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frecpx d16, d19 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frecpx s18, s10 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frinta v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frinta v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frinta v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frinta v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frinta v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frinti v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frinti v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frinti v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frinti v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frinti v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frintm v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frintm v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frintm v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frintm v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frintm v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frintn v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frintn v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frintn v0.4h, 
v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frintn v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frintn v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frintp v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frintp v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frintp v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frintp v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frintp v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frintx v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frintx v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frintx v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frintx v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frintx v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frintz v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frintz v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frintz v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frintz v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frintz v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 1.00 - - frsqrte d21, d12 +# CHECK-NEXT: - - - - - - - - - 1.00 - - frsqrte s22, s13 +# CHECK-NEXT: - - - - - - - - - 1.00 - - frsqrte v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 1.00 - - frsqrte v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 1.00 - - frsqrte v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 1.00 - - frsqrte v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 1.00 - - frsqrte v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frsqrts d8, d22, d18 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frsqrts s21, s5, s12 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - frsqrts v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 1.00 - - fsqrt v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 1.00 - - fsqrt v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 1.00 - - fsqrt v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 1.00 - - fsqrt v0.4s, v0.4s +# 
CHECK-NEXT: - - - - - - - - - 1.00 - - fsqrt v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fsub v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ld1 { v0.16b }, [x0] +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - ld1 { v0.2d, v1.2d, v2.2d }, [x0], #48 +# CHECK-NEXT: - - - - - 2.00 2.00 - - - - - ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - ld1 { v0.4s, v1.4s }, [sp], #32 +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - ld1 { v0.4s, v1.4s, v2.4s }, [sp] +# CHECK-NEXT: - - - - - 2.00 2.00 - - - - - ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3 +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - ld1 { v0.8h }, [x15], x2 +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - ld1 { v0.8h, v1.8h }, [x15] +# CHECK-NEXT: - - - - - 0.50 0.50 - - 0.50 0.50 - ld1 { v0.b }[9], [x0] +# CHECK-NEXT: - - - - - 0.50 0.50 - - 0.50 0.50 - ld1 { v0.b }[9], [x0], #1 +# CHECK-NEXT: - - - - - 0.50 0.50 - - 0.50 0.50 - ld1r { v0.16b }, [x0] +# CHECK-NEXT: - - - - - 0.50 0.50 - - 0.50 0.50 - ld1r { v0.16b }, [x0], #1 +# CHECK-NEXT: - - - - - 0.50 0.50 - - 0.50 0.50 - ld1r { v0.8h }, [x15] +# CHECK-NEXT: - - - - - 0.50 0.50 - - 0.50 0.50 - ld1r { v0.8h }, [x15], #2 +# CHECK-NEXT: - - - - - 1.00 1.00 - - 1.00 1.00 - ld2 { v0.16b, v1.16b }, [x0], x1 +# CHECK-NEXT: - - - - - 1.00 1.00 - - 1.50 1.50 - ld2 { v0.8b, v1.8b }, [x0] +# CHECK-NEXT: - - - - - 1.00 1.00 - - 1.00 1.00 - ld2 { v0.h, v1.h }[7], [x15] +# CHECK-NEXT: - - - - - 1.00 1.00 - - 1.00 1.00 - ld2 { v0.h, v1.h }[7], [x15], #4 +# CHECK-NEXT: - - - - - 1.00 1.00 - - 1.00 1.00 - ld2r { v0.2d, v1.2d }, [x0] +# CHECK-NEXT: - - - - - 1.00 1.00 - - 1.00 1.00 - ld2r { v0.2d, v1.2d }, [x0], #16 +# CHECK-NEXT: - - - - - 1.00 1.00 - - 1.00 1.00 - ld2r { v0.4s, v1.4s }, [sp] +# CHECK-NEXT: - - - - - 1.00 1.00 - - 1.00 1.00 - ld2r { v0.4s, v1.4s }, [sp], #8 +# CHECK-NEXT: - - - - - 1.50 1.50 - - 1.50 1.50 - ld3 { v0.4h, v1.4h, v2.4h }, [x15] +# CHECK-NEXT: - - - - - 1.50 1.50 - - 1.50 1.50 - ld3 { 
v0.8h, v1.8h, v2.8h }, [x15], x2 +# CHECK-NEXT: - - - - - 1.50 1.50 - - 1.50 1.50 - ld3 { v0.s, v1.s, v2.s }[3], [sp] +# CHECK-NEXT: - - - - - 1.50 1.50 - - 1.50 1.50 - ld3 { v0.s, v1.s, v2.s }[3], [sp], x3 +# CHECK-NEXT: - - - - - 1.50 1.50 - - 1.50 1.50 - ld3r { v0.4h, v1.4h, v2.4h }, [x15] +# CHECK-NEXT: - - - - - 1.50 1.50 - - 1.50 1.50 - ld3r { v0.4h, v1.4h, v2.4h }, [x15], #6 +# CHECK-NEXT: - - - - - 1.50 1.50 - - 1.50 1.50 - ld3r { v0.8b, v1.8b, v2.8b }, [x0] +# CHECK-NEXT: - - - - - 1.50 1.50 - - 1.50 1.50 - ld3r { v0.8b, v1.8b, v2.8b }, [x0], #3 +# CHECK-NEXT: - - - - - 2.00 2.00 - - 2.00 2.00 - ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] +# CHECK-NEXT: - - - - - 2.00 2.00 - - 2.00 2.00 - ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64 +# CHECK-NEXT: - - - - - 2.00 2.00 - - 2.00 2.00 - ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0] +# CHECK-NEXT: - - - - - 2.00 2.00 - - 2.00 2.00 - ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0], #32 +# CHECK-NEXT: - - - - - 2.00 2.00 - - 2.00 2.00 - ld4 { v0.h, v1.h, v2.h, v3.h }[7], [x0], x0 +# CHECK-NEXT: - - - - - 2.00 2.00 - - - - - ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp] +# CHECK-NEXT: - - - - - 2.00 2.00 - - - - - ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp], x7 +# CHECK-NEXT: - - - - - 2.00 2.00 - - 2.00 2.00 - ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] +# CHECK-NEXT: - - - - - 2.00 2.00 - - 2.00 2.00 - ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x30 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - mla v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - mls v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - mov b0, v0.b[15] +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - mov d6, v0.d[1] +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - mov h2, v0.h[5] +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - mov s17, v0.s[2] +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - mov v2.b[0], v0.b[0] +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - mov v2.h[1], v0.h[1] +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - mov v2.s[2], v0.s[2] +# CHECK-NEXT: - 
- - - - - - - - 0.50 0.50 - mov v2.d[1], v0.d[1] +# CHECK-NEXT: - - - - 1.00 - - - - 0.50 0.50 - mov v0.b[0], w8 +# CHECK-NEXT: - - - - 1.00 - - - - 0.50 0.50 - mov v0.h[1], w8 +# CHECK-NEXT: - - - - 1.00 - - - - 0.50 0.50 - mov v0.s[2], w8 +# CHECK-NEXT: - - - - 1.00 - - - - 0.50 0.50 - mov v0.d[1], x8 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - mov v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - mov v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - movi d15, #0xff00ff00ff00ff +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - movi v0.16b, #31 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - movi v0.2d, #0xff0000ff0000ffff +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - movi v0.2s, #8, msl #8 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - movi v0.4s, #255, lsl #24 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - movi v0.8b, #255 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - mul v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - mvni v0.2s, #0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - mvni v0.4s, #16, msl #16 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - neg d29, d24 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - neg v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - neg v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - neg v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - neg v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - neg v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - neg v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - neg v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - mvn v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - mvn v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - orn v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - mov v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - orr v0.8h, #31 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - pmul v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - pmul v0.8b, v0.8b, 
v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - pmull v0.8h, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - pmull2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - raddhn v0.2s, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - raddhn v0.4h, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - raddhn v0.8b, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - raddhn2 v0.16b, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - raddhn2 v0.4s, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - raddhn2 v0.8h, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - rbit v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - rbit v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - rev16 v21.8b, v1.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - rev16 v30.16b, v31.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - rev32 v0.4h, v9.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - rev32 v21.8b, v1.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - rev32 v30.16b, v31.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - rev32 v4.8h, v7.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - rev64 v0.16b, v31.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - rev64 v1.8b, v9.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - rev64 v13.4h, v21.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - rev64 v2.8h, v4.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - rev64 v4.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - rev64 v6.4s, v8.4s +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - rshrn v0.2s, v0.2d, #3 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - rshrn v0.4h, v0.4s, #3 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - rshrn v0.8b, v0.8h, #3 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - rshrn2 v0.16b, v0.8h, #3 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - rshrn2 v0.4s, v0.2d, #3 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - rshrn2 v0.8h, v0.4s, #3 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - rsubhn v0.2s, v0.2d, v0.2d +# CHECK-NEXT: - 
- - - - - - - - 1.00 1.00 - rsubhn v0.4h, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - rsubhn v0.8b, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - rsubhn2 v0.16b, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - rsubhn2 v0.4s, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - rsubhn2 v0.8h, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - saba v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sabal v0.2d, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sabal v0.4s, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sabal v0.8h, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sabal2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sabal2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sabal2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sabd v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sabdl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sabdl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sabdl v0.8h, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sabdl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sabdl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sabdl2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sadalp v0.1d, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sadalp v0.2d, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sadalp v0.2s, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sadalp v0.4h, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sadalp v0.4s, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sadalp v0.8h, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - saddl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - saddl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - saddl v0.8h, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 
- saddl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - saddl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - saddl2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - saddlp v0.1d, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - saddlp v0.2d, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - saddlp v0.2s, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - saddlp v0.4h, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - saddlp v0.4s, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - saddlp v0.8h, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - saddw v0.2d, v0.2d, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - saddw v0.4s, v0.4s, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - saddw v0.8h, v0.8h, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - saddw2 v0.2d, v0.2d, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - saddw2 v0.4s, v0.4s, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - saddw2 v0.8h, v0.8h, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - scvtf d21, d12 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - scvtf d21, d12, #64 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - scvtf s22, s13 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - scvtf s22, s13, #32 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - scvtf v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - scvtf v0.2d, v0.2d, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - scvtf v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - scvtf v0.2s, v0.2s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - scvtf v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - scvtf v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - scvtf v0.4s, v0.4s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - scvtf v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - shadd v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - shl d7, d10, #12 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - shl v0.16b, v0.16b, #3 +# CHECK-NEXT: - - - - - - - 
- - 0.50 0.50 - shl v0.2d, v0.2d, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - shl v0.4h, v0.4h, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - shl v0.4s, v0.4s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - shll v0.2d, v0.2s, #32 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - shll v0.4s, v0.4h, #16 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - shll v0.8h, v0.8b, #8 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - shll v0.2d, v0.2s, #32 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - shll v0.4s, v0.4h, #16 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - shll v0.8h, v0.8b, #8 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - shll2 v0.2d, v0.4s, #32 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - shll2 v0.4s, v0.8h, #16 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - shll2 v0.8h, v0.16b, #8 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - shll2 v0.2d, v0.4s, #32 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - shll2 v0.4s, v0.8h, #16 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - shll2 v0.8h, v0.16b, #8 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - shrn v0.2s, v0.2d, #3 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - shrn v0.4h, v0.4s, #3 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - shrn v0.8b, v0.8h, #3 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - shrn2 v0.16b, v0.8h, #3 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - shrn2 v0.4s, v0.2d, #3 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - shrn2 v0.8h, v0.4s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - shsub v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - shsub v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sli d10, d14, #12 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sli v0.16b, v0.16b, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sli v0.2d, v0.2d, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sli v0.2s, v0.2s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sli v0.4h, v0.4h, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sli v0.4s, v0.4s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sli v0.8b, v0.8b, #3 
+# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sli v0.8h, v0.8h, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - smax v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - smax v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - smax v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - smaxp v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - smaxp v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - smaxp v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - smin v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - smin v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - smin v0.8h, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sminp v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sminp v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sminp v0.8h, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - smlal v0.2d, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - smlal v0.4s, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - smlal v0.8h, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - smlal2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - smlal2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - smlal2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - smlsl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - smlsl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - smlsl v0.8h, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - smlsl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - smlsl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - smlsl2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - smull v0.2d, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - smull v0.4s, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - smull v0.8h, v0.8b, v0.8b +# 
CHECK-NEXT: - - - - - - - - - 0.50 0.50 - smull2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - smull2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - smull2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqabs b19, b14 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqabs d18, d12 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqabs h21, h15 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqabs s20, s12 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqabs v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqabs v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqabs v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqabs v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqabs v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqabs v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqabs v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqadd b20, b11, b15 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqadd v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqadd v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqdmlal d19, s24, s12 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqdmlal d8, s9, v0.s[1] +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqdmlal s0, h0, v0.h[3] +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqdmlal s17, h27, h12 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqdmlal v0.2d, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqdmlal v0.4s, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqdmlal2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqdmlal2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqdmlsl d12, s23, s13 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqdmlsl d8, s9, v0.s[1] +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqdmlsl s0, h0, v0.h[3] +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqdmlsl s14, h12, h25 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 
sqdmlsl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqdmlsl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqdmlsl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqdmlsl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqdmulh h10, h11, h12 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqdmulh h7, h15, v0.h[3] +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqdmulh s15, s14, v0.s[1] +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqdmulh s20, s21, s2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqdmulh v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqdmulh v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqdmull d1, s1, v0.s[1] +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqdmull d15, s22, s12 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqdmull s1, h1, v0.h[3] +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqdmull s12, h22, h12 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqdmull v0.2d, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqdmull v0.4s, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqdmull2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqdmull2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqneg b19, b14 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqneg d18, d12 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqneg h21, h15 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqneg s20, s12 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqneg v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqneg v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqneg v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqneg v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqneg v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqneg v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqneg v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqrdmulh h10, h11, h12 +# 
CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqrdmulh h7, h15, v0.h[3] +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqrdmulh s15, s14, v0.s[1] +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqrdmulh s20, s21, s2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqrdmulh v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqrdmulh v0.8h, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqrshl d31, d31, d31 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqrshl h3, h4, h15 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqrshl v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqrshl v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqrshl v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqrshrn b10, h13, #2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqrshrn h15, s10, #6 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqrshrn s15, d12, #9 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqrshrn v0.2s, v0.2d, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqrshrn v0.4h, v0.4s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqrshrn v0.8b, v0.8h, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqrshrn2 v0.16b, v0.8h, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqrshrn2 v0.4s, v0.2d, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqrshrn2 v0.8h, v0.4s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqrshrun b17, h10, #6 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqrshrun h10, s13, #15 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqrshrun s22, d16, #31 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqrshrun v0.2s, v0.2d, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqrshrun v0.4h, v0.4s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqrshrun v0.8b, v0.8h, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqrshrun2 v0.16b, v0.8h, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqrshrun2 v0.4s, v0.2d, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqrshrun2 v0.8h, v0.4s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 
- sqshl b11, b19, #7 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqshl d15, d16, #51 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqshl d31, d31, d31 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqshl h13, h18, #11 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqshl h3, h4, h15 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqshl s14, s17, #22 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqshl v0.16b, v0.16b, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqshl v0.2d, v0.2d, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqshl v0.2s, v0.2s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqshl v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqshl v0.4h, v0.4h, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqshl v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqshl v0.4s, v0.4s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqshl v0.8b, v0.8b, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqshl v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqshl v0.8h, v0.8h, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqshlu b15, b18, #6 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqshlu d11, d13, #32 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqshlu h19, h17, #6 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqshlu s16, s14, #25 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqshlu v0.16b, v0.16b, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqshlu v0.2d, v0.2d, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqshlu v0.2s, v0.2s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqshlu v0.4h, v0.4h, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqshlu v0.4s, v0.4s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqshlu v0.8b, v0.8b, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqshlu v0.8h, v0.8h, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqshrn b10, h15, #5 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqshrn h17, s10, #4 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqshrn s18, d10, #31 +# CHECK-NEXT: - - - 
- - - - - - 1.00 1.00 - sqshrn v0.2s, v0.2d, #3 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - sqshrn v0.4h, v0.4s, #3 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - sqshrn v0.8b, v0.8h, #3 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - sqshrn2 v0.16b, v0.8h, #3 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - sqshrn2 v0.4s, v0.2d, #3 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - sqshrn2 v0.8h, v0.4s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqshrun b15, h10, #7 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqshrun h20, s14, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqshrun s10, d15, #15 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - sqshrun v0.2s, v0.2d, #3 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - sqshrun v0.4h, v0.4s, #3 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - sqshrun v0.8b, v0.8h, #3 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - sqshrun2 v0.16b, v0.8h, #3 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - sqshrun2 v0.4s, v0.2d, #3 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - sqshrun2 v0.8h, v0.4s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqsub s20, s10, s7 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqsub v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqsub v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqsub v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqxtn b18, h18 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqxtn h20, s17 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqxtn s19, d14 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqxtn v0.2s, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqxtn v0.4h, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqxtn v0.8b, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqxtn2 v0.16b, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqxtn2 v0.4s, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqxtn2 v0.8h, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqxtun b19, h14 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqxtun h21, s15 +# 
CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqxtun s20, d12 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqxtun v0.2s, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqxtun v0.4h, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqxtun v0.8b, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqxtun2 v0.16b, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqxtun2 v0.4s, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sqxtun2 v0.8h, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - srhadd v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - srhadd v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - srhadd v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sri d10, d12, #14 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sri v0.16b, v0.16b, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sri v0.2d, v0.2d, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sri v0.2s, v0.2s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sri v0.4h, v0.4h, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sri v0.4s, v0.4s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sri v0.8b, v0.8b, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sri v0.8h, v0.8h, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - srshl d16, d16, d16 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - srshl v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - srshl v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - srshl v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - srshr d19, d18, #7 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - srshr v0.16b, v0.16b, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - srshr v0.2d, v0.2d, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - srshr v0.2s, v0.2s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - srshr v0.4h, v0.4h, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - srshr v0.4s, v0.4s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - srshr v0.8b, v0.8b, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - srshr 
v0.8h, v0.8h, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - srsra d15, d11, #19 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - srsra v0.16b, v0.16b, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - srsra v0.2d, v0.2d, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - srsra v0.2s, v0.2s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - srsra v0.4h, v0.4h, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - srsra v0.4s, v0.4s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - srsra v0.8b, v0.8b, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - srsra v0.8h, v0.8h, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sshl d31, d31, d31 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sshl v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sshl v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sshl v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sshl v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sshll v0.2d, v0.2s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sshll2 v0.4s, v0.8h, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sshr d15, d16, #12 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sshr v0.16b, v0.16b, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sshr v0.2d, v0.2d, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sshr v0.2s, v0.2s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sshr v0.4h, v0.4h, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sshr v0.4s, v0.4s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sshr v0.8b, v0.8b, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sshr v0.8h, v0.8h, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ssra d18, d12, #21 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ssra v0.16b, v0.16b, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ssra v0.2d, v0.2d, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ssra v0.2s, v0.2s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ssra v0.4h, v0.4h, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ssra v0.4s, v0.4s, #3 +# CHECK-NEXT: - - - - 
- - - - - 0.50 0.50 - ssra v0.8b, v0.8b, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ssra v0.8h, v0.8h, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ssubl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ssubl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ssubl v0.8h, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ssubl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ssubl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ssubl2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ssubw v0.2d, v0.2d, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ssubw v0.4s, v0.4s, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ssubw v0.8h, v0.8h, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ssubw2 v0.2d, v0.2d, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ssubw2 v0.4s, v0.4s, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ssubw2 v0.8h, v0.8h, v0.16b +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - 1.00 st1 { v0.16b }, [x0] +# CHECK-NEXT: - - - - - - - 1.50 1.50 - - 3.00 st1 { v0.2d, v1.2d, v2.2d }, [x0], #48 +# CHECK-NEXT: - - - - - - - 2.00 2.00 - - 4.00 st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - 2.00 st1 { v0.4s, v1.4s }, [sp], #32 +# CHECK-NEXT: - - - - - - - 1.50 1.50 - - 3.00 st1 { v0.4s, v1.4s, v2.4s }, [sp] +# CHECK-NEXT: - - - - - - - 2.00 2.00 - - 4.00 st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3 +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - 1.00 st1 { v0.8h }, [x15], x2 +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - 2.00 st1 { v0.8h, v1.8h }, [x15] +# CHECK-NEXT: - - - - - - - 0.50 0.50 0.50 0.50 1.00 st1 { v0.d }[1], [x0] +# CHECK-NEXT: - - - - - - - 0.50 0.50 0.50 0.50 1.00 st1 { v0.d }[1], [x0], #8 +# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 1.00 2.00 st2 { v0.16b, v1.16b }, [x0], x1 +# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 1.00 2.00 st2 { v0.8b, v1.8b }, [x0] +# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 1.00 2.00 
st2 { v0.s, v1.s }[3], [sp] +# CHECK-NEXT: - - - - - - - 1.00 1.00 1.00 1.00 2.00 st2 { v0.s, v1.s }[3], [sp], #8 +# CHECK-NEXT: - - - - - - - 1.50 1.50 1.50 1.50 3.00 st3 { v0.4h, v1.4h, v2.4h }, [x15] +# CHECK-NEXT: - - - - - - - 1.50 1.50 1.50 1.50 3.00 st3 { v0.8h, v1.8h, v2.8h }, [x15], x2 +# CHECK-NEXT: - - - - - - - 1.50 1.50 1.50 1.50 3.00 st3 { v0.h, v1.h, v2.h }[7], [x15] +# CHECK-NEXT: - - - - - - - 1.50 1.50 1.50 1.50 3.00 st3 { v0.h, v1.h, v2.h }[7], [x15], #6 +# CHECK-NEXT: - - - - - - - 2.00 2.00 3.00 3.00 4.00 st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] +# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 2.00 4.00 st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64 +# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 2.00 4.00 st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0] +# CHECK-NEXT: - - - - - - - 2.00 2.00 2.00 2.00 4.00 st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0], x5 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sub d15, d5, d16 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - sub v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - suqadd b19, b14 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - suqadd d18, d22 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - suqadd h20, h15 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - suqadd s21, s12 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - suqadd v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - suqadd v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - suqadd v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - suqadd v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - suqadd v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - suqadd v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - suqadd v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - tbl v0.16b, { v0.16b }, v0.16b +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - tbl v0.16b, { v0.16b, v1.16b }, v0.16b +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b +# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - tbl 
v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - tbl v0.8b, { v0.16b }, v0.8b +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - tbl v0.8b, { v0.16b, v1.16b }, v0.8b +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - tbl v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b +# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - tbx v0.16b, { v0.16b }, v0.16b +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - tbx v0.16b, { v0.16b, v1.16b }, v0.16b +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - tbx v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b +# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - tbx v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - tbx v0.8b, { v0.16b }, v0.8b +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - tbx v0.8b, { v0.16b, v1.16b }, v0.8b +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - tbx v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b +# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - tbx v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - trn1 v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - trn1 v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - trn1 v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - trn1 v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - trn1 v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - trn1 v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - trn1 v0.8h, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - trn2 v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - trn2 v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - trn2 v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - trn2 v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - trn2 v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - trn2 v0.8b, 
v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - trn2 v0.8h, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uaba v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uabal v0.2d, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uabal v0.4s, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uabal v0.8h, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uabal2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uabal2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uabal2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uabd v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uabdl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uabdl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uabdl v0.8h, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uabdl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uabdl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uabdl2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uadalp v0.1d, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uadalp v0.2d, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uadalp v0.2s, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uadalp v0.4h, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uadalp v0.4s, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uadalp v0.8h, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uaddl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uaddl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uaddl v0.8h, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uaddl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uaddl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uaddl2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uaddlp v0.1d, v0.2s +# CHECK-NEXT: - - - - - - 
- - - 0.50 0.50 - uaddlp v0.2d, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uaddlp v0.2s, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uaddlp v0.4h, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uaddlp v0.4s, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uaddlp v0.8h, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uaddw v0.2d, v0.2d, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uaddw v0.4s, v0.4s, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uaddw v0.8h, v0.8h, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uaddw2 v0.2d, v0.2d, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uaddw2 v0.4s, v0.4s, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uaddw2 v0.8h, v0.8h, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ucvtf d21, d14 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ucvtf d21, d14, #64 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ucvtf s22, s13 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ucvtf s22, s13, #32 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ucvtf v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ucvtf v0.2d, v0.2d, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ucvtf v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ucvtf v0.2s, v0.2s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ucvtf v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ucvtf v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ucvtf v0.4s, v0.4s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ucvtf v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uhadd v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uhadd v0.8h, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uhsub v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - umax v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - umax v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - umax v0.8h, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - umaxp v0.16b, v0.16b, 
v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - umaxp v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - umaxp v0.8h, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - umin v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - umin v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - umin v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uminp v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uminp v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uminp v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - umlal v0.2d, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - umlal v0.4s, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - umlal v0.8h, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - umlal2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - umlal2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - umlal2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - umlsl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - umlsl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - umlsl v0.8h, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - umlsl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - umlsl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - umlsl2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - umull v0.2d, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - umull v0.4s, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - umull v0.8h, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - umull2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - umull2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - umull2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqadd h0, h1, h5 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqadd v0.8h, v0.8h, v0.8h 
+# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqrshl b11, b20, b30 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqrshl s23, s20, s16 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqrshl v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqrshl v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqrshl v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqrshl v0.8h, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqrshrn b10, h12, #5 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqrshrn h12, s10, #14 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqrshrn s10, d10, #25 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqrshrn v0.2s, v0.2d, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqrshrn v0.4h, v0.4s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqrshrn v0.8b, v0.8h, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqrshrn2 v0.16b, v0.8h, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqrshrn2 v0.4s, v0.2d, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqrshrn2 v0.8h, v0.4s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqshl b11, b20, b30 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqshl b18, b15, #6 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqshl d15, d12, #19 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqshl h11, h18, #7 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqshl s14, s19, #18 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqshl s23, s20, s16 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqshl v0.16b, v0.16b, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqshl v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqshl v0.2d, v0.2d, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqshl v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqshl v0.2s, v0.2s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqshl v0.4h, v0.4h, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqshl v0.4s, v0.4s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqshl v0.4s, v0.4s, v0.4s +# 
CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqshl v0.8b, v0.8b, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqshl v0.8h, v0.8h, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqshl v0.8h, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqshrn b12, h10, #7 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqshrn h10, s14, #5 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqshrn s10, d12, #13 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqshrn v0.2s, v0.2d, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqshrn v0.4h, v0.4s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqshrn v0.8b, v0.8h, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqshrn2 v0.16b, v0.8h, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqshrn2 v0.4s, v0.2d, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqshrn2 v0.8h, v0.4s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqsub d16, d16, d16 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uqsub v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - uqxtn b18, h18 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - uqxtn h20, s17 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - uqxtn s19, d14 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - uqxtn v0.2s, v0.2d +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - uqxtn v0.4h, v0.4s +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - uqxtn v0.8b, v0.8h +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - uqxtn2 v0.16b, v0.8h +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - uqxtn2 v0.4s, v0.2d +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - uqxtn2 v0.8h, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - urecpe v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - urecpe v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - urhadd v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - urhadd v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - urhadd v0.8h, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - urshl d8, d7, d4 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - urshl v0.16b, v0.16b, 
v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - urshl v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - urshl v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - urshl v0.8h, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - urshr d20, d23, #31 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - urshr v0.16b, v0.16b, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - urshr v0.2d, v0.2d, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - urshr v0.2s, v0.2s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - urshr v0.4h, v0.4h, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - urshr v0.4s, v0.4s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - urshr v0.8b, v0.8b, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - urshr v0.8h, v0.8h, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ursqrte v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ursqrte v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ursra d18, d10, #13 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ursra v0.16b, v0.16b, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ursra v0.2d, v0.2d, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ursra v0.2s, v0.2s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ursra v0.4h, v0.4h, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ursra v0.4s, v0.4s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ursra v0.8b, v0.8b, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ursra v0.8h, v0.8h, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ushl d0, d0, d0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ushl v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ushl v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ushl v0.8h, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ushll v0.4s, v0.4h, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ushll2 v0.8h, v0.16b, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ushr d10, d17, #18 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ushr v0.16b, v0.16b, #3 +# CHECK-NEXT: - 
- - - - - - - - 0.50 0.50 - ushr v0.2d, v0.2d, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ushr v0.2s, v0.2s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ushr v0.4h, v0.4h, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ushr v0.4s, v0.4s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ushr v0.8b, v0.8b, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - ushr v0.8h, v0.8h, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - usqadd b19, b14 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - usqadd d18, d22 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - usqadd h20, h15 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - usqadd s21, s12 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - usqadd v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - usqadd v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - usqadd v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - usqadd v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - usqadd v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - usqadd v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - usqadd v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - usra d20, d13, #61 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - usra v0.16b, v0.16b, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - usra v0.2d, v0.2d, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - usra v0.2s, v0.2s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - usra v0.4h, v0.4h, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - usra v0.4s, v0.4s, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - usra v0.8b, v0.8b, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - usra v0.8h, v0.8h, #3 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - usubl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - usubl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - usubl v0.8h, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - usubl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - usubl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: - - - 
- - - - - - 0.50 0.50 - usubl2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - usubw v0.2d, v0.2d, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - usubw v0.4s, v0.4s, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - usubw v0.8h, v0.8h, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - usubw2 v0.2d, v0.2d, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - usubw2 v0.4s, v0.4s, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - usubw2 v0.8h, v0.8h, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uzp1 v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uzp1 v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uzp1 v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uzp1 v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uzp1 v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uzp1 v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uzp1 v0.8h, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uzp2 v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uzp2 v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uzp2 v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uzp2 v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uzp2 v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uzp2 v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - uzp2 v0.8h, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - xtn v0.2s, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - xtn v0.4h, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - xtn v0.8b, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - xtn2 v0.16b, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - xtn2 v0.4s, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - xtn2 v0.8h, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - zip1 v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - zip1 v0.2d, v0.2d, v0.2d +# 
CHECK-NEXT: - - - - - - - - - 0.50 0.50 - zip1 v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - zip1 v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - zip1 v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - zip1 v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - zip1 v0.8h, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - zip2 v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - zip2 v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - zip2 v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - zip2 v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - zip2 v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - zip2 v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - zip2 v0.8h, v0.8h, v0.8h diff --git a/llvm/test/tools/llvm-mca/AArch64/Ampere/Ampere1B/shifted-register.s b/llvm/test/tools/llvm-mca/AArch64/Ampere/Ampere1B/shifted-register.s new file mode 100644 index 00000000000000..27e0279a701013 --- /dev/null +++ b/llvm/test/tools/llvm-mca/AArch64/Ampere/Ampere1B/shifted-register.s @@ -0,0 +1,31 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -march=aarch64 -mcpu=ampere1b -resource-pressure=false < %s | FileCheck %s + + add w0, w1, w2, lsl #0 + sub x3, x4, x5, lsl #1 + adds x6, x7, x8, lsr #2 + subs x9, x10, x11, asr #3 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 400 +# CHECK-NEXT: Total Cycles: 156 +# CHECK-NEXT: Total uOps: 600 + +# CHECK: Dispatch Width: 12 +# CHECK-NEXT: uOps Per Cycle: 3.85 +# CHECK-NEXT: IPC: 2.56 +# CHECK-NEXT: Block RThroughput: 1.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 0.25 add w0, w1, w2 +# 
CHECK-NEXT: 1 1 0.25 sub x3, x4, x5, lsl #1 +# CHECK-NEXT: 2 2 0.50 adds x6, x7, x8, lsr #2 +# CHECK-NEXT: 2 2 0.50 subs x9, x10, x11, asr #3 From 2d7fdfa61f8b037ce65e9c5482f422b37d6c0b99 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Wed, 14 Feb 2024 09:35:40 -0500 Subject: [PATCH 139/240] [gn] port 09e98950bfcf (InstallAPI) --- llvm/utils/gn/secondary/clang/lib/Frontend/BUILD.gn | 3 +++ llvm/utils/gn/secondary/clang/lib/InstallAPI/BUILD.gn | 10 ++++++++++ llvm/utils/gn/secondary/clang/test/BUILD.gn | 1 + 3 files changed, 14 insertions(+) create mode 100644 llvm/utils/gn/secondary/clang/lib/InstallAPI/BUILD.gn diff --git a/llvm/utils/gn/secondary/clang/lib/Frontend/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Frontend/BUILD.gn index 948d1405676b70..5c4d7e1f4f5593 100644 --- a/llvm/utils/gn/secondary/clang/lib/Frontend/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Frontend/BUILD.gn @@ -8,6 +8,7 @@ static_library("Frontend") { "//clang/lib/Basic", "//clang/lib/Driver", "//clang/lib/Edit", + "//clang/lib/InstallAPI", "//clang/lib/Lex", "//clang/lib/Parse", "//clang/lib/Sema", @@ -18,6 +19,7 @@ static_library("Frontend") { "//llvm/lib/ProfileData", "//llvm/lib/Support", "//llvm/lib/TargetParser", + "//llvm/lib/TextAPI", ] sources = [ "ASTConsumers.cpp", @@ -38,6 +40,7 @@ static_library("Frontend") { "InitPreprocessor.cpp", "InterfaceStubFunctionsConsumer.cpp", "LayoutOverrideSource.cpp", + "InstallAPIConsumer.cpp", "LogDiagnosticPrinter.cpp", "ModuleDependencyCollector.cpp", "MultiplexConsumer.cpp", diff --git a/llvm/utils/gn/secondary/clang/lib/InstallAPI/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/InstallAPI/BUILD.gn new file mode 100644 index 00000000000000..4d79ac805ac193 --- /dev/null +++ b/llvm/utils/gn/secondary/clang/lib/InstallAPI/BUILD.gn @@ -0,0 +1,10 @@ +static_library("InstallAPI") { + output_name = "clangInstallAPI" + configs += [ "//llvm/utils/gn/build:clang_code" ] + deps = [ + "//clang/lib/AST", + "//llvm/lib/Support", + "//llvm/lib/TextAPI", + ] 
+ sources = [ "Context.cpp" ] +} diff --git a/llvm/utils/gn/secondary/clang/test/BUILD.gn b/llvm/utils/gn/secondary/clang/test/BUILD.gn index 3e19ee5d08d761..c7df8039723b1e 100644 --- a/llvm/utils/gn/secondary/clang/test/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/test/BUILD.gn @@ -187,6 +187,7 @@ group("test") { "//llvm/tools/llvm-profdata", "//llvm/tools/llvm-rc:symlinks", "//llvm/tools/llvm-readobj:symlinks", + "//llvm/tools/llvm-readtapi:symlinks", "//llvm/tools/llvm-symbolizer:symlinks", "//llvm/tools/opt", "//llvm/tools/yaml2obj", From a50bd0d799e3f3fbb759ab842691a0bb570f8614 Mon Sep 17 00:00:00 2001 From: Orlando Cazalet-Hyams Date: Wed, 14 Feb 2024 14:48:16 +0000 Subject: [PATCH 140/240] [RemoveDIs] Replicate dbg intrinsic movement pattern in SelectOptimize (#81737) Fix crash mentioned in comments on d759618df76361a8e490eeae5c5399e0738cbfd0. The assertion being hit was complaining that we had dangling DPValues; the DPValues attached to the terminator of StartBlock become dangling after the terminator is erased, and they're never "flushed" back onto the new terminator once it's added. Doing that makes the crash go away, but doesn't replicate existing dbg.* behaviour. See the comment in the patch. This change both fixes the crash (because there are now no DPValues left on the terminator to dangle) and replicates existing behaviour (moves those DPValues down to the new block). 
--- llvm/lib/CodeGen/SelectOptimize.cpp | 6 ++ .../select-optimize-trailing-dbg-records.ll | 63 +++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 llvm/test/DebugInfo/AArch64/select-optimize-trailing-dbg-records.ll diff --git a/llvm/lib/CodeGen/SelectOptimize.cpp b/llvm/lib/CodeGen/SelectOptimize.cpp index 31c4b63698b5de..5609f481b22a71 100644 --- a/llvm/lib/CodeGen/SelectOptimize.cpp +++ b/llvm/lib/CodeGen/SelectOptimize.cpp @@ -621,6 +621,12 @@ void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { SelectLike LastSI = ASI.back(); BasicBlock *StartBlock = SI.getI()->getParent(); BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI.getI())); + // With RemoveDIs turned off, SplitPt can be a dbg.* intrinsic. With + // RemoveDIs turned on, SplitPt would instead point to the next + // instruction. To match existing dbg.* intrinsic behaviour with RemoveDIs, + // tell splitBasicBlock that we want to include any DPValues attached to + // SplitPt in the splice. + SplitPt.setHeadBit(true); BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock)); // Delete the unconditional branch that was just created by the split. diff --git a/llvm/test/DebugInfo/AArch64/select-optimize-trailing-dbg-records.ll b/llvm/test/DebugInfo/AArch64/select-optimize-trailing-dbg-records.ll new file mode 100644 index 00000000000000..4ae1fb4fc7bcc0 --- /dev/null +++ b/llvm/test/DebugInfo/AArch64/select-optimize-trailing-dbg-records.ll @@ -0,0 +1,63 @@ +; RUN: opt %s -passes='require,function(select-optimize)' -o - -S \ +; RUN: | FileCheck %s +; RUN: opt %s --try-experimental-debuginfo-iterators -passes='require,function(select-optimize)' -o - -S \ +; RUN: | FileCheck %s + +;; Check that the dbg.value is moved into the start of the end-block of the +;; inserted if-block. 
+ +; CHECK: select.end: +; CHECK-NEXT: %[[PHI:.*]] = phi i32 +; CHECK-NEXT: dbg.value(metadata i32 %[[PHI]], + +source_filename = "test.ll" +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-fuchsia" + +%struct.hb_glyph_info_t = type { i32, i32, i32, %union._hb_var_int_t, %union._hb_var_int_t } +%union._hb_var_int_t = type { i32 } + +define void @_Z22_hb_ot_shape_normalizePK18hb_ot_shape_plan_tP11hb_buffer_tP9hb_font_t() { +entry: + br label %while.body193 + +while.body193: ; preds = %while.body193, %entry + %starter.0337 = phi i32 [ %spec.select322, %while.body193 ], [ 0, %entry ] + %idxprom207 = zext i32 %starter.0337 to i64 + %arrayidx208 = getelementptr %struct.hb_glyph_info_t, ptr null, i64 %idxprom207 + %0 = load i32, ptr %arrayidx208, align 4 + %call247.val = load i16, ptr null, align 4 + %cmp249327 = icmp ult i16 %call247.val, 0 + %cmp249 = select i1 false, i1 false, i1 %cmp249327 + %spec.select322 = select i1 %cmp249, i32 0, i32 %starter.0337 + tail call void @llvm.dbg.value(metadata i32 %spec.select322, metadata !13, metadata !DIExpression()), !dbg !20 + br label %while.body193 +} + +declare void @llvm.dbg.value(metadata, metadata, metadata) + + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!12} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !2, globals: !3, imports: !2, splitDebugInlining: false, nameTableKind: GNU) +!1 = !DIFile(filename: "../../third_party/harfbuzz-ng/src/src/hb-ot-shape-normalize.cc", directory: ".") +!2 = !{} +!3 = !{!4, !9} +!4 = !DIGlobalVariableExpression(var: !5, expr: !DIExpression()) +!5 = distinct !DIGlobalVariable(scope: null, file: !1, line: 383, type: !6, isLocal: true, isDefinition: true) +!6 = !DICompositeType(tag: DW_TAG_array_type, baseType: !7, size: 112, elements: !2) +!7 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !8) +!8 = 
!DIBasicType(name: "char", size: 8, encoding: DW_ATE_unsigned_char) +!9 = !DIGlobalVariableExpression(var: !10, expr: !DIExpression()) +!10 = distinct !DIGlobalVariable(scope: null, file: !1, line: 410, type: !11, isLocal: true, isDefinition: true) +!11 = !DICompositeType(tag: DW_TAG_array_type, baseType: !7, size: 96, elements: !2) +!12 = !{i32 2, !"Debug Info Version", i32 3} +!13 = !DILocalVariable(name: "starter", scope: !14, file: !1, line: 441, type: !19) +!14 = distinct !DILexicalBlock(scope: !15, file: !1, line: 435, column: 3) +!15 = distinct !DILexicalBlock(scope: !16, file: !1, line: 431, column: 7) +!16 = distinct !DISubprogram(name: "_hb_ot_shape_normalize", linkageName: "_Z22_hb_ot_shape_normalizePK18hb_ot_shape_plan_tP11hb_buffer_tP9hb_font_t", scope: !1, file: !1, line: 291, type: !17, scopeLine: 294, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!17 = distinct !DISubroutineType(types: !18) +!18 = !{null} +!19 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) +!20 = !DILocation(line: 0, scope: !14) From b37bd78e411ce642a4ca9e0c2394cef8c2f389c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Wed, 14 Feb 2024 15:26:58 +0100 Subject: [PATCH 141/240] [clang][Interp][NFC] Add missing special cases for implicit functions We have this special case in getSource() and getRange(), but we were missing it in getExpr() and getLocation(). 
--- clang/lib/AST/Interp/InterpFrame.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/clang/lib/AST/Interp/InterpFrame.cpp b/clang/lib/AST/Interp/InterpFrame.cpp index dd05dac1703fd6..bf2cca733b66bb 100644 --- a/clang/lib/AST/Interp/InterpFrame.cpp +++ b/clang/lib/AST/Interp/InterpFrame.cpp @@ -228,10 +228,16 @@ SourceInfo InterpFrame::getSource(CodePtr PC) const { } const Expr *InterpFrame::getExpr(CodePtr PC) const { + if (Func && (!Func->hasBody() || Func->getDecl()->isImplicit()) && Caller) + return Caller->getExpr(RetPC); + return S.getExpr(Func, PC); } SourceLocation InterpFrame::getLocation(CodePtr PC) const { + if (Func && (!Func->hasBody() || Func->getDecl()->isImplicit()) && Caller) + return Caller->getLocation(RetPC); + return S.getLocation(Func, PC); } From 232cf9498f15626029bad1c2a7cdaebc4bb11d95 Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Wed, 14 Feb 2024 15:05:58 +0000 Subject: [PATCH 142/240] [mlir] update bazel for bd8fcf75df11406527de423daa63e21c3ec8609b --- .../llvm-project-overlay/mlir/BUILD.bazel | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 9fe7d1741434c7..9ad33aeb8b1e77 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -1041,6 +1041,25 @@ cc_binary( ], ) +cc_binary( + name = "_mlirDialectsLLVM.so", + srcs = ["lib/Bindings/Python/DialectLLVM.cpp"], + copts = PYBIND11_COPTS, + features = PYBIND11_FEATURES, + linkshared = 1, + linkstatic = 0, + tags = [ + "manual", # External dependency + "nobuildkite", + ], + deps = [ + ":CAPIIR", + ":CAPILLVM", + ":MLIRBindingsPythonHeadersAndDeps", + "//third_party/pybind11", + ], +) + cc_binary( name = "_mlirDialectsQuant.so", srcs = ["lib/Bindings/Python/DialectQuant.cpp"], From 9c06b079cb09639f1dc5ecd1a7a193c4267b0b63 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Wed, 14 Feb 2024 15:10:04 
+0000 Subject: [PATCH 143/240] [AMDGPU] Refactor export instruction definitions. NFC. (#81738) Using multiclasses for the Real instruction definitions has a couple of benefits: - It avoids repeating information that was already specified when defining the corresponding pseudo, like the row and done bits. - It allows commoning up the Real definitions for architectures which are mostly the same, like GFX11 and GFX12. --- llvm/lib/Target/AMDGPU/EXPInstructions.td | 126 ++++++++++------------ 1 file changed, 58 insertions(+), 68 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/EXPInstructions.td b/llvm/lib/Target/AMDGPU/EXPInstructions.td index 4cfee7d013ef1a..0a1e544949104a 100644 --- a/llvm/lib/Target/AMDGPU/EXPInstructions.td +++ b/llvm/lib/Target/AMDGPU/EXPInstructions.td @@ -10,7 +10,7 @@ // EXP classes //===----------------------------------------------------------------------===// -class EXPCommon : InstSI< +class EXPCommon : InstSI< (outs), (ins exp_tgt:$tgt, ExpSrc0:$src0, ExpSrc1:$src1, ExpSrc2:$src2, ExpSrc3:$src3, @@ -18,13 +18,16 @@ class EXPCommon : InstSI< asm> { let EXP = 1; let EXP_CNT = 1; - let mayLoad = done; + let mayLoad = _done; let mayStore = 1; let maybeAtomic = 0; let UseNamedOperandTable = 1; - let Uses = !if(row, [EXEC, M0], [EXEC]); + let Uses = !if(_row, [EXEC, M0], [EXEC]); let SchedRW = [WriteExport]; let DisableWQM = 1; + + bit row = _row; + bit done = _done; } class EXP_Pseudo @@ -34,17 +37,17 @@ class EXP_Pseudo } // Real instruction with optional asm operands "compr" and "vm". -class EXP_Real_ComprVM - : EXPCommon<0, done, "exp$tgt $src0, $src1, $src2, $src3" - #!if(done, " done", "")#"$compr$vm">, +class EXP_Real_ComprVM(pseudo)> + : EXPCommon<0, ps.done, "exp$tgt $src0, $src1, $src2, $src3" + #!if(ps.done, " done", "")#"$compr$vm">, SIMCInstr { let AsmMatchConverter = "cvtExp"; } // Real instruction with optional asm operand "row_en". 
-class EXP_Real_Row - : EXPCommon, +class EXP_Real_Row(pseudo)> + : EXPCommon, SIMCInstr { let AsmMatchConverter = "cvtExp"; } @@ -63,82 +66,69 @@ def EXP_ROW_DONE : EXP_Pseudo<1, 1>; } // let SubtargetPredicate = isNotGFX90APlus //===----------------------------------------------------------------------===// -// SI +// SI, VI, GFX10. //===----------------------------------------------------------------------===// -class EXP_Real_si - : EXP_Real_ComprVM<_done, pseudo, SIEncodingFamily.SI>, EXPe_ComprVM { - let AssemblerPredicate = isGFX6GFX7; - let DecoderNamespace = "GFX6GFX7"; - let done = _done; +multiclass EXP_Real_si { + defvar ps = !cast(NAME); + def _si : EXP_Real_ComprVM, EXPe_ComprVM { + let AssemblerPredicate = isGFX6GFX7; + let DecoderNamespace = "GFX6GFX7"; + let done = ps.done; + } } -def EXP_si : EXP_Real_si<0, "EXP">; -def EXP_DONE_si : EXP_Real_si<1, "EXP_DONE">; - -//===----------------------------------------------------------------------===// -// VI -//===----------------------------------------------------------------------===// - -class EXP_Real_vi - : EXP_Real_ComprVM<_done, pseudo, SIEncodingFamily.VI>, EXPe_vi { - let AssemblerPredicate = isGFX8GFX9; - let SubtargetPredicate = isNotGFX90APlus; - let DecoderNamespace = "GFX8"; - let done = _done; +multiclass EXP_Real_vi { + defvar ps = !cast(NAME); + def _vi : EXP_Real_ComprVM, EXPe_vi { + let AssemblerPredicate = isGFX8GFX9; + let SubtargetPredicate = isNotGFX90APlus; + let DecoderNamespace = "GFX8"; + let done = ps.done; + } } -def EXP_vi : EXP_Real_vi<0, "EXP">; -def EXP_DONE_vi : EXP_Real_vi<1, "EXP_DONE">; - -//===----------------------------------------------------------------------===// -// GFX10 -//===----------------------------------------------------------------------===// - -class EXP_Real_gfx10 - : EXP_Real_ComprVM<_done, pseudo, SIEncodingFamily.GFX10>, EXPe_ComprVM { - let AssemblerPredicate = isGFX10Only; - let DecoderNamespace = "GFX10"; - let done = _done; +multiclass 
EXP_Real_gfx10 { + defvar ps = !cast(NAME); + def _gfx10 : EXP_Real_ComprVM, EXPe_ComprVM { + let AssemblerPredicate = isGFX10Only; + let DecoderNamespace = "GFX10"; + let done = ps.done; + } } -def EXP_gfx10 : EXP_Real_gfx10<0, "EXP">; -def EXP_DONE_gfx10 : EXP_Real_gfx10<1, "EXP_DONE">; +defm EXP : EXP_Real_si, EXP_Real_vi, EXP_Real_gfx10; +defm EXP_DONE : EXP_Real_si, EXP_Real_vi, EXP_Real_gfx10; //===----------------------------------------------------------------------===// -// GFX11 +// GFX11, GFX12. //===----------------------------------------------------------------------===// -class EXP_Real_gfx11 - : EXP_Real_Row<_row, _done, pseudo, SIEncodingFamily.GFX11>, EXPe_Row { - let AssemblerPredicate = isGFX11Only; - let DecoderNamespace = "GFX11"; - let row = _row; - let done = _done; +multiclass EXP_Real_gfx11 { + defvar ps = !cast(NAME); + def _gfx11 : EXP_Real_Row, EXPe_Row { + let AssemblerPredicate = isGFX11Only; + let DecoderNamespace = "GFX11"; + let row = ps.row; + let done = ps.done; + } } -def EXP_gfx11 : EXP_Real_gfx11<0, 0, "EXP">; -def EXP_DONE_gfx11 : EXP_Real_gfx11<0, 1, "EXP_DONE">; -def EXP_ROW_gfx11 : EXP_Real_gfx11<1, 0, "EXP_ROW">; -def EXP_ROW_DONE_gfx11 : EXP_Real_gfx11<1, 1, "EXP_ROW_DONE">; - -//===----------------------------------------------------------------------===// -// GFX12+ -//===----------------------------------------------------------------------===// - -class VEXPORT_Real_gfx12 - : EXP_Real_Row<_row, _done, pseudo, SIEncodingFamily.GFX12, "export">, +multiclass VEXPORT_Real_gfx12 { + defvar ps = !cast(NAME); + def _gfx12 : EXP_Real_Row, EXPe_Row, MnemonicAlias<"exp", "export">, Requires<[isGFX12Plus]> { - let AssemblerPredicate = isGFX12Plus; - let DecoderNamespace = "GFX12"; - let row = _row; - let done = _done; + let AssemblerPredicate = isGFX12Only; + let DecoderNamespace = "GFX12"; + let row = ps.row; + let done = ps.done; + } } -def EXPORT_gfx12 : VEXPORT_Real_gfx12<0, 0, "EXP">; -def EXPORT_DONE_gfx12 : 
VEXPORT_Real_gfx12<0, 1, "EXP_DONE">; -def EXPORT_ROW_gfx12 : VEXPORT_Real_gfx12<1, 0, "EXP_ROW">; -def EXPORT_ROW_DONE_gfx12 : VEXPORT_Real_gfx12<1, 1, "EXP_ROW_DONE">; +defm EXP : EXP_Real_gfx11, VEXPORT_Real_gfx12; +defm EXP_DONE : EXP_Real_gfx11, VEXPORT_Real_gfx12; +defm EXP_ROW : EXP_Real_gfx11, VEXPORT_Real_gfx12; +defm EXP_ROW_DONE : EXP_Real_gfx11, VEXPORT_Real_gfx12; //===----------------------------------------------------------------------===// // EXP Patterns From 457c17944c6eb3d89ae6a765e4795c1cc3148506 Mon Sep 17 00:00:00 2001 From: Chris B Date: Wed, 14 Feb 2024 09:15:21 -0600 Subject: [PATCH 144/240] [NFC] Add API documentation and annotations (#78635) This change adds SM 6.2 availability annotation to 16-bit APIs (16-bit types require SM 6.2), and adds Doxygen API documentation. --- clang/lib/Headers/hlsl/hlsl_basic_types.h | 7 + clang/lib/Headers/hlsl/hlsl_intrinsics.h | 172 +++++++++++++++++++++- 2 files changed, 177 insertions(+), 2 deletions(-) diff --git a/clang/lib/Headers/hlsl/hlsl_basic_types.h b/clang/lib/Headers/hlsl/hlsl_basic_types.h index 9ea605cfa840a6..e96fa90b1ce469 100644 --- a/clang/lib/Headers/hlsl/hlsl_basic_types.h +++ b/clang/lib/Headers/hlsl/hlsl_basic_types.h @@ -12,6 +12,13 @@ namespace hlsl { // built-in scalar data types: +/// \typedef template using vector = Ty +/// __attribute__((ext_vector_type(Size))) +/// +/// \tparam Ty The base type of the vector may be any builtin integral or +/// floating point type. +/// \tparam Size The size of the vector may be any value between 1 and 4. + #ifdef __HLSL_ENABLE_16_BIT // 16-bit integer. 
typedef unsigned short uint16_t; diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index da153d8f8e0349..a8b36d29c78607 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -24,22 +24,35 @@ namespace hlsl { //===----------------------------------------------------------------------===// // abs builtins //===----------------------------------------------------------------------===// + +/// \fn T abs(T Val) +/// \brief Returns the absolute value of the input value, \a Val. +/// \param Val The input value. + #ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) int16_t abs(int16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) int16_t2 abs(int16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) int16_t3 abs(int16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) int16_t4 abs(int16_t4); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) half abs(half); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) half2 abs(half2); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) half3 abs(half3); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs) half4 abs(half4); #endif @@ -83,13 +96,23 @@ double4 abs(double4); //===----------------------------------------------------------------------===// // ceil builtins //===----------------------------------------------------------------------===// + +/// \fn T ceil(T Val) +/// \brief Returns the smallest integer value that is greater than or equal to +/// the input value, \a Val. +/// \param Val The input value. 
+ #ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_ceil) half ceil(half); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_ceil) half2 ceil(half2); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_ceil) half3 ceil(half3); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_ceil) half4 ceil(half4); #endif @@ -115,13 +138,22 @@ double4 ceil(double4); //===----------------------------------------------------------------------===// // cos builtins //===----------------------------------------------------------------------===// + +/// \fn T cos(T Val) +/// \brief Returns the cosine of the input value, \a Val. +/// \param Val The input value. + #ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_cos) half cos(half); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_cos) half2 cos(half2); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_cos) half3 cos(half3); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_cos) half4 cos(half4); #endif @@ -147,13 +179,23 @@ double4 cos(double4); //===----------------------------------------------------------------------===// // floor builtins //===----------------------------------------------------------------------===// + +/// \fn T floor(T Val) +/// \brief Returns the largest integer that is less than or equal to the input +/// value, \a Val. +/// \param Val The input value. 
+ #ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor) half floor(half); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor) half2 floor(half2); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor) half3 floor(half3); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor) half4 floor(half4); #endif @@ -179,13 +221,25 @@ double4 floor(double4); //===----------------------------------------------------------------------===// // log builtins //===----------------------------------------------------------------------===// + +/// \fn T log(T Val) +/// \brief The base-e logarithm of the input value, \a Val parameter. +/// \param Val The input value. +/// +/// If \a Val is negative, this result is undefined. If \a Val is 0, this +/// function returns negative infinity. + #ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_log) half log(half); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_log) half2 log(half2); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_log) half3 log(half3); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_log) half4 log(half4); #endif @@ -211,13 +265,25 @@ double4 log(double4); //===----------------------------------------------------------------------===// // log10 builtins //===----------------------------------------------------------------------===// + +/// \fn T log10(T Val) +/// \brief The base-10 logarithm of the input value, \a Val parameter. +/// \param Val The input value. +/// +/// If \a Val is negative, this result is undefined. If \a Val is 0, this +/// function returns negative infinity. 
+ #ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_log10) half log10(half); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_log10) half2 log10(half2); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_log10) half3 log10(half3); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_log10) half4 log10(half4); #endif @@ -243,13 +309,25 @@ double4 log10(double4); //===----------------------------------------------------------------------===// // log2 builtins //===----------------------------------------------------------------------===// + +/// \fn T log2(T Val) +/// \brief The base-2 logarithm of the input value, \a Val parameter. +/// \param Val The input value. +/// +/// If \a Val is negative, this result is undefined. If \a Val is 0, this +/// function returns negative infinity. + #ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_log2) half log2(half); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_log2) half2 log2(half2); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_log2) half3 log2(half3); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_log2) half4 log2(half4); #endif @@ -275,31 +353,49 @@ double4 log2(double4); //===----------------------------------------------------------------------===// // max builtins //===----------------------------------------------------------------------===// + +/// \fn T max(T X, T Y) +/// \brief Return the greater of \a X and \a Y. +/// \param X The X input value. +/// \param Y The Y input value. 
+ #ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) half max(half, half); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) half2 max(half2, half2); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) half3 max(half3, half3); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) half4 max(half4, half4); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) int16_t max(int16_t, int16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) int16_t2 max(int16_t2, int16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) int16_t3 max(int16_t3, int16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) int16_t4 max(int16_t4, int16_t4); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) uint16_t max(uint16_t, uint16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) uint16_t2 max(uint16_t2, uint16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) uint16_t3 max(uint16_t3, uint16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) uint16_t4 max(uint16_t4, uint16_t4); #endif @@ -361,31 +457,49 @@ double4 max(double4, double4); //===----------------------------------------------------------------------===// // min builtins //===----------------------------------------------------------------------===// + +/// \fn T min(T X, T Y) +/// \brief Return the lesser of \a X and \a Y. +/// \param X The X input value. +/// \param Y The Y input value. 
+ #ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) half min(half, half); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) half2 min(half2, half2); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) half3 min(half3, half3); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) half4 min(half4, half4); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) int16_t min(int16_t, int16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) int16_t2 min(int16_t2, int16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) int16_t3 min(int16_t3, int16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) int16_t4 min(int16_t4, int16_t4); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) uint16_t min(uint16_t, uint16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) uint16_t2 min(uint16_t2, uint16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) uint16_t3 min(uint16_t3, uint16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) uint16_t4 min(uint16_t4, uint16_t4); #endif @@ -447,13 +561,23 @@ double4 min(double4, double4); //===----------------------------------------------------------------------===// // pow builtins //===----------------------------------------------------------------------===// + +/// \fn T pow(T Val, T Pow) +/// \brief Return the value \a Val, raised to the power \a Pow. +/// \param Val The input value. +/// \param Pow The specified power. 
+ #ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_pow) half pow(half, half); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_pow) half2 pow(half2, half2); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_pow) half3 pow(half3, half3); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_pow) half4 pow(half4, half4); #endif @@ -479,22 +603,35 @@ double4 pow(double4, double4); //===----------------------------------------------------------------------===// // reversebits builtins //===----------------------------------------------------------------------===// + +/// \fn T reversebits(T Val) +/// \brief Return the value \a Val with the bit order reversed. +/// \param Val The input value. + #ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_bitreverse) int16_t reversebits(int16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_bitreverse) int16_t2 reversebits(int16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_bitreverse) int16_t3 reversebits(int16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_bitreverse) int16_t4 reversebits(int16_t4); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_bitreverse) uint16_t reversebits(uint16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_bitreverse) uint16_t2 reversebits(uint16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_bitreverse) uint16_t3 reversebits(uint16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_bitreverse) uint16_t4 reversebits(uint16_t4); #endif @@ -538,13 +675,22 @@ uint64_t4 reversebits(uint64_t4); 
//===----------------------------------------------------------------------===// // sin builtins //===----------------------------------------------------------------------===// + +/// \fn T sin(T Val) +/// \brief Returns the sine of the input value, \a Val. +/// \param Val The input value. + #ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_sin) half sin(half); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_sin) half2 sin(half2); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_sin) half3 sin(half3); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_sin) half4 sin(half4); #endif @@ -570,7 +716,13 @@ double4 sin(double4); //===----------------------------------------------------------------------===// // sqrt builtins //===----------------------------------------------------------------------===// + +/// \fn T sqrt(T Val) +/// \brief Returns the square root of the input value, \a Val. +/// \param Val The input value. + #ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_sqrtf16) half sqrt(half In); #endif @@ -584,13 +736,22 @@ double sqrt(double In); //===----------------------------------------------------------------------===// // trunc builtins //===----------------------------------------------------------------------===// + +/// \fn T trunc(T Val) +/// \brief Returns the truncated integer value of the input value, \a Val. +/// \param Val The input value. 
+ #ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_trunc) half trunc(half); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_trunc) half2 trunc(half2); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_trunc) half3 trunc(half3); +_HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_trunc) half4 trunc(half4); #endif @@ -616,9 +777,16 @@ double4 trunc(double4); //===----------------------------------------------------------------------===// // Wave* builtins //===----------------------------------------------------------------------===// + +/// \brief Counts the number of boolean variables which evaluate to true across +/// all active lanes in the current wave. +/// +/// \param Val The input boolean value. +/// \return The number of lanes for which the boolean variable evaluates to +/// true, across all active lanes in the current wave. _HLSL_AVAILABILITY(shadermodel, 6.0) _HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_count_bits) -uint WaveActiveCountBits(bool bBit); +uint WaveActiveCountBits(bool Val); } // namespace hlsl #endif //_HLSL_HLSL_INTRINSICS_H_ From 995c9061ed13c5e494ef5883cfd8c813eb5c32c3 Mon Sep 17 00:00:00 2001 From: Christian Sigg Date: Wed, 14 Feb 2024 16:35:26 +0100 Subject: [PATCH 145/240] [bazel][mlir] Fix after 232cf9498f15626029bad1c2a7cdaebc4bb11d95 --- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 9ad33aeb8b1e77..54c9f19c6ab1e9 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -1056,7 +1056,7 @@ cc_binary( ":CAPIIR", ":CAPILLVM", ":MLIRBindingsPythonHeadersAndDeps", - "//third_party/pybind11", + "@pybind11", ], ) From 
ea2d9383a23ca17b9240ad64c2adc5f2b5a73dc0 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Wed, 14 Feb 2024 16:51:28 +0100 Subject: [PATCH 146/240] [mlir][Transforms][NFC] Improve listener layering in dialect conversion (#81236) Context: Conversion patterns are given a `ConversionPatternRewriter` to modify the IR. `ConversionPatternRewriter` provides the public API. Most function calls are forwarded/handled by `ConversionPatternRewriterImpl`. The dialect conversion uses the listener infrastructure to get notified about op/block insertions. In the current design, `ConversionPatternRewriter` inherits from both `PatternRewriter` and `Listener`. The conversion rewriter registers itself as a listener. This is problematic because listener functions such as `notifyOperationInserted` are thereby part of the public API and can be called from conversion patterns; doing so would bring the dialect conversion into an inconsistent state. With this commit, `ConversionPatternRewriter` no longer inherits from `Listener`. Instead `ConversionPatternRewriterImpl` inherits from `Listener`. This removes the problematic public API and also simplifies the code a bit: block/op insertion notifications were previously forwarded to the `ConversionPatternRewriterImpl`. This is no longer needed. 
--- .../HLFIR/Transforms/BufferizeHLFIR.cpp | 4 +- .../mlir/Transforms/DialectConversion.h | 17 +----- .../Async/Transforms/AsyncToAsyncRuntime.cpp | 4 +- .../Transforms/Utils/DialectConversion.cpp | 53 +++++++++---------- 4 files changed, 29 insertions(+), 49 deletions(-) diff --git a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp index bd8d3d92d480b6..1c4f82e2de818b 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp @@ -739,12 +739,12 @@ struct HLFIRListener : public mlir::OpBuilder::Listener { void notifyOperationInserted(mlir::Operation *op, mlir::OpBuilder::InsertPoint previous) override { builder.notifyOperationInserted(op, previous); - rewriter.notifyOperationInserted(op, previous); + rewriter.getListener()->notifyOperationInserted(op, previous); } virtual void notifyBlockInserted(mlir::Block *block, mlir::Region *previous, mlir::Region::iterator previousIt) override { builder.notifyBlockInserted(block, previous, previousIt); - rewriter.notifyBlockInserted(block, previous, previousIt); + rewriter.getListener()->notifyBlockInserted(block, previous, previousIt); } fir::FirOpBuilder &builder; mlir::ConversionPatternRewriter &rewriter; diff --git a/mlir/include/mlir/Transforms/DialectConversion.h b/mlir/include/mlir/Transforms/DialectConversion.h index 091131651bbf56..851d639ae68a77 100644 --- a/mlir/include/mlir/Transforms/DialectConversion.h +++ b/mlir/include/mlir/Transforms/DialectConversion.h @@ -655,8 +655,7 @@ struct ConversionPatternRewriterImpl; /// This class implements a pattern rewriter for use with ConversionPatterns. It /// extends the base PatternRewriter and provides special conversion specific /// hooks. 
-class ConversionPatternRewriter final : public PatternRewriter, - public RewriterBase::Listener { +class ConversionPatternRewriter final : public PatternRewriter { public: explicit ConversionPatternRewriter(MLIRContext *ctx); ~ConversionPatternRewriter() override; @@ -735,10 +734,6 @@ class ConversionPatternRewriter final : public PatternRewriter, /// implemented for dialect conversion. void eraseBlock(Block *block) override; - /// PatternRewriter hook creating a new block. - void notifyBlockInserted(Block *block, Region *previous, - Region::iterator previousIt) override; - /// PatternRewriter hook for splitting a block into two parts. Block *splitBlock(Block *block, Block::iterator before) override; @@ -747,9 +742,6 @@ class ConversionPatternRewriter final : public PatternRewriter, ValueRange argValues = std::nullopt) override; using PatternRewriter::inlineBlockBefore; - /// PatternRewriter hook for inserting a new operation. - void notifyOperationInserted(Operation *op, InsertPoint previous) override; - /// PatternRewriter hook for updating the given operation in-place. /// Note: These methods only track updates to the given operation itself, /// and not nested regions. Updates to regions will still require notification @@ -762,18 +754,11 @@ class ConversionPatternRewriter final : public PatternRewriter, /// PatternRewriter hook for updating the given operation in-place. void cancelOpModification(Operation *op) override; - /// PatternRewriter hook for notifying match failure reasons. - void - notifyMatchFailure(Location loc, - function_ref reasonCallback) override; - using PatternRewriter::notifyMatchFailure; - /// Return a reference to the internal implementation. detail::ConversionPatternRewriterImpl &getImpl(); private: // Hide unsupported pattern rewriter API. 
- using OpBuilder::getListener; using OpBuilder::setListener; void moveOpBefore(Operation *op, Block *block, diff --git a/mlir/lib/Dialect/Async/Transforms/AsyncToAsyncRuntime.cpp b/mlir/lib/Dialect/Async/Transforms/AsyncToAsyncRuntime.cpp index 828f53c16d8f86..31e81107f655c0 100644 --- a/mlir/lib/Dialect/Async/Transforms/AsyncToAsyncRuntime.cpp +++ b/mlir/lib/Dialect/Async/Transforms/AsyncToAsyncRuntime.cpp @@ -582,7 +582,7 @@ class AwaitOpLoweringBase : public OpConversionPattern { // Inside regular functions we use the blocking wait operation to wait for // the async object (token, value or group) to become available. if (!isInCoroutine) { - ImplicitLocOpBuilder builder(loc, op, &rewriter); + ImplicitLocOpBuilder builder(loc, rewriter); builder.create(loc, operand); // Assert that the awaited operands is not in the error state. @@ -601,7 +601,7 @@ class AwaitOpLoweringBase : public OpConversionPattern { CoroMachinery &coro = funcCoro->getSecond(); Block *suspended = op->getBlock(); - ImplicitLocOpBuilder builder(loc, op, &rewriter); + ImplicitLocOpBuilder builder(loc, rewriter); MLIRContext *ctx = op->getContext(); // Save the coroutine state and resume on a runtime managed thread when diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index a5a77e00fbfb5f..dbf5bf50d60e7f 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -825,7 +825,7 @@ void ArgConverter::insertConversion(Block *newBlock, //===----------------------------------------------------------------------===// namespace mlir { namespace detail { -struct ConversionPatternRewriterImpl { +struct ConversionPatternRewriterImpl : public RewriterBase::Listener { explicit ConversionPatternRewriterImpl(PatternRewriter &rewriter) : argConverter(rewriter, unresolvedMaterializations), notifyCallback(nullptr) {} @@ -903,15 +903,19 @@ struct ConversionPatternRewriterImpl { // Rewriter 
Notification Hooks //===--------------------------------------------------------------------===// - /// PatternRewriter hook for replacing the results of an operation. + /// Notifies that an op was inserted. + void notifyOperationInserted(Operation *op, + OpBuilder::InsertPoint previous) override; + + /// Notifies that an op is about to be replaced with the given values. void notifyOpReplaced(Operation *op, ValueRange newValues); /// Notifies that a block is about to be erased. void notifyBlockIsBeingErased(Block *block); - /// Notifies that a block was created. - void notifyInsertedBlock(Block *block, Region *previous, - Region::iterator previousIt); + /// Notifies that a block was inserted. + void notifyBlockInserted(Block *block, Region *previous, + Region::iterator previousIt) override; /// Notifies that a block was split. void notifySplitBlock(Block *block, Block *continuation); @@ -921,8 +925,9 @@ struct ConversionPatternRewriterImpl { Block::iterator before); /// Notifies that a pattern match failed for the given reason. 
- void notifyMatchFailure(Location loc, - function_ref reasonCallback); + void + notifyMatchFailure(Location loc, + function_ref reasonCallback) override; //===--------------------------------------------------------------------===// // State @@ -1363,6 +1368,16 @@ LogicalResult ConversionPatternRewriterImpl::convertNonEntryRegionTypes( //===----------------------------------------------------------------------===// // Rewriter Notification Hooks +void ConversionPatternRewriterImpl::notifyOperationInserted( + Operation *op, OpBuilder::InsertPoint previous) { + assert(!previous.isSet() && "expected newly created op"); + LLVM_DEBUG({ + logger.startLine() << "** Insert : '" << op->getName() << "'(" << op + << ")\n"; + }); + createdOps.push_back(op); +} + void ConversionPatternRewriterImpl::notifyOpReplaced(Operation *op, ValueRange newValues) { assert(newValues.size() == op->getNumResults()); @@ -1398,7 +1413,7 @@ void ConversionPatternRewriterImpl::notifyBlockIsBeingErased(Block *block) { blockActions.push_back(BlockAction::getErase(block, {region, origNextBlock})); } -void ConversionPatternRewriterImpl::notifyInsertedBlock( +void ConversionPatternRewriterImpl::notifyBlockInserted( Block *block, Region *previous, Region::iterator previousIt) { if (!previous) { // This is a newly created block. 
@@ -1437,7 +1452,7 @@ void ConversionPatternRewriterImpl::notifyMatchFailure( ConversionPatternRewriter::ConversionPatternRewriter(MLIRContext *ctx) : PatternRewriter(ctx), impl(new detail::ConversionPatternRewriterImpl(*this)) { - setListener(this); + setListener(impl.get()); } ConversionPatternRewriter::~ConversionPatternRewriter() = default; @@ -1540,11 +1555,6 @@ ConversionPatternRewriter::getRemappedValues(ValueRange keys, results); } -void ConversionPatternRewriter::notifyBlockInserted( - Block *block, Region *previous, Region::iterator previousIt) { - impl->notifyInsertedBlock(block, previous, previousIt); -} - Block *ConversionPatternRewriter::splitBlock(Block *block, Block::iterator before) { auto *continuation = block->splitBlock(before); @@ -1572,16 +1582,6 @@ void ConversionPatternRewriter::inlineBlockBefore(Block *source, Block *dest, eraseBlock(source); } -void ConversionPatternRewriter::notifyOperationInserted(Operation *op, - InsertPoint previous) { - assert(!previous.isSet() && "expected newly created op"); - LLVM_DEBUG({ - impl->logger.startLine() - << "** Insert : '" << op->getName() << "'(" << op << ")\n"; - }); - impl->createdOps.push_back(op); -} - void ConversionPatternRewriter::startOpModification(Operation *op) { #ifndef NDEBUG impl->pendingRootUpdates.insert(op); @@ -1614,11 +1614,6 @@ void ConversionPatternRewriter::cancelOpModification(Operation *op) { rootUpdates.erase(rootUpdates.begin() + updateIdx); } -void ConversionPatternRewriter::notifyMatchFailure( - Location loc, function_ref reasonCallback) { - impl->notifyMatchFailure(loc, reasonCallback); -} - void ConversionPatternRewriter::moveOpBefore(Operation *op, Block *block, Block::iterator iterator) { llvm_unreachable( From 1c10821022f1799452065fb57474e894e2562b7f Mon Sep 17 00:00:00 2001 From: David Sherwood <57997763+david-arm@users.noreply.github.com> Date: Wed, 14 Feb 2024 16:07:51 +0000 Subject: [PATCH 147/240] [LoopVectorize] Fix divide-by-zero bug (#80836) (#81721) When 
attempting to use the estimated trip count to refine the costs of the runtime memory checks we should also check for sane trip counts to prevent divide-by-zero faults on some platforms. Fixes #80836 --- .../Transforms/Vectorize/LoopVectorize.cpp | 10 +++-- .../AArch64/low_trip_memcheck_cost.ll | 38 +++++++++++++++++++ 2 files changed, 44 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index b39bf21cacdbb5..98b177cf5d2d0e 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2010,16 +2010,18 @@ class GeneratedRTChecks { BestTripCount = *EstimatedTC; } + BestTripCount = std::max(BestTripCount, 1U); InstructionCost NewMemCheckCost = MemCheckCost / BestTripCount; // Let's ensure the cost is always at least 1. NewMemCheckCost = std::max(*NewMemCheckCost.getValue(), (InstructionCost::CostType)1); - LLVM_DEBUG(dbgs() - << "We expect runtime memory checks to be hoisted " - << "out of the outer loop. Cost reduced from " - << MemCheckCost << " to " << NewMemCheckCost << '\n'); + if (BestTripCount > 1) + LLVM_DEBUG(dbgs() + << "We expect runtime memory checks to be hoisted " + << "out of the outer loop. 
Cost reduced from " + << MemCheckCost << " to " << NewMemCheckCost << '\n'); MemCheckCost = NewMemCheckCost; } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_memcheck_cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_memcheck_cost.ll index 8a796bb3065b19..800c55d6740bc8 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_memcheck_cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_memcheck_cost.ll @@ -177,6 +177,43 @@ outer.exit: } +define void @outer_pgo_minus1(ptr nocapture noundef %a, ptr nocapture noundef readonly %b, i64 noundef %m, i64 noundef %n) { +; CHECK-LABEL: LV: Checking a loop in 'outer_pgo_minus1' +; CHECK: Calculating cost of runtime checks: +; CHECK-NOT: We expect runtime memory checks to be hoisted out of the outer loop. Cost reduced +; CHECK: Total cost of runtime checks: 6 +; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:16 +entry: + br label %outer.loop + +outer.loop: + %outer.iv = phi i64 [ %outer.iv.next, %inner.exit ], [ 0, %entry ] + %mul.us = mul nsw i64 %outer.iv, %n + br label %inner.loop + +inner.loop: + %inner.iv = phi i64 [ 0, %outer.loop ], [ %inner.iv.next, %inner.loop ] + %add.us = add nuw nsw i64 %inner.iv, %mul.us + %arrayidx.us = getelementptr inbounds i8, ptr %b, i64 %add.us + %0 = load i8, ptr %arrayidx.us, align 1 + %arrayidx7.us = getelementptr inbounds i8, ptr %a, i64 %add.us + %1 = load i8, ptr %arrayidx7.us, align 1 + %add9.us = add i8 %1, %0 + store i8 %add9.us, ptr %arrayidx7.us, align 1 + %inner.iv.next = add nuw nsw i64 %inner.iv, 1 + %exitcond.not = icmp eq i64 %inner.iv.next, %n + br i1 %exitcond.not, label %inner.exit, label %inner.loop + +inner.exit: + %outer.iv.next = add nuw nsw i64 %outer.iv, 1 + %exitcond26.not = icmp eq i64 %outer.iv.next, %m + br i1 %exitcond26.not, label %outer.exit, label %outer.loop, !prof !1 + +outer.exit: + ret void +} + + define void @outer_known_tc3_full_range_checks(ptr nocapture noundef %dst, ptr 
nocapture noundef readonly %src, i64 noundef %n) { ; CHECK-LABEL: LV: Checking a loop in 'outer_known_tc3_full_range_checks' ; CHECK: Calculating cost of runtime checks: @@ -215,3 +252,4 @@ outer.exit: !0 = !{!"branch_weights", i32 10, i32 20} +!1 = !{!"branch_weights", i32 1, i32 -1} From 8faefe36ed57c2dab2b50e76fd27045b908f8c1d Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Wed, 14 Feb 2024 17:15:30 +0100 Subject: [PATCH 148/240] [mlir][Transforms][NFC] Modularize block actions (#81237) Throughout the rewrite process, the dialect conversion maintains a list of "block actions" that can be rolled back upon failure. This commit encapsulates the existing block actions into separate classes, making it easier to add additional actions in the future. This commit also renames "block actions" to "IR rewrites". In a subsequent commit, an "operation rewrite" class that allows rolling back movements of single operations is added. This is to support `moveOpBefore` in the dialect conversion. Rewrites have two methods: `commit()` commits an action. It can no longer be rolled back afterwards. `rollback()` undoes a rewrite. It can no longer be committed afterwards. 
--- .../Transforms/Utils/DialectConversion.cpp | 504 +++++++++++------- 1 file changed, 306 insertions(+), 198 deletions(-) diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index dbf5bf50d60e7f..9875f8668b65a8 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -154,13 +154,12 @@ namespace { struct RewriterState { RewriterState(unsigned numCreatedOps, unsigned numUnresolvedMaterializations, unsigned numReplacements, unsigned numArgReplacements, - unsigned numBlockActions, unsigned numIgnoredOperations, + unsigned numRewrites, unsigned numIgnoredOperations, unsigned numRootUpdates) : numCreatedOps(numCreatedOps), numUnresolvedMaterializations(numUnresolvedMaterializations), numReplacements(numReplacements), - numArgReplacements(numArgReplacements), - numBlockActions(numBlockActions), + numArgReplacements(numArgReplacements), numRewrites(numRewrites), numIgnoredOperations(numIgnoredOperations), numRootUpdates(numRootUpdates) {} @@ -176,8 +175,8 @@ struct RewriterState { /// The current number of argument replacements queued. unsigned numArgReplacements; - /// The current number of block actions performed. - unsigned numBlockActions; + /// The current number of rewrites performed. + unsigned numRewrites; /// The current number of ignored operations. unsigned numIgnoredOperations; @@ -235,86 +234,6 @@ struct OpReplacement { const TypeConverter *converter; }; -//===----------------------------------------------------------------------===// -// BlockAction - -/// The kind of the block action performed during the rewrite. Actions can be -/// undone if the conversion fails. -enum class BlockActionKind { - Create, - Erase, - Inline, - Move, - Split, - TypeConversion -}; - -/// Original position of the given block in its parent region. During undo -/// actions, the block needs to be placed before `insertBeforeBlock`. 
-struct BlockPosition { - Region *region; - Block *insertBeforeBlock; -}; - -/// Information needed to undo inlining actions. -/// - the source block -/// - the first inlined operation (could be null if the source block was empty) -/// - the last inlined operation (could be null if the source block was empty) -struct InlineInfo { - Block *sourceBlock; - Operation *firstInlinedInst; - Operation *lastInlinedInst; -}; - -/// The storage class for an undoable block action (one of BlockActionKind), -/// contains the information necessary to undo this action. -struct BlockAction { - static BlockAction getCreate(Block *block) { - return {BlockActionKind::Create, block, {}}; - } - static BlockAction getErase(Block *block, BlockPosition originalPosition) { - return {BlockActionKind::Erase, block, {originalPosition}}; - } - static BlockAction getInline(Block *block, Block *srcBlock, - Block::iterator before) { - BlockAction action{BlockActionKind::Inline, block, {}}; - action.inlineInfo = {srcBlock, - srcBlock->empty() ? nullptr : &srcBlock->front(), - srcBlock->empty() ? nullptr : &srcBlock->back()}; - return action; - } - static BlockAction getMove(Block *block, BlockPosition originalPosition) { - return {BlockActionKind::Move, block, {originalPosition}}; - } - static BlockAction getSplit(Block *block, Block *originalBlock) { - BlockAction action{BlockActionKind::Split, block, {}}; - action.originalBlock = originalBlock; - return action; - } - static BlockAction getTypeConversion(Block *block) { - return BlockAction{BlockActionKind::TypeConversion, block, {}}; - } - - // The action kind. - BlockActionKind kind; - - // A pointer to the block that was created by the action. - Block *block; - - union { - // In use if kind == BlockActionKind::Inline or BlockActionKind::Erase, and - // contains a pointer to the region that originally contained the block as - // well as the position of the block in that region. 
- BlockPosition originalPosition; - // In use if kind == BlockActionKind::Split and contains a pointer to the - // block that was split into two parts. - Block *originalBlock; - // In use if kind == BlockActionKind::Inline, and contains the information - // needed to undo the inlining. - InlineInfo inlineInfo; - }; -}; - //===----------------------------------------------------------------------===// // UnresolvedMaterialization @@ -820,6 +739,251 @@ void ArgConverter::insertConversion(Block *newBlock, conversionInfo.insert({newBlock, std::move(info)}); } +//===----------------------------------------------------------------------===// +// IR rewrites +//===----------------------------------------------------------------------===// + +namespace { +/// An IR rewrite that can be committed (upon success) or rolled back (upon +/// failure). +/// +/// The dialect conversion keeps track of IR modifications (requested by the +/// user through the rewriter API) in `IRRewrite` objects. Some kinds of rewrites +/// are directly applied to the IR as the rewriter API is used, some are applied +/// partially, and some are delayed until the `IRRewrite` objects are committed. +class IRRewrite { +public: + /// The kind of the rewrite. Rewrites can be undone if the conversion fails. + enum class Kind { + CreateBlock, + EraseBlock, + InlineBlock, + MoveBlock, + SplitBlock, + BlockTypeConversion + }; + + virtual ~IRRewrite() = default; + + /// Roll back the rewrite. + virtual void rollback() = 0; + + /// Commit the rewrite. + virtual void commit() {} + + Kind getKind() const { return kind; } + + static bool classof(const IRRewrite *rewrite) { return true; } + +protected: + IRRewrite(Kind kind, ConversionPatternRewriterImpl &rewriterImpl) + : kind(kind), rewriterImpl(rewriterImpl) {} + + const Kind kind; + ConversionPatternRewriterImpl &rewriterImpl; +}; + +/// A block rewrite. +class BlockRewrite : public IRRewrite { +public: + /// Return the block that this rewrite operates on. 
+ Block *getBlock() const { return block; } + + static bool classof(const IRRewrite *rewrite) { + return rewrite->getKind() >= Kind::CreateBlock && + rewrite->getKind() <= Kind::BlockTypeConversion; + } + +protected: + BlockRewrite(Kind kind, ConversionPatternRewriterImpl &rewriterImpl, + Block *block) + : IRRewrite(kind, rewriterImpl), block(block) {} + + // The block that this rewrite operates on. + Block *block; +}; + +/// Creation of a block. Block creations are immediately reflected in the IR. +/// There is no extra work to commit the rewrite. During rollback, the newly +/// created block is erased. +class CreateBlockRewrite : public BlockRewrite { +public: + CreateBlockRewrite(ConversionPatternRewriterImpl &rewriterImpl, Block *block) + : BlockRewrite(Kind::CreateBlock, rewriterImpl, block) {} + + static bool classof(const IRRewrite *rewrite) { + return rewrite->getKind() == Kind::CreateBlock; + } + + void rollback() override { + // Unlink all of the operations within this block, they will be deleted + // separately. + auto &blockOps = block->getOperations(); + while (!blockOps.empty()) + blockOps.remove(blockOps.begin()); + block->dropAllDefinedValueUses(); + block->erase(); + } +}; + +/// Erasure of a block. Block erasures are partially reflected in the IR. Erased +/// blocks are immediately unlinked, but only erased when the rewrite is +/// committed. This makes it easier to rollback a block erasure: the block is +/// simply inserted into its original location. 
+class EraseBlockRewrite : public BlockRewrite { +public: + EraseBlockRewrite(ConversionPatternRewriterImpl &rewriterImpl, Block *block, + Region *region, Block *insertBeforeBlock) + : BlockRewrite(Kind::EraseBlock, rewriterImpl, block), region(region), + insertBeforeBlock(insertBeforeBlock) {} + + static bool classof(const IRRewrite *rewrite) { + return rewrite->getKind() == Kind::EraseBlock; + } + + ~EraseBlockRewrite() override { + assert(!block && "rewrite was neither rolled back nor committed"); + } + + void rollback() override { + // The block (owned by this rewrite) was not actually erased yet. It was + // just unlinked. Put it back into its original position. + assert(block && "expected block"); + auto &blockList = region->getBlocks(); + Region::iterator before = insertBeforeBlock + ? Region::iterator(insertBeforeBlock) + : blockList.end(); + blockList.insert(before, block); + block = nullptr; + } + + void commit() override { + // Erase the block. + assert(block && "expected block"); + delete block; + block = nullptr; + } + +private: + // The region in which this block was previously contained. + Region *region; + + // The original successor of this block before it was unlinked. "nullptr" if + // this block was the only block in the region. + Block *insertBeforeBlock; +}; + +/// Inlining of a block. This rewrite is immediately reflected in the IR. +/// Note: This rewrite represents only the inlining of the operations. The +/// erasure of the inlined block is a separate rewrite. +class InlineBlockRewrite : public BlockRewrite { +public: + InlineBlockRewrite(ConversionPatternRewriterImpl &rewriterImpl, Block *block, + Block *sourceBlock, Block::iterator before) + : BlockRewrite(Kind::InlineBlock, rewriterImpl, block), + sourceBlock(sourceBlock), + firstInlinedInst(sourceBlock->empty() ? nullptr + : &sourceBlock->front()), + lastInlinedInst(sourceBlock->empty() ? 
nullptr : &sourceBlock->back()) { + } + + static bool classof(const IRRewrite *rewrite) { + return rewrite->getKind() == Kind::InlineBlock; + } + + void rollback() override { + // Put the operations from the destination block (owned by the rewrite) + // back into the source block. + if (firstInlinedInst) { + assert(lastInlinedInst && "expected operation"); + sourceBlock->getOperations().splice(sourceBlock->begin(), + block->getOperations(), + Block::iterator(firstInlinedInst), + ++Block::iterator(lastInlinedInst)); + } + } + +private: + // The block that originally contained the operations. + Block *sourceBlock; + + // The first inlined operation. + Operation *firstInlinedInst; + + // The last inlined operation. + Operation *lastInlinedInst; +}; + +/// Moving of a block. This rewrite is immediately reflected in the IR. +class MoveBlockRewrite : public BlockRewrite { +public: + MoveBlockRewrite(ConversionPatternRewriterImpl &rewriterImpl, Block *block, + Region *region, Block *insertBeforeBlock) + : BlockRewrite(Kind::MoveBlock, rewriterImpl, block), region(region), + insertBeforeBlock(insertBeforeBlock) {} + + static bool classof(const IRRewrite *rewrite) { + return rewrite->getKind() == Kind::MoveBlock; + } + + void rollback() override { + // Move the block back to its original position. + Region::iterator before = + insertBeforeBlock ? Region::iterator(insertBeforeBlock) : region->end(); + region->getBlocks().splice(before, block->getParent()->getBlocks(), block); + } + +private: + // The region in which this block was previously contained. + Region *region; + + // The original successor of this block before it was moved. "nullptr" if + // this block was the only block in the region. + Block *insertBeforeBlock; +}; + +/// Splitting of a block. This rewrite is immediately reflected in the IR. 
+class SplitBlockRewrite : public BlockRewrite { +public: + SplitBlockRewrite(ConversionPatternRewriterImpl &rewriterImpl, Block *block, + Block *originalBlock) + : BlockRewrite(Kind::SplitBlock, rewriterImpl, block), + originalBlock(originalBlock) {} + + static bool classof(const IRRewrite *rewrite) { + return rewrite->getKind() == Kind::SplitBlock; + } + + void rollback() override { + // Merge back the block that was split out. + originalBlock->getOperations().splice(originalBlock->end(), + block->getOperations()); + block->dropAllDefinedValueUses(); + block->erase(); + } + +private: + // The original block from which this block was split. + Block *originalBlock; +}; + +/// Block type conversion. This rewrite is partially reflected in the IR. +class BlockTypeConversionRewrite : public BlockRewrite { +public: + BlockTypeConversionRewrite(ConversionPatternRewriterImpl &rewriterImpl, + Block *block) + : BlockRewrite(Kind::BlockTypeConversion, rewriterImpl, block) {} + + static bool classof(const IRRewrite *rewrite) { + return rewrite->getKind() == Kind::BlockTypeConversion; + } + + // TODO: Block type conversions are currently committed in + // `ArgConverter::applyRewrites`. This should be done in the "commit" method. + void rollback() override; +}; +} // namespace + //===----------------------------------------------------------------------===// // ConversionPatternRewriterImpl //===----------------------------------------------------------------------===// @@ -848,13 +1012,17 @@ struct ConversionPatternRewriterImpl : public RewriterBase::Listener { /// Reset the state of the rewriter to a previously saved point. void resetState(RewriterState state); - /// Erase any blocks that were unlinked from their regions and stored in block - /// actions. - void eraseDanglingBlocks(); + /// Append a rewrite. Rewrites are committed upon success and rolled back upon + /// failure. 
+ template + void appendRewrite(Args &&...args) { + rewrites.push_back( + std::make_unique(*this, std::forward(args)...)); + } - /// Undo the block actions (motions, splits) one by one in reverse order until - /// "numActionsToKeep" actions remains. - void undoBlockActions(unsigned numActionsToKeep = 0); + /// Undo the rewrites (motions, splits) one by one in reverse order until + /// "numRewritesToKeep" rewrites remain. + void undoRewrites(unsigned numRewritesToKeep = 0); /// Remap the given values to those with potentially different types. Returns /// success if the values could be remapped, failure otherwise. `valueDiagTag` @@ -954,7 +1122,7 @@ struct ConversionPatternRewriterImpl : public RewriterBase::Listener { SmallVector argReplacements; /// Ordered list of block operations (creations, splits, motions). - SmallVector blockActions; + SmallVector> rewrites; /// A set of operations that should no longer be considered for legalization, /// but were not directly replace/erased/etc. by a pattern. These are @@ -995,6 +1163,11 @@ struct ConversionPatternRewriterImpl : public RewriterBase::Listener { } // namespace detail } // namespace mlir +void BlockTypeConversionRewrite::rollback() { + // Undo the type conversion. + rewriterImpl.argConverter.discardRewrites(block); +} + /// Detach any operations nested in the given operation from their parent /// blocks, and erase the given operation. This can be used when the nested /// operations are scheduled for erasure themselves, so deleting the regions of @@ -1020,7 +1193,7 @@ void ConversionPatternRewriterImpl::discardRewrites() { for (auto &state : rootUpdates) state.resetOperation(); - undoBlockActions(); + undoRewrites(); // Remove any newly created ops. for (UnresolvedMaterialization &materialization : unresolvedMaterializations) @@ -1083,8 +1256,9 @@ void ConversionPatternRewriterImpl::applyRewrites() { argConverter.applyRewrites(mapping); - // Now that the ops have been erased, also erase dangling blocks. 
- eraseDanglingBlocks(); + // Commit all rewrites. + for (auto &rewrite : rewrites) + rewrite->commit(); } //===----------------------------------------------------------------------===// @@ -1093,8 +1267,7 @@ void ConversionPatternRewriterImpl::applyRewrites() { RewriterState ConversionPatternRewriterImpl::getCurrentState() { return RewriterState(createdOps.size(), unresolvedMaterializations.size(), replacements.size(), argReplacements.size(), - blockActions.size(), ignoredOps.size(), - rootUpdates.size()); + rewrites.size(), ignoredOps.size(), rootUpdates.size()); } void ConversionPatternRewriterImpl::resetState(RewriterState state) { @@ -1109,8 +1282,8 @@ void ConversionPatternRewriterImpl::resetState(RewriterState state) { mapping.erase(replacedArg); argReplacements.resize(state.numArgReplacements); - // Undo any block actions. - undoBlockActions(state.numBlockActions); + // Undo any rewrites. + undoRewrites(state.numRewrites); // Reset any replaced operations and undo any saved mappings. for (auto &repl : llvm::drop_begin(replacements, state.numReplacements)) @@ -1149,76 +1322,11 @@ void ConversionPatternRewriterImpl::resetState(RewriterState state) { operationsWithChangedResults.pop_back(); } -void ConversionPatternRewriterImpl::eraseDanglingBlocks() { - for (auto &action : blockActions) - if (action.kind == BlockActionKind::Erase) - delete action.block; -} - -void ConversionPatternRewriterImpl::undoBlockActions( - unsigned numActionsToKeep) { - for (auto &action : - llvm::reverse(llvm::drop_begin(blockActions, numActionsToKeep))) { - switch (action.kind) { - // Delete the created block. - case BlockActionKind::Create: { - // Unlink all of the operations within this block, they will be deleted - // separately. 
- auto &blockOps = action.block->getOperations(); - while (!blockOps.empty()) - blockOps.remove(blockOps.begin()); - action.block->dropAllDefinedValueUses(); - action.block->erase(); - break; - } - // Put the block (owned by action) back into its original position. - case BlockActionKind::Erase: { - auto &blockList = action.originalPosition.region->getBlocks(); - Block *insertBeforeBlock = action.originalPosition.insertBeforeBlock; - blockList.insert((insertBeforeBlock ? Region::iterator(insertBeforeBlock) - : blockList.end()), - action.block); - break; - } - // Put the instructions from the destination block (owned by the action) - // back into the source block. - case BlockActionKind::Inline: { - Block *sourceBlock = action.inlineInfo.sourceBlock; - if (action.inlineInfo.firstInlinedInst) { - assert(action.inlineInfo.lastInlinedInst && "expected operation"); - sourceBlock->getOperations().splice( - sourceBlock->begin(), action.block->getOperations(), - Block::iterator(action.inlineInfo.firstInlinedInst), - ++Block::iterator(action.inlineInfo.lastInlinedInst)); - } - break; - } - // Move the block back to its original position. - case BlockActionKind::Move: { - Region *originalRegion = action.originalPosition.region; - Block *insertBeforeBlock = action.originalPosition.insertBeforeBlock; - originalRegion->getBlocks().splice( - (insertBeforeBlock ? Region::iterator(insertBeforeBlock) - : originalRegion->end()), - action.block->getParent()->getBlocks(), action.block); - break; - } - // Merge back the block that was split out. - case BlockActionKind::Split: { - action.originalBlock->getOperations().splice( - action.originalBlock->end(), action.block->getOperations()); - action.block->dropAllDefinedValueUses(); - action.block->erase(); - break; - } - // Undo the type conversion. 
- case BlockActionKind::TypeConversion: { - argConverter.discardRewrites(action.block); - break; - } - } - } - blockActions.resize(numActionsToKeep); +void ConversionPatternRewriterImpl::undoRewrites(unsigned numRewritesToKeep) { + for (auto &rewrite : + llvm::reverse(llvm::drop_begin(rewrites, numRewritesToKeep))) + rewrite->rollback(); + rewrites.resize(numRewritesToKeep); } LogicalResult ConversionPatternRewriterImpl::remapValues( @@ -1309,7 +1417,7 @@ FailureOr ConversionPatternRewriterImpl::convertBlockSignature( return failure(); if (Block *newBlock = *result) { if (newBlock != block) - blockActions.push_back(BlockAction::getTypeConversion(newBlock)); + appendRewrite(newBlock); } return result; } @@ -1410,28 +1518,28 @@ void ConversionPatternRewriterImpl::notifyOpReplaced(Operation *op, void ConversionPatternRewriterImpl::notifyBlockIsBeingErased(Block *block) { Region *region = block->getParent(); Block *origNextBlock = block->getNextNode(); - blockActions.push_back(BlockAction::getErase(block, {region, origNextBlock})); + appendRewrite(block, region, origNextBlock); } void ConversionPatternRewriterImpl::notifyBlockInserted( Block *block, Region *previous, Region::iterator previousIt) { if (!previous) { // This is a newly created block. - blockActions.push_back(BlockAction::getCreate(block)); + appendRewrite(block); return; } Block *prevBlock = previousIt == previous->end() ? 
nullptr : &*previousIt; - blockActions.push_back(BlockAction::getMove(block, {previous, prevBlock})); + appendRewrite(block, previous, prevBlock); } void ConversionPatternRewriterImpl::notifySplitBlock(Block *block, Block *continuation) { - blockActions.push_back(BlockAction::getSplit(continuation, block)); + appendRewrite(continuation, block); } void ConversionPatternRewriterImpl::notifyBlockBeingInlined( Block *block, Block *srcBlock, Block::iterator before) { - blockActions.push_back(BlockAction::getInline(block, srcBlock, before)); + appendRewrite(block, srcBlock, before); } void ConversionPatternRewriterImpl::notifyMatchFailure( @@ -1501,8 +1609,8 @@ void ConversionPatternRewriter::eraseBlock(Block *block) { for (Operation &op : *block) eraseOp(&op); - // Unlink the block from its parent region. The block is kept in the block - // action and will be actually destroyed when rewrites are applied. This + // Unlink the block from its parent region. The block is kept in the rewrite + // object and will be actually destroyed when rewrites are applied. This // allows us to keep the operations in the block live and undo the removal by // re-inserting the block. block->getParent()->getBlocks().remove(block); @@ -1700,11 +1808,11 @@ class OperationLegalizer { RewriterState &curState); /// Legalizes the actions registered during the execution of a pattern. 
- LogicalResult legalizePatternBlockActions(Operation *op, - ConversionPatternRewriter &rewriter, - ConversionPatternRewriterImpl &impl, - RewriterState &state, - RewriterState &newState); + LogicalResult + legalizePatternBlockRewrites(Operation *op, + ConversionPatternRewriter &rewriter, + ConversionPatternRewriterImpl &impl, + RewriterState &state, RewriterState &newState); LogicalResult legalizePatternCreatedOperations( ConversionPatternRewriter &rewriter, ConversionPatternRewriterImpl &impl, RewriterState &state, RewriterState &newState); @@ -1986,8 +2094,8 @@ OperationLegalizer::legalizePatternResult(Operation *op, const Pattern &pattern, // Legalize each of the actions registered during application. RewriterState newState = impl.getCurrentState(); - if (failed(legalizePatternBlockActions(op, rewriter, impl, curState, - newState)) || + if (failed(legalizePatternBlockRewrites(op, rewriter, impl, curState, + newState)) || failed(legalizePatternRootUpdates(rewriter, impl, curState, newState)) || failed(legalizePatternCreatedOperations(rewriter, impl, curState, newState))) { @@ -1998,7 +2106,7 @@ OperationLegalizer::legalizePatternResult(Operation *op, const Pattern &pattern, return success(); } -LogicalResult OperationLegalizer::legalizePatternBlockActions( +LogicalResult OperationLegalizer::legalizePatternBlockRewrites( Operation *op, ConversionPatternRewriter &rewriter, ConversionPatternRewriterImpl &impl, RewriterState &state, RewriterState &newState) { @@ -2006,22 +2114,22 @@ LogicalResult OperationLegalizer::legalizePatternBlockActions( // If the pattern moved or created any blocks, make sure the types of block // arguments get legalized. 
- for (int i = state.numBlockActions, e = newState.numBlockActions; i != e; - ++i) { - auto &action = impl.blockActions[i]; - if (action.kind == BlockActionKind::TypeConversion || - action.kind == BlockActionKind::Erase) + for (int i = state.numRewrites, e = newState.numRewrites; i != e; ++i) { + BlockRewrite *rewrite = dyn_cast(impl.rewrites[i].get()); + if (!rewrite) + continue; + Block *block = rewrite->getBlock(); + if (isa(rewrite)) continue; // Only check blocks outside of the current operation. - Operation *parentOp = action.block->getParentOp(); - if (!parentOp || parentOp == op || action.block->getNumArguments() == 0) + Operation *parentOp = block->getParentOp(); + if (!parentOp || parentOp == op || block->getNumArguments() == 0) continue; // If the region of the block has a type converter, try to convert the block // directly. - if (auto *converter = - impl.argConverter.getConverter(action.block->getParent())) { - if (failed(impl.convertBlockSignature(action.block, converter))) { + if (auto *converter = impl.argConverter.getConverter(block->getParent())) { + if (failed(impl.convertBlockSignature(block, converter))) { LLVM_DEBUG(logFailure(impl.logger, "failed to convert types of moved " "block")); return failure(); @@ -2042,9 +2150,9 @@ LogicalResult OperationLegalizer::legalizePatternBlockActions( // If this operation should be considered for re-legalization, try it. 
if (operationsToIgnore.insert(parentOp).second && failed(legalize(parentOp, rewriter))) { - LLVM_DEBUG(logFailure( - impl.logger, "operation '{0}'({1}) became illegal after block action", - parentOp->getName(), parentOp)); + LLVM_DEBUG(logFailure(impl.logger, + "operation '{0}'({1}) became illegal after rewrite", + parentOp->getName(), parentOp)); return failure(); } } From a93a4ec7dd205b965ee5597314bb376520cd736c Mon Sep 17 00:00:00 2001 From: Orlando Cazalet-Hyams Date: Wed, 14 Feb 2024 15:23:02 +0000 Subject: [PATCH 149/240] Reapply "[DebugInfo][RemoveDIs] Turn on non-instrinsic debug-info by default" This reapplies commit bdde5f9 by undoing the revert fd3a0c185f17. The previous reapplication d759618df763 was reverted due to a crash (reproducer in comments for d759618df763) which was fixed in #81737. As noted in the original commit, this commit may break downstream tests. If this commit is breaking your downstream tests, please see comment 12 in [0], which documents the kind of variation in tests we'd expect to see from this change and what to do about it. 
[0] https://discourse.llvm.org/t/rfc-instruction-api-changes-needed-to-eliminate-debug-intrinsics-from-ir/68939 --- llvm/lib/IR/BasicBlock.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp index fe9d0d08c5fe97..bf02eba9fb448d 100644 --- a/llvm/lib/IR/BasicBlock.cpp +++ b/llvm/lib/IR/BasicBlock.cpp @@ -34,7 +34,7 @@ cl::opt UseNewDbgInfoFormat("experimental-debuginfo-iterators", cl::desc("Enable communicating debuginfo positions " "through iterators, eliminating intrinsics"), - cl::init(false)); + cl::init(true)); DPMarker *BasicBlock::createMarker(Instruction *I) { assert(IsNewDbgInfoFormat && From 2347a47622718259c95993c1cab604ad82854b6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Wed, 14 Feb 2024 15:29:18 +0100 Subject: [PATCH 150/240] [clang][Interp][NFC] Make a local variable const --- clang/lib/AST/Interp/Source.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/AST/Interp/Source.cpp b/clang/lib/AST/Interp/Source.cpp index 4e032c92d26df1..45cd0ad4fd4273 100644 --- a/clang/lib/AST/Interp/Source.cpp +++ b/clang/lib/AST/Interp/Source.cpp @@ -33,7 +33,7 @@ SourceRange SourceInfo::getRange() const { } const Expr *SourceInfo::asExpr() const { - if (auto *S = Source.dyn_cast()) + if (const auto *S = Source.dyn_cast()) return dyn_cast(S); return nullptr; } From 8f4cd2c7e3c35f23004c1448705be8f673c2d271 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Wed, 14 Feb 2024 17:39:59 +0100 Subject: [PATCH 151/240] [mlir][Transforms] Support `moveOpBefore`/`After` in dialect conversion (#81240) Add a new rewrite class for "operation movements". This rewrite class can roll back `moveOpBefore` and `moveOpAfter`. `RewriterBase::moveOpBefore` and `RewriterBase::moveOpAfter` is no longer virtual. (The dialect conversion can gather all required information for rollbacks from listener notifications.) 
--- mlir/include/mlir/IR/PatternMatch.h | 6 +- .../mlir/Transforms/DialectConversion.h | 9 +-- .../Transforms/Utils/DialectConversion.cpp | 74 +++++++++++++++---- mlir/test/Transforms/test-legalizer.mlir | 14 ++++ mlir/test/lib/Dialect/Test/TestPatterns.cpp | 20 ++++- 5 files changed, 95 insertions(+), 28 deletions(-) diff --git a/mlir/include/mlir/IR/PatternMatch.h b/mlir/include/mlir/IR/PatternMatch.h index 78dcfe7f6fc3d2..b8aeea0d23475b 100644 --- a/mlir/include/mlir/IR/PatternMatch.h +++ b/mlir/include/mlir/IR/PatternMatch.h @@ -588,8 +588,7 @@ class RewriterBase : public OpBuilder { /// Unlink this operation from its current block and insert it right before /// `iterator` in the specified block. - virtual void moveOpBefore(Operation *op, Block *block, - Block::iterator iterator); + void moveOpBefore(Operation *op, Block *block, Block::iterator iterator); /// Unlink this operation from its current block and insert it right after /// `existingOp` which may be in the same or another block in the same @@ -598,8 +597,7 @@ class RewriterBase : public OpBuilder { /// Unlink this operation from its current block and insert it right after /// `iterator` in the specified block. - virtual void moveOpAfter(Operation *op, Block *block, - Block::iterator iterator); + void moveOpAfter(Operation *op, Block *block, Block::iterator iterator); /// Unlink this block and insert it right before `existingBlock`. void moveBlockBefore(Block *block, Block *anotherBlock); diff --git a/mlir/include/mlir/Transforms/DialectConversion.h b/mlir/include/mlir/Transforms/DialectConversion.h index 851d639ae68a77..15fa39bde104b9 100644 --- a/mlir/include/mlir/Transforms/DialectConversion.h +++ b/mlir/include/mlir/Transforms/DialectConversion.h @@ -744,8 +744,8 @@ class ConversionPatternRewriter final : public PatternRewriter { /// PatternRewriter hook for updating the given operation in-place. /// Note: These methods only track updates to the given operation itself, - /// and not nested regions. 
Updates to regions will still require notification - /// through other more specific hooks above. + /// and not nested regions. Updates to regions will still require + /// notification through other more specific hooks above. void startOpModification(Operation *op) override; /// PatternRewriter hook for updating the given operation in-place. @@ -761,11 +761,6 @@ class ConversionPatternRewriter final : public PatternRewriter { // Hide unsupported pattern rewriter API. using OpBuilder::setListener; - void moveOpBefore(Operation *op, Block *block, - Block::iterator iterator) override; - void moveOpAfter(Operation *op, Block *block, - Block::iterator iterator) override; - std::unique_ptr impl; }; diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index 9875f8668b65a8..84597fb7986b07 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -760,7 +760,8 @@ class IRRewrite { InlineBlock, MoveBlock, SplitBlock, - BlockTypeConversion + BlockTypeConversion, + MoveOperation }; virtual ~IRRewrite() = default; @@ -982,6 +983,54 @@ class BlockTypeConversionRewrite : public BlockRewrite { // `ArgConverter::applyRewrites`. This should be done in the "commit" method. void rollback() override; }; + +/// An operation rewrite. +class OperationRewrite : public IRRewrite { +public: + /// Return the operation that this rewrite operates on. + Operation *getOperation() const { return op; } + + static bool classof(const IRRewrite *rewrite) { + return rewrite->getKind() >= Kind::MoveOperation && + rewrite->getKind() <= Kind::MoveOperation; + } + +protected: + OperationRewrite(Kind kind, ConversionPatternRewriterImpl &rewriterImpl, + Operation *op) + : IRRewrite(kind, rewriterImpl), op(op) {} + + // The operation that this rewrite operates on. + Operation *op; +}; + +/// Moving of an operation. This rewrite is immediately reflected in the IR. 
+class MoveOperationRewrite : public OperationRewrite { +public: + MoveOperationRewrite(ConversionPatternRewriterImpl &rewriterImpl, + Operation *op, Block *block, Operation *insertBeforeOp) + : OperationRewrite(Kind::MoveOperation, rewriterImpl, op), block(block), + insertBeforeOp(insertBeforeOp) {} + + static bool classof(const IRRewrite *rewrite) { + return rewrite->getKind() == Kind::MoveOperation; + } + + void rollback() override { + // Move the operation back to its original position. + Block::iterator before = + insertBeforeOp ? Block::iterator(insertBeforeOp) : block->end(); + block->getOperations().splice(before, op->getBlock()->getOperations(), op); + } + +private: + // The block in which this operation was previously contained. + Block *block; + + // The original successor of this operation before it was moved. "nullptr" if + // this operation was the last operation in the block. + Operation *insertBeforeOp; +}; } // namespace //===----------------------------------------------------------------------===// @@ -1478,12 +1527,19 @@ LogicalResult ConversionPatternRewriterImpl::convertNonEntryRegionTypes( void ConversionPatternRewriterImpl::notifyOperationInserted( Operation *op, OpBuilder::InsertPoint previous) { - assert(!previous.isSet() && "expected newly created op"); LLVM_DEBUG({ logger.startLine() << "** Insert : '" << op->getName() << "'(" << op << ")\n"; }); - createdOps.push_back(op); + if (!previous.isSet()) { + // This is a newly created op. + createdOps.push_back(op); + return; + } + Operation *prevOp = previous.getPoint() == previous.getBlock()->end() + ? 
nullptr + : &*previous.getPoint(); + appendRewrite(op, previous.getBlock(), prevOp); } void ConversionPatternRewriterImpl::notifyOpReplaced(Operation *op, @@ -1722,18 +1778,6 @@ void ConversionPatternRewriter::cancelOpModification(Operation *op) { rootUpdates.erase(rootUpdates.begin() + updateIdx); } -void ConversionPatternRewriter::moveOpBefore(Operation *op, Block *block, - Block::iterator iterator) { - llvm_unreachable( - "moving single ops is not supported in a dialect conversion"); -} - -void ConversionPatternRewriter::moveOpAfter(Operation *op, Block *block, - Block::iterator iterator) { - llvm_unreachable( - "moving single ops is not supported in a dialect conversion"); -} - detail::ConversionPatternRewriterImpl &ConversionPatternRewriter::getImpl() { return *impl; } diff --git a/mlir/test/Transforms/test-legalizer.mlir b/mlir/test/Transforms/test-legalizer.mlir index d8cf6e4719cede..84fcc18ab7d370 100644 --- a/mlir/test/Transforms/test-legalizer.mlir +++ b/mlir/test/Transforms/test-legalizer.mlir @@ -320,3 +320,17 @@ module { return } } + +// ----- + +// CHECK-LABEL: func @test_move_op_before_rollback() +func.func @test_move_op_before_rollback() { + // CHECK: "test.one_region_op"() + // CHECK: "test.hoist_me"() + "test.one_region_op"() ({ + // expected-remark @below{{'test.hoist_me' is not legalizable}} + %0 = "test.hoist_me"() : () -> (i32) + "test.valid"(%0) : (i32) -> () + }) : () -> () + "test.return"() : () -> () +} diff --git a/mlir/test/lib/Dialect/Test/TestPatterns.cpp b/mlir/test/lib/Dialect/Test/TestPatterns.cpp index d7e5d6db50c1fb..1c02232b8adbb1 100644 --- a/mlir/test/lib/Dialect/Test/TestPatterns.cpp +++ b/mlir/test/lib/Dialect/Test/TestPatterns.cpp @@ -773,6 +773,22 @@ struct TestUndoBlockArgReplace : public ConversionPattern { } }; +/// This pattern hoists ops out of a "test.hoist_me" and then fails conversion. +/// This is to test the rollback logic. 
+struct TestUndoMoveOpBefore : public ConversionPattern { + TestUndoMoveOpBefore(MLIRContext *ctx) + : ConversionPattern("test.hoist_me", /*benefit=*/1, ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override { + rewriter.moveOpBefore(op, op->getParentOp()); + // Replace with an illegal op to ensure the conversion fails. + rewriter.replaceOpWithNewOp(op, rewriter.getF32Type()); + return success(); + } +}; + /// A rewrite pattern that tests the undo mechanism when erasing a block. struct TestUndoBlockErase : public ConversionPattern { TestUndoBlockErase(MLIRContext *ctx) @@ -1069,7 +1085,7 @@ struct TestLegalizePatternDriver TestChangeProducerTypeF32ToInvalid, TestUpdateConsumerType, TestNonRootReplacement, TestBoundedRecursiveRewrite, TestNestedOpCreationUndoRewrite, TestReplaceEraseOp, - TestCreateUnregisteredOp>(&getContext()); + TestCreateUnregisteredOp, TestUndoMoveOpBefore>(&getContext()); patterns.add(&getContext(), converter); mlir::populateAnyFunctionOpInterfaceTypeConversionPattern(patterns, converter); @@ -1079,7 +1095,7 @@ struct TestLegalizePatternDriver ConversionTarget target(getContext()); target.addLegalOp(); target.addLegalOp(); + TerminatorOp, OneRegionOp>(); target .addIllegalOp(); target.addDynamicallyLegalOp([](TestReturnOp op) { From 4efbf524ad2726f6ff4cb095bebef3e871d4984e Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Wed, 14 Feb 2024 08:47:09 -0800 Subject: [PATCH 152/240] [libc][__support][bit] remove compiler has builtin checks (#81679) We only support building llvmlibc with modern compilers. https://libc.llvm.org/compiler_support.html#minimum-supported-versions All versions of the these compilers support these builtins; GCC does not support the short variants. 
--- libc/src/__support/CPP/bit.h | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/libc/src/__support/CPP/bit.h b/libc/src/__support/CPP/bit.h index 392fbe248138ae..a8bf75a9a2efac 100644 --- a/libc/src/__support/CPP/bit.h +++ b/libc/src/__support/CPP/bit.h @@ -93,15 +93,9 @@ template >> #if LIBC_HAS_BUILTIN(__builtin_ctzs) ADD_SPECIALIZATION(countr_zero, unsigned short, __builtin_ctzs) #endif -#if LIBC_HAS_BUILTIN(__builtin_ctz) ADD_SPECIALIZATION(countr_zero, unsigned int, __builtin_ctz) -#endif -#if LIBC_HAS_BUILTIN(__builtin_ctzl) ADD_SPECIALIZATION(countr_zero, unsigned long, __builtin_ctzl) -#endif -#if LIBC_HAS_BUILTIN(__builtin_ctzll) ADD_SPECIALIZATION(countr_zero, unsigned long long, __builtin_ctzll) -#endif /// Count number of 0's from the most significant bit to the least /// stopping at the first 1. @@ -128,15 +122,9 @@ template >> #if LIBC_HAS_BUILTIN(__builtin_clzs) ADD_SPECIALIZATION(countl_zero, unsigned short, __builtin_clzs) #endif -#if LIBC_HAS_BUILTIN(__builtin_clz) ADD_SPECIALIZATION(countl_zero, unsigned int, __builtin_clz) -#endif -#if LIBC_HAS_BUILTIN(__builtin_clzl) ADD_SPECIALIZATION(countl_zero, unsigned long, __builtin_clzl) -#endif -#if LIBC_HAS_BUILTIN(__builtin_clzll) ADD_SPECIALIZATION(countl_zero, unsigned long long, __builtin_clzll) -#endif #undef ADD_SPECIALIZATION @@ -256,15 +244,9 @@ template >> #if LIBC_HAS_BUILTIN(__builtin_clzs) SPECIALIZE_FLZ(first_leading_zero, unsigned short, __builtin_clzs) #endif -#if LIBC_HAS_BUILTIN(__builtin_clz) SPECIALIZE_FLZ(first_leading_zero, unsigned int, __builtin_clz) -#endif -#if LIBC_HAS_BUILTIN(__builtin_clzl) SPECIALIZE_FLZ(first_leading_zero, unsigned long, __builtin_clzl) -#endif -#if LIBC_HAS_BUILTIN(__builtin_clzll) SPECIALIZE_FLZ(first_leading_zero, unsigned long long, __builtin_clzll) -#endif #undef SPECIALIZE_FLZ From 0f6f5bfe5322f08a96fda149ff70888dc45a2e35 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Wed, 14 Feb 2024 08:52:38 -0800 Subject: [PATCH 
153/240] [libc][__support][bit] simplify FLZ (#81678) `countl_zero(~x)` *is* `countl_one(x)` --- libc/src/__support/CPP/bit.h | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/libc/src/__support/CPP/bit.h b/libc/src/__support/CPP/bit.h index a8bf75a9a2efac..4115d67c7705c0 100644 --- a/libc/src/__support/CPP/bit.h +++ b/libc/src/__support/CPP/bit.h @@ -226,30 +226,11 @@ LIBC_INLINE constexpr To bit_or_static_cast(const From &from) { } } -#define SPECIALIZE_FLZ(NAME, TYPE, BUILTIN) \ - template <> [[nodiscard]] LIBC_INLINE constexpr int NAME(TYPE value) { \ - static_assert(cpp::is_unsigned_v); \ - return value == cpp::numeric_limits::max() \ - ? 0 \ - : BUILTIN(static_cast(~value)) + 1; \ - } - template >> [[nodiscard]] LIBC_INLINE constexpr int first_leading_zero(T value) { - return value == cpp::numeric_limits::max() - ? 0 - : countl_zero(static_cast(~value)) + 1; + return value == cpp::numeric_limits::max() ? 0 : countl_one(value) + 1; } -#if LIBC_HAS_BUILTIN(__builtin_clzs) -SPECIALIZE_FLZ(first_leading_zero, unsigned short, __builtin_clzs) -#endif -SPECIALIZE_FLZ(first_leading_zero, unsigned int, __builtin_clz) -SPECIALIZE_FLZ(first_leading_zero, unsigned long, __builtin_clzl) -SPECIALIZE_FLZ(first_leading_zero, unsigned long long, __builtin_clzll) - -#undef SPECIALIZE_FLZ - } // namespace LIBC_NAMESPACE::cpp #endif // LLVM_LIBC_SRC___SUPPORT_CPP_BIT_H From 7c4c274643cedcf9671e1db65361231a7d9bee70 Mon Sep 17 00:00:00 2001 From: Javed Absar <106147771+javedabsar1@users.noreply.github.com> Date: Wed, 14 Feb 2024 17:00:42 +0000 Subject: [PATCH 154/240] [MLIR][NFC] Fix some comments in padding transform. 
(#81741) --- mlir/lib/Dialect/Linalg/Transforms/Padding.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mlir/lib/Dialect/Linalg/Transforms/Padding.cpp b/mlir/lib/Dialect/Linalg/Transforms/Padding.cpp index 278f3499f53e82..8c4b70db248989 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Padding.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Padding.cpp @@ -23,7 +23,7 @@ using namespace mlir::linalg; #define DBGSNL() (llvm::dbgs() << "\n") /// Compute the padded shape of the given operand. The operand is padded to a -/// static bounding box according to the specified options. +/// static bounding box according to the specified padding options. static LogicalResult computePaddedShape(linalg::LinalgOp opToPad, OpOperand *opOperand, const LinalgPaddingOptions &options, @@ -75,7 +75,7 @@ static LogicalResult computePaddedShape(linalg::LinalgOp opToPad, presburger::BoundType::UB, opOperand->get(), /*dim=*/i, /*stopCondition=*/nullptr, /*closedUB=*/true); if (failed(upperBound)) { - LLVM_DEBUG(DBGS() << "----count not compute a bounding box for padding"); + LLVM_DEBUG(DBGS() << "----could not compute a bounding box for padding"); return failure(); } paddedShape[i] = ceil(*upperBound, shapeDimToMultiple[i]); @@ -89,7 +89,7 @@ static LogicalResult computePaddedShape(linalg::LinalgOp opToPad, /// the nofold flag found in "paddingValues" and "packPaddings", respectively. /// /// Exit early and return the `opOperand` value if it already has the requested -/// shape. I.e.: +/// shape. i.e.: /// - static shape /// - nofold is not set /// - dim sizes are multiples of "padToMultipleOf" @@ -232,7 +232,7 @@ linalg::rewriteAsPaddedOp(RewriterBase &rewriter, LinalgOp opToPad, // Copy back unpadded results to the original destination (i.e., inits of the // linalg op), so that the destination buffer of the computation does not - // change. If the padding folds away, this will materizalize as a memcpy + // change. 
If the padding folds away, this will materialize as a memcpy // between two identical buffers, which will then also fold away. assert(static_cast(paddedSubtensorResults.size()) == opToPad.getNumDpsInits() && From 60596716c2327575fc8ea16eab3321dabc4a706f Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Wed, 14 Feb 2024 09:16:00 -0800 Subject: [PATCH 155/240] [libc][stdbit] implement stdc_first_leading_one (C23) (#81502) --- libc/config/linux/x86_64/entrypoints.txt | 5 ++ libc/docs/stdbit.rst | 48 +++++++++---------- libc/include/llvm-libc-macros/stdbit-macros.h | 22 +++++++++ libc/spec/stdc.td | 10 +++- libc/src/__support/CPP/bit.h | 5 ++ libc/src/stdbit/CMakeLists.txt | 1 + libc/src/stdbit/stdc_first_leading_one_uc.cpp | 20 ++++++++ libc/src/stdbit/stdc_first_leading_one_uc.h | 18 +++++++ libc/src/stdbit/stdc_first_leading_one_ui.cpp | 20 ++++++++ libc/src/stdbit/stdc_first_leading_one_ui.h | 18 +++++++ libc/src/stdbit/stdc_first_leading_one_ul.cpp | 20 ++++++++ libc/src/stdbit/stdc_first_leading_one_ul.h | 18 +++++++ .../src/stdbit/stdc_first_leading_one_ull.cpp | 21 ++++++++ libc/src/stdbit/stdc_first_leading_one_ull.h | 18 +++++++ libc/src/stdbit/stdc_first_leading_one_us.cpp | 21 ++++++++ libc/src/stdbit/stdc_first_leading_one_us.h | 18 +++++++ libc/test/include/stdbit_test.cpp | 15 ++++++ libc/test/src/__support/CPP/bit_test.cpp | 7 +++ libc/test/src/stdbit/CMakeLists.txt | 1 + .../stdbit/stdc_first_leading_one_uc_test.cpp | 21 ++++++++ .../stdbit/stdc_first_leading_one_ui_test.cpp | 21 ++++++++ .../stdbit/stdc_first_leading_one_ul_test.cpp | 21 ++++++++ .../stdc_first_leading_one_ull_test.cpp | 21 ++++++++ .../stdbit/stdc_first_leading_one_us_test.cpp | 21 ++++++++ 24 files changed, 385 insertions(+), 26 deletions(-) create mode 100644 libc/src/stdbit/stdc_first_leading_one_uc.cpp create mode 100644 libc/src/stdbit/stdc_first_leading_one_uc.h create mode 100644 libc/src/stdbit/stdc_first_leading_one_ui.cpp create mode 100644 
libc/src/stdbit/stdc_first_leading_one_ui.h create mode 100644 libc/src/stdbit/stdc_first_leading_one_ul.cpp create mode 100644 libc/src/stdbit/stdc_first_leading_one_ul.h create mode 100644 libc/src/stdbit/stdc_first_leading_one_ull.cpp create mode 100644 libc/src/stdbit/stdc_first_leading_one_ull.h create mode 100644 libc/src/stdbit/stdc_first_leading_one_us.cpp create mode 100644 libc/src/stdbit/stdc_first_leading_one_us.h create mode 100644 libc/test/src/stdbit/stdc_first_leading_one_uc_test.cpp create mode 100644 libc/test/src/stdbit/stdc_first_leading_one_ui_test.cpp create mode 100644 libc/test/src/stdbit/stdc_first_leading_one_ul_test.cpp create mode 100644 libc/test/src/stdbit/stdc_first_leading_one_ull_test.cpp create mode 100644 libc/test/src/stdbit/stdc_first_leading_one_us_test.cpp diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 75e39ae51fdb06..fc30bcf56665c7 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -117,6 +117,11 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.stdbit.stdc_first_leading_zero_ui libc.src.stdbit.stdc_first_leading_zero_ul libc.src.stdbit.stdc_first_leading_zero_ull + libc.src.stdbit.stdc_first_leading_one_uc + libc.src.stdbit.stdc_first_leading_one_us + libc.src.stdbit.stdc_first_leading_one_ui + libc.src.stdbit.stdc_first_leading_one_ul + libc.src.stdbit.stdc_first_leading_one_ull # stdlib.h entrypoints libc.src.stdlib.abs diff --git a/libc/docs/stdbit.rst b/libc/docs/stdbit.rst index 4f242d21f8b842..5ff36dcb0f5929 100644 --- a/libc/docs/stdbit.rst +++ b/libc/docs/stdbit.rst @@ -41,26 +41,26 @@ stdc_leading_ones_us |check| stdc_leading_ones_ui |check| stdc_leading_ones_ul |check| stdc_leading_ones_ull |check| -stdc_trailing_zeros_uc -stdc_trailing_zeros_us -stdc_trailing_zeros_ui -stdc_trailing_zeros_ul -stdc_trailing_zeros_ull -stdc_trailing_ones_uc -stdc_trailing_ones_us -stdc_trailing_ones_ui -stdc_trailing_ones_ul 
-stdc_trailing_ones_ull -stdc_first_leading_zero_uc -stdc_first_leading_zero_us -stdc_first_leading_zero_ui -stdc_first_leading_zero_ul -stdc_first_leading_zero_ull -stdc_first_leading_one_uc -stdc_first_leading_one_us -stdc_first_leading_one_ui -stdc_first_leading_one_ul -stdc_first_leading_one_ull +stdc_trailing_zeros_uc |check| +stdc_trailing_zeros_us |check| +stdc_trailing_zeros_ui |check| +stdc_trailing_zeros_ul |check| +stdc_trailing_zeros_ull |check| +stdc_trailing_ones_uc |check| +stdc_trailing_ones_us |check| +stdc_trailing_ones_ui |check| +stdc_trailing_ones_ul |check| +stdc_trailing_ones_ull |check| +stdc_first_leading_zero_uc |check| +stdc_first_leading_zero_us |check| +stdc_first_leading_zero_ui |check| +stdc_first_leading_zero_ul |check| +stdc_first_leading_zero_ull |check| +stdc_first_leading_one_uc |check| +stdc_first_leading_one_us |check| +stdc_first_leading_one_ui |check| +stdc_first_leading_one_ul |check| +stdc_first_leading_one_ull |check| stdc_first_trailing_zero_uc stdc_first_trailing_zero_us stdc_first_trailing_zero_ui @@ -116,10 +116,10 @@ __STDC_ENDIAN_BIG__ __STDC_ENDIAN_NATIVE__ stdc_leading_zeros |check| stdc_leading_ones |check| -stdc_trailing_zeros -stdc_trailing_ones -stdc_first_leading_zero -stdc_first_leading_one +stdc_trailing_zeros |check| +stdc_trailing_ones |check| +stdc_first_leading_zero |check| +stdc_first_leading_one |check| stdc_first_trailing_zero stdc_first_trailing_one stdc_count_zeros diff --git a/libc/include/llvm-libc-macros/stdbit-macros.h b/libc/include/llvm-libc-macros/stdbit-macros.h index 693a45e63419ff..87c590e61e3999 100644 --- a/libc/include/llvm-libc-macros/stdbit-macros.h +++ b/libc/include/llvm-libc-macros/stdbit-macros.h @@ -86,6 +86,21 @@ inline unsigned stdc_first_leading_zero(unsigned long x) { inline unsigned stdc_first_leading_zero(unsigned long long x) { return stdc_first_leading_zero_ull(x); } +inline unsigned stdc_first_leading_one(unsigned char x) { + return stdc_first_leading_one_uc(x); +} 
+inline unsigned stdc_first_leading_one(unsigned short x) { + return stdc_first_leading_one_us(x); +} +inline unsigned stdc_first_leading_one(unsigned x) { + return stdc_first_leading_one_ui(x); +} +inline unsigned stdc_first_leading_one(unsigned long x) { + return stdc_first_leading_one_ul(x); +} +inline unsigned stdc_first_leading_one(unsigned long long x) { + return stdc_first_leading_one_ull(x); +} #else #define stdc_leading_zeros(x) \ _Generic((x), \ @@ -122,6 +137,13 @@ inline unsigned stdc_first_leading_zero(unsigned long long x) { unsigned: stdc_first_leading_zero_ui, \ unsigned long: stdc_first_leading_zero_ul, \ unsigned long long: stdc_first_leading_zero_ull)(x) +#define stdc_first_leading_one(x) \ + _Generic((x), \ + unsigned char: stdc_first_leading_one_uc, \ + unsigned short: stdc_first_leading_one_us, \ + unsigned: stdc_first_leading_one_ui, \ + unsigned long: stdc_first_leading_one_ul, \ + unsigned long long: stdc_first_leading_one_ull)(x) #endif // __cplusplus #endif // __LLVM_LIBC_MACROS_STDBIT_MACROS_H diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index 1720a4a3c3aaf9..5e87831b907fb5 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -781,7 +781,8 @@ def StdC : StandardSpec<"stdc"> { Macro<"stdc_leading_ones">, Macro<"stdc_trailing_zeros">, Macro<"stdc_trailing_ones">, - Macro<"stdc_first_leading_zero"> + Macro<"stdc_first_leading_zero">, + Macro<"stdc_first_leading_one"> ], // Macros [], // Types [], // Enumerations @@ -810,7 +811,12 @@ def StdC : StandardSpec<"stdc"> { FunctionSpec<"stdc_first_leading_zero_us", RetValSpec, [ArgSpec]>, FunctionSpec<"stdc_first_leading_zero_ui", RetValSpec, [ArgSpec]>, FunctionSpec<"stdc_first_leading_zero_ul", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_first_leading_zero_ull", RetValSpec, [ArgSpec]> + FunctionSpec<"stdc_first_leading_zero_ull", RetValSpec, [ArgSpec]>, + FunctionSpec<"stdc_first_leading_one_uc", RetValSpec, [ArgSpec]>, + FunctionSpec<"stdc_first_leading_one_us", RetValSpec, 
[ArgSpec]>, + FunctionSpec<"stdc_first_leading_one_ui", RetValSpec, [ArgSpec]>, + FunctionSpec<"stdc_first_leading_one_ul", RetValSpec, [ArgSpec]>, + FunctionSpec<"stdc_first_leading_one_ull", RetValSpec, [ArgSpec]> ] // Functions >; diff --git a/libc/src/__support/CPP/bit.h b/libc/src/__support/CPP/bit.h index 4115d67c7705c0..23e305ab86219e 100644 --- a/libc/src/__support/CPP/bit.h +++ b/libc/src/__support/CPP/bit.h @@ -231,6 +231,11 @@ template >> return value == cpp::numeric_limits::max() ? 0 : countl_one(value) + 1; } +template >> +[[nodiscard]] LIBC_INLINE constexpr int first_leading_one(T value) { + return first_leading_zero(static_cast(~value)); +} + } // namespace LIBC_NAMESPACE::cpp #endif // LLVM_LIBC_SRC___SUPPORT_CPP_BIT_H diff --git a/libc/src/stdbit/CMakeLists.txt b/libc/src/stdbit/CMakeLists.txt index 65d5f344307475..6ee93861b8db40 100644 --- a/libc/src/stdbit/CMakeLists.txt +++ b/libc/src/stdbit/CMakeLists.txt @@ -4,6 +4,7 @@ set(prefixes trailing_zeros trailing_ones first_leading_zero + first_leading_one ) set(suffixes c s i l ll) foreach(prefix IN LISTS prefixes) diff --git a/libc/src/stdbit/stdc_first_leading_one_uc.cpp b/libc/src/stdbit/stdc_first_leading_one_uc.cpp new file mode 100644 index 00000000000000..02871595fdb6b8 --- /dev/null +++ b/libc/src/stdbit/stdc_first_leading_one_uc.cpp @@ -0,0 +1,20 @@ +//===-- Implementation of stdc_first_leading_one_uc -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdbit/stdc_first_leading_one_uc.h" + +#include "src/__support/CPP/bit.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(unsigned, stdc_first_leading_one_uc, (unsigned char value)) { + return static_cast(cpp::first_leading_one(value)); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/stdbit/stdc_first_leading_one_uc.h b/libc/src/stdbit/stdc_first_leading_one_uc.h new file mode 100644 index 00000000000000..58892c3f0ff298 --- /dev/null +++ b/libc/src/stdbit/stdc_first_leading_one_uc.h @@ -0,0 +1,18 @@ +//===-- Implementation header for stdc_first_leading_one_uc -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDBIT_STDC_FIRST_LEADING_ONE_UC_H +#define LLVM_LIBC_SRC_STDBIT_STDC_FIRST_LEADING_ONE_UC_H + +namespace LIBC_NAMESPACE { + +unsigned stdc_first_leading_one_uc(unsigned char value); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STDBIT_STDC_FIRST_LEADING_ONE_UC_H diff --git a/libc/src/stdbit/stdc_first_leading_one_ui.cpp b/libc/src/stdbit/stdc_first_leading_one_ui.cpp new file mode 100644 index 00000000000000..a6c7ef5a833914 --- /dev/null +++ b/libc/src/stdbit/stdc_first_leading_one_ui.cpp @@ -0,0 +1,20 @@ +//===-- Implementation of stdc_first_leading_one_ui -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdbit/stdc_first_leading_one_ui.h" + +#include "src/__support/CPP/bit.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(unsigned, stdc_first_leading_one_ui, (unsigned value)) { + return static_cast(cpp::first_leading_one(value)); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/stdbit/stdc_first_leading_one_ui.h b/libc/src/stdbit/stdc_first_leading_one_ui.h new file mode 100644 index 00000000000000..613adf4e1ff762 --- /dev/null +++ b/libc/src/stdbit/stdc_first_leading_one_ui.h @@ -0,0 +1,18 @@ +//===-- Implementation header for stdc_first_leading_one_ui -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDBIT_STDC_FIRST_LEADING_ONE_UI_H +#define LLVM_LIBC_SRC_STDBIT_STDC_FIRST_LEADING_ONE_UI_H + +namespace LIBC_NAMESPACE { + +unsigned stdc_first_leading_one_ui(unsigned value); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STDBIT_STDC_FIRST_LEADING_ONE_UI_H diff --git a/libc/src/stdbit/stdc_first_leading_one_ul.cpp b/libc/src/stdbit/stdc_first_leading_one_ul.cpp new file mode 100644 index 00000000000000..d1bcab5dda02a9 --- /dev/null +++ b/libc/src/stdbit/stdc_first_leading_one_ul.cpp @@ -0,0 +1,20 @@ +//===-- Implementation of stdc_first_leading_one_ul -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdbit/stdc_first_leading_one_ul.h" + +#include "src/__support/CPP/bit.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(unsigned, stdc_first_leading_one_ul, (unsigned long value)) { + return static_cast(cpp::first_leading_one(value)); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/stdbit/stdc_first_leading_one_ul.h b/libc/src/stdbit/stdc_first_leading_one_ul.h new file mode 100644 index 00000000000000..47c179f3fbacd1 --- /dev/null +++ b/libc/src/stdbit/stdc_first_leading_one_ul.h @@ -0,0 +1,18 @@ +//===-- Implementation header for stdc_first_leading_one_ul -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDBIT_STDC_FIRST_LEADING_ONE_UL_H +#define LLVM_LIBC_SRC_STDBIT_STDC_FIRST_LEADING_ONE_UL_H + +namespace LIBC_NAMESPACE { + +unsigned stdc_first_leading_one_ul(unsigned long value); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STDBIT_STDC_FIRST_LEADING_ONE_UL_H diff --git a/libc/src/stdbit/stdc_first_leading_one_ull.cpp b/libc/src/stdbit/stdc_first_leading_one_ull.cpp new file mode 100644 index 00000000000000..7be8f1051ec231 --- /dev/null +++ b/libc/src/stdbit/stdc_first_leading_one_ull.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of stdc_first_leading_one_ull ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdbit/stdc_first_leading_one_ull.h" + +#include "src/__support/CPP/bit.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(unsigned, stdc_first_leading_one_ull, + (unsigned long long value)) { + return static_cast(cpp::first_leading_one(value)); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/stdbit/stdc_first_leading_one_ull.h b/libc/src/stdbit/stdc_first_leading_one_ull.h new file mode 100644 index 00000000000000..344d03f7100f47 --- /dev/null +++ b/libc/src/stdbit/stdc_first_leading_one_ull.h @@ -0,0 +1,18 @@ +//===-- Implementation header for stdc_first_leading_one_ull ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDBIT_STDC_FIRST_LEADING_ONE_ULL_H +#define LLVM_LIBC_SRC_STDBIT_STDC_FIRST_LEADING_ONE_ULL_H + +namespace LIBC_NAMESPACE { + +unsigned stdc_first_leading_one_ull(unsigned long long value); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STDBIT_STDC_FIRST_LEADING_ONE_ULL_H diff --git a/libc/src/stdbit/stdc_first_leading_one_us.cpp b/libc/src/stdbit/stdc_first_leading_one_us.cpp new file mode 100644 index 00000000000000..7a4c7e673f367f --- /dev/null +++ b/libc/src/stdbit/stdc_first_leading_one_us.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of stdc_first_leading_one_us -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdbit/stdc_first_leading_one_us.h" + +#include "src/__support/CPP/bit.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(unsigned, stdc_first_leading_one_us, + (unsigned short value)) { + return static_cast(cpp::first_leading_one(value)); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/stdbit/stdc_first_leading_one_us.h b/libc/src/stdbit/stdc_first_leading_one_us.h new file mode 100644 index 00000000000000..9d5feaf1e92f08 --- /dev/null +++ b/libc/src/stdbit/stdc_first_leading_one_us.h @@ -0,0 +1,18 @@ +//===-- Implementation header for stdc_first_leading_one_us -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDBIT_STDC_FIRST_LEADING_ONE_US_H +#define LLVM_LIBC_SRC_STDBIT_STDC_FIRST_LEADING_ONE_US_H + +namespace LIBC_NAMESPACE { + +unsigned stdc_first_leading_one_us(unsigned short value); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STDBIT_STDC_FIRST_LEADING_ONE_US_H diff --git a/libc/test/include/stdbit_test.cpp b/libc/test/include/stdbit_test.cpp index 9a66a76de20bc5..c2fbcb8ce2d321 100644 --- a/libc/test/include/stdbit_test.cpp +++ b/libc/test/include/stdbit_test.cpp @@ -50,6 +50,13 @@ unsigned stdc_first_leading_zero_ul(unsigned long) noexcept { return 0xEDU; } unsigned stdc_first_leading_zero_ull(unsigned long long) noexcept { return 0xEFU; } +unsigned stdc_first_leading_one_uc(unsigned char) noexcept { return 0xFAU; } +unsigned stdc_first_leading_one_us(unsigned short) noexcept { return 0xFBU; } +unsigned stdc_first_leading_one_ui(unsigned) noexcept 
{ return 0xFCU; } +unsigned stdc_first_leading_one_ul(unsigned long) noexcept { return 0xFDU; } +unsigned stdc_first_leading_one_ull(unsigned long long) noexcept { + return 0xFFU; +} } #include "include/llvm-libc-macros/stdbit-macros.h" @@ -93,3 +100,11 @@ TEST(LlvmLibcStdbitTest, TypeGenericMacroFirstLeadingZero) { EXPECT_EQ(stdc_first_leading_zero(0UL), 0xEDU); EXPECT_EQ(stdc_first_leading_zero(0ULL), 0xEFU); } + +TEST(LlvmLibcStdbitTest, TypeGenericMacroFirstLeadingOne) { + EXPECT_EQ(stdc_first_leading_one(static_cast(0U)), 0xFAU); + EXPECT_EQ(stdc_first_leading_one(static_cast(0U)), 0xFBU); + EXPECT_EQ(stdc_first_leading_one(0U), 0xFCU); + EXPECT_EQ(stdc_first_leading_one(0UL), 0xFDU); + EXPECT_EQ(stdc_first_leading_one(0ULL), 0xFFU); +} diff --git a/libc/test/src/__support/CPP/bit_test.cpp b/libc/test/src/__support/CPP/bit_test.cpp index 00d8ca5d293ace..a70726d4feb241 100644 --- a/libc/test/src/__support/CPP/bit_test.cpp +++ b/libc/test/src/__support/CPP/bit_test.cpp @@ -213,4 +213,11 @@ TYPED_TEST(LlvmLibcBitTest, FirstLeadingZero, UnsignedTypes) { cpp::numeric_limits::digits - i); } +TYPED_TEST(LlvmLibcBitTest, FirstLeadingOne, UnsignedTypes) { + EXPECT_EQ(first_leading_one(static_cast(0)), 0); + for (int i = 0U; i != cpp::numeric_limits::digits; ++i) + EXPECT_EQ(first_leading_one(T(1) << i), + cpp::numeric_limits::digits - i); +} + } // namespace LIBC_NAMESPACE::cpp diff --git a/libc/test/src/stdbit/CMakeLists.txt b/libc/test/src/stdbit/CMakeLists.txt index bc7e49d186a0d5..e32663f88e8620 100644 --- a/libc/test/src/stdbit/CMakeLists.txt +++ b/libc/test/src/stdbit/CMakeLists.txt @@ -6,6 +6,7 @@ set(prefixes trailing_zeros trailing_ones first_leading_zero + first_leading_one ) set(suffixes c s i l ll) foreach(prefix IN LISTS prefixes) diff --git a/libc/test/src/stdbit/stdc_first_leading_one_uc_test.cpp b/libc/test/src/stdbit/stdc_first_leading_one_uc_test.cpp new file mode 100644 index 00000000000000..b8c8db587098e4 --- /dev/null +++ 
b/libc/test/src/stdbit/stdc_first_leading_one_uc_test.cpp @@ -0,0 +1,21 @@ +//===-- Unittests for stdc_first_leading_one_uc ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/CPP/limits.h" +#include "src/stdbit/stdc_first_leading_one_uc.h" +#include "test/UnitTest/Test.h" + +TEST(LlvmLibcStdcFirstLeadingOneUcTest, Zero) { + EXPECT_EQ(LIBC_NAMESPACE::stdc_first_leading_one_uc(0U), 0U); +} + +TEST(LlvmLibcStdcFirstLeadingOneUcTest, OneHot) { + for (unsigned i = 0U; i != UCHAR_WIDTH; ++i) + EXPECT_EQ(LIBC_NAMESPACE::stdc_first_leading_one_uc(1U << i), + UCHAR_WIDTH - i); +} diff --git a/libc/test/src/stdbit/stdc_first_leading_one_ui_test.cpp b/libc/test/src/stdbit/stdc_first_leading_one_ui_test.cpp new file mode 100644 index 00000000000000..319d7482c50f9e --- /dev/null +++ b/libc/test/src/stdbit/stdc_first_leading_one_ui_test.cpp @@ -0,0 +1,21 @@ +//===-- Unittests for stdc_first_leading_one_ui ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/CPP/limits.h" +#include "src/stdbit/stdc_first_leading_one_ui.h" +#include "test/UnitTest/Test.h" + +TEST(LlvmLibcStdcFirstLeadingOneUiTest, Zero) { + EXPECT_EQ(LIBC_NAMESPACE::stdc_first_leading_one_ui(0U), 0U); +} + +TEST(LlvmLibcStdcFirstLeadingOneUiTest, OneHot) { + for (unsigned i = 0U; i != UINT_WIDTH; ++i) + EXPECT_EQ(LIBC_NAMESPACE::stdc_first_leading_one_ui(1U << i), + UINT_WIDTH - i); +} diff --git a/libc/test/src/stdbit/stdc_first_leading_one_ul_test.cpp b/libc/test/src/stdbit/stdc_first_leading_one_ul_test.cpp new file mode 100644 index 00000000000000..5884cec418ce2b --- /dev/null +++ b/libc/test/src/stdbit/stdc_first_leading_one_ul_test.cpp @@ -0,0 +1,21 @@ +//===-- Unittests for stdc_first_leading_one_ul ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/CPP/limits.h" +#include "src/stdbit/stdc_first_leading_one_ul.h" +#include "test/UnitTest/Test.h" + +TEST(LlvmLibcStdcFirstLeadingOneUlTest, Zero) { + EXPECT_EQ(LIBC_NAMESPACE::stdc_first_leading_one_ul(0UL), 0U); +} + +TEST(LlvmLibcStdcFirstLeadingOneUlTest, OneHot) { + for (unsigned i = 0U; i != ULONG_WIDTH; ++i) + EXPECT_EQ(LIBC_NAMESPACE::stdc_first_leading_one_ul(1UL << i), + ULONG_WIDTH - i); +} diff --git a/libc/test/src/stdbit/stdc_first_leading_one_ull_test.cpp b/libc/test/src/stdbit/stdc_first_leading_one_ull_test.cpp new file mode 100644 index 00000000000000..bf57f16c1dcaaa --- /dev/null +++ b/libc/test/src/stdbit/stdc_first_leading_one_ull_test.cpp @@ -0,0 +1,21 @@ +//===-- Unittests for stdc_first_leading_one_ull --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/CPP/limits.h" +#include "src/stdbit/stdc_first_leading_one_ull.h" +#include "test/UnitTest/Test.h" + +TEST(LlvmLibcStdcFirstLeadingOneUllTest, Zero) { + EXPECT_EQ(LIBC_NAMESPACE::stdc_first_leading_one_ull(0ULL), 0U); +} + +TEST(LlvmLibcStdcFirstLeadingOneUllTest, OneHot) { + for (unsigned i = 0U; i != ULLONG_WIDTH; ++i) + EXPECT_EQ(LIBC_NAMESPACE::stdc_first_leading_one_ull(1ULL << i), + ULLONG_WIDTH - i); +} diff --git a/libc/test/src/stdbit/stdc_first_leading_one_us_test.cpp b/libc/test/src/stdbit/stdc_first_leading_one_us_test.cpp new file mode 100644 index 00000000000000..e9488335d9b008 --- /dev/null +++ b/libc/test/src/stdbit/stdc_first_leading_one_us_test.cpp @@ -0,0 +1,21 @@ +//===-- Unittests for stdc_first_leading_one_us ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/CPP/limits.h" +#include "src/stdbit/stdc_first_leading_one_us.h" +#include "test/UnitTest/Test.h" + +TEST(LlvmLibcStdcFirstLeadingOneUsTest, Zero) { + EXPECT_EQ(LIBC_NAMESPACE::stdc_first_leading_one_us(0U), 0U); +} + +TEST(LlvmLibcStdcFirstLeadingOneUsTest, OneHot) { + for (unsigned i = 0U; i != USHRT_WIDTH; ++i) + EXPECT_EQ(LIBC_NAMESPACE::stdc_first_leading_one_us(1U << i), + USHRT_WIDTH - i); +} From 6f907733e65d24edad65f763fb14402464bd578b Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Wed, 14 Feb 2024 18:26:38 +0100 Subject: [PATCH 156/240] [lld/ELF] Avoid unnecessary TPOFF relocations in GOT for -pie (#81739) With the new SystemZ port we noticed that -pie executables generated from files containing R_390_TLS_IEENT relocations will have unnecessary relocations in their GOT: 9e8d8: R_390_TLS_TPOFF *ABS*+0x18 This is caused by the config->isPic condition in addTpOffsetGotEntry: static void addTpOffsetGotEntry(Symbol &sym) { in.got->addEntry(sym); uint64_t off = sym.getGotOffset(); if (!sym.isPreemptible && !config->isPic) { in.got->addConstant({R_TPREL, target->symbolicRel, off, 0, &sym}); return; } It is correct that we need to retain a TPOFF relocation if the target symbol is preemptible or if we're building a shared library. But when building a -pie executable, those values are fixed at link time and there's no need for any remaining dynamic relocation. Note that the equivalent MIPS-specific code in MipsGotSection::build checks for config->shared instead of config->isPic; we should use the same check here. (Note also that on many other platforms we're not even using addTpOffsetGotEntry in this case as an IE->LE relaxation is applied before; we don't have this type of relaxation on SystemZ.) 
--- lld/ELF/Relocations.cpp | 2 +- lld/test/ELF/systemz-tls-ie.s | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index f64b4219e0acc1..619fbaf5dc5452 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -940,7 +940,7 @@ void elf::addGotEntry(Symbol &sym) { static void addTpOffsetGotEntry(Symbol &sym) { in.got->addEntry(sym); uint64_t off = sym.getGotOffset(); - if (!sym.isPreemptible && !config->isPic) { + if (!sym.isPreemptible && !config->shared) { in.got->addConstant({R_TPREL, target->symbolicRel, off, 0, &sym}); return; } diff --git a/lld/test/ELF/systemz-tls-ie.s b/lld/test/ELF/systemz-tls-ie.s index 27b642ed2dfc5f..85e2f24cb61f62 100644 --- a/lld/test/ELF/systemz-tls-ie.s +++ b/lld/test/ELF/systemz-tls-ie.s @@ -12,6 +12,14 @@ # RUN: llvm-objdump --section .data --full-contents %t | FileCheck --check-prefix=LE-DATA %s # RUN: llvm-objdump --section .got --full-contents %t | FileCheck --check-prefix=LE-GOT %s +## With -pie we still have the R_390_RELATIVE for the data element, but all GOT +## entries should be fully resolved without any remaining R_390_TLS_TPOFF. 
+# RUN: ld.lld -pie %t.o -o %t.pie +# RUN: llvm-readelf -r %t.pie | FileCheck --check-prefix=PIE-REL %s +# RUN: llvm-objdump -d --no-show-raw-insn %t.pie | FileCheck --check-prefix=PIE %s +# RUN: llvm-objdump --section .data --full-contents %t.pie | FileCheck --check-prefix=PIE-DATA %s +# RUN: llvm-objdump --section .got --full-contents %t.pie | FileCheck --check-prefix=PIE-GOT %s + # IE-REL: Relocation section '.rela.dyn' at offset {{.*}} contains 4 entries: # IE-REL: 0000000000003478 000000000000000c R_390_RELATIVE 2460 # IE-REL: 0000000000002460 0000000100000038 R_390_TLS_TPOFF 0000000000000008 a + 0 @@ -58,6 +66,32 @@ # LE-GOT: 1002248 00000000 00000000 ffffffff fffffff8 # LE-GOT: 1002258 ffffffff fffffffc 00000000 00000000 +# PIE-REL: Relocation section '.rela.dyn' at offset {{.*}} contains 1 entries: +# PIE-REL: 00000000000033d0 000000000000000c R_390_RELATIVE 23b8 + +## TP offset for a is at 0x23b8 +# PIE: lgrl %r1, 0x23b8 +# PIE-NEXT: lgf %r1, 0(%r1,%r7) + +## TP offset for b is at 0x23c0 +# PIE-NEXT: lgrl %r1, 0x23c0 +# PIE-NEXT: lgf %r1, 0(%r1,%r7) + +## TP offset for c is at 0x23c8 +# PIE-NEXT: lgrl %r1, 0x23c8 +# PIE-NEXT: lgf %r1, 0(%r1,%r7) + +## Data element: TP offset for a is at 0x23b8 (relocated via R_390_RELATIVE above) +# PIE-DATA: 33d0 00000000 00000000 + +## TP offsets in GOT: +# a: -8 +# b: -4 +# c: 0 +# PIE-GOT: 23a0 00000000 000022d0 00000000 00000000 +# PIE-GOT: 23b0 00000000 00000000 ffffffff fffffff8 +# PIE-GOT: 23c0 ffffffff fffffffc 00000000 00000000 + ear %r7,%a0 sllg %r7,%r1,32 ear %r7,%a1 From 411554a3535e55a1436ccda80064d7a91814dc27 Mon Sep 17 00:00:00 2001 From: Guillaume Chatelet Date: Wed, 14 Feb 2024 18:35:54 +0100 Subject: [PATCH 157/240] [libc][NFC] Use user defined literals to build 128 and 256 bit constants. 
(#81746) --- libc/src/__support/integer_literals.h | 10 +- libc/src/math/generic/CMakeLists.txt | 40 +- libc/src/math/generic/exp.cpp | 24 +- libc/src/math/generic/exp10.cpp | 20 +- libc/src/math/generic/exp2.cpp | 20 +- libc/src/math/generic/expm1.cpp | 26 +- libc/src/math/generic/log.cpp | 1257 ++++++++-------- libc/src/math/generic/log10.cpp | 1262 +++++++++-------- libc/src/math/generic/log1p.cpp | 944 ++++++------ libc/src/math/generic/log2.cpp | 1251 ++++++++-------- libc/test/src/__support/CMakeLists.txt | 4 +- .../test/src/__support/FPUtil/fpbits_test.cpp | 42 +- .../src/__support/integer_to_string_test.cpp | 39 +- .../src/__support/str_to_long_double_test.cpp | 48 +- .../llvm-project-overlay/libc/BUILD.bazel | 24 +- .../libc/test/src/__support/BUILD.bazel | 2 + 16 files changed, 2523 insertions(+), 2490 deletions(-) diff --git a/libc/src/__support/integer_literals.h b/libc/src/__support/integer_literals.h index c8e965c1a03a05..ae09c5462a435b 100644 --- a/libc/src/__support/integer_literals.h +++ b/libc/src/__support/integer_literals.h @@ -22,19 +22,19 @@ namespace LIBC_NAMESPACE { LIBC_INLINE constexpr uint8_t operator""_u8(unsigned long long value) { - return value; + return static_cast(value); } LIBC_INLINE constexpr uint16_t operator""_u16(unsigned long long value) { - return value; + return static_cast(value); } LIBC_INLINE constexpr uint32_t operator""_u32(unsigned long long value) { - return value; + return static_cast(value); } LIBC_INLINE constexpr uint64_t operator""_u64(unsigned long long value) { - return value; + return static_cast(value); } namespace internal { @@ -76,7 +76,7 @@ template struct DigitBuffer { // Returns the digit for a particular character. // Returns 255 if the character is invalid. 
LIBC_INLINE static constexpr uint8_t get_digit_value(const char c) { - const auto to_lower = [](char c) { return c | 32; }; + const auto to_lower = [](char c) -> uint8_t { return c | 32; }; const auto is_digit = [](char c) { return c >= '0' && c <= '9'; }; const auto is_alpha = [](char c) { return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'); diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index fdf383f070697e..ab7f5a91ab7bac 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -622,6 +622,8 @@ add_entrypoint_object( DEPENDS .common_constants .explogxf + libc.include.errno + libc.include.math libc.src.__support.CPP.bit libc.src.__support.CPP.optional libc.src.__support.FPUtil.dyadic_float @@ -632,10 +634,9 @@ add_entrypoint_object( libc.src.__support.FPUtil.polyeval libc.src.__support.FPUtil.rounding_mode libc.src.__support.FPUtil.triple_double + libc.src.__support.integer_literals libc.src.__support.macros.optimization - libc.include.errno libc.src.errno.errno - libc.include.math COMPILE_OPTIONS -O3 ) @@ -672,6 +673,8 @@ add_entrypoint_object( DEPENDS .common_constants .explogxf + libc.include.errno + libc.include.math libc.src.__support.CPP.bit libc.src.__support.CPP.optional libc.src.__support.FPUtil.dyadic_float @@ -682,10 +685,9 @@ add_entrypoint_object( libc.src.__support.FPUtil.polyeval libc.src.__support.FPUtil.rounding_mode libc.src.__support.FPUtil.triple_double + libc.src.__support.integer_literals libc.src.__support.macros.optimization - libc.include.errno libc.src.errno.errno - libc.include.math COMPILE_OPTIONS -O3 ) @@ -731,6 +733,8 @@ add_entrypoint_object( DEPENDS .common_constants .explogxf + libc.include.errno + libc.include.math libc.src.__support.CPP.bit libc.src.__support.CPP.optional libc.src.__support.FPUtil.dyadic_float @@ -741,10 +745,9 @@ add_entrypoint_object( libc.src.__support.FPUtil.polyeval libc.src.__support.FPUtil.rounding_mode 
libc.src.__support.FPUtil.triple_double + libc.src.__support.integer_literals libc.src.__support.macros.optimization - libc.include.errno libc.src.errno.errno - libc.include.math COMPILE_OPTIONS -O3 ) @@ -791,6 +794,8 @@ add_entrypoint_object( DEPENDS .common_constants .explogxf + libc.include.errno + libc.include.math libc.src.__support.CPP.bit libc.src.__support.CPP.optional libc.src.__support.FPUtil.dyadic_float @@ -801,10 +806,9 @@ add_entrypoint_object( libc.src.__support.FPUtil.polyeval libc.src.__support.FPUtil.rounding_mode libc.src.__support.FPUtil.triple_double + libc.src.__support.integer_literals libc.src.__support.macros.optimization - libc.include.errno libc.src.errno.errno - libc.include.math COMPILE_OPTIONS -O3 ) @@ -1061,12 +1065,13 @@ add_entrypoint_object( DEPENDS .common_constants .log_range_reduction + libc.src.__support.FPUtil.double_double + libc.src.__support.FPUtil.dyadic_float libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.multiply_add libc.src.__support.FPUtil.polyeval - libc.src.__support.FPUtil.double_double - libc.src.__support.FPUtil.dyadic_float + libc.src.__support.integer_literals libc.src.__support.macros.optimization COMPILE_OPTIONS -O3 @@ -1097,12 +1102,13 @@ add_entrypoint_object( ../log1p.h DEPENDS .common_constants + libc.src.__support.FPUtil.double_double + libc.src.__support.FPUtil.dyadic_float libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.multiply_add libc.src.__support.FPUtil.polyeval - libc.src.__support.FPUtil.double_double - libc.src.__support.FPUtil.dyadic_float + libc.src.__support.integer_literals libc.src.__support.macros.optimization COMPILE_OPTIONS -O3 @@ -1135,12 +1141,13 @@ add_entrypoint_object( DEPENDS .common_constants .log_range_reduction + libc.src.__support.FPUtil.double_double + libc.src.__support.FPUtil.dyadic_float libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits 
libc.src.__support.FPUtil.multiply_add libc.src.__support.FPUtil.polyeval - libc.src.__support.FPUtil.double_double - libc.src.__support.FPUtil.dyadic_float + libc.src.__support.integer_literals libc.src.__support.macros.optimization COMPILE_OPTIONS -O3 @@ -1173,12 +1180,13 @@ add_entrypoint_object( DEPENDS .common_constants .log_range_reduction + libc.src.__support.FPUtil.double_double + libc.src.__support.FPUtil.dyadic_float libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.multiply_add libc.src.__support.FPUtil.polyeval - libc.src.__support.FPUtil.double_double - libc.src.__support.FPUtil.dyadic_float + libc.src.__support.integer_literals libc.src.__support.macros.optimization COMPILE_OPTIONS -O3 diff --git a/libc/src/math/generic/exp.cpp b/libc/src/math/generic/exp.cpp index f23170f8ed4259..42a4491131a04e 100644 --- a/libc/src/math/generic/exp.cpp +++ b/libc/src/math/generic/exp.cpp @@ -21,6 +21,7 @@ #include "src/__support/FPUtil/rounding_mode.h" #include "src/__support/FPUtil/triple_double.h" #include "src/__support/common.h" +#include "src/__support/integer_literals.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include @@ -31,6 +32,7 @@ using fputil::DoubleDouble; using fputil::TripleDouble; using Float128 = typename fputil::DyadicFloat<128>; using Sign = fputil::Sign; +using LIBC_NAMESPACE::operator""_u128; // log2(e) constexpr double LOG2_E = 0x1.71547652b82fep+0; @@ -97,21 +99,15 @@ DoubleDouble poly_approx_dd(const DoubleDouble &dx) { // For |dx| < 2^-13 + 2^-30: // | output - exp(dx) | < 2^-126. 
Float128 poly_approx_f128(const Float128 &dx) { - using MType = typename Float128::MantissaType; - constexpr Float128 COEFFS_128[]{ - {Sign::POS, -127, MType({0, 0x8000000000000000})}, // 1.0 - {Sign::POS, -127, MType({0, 0x8000000000000000})}, // 1.0 - {Sign::POS, -128, MType({0, 0x8000000000000000})}, // 0.5 - {Sign::POS, -130, MType({0xaaaaaaaaaaaaaaab, 0xaaaaaaaaaaaaaaaa})}, // 1/6 - {Sign::POS, -132, - MType({0xaaaaaaaaaaaaaaab, 0xaaaaaaaaaaaaaaaa})}, // 1/24 - {Sign::POS, -134, - MType({0x8888888888888889, 0x8888888888888888})}, // 1/120 - {Sign::POS, -137, - MType({0x60b60b60b60b60b6, 0xb60b60b60b60b60b})}, // 1/720 - {Sign::POS, -140, - MType({0x00d00d00d00d00d0, 0xd00d00d00d00d00d})}, // 1/5040 + {Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128}, // 1.0 + {Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128}, // 1.0 + {Sign::POS, -128, 0x80000000'00000000'00000000'00000000_u128}, // 0.5 + {Sign::POS, -130, 0xaaaaaaaa'aaaaaaaa'aaaaaaaa'aaaaaaab_u128}, // 1/6 + {Sign::POS, -132, 0xaaaaaaaa'aaaaaaaa'aaaaaaaa'aaaaaaab_u128}, // 1/24 + {Sign::POS, -134, 0x88888888'88888888'88888888'88888889_u128}, // 1/120 + {Sign::POS, -137, 0xb60b60b6'0b60b60b'60b60b60'b60b60b6_u128}, // 1/720 + {Sign::POS, -140, 0xd00d00d0'0d00d00d'00d00d00'd00d00d0_u128}, // 1/5040 }; Float128 p = fputil::polyeval(dx, COEFFS_128[0], COEFFS_128[1], COEFFS_128[2], diff --git a/libc/src/math/generic/exp10.cpp b/libc/src/math/generic/exp10.cpp index 6b40f5561845d8..72ece669765688 100644 --- a/libc/src/math/generic/exp10.cpp +++ b/libc/src/math/generic/exp10.cpp @@ -21,6 +21,7 @@ #include "src/__support/FPUtil/rounding_mode.h" #include "src/__support/FPUtil/triple_double.h" #include "src/__support/common.h" +#include "src/__support/integer_literals.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include @@ -31,6 +32,7 @@ using fputil::DoubleDouble; using fputil::TripleDouble; using Float128 = typename fputil::DyadicFloat<128>; using Sign = fputil::Sign; 
+using LIBC_NAMESPACE::operator""_u128; // log2(10) constexpr double LOG2_10 = 0x1.a934f0979a371p+1; @@ -99,17 +101,15 @@ DoubleDouble poly_approx_dd(const DoubleDouble &dx) { // For |dx| < 2^-14: // | output - 10^dx | < 1.5 * 2^-124. Float128 poly_approx_f128(const Float128 &dx) { - using MType = typename Float128::MantissaType; - constexpr Float128 COEFFS_128[]{ - {Sign::POS, -127, MType({0, 0x8000000000000000})}, // 1.0 - {Sign::POS, -126, MType({0xea56d62b82d30a2d, 0x935d8dddaaa8ac16})}, - {Sign::POS, -126, MType({0x80a99ce75f4d5bdb, 0xa9a92639e753443a})}, - {Sign::POS, -126, MType({0x6a4f9d7dbf6c9635, 0x82382c8ef1652304})}, - {Sign::POS, -124, MType({0x345787019216c7af, 0x12bd7609fd98c44c})}, - {Sign::POS, -127, MType({0xcc41ed7e0d27aee5, 0x450a7ff47535d889})}, - {Sign::POS, -130, MType({0x8326bb91a6e7601d, 0xd3f6b844702d636b})}, - {Sign::POS, -130, MType({0xfa7b46df314112a9, 0x45b937f0d05bb1cd})}, + {Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128}, // 1.0 + {Sign::POS, -126, 0x935d8ddd'aaa8ac16'ea56d62b'82d30a2d_u128}, + {Sign::POS, -126, 0xa9a92639'e753443a'80a99ce7'5f4d5bdb_u128}, + {Sign::POS, -126, 0x82382c8e'f1652304'6a4f9d7d'bf6c9635_u128}, + {Sign::POS, -124, 0x12bd7609'fd98c44c'34578701'9216c7af_u128}, + {Sign::POS, -127, 0x450a7ff4'7535d889'cc41ed7e'0d27aee5_u128}, + {Sign::POS, -130, 0xd3f6b844'702d636b'8326bb91'a6e7601d_u128}, + {Sign::POS, -130, 0x45b937f0'd05bb1cd'fa7b46df'314112a9_u128}, }; Float128 p = fputil::polyeval(dx, COEFFS_128[0], COEFFS_128[1], COEFFS_128[2], diff --git a/libc/src/math/generic/exp2.cpp b/libc/src/math/generic/exp2.cpp index 01e66d1ae00f70..83f545eb116bd3 100644 --- a/libc/src/math/generic/exp2.cpp +++ b/libc/src/math/generic/exp2.cpp @@ -21,6 +21,7 @@ #include "src/__support/FPUtil/rounding_mode.h" #include "src/__support/FPUtil/triple_double.h" #include "src/__support/common.h" +#include "src/__support/integer_literals.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include @@ -31,6 
+32,7 @@ using fputil::DoubleDouble; using fputil::TripleDouble; using Float128 = typename fputil::DyadicFloat<128>; using Sign = fputil::Sign; +using LIBC_NAMESPACE::operator""_u128; // Error bounds: // Errors when using double precision. @@ -88,17 +90,15 @@ DoubleDouble poly_approx_dd(const DoubleDouble &dx) { // For |dx| < 2^-13 + 2^-30: // | output - exp(dx) | < 2^-126. Float128 poly_approx_f128(const Float128 &dx) { - using MType = typename Float128::MantissaType; - constexpr Float128 COEFFS_128[]{ - {Sign::POS, -127, MType({0, 0x8000000000000000})}, // 1.0 - {Sign::POS, -128, MType({0xc9e3b39803f2f6af, 0xb17217f7d1cf79ab})}, - {Sign::POS, -128, MType({0xde2d60dd9c9a1d9f, 0x3d7f7bff058b1d50})}, - {Sign::POS, -132, MType({0x9d3b15d9e7fb6897, 0xe35846b82505fc59})}, - {Sign::POS, -134, MType({0x184462f6bcd2b9e7, 0x9d955b7dd273b94e})}, - {Sign::POS, -137, MType({0x39ea1bb964c51a89, 0xaec3ff3c53398883})}, - {Sign::POS, -138, MType({0x842c53418fa8ae61, 0x2861225f345c396a})}, - {Sign::POS, -144, MType({0x7abeb5abd5ad2079, 0xffe5fe2d109a319d})}, + {Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128}, // 1.0 + {Sign::POS, -128, 0xb17217f7'd1cf79ab'c9e3b398'03f2f6af_u128}, + {Sign::POS, -128, 0x3d7f7bff'058b1d50'de2d60dd'9c9a1d9f_u128}, + {Sign::POS, -132, 0xe35846b8'2505fc59'9d3b15d9'e7fb6897_u128}, + {Sign::POS, -134, 0x9d955b7d'd273b94e'184462f6'bcd2b9e7_u128}, + {Sign::POS, -137, 0xaec3ff3c'53398883'39ea1bb9'64c51a89_u128}, + {Sign::POS, -138, 0x2861225f'345c396a'842c5341'8fa8ae61_u128}, + {Sign::POS, -144, 0xffe5fe2d'109a319d'7abeb5ab'd5ad2079_u128}, }; Float128 p = fputil::polyeval(dx, COEFFS_128[0], COEFFS_128[1], COEFFS_128[2], diff --git a/libc/src/math/generic/expm1.cpp b/libc/src/math/generic/expm1.cpp index c1fb80309d7b46..9f14a8c2068ec1 100644 --- a/libc/src/math/generic/expm1.cpp +++ b/libc/src/math/generic/expm1.cpp @@ -22,6 +22,7 @@ #include "src/__support/FPUtil/rounding_mode.h" #include "src/__support/FPUtil/triple_double.h" #include 
"src/__support/common.h" +#include "src/__support/integer_literals.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include @@ -39,6 +40,7 @@ using fputil::DoubleDouble; using fputil::TripleDouble; using Float128 = typename fputil::DyadicFloat<128>; using Sign = fputil::Sign; +using LIBC_NAMESPACE::operator""_u128; // log2(e) constexpr double LOG2_E = 0x1.71547652b82fep+0; @@ -107,20 +109,14 @@ DoubleDouble poly_approx_dd(const DoubleDouble &dx) { // For |dx| < 2^-13 + 2^-30: // | output - exp(dx) | < 2^-126. Float128 poly_approx_f128(const Float128 &dx) { - using MType = typename Float128::MantissaType; - constexpr Float128 COEFFS_128[]{ - {Sign::POS, -127, MType({0, 0x8000000000000000})}, // 1.0 - {Sign::POS, -128, MType({0, 0x8000000000000000})}, // 0.5 - {Sign::POS, -130, MType({0xaaaaaaaaaaaaaaab, 0xaaaaaaaaaaaaaaaa})}, // 1/6 - {Sign::POS, -132, - MType({0xaaaaaaaaaaaaaaab, 0xaaaaaaaaaaaaaaaa})}, // 1/24 - {Sign::POS, -134, - MType({0x8888888888888889, 0x8888888888888888})}, // 1/120 - {Sign::POS, -137, - MType({0x60b60b60b60b60b6, 0xb60b60b60b60b60b})}, // 1/720 - {Sign::POS, -140, - MType({0x00d00d00d00d00d0, 0xd00d00d00d00d00d})}, // 1/5040 + {Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128}, // 1.0 + {Sign::POS, -128, 0x80000000'00000000'00000000'00000000_u128}, // 0.5 + {Sign::POS, -130, 0xaaaaaaaa'aaaaaaaa'aaaaaaaa'aaaaaaab_u128}, // 1/6 + {Sign::POS, -132, 0xaaaaaaaa'aaaaaaaa'aaaaaaaa'aaaaaaab_u128}, // 1/24 + {Sign::POS, -134, 0x88888888'88888888'88888888'88888889_u128}, // 1/120 + {Sign::POS, -137, 0xb60b60b6'0b60b60b'60b60b60'b60b60b6_u128}, // 1/720 + {Sign::POS, -140, 0xd00d00d0'0d00d00d'00d00d00'd00d00d0_u128}, // 1/5040 }; Float128 p = fputil::polyeval(dx, COEFFS_128[0], COEFFS_128[1], COEFFS_128[2], @@ -146,7 +142,6 @@ std::ostream &operator<<(std::ostream &OS, const DoubleDouble &r) { // TODO(lntue): investigate triple-double precision implementation for this // step. 
Float128 expm1_f128(double x, double kd, int idx1, int idx2) { - using MType = typename Float128::MantissaType; // Recalculate dx: double t1 = fputil::multiply_add(kd, MLOG_2_EXP2_M12_HI, x); // exact @@ -170,7 +165,8 @@ Float128 expm1_f128(double x, double kd, int idx1, int idx2) { Float128 exp_mid = fputil::quick_mul(exp_mid1, exp_mid2); int hi = static_cast(kd) >> 12; - Float128 minus_one{Sign::NEG, -127 - hi, MType({0, 0x8000000000000000})}; + Float128 minus_one{Sign::NEG, -127 - hi, + 0x80000000'00000000'00000000'00000000_u128}; Float128 exp_mid_m1 = fputil::quick_add(exp_mid, minus_one); diff --git a/libc/src/math/generic/log.cpp b/libc/src/math/generic/log.cpp index 9edc52b8a8e24c..d770112599a98f 100644 --- a/libc/src/math/generic/log.cpp +++ b/libc/src/math/generic/log.cpp @@ -14,6 +14,7 @@ #include "src/__support/FPUtil/dyadic_float.h" #include "src/__support/FPUtil/multiply_add.h" #include "src/__support/common.h" +#include "src/__support/integer_literals.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include "common_constants.h" @@ -23,8 +24,8 @@ namespace LIBC_NAMESPACE { // 128-bit precision dyadic floating point numbers. using Float128 = typename fputil::DyadicFloat<128>; -using MType = typename Float128::MantissaType; using Sign = fputil::Sign; +using LIBC_NAMESPACE::operator""_u128; namespace { @@ -34,150 +35,152 @@ constexpr double HI_ERR = 0x1.0p-85; // Extra errors from P is from using x^2 to reduce evaluation latency. 
constexpr double P_ERR = 0x1.0p-50; -// log(2) with 128-bit prepcision generated by SageMath with: -// sage: (s, m, e) = RealField(128)(2).log().sign_mantissa_exponent(); -// sage: print("MType({", hex(m % 2^64), ",", hex((m >> 64) % 2^64), "})"); +// log(2) with 128-bit precision generated by SageMath with: +// def format_hex(value): +// l = hex(value)[2:] +// n = 8 +// x = [l[i:i + n] for i in range(0, len(l), n)] +// return "0x" + "'".join(x) + "_u128" +// (s, m, e) = RealField(128)(2).log().sign_mantissa_exponent(); +// print(format_hex(m)); const Float128 LOG_2(Sign::POS, /*exponent=*/-128, /*mantissa=*/ - MType({0xc9e3b39803f2f6af, 0xb17217f7d1cf79ab})); + 0xb17217f7'd1cf79ab'c9e3b398'03f2f6af_u128); alignas(64) const LogRR LOG_TABLE = { // -log(r) with 128-bit precision generated by SageMath with: - // // for i in range(128): // r = 2^-8 * ceil( 2^8 * (1 - 2^(-8)) / (1 + i*2^(-7)) ); // s, m, e = RealField(128)(r).log().sign_mantissa_exponent(); - // print("{Sign::POS,", e, ", MType({", hex(m % 2^64), ",", hex((m >> 64) - // % 2^64), - // "})},"); + // print("{Sign::POS,", e, ",", format_hex(m), "},"); /* .step_1= */ { - {Sign::POS, 0, MType(0)}, - {Sign::POS, -134, MType({0x662d417ced007a46, 0x8080abac46f38946})}, - {Sign::POS, -133, MType({0x91d082dce3ddcd38, 0x8102b2c49ac23a4f})}, - {Sign::POS, -133, MType({0xda5f3cc0b3251dbd, 0xc24929464655f45c})}, - {Sign::POS, -132, MType({0xb9e3aea6c444ef07, 0x820aec4f3a222380})}, - {Sign::POS, -132, MType({0x521016bd904dc968, 0xa33576a16f1f4c64})}, - {Sign::POS, -132, MType({0xbe97660a23cc540d, 0xc4a550a4fd9a19a8})}, - {Sign::POS, -132, MType({0xe09f5fe2058d6006, 0xe65b9e6eed965c36})}, - {Sign::POS, -131, MType({0x1fecdfa819b96098, 0x842cc5acf1d03445})}, - {Sign::POS, -131, MType({0xa7c9859530a45153, 0x8cb9de8a32ab368a})}, - {Sign::POS, -131, MType({0x976d3b5b45f6ca0b, 0x9defad3e8f73217a})}, - {Sign::POS, -131, MType({0xe8b8b88a14ff0ce, 0xaf4ad26cbc8e5be7})}, - {Sign::POS, -131, MType({0x6a677b4c8bec22e1,
0xb8069857560707a3})}, - {Sign::POS, -131, MType({0xeaf51f66692844ba, 0xc99af2eaca4c4570})}, - {Sign::POS, -131, MType({0xa8112e35a60e6375, 0xdb56446d6ad8deff})}, - {Sign::POS, -131, MType({0x196ab34ce0bccd12, 0xe442c00de2591b47})}, - {Sign::POS, -131, MType({0x4066e87f2c0f7340, 0xf639cc185088fe5d})}, - {Sign::POS, -131, MType({0xc17bd40d8d9291ec, 0xff4489cedeab2ca6})}, - {Sign::POS, -130, MType({0x9c5a0fe396f40f1e, 0x88bc74113f23def1})}, - {Sign::POS, -130, MType({0x88713268840cbcc0, 0x8d515bf11fb94f1c})}, - {Sign::POS, -130, MType({0x65c0da506a088484, 0x968b08643409ceb6})}, - {Sign::POS, -130, MType({0x411a5b944aca8708, 0x9b2fe580ac80b17d})}, - {Sign::POS, -130, MType({0xa9fb6cf0ecb411b7, 0xa489ec199dab06f2})}, - {Sign::POS, -130, MType({0xcad2fb8d48054ae0, 0xa93f2f250dac67d1})}, - {Sign::POS, -130, MType({0x2c3c2e77904afa78, 0xb2ba75f46099cf8b})}, - {Sign::POS, -130, MType({0x34c7bc3d32750fde, 0xb780945bab55dce4})}, - {Sign::POS, -130, MType({0x9a631e830fd30904, 0xc11e0b2a8d1e0ddb})}, - {Sign::POS, -130, MType({0xaa8b6997a402bf30, 0xc5f57f59c7f46155})}, - {Sign::POS, -130, MType({0x2c507fb7a3d0bf6a, 0xcad2d6e7b80bf914})}, - {Sign::POS, -130, MType({0x5f53bd2e406e66e7, 0xd49f69e456cf1b79})}, - {Sign::POS, -130, MType({0x58a98f2ad65bee9b, 0xd98ec2bade71e539})}, - {Sign::POS, -130, MType({0x4d57da945b5d0aaa, 0xde8439c1dec56877})}, - {Sign::POS, -130, MType({0xc524848e3443e040, 0xe881bf932af3dac0})}, - {Sign::POS, -130, MType({0x11d49f96cb88317b, 0xed89ed86a44a01aa})}, - {Sign::POS, -130, MType({0x3b020fa1820c9492, 0xf29877ff38809091})}, - {Sign::POS, -130, MType({0x54d2238f75f969b1, 0xf7ad6f26e7ff2ef7})}, - {Sign::POS, -130, MType({0xca0cdf301431b60f, 0xfcc8e3659d9bcbec})}, - {Sign::POS, -129, MType({0x62dda9d2270fa1f4, 0x8389c3026ac3139b})}, - {Sign::POS, -129, MType({0x163ceae88f720f1e, 0x86216b3b0b17188b})}, - {Sign::POS, -129, MType({0x9c5a0fe396f40f1e, 0x88bc74113f23def1})}, - {Sign::POS, -129, MType({0xf7a5168126a58b9a, 0x8b5ae65d67db9acd})}, - {Sign::POS, 
-129, MType({0x5147bdb6ddcaf59c, 0x8dfccb1ad35ca6ed})}, - {Sign::POS, -129, MType({0xdf5bb3b60554e152, 0x934b1089a6dc93c1})}, - {Sign::POS, -129, MType({0x4a5004f3ef063313, 0x95f783e6e49a9cfa})}, - {Sign::POS, -129, MType({0x2cdec34784707839, 0x98a78f0e9ae71d85})}, - {Sign::POS, -129, MType({0xd878bbe3d392be25, 0x9b5b3bb5f088b766})}, - {Sign::POS, -129, MType({0x5b035eae273a855f, 0x9e1293b9998c1daa})}, - {Sign::POS, -129, MType({0xbb2438273918db7e, 0xa0cda11eaf46390d})}, - {Sign::POS, -129, MType({0xf698298adddd7f32, 0xa38c6e138e20d831})}, - {Sign::POS, -129, MType({0xe4f5275c2d15c21f, 0xa64f04f0b961df76})}, - {Sign::POS, -129, MType({0x8164c759686a2209, 0xa9157039c51ebe70})}, - {Sign::POS, -129, MType({0xf72ea07749ce6bd3, 0xabdfba9e468fd6f6})}, - {Sign::POS, -129, MType({0x7dd6e688ebb13b03, 0xaeadeefacaf97d35})}, - {Sign::POS, -129, MType({0x18ce51fff99479cd, 0xb1801859d56249dc})}, - {Sign::POS, -129, MType({0x2756eba00bc33978, 0xb45641f4e350a0d3})}, - {Sign::POS, -129, MType({0xbe1116c3466beb6d, 0xb730773578cb90b2})}, - {Sign::POS, -129, MType({0x49dc60b2b059a60b, 0xba0ec3b633dd8b09})}, - {Sign::POS, -129, MType({0x2efd17781bb3afec, 0xbcf13343e7d9ec7d})}, - {Sign::POS, -129, MType({0x37eda996244bccb0, 0xbfd7d1dec0a8df6f})}, - {Sign::POS, -129, MType({0x33337789d592e296, 0xc2c2abbb6e5fd56f})}, - {Sign::POS, -129, MType({0x1a18fb8f9f9ef280, 0xc5b1cd44596fa51e})}, - {Sign::POS, -129, MType({0x688ce7c1a75e341a, 0xc8a5431adfb44ca5})}, - {Sign::POS, -129, MType({0x2d7e9307c70c0668, 0xcb9d1a189ab56e76})}, - {Sign::POS, -129, MType({0xef2f3f4f861ad6a9, 0xce995f50af69d861})}, - {Sign::POS, -129, MType({0x7f9d79f51dcc7301, 0xd19a201127d3c645})}, - {Sign::POS, -129, MType({0x7f9d79f51dcc7301, 0xd19a201127d3c645})}, - {Sign::POS, -129, MType({0x5f53bd2e406e66e7, 0xd49f69e456cf1b79})}, - {Sign::POS, -129, MType({0xad88bba7d0cee8e0, 0xd7a94a92466e833a})}, - {Sign::POS, -129, MType({0x96c20cca6efe2ac5, 0xdab7d02231484a92})}, - {Sign::POS, -129, MType({0xf40a666c87842843, 
0xddcb08dc0717d85b})}, - {Sign::POS, -129, MType({0x7fe8e1802aba24d6, 0xe0e30349fd1cec80})}, - {Sign::POS, -129, MType({0x7fe8e1802aba24d6, 0xe0e30349fd1cec80})}, - {Sign::POS, -129, MType({0x3eadb651b49ac53a, 0xe3ffce3a2aa64922})}, - {Sign::POS, -129, MType({0x304e1653e71d9973, 0xe72178c0323a1a0f})}, - {Sign::POS, -129, MType({0xe9a767a80d6d97e8, 0xea481236f7d35baf})}, - {Sign::POS, -129, MType({0x4f91cf4b33e42998, 0xed73aa4264b0ade9})}, - {Sign::POS, -129, MType({0xfc66eb6408ff6433, 0xf0a450d139366ca6})}, - {Sign::POS, -129, MType({0xfc66eb6408ff6433, 0xf0a450d139366ca6})}, - {Sign::POS, -129, MType({0xac8d42f78d3e65d3, 0xf3da161eed6b9aaf})}, - {Sign::POS, -129, MType({0x5a470250d40ebe90, 0xf7150ab5a09f27f4})}, - {Sign::POS, -129, MType({0xb780a545a1b54dcf, 0xfa553f7018c966f2})}, - {Sign::POS, -129, MType({0xb780a545a1b54dcf, 0xfa553f7018c966f2})}, - {Sign::POS, -129, MType({0x8f05924d258c14c5, 0xfd9ac57bd244217e})}, - {Sign::POS, -128, MType({0x89d1b09c70c4010a, 0x8072d72d903d588b})}, - {Sign::POS, -128, MType({0x30d58c3f7e2ea1f, 0x821b05f3b01d6774})}, - {Sign::POS, -128, MType({0x30d58c3f7e2ea1f, 0x821b05f3b01d6774})}, - {Sign::POS, -128, MType({0x20f6fafe8fbb68b9, 0x83c5f8299e2b4091})}, - {Sign::POS, -128, MType({0xe21f9f89c1ab80b2, 0x8573b71682a7d21a})}, - {Sign::POS, -128, MType({0xe21f9f89c1ab80b2, 0x8573b71682a7d21a})}, - {Sign::POS, -128, MType({0x1e005d06dbfa8f8, 0x87244c308e670a66})}, - {Sign::POS, -128, MType({0x223111a707b6de2c, 0x88d7c11e3ad53cdc})}, - {Sign::POS, -128, MType({0x223111a707b6de2c, 0x88d7c11e3ad53cdc})}, - {Sign::POS, -128, MType({0x2eb628dba173c82d, 0x8a8e1fb794b09134})}, - {Sign::POS, -128, MType({0xbe2ad19415fe25a5, 0x8c47720791e53313})}, - {Sign::POS, -128, MType({0xbe2ad19415fe25a5, 0x8c47720791e53313})}, - {Sign::POS, -128, MType({0xbddae1ccce247838, 0x8e03c24d73003959})}, - {Sign::POS, -128, MType({0x9b00bf167e95da67, 0x8fc31afe30b2c6de})}, - {Sign::POS, -128, MType({0x9b00bf167e95da67, 0x8fc31afe30b2c6de})}, - {Sign::POS, -128, 
MType({0x9b92199ed1a4bab1, 0x918586c5f5e4bf01})}, - {Sign::POS, -128, MType({0xdf5bb3b60554e152, 0x934b1089a6dc93c1})}, - {Sign::POS, -128, MType({0xdf5bb3b60554e152, 0x934b1089a6dc93c1})}, - {Sign::POS, -128, MType({0xf3cbc416a2418012, 0x9513c36876083695})}, - {Sign::POS, -128, MType({0xbe1188fbc94e2f15, 0x96dfaabd86fa1646})}, - {Sign::POS, -128, MType({0xbe1188fbc94e2f15, 0x96dfaabd86fa1646})}, - {Sign::POS, -128, MType({0x1d2f89321647b358, 0x98aed221a03458b6})}, - {Sign::POS, -128, MType({0x1d2f89321647b358, 0x98aed221a03458b6})}, - {Sign::POS, -128, MType({0xe549f9aaea3cb5e1, 0x9a81456cec642e0f})}, - {Sign::POS, -128, MType({0xa2554b2dd4619e63, 0x9c5710b8cbb73a42})}, - {Sign::POS, -128, MType({0xa2554b2dd4619e63, 0x9c5710b8cbb73a42})}, - {Sign::POS, -128, MType({0x30603d87b6df81ad, 0x9e304061b5fda919})}, - {Sign::POS, -128, MType({0x30603d87b6df81ad, 0x9e304061b5fda919})}, - {Sign::POS, -128, MType({0x67879c5a30cd1242, 0xa00ce1092e5498c3})}, - {Sign::POS, -128, MType({0xb7efae08e597e16, 0xa1ecff97c91e267b})}, - {Sign::POS, -128, MType({0xb7efae08e597e16, 0xa1ecff97c91e267b})}, - {Sign::POS, -128, MType({0x83594fab088c0d65, 0xa3d0a93f45169a4a})}, - {Sign::POS, -128, MType({0x83594fab088c0d65, 0xa3d0a93f45169a4a})}, - {Sign::POS, -128, MType({0xaf6a62a0dec6e073, 0xa5b7eb7cb860fb88})}, - {Sign::POS, -128, MType({0xaf6a62a0dec6e073, 0xa5b7eb7cb860fb88})}, - {Sign::POS, -128, MType({0x49362382a768847a, 0xa7a2d41ad270c9d7})}, - {Sign::POS, -128, MType({0x49362382a768847a, 0xa7a2d41ad270c9d7})}, - {Sign::POS, -128, MType({0x8ba4aea614d05701, 0xa991713433c2b998})}, - {Sign::POS, -128, MType({0x8ba4aea614d05701, 0xa991713433c2b998})}, - {Sign::POS, -128, MType({0x7fe6607ba902ef3c, 0xab83d135dc633301})}, - {Sign::POS, -128, MType({0x7fe6607ba902ef3c, 0xab83d135dc633301})}, - {Sign::POS, -128, MType({0xd60864fd949b4bd3, 0xad7a02e1b24efd31})}, - {Sign::POS, -128, MType({0xd60864fd949b4bd3, 0xad7a02e1b24efd31})}, - {Sign::POS, -128, MType({0x66d235ee63073dd, 
0xaf74155120c9011c})}, - {Sign::POS, 0, MType(0)}, + {Sign::POS, 0, 0_u128}, + {Sign::POS, -134, 0x8080abac'46f38946'662d417c'ed007a46_u128}, + {Sign::POS, -133, 0x8102b2c4'9ac23a4f'91d082dc'e3ddcd38_u128}, + {Sign::POS, -133, 0xc2492946'4655f45c'da5f3cc0'b3251dbd_u128}, + {Sign::POS, -132, 0x820aec4f'3a222380'b9e3aea6'c444ef07_u128}, + {Sign::POS, -132, 0xa33576a1'6f1f4c64'521016bd'904dc968_u128}, + {Sign::POS, -132, 0xc4a550a4'fd9a19a8'be97660a'23cc540d_u128}, + {Sign::POS, -132, 0xe65b9e6e'ed965c36'e09f5fe2'058d6006_u128}, + {Sign::POS, -131, 0x842cc5ac'f1d03445'1fecdfa8'19b96098_u128}, + {Sign::POS, -131, 0x8cb9de8a'32ab368a'a7c98595'30a45153_u128}, + {Sign::POS, -131, 0x9defad3e'8f73217a'976d3b5b'45f6ca0b_u128}, + {Sign::POS, -131, 0xaf4ad26c'bc8e5be7'0e8b8b88'a14ff0ce_u128}, + {Sign::POS, -131, 0xb8069857'560707a3'6a677b4c'8bec22e1_u128}, + {Sign::POS, -131, 0xc99af2ea'ca4c4570'eaf51f66'692844ba_u128}, + {Sign::POS, -131, 0xdb56446d'6ad8deff'a8112e35'a60e6375_u128}, + {Sign::POS, -131, 0xe442c00d'e2591b47'196ab34c'e0bccd12_u128}, + {Sign::POS, -131, 0xf639cc18'5088fe5d'4066e87f'2c0f7340_u128}, + {Sign::POS, -131, 0xff4489ce'deab2ca6'c17bd40d'8d9291ec_u128}, + {Sign::POS, -130, 0x88bc7411'3f23def1'9c5a0fe3'96f40f1e_u128}, + {Sign::POS, -130, 0x8d515bf1'1fb94f1c'88713268'840cbcc0_u128}, + {Sign::POS, -130, 0x968b0864'3409ceb6'65c0da50'6a088484_u128}, + {Sign::POS, -130, 0x9b2fe580'ac80b17d'411a5b94'4aca8708_u128}, + {Sign::POS, -130, 0xa489ec19'9dab06f2'a9fb6cf0'ecb411b7_u128}, + {Sign::POS, -130, 0xa93f2f25'0dac67d1'cad2fb8d'48054ae0_u128}, + {Sign::POS, -130, 0xb2ba75f4'6099cf8b'2c3c2e77'904afa78_u128}, + {Sign::POS, -130, 0xb780945b'ab55dce4'34c7bc3d'32750fde_u128}, + {Sign::POS, -130, 0xc11e0b2a'8d1e0ddb'9a631e83'0fd30904_u128}, + {Sign::POS, -130, 0xc5f57f59'c7f46155'aa8b6997'a402bf30_u128}, + {Sign::POS, -130, 0xcad2d6e7'b80bf914'2c507fb7'a3d0bf6a_u128}, + {Sign::POS, -130, 0xd49f69e4'56cf1b79'5f53bd2e'406e66e7_u128}, + {Sign::POS, -130, 
0xd98ec2ba'de71e539'58a98f2a'd65bee9b_u128}, + {Sign::POS, -130, 0xde8439c1'dec56877'4d57da94'5b5d0aaa_u128}, + {Sign::POS, -130, 0xe881bf93'2af3dac0'c524848e'3443e040_u128}, + {Sign::POS, -130, 0xed89ed86'a44a01aa'11d49f96'cb88317b_u128}, + {Sign::POS, -130, 0xf29877ff'38809091'3b020fa1'820c9492_u128}, + {Sign::POS, -130, 0xf7ad6f26'e7ff2ef7'54d2238f'75f969b1_u128}, + {Sign::POS, -130, 0xfcc8e365'9d9bcbec'ca0cdf30'1431b60f_u128}, + {Sign::POS, -129, 0x8389c302'6ac3139b'62dda9d2'270fa1f4_u128}, + {Sign::POS, -129, 0x86216b3b'0b17188b'163ceae8'8f720f1e_u128}, + {Sign::POS, -129, 0x88bc7411'3f23def1'9c5a0fe3'96f40f1e_u128}, + {Sign::POS, -129, 0x8b5ae65d'67db9acd'f7a51681'26a58b9a_u128}, + {Sign::POS, -129, 0x8dfccb1a'd35ca6ed'5147bdb6'ddcaf59c_u128}, + {Sign::POS, -129, 0x934b1089'a6dc93c1'df5bb3b6'0554e152_u128}, + {Sign::POS, -129, 0x95f783e6'e49a9cfa'4a5004f3'ef063313_u128}, + {Sign::POS, -129, 0x98a78f0e'9ae71d85'2cdec347'84707839_u128}, + {Sign::POS, -129, 0x9b5b3bb5'f088b766'd878bbe3'd392be25_u128}, + {Sign::POS, -129, 0x9e1293b9'998c1daa'5b035eae'273a855f_u128}, + {Sign::POS, -129, 0xa0cda11e'af46390d'bb243827'3918db7e_u128}, + {Sign::POS, -129, 0xa38c6e13'8e20d831'f698298a'dddd7f32_u128}, + {Sign::POS, -129, 0xa64f04f0'b961df76'e4f5275c'2d15c21f_u128}, + {Sign::POS, -129, 0xa9157039'c51ebe70'8164c759'686a2209_u128}, + {Sign::POS, -129, 0xabdfba9e'468fd6f6'f72ea077'49ce6bd3_u128}, + {Sign::POS, -129, 0xaeadeefa'caf97d35'7dd6e688'ebb13b03_u128}, + {Sign::POS, -129, 0xb1801859'd56249dc'18ce51ff'f99479cd_u128}, + {Sign::POS, -129, 0xb45641f4'e350a0d3'2756eba0'0bc33978_u128}, + {Sign::POS, -129, 0xb7307735'78cb90b2'be1116c3'466beb6d_u128}, + {Sign::POS, -129, 0xba0ec3b6'33dd8b09'49dc60b2'b059a60b_u128}, + {Sign::POS, -129, 0xbcf13343'e7d9ec7d'2efd1778'1bb3afec_u128}, + {Sign::POS, -129, 0xbfd7d1de'c0a8df6f'37eda996'244bccb0_u128}, + {Sign::POS, -129, 0xc2c2abbb'6e5fd56f'33337789'd592e296_u128}, + {Sign::POS, -129, 0xc5b1cd44'596fa51e'1a18fb8f'9f9ef280_u128}, + 
{Sign::POS, -129, 0xc8a5431a'dfb44ca5'688ce7c1'a75e341a_u128}, + {Sign::POS, -129, 0xcb9d1a18'9ab56e76'2d7e9307'c70c0668_u128}, + {Sign::POS, -129, 0xce995f50'af69d861'ef2f3f4f'861ad6a9_u128}, + {Sign::POS, -129, 0xd19a2011'27d3c645'7f9d79f5'1dcc7301_u128}, + {Sign::POS, -129, 0xd19a2011'27d3c645'7f9d79f5'1dcc7301_u128}, + {Sign::POS, -129, 0xd49f69e4'56cf1b79'5f53bd2e'406e66e7_u128}, + {Sign::POS, -129, 0xd7a94a92'466e833a'ad88bba7'd0cee8e0_u128}, + {Sign::POS, -129, 0xdab7d022'31484a92'96c20cca'6efe2ac5_u128}, + {Sign::POS, -129, 0xddcb08dc'0717d85b'f40a666c'87842843_u128}, + {Sign::POS, -129, 0xe0e30349'fd1cec80'7fe8e180'2aba24d6_u128}, + {Sign::POS, -129, 0xe0e30349'fd1cec80'7fe8e180'2aba24d6_u128}, + {Sign::POS, -129, 0xe3ffce3a'2aa64922'3eadb651'b49ac53a_u128}, + {Sign::POS, -129, 0xe72178c0'323a1a0f'304e1653'e71d9973_u128}, + {Sign::POS, -129, 0xea481236'f7d35baf'e9a767a8'0d6d97e8_u128}, + {Sign::POS, -129, 0xed73aa42'64b0ade9'4f91cf4b'33e42998_u128}, + {Sign::POS, -129, 0xf0a450d1'39366ca6'fc66eb64'08ff6433_u128}, + {Sign::POS, -129, 0xf0a450d1'39366ca6'fc66eb64'08ff6433_u128}, + {Sign::POS, -129, 0xf3da161e'ed6b9aaf'ac8d42f7'8d3e65d3_u128}, + {Sign::POS, -129, 0xf7150ab5'a09f27f4'5a470250'd40ebe90_u128}, + {Sign::POS, -129, 0xfa553f70'18c966f2'b780a545'a1b54dcf_u128}, + {Sign::POS, -129, 0xfa553f70'18c966f2'b780a545'a1b54dcf_u128}, + {Sign::POS, -129, 0xfd9ac57b'd244217e'8f05924d'258c14c5_u128}, + {Sign::POS, -128, 0x8072d72d'903d588b'89d1b09c'70c4010a_u128}, + {Sign::POS, -128, 0x821b05f3'b01d6774'030d58c3'f7e2ea1f_u128}, + {Sign::POS, -128, 0x821b05f3'b01d6774'030d58c3'f7e2ea1f_u128}, + {Sign::POS, -128, 0x83c5f829'9e2b4091'20f6fafe'8fbb68b9_u128}, + {Sign::POS, -128, 0x8573b716'82a7d21a'e21f9f89'c1ab80b2_u128}, + {Sign::POS, -128, 0x8573b716'82a7d21a'e21f9f89'c1ab80b2_u128}, + {Sign::POS, -128, 0x87244c30'8e670a66'01e005d0'6dbfa8f8_u128}, + {Sign::POS, -128, 0x88d7c11e'3ad53cdc'223111a7'07b6de2c_u128}, + {Sign::POS, -128, 
0x88d7c11e'3ad53cdc'223111a7'07b6de2c_u128}, + {Sign::POS, -128, 0x8a8e1fb7'94b09134'2eb628db'a173c82d_u128}, + {Sign::POS, -128, 0x8c477207'91e53313'be2ad194'15fe25a5_u128}, + {Sign::POS, -128, 0x8c477207'91e53313'be2ad194'15fe25a5_u128}, + {Sign::POS, -128, 0x8e03c24d'73003959'bddae1cc'ce247838_u128}, + {Sign::POS, -128, 0x8fc31afe'30b2c6de'9b00bf16'7e95da67_u128}, + {Sign::POS, -128, 0x8fc31afe'30b2c6de'9b00bf16'7e95da67_u128}, + {Sign::POS, -128, 0x918586c5'f5e4bf01'9b92199e'd1a4bab1_u128}, + {Sign::POS, -128, 0x934b1089'a6dc93c1'df5bb3b6'0554e152_u128}, + {Sign::POS, -128, 0x934b1089'a6dc93c1'df5bb3b6'0554e152_u128}, + {Sign::POS, -128, 0x9513c368'76083695'f3cbc416'a2418012_u128}, + {Sign::POS, -128, 0x96dfaabd'86fa1646'be1188fb'c94e2f15_u128}, + {Sign::POS, -128, 0x96dfaabd'86fa1646'be1188fb'c94e2f15_u128}, + {Sign::POS, -128, 0x98aed221'a03458b6'1d2f8932'1647b358_u128}, + {Sign::POS, -128, 0x98aed221'a03458b6'1d2f8932'1647b358_u128}, + {Sign::POS, -128, 0x9a81456c'ec642e0f'e549f9aa'ea3cb5e1_u128}, + {Sign::POS, -128, 0x9c5710b8'cbb73a42'a2554b2d'd4619e63_u128}, + {Sign::POS, -128, 0x9c5710b8'cbb73a42'a2554b2d'd4619e63_u128}, + {Sign::POS, -128, 0x9e304061'b5fda919'30603d87'b6df81ad_u128}, + {Sign::POS, -128, 0x9e304061'b5fda919'30603d87'b6df81ad_u128}, + {Sign::POS, -128, 0xa00ce109'2e5498c3'67879c5a'30cd1242_u128}, + {Sign::POS, -128, 0xa1ecff97'c91e267b'0b7efae0'8e597e16_u128}, + {Sign::POS, -128, 0xa1ecff97'c91e267b'0b7efae0'8e597e16_u128}, + {Sign::POS, -128, 0xa3d0a93f'45169a4a'83594fab'088c0d65_u128}, + {Sign::POS, -128, 0xa3d0a93f'45169a4a'83594fab'088c0d65_u128}, + {Sign::POS, -128, 0xa5b7eb7c'b860fb88'af6a62a0'dec6e073_u128}, + {Sign::POS, -128, 0xa5b7eb7c'b860fb88'af6a62a0'dec6e073_u128}, + {Sign::POS, -128, 0xa7a2d41a'd270c9d7'49362382'a768847a_u128}, + {Sign::POS, -128, 0xa7a2d41a'd270c9d7'49362382'a768847a_u128}, + {Sign::POS, -128, 0xa9917134'33c2b998'8ba4aea6'14d05701_u128}, + {Sign::POS, -128, 0xa9917134'33c2b998'8ba4aea6'14d05701_u128}, + 
{Sign::POS, -128, 0xab83d135'dc633301'7fe6607b'a902ef3c_u128}, + {Sign::POS, -128, 0xab83d135'dc633301'7fe6607b'a902ef3c_u128}, + {Sign::POS, -128, 0xad7a02e1'b24efd31'd60864fd'949b4bd3_u128}, + {Sign::POS, -128, 0xad7a02e1'b24efd31'd60864fd'949b4bd3_u128}, + {Sign::POS, -128, 0xaf741551'20c9011c'066d235e'e63073dd_u128}, + {Sign::POS, 0, 0_u128}, }, // -log(r) for the second step, generated by SageMath with: // @@ -185,202 +188,202 @@ alignas(64) const LogRR LOG_TABLE = { // r = 2^-16 * round( 2^16 / (1 + i*2^(-14)) ); // s, m, e = RealField(128)(r).log().sign_mantissa_exponent(); // print("{Sign::POS," if s == -1 else "{Sign::NEG,", e, ", - // MType({", hex(m % 2^64), ",", hex((m >> 64) % 2^64), "})},"); + // format_hex(m), "},"); /* .step_2 = */ { - {Sign::NEG, -135, MType({0xa1c6f3fc242ef8d0, 0x803faacac419abf2})}, - {Sign::NEG, -136, MType({0xa225ebc02e6d9dd4, 0xfc834da16f0d9f57})}, - {Sign::NEG, -136, MType({0xc33f6ad340ae18a9, 0xf88735ccc7433381})}, - {Sign::NEG, -136, MType({0x70b2a4d38a242244, 0xf48b0e171249b6bc})}, - {Sign::NEG, -136, MType({0x1d54819048b811b0, 0xf08ed67fd190e280})}, - {Sign::NEG, -136, MType({0xaee5983701d2a02b, 0xec928f0686828706})}, - {Sign::NEG, -136, MType({0x40abb8ab72afa2d2, 0xe89637aab2828aed})}, - {Sign::NEG, -136, MType({0xdeb547a0d4a26ef9, 0xe499d06bd6eeead5})}, - {Sign::NEG, -136, MType({0x39c5bdfbcf6087a0, 0xe09d5949751fb909})}, - {Sign::NEG, -136, MType({0x53ea9bf152de635f, 0xdca0d2430e671d18})}, - {Sign::NEG, -136, MType({0x25b820436f5f4352, 0xd8a43b582411537e})}, - {Sign::NEG, -136, MType({0x3c2d13ea1d0be058, 0xd4a794883764ad41})}, - {Sign::NEG, -136, MType({0x4f3cfa62bcb3ce3a, 0xd0aaddd2c9a18f95})}, - {Sign::NEG, -136, MType({0xd0fff6cdf14a86c7, 0xccae17375c02737c})}, - {Sign::NEG, -136, MType({0x7587b5f0453ac3d2, 0xc8b140b56fbbe56a})}, - {Sign::NEG, -136, MType({0xb358ad16dfd0d085, 0xc4b45a4c85fc84e2})}, - {Sign::NEG, -136, MType({0x3c86fdce5dbe7314, 0xc0b763fc1fed041d})}, - {Sign::NEG, -136, MType({0x70764e46ac18a96d, 
0xbcba5dc3beb027a6})}, - {Sign::NEG, -136, MType({0xc63be62b8f285882, 0xb8bd47a2e362c600})}, - {Sign::NEG, -136, MType({0x72e7b5a386e5e31b, 0xb3c0d59a244325a4})}, - {Sign::NEG, -136, MType({0xc3ea2cd93f316b34, 0xafc39bac66434f27})}, - {Sign::NEG, -136, MType({0x1dfb11a7cc892843, 0xabc651d491a7b438})}, - {Sign::NEG, -136, MType({0xfc679a28e9d9f212, 0xa7c8f8122773f38d})}, - {Sign::NEG, -136, MType({0xe7bc977eeec42254, 0xa3cb8e64a8a5bbe6})}, - {Sign::NEG, -136, MType({0xb20f215bd3b58c61, 0x9fce14cb9634cba6})}, - {Sign::NEG, -136, MType({0xabe2862508d67a98, 0x9bd08b467112f078})}, - {Sign::NEG, -136, MType({0xd1aacedcefe9d377, 0x97d2f1d4ba2c06f0})}, - {Sign::NEG, -136, MType({0xf1eb25e77d05f58d, 0x93d54875f265fa2c})}, - {Sign::NEG, -136, MType({0xcbef6fac33691e95, 0x8fd78f299aa0c375})}, - {Sign::NEG, -136, MType({0x2720640462a0f8ad, 0x8bd9c5ef33b669e0})}, - {Sign::NEG, -136, MType({0xe2f1775134c8da75, 0x87dbecc63e7b01ed})}, - {Sign::NEG, -136, MType({0xff67e201c8c50d67, 0x83de03ae3bbcad2e})}, - {Sign::NEG, -137, MType({0x3c742a7c76356396, 0xffc0154d588733c5})}, - {Sign::NEG, -137, MType({0xf90dd6b24aa686ec, 0xf7c4035e21a4052f})}, - {Sign::NEG, -137, MType({0xca47c52b7d7ffce2, 0xefc7d18dd4485b9e})}, - {Sign::NEG, -137, MType({0x3703617ad3d8311f, 0xe7cb7fdb71e0db36})}, - {Sign::NEG, -137, MType({0x7e4cfbd830393b88, 0xdfcf0e45fbce3e80})}, - {Sign::NEG, -137, MType({0x4f7a29cf0fc2c38e, 0xd7d27ccc736555af})}, - {Sign::NEG, -137, MType({0x7370ae83f9e72748, 0xcfd5cb6dd9ef05dd})}, - {Sign::NEG, -137, MType({0x671486eb4cd76f65, 0xc7d8fa2930a84850})}, - {Sign::NEG, -137, MType({0xe6dbb624f9739782, 0xbfdc08fd78c229b9})}, - {Sign::NEG, -137, MType({0x6b866e09e57d9079, 0xb7def7e9b361c979})}, - {Sign::NEG, -137, MType({0x97fa2fd0c9dc723e, 0xafe1c6ece1a058dd})}, - {Sign::NEG, -137, MType({0x983e80897cf1e60f, 0xa7e47606048b1a65})}, - {Sign::NEG, -137, MType({0x7199cd06ae5d39b3, 0x9fe705341d236102})}, - {Sign::NEG, -137, MType({0x43cd18a72a051a96, 0x97e974762c5e8f58})}, - {Sign::NEG, 
-137, MType({0x7b6d1248c3e1fd40, 0x8febc3cb332616ff})}, - {Sign::NEG, -137, MType({0xf5572a8814c703af, 0x87edf332325777c5})}, - {Sign::NEG, -138, MType({0x26828c92649a3a39, 0xffe0055455887de0})}, - {Sign::NEG, -138, MType({0x82c550bd1216d82a, 0xefe3e4643a640cf3})}, - {Sign::NEG, -138, MType({0xda6959f7f0e01bf0, 0xdfe7839214b4e8ae})}, - {Sign::NEG, -138, MType({0xda93e2fa85a8f214, 0xcfeae2dbe5d6736d})}, - {Sign::NEG, -138, MType({0xb47505bfa5a03b06, 0xbfee023faf0c2480})}, - {Sign::NEG, -138, MType({0xb1475a5180a43520, 0xaff0e1bb718186ad})}, - {Sign::NEG, -138, MType({0xa8740b91c95df537, 0x9ff3814d2e4a36b2})}, - {Sign::NEG, -138, MType({0x57d895d35921b59c, 0x8ff5e0f2e661e1c6})}, - {Sign::NEG, -139, MType({0x3c56c598c659c2a3, 0xfff0015535588833})}, - {Sign::NEG, -139, MType({0x2ef8ec33ed9d782a, 0xdff3c0e497ea4eb1})}, - {Sign::NEG, -139, MType({0x379eba7e6465ff63, 0xbff7008ff5e0c257})}, - {Sign::NEG, -139, MType({0x3f972b783fcab757, 0x9ff9c0535073a370})}, - {Sign::NEG, -140, MType({0xde026e271ee0549d, 0xfff8005551558885})}, - {Sign::NEG, -140, MType({0xeceb47ea01f6c632, 0xbffb8023febc0c25})}, - {Sign::NEG, -141, MType({0x7333c57857e1ed52, 0xfffc001554d55888})}, - {Sign::NEG, -142, MType({0x87dde026fa704374, 0xfffe000555455588})}, - {Sign::NEG, 0, MType({0x0, 0x0})}, - {Sign::POS, -141, MType({0x44999abe2fe2cc65, 0x80010002aab2aac4})}, - {Sign::POS, -140, MType({0x4eef381581464ccb, 0x8002000aaaeaac44})}, - {Sign::POS, -140, MType({0xdfeb485085f6f454, 0xc004802401440c26})}, - {Sign::POS, -139, MType({0x99abe3be3a1c6e93, 0x8004002aacaac445})}, - {Sign::POS, -139, MType({0x6bc1e20eac8448b4, 0xa00640535a37a37a})}, - {Sign::POS, -139, MType({0x979eedc064c242fd, 0xc00900900a20c275})}, - {Sign::POS, -139, MType({0xc72446cc1bf728bd, 0xe00c40e4bd6e4efd})}, - {Sign::POS, -138, MType({0xf381b821bbb569e5, 0x800800aabaac446e})}, - {Sign::POS, -138, MType({0x569b26aaa485ea5c, 0x900a20f319a3e273})}, - {Sign::POS, -138, MType({0x2dcf56c83c80b028, 0xa00c814d7c6a37f8})}, - {Sign::POS, 
-138, MType({0x5f69768284463b9b, 0xb00f21bbe3e388ee})}, - {Sign::POS, -138, MType({0xb48ea6c05e2773a1, 0xc0120240510c284c})}, - {Sign::POS, -138, MType({0x14d9d76196d8043a, 0xd01522dcc4f87991})}, - {Sign::POS, -138, MType({0xe016a611a4415d72, 0xe018839340d4f241})}, - {Sign::POS, -138, MType({0x661e135f49a47c40, 0xf01c2465c5e61b6f})}, - {Sign::POS, -137, MType({0xbe6bf0fa435e8383, 0x801002ab2ac4499a})}, - {Sign::POS, -137, MType({0x9a31ba0cbc030353, 0x881213337898871e})}, - {Sign::POS, -137, MType({0x54b57dfe0c4c840f, 0x901443cccd362c9f})}, - {Sign::POS, -137, MType({0x7ad1e9c315328f7e, 0x98169478296fad41})}, - {Sign::POS, -137, MType({0x1f3f686cf3d6be22, 0xa01905368e2389b3})}, - {Sign::POS, -137, MType({0xf105b66ec4703ede, 0xa81b9608fc3c50ec})}, - {Sign::POS, -137, MType({0x610848c68df4d233, 0xb01e46f074b0a0f3})}, - {Sign::POS, -137, MType({0xd6aef30cd312169a, 0xb82117edf8832797})}, - {Sign::POS, -137, MType({0xf3ac379608053d9d, 0xc024090288c2a339})}, - {Sign::POS, -137, MType({0xe6e2acf8f4d4c24a, 0xc8271a2f2689e388})}, - {Sign::POS, -137, MType({0xce6ae474d860359f, 0xd02a4b74d2ffca44})}, - {Sign::POS, -137, MType({0x28bb3cd9f2a65fb5, 0xd82d9cd48f574c00})}, - {Sign::POS, -137, MType({0x54f30dbef38a8066, 0xe0310e4f5ccf70e1})}, - {Sign::POS, -137, MType({0x224a96f5a7471c46, 0xe8349fe63cb35564})}, - {Sign::POS, -137, MType({0x6ea920591aa02e1b, 0xf038519a305a2b1b})}, - {Sign::POS, -137, MType({0xd462b63756c87e80, 0xf83c236c39273972})}, - {Sign::POS, -136, MType({0x338f77605fe77f2a, 0x80200aaeac44ef38})}, - {Sign::POS, -136, MType({0x3ff51287882500ed, 0x842213b747fec7bb})}, - {Sign::POS, -136, MType({0xcc394b3ef0ebeb12, 0x88242cd07084ed02})}, - {Sign::POS, -136, MType({0x1ab9679b55f78a6b, 0x8c2655faa6a1323f})}, - {Sign::POS, -136, MType({0x7025697d10af0436, 0x90288f366b237771})}, - {Sign::POS, -136, MType({0x17e4b7ac6c600cb4, 0x942ad8843ee1a9cd})}, - {Sign::POS, -136, MType({0x7013925a9a8da7f3, 0x982d31e4a2b7c418})}, - {Sign::POS, -136, MType({0xfd1a09c848e3950e, 
0x9c2f9b581787cf0d})}, - {Sign::POS, -136, MType({0x84dd2de6e3d90a37, 0xa03214df1e39e1bd})}, - {Sign::POS, -136, MType({0x318b2ddd9d0a33b4, 0xa4349e7a37bc21ed})}, - {Sign::POS, -136, MType({0xbc031e6f5acfd4a8, 0xa8373829e502c47a})}, - {Sign::POS, -136, MType({0x9dd91e52c79fd070, 0xac39e1eea7080dbc})}, - {Sign::POS, -136, MType({0x4af78fa1cb48a12d, 0xb03c9bc8fecc51e3})}, - {Sign::POS, -136, MType({0x72de1d99ce252efd, 0xb43f65b96d55f55a})}, - {Sign::POS, -136, MType({0xefb1dbe721934877, 0xb74187bc8ccffa84})}, - {Sign::POS, -136, MType({0xb4b080f230c87598, 0xbb446dd4d9bca499})}, - {Sign::POS, -136, MType({0xda6a7cd19c7fa4f2, 0xbf476404a05f88f2})}, - {Sign::POS, -136, MType({0xdf00e3783b50ecfb, 0xc34a6a4c61d5cc3c})}, - {Sign::POS, -136, MType({0xda2e5e02ab4e183c, 0xc74d80ac9f42a52d})}, - {Sign::POS, -136, MType({0xea5f6ee99d30c626, 0xcb50a725d9cf5ce6})}, - {Sign::POS, -136, MType({0xa96d5956531d7d8b, 0xcf53ddb892ab4f55})}, - {Sign::POS, -136, MType({0xa8fc636eb36afa75, 0xd35724654b0beb95})}, - {Sign::POS, -136, MType({0xf67e2b827bfc4421, 0xd75a7b2c842cb451})}, - {Sign::POS, -136, MType({0xa6d8c817516303e6, 0xdb5de20ebf4f4026})}, - {Sign::POS, -136, MType({0x69b36ae5962e85f4, 0xdf61590c7dbb3a02})}, - {Sign::POS, -136, MType({0x24693eec2a831cc3, 0xe364e02640be6188})}, - {Sign::POS, -136, MType({0x94a339d56a55ab4a, 0xe768775c89ac8b70})}, - {Sign::POS, -136, MType({0xfa9998fbf9703bf4, 0xeb6c1eafd9dfa1eb})}, - {Sign::POS, -136, MType({0xcafdc27227b71eaa, 0xef6fd620b2b7a503})}, - {Sign::POS, -136, MType({0x688d4282f6026aa3, 0xf3739daf959aaafc})}, - {Sign::POS, -136, MType({0xe54e9e3804464cdd, 0xf777755d03f4e0b6})}, - {Sign::POS, -136, MType({0xcb78b383f4b59dce, 0xfb7b5d297f388a12})}, - {Sign::POS, -136, MType({0xee055fc515062c04, 0xff7f551588de024f})}, - {Sign::POS, -135, MType({0x207812b43382acdd, 0x81c1ae90d131de38})}, - {Sign::POS, -135, MType({0xdc90c4c4b61f3a87, 0x83c3baa726a721cc})}, - {Sign::POS, -135, MType({0x1a03f13fb2c978b1, 0x85c5cece05941dbc})}, - {Sign::POS, 
-135, MType({0xb36f282e83a7dc36, 0x87c7eb05aec1304f})}, - {Sign::POS, -135, MType({0x6ad14c3dfa414391, 0x89ca0f4e62f9c476})}, - {Sign::POS, -135, MType({0xe8dd4ea0d48b88e5, 0x8bcc3ba8630c51f4})}, - {Sign::POS, -135, MType({0xc02515afe8caeb90, 0x8dce7013efca5d96})}, - {Sign::POS, -135, MType({0x741ceaf3349f3cf1, 0x8fd0ac914a08795f})}, - {Sign::POS, -135, MType({0x83f7cd4929d2c28c, 0x91d2f120b29e44bb})}, - {Sign::POS, -135, MType({0x795d03ebc2fd03fa, 0x93d53dc26a666cb1})}, - {Sign::POS, -135, MType({0xfaf74f1d1ad16acc, 0x95d79276b23eac12})}, - {Sign::POS, -135, MType({0xe2de134f72fee429, 0x97d9ef3dcb07cbad})}, - {Sign::POS, -135, MType({0x58d8dba6cadac5d5, 0x99dc5417f5a5a27d})}, - {Sign::POS, -135, MType({0xf07d90bc5aae40a4, 0x9bdec10572ff15da})}, - {Sign::POS, -135, MType({0x1deaf79d9fc40374, 0x9d6098046659ea6b})}, - {Sign::POS, -135, MType({0x7ba63e6769b81999, 0x9f63131450b07988})}, - {Sign::POS, -135, MType({0x59ebfc9335094e59, 0xa1659638404d5f92})}, - {Sign::POS, -135, MType({0x16aae012b5026f71, 0xa36821707622f97a})}, - {Sign::POS, -135, MType({0xff5d4f2c0e4b9cae, 0xa56ab4bd3326b378})}, - {Sign::POS, -135, MType({0x855838b5119dcb28, 0xa76d501eb8510941})}, - {Sign::POS, -135, MType({0x75f70cbbe9cf1603, 0xa96ff395469d8630})}, - {Sign::POS, -135, MType({0x36a53ad4d5541cc9, 0xab729f211f0ac57e})}, - {Sign::POS, -135, MType({0x4c5934ec32d20d9, 0xad7552c2829a7270})}, - {Sign::POS, -135, MType({0x3977e89aec59bfa2, 0xaf780e79b2514889})}, - {Sign::POS, -135, MType({0x913d4e3dc55c3e6e, 0xb17ad246ef3713bc})}, - {Sign::POS, -135, MType({0x777b52a9e70d8bcc, 0xb37d9e2a7a56b09d})}, - {Sign::POS, -135, MType({0x55de916fd30591de, 0xb580722494be0c91})}, - {Sign::POS, -135, MType({0xe79cfb37be2861e4, 0xb7834e357f7e2600})}, - {Sign::POS, -135, MType({0x90983104d3805389, 0xb986325d7bab0c89})}, - {Sign::POS, -135, MType({0xb860504baa6f984d, 0xbb891e9cca5be12e})}, - {Sign::POS, -135, MType({0x29178d6ff5712b96, 0xbd8c12f3acaad68b})}, - {Sign::POS, -135, MType({0x7236fa47ba19a198, 
0xbf8f0f6263b53102})}, - {Sign::POS, -135, MType({0x4f34d64cafcc50e3, 0xc19213e9309b46f2})}, - {Sign::POS, -135, MType({0x120cc62eb0a8db3e, 0xc3952088548080e4})}, - {Sign::POS, -135, MType({0x11aa5084779060e3, 0xc5983540108b59be})}, - {Sign::POS, -135, MType({0x1c35fd6236c8dcf1, 0xc79b5210a5e55ef5})}, - {Sign::POS, -135, MType({0xed4576a7e4b878fe, 0xc99e76fa55bb30bd})}, - {Sign::POS, -135, MType({0x6caf4bb8fd2c1131, 0xcb20d7fa3a336081})}, - {Sign::POS, -135, MType({0x3f24a6cbb09c654f, 0xcd240b10753e78de})}, - {Sign::POS, -135, MType({0x78bc003bb81e40f3, 0xcf2746407e0ff09f})}, - {Sign::POS, -135, MType({0x56647301edfd8e8b, 0xd12a898a95dff002})}, - {Sign::POS, -135, MType({0x28fe1c4d04ca4ed9, 0xd32dd4eefde9b2ef})}, - {Sign::POS, -135, MType({0xe1ea9ea6cbf57379, 0xd531286df76b892a})}, - {Sign::POS, -135, MType({0xa3832028141a5cc2, 0xd7348407c3a6d688})}, - {Sign::POS, -135, MType({0x557421dd379d3ead, 0xd937e7bca3e0131b})}, - {Sign::POS, -135, MType({0x3cff8e87a99bcaf0, 0xdb3b538cd95ecb67})}, - {Sign::POS, -135, MType({0x99255ef34bd0801f, 0xdd3ec778a56da093})}, - {Sign::POS, -135, MType({0x42b33220abfa15cd, 0xdf424380495a489c})}, - {Sign::POS, -135, MType({0x503b378faa97dbc0, 0xe145c7a406758e83})}, - {Sign::POS, -135, MType({0xbdf2ca006f59b544, 0xe34953e41e135282})}, - {Sign::POS, -135, MType({0x1979190af37ed16f, 0xe54ce840d18a8a3e})}, - {Sign::POS, -135, MType({0x31863ff7cf898c9c, 0xe75084ba623540f4})}, - {Sign::POS, -135, MType({0xc983284f60293647, 0xe9542951117097b0})}, - {Sign::POS, -135, MType({0x510a969ebe03f804, 0xeb57d605209cc57e})}, - {Sign::POS, -135, MType({0x9f53bffc6d23fe30, 0xed5b8ad6d11d1797})}, - {Sign::POS, -135, MType({0xb286c6e113337886, 0xef5f47c66457f199})}, - {Sign::POS, -135, MType({0xb6ed80852ae6fd63, 0xf0e21acdd6e7d412})}, - {Sign::POS, -135, MType({0xdf437fb0f616082d, 0xf2e5e5f25450c5a2})}, - {Sign::POS, -135, MType({0xf237cff1acb306b3, 0xf4e9b935685dbe0b})}, - {Sign::POS, -135, MType({0x52dbfafb4121a092, 0xf6ed94975480b696})}, - {Sign::POS, 
-135, MType({0xd81648249cece4c, 0xf8f178185a2ebfd9})}, - {Sign::POS, -135, MType({0xad95e6b0b96903d3, 0xfaf563b8bae001eb})}, - {Sign::POS, -135, MType({0x176cd56887ac7fe9, 0xfcf95778b80fbc98})}, - {Sign::POS, -135, MType({0x65f4c7397f1f478d, 0xfefd5358933c478c})}, + {Sign::NEG, -135, 0x803faaca'c419abf2'a1c6f3fc'242ef8d0_u128}, + {Sign::NEG, -136, 0xfc834da1'6f0d9f57'a225ebc0'2e6d9dd4_u128}, + {Sign::NEG, -136, 0xf88735cc'c7433381'c33f6ad3'40ae18a9_u128}, + {Sign::NEG, -136, 0xf48b0e17'1249b6bc'70b2a4d3'8a242244_u128}, + {Sign::NEG, -136, 0xf08ed67f'd190e280'1d548190'48b811b0_u128}, + {Sign::NEG, -136, 0xec928f06'86828706'aee59837'01d2a02b_u128}, + {Sign::NEG, -136, 0xe89637aa'b2828aed'40abb8ab'72afa2d2_u128}, + {Sign::NEG, -136, 0xe499d06b'd6eeead5'deb547a0'd4a26ef9_u128}, + {Sign::NEG, -136, 0xe09d5949'751fb909'39c5bdfb'cf6087a0_u128}, + {Sign::NEG, -136, 0xdca0d243'0e671d18'53ea9bf1'52de635f_u128}, + {Sign::NEG, -136, 0xd8a43b58'2411537e'25b82043'6f5f4352_u128}, + {Sign::NEG, -136, 0xd4a79488'3764ad41'3c2d13ea'1d0be058_u128}, + {Sign::NEG, -136, 0xd0aaddd2'c9a18f95'4f3cfa62'bcb3ce3a_u128}, + {Sign::NEG, -136, 0xccae1737'5c02737c'd0fff6cd'f14a86c7_u128}, + {Sign::NEG, -136, 0xc8b140b5'6fbbe56a'7587b5f0'453ac3d2_u128}, + {Sign::NEG, -136, 0xc4b45a4c'85fc84e2'b358ad16'dfd0d085_u128}, + {Sign::NEG, -136, 0xc0b763fc'1fed041d'3c86fdce'5dbe7314_u128}, + {Sign::NEG, -136, 0xbcba5dc3'beb027a6'70764e46'ac18a96d_u128}, + {Sign::NEG, -136, 0xb8bd47a2'e362c600'c63be62b'8f285882_u128}, + {Sign::NEG, -136, 0xb3c0d59a'244325a4'72e7b5a3'86e5e31b_u128}, + {Sign::NEG, -136, 0xafc39bac'66434f27'c3ea2cd9'3f316b34_u128}, + {Sign::NEG, -136, 0xabc651d4'91a7b438'1dfb11a7'cc892843_u128}, + {Sign::NEG, -136, 0xa7c8f812'2773f38d'fc679a28'e9d9f212_u128}, + {Sign::NEG, -136, 0xa3cb8e64'a8a5bbe6'e7bc977e'eec42254_u128}, + {Sign::NEG, -136, 0x9fce14cb'9634cba6'b20f215b'd3b58c61_u128}, + {Sign::NEG, -136, 0x9bd08b46'7112f078'abe28625'08d67a98_u128}, + {Sign::NEG, -136, 
0x97d2f1d4'ba2c06f0'd1aacedc'efe9d377_u128}, + {Sign::NEG, -136, 0x93d54875'f265fa2c'f1eb25e7'7d05f58d_u128}, + {Sign::NEG, -136, 0x8fd78f29'9aa0c375'cbef6fac'33691e95_u128}, + {Sign::NEG, -136, 0x8bd9c5ef'33b669e0'27206404'62a0f8ad_u128}, + {Sign::NEG, -136, 0x87dbecc6'3e7b01ed'e2f17751'34c8da75_u128}, + {Sign::NEG, -136, 0x83de03ae'3bbcad2e'ff67e201'c8c50d67_u128}, + {Sign::NEG, -137, 0xffc0154d'588733c5'3c742a7c'76356396_u128}, + {Sign::NEG, -137, 0xf7c4035e'21a4052f'f90dd6b2'4aa686ec_u128}, + {Sign::NEG, -137, 0xefc7d18d'd4485b9e'ca47c52b'7d7ffce2_u128}, + {Sign::NEG, -137, 0xe7cb7fdb'71e0db36'3703617a'd3d8311f_u128}, + {Sign::NEG, -137, 0xdfcf0e45'fbce3e80'7e4cfbd8'30393b88_u128}, + {Sign::NEG, -137, 0xd7d27ccc'736555af'4f7a29cf'0fc2c38e_u128}, + {Sign::NEG, -137, 0xcfd5cb6d'd9ef05dd'7370ae83'f9e72748_u128}, + {Sign::NEG, -137, 0xc7d8fa29'30a84850'671486eb'4cd76f65_u128}, + {Sign::NEG, -137, 0xbfdc08fd'78c229b9'e6dbb624'f9739782_u128}, + {Sign::NEG, -137, 0xb7def7e9'b361c979'6b866e09'e57d9079_u128}, + {Sign::NEG, -137, 0xafe1c6ec'e1a058dd'97fa2fd0'c9dc723e_u128}, + {Sign::NEG, -137, 0xa7e47606'048b1a65'983e8089'7cf1e60f_u128}, + {Sign::NEG, -137, 0x9fe70534'1d236102'7199cd06'ae5d39b3_u128}, + {Sign::NEG, -137, 0x97e97476'2c5e8f58'43cd18a7'2a051a96_u128}, + {Sign::NEG, -137, 0x8febc3cb'332616ff'7b6d1248'c3e1fd40_u128}, + {Sign::NEG, -137, 0x87edf332'325777c5'f5572a88'14c703af_u128}, + {Sign::NEG, -138, 0xffe00554'55887de0'26828c92'649a3a39_u128}, + {Sign::NEG, -138, 0xefe3e464'3a640cf3'82c550bd'1216d82a_u128}, + {Sign::NEG, -138, 0xdfe78392'14b4e8ae'da6959f7'f0e01bf0_u128}, + {Sign::NEG, -138, 0xcfeae2db'e5d6736d'da93e2fa'85a8f214_u128}, + {Sign::NEG, -138, 0xbfee023f'af0c2480'b47505bf'a5a03b06_u128}, + {Sign::NEG, -138, 0xaff0e1bb'718186ad'b1475a51'80a43520_u128}, + {Sign::NEG, -138, 0x9ff3814d'2e4a36b2'a8740b91'c95df537_u128}, + {Sign::NEG, -138, 0x8ff5e0f2'e661e1c6'57d895d3'5921b59c_u128}, + {Sign::NEG, -139, 0xfff00155'35588833'3c56c598'c659c2a3_u128}, + 
{Sign::NEG, -139, 0xdff3c0e4'97ea4eb1'2ef8ec33'ed9d782a_u128}, + {Sign::NEG, -139, 0xbff7008f'f5e0c257'379eba7e'6465ff63_u128}, + {Sign::NEG, -139, 0x9ff9c053'5073a370'3f972b78'3fcab757_u128}, + {Sign::NEG, -140, 0xfff80055'51558885'de026e27'1ee0549d_u128}, + {Sign::NEG, -140, 0xbffb8023'febc0c25'eceb47ea'01f6c632_u128}, + {Sign::NEG, -141, 0xfffc0015'54d55888'7333c578'57e1ed52_u128}, + {Sign::NEG, -142, 0xfffe0005'55455588'87dde026'fa704374_u128}, + {Sign::NEG, 0, 0_u128}, + {Sign::POS, -141, 0x80010002'aab2aac4'44999abe'2fe2cc65_u128}, + {Sign::POS, -140, 0x8002000a'aaeaac44'4eef3815'81464ccb_u128}, + {Sign::POS, -140, 0xc0048024'01440c26'dfeb4850'85f6f454_u128}, + {Sign::POS, -139, 0x8004002a'acaac445'99abe3be'3a1c6e93_u128}, + {Sign::POS, -139, 0xa0064053'5a37a37a'6bc1e20e'ac8448b4_u128}, + {Sign::POS, -139, 0xc0090090'0a20c275'979eedc0'64c242fd_u128}, + {Sign::POS, -139, 0xe00c40e4'bd6e4efd'c72446cc'1bf728bd_u128}, + {Sign::POS, -138, 0x800800aa'baac446e'f381b821'bbb569e5_u128}, + {Sign::POS, -138, 0x900a20f3'19a3e273'569b26aa'a485ea5c_u128}, + {Sign::POS, -138, 0xa00c814d'7c6a37f8'2dcf56c8'3c80b028_u128}, + {Sign::POS, -138, 0xb00f21bb'e3e388ee'5f697682'84463b9b_u128}, + {Sign::POS, -138, 0xc0120240'510c284c'b48ea6c0'5e2773a1_u128}, + {Sign::POS, -138, 0xd01522dc'c4f87991'14d9d761'96d8043a_u128}, + {Sign::POS, -138, 0xe0188393'40d4f241'e016a611'a4415d72_u128}, + {Sign::POS, -138, 0xf01c2465'c5e61b6f'661e135f'49a47c40_u128}, + {Sign::POS, -137, 0x801002ab'2ac4499a'be6bf0fa'435e8383_u128}, + {Sign::POS, -137, 0x88121333'7898871e'9a31ba0c'bc030353_u128}, + {Sign::POS, -137, 0x901443cc'cd362c9f'54b57dfe'0c4c840f_u128}, + {Sign::POS, -137, 0x98169478'296fad41'7ad1e9c3'15328f7e_u128}, + {Sign::POS, -137, 0xa0190536'8e2389b3'1f3f686c'f3d6be22_u128}, + {Sign::POS, -137, 0xa81b9608'fc3c50ec'f105b66e'c4703ede_u128}, + {Sign::POS, -137, 0xb01e46f0'74b0a0f3'610848c6'8df4d233_u128}, + {Sign::POS, -137, 0xb82117ed'f8832797'd6aef30c'd312169a_u128}, + {Sign::POS, -137, 
0xc0240902'88c2a339'f3ac3796'08053d9d_u128}, + {Sign::POS, -137, 0xc8271a2f'2689e388'e6e2acf8'f4d4c24a_u128}, + {Sign::POS, -137, 0xd02a4b74'd2ffca44'ce6ae474'd860359f_u128}, + {Sign::POS, -137, 0xd82d9cd4'8f574c00'28bb3cd9'f2a65fb5_u128}, + {Sign::POS, -137, 0xe0310e4f'5ccf70e1'54f30dbe'f38a8066_u128}, + {Sign::POS, -137, 0xe8349fe6'3cb35564'224a96f5'a7471c46_u128}, + {Sign::POS, -137, 0xf038519a'305a2b1b'6ea92059'1aa02e1b_u128}, + {Sign::POS, -137, 0xf83c236c'39273972'd462b637'56c87e80_u128}, + {Sign::POS, -136, 0x80200aae'ac44ef38'338f7760'5fe77f2a_u128}, + {Sign::POS, -136, 0x842213b7'47fec7bb'3ff51287'882500ed_u128}, + {Sign::POS, -136, 0x88242cd0'7084ed02'cc394b3e'f0ebeb12_u128}, + {Sign::POS, -136, 0x8c2655fa'a6a1323f'1ab9679b'55f78a6b_u128}, + {Sign::POS, -136, 0x90288f36'6b237771'7025697d'10af0436_u128}, + {Sign::POS, -136, 0x942ad884'3ee1a9cd'17e4b7ac'6c600cb4_u128}, + {Sign::POS, -136, 0x982d31e4'a2b7c418'7013925a'9a8da7f3_u128}, + {Sign::POS, -136, 0x9c2f9b58'1787cf0d'fd1a09c8'48e3950e_u128}, + {Sign::POS, -136, 0xa03214df'1e39e1bd'84dd2de6'e3d90a37_u128}, + {Sign::POS, -136, 0xa4349e7a'37bc21ed'318b2ddd'9d0a33b4_u128}, + {Sign::POS, -136, 0xa8373829'e502c47a'bc031e6f'5acfd4a8_u128}, + {Sign::POS, -136, 0xac39e1ee'a7080dbc'9dd91e52'c79fd070_u128}, + {Sign::POS, -136, 0xb03c9bc8'fecc51e3'4af78fa1'cb48a12d_u128}, + {Sign::POS, -136, 0xb43f65b9'6d55f55a'72de1d99'ce252efd_u128}, + {Sign::POS, -136, 0xb74187bc'8ccffa84'efb1dbe7'21934877_u128}, + {Sign::POS, -136, 0xbb446dd4'd9bca499'b4b080f2'30c87598_u128}, + {Sign::POS, -136, 0xbf476404'a05f88f2'da6a7cd1'9c7fa4f2_u128}, + {Sign::POS, -136, 0xc34a6a4c'61d5cc3c'df00e378'3b50ecfb_u128}, + {Sign::POS, -136, 0xc74d80ac'9f42a52d'da2e5e02'ab4e183c_u128}, + {Sign::POS, -136, 0xcb50a725'd9cf5ce6'ea5f6ee9'9d30c626_u128}, + {Sign::POS, -136, 0xcf53ddb8'92ab4f55'a96d5956'531d7d8b_u128}, + {Sign::POS, -136, 0xd3572465'4b0beb95'a8fc636e'b36afa75_u128}, + {Sign::POS, -136, 0xd75a7b2c'842cb451'f67e2b82'7bfc4421_u128}, + 
{Sign::POS, -136, 0xdb5de20e'bf4f4026'a6d8c817'516303e6_u128}, + {Sign::POS, -136, 0xdf61590c'7dbb3a02'69b36ae5'962e85f4_u128}, + {Sign::POS, -136, 0xe364e026'40be6188'24693eec'2a831cc3_u128}, + {Sign::POS, -136, 0xe768775c'89ac8b70'94a339d5'6a55ab4a_u128}, + {Sign::POS, -136, 0xeb6c1eaf'd9dfa1eb'fa9998fb'f9703bf4_u128}, + {Sign::POS, -136, 0xef6fd620'b2b7a503'cafdc272'27b71eaa_u128}, + {Sign::POS, -136, 0xf3739daf'959aaafc'688d4282'f6026aa3_u128}, + {Sign::POS, -136, 0xf777755d'03f4e0b6'e54e9e38'04464cdd_u128}, + {Sign::POS, -136, 0xfb7b5d29'7f388a12'cb78b383'f4b59dce_u128}, + {Sign::POS, -136, 0xff7f5515'88de024f'ee055fc5'15062c04_u128}, + {Sign::POS, -135, 0x81c1ae90'd131de38'207812b4'3382acdd_u128}, + {Sign::POS, -135, 0x83c3baa7'26a721cc'dc90c4c4'b61f3a87_u128}, + {Sign::POS, -135, 0x85c5cece'05941dbc'1a03f13f'b2c978b1_u128}, + {Sign::POS, -135, 0x87c7eb05'aec1304f'b36f282e'83a7dc36_u128}, + {Sign::POS, -135, 0x89ca0f4e'62f9c476'6ad14c3d'fa414391_u128}, + {Sign::POS, -135, 0x8bcc3ba8'630c51f4'e8dd4ea0'd48b88e5_u128}, + {Sign::POS, -135, 0x8dce7013'efca5d96'c02515af'e8caeb90_u128}, + {Sign::POS, -135, 0x8fd0ac91'4a08795f'741ceaf3'349f3cf1_u128}, + {Sign::POS, -135, 0x91d2f120'b29e44bb'83f7cd49'29d2c28c_u128}, + {Sign::POS, -135, 0x93d53dc2'6a666cb1'795d03eb'c2fd03fa_u128}, + {Sign::POS, -135, 0x95d79276'b23eac12'faf74f1d'1ad16acc_u128}, + {Sign::POS, -135, 0x97d9ef3d'cb07cbad'e2de134f'72fee429_u128}, + {Sign::POS, -135, 0x99dc5417'f5a5a27d'58d8dba6'cadac5d5_u128}, + {Sign::POS, -135, 0x9bdec105'72ff15da'f07d90bc'5aae40a4_u128}, + {Sign::POS, -135, 0x9d609804'6659ea6b'1deaf79d'9fc40374_u128}, + {Sign::POS, -135, 0x9f631314'50b07988'7ba63e67'69b81999_u128}, + {Sign::POS, -135, 0xa1659638'404d5f92'59ebfc93'35094e59_u128}, + {Sign::POS, -135, 0xa3682170'7622f97a'16aae012'b5026f71_u128}, + {Sign::POS, -135, 0xa56ab4bd'3326b378'ff5d4f2c'0e4b9cae_u128}, + {Sign::POS, -135, 0xa76d501e'b8510941'855838b5'119dcb28_u128}, + {Sign::POS, -135, 
0xa96ff395'469d8630'75f70cbb'e9cf1603_u128}, + {Sign::POS, -135, 0xab729f21'1f0ac57e'36a53ad4'd5541cc9_u128}, + {Sign::POS, -135, 0xad7552c2'829a7270'04c5934e'c32d20d9_u128}, + {Sign::POS, -135, 0xaf780e79'b2514889'3977e89a'ec59bfa2_u128}, + {Sign::POS, -135, 0xb17ad246'ef3713bc'913d4e3d'c55c3e6e_u128}, + {Sign::POS, -135, 0xb37d9e2a'7a56b09d'777b52a9'e70d8bcc_u128}, + {Sign::POS, -135, 0xb5807224'94be0c91'55de916f'd30591de_u128}, + {Sign::POS, -135, 0xb7834e35'7f7e2600'e79cfb37'be2861e4_u128}, + {Sign::POS, -135, 0xb986325d'7bab0c89'90983104'd3805389_u128}, + {Sign::POS, -135, 0xbb891e9c'ca5be12e'b860504b'aa6f984d_u128}, + {Sign::POS, -135, 0xbd8c12f3'acaad68b'29178d6f'f5712b96_u128}, + {Sign::POS, -135, 0xbf8f0f62'63b53102'7236fa47'ba19a198_u128}, + {Sign::POS, -135, 0xc19213e9'309b46f2'4f34d64c'afcc50e3_u128}, + {Sign::POS, -135, 0xc3952088'548080e4'120cc62e'b0a8db3e_u128}, + {Sign::POS, -135, 0xc5983540'108b59be'11aa5084'779060e3_u128}, + {Sign::POS, -135, 0xc79b5210'a5e55ef5'1c35fd62'36c8dcf1_u128}, + {Sign::POS, -135, 0xc99e76fa'55bb30bd'ed4576a7'e4b878fe_u128}, + {Sign::POS, -135, 0xcb20d7fa'3a336081'6caf4bb8'fd2c1131_u128}, + {Sign::POS, -135, 0xcd240b10'753e78de'3f24a6cb'b09c654f_u128}, + {Sign::POS, -135, 0xcf274640'7e0ff09f'78bc003b'b81e40f3_u128}, + {Sign::POS, -135, 0xd12a898a'95dff002'56647301'edfd8e8b_u128}, + {Sign::POS, -135, 0xd32dd4ee'fde9b2ef'28fe1c4d'04ca4ed9_u128}, + {Sign::POS, -135, 0xd531286d'f76b892a'e1ea9ea6'cbf57379_u128}, + {Sign::POS, -135, 0xd7348407'c3a6d688'a3832028'141a5cc2_u128}, + {Sign::POS, -135, 0xd937e7bc'a3e0131b'557421dd'379d3ead_u128}, + {Sign::POS, -135, 0xdb3b538c'd95ecb67'3cff8e87'a99bcaf0_u128}, + {Sign::POS, -135, 0xdd3ec778'a56da093'99255ef3'4bd0801f_u128}, + {Sign::POS, -135, 0xdf424380'495a489c'42b33220'abfa15cd_u128}, + {Sign::POS, -135, 0xe145c7a4'06758e83'503b378f'aa97dbc0_u128}, + {Sign::POS, -135, 0xe34953e4'1e135282'bdf2ca00'6f59b544_u128}, + {Sign::POS, -135, 0xe54ce840'd18a8a3e'1979190a'f37ed16f_u128}, + 
{Sign::POS, -135, 0xe75084ba'623540f4'31863ff7'cf898c9c_u128}, + {Sign::POS, -135, 0xe9542951'117097b0'c983284f'60293647_u128}, + {Sign::POS, -135, 0xeb57d605'209cc57e'510a969e'be03f804_u128}, + {Sign::POS, -135, 0xed5b8ad6'd11d1797'9f53bffc'6d23fe30_u128}, + {Sign::POS, -135, 0xef5f47c6'6457f199'b286c6e1'13337886_u128}, + {Sign::POS, -135, 0xf0e21acd'd6e7d412'b6ed8085'2ae6fd63_u128}, + {Sign::POS, -135, 0xf2e5e5f2'5450c5a2'df437fb0'f616082d_u128}, + {Sign::POS, -135, 0xf4e9b935'685dbe0b'f237cff1'acb306b3_u128}, + {Sign::POS, -135, 0xf6ed9497'5480b696'52dbfafb'4121a092_u128}, + {Sign::POS, -135, 0xf8f17818'5a2ebfd9'0d816482'49cece4c_u128}, + {Sign::POS, -135, 0xfaf563b8'bae001eb'ad95e6b0'b96903d3_u128}, + {Sign::POS, -135, 0xfcf95778'b80fbc98'176cd568'87ac7fe9_u128}, + {Sign::POS, -135, 0xfefd5358'933c478c'65f4c739'7f1f478d_u128}, }, // -log(r) for the third step, generated by SageMath with: // @@ -388,170 +391,170 @@ alignas(64) const LogRR LOG_TABLE = { // r = 2^-21 * round( 2^21 / (1 + i*2^(-21)) ); // s, m, e = RealField(128)(r).log().sign_mantissa_exponent(); // print("{Sign::POS," if (s == -1) else "{Sign::NEG,", e, ", - // MType({", hex(m % 2^64), ",", hex((m >> 64) % 2^64), "})},"); + // format_hex(m), "},"); /* .step_3 = */ { - {Sign::NEG, -142, MType({0x374b294076d669c3, 0x9fff38014d52e45a})}, - {Sign::NEG, -142, MType({0x7f6f05dcdbeb776e, 0x9dff3cf940fad85a})}, - {Sign::NEG, -142, MType({0x3d55e21d41bbadf9, 0x9bff41e134f1cb36})}, - {Sign::NEG, -142, MType({0xccdba2d54aadbc5c, 0x99ff46b92936bcf4})}, - {Sign::NEG, -142, MType({0x71dd16d3073f79b2, 0x97ff4b811dc8ad9d})}, - {Sign::NEG, -142, MType({0x5837f3df1a58dd48, 0x95ff503912a69d37})}, - {Sign::NEG, -142, MType({0x93cad3bcdd26fd6d, 0x93ff54e107cf8bc9})}, - {Sign::NEG, -142, MType({0x2075312a827f14fa, 0x91ff5978fd42795b})}, - {Sign::NEG, -142, MType({0xe21764e139c98f60, 0x8fff5e00f2fe65f2})}, - {Sign::NEG, -142, MType({0xa492a29551751b4c, 0x8dff6278e9025197})}, - {Sign::NEG, -142, 
MType({0x1bc8f5f658f1c3a2, 0x8bff66e0df4d3c50})}, - {Sign::NEG, -142, MType({0xe39d3faf42340ed7, 0x89ff6b38d5de2622})}, - {Sign::NEG, -142, MType({0x7ff3326682c02485, 0x87ff6f80ccb40f16})}, - {Sign::NEG, -142, MType({0x5caf4fbe343cf928, 0x85ff73b8c3cdf731})}, - {Sign::NEG, -142, MType({0xcdb6e554348f7fe8, 0x83ff77e0bb2ade79})}, - {Sign::NEG, -142, MType({0xef009c2457de25d, 0x81ff7bf8b2c9c4f6})}, - {Sign::NEG, -143, MType({0x8883333c57b57c74, 0xffff000155535558})}, - {Sign::NEG, -143, MType({0xf32668f39c70d183, 0xfbff07f145931f44})}, - {Sign::NEG, -143, MType({0x459a73c6a6486fe3, 0xf7ff0fc13650e7bd})}, - {Sign::NEG, -143, MType({0x37b18cca7dd3a29f, 0xf3ff1771278aaecd})}, - {Sign::NEG, -143, MType({0x513f610d21bcfc78, 0xefff1f01193e7480})}, - {Sign::NEG, -143, MType({0xea190b95c0690b7b, 0xebff26710b6a38e1})}, - {Sign::NEG, -143, MType({0x2a150f64f0ad1743, 0xe7ff2dc0fe0bfbfd})}, - {Sign::NEG, -143, MType({0x90b5174e995e9d1, 0xe3ff34f0f121bddd})}, - {Sign::NEG, -143, MType({0x4ed512b9b93ea2bf, 0xdfff3c00e4a97e8c})}, - {Sign::NEG, -143, MType({0x934cea217ab794a2, 0xdbff42f0d8a13e15})}, - {Sign::NEG, -143, MType({0x3e4ebe948afd2c76, 0xd7ff49c0cd06fc83})}, - {Sign::NEG, -143, MType({0x87b7c0f5bcfee2e1, 0xd3ff5070c1d8b9df})}, - {Sign::NEG, -143, MType({0x776666228cb6371b, 0xcfff5700b7147634})}, - {Sign::NEG, -143, MType({0xe53a60f3514db358, 0xcbff5d70acb8318b})}, - {Sign::NEG, -143, MType({0x79149c3b6e57fa86, 0xc7ff63c0a2c1ebef})}, - {Sign::NEG, -143, MType({0xaad734c98416df2a, 0xc3ff69f0992fa568})}, - {Sign::NEG, -143, MType({0xc26573679ed28334, 0xbfff70008fff5e00})}, - {Sign::NEG, -143, MType({0xd7a3c6db6540809f, 0xbbff75f0872f15c0})}, - {Sign::NEG, -143, MType({0xd277bde645fb1aad, 0xb7ff7bc07ebcccb1})}, - {Sign::NEG, -143, MType({0x6ac80145a4087793, 0xb3ff817076a682dc})}, - {Sign::NEG, -143, MType({0x287c4db30271e265, 0xafff87006eea3849})}, - {Sign::NEG, -143, MType({0x637d6de42eeb151e, 0xabff8c706785ed00})}, - {Sign::NEG, -143, MType({0x43b5348b6b898a8c, 
0xa7ff91c06077a10a})}, - {Sign::NEG, -143, MType({0xc10e7657978bd7f6, 0xa3ff96f059bd546e})}, - {Sign::NEG, -143, MType({0xa37503f457310e59, 0x9fff9c0053550735})}, - {Sign::NEG, -143, MType({0x82d5a40a3aa022ff, 0x9bffa0f04d3cb966})}, - {Sign::NEG, -143, MType({0xc71e0d3ee3df5f4d, 0x97ffa5c047726b08})}, - {Sign::NEG, -143, MType({0xa83ce0352bdbd79b, 0x93ffaa7041f41c23})}, - {Sign::NEG, -143, MType({0x2e21a18d4680e8e4, 0x8fffaf003cbfccbe})}, - {Sign::NEG, -143, MType({0x30bcb3e4e5dfbd28, 0x8bffb37037d37cdf})}, - {Sign::NEG, -143, MType({0x57ff51d75c66d64a, 0x87ffb7c0332d2c8d})}, - {Sign::NEG, -143, MType({0x1bdb87fdbe299f43, 0x83ffbbf02ecadbcf})}, - {Sign::NEG, -144, MType({0x88885dde02700703, 0xffff800055551555})}, - {Sign::NEG, -144, MType({0xd259ca803a0c1870, 0xf7ff87e04d94724c})}, - {Sign::NEG, -144, MType({0xe514130851c7070a, 0xefff8f80464fce8f})}, - {Sign::NEG, -144, MType({0x30a16898f3073a64, 0xe7ff96e03f832a2a})}, - {Sign::NEG, -144, MType({0xc4ed64517b2949ce, 0xdfff9e00392a8526})}, - {Sign::NEG, -144, MType({0x51e4fb4e32cf6350, 0xd7ffa4e03341df90})}, - {Sign::NEG, -144, MType({0x277672a88350bcce, 0xcfffab802dc53971})}, - {Sign::NEG, -144, MType({0x359153772a490f06, 0xc7ffb1e028b092d3})}, - {Sign::NEG, -144, MType({0xc265ece6b481a0e, 0xbfffb80023ffebc0})}, - {Sign::NEG, -144, MType({0xdb2781c03fa132f6, 0xb7ffbde01faf4440})}, - {Sign::NEG, -144, MType({0x7287c95c845ada33, 0xafffc3801bba9c5e})}, - {Sign::NEG, -144, MType({0x423b56b1263e5a77, 0xa7ffc8e0181df421})}, - {Sign::NEG, -144, MType({0x5a3752ca4c076fa3, 0x9fffce0014d54b91})}, - {Sign::NEG, -144, MType({0x6a71e2b27eb3f573, 0x97ffd2e011dca2b6})}, - {Sign::NEG, -144, MType({0xc2e21b72cff39d8f, 0x8fffd7800f2ff997})}, - {Sign::NEG, -144, MType({0x537ff612feb7ac9e, 0x87ffdbe00ccb503c})}, - {Sign::NEG, -145, MType({0x5888873333c57c18, 0xffffc00015554d55})}, - {Sign::NEG, -145, MType({0xfa51421842311c42, 0xefffc7c01193f9d1})}, - {Sign::NEG, -145, MType({0x2c4ed6de475b942c, 0xdfffcf000e4aa5fa})}, - {Sign::NEG, 
-145, MType({0xce77678cbb6fcb88, 0xcfffd5c00b7151d8})}, - {Sign::NEG, -145, MType({0xc26629a679ed3b, 0xbfffdc0008fffd78})}, - {Sign::NEG, -145, MType({0x23287cb9d3072728, 0xafffe1c006eea8e1})}, - {Sign::NEG, -145, MType({0xd5a37540fd057315, 0x9fffe7000535541c})}, - {Sign::NEG, -145, MType({0xf82e21c1fce36810, 0x8fffebc003cbff32})}, - {Sign::NEG, -146, MType({0x5588887ddde02702, 0xffffe00005555455})}, - {Sign::NEG, -146, MType({0x9ac4ed72adf5b295, 0xdfffe7800392aa14})}, - {Sign::NEG, -146, MType({0xc26648066b482, 0xbfffee00023fffaf})}, - {Sign::NEG, -146, MType({0x455a3754b292c077, 0x9ffff380014d552e})}, - {Sign::NEG, -147, MType({0x5558888833333c58, 0xfffff00001555535})}, - {Sign::NEG, -147, MType({0xe000c2665736679f, 0xbffff700008ffff5})}, - {Sign::NEG, -148, MType({0x5555888885ddde02, 0xfffff80000555551})}, - {Sign::NEG, -149, MType({0xd555588888733334, 0xfffffc0000155554})}, - {Sign::POS, 0, MType({0x0, 0x0})}, - {Sign::POS, -148, MType({0xeaaaac44444eeeef, 0x80000200000aaaaa})}, - {Sign::POS, -147, MType({0xaaaac444459999ac, 0x80000400002aaaac})}, - {Sign::POS, -147, MType({0x2000c2667596679f, 0xc00009000090000a})}, - {Sign::POS, -146, MType({0xaaac44446eeef381, 0x8000080000aaaaba})}, - {Sign::POS, -146, MType({0x655a3755f81815cc, 0xa0000c80014d557c})}, - {Sign::POS, -146, MType({0xc26684c66b482, 0xc000120002400051})}, - {Sign::POS, -146, MType({0xbac4ed7c40fb07eb, 0xe00018800392ab40})}, - {Sign::POS, -145, MType({0xaac44449999abe2c, 0x8000100002aaab2a})}, - {Sign::POS, -145, MType({0x82e21d79cbb6812, 0x9000144003cc00cd})}, - {Sign::POS, -145, MType({0xd5a37569adb01dc3, 0xa00019000535568d})}, - {Sign::POS, -145, MType({0x33287d01e8c9d1d9, 0xb0001e4006eeac74})}, - {Sign::POS, -145, MType({0xc266a32679ed48, 0xc000240009000288})}, - {Sign::POS, -145, MType({0xde77685122b2764b, 0xd0002a400b7158d1})}, - {Sign::POS, -145, MType({0x2c4ed810a8063f03, 0xe00031000e4aaf5b})}, - {Sign::POS, -145, MType({0xa5143e7be891c8f, 0xf00038401194062e})}, - {Sign::POS, -144, 
MType({0xac4444eeef3813a1, 0x800020000aaaaeaa})}, - {Sign::POS, -144, MType({0x5b7ff7fe1339025b, 0x880024200ccb5a6e})}, - {Sign::POS, -144, MType({0x42e21e26caf39e33, 0x900028800f300668})}, - {Sign::POS, -144, MType({0xf271e66fa5554bc6, 0x98002d2011dcb29e})}, - {Sign::POS, -144, MType({0x5a3757e0615cc676, 0xa000320014d55f19})}, - {Sign::POS, -144, MType({0xca3b5d8210ca5cab, 0xa8003720181e0bde})}, - {Sign::POS, -144, MType({0xf287d25f3cb032bb, 0xb0003c801bbab8f6})}, - {Sign::POS, -144, MType({0xe3278d840be28cdb, 0xb80042201faf6669})}, - {Sign::POS, -144, MType({0xc266dfe6b482076, 0xc000480024001440})}, - {Sign::POS, -144, MType({0x3d9166de380a6d3d, 0xc8004e2028b0c282})}, - {Sign::POS, -144, MType({0xa7768b356ba61e4b, 0xd00054802dc57139})}, - {Sign::POS, -144, MType({0xd9e51a1849db73c1, 0xd8005b203342206f})}, - {Sign::POS, -144, MType({0xc4ed8a9d907eb521, 0xe0006200392ad02e})}, - {Sign::POS, -144, MType({0xb8a197dea928acd7, 0xe80069203f838080})}, - {Sign::POS, -144, MType({0x65144cf7dcc72d3b, 0xf000708046503170})}, - {Sign::POS, -144, MType({0xda5a1108890d9f6a, 0xf80078204d94e308})}, - {Sign::POS, -143, MType({0xc4445999abe2ce2c, 0x800040002aaacaaa})}, - {Sign::POS, -143, MType({0x1fdbbb4f3bffc832, 0x840044102ecb2431})}, - {Sign::POS, -143, MType({0x97ff8f39ec91b4ee, 0x88004840332d7e1d})}, - {Sign::POS, -143, MType({0x74bcfcf0b3f0a95d, 0x8c004c9037d3d876})}, - {Sign::POS, -143, MType({0x2e21f80ca6813aff, 0x900051003cc03342})}, - {Sign::POS, -143, MType({0x6c3d4629170ce87f, 0x9400559041f48e87})}, - {Sign::POS, -143, MType({0x71e84e3b80a8881, 0x98005a404772ea4d})}, - {Sign::POS, -143, MType({0x6d62fdcbdd6bec3, 0x9c005f104d3d469a})}, - {Sign::POS, -143, MType({0xa375a6b701dc77c0, 0xa00064005355a375})}, - {Sign::POS, -143, MType({0x450f331826ad6b05, 0xa400691059be00e7})}, - {Sign::POS, -143, MType({0x83b60ea8bd0aa459, 0xa8006e4060785ef6})}, - {Sign::POS, -143, MType({0x277e691469dd13f5, 0xac0073906786bdab})}, - {Sign::POS, -143, MType({0x287d6e0a0d1e25eb, 
0xb00079006eeb1d0d})}, - {Sign::POS, -143, MType({0xaec94b3be9b060f5, 0xb4007e9076a77d24})}, - {Sign::POS, -143, MType({0x1279365fce280cce, 0xb80084407ebdddfa})}, - {Sign::POS, -143, MType({0xdba5732f3e83e04a, 0xbc008a1087303f95})}, - {Sign::POS, -143, MType({0xc26759679ed5b754, 0xc00090009000a200})}, - {Sign::POS, -143, MType({0xaed95aca5edb5109, 0xc400961099310543})}, - {Sign::POS, -143, MType({0xb917091d2687160f, 0xc8009c40a2c36967})}, - {Sign::POS, -143, MType({0x293d1c2a0378e75d, 0xcc00a290acb9ce76})}, - {Sign::POS, -143, MType({0x776977bf9766f5a7, 0xd000a900b7163478})}, - {Sign::POS, -143, MType({0x4bbb31b14776a18b, 0xd400af90c1da9b78})}, - {Sign::POS, -143, MType({0x7e5297d76c8564ba, 0xd800b640cd09037f})}, - {Sign::POS, -143, MType({0x1751360f8461c447, 0xdc00bd10d8a36c98})}, - {Sign::POS, -143, MType({0x4ed9dc3c63f44c41, 0xe000c400e4abd6cc})}, - {Sign::POS, -143, MType({0x8d10a4466a5894d5, 0xe400cb10f1244226})}, - {Sign::POS, -143, MType({0x6a1af81bb4e6510e, 0xe800d240fe0eaeb1})}, - {Sign::POS, -143, MType({0xae1f97b0542a677a, 0xec00d9910b6d1c77})}, - {Sign::POS, -143, MType({0x51469efe81d014cc, 0xf000e10119418b84})}, - {Sign::POS, -143, MType({0x7bb98c06d77a18b4, 0xf400e891278dfbe2})}, - {Sign::POS, -143, MType({0x85a344d0868bed17, 0xf800f04136546d9d})}, - {Sign::POS, -143, MType({0xf7301d6990e307cc, 0xfc00f8114596e0c0})}, - {Sign::POS, -142, MType({0x4446eef38140138f, 0x80008000aaabaaac})}, - {Sign::POS, -142, MType({0x10f5e43296105497, 0x82008408b2cbe5b8})}, - {Sign::POS, -142, MType({0xedbd4f83ef63f730, 0x84008820bb2d2189})}, - {Sign::POS, -142, MType({0xfeb654fd541c638e, 0x86008c48c3d05e27})}, - {Sign::POS, -142, MType({0x7ffadeb8882f7674, 0x88009080ccb69b98})}, - {Sign::POS, -142, MType({0xc5a59fd36bd44397, 0x8a0094c8d5e0d9e1})}, - {Sign::POS, -142, MType({0x3bd217701b27dddb, 0x8c009920df50190a})}, - {Sign::POS, -142, MType({0x669c93b50e4a2595, 0x8e009d88e9055918})}, - {Sign::POS, -142, MType({0xe22234cd39f29cd4, 0x9000a200f3019a12})}, - {Sign::POS, 
-142, MType({0x6280efe8307d41d9, 0x9200a688fd45dc00})}, - {Sign::POS, -142, MType({0xb3d7923a436f6fc4, 0x9400ab2107d31ee7})}, - {Sign::POS, -142, MType({0xba45c3fca574c5a0, 0x9600afc912aa62cf})}, - {Sign::POS, -142, MType({0x71ec0b6d8cd413d1, 0x9800b4811dcca7bf})}, - {Sign::POS, -142, MType({0xeeebcfd0565c5006, 0x9a00b949293aedbd})}, - {Sign::POS, -142, MType({0x5d675c6da8c98fc3, 0x9c00be2134f634d2})}, - {Sign::POS, -142, MType({0x181e39398a2099a, 0x9e00c30940ff7d04})}, - {Sign::POS, -142, MType({0x375f8195cc8b1d29, 0xa000c8014d57c65a})}, + {Sign::NEG, -142, 0x9fff3801'4d52e45a'374b2940'76d669c3_u128}, + {Sign::NEG, -142, 0x9dff3cf9'40fad85a'7f6f05dc'dbeb776e_u128}, + {Sign::NEG, -142, 0x9bff41e1'34f1cb36'3d55e21d'41bbadf9_u128}, + {Sign::NEG, -142, 0x99ff46b9'2936bcf4'ccdba2d5'4aadbc5c_u128}, + {Sign::NEG, -142, 0x97ff4b81'1dc8ad9d'71dd16d3'073f79b2_u128}, + {Sign::NEG, -142, 0x95ff5039'12a69d37'5837f3df'1a58dd48_u128}, + {Sign::NEG, -142, 0x93ff54e1'07cf8bc9'93cad3bc'dd26fd6d_u128}, + {Sign::NEG, -142, 0x91ff5978'fd42795b'2075312a'827f14fa_u128}, + {Sign::NEG, -142, 0x8fff5e00'f2fe65f2'e21764e1'39c98f60_u128}, + {Sign::NEG, -142, 0x8dff6278'e9025197'a492a295'51751b4c_u128}, + {Sign::NEG, -142, 0x8bff66e0'df4d3c50'1bc8f5f6'58f1c3a2_u128}, + {Sign::NEG, -142, 0x89ff6b38'd5de2622'e39d3faf'42340ed7_u128}, + {Sign::NEG, -142, 0x87ff6f80'ccb40f16'7ff33266'82c02485_u128}, + {Sign::NEG, -142, 0x85ff73b8'c3cdf731'5caf4fbe'343cf928_u128}, + {Sign::NEG, -142, 0x83ff77e0'bb2ade79'cdb6e554'348f7fe8_u128}, + {Sign::NEG, -142, 0x81ff7bf8'b2c9c4f6'0ef009c2'457de25d_u128}, + {Sign::NEG, -143, 0xffff0001'55535558'8883333c'57b57c74_u128}, + {Sign::NEG, -143, 0xfbff07f1'45931f44'f32668f3'9c70d183_u128}, + {Sign::NEG, -143, 0xf7ff0fc1'3650e7bd'459a73c6'a6486fe3_u128}, + {Sign::NEG, -143, 0xf3ff1771'278aaecd'37b18cca'7dd3a29f_u128}, + {Sign::NEG, -143, 0xefff1f01'193e7480'513f610d'21bcfc78_u128}, + {Sign::NEG, -143, 0xebff2671'0b6a38e1'ea190b95'c0690b7b_u128}, + {Sign::NEG, -143, 
0xe7ff2dc0'fe0bfbfd'2a150f64'f0ad1743_u128}, + {Sign::NEG, -143, 0xe3ff34f0'f121bddd'090b5174'e995e9d1_u128}, + {Sign::NEG, -143, 0xdfff3c00'e4a97e8c'4ed512b9'b93ea2bf_u128}, + {Sign::NEG, -143, 0xdbff42f0'd8a13e15'934cea21'7ab794a2_u128}, + {Sign::NEG, -143, 0xd7ff49c0'cd06fc83'3e4ebe94'8afd2c76_u128}, + {Sign::NEG, -143, 0xd3ff5070'c1d8b9df'87b7c0f5'bcfee2e1_u128}, + {Sign::NEG, -143, 0xcfff5700'b7147634'77666622'8cb6371b_u128}, + {Sign::NEG, -143, 0xcbff5d70'acb8318b'e53a60f3'514db358_u128}, + {Sign::NEG, -143, 0xc7ff63c0'a2c1ebef'79149c3b'6e57fa86_u128}, + {Sign::NEG, -143, 0xc3ff69f0'992fa568'aad734c9'8416df2a_u128}, + {Sign::NEG, -143, 0xbfff7000'8fff5e00'c2657367'9ed28334_u128}, + {Sign::NEG, -143, 0xbbff75f0'872f15c0'd7a3c6db'6540809f_u128}, + {Sign::NEG, -143, 0xb7ff7bc0'7ebcccb1'd277bde6'45fb1aad_u128}, + {Sign::NEG, -143, 0xb3ff8170'76a682dc'6ac80145'a4087793_u128}, + {Sign::NEG, -143, 0xafff8700'6eea3849'287c4db3'0271e265_u128}, + {Sign::NEG, -143, 0xabff8c70'6785ed00'637d6de4'2eeb151e_u128}, + {Sign::NEG, -143, 0xa7ff91c0'6077a10a'43b5348b'6b898a8c_u128}, + {Sign::NEG, -143, 0xa3ff96f0'59bd546e'c10e7657'978bd7f6_u128}, + {Sign::NEG, -143, 0x9fff9c00'53550735'a37503f4'57310e59_u128}, + {Sign::NEG, -143, 0x9bffa0f0'4d3cb966'82d5a40a'3aa022ff_u128}, + {Sign::NEG, -143, 0x97ffa5c0'47726b08'c71e0d3e'e3df5f4d_u128}, + {Sign::NEG, -143, 0x93ffaa70'41f41c23'a83ce035'2bdbd79b_u128}, + {Sign::NEG, -143, 0x8fffaf00'3cbfccbe'2e21a18d'4680e8e4_u128}, + {Sign::NEG, -143, 0x8bffb370'37d37cdf'30bcb3e4'e5dfbd28_u128}, + {Sign::NEG, -143, 0x87ffb7c0'332d2c8d'57ff51d7'5c66d64a_u128}, + {Sign::NEG, -143, 0x83ffbbf0'2ecadbcf'1bdb87fd'be299f43_u128}, + {Sign::NEG, -144, 0xffff8000'55551555'88885dde'02700703_u128}, + {Sign::NEG, -144, 0xf7ff87e0'4d94724c'd259ca80'3a0c1870_u128}, + {Sign::NEG, -144, 0xefff8f80'464fce8f'e5141308'51c7070a_u128}, + {Sign::NEG, -144, 0xe7ff96e0'3f832a2a'30a16898'f3073a64_u128}, + {Sign::NEG, -144, 0xdfff9e00'392a8526'c4ed6451'7b2949ce_u128}, + 
{Sign::NEG, -144, 0xd7ffa4e0'3341df90'51e4fb4e'32cf6350_u128}, + {Sign::NEG, -144, 0xcfffab80'2dc53971'277672a8'8350bcce_u128}, + {Sign::NEG, -144, 0xc7ffb1e0'28b092d3'35915377'2a490f06_u128}, + {Sign::NEG, -144, 0xbfffb800'23ffebc0'0c265ece'6b481a0e_u128}, + {Sign::NEG, -144, 0xb7ffbde0'1faf4440'db2781c0'3fa132f6_u128}, + {Sign::NEG, -144, 0xafffc380'1bba9c5e'7287c95c'845ada33_u128}, + {Sign::NEG, -144, 0xa7ffc8e0'181df421'423b56b1'263e5a77_u128}, + {Sign::NEG, -144, 0x9fffce00'14d54b91'5a3752ca'4c076fa3_u128}, + {Sign::NEG, -144, 0x97ffd2e0'11dca2b6'6a71e2b2'7eb3f573_u128}, + {Sign::NEG, -144, 0x8fffd780'0f2ff997'c2e21b72'cff39d8f_u128}, + {Sign::NEG, -144, 0x87ffdbe0'0ccb503c'537ff612'feb7ac9e_u128}, + {Sign::NEG, -145, 0xffffc000'15554d55'58888733'33c57c18_u128}, + {Sign::NEG, -145, 0xefffc7c0'1193f9d1'fa514218'42311c42_u128}, + {Sign::NEG, -145, 0xdfffcf00'0e4aa5fa'2c4ed6de'475b942c_u128}, + {Sign::NEG, -145, 0xcfffd5c0'0b7151d8'ce77678c'bb6fcb88_u128}, + {Sign::NEG, -145, 0xbfffdc00'08fffd78'00c26629'a679ed3b_u128}, + {Sign::NEG, -145, 0xafffe1c0'06eea8e1'23287cb9'd3072728_u128}, + {Sign::NEG, -145, 0x9fffe700'0535541c'd5a37540'fd057315_u128}, + {Sign::NEG, -145, 0x8fffebc0'03cbff32'f82e21c1'fce36810_u128}, + {Sign::NEG, -146, 0xffffe000'05555455'5588887d'dde02702_u128}, + {Sign::NEG, -146, 0xdfffe780'0392aa14'9ac4ed72'adf5b295_u128}, + {Sign::NEG, -146, 0xbfffee00'023fffaf'000c2664'8066b482_u128}, + {Sign::NEG, -146, 0x9ffff380'014d552e'455a3754'b292c077_u128}, + {Sign::NEG, -147, 0xfffff000'01555535'55588888'33333c58_u128}, + {Sign::NEG, -147, 0xbffff700'008ffff5'e000c266'5736679f_u128}, + {Sign::NEG, -148, 0xfffff800'00555551'55558888'85ddde02_u128}, + {Sign::NEG, -149, 0xfffffc00'00155554'd5555888'88733334_u128}, + {Sign::POS, 0, 0_u128}, + {Sign::POS, -148, 0x80000200'000aaaaa'eaaaac44'444eeeef_u128}, + {Sign::POS, -147, 0x80000400'002aaaac'aaaac444'459999ac_u128}, + {Sign::POS, -147, 0xc0000900'0090000a'2000c266'7596679f_u128}, + {Sign::POS, -146, 
0x80000800'00aaaaba'aaac4444'6eeef381_u128}, + {Sign::POS, -146, 0xa0000c80'014d557c'655a3755'f81815cc_u128}, + {Sign::POS, -146, 0xc0001200'02400051'000c2668'4c66b482_u128}, + {Sign::POS, -146, 0xe0001880'0392ab40'bac4ed7c'40fb07eb_u128}, + {Sign::POS, -145, 0x80001000'02aaab2a'aac44449'999abe2c_u128}, + {Sign::POS, -145, 0x90001440'03cc00cd'082e21d7'9cbb6812_u128}, + {Sign::POS, -145, 0xa0001900'0535568d'd5a37569'adb01dc3_u128}, + {Sign::POS, -145, 0xb0001e40'06eeac74'33287d01'e8c9d1d9_u128}, + {Sign::POS, -145, 0xc0002400'09000288'00c266a3'2679ed48_u128}, + {Sign::POS, -145, 0xd0002a40'0b7158d1'de776851'22b2764b_u128}, + {Sign::POS, -145, 0xe0003100'0e4aaf5b'2c4ed810'a8063f03_u128}, + {Sign::POS, -145, 0xf0003840'1194062e'0a5143e7'be891c8f_u128}, + {Sign::POS, -144, 0x80002000'0aaaaeaa'ac4444ee'ef3813a1_u128}, + {Sign::POS, -144, 0x88002420'0ccb5a6e'5b7ff7fe'1339025b_u128}, + {Sign::POS, -144, 0x90002880'0f300668'42e21e26'caf39e33_u128}, + {Sign::POS, -144, 0x98002d20'11dcb29e'f271e66f'a5554bc6_u128}, + {Sign::POS, -144, 0xa0003200'14d55f19'5a3757e0'615cc676_u128}, + {Sign::POS, -144, 0xa8003720'181e0bde'ca3b5d82'10ca5cab_u128}, + {Sign::POS, -144, 0xb0003c80'1bbab8f6'f287d25f'3cb032bb_u128}, + {Sign::POS, -144, 0xb8004220'1faf6669'e3278d84'0be28cdb_u128}, + {Sign::POS, -144, 0xc0004800'24001440'0c266dfe'6b482076_u128}, + {Sign::POS, -144, 0xc8004e20'28b0c282'3d9166de'380a6d3d_u128}, + {Sign::POS, -144, 0xd0005480'2dc57139'a7768b35'6ba61e4b_u128}, + {Sign::POS, -144, 0xd8005b20'3342206f'd9e51a18'49db73c1_u128}, + {Sign::POS, -144, 0xe0006200'392ad02e'c4ed8a9d'907eb521_u128}, + {Sign::POS, -144, 0xe8006920'3f838080'b8a197de'a928acd7_u128}, + {Sign::POS, -144, 0xf0007080'46503170'65144cf7'dcc72d3b_u128}, + {Sign::POS, -144, 0xf8007820'4d94e308'da5a1108'890d9f6a_u128}, + {Sign::POS, -143, 0x80004000'2aaacaaa'c4445999'abe2ce2c_u128}, + {Sign::POS, -143, 0x84004410'2ecb2431'1fdbbb4f'3bffc832_u128}, + {Sign::POS, -143, 0x88004840'332d7e1d'97ff8f39'ec91b4ee_u128}, + 
{Sign::POS, -143, 0x8c004c90'37d3d876'74bcfcf0'b3f0a95d_u128}, + {Sign::POS, -143, 0x90005100'3cc03342'2e21f80c'a6813aff_u128}, + {Sign::POS, -143, 0x94005590'41f48e87'6c3d4629'170ce87f_u128}, + {Sign::POS, -143, 0x98005a40'4772ea4d'071e84e3'b80a8881_u128}, + {Sign::POS, -143, 0x9c005f10'4d3d469a'06d62fdc'bdd6bec3_u128}, + {Sign::POS, -143, 0xa0006400'5355a375'a375a6b7'01dc77c0_u128}, + {Sign::POS, -143, 0xa4006910'59be00e7'450f3318'26ad6b05_u128}, + {Sign::POS, -143, 0xa8006e40'60785ef6'83b60ea8'bd0aa459_u128}, + {Sign::POS, -143, 0xac007390'6786bdab'277e6914'69dd13f5_u128}, + {Sign::POS, -143, 0xb0007900'6eeb1d0d'287d6e0a'0d1e25eb_u128}, + {Sign::POS, -143, 0xb4007e90'76a77d24'aec94b3b'e9b060f5_u128}, + {Sign::POS, -143, 0xb8008440'7ebdddfa'1279365f'ce280cce_u128}, + {Sign::POS, -143, 0xbc008a10'87303f95'dba5732f'3e83e04a_u128}, + {Sign::POS, -143, 0xc0009000'9000a200'c2675967'9ed5b754_u128}, + {Sign::POS, -143, 0xc4009610'99310543'aed95aca'5edb5109_u128}, + {Sign::POS, -143, 0xc8009c40'a2c36967'b917091d'2687160f_u128}, + {Sign::POS, -143, 0xcc00a290'acb9ce76'293d1c2a'0378e75d_u128}, + {Sign::POS, -143, 0xd000a900'b7163478'776977bf'9766f5a7_u128}, + {Sign::POS, -143, 0xd400af90'c1da9b78'4bbb31b1'4776a18b_u128}, + {Sign::POS, -143, 0xd800b640'cd09037f'7e5297d7'6c8564ba_u128}, + {Sign::POS, -143, 0xdc00bd10'd8a36c98'1751360f'8461c447_u128}, + {Sign::POS, -143, 0xe000c400'e4abd6cc'4ed9dc3c'63f44c41_u128}, + {Sign::POS, -143, 0xe400cb10'f1244226'8d10a446'6a5894d5_u128}, + {Sign::POS, -143, 0xe800d240'fe0eaeb1'6a1af81b'b4e6510e_u128}, + {Sign::POS, -143, 0xec00d991'0b6d1c77'ae1f97b0'542a677a_u128}, + {Sign::POS, -143, 0xf000e101'19418b84'51469efe'81d014cc_u128}, + {Sign::POS, -143, 0xf400e891'278dfbe2'7bb98c06'd77a18b4_u128}, + {Sign::POS, -143, 0xf800f041'36546d9d'85a344d0'868bed17_u128}, + {Sign::POS, -143, 0xfc00f811'4596e0c0'f7301d69'90e307cc_u128}, + {Sign::POS, -142, 0x80008000'aaabaaac'4446eef3'8140138f_u128}, + {Sign::POS, -142, 
0x82008408'b2cbe5b8'10f5e432'96105497_u128}, + {Sign::POS, -142, 0x84008820'bb2d2189'edbd4f83'ef63f730_u128}, + {Sign::POS, -142, 0x86008c48'c3d05e27'feb654fd'541c638e_u128}, + {Sign::POS, -142, 0x88009080'ccb69b98'7ffadeb8'882f7674_u128}, + {Sign::POS, -142, 0x8a0094c8'd5e0d9e1'c5a59fd3'6bd44397_u128}, + {Sign::POS, -142, 0x8c009920'df50190a'3bd21770'1b27dddb_u128}, + {Sign::POS, -142, 0x8e009d88'e9055918'669c93b5'0e4a2595_u128}, + {Sign::POS, -142, 0x9000a200'f3019a12'e22234cd'39f29cd4_u128}, + {Sign::POS, -142, 0x9200a688'fd45dc00'6280efe8'307d41d9_u128}, + {Sign::POS, -142, 0x9400ab21'07d31ee7'b3d7923a'436f6fc4_u128}, + {Sign::POS, -142, 0x9600afc9'12aa62cf'ba45c3fc'a574c5a0_u128}, + {Sign::POS, -142, 0x9800b481'1dcca7bf'71ec0b6d'8cd413d1_u128}, + {Sign::POS, -142, 0x9a00b949'293aedbd'eeebcfd0'565c5006_u128}, + {Sign::POS, -142, 0x9c00be21'34f634d2'5d675c6d'a8c98fc3_u128}, + {Sign::POS, -142, 0x9e00c309'40ff7d04'0181e393'98a2099a_u128}, + {Sign::POS, -142, 0xa000c801'4d57c65a'375f8195'cc8b1d29_u128}, }, // -log(r) for the fourth step, generated by SageMath with: @@ -560,139 +563,139 @@ alignas(64) const LogRR LOG_TABLE = { // r = 2^-28 * round( 2^28 / (1 + i*2^(-28)) ); // s, m, e = RealField(128)(r).log().sign_mantissa_exponent(); // print("{Sign::POS," if (s == -1) else "{Sign::NEG,", e, ", - // MType({", hex(m % 2^64), ",", hex((m >> 64) % 2^64), "})},"); + // format_hex(m), "},"); /* .step_4 = */ { - {Sign::NEG, -149, MType({0x4cd24d68ff2f11ae, 0x81fffef7f002cb2b})}, - {Sign::NEG, -150, MType({0x455555888887ddde, 0xfffffe0000055555})}, - {Sign::NEG, -150, MType({0xf0fa101f52b3971f, 0xfbfffe0fe0051653})}, - {Sign::NEG, -150, MType({0x9c9329d659ed3734, 0xf7fffe1f8004d94a})}, - {Sign::NEG, -150, MType({0x4821006d9b58462e, 0xf3fffe2ee0049e31})}, - {Sign::NEG, -150, MType({0xf3a3f025142f8c21, 0xeffffe3e000464ff})}, - {Sign::NEG, -150, MType({0x9f1c53bcc1c4b11c, 0xebfffe4ce0042dae})}, - {Sign::NEG, -150, MType({0x4a8a8474a17fdd30, 0xe7fffe5b8003f835})}, - 
{Sign::NEG, -150, MType({0xf5eeda0cb0df586d, 0xe3fffe69e003c48b})}, - {Sign::NEG, -150, MType({0xa149aac4ed772adf, 0xdffffe78000392aa})}, - {Sign::NEG, -150, MType({0x4c9b4b5d54f0bc96, 0xdbfffe85e0036289})}, - {Sign::NEG, -150, MType({0xf7e40f15e50a759f, 0xd7fffe938003341f})}, - {Sign::NEG, -150, MType({0xa32447ae9b975e05, 0xd3fffea0e0030766})}, - {Sign::NEG, -150, MType({0x4e5c4567767ebdd5, 0xcffffeae0002dc55})}, - {Sign::NEG, -150, MType({0xf98c570073bbbd19, 0xcbfffebae002b2e3})}, - {Sign::NEG, -150, MType({0xa4b4c9b9915d03dd, 0xc7fffec780028b0a})}, - {Sign::NEG, -150, MType({0x4fd5e952cd845a28, 0xc3fffed3e00264c1})}, - {Sign::NEG, -150, MType({0xfaf0000c26664806, 0xbffffee000023fff})}, - {Sign::NEG, -150, MType({0xa60356a59a49b57f, 0xbbfffeebe0021cbe})}, - {Sign::NEG, -150, MType({0x5110345f27878a9b, 0xb7fffef78001faf5})}, - {Sign::NEG, -150, MType({0xfc16def8cc8a4f61, 0xb3ffff02e001da9b})}, - {Sign::NEG, -150, MType({0xa7179ab287cdcbd8, 0xafffff0e0001bbaa})}, - {Sign::NEG, -150, MType({0x5212aa4c57dea809, 0xabffff18e0019e19})}, - {Sign::NEG, -150, MType({0xfd084f063b5a0bf8, 0xa7ffff23800181df})}, - {Sign::NEG, -150, MType({0xa7f8c8a030ed3fab, 0xa3ffff2de00166f6})}, - {Sign::NEG, -150, MType({0x52e4555a37554b29, 0x9fffff3800014d55})}, - {Sign::NEG, -150, MType({0xfdcb31f44d5e9676, 0x9bffff41e00134f3})}, - {Sign::NEG, -150, MType({0xa8ad99ae71e48997, 0x97ffff4b80011dca})}, - {Sign::NEG, -150, MType({0x538bc648a3d12c90, 0x93ffff54e00107d1})}, - {Sign::NEG, -150, MType({0xfe65f002e21cc765, 0x8fffff5e0000f2ff})}, - {Sign::NEG, -150, MType({0xa93c4d9d2bcd821a, 0x8bffff66e000df4e})}, - {Sign::NEG, -150, MType({0x540f14577ff704b2, 0x87ffff6f8000ccb5})}, - {Sign::NEG, -150, MType({0xfede77f1ddba1731, 0x83ffff77e000bb2b})}, - {Sign::NEG, -151, MType({0x5355555888888333, 0xffffff0000015555})}, - {Sign::NEG, -151, MType({0xa8e7ba8d659ed7dc, 0xf7ffff0fc0013652})}, - {Sign::NEG, -151, MType({0xfe747e025142fc61, 0xefffff1f0001193f})}, - {Sign::NEG, -151, 
MType({0x53fbfb374a1800c7, 0xe7ffff2dc000fe0d})}, - {Sign::NEG, -151, MType({0xa97e8aac4ed77513, 0xdfffff3c0000e4aa})}, - {Sign::NEG, -151, MType({0xfefc81e15e50a947, 0xd7ffff49c000cd07})}, - {Sign::NEG, -151, MType({0x547633567767ed66, 0xcfffff570000b715})}, - {Sign::NEG, -151, MType({0xa9ebee8b9915d174, 0xc7ffff63c000a2c2})}, - {Sign::NEG, -151, MType({0xff5e0000c2666573, 0xbfffff7000008fff})}, - {Sign::NEG, -151, MType({0x54ccb135f2787966, 0xb7ffff7bc0007ebd})}, - {Sign::NEG, -151, MType({0xaa3848ab287cdd4e, 0xafffff8700006eea})}, - {Sign::NEG, -151, MType({0xffa109e063b5a12d, 0xa7ffff91c0006077})}, - {Sign::NEG, -151, MType({0x55073555a3755504, 0x9fffff9c00005355})}, - {Sign::NEG, -151, MType({0xaa6b088ae71e48d5, 0x97ffffa5c0004772})}, - {Sign::NEG, -151, MType({0xffccbe002e21cca2, 0x8fffffaf00003cbf})}, - {Sign::NEG, -151, MType({0x552c8d3577ff706a, 0x87ffffb7c000332d})}, - {Sign::NEG, -152, MType({0x551555558888885e, 0xffffff8000005555})}, - {Sign::NEG, -152, MType({0xffce8fc025142fe3, 0xefffff8f8000464f})}, - {Sign::NEG, -152, MType({0xaa8526aac4ed7764, 0xdfffff9e0000392a})}, - {Sign::NEG, -152, MType({0x5539711567767ee3, 0xcfffffab80002dc5})}, - {Sign::NEG, -152, MType({0xffebc0000c26665f, 0xbfffffb8000023ff})}, - {Sign::NEG, -152, MType({0xaa9c5e6ab287cdd9, 0xafffffc380001bba})}, - {Sign::NEG, -152, MType({0x554b91555a375553, 0x9fffffce000014d5})}, - {Sign::NEG, -152, MType({0xfff997c002e21ccb, 0x8fffffd780000f2f})}, - {Sign::NEG, -153, MType({0x554d555558888887, 0xffffffc000001555})}, - {Sign::NEG, -153, MType({0xaaa5fa2aac4ed777, 0xdfffffcf00000e4a})}, - {Sign::NEG, -153, MType({0xfffd780000c26666, 0xbfffffdc000008ff})}, - {Sign::NEG, -153, MType({0x55541cd555a37555, 0x9fffffe700000535})}, - {Sign::NEG, -154, MType({0x5554555555888888, 0xffffffe000000555})}, - {Sign::NEG, -154, MType({0xffffaf00000c2666, 0xbfffffee0000023f})}, - {Sign::NEG, -155, MType({0x5555355555588889, 0xfffffff000000155})}, - {Sign::NEG, -156, MType({0x5555515555558889, 
0xfffffff800000055})}, - {Sign::POS, 0, MType({0x0, 0x0})}, - {Sign::POS, -155, MType({0xaaaaacaaaaaac444, 0x800000040000002a})}, - {Sign::POS, -154, MType({0xaaaabaaaaaac4444, 0x80000008000000aa})}, - {Sign::POS, -154, MType({0x5100000c2666, 0xc000001200000240})}, - {Sign::POS, -153, MType({0xaaab2aaaaac44444, 0x80000010000002aa})}, - {Sign::POS, -153, MType({0x55568dd555a37555, 0xa000001900000535})}, - {Sign::POS, -153, MType({0x2880000c26667, 0xc000002400000900})}, - {Sign::POS, -153, MType({0xaaaf5b2aac4ed778, 0xe000003100000e4a})}, - {Sign::POS, -152, MType({0xaaaeaaaaac444445, 0x8000002000000aaa})}, - {Sign::POS, -152, MType({0x6684002e21cce, 0x9000002880000f30})}, - {Sign::POS, -152, MType({0x555f19555a375558, 0xa0000032000014d5})}, - {Sign::POS, -152, MType({0xaab8f6eab287cde2, 0xb000003c80001bba})}, - {Sign::POS, -152, MType({0x1440000c26666e, 0xc000004800002400})}, - {Sign::POS, -152, MType({0x5571399567767efb, 0xd000005480002dc5})}, - {Sign::POS, -152, MType({0xaad02eaac4ed778b, 0xe00000620000392a})}, - {Sign::POS, -152, MType({0x3170402514301d, 0xf000007080004650})}, - {Sign::POS, -151, MType({0xaacaaaaac444445a, 0x8000004000002aaa})}, - {Sign::POS, -151, MType({0x557e1d7577ff70a7, 0x880000484000332d})}, - {Sign::POS, -151, MType({0x3342002e21ccf8, 0x9000005100003cc0})}, - {Sign::POS, -151, MType({0xaaea4ccae71e494d, 0x9800005a40004772})}, - {Sign::POS, -151, MType({0x55a37555a37555a7, 0xa000006400005355})}, - {Sign::POS, -151, MType({0x5ef62063b5a207, 0xa800006e40006078})}, - {Sign::POS, -151, MType({0xab1d0cab287cde6e, 0xb000007900006eea})}, - {Sign::POS, -151, MType({0x55ddf975f2787ade, 0xb800008440007ebd})}, - {Sign::POS, -151, MType({0xa20000c2666759, 0xc000009000009000})}, - {Sign::POS, -151, MType({0xab6966cb9915d3e1, 0xc800009c4000a2c2})}, - {Sign::POS, -151, MType({0x563477567767f078, 0xd00000a90000b715})}, - {Sign::POS, -151, MType({0x1037e215e50ad20, 0xd80000b64000cd08})}, - {Sign::POS, -151, MType({0xabd6caac4ed779dc, 0xe00000c40000e4aa})}, 
- {Sign::POS, -151, MType({0x56aeaf774a1806b0, 0xe80000d24000fe0d})}, - {Sign::POS, -151, MType({0x18b82025143039f, 0xf00000e100011940})}, - {Sign::POS, -151, MType({0xac6d9acd659ee0ad, 0xf80000f040013652})}, - {Sign::POS, -150, MType({0xabaaaaac444446ef, 0x800000800000aaaa})}, - {Sign::POS, -150, MType({0x1218811ddba1d9b, 0x840000882000bb2c})}, - {Sign::POS, -150, MType({0x569b96577ff70c5f, 0x880000908000ccb5})}, - {Sign::POS, -150, MType({0xac1907bd2bcd8b3b, 0x8c0000992000df4e})}, - {Sign::POS, -150, MType({0x19a1002e21cd235, 0x900000a20000f300})}, - {Sign::POS, -150, MType({0x571ee468a3d1394e, 0x940000ab200107d1})}, - {Sign::POS, -150, MType({0xaca7bbae71e4988b, 0x980000b480011dca})}, - {Sign::POS, -150, MType({0x234ce144d5ea7f0, 0x9c0000be200134f4})}, - {Sign::POS, -150, MType({0x57c6555a37555f82, 0xa00000c800014d55})}, - {Sign::POS, -150, MType({0xad5c8cc030ed5744, 0xa40000d2200166f6})}, - {Sign::POS, -150, MType({0x2f7b1063b5a273b, 0xa80000dc800181e0})}, - {Sign::POS, -150, MType({0x5898006c57dec76f, 0xac0000e720019e19})}, - {Sign::POS, -150, MType({0xae3dbab287cdefe3, 0xb00000f20001bbaa})}, - {Sign::POS, -150, MType({0x3e92118cc8a789f, 0xb40000fd2001da9c})}, - {Sign::POS, -150, MType({0x599a765f2787b9aa, 0xb80001088001faf5})}, - {Sign::POS, -150, MType({0xaf51fec59a49eb0a, 0xbc00011420021cbe})}, - {Sign::POS, -150, MType({0x510000c266684c6, 0xc000012000024000})}, - {Sign::POS, -150, MType({0x5ad4c172cd849ee9, 0xc400012c200264c1})}, - {Sign::POS, -150, MType({0xb0a08bb9915d5179, 0xc800013880028b0a})}, - {Sign::POS, -150, MType({0x673a92073bc1480, 0xcc0001452002b2e4})}, - {Sign::POS, -150, MType({0x5c4e6567767f2009, 0xd00001520002dc55})}, - {Sign::POS, -150, MType({0xb2310dce9b97cc1d, 0xd400015f20030766})}, - {Sign::POS, -150, MType({0x81bf115e50af0c7, 0xd800016c80033420})}, - {Sign::POS, -150, MType({0x5e0f5f7d54f14614, 0xdc00017a20036289})}, - {Sign::POS, -150, MType({0xb40baac4ed77c410, 0xe0000188000392aa})}, - {Sign::POS, -150, MType({0xa11262cb0e002c7, 
0xe40001962003c48c})}, - {Sign::POS, -150, MType({0x60202674a1809a47, 0xe80001a48003f835})}, - {Sign::POS, -150, MType({0xb63901dcc1c582a0, 0xec0001b320042dae})}, - {Sign::POS, -150, MType({0xc5c1025143073df, 0xf00001c200046500})}, - {Sign::POS, -150, MType({0x6289aa8d9b594616, 0xf40001d120049e31})}, - {Sign::POS, -150, MType({0xb8c22bd659ee5155, 0xf80001e08004d94a})}, - {Sign::POS, -150, MType({0xf05f03f52b4cdae, 0xfc0001f020051654})}, - {Sign::POS, -149, MType({0xb2aaaac44444999a, 0x800001000002aaaa})}, + {Sign::NEG, -149, 0x81fffef7'f002cb2b'4cd24d68'ff2f11ae_u128}, + {Sign::NEG, -150, 0xfffffe00'00055555'45555588'8887ddde_u128}, + {Sign::NEG, -150, 0xfbfffe0f'e0051653'f0fa101f'52b3971f_u128}, + {Sign::NEG, -150, 0xf7fffe1f'8004d94a'9c9329d6'59ed3734_u128}, + {Sign::NEG, -150, 0xf3fffe2e'e0049e31'4821006d'9b58462e_u128}, + {Sign::NEG, -150, 0xeffffe3e'000464ff'f3a3f025'142f8c21_u128}, + {Sign::NEG, -150, 0xebfffe4c'e0042dae'9f1c53bc'c1c4b11c_u128}, + {Sign::NEG, -150, 0xe7fffe5b'8003f835'4a8a8474'a17fdd30_u128}, + {Sign::NEG, -150, 0xe3fffe69'e003c48b'f5eeda0c'b0df586d_u128}, + {Sign::NEG, -150, 0xdffffe78'000392aa'a149aac4'ed772adf_u128}, + {Sign::NEG, -150, 0xdbfffe85'e0036289'4c9b4b5d'54f0bc96_u128}, + {Sign::NEG, -150, 0xd7fffe93'8003341f'f7e40f15'e50a759f_u128}, + {Sign::NEG, -150, 0xd3fffea0'e0030766'a32447ae'9b975e05_u128}, + {Sign::NEG, -150, 0xcffffeae'0002dc55'4e5c4567'767ebdd5_u128}, + {Sign::NEG, -150, 0xcbfffeba'e002b2e3'f98c5700'73bbbd19_u128}, + {Sign::NEG, -150, 0xc7fffec7'80028b0a'a4b4c9b9'915d03dd_u128}, + {Sign::NEG, -150, 0xc3fffed3'e00264c1'4fd5e952'cd845a28_u128}, + {Sign::NEG, -150, 0xbffffee0'00023fff'faf0000c'26664806_u128}, + {Sign::NEG, -150, 0xbbfffeeb'e0021cbe'a60356a5'9a49b57f_u128}, + {Sign::NEG, -150, 0xb7fffef7'8001faf5'5110345f'27878a9b_u128}, + {Sign::NEG, -150, 0xb3ffff02'e001da9b'fc16def8'cc8a4f61_u128}, + {Sign::NEG, -150, 0xafffff0e'0001bbaa'a7179ab2'87cdcbd8_u128}, + {Sign::NEG, -150, 
0xabffff18'e0019e19'5212aa4c'57dea809_u128}, + {Sign::NEG, -150, 0xa7ffff23'800181df'fd084f06'3b5a0bf8_u128}, + {Sign::NEG, -150, 0xa3ffff2d'e00166f6'a7f8c8a0'30ed3fab_u128}, + {Sign::NEG, -150, 0x9fffff38'00014d55'52e4555a'37554b29_u128}, + {Sign::NEG, -150, 0x9bffff41'e00134f3'fdcb31f4'4d5e9676_u128}, + {Sign::NEG, -150, 0x97ffff4b'80011dca'a8ad99ae'71e48997_u128}, + {Sign::NEG, -150, 0x93ffff54'e00107d1'538bc648'a3d12c90_u128}, + {Sign::NEG, -150, 0x8fffff5e'0000f2ff'fe65f002'e21cc765_u128}, + {Sign::NEG, -150, 0x8bffff66'e000df4e'a93c4d9d'2bcd821a_u128}, + {Sign::NEG, -150, 0x87ffff6f'8000ccb5'540f1457'7ff704b2_u128}, + {Sign::NEG, -150, 0x83ffff77'e000bb2b'fede77f1'ddba1731_u128}, + {Sign::NEG, -151, 0xffffff00'00015555'53555558'88888333_u128}, + {Sign::NEG, -151, 0xf7ffff0f'c0013652'a8e7ba8d'659ed7dc_u128}, + {Sign::NEG, -151, 0xefffff1f'0001193f'fe747e02'5142fc61_u128}, + {Sign::NEG, -151, 0xe7ffff2d'c000fe0d'53fbfb37'4a1800c7_u128}, + {Sign::NEG, -151, 0xdfffff3c'0000e4aa'a97e8aac'4ed77513_u128}, + {Sign::NEG, -151, 0xd7ffff49'c000cd07'fefc81e1'5e50a947_u128}, + {Sign::NEG, -151, 0xcfffff57'0000b715'54763356'7767ed66_u128}, + {Sign::NEG, -151, 0xc7ffff63'c000a2c2'a9ebee8b'9915d174_u128}, + {Sign::NEG, -151, 0xbfffff70'00008fff'ff5e0000'c2666573_u128}, + {Sign::NEG, -151, 0xb7ffff7b'c0007ebd'54ccb135'f2787966_u128}, + {Sign::NEG, -151, 0xafffff87'00006eea'aa3848ab'287cdd4e_u128}, + {Sign::NEG, -151, 0xa7ffff91'c0006077'ffa109e0'63b5a12d_u128}, + {Sign::NEG, -151, 0x9fffff9c'00005355'55073555'a3755504_u128}, + {Sign::NEG, -151, 0x97ffffa5'c0004772'aa6b088a'e71e48d5_u128}, + {Sign::NEG, -151, 0x8fffffaf'00003cbf'ffccbe00'2e21cca2_u128}, + {Sign::NEG, -151, 0x87ffffb7'c000332d'552c8d35'77ff706a_u128}, + {Sign::NEG, -152, 0xffffff80'00005555'55155555'8888885e_u128}, + {Sign::NEG, -152, 0xefffff8f'8000464f'ffce8fc0'25142fe3_u128}, + {Sign::NEG, -152, 0xdfffff9e'0000392a'aa8526aa'c4ed7764_u128}, + {Sign::NEG, -152, 0xcfffffab'80002dc5'55397115'67767ee3_u128}, + 
{Sign::NEG, -152, 0xbfffffb8'000023ff'ffebc000'0c26665f_u128}, + {Sign::NEG, -152, 0xafffffc3'80001bba'aa9c5e6a'b287cdd9_u128}, + {Sign::NEG, -152, 0x9fffffce'000014d5'554b9155'5a375553_u128}, + {Sign::NEG, -152, 0x8fffffd7'80000f2f'fff997c0'02e21ccb_u128}, + {Sign::NEG, -153, 0xffffffc0'00001555'554d5555'58888887_u128}, + {Sign::NEG, -153, 0xdfffffcf'00000e4a'aaa5fa2a'ac4ed777_u128}, + {Sign::NEG, -153, 0xbfffffdc'000008ff'fffd7800'00c26666_u128}, + {Sign::NEG, -153, 0x9fffffe7'00000535'55541cd5'55a37555_u128}, + {Sign::NEG, -154, 0xffffffe0'00000555'55545555'55888888_u128}, + {Sign::NEG, -154, 0xbfffffee'0000023f'ffffaf00'000c2666_u128}, + {Sign::NEG, -155, 0xfffffff0'00000155'55553555'55588889_u128}, + {Sign::NEG, -156, 0xfffffff8'00000055'55555155'55558889_u128}, + {Sign::POS, 0, 0_u128}, + {Sign::POS, -155, 0x80000004'0000002a'aaaaacaa'aaaac444_u128}, + {Sign::POS, -154, 0x80000008'000000aa'aaaabaaa'aaac4444_u128}, + {Sign::POS, -154, 0xc0000012'00000240'00005100'000c2666_u128}, + {Sign::POS, -153, 0x80000010'000002aa'aaab2aaa'aac44444_u128}, + {Sign::POS, -153, 0xa0000019'00000535'55568dd5'55a37555_u128}, + {Sign::POS, -153, 0xc0000024'00000900'00028800'00c26667_u128}, + {Sign::POS, -153, 0xe0000031'00000e4a'aaaf5b2a'ac4ed778_u128}, + {Sign::POS, -152, 0x80000020'00000aaa'aaaeaaaa'ac444445_u128}, + {Sign::POS, -152, 0x90000028'80000f30'00066840'02e21cce_u128}, + {Sign::POS, -152, 0xa0000032'000014d5'555f1955'5a375558_u128}, + {Sign::POS, -152, 0xb000003c'80001bba'aab8f6ea'b287cde2_u128}, + {Sign::POS, -152, 0xc0000048'00002400'00144000'0c26666e_u128}, + {Sign::POS, -152, 0xd0000054'80002dc5'55713995'67767efb_u128}, + {Sign::POS, -152, 0xe0000062'0000392a'aad02eaa'c4ed778b_u128}, + {Sign::POS, -152, 0xf0000070'80004650'00317040'2514301d_u128}, + {Sign::POS, -151, 0x80000040'00002aaa'aacaaaaa'c444445a_u128}, + {Sign::POS, -151, 0x88000048'4000332d'557e1d75'77ff70a7_u128}, + {Sign::POS, -151, 0x90000051'00003cc0'00334200'2e21ccf8_u128}, + {Sign::POS, -151, 
0x9800005a'40004772'aaea4cca'e71e494d_u128}, + {Sign::POS, -151, 0xa0000064'00005355'55a37555'a37555a7_u128}, + {Sign::POS, -151, 0xa800006e'40006078'005ef620'63b5a207_u128}, + {Sign::POS, -151, 0xb0000079'00006eea'ab1d0cab'287cde6e_u128}, + {Sign::POS, -151, 0xb8000084'40007ebd'55ddf975'f2787ade_u128}, + {Sign::POS, -151, 0xc0000090'00009000'00a20000'c2666759_u128}, + {Sign::POS, -151, 0xc800009c'4000a2c2'ab6966cb'9915d3e1_u128}, + {Sign::POS, -151, 0xd00000a9'0000b715'56347756'7767f078_u128}, + {Sign::POS, -151, 0xd80000b6'4000cd08'01037e21'5e50ad20_u128}, + {Sign::POS, -151, 0xe00000c4'0000e4aa'abd6caac'4ed779dc_u128}, + {Sign::POS, -151, 0xe80000d2'4000fe0d'56aeaf77'4a1806b0_u128}, + {Sign::POS, -151, 0xf00000e1'00011940'018b8202'5143039f_u128}, + {Sign::POS, -151, 0xf80000f0'40013652'ac6d9acd'659ee0ad_u128}, + {Sign::POS, -150, 0x80000080'0000aaaa'abaaaaac'444446ef_u128}, + {Sign::POS, -150, 0x84000088'2000bb2c'01218811'ddba1d9b_u128}, + {Sign::POS, -150, 0x88000090'8000ccb5'569b9657'7ff70c5f_u128}, + {Sign::POS, -150, 0x8c000099'2000df4e'ac1907bd'2bcd8b3b_u128}, + {Sign::POS, -150, 0x900000a2'0000f300'019a1002'e21cd235_u128}, + {Sign::POS, -150, 0x940000ab'200107d1'571ee468'a3d1394e_u128}, + {Sign::POS, -150, 0x980000b4'80011dca'aca7bbae'71e4988b_u128}, + {Sign::POS, -150, 0x9c0000be'200134f4'0234ce14'4d5ea7f0_u128}, + {Sign::POS, -150, 0xa00000c8'00014d55'57c6555a'37555f82_u128}, + {Sign::POS, -150, 0xa40000d2'200166f6'ad5c8cc0'30ed5744_u128}, + {Sign::POS, -150, 0xa80000dc'800181e0'02f7b106'3b5a273b_u128}, + {Sign::POS, -150, 0xac0000e7'20019e19'5898006c'57dec76f_u128}, + {Sign::POS, -150, 0xb00000f2'0001bbaa'ae3dbab2'87cdefe3_u128}, + {Sign::POS, -150, 0xb40000fd'2001da9c'03e92118'cc8a789f_u128}, + {Sign::POS, -150, 0xb8000108'8001faf5'599a765f'2787b9aa_u128}, + {Sign::POS, -150, 0xbc000114'20021cbe'af51fec5'9a49eb0a_u128}, + {Sign::POS, -150, 0xc0000120'00024000'0510000c'266684c6_u128}, + {Sign::POS, -150, 0xc400012c'200264c1'5ad4c172'cd849ee9_u128}, + 
{Sign::POS, -150, 0xc8000138'80028b0a'b0a08bb9'915d5179_u128}, + {Sign::POS, -150, 0xcc000145'2002b2e4'0673a920'73bc1480_u128}, + {Sign::POS, -150, 0xd0000152'0002dc55'5c4e6567'767f2009_u128}, + {Sign::POS, -150, 0xd400015f'20030766'b2310dce'9b97cc1d_u128}, + {Sign::POS, -150, 0xd800016c'80033420'081bf115'e50af0c7_u128}, + {Sign::POS, -150, 0xdc00017a'20036289'5e0f5f7d'54f14614_u128}, + {Sign::POS, -150, 0xe0000188'000392aa'b40baac4'ed77c410_u128}, + {Sign::POS, -150, 0xe4000196'2003c48c'0a11262c'b0e002c7_u128}, + {Sign::POS, -150, 0xe80001a4'8003f835'60202674'a1809a47_u128}, + {Sign::POS, -150, 0xec0001b3'20042dae'b63901dc'c1c582a0_u128}, + {Sign::POS, -150, 0xf00001c2'00046500'0c5c1025'143073df_u128}, + {Sign::POS, -150, 0xf40001d1'20049e31'6289aa8d'9b594616_u128}, + {Sign::POS, -150, 0xf80001e0'8004d94a'b8c22bd6'59ee5155_u128}, + {Sign::POS, -150, 0xfc0001f0'20051654'0f05f03f'52b4cdae_u128}, + {Sign::POS, -149, 0x80000100'0002aaaa'b2aaaac4'4444999a_u128}, }}; // > P = fpminimax((log(1 + x) - x)/x^2, 2, [|1, 128...|], @@ -701,9 +704,9 @@ alignas(64) const LogRR LOG_TABLE = { // > dirtyinfnorm(log(1 + x)/x - x*P, [-0x1.0002143p-29 , 0x1p-29]); // 0x1.99a3...p-121 const Float128 BIG_COEFFS[3]{ - {Sign::NEG, -129, MType({0xb59c58e5554d581c, 0x800000000006a710})}, - {Sign::POS, -129, MType({0xde05c7c94ae9cbae, 0xaaaaaaaaaaaaaabd})}, - {Sign::NEG, -128, MType({0x0, 0x8000000000000000})}, + {Sign::NEG, -129, 0x80000000'0006a710'b59c58e5'554d581c_u128}, + {Sign::POS, -129, 0xaaaaaaaa'aaaaaabd'de05c7c9'4ae9cbae_u128}, + {Sign::NEG, -128, 0x80000000'00000000'00000000'00000000_u128}, }; // Reuse the output of the fast pass range reduction. 
diff --git a/libc/src/math/generic/log10.cpp b/libc/src/math/generic/log10.cpp index b3dc8982a7fb98..66a2e455cf59b3 100644 --- a/libc/src/math/generic/log10.cpp +++ b/libc/src/math/generic/log10.cpp @@ -14,6 +14,7 @@ #include "src/__support/FPUtil/dyadic_float.h" #include "src/__support/FPUtil/multiply_add.h" #include "src/__support/common.h" +#include "src/__support/integer_literals.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include "common_constants.h" @@ -23,8 +24,8 @@ namespace LIBC_NAMESPACE { // 128-bit precision dyadic floating point numbers. using Float128 = typename fputil::DyadicFloat<128>; -using MType = typename Float128::MantissaType; using Sign = fputil::Sign; +using LIBC_NAMESPACE::operator""_u128; namespace { @@ -37,11 +38,16 @@ constexpr double HI_ERR = 0x1.0p-85; // Extra errors from P is from using x^2 to reduce evaluation latency. constexpr double P_ERR = 0x1.0p-51; -// log10(2) with 128-bit prepcision generated by SageMath with: -// sage: (s, m, e) = RealField(128)(2).log10().sign_exponent_mantissa(); -// sage: print("MType({", hex(m % 2^64), ",", hex((m >> 64) % 2^64), "})"); +// log10(2) with 128-bit precision generated by SageMath with: +// def format_hex(value): +// l = hex(value)[2:] +// n = 8 +// x = [l[i:i + n] for i in range(0, len(l), n)] +// return "0x" + "'".join(x) + "_u128" +// (s, m, e) = RealField(128)(2).log10().sign_mantissa_exponent(); +// print(format_hex(m)); const Float128 LOG10_2(Sign::POS, /*exponent=*/-129, /*mantissa=*/ - MType({0x8f8959ac0b7c9178, 0x9a209a84fbcff798})); + 0x9a209a84'fbcff798'8f8959ac'0b7c9178_u128); const LogRR LOG10_TABLE = { // -log10(r) with 128-bit precision generated by SageMath with: @@ -49,138 +55,136 @@ const LogRR LOG10_TABLE = { // for i in range(128): // r = 2^-8 * ceil( 2^8 * (1 - 2^(-8)) / (1 + i*2^(-7)) ); // s, m, e = RealField(128)(r).log10().sign_mantissa_exponent(); - // print("{Sign::POS,", e, ", MType({", hex(m % 2^64), ",", hex((m >> 64) - // % 2^64), - 
// "})},"); + // print("{Sign::POS,", e, ",", format_hex(m), "},"); /* .step_1 = */ { - {Sign::POS, 0, MType(0)}, - {Sign::POS, -136, MType({0x65af394fe05eafd3, 0xdf3b5ebbda7e186b})}, - {Sign::POS, -135, MType({0xa8fb8d87b30163b5, 0xe01d40572f029c16})}, - {Sign::POS, -134, MType({0x6bb0170e5bb5d630, 0xa8c1263ac3f57eb3})}, - {Sign::POS, -134, MType({0xfc2ea6eb0ea1370e, 0xe1e841bbc26204e5})}, - {Sign::POS, -133, MType({0xdc8a199a4bb63382, 0x8dc2eb02274d6ff4})}, - {Sign::POS, -133, MType({0x86b57ea610c7db33, 0xaacde920361dd054})}, - {Sign::POS, -133, MType({0x5f034a40e6a2f09d, 0xc81618eb15421bab})}, - {Sign::POS, -133, MType({0x594a31b2c5cc891c, 0xe59c7e66c5fedb4b})}, - {Sign::POS, -133, MType({0x221efda58221904b, 0xf477584f97b654de})}, - {Sign::POS, -132, MType({0x68a0dc47567691c9, 0x892e821975106e09})}, - {Sign::POS, -132, MType({0x10bc94f44d216b49, 0x9841c66e17dfe7da})}, - {Sign::POS, -132, MType({0xe303ea7e23c9d6fb, 0x9fd7be3318306cc5})}, - {Sign::POS, -132, MType({0xce697dbaa00d4c7d, 0xaf1cb35bf494a8dd})}, - {Sign::POS, -132, MType({0x9c216079dcf0ea96, 0xbe8380a2fa7eba5a})}, - {Sign::POS, -132, MType({0x75278940eecfc3a9, 0xc643c7758283a271})}, - {Sign::POS, -132, MType({0x2d3467d253e2d1fc, 0xd5de75ec27e4fe68})}, - {Sign::POS, -132, MType({0xead4055dcdec7b22, 0xddb904e8f1272a95})}, - {Sign::POS, -132, MType({0xe1e0dda0b3d375a4, 0xed88f6bb355fa196})}, - {Sign::POS, -132, MType({0x38dc40c4fe11e608, 0xf57e8281ade9d92d})}, - {Sign::POS, -131, MType({0x3bcdcfe7b23976cd, 0x82c2941bb20bbe1f})}, - {Sign::POS, -131, MType({0x456350b0bda452a6, 0x86cb36632807cdcd})}, - {Sign::POS, -131, MType({0x78185dcc37fda01a, 0x8eeaa306458b760a})}, - {Sign::POS, -131, MType({0x307643adbbbde1b3, 0x9301839512fc1168})}, - {Sign::POS, -131, MType({0x6c449d409f883fe3, 0x9b3dd1d550c41443})}, - {Sign::POS, -131, MType({0x8ea7b30c8b4ad886, 0x9f6356aa03c34389})}, - {Sign::POS, -131, MType({0x961c6e690d8879b4, 0xa7bd56cdde5d76a2})}, - {Sign::POS, -131, MType({0x42643ced81ec14a, 
0xabf1ea3e1d7bd7cf})}, - {Sign::POS, -131, MType({0x4742fb3d0b5cdd19, 0xb02b9af74c2f879e})}, - {Sign::POS, -131, MType({0xf7e2ab36f09e9014, 0xb8ae8671b3d7dd6c})}, - {Sign::POS, -131, MType({0x8d3fc63485e7ff13, 0xbcf7dabd87c01afc})}, - {Sign::POS, -131, MType({0xf3edc49375fbc5a5, 0xc1467f694d10a581})}, - {Sign::POS, -131, MType({0x5fcd7d0ce937375f, 0xc9f3ef07e1f3fc5e})}, - {Sign::POS, -131, MType({0x58252dada9f06111, 0xce52d50b94fa253a})}, - {Sign::POS, -131, MType({0x62f01e5ff43708ab, 0xd2b74192fae43777})}, - {Sign::POS, -131, MType({0x481d9b3131f52639, 0xd72142a84ca85abd})}, - {Sign::POS, -131, MType({0xb305ced1419fe924, 0xdb90e68b8abf14af})}, - {Sign::POS, -131, MType({0x849266a85513dc6d, 0xe48150cf32888b9c})}, - {Sign::POS, -131, MType({0x80ecf3266b4dcf4, 0xe90234c65a15e533})}, - {Sign::POS, -131, MType({0xe1e0dda0b3d375a4, 0xed88f6bb355fa196})}, - {Sign::POS, -131, MType({0xce3537a3a211b25b, 0xf215a60b6557943f})}, - {Sign::POS, -131, MType({0x5dab68307fedefcd, 0xf6a852513757dfbd})}, - {Sign::POS, -131, MType({0x1be2585c279c50a5, 0xffdfe15de3c01bac})}, - {Sign::POS, -130, MType({0x18aa302171017dcb, 0x8242724a155219f3})}, - {Sign::POS, -130, MType({0xabc7e698502d43c0, 0x849812d0ccbb5cbd})}, - {Sign::POS, -130, MType({0xc339089a51663370, 0x86f0dab1ab5822b6})}, - {Sign::POS, -130, MType({0x26f70b34ce5cf201, 0x894cd27d9f182c63})}, - {Sign::POS, -130, MType({0x676f20a87ab433df, 0x8bac02e8ac3e09ac})}, - {Sign::POS, -130, MType({0x6db4169cc4b83bc3, 0x8e0e74caae062e24})}, - {Sign::POS, -130, MType({0xcd3fdb2fad0d1fd6, 0x907431201c7f651a})}, - {Sign::POS, -130, MType({0x49d03e163250d1d4, 0x92dd410ad7bfe103})}, - {Sign::POS, -130, MType({0x9ec7dc02d5e723b9, 0x9549add2f8a3c7e0})}, - {Sign::POS, -130, MType({0x34698d03a5442573, 0x97b980e7a743d71c})}, - {Sign::POS, -130, MType({0x522904d1e47f3de, 0x9a2cc3dff7548556})}, - {Sign::POS, -130, MType({0x791a72646c87b976, 0x9ca3807bca9fe93f})}, - {Sign::POS, -130, MType({0x3826f190d655d736, 0x9f1dc0a4b9cea286})}, - {Sign::POS, 
-130, MType({0x544ab3e48199b299, 0xa19b8e6f03b60e45})}, - {Sign::POS, -130, MType({0xbe775fa82961114e, 0xa41cf41a83643487})}, - {Sign::POS, -130, MType({0x45798e5019e6c082, 0xa6a1fc13ad241953})}, - {Sign::POS, -130, MType({0x91fb1ed0cdc4d1fb, 0xa92ab0f492b772bd})}, - {Sign::POS, -130, MType({0x818b8b9cbbd17b72, 0xabb71d85ef05380d})}, - {Sign::POS, -130, MType({0xa50c2fea60c5b3b2, 0xae474cc0397f0d4f})}, - {Sign::POS, -130, MType({0x58ea34980ad8b720, 0xb0db49ccc1823c8e})}, - {Sign::POS, -130, MType({0x4b5f71941be508a4, 0xb3732006d1fbbba5})}, - {Sign::POS, -130, MType({0x9e405fb8bcb1ff1e, 0xb60edafcdd99ad1d})}, - {Sign::POS, -130, MType({0x9e405fb8bcb1ff1e, 0xb60edafcdd99ad1d})}, - {Sign::POS, -130, MType({0xf7e2ab36f09e9014, 0xb8ae8671b3d7dd6c})}, - {Sign::POS, -130, MType({0xc669639640c305bb, 0xbb522e5dbf37f63b})}, - {Sign::POS, -130, MType({0xa3dc9e464e98764b, 0xbdf9def04cf980ff})}, - {Sign::POS, -130, MType({0xffd3256b59fa9c59, 0xc0a5a490dea95b5e})}, - {Sign::POS, -130, MType({0xb0a2d48672a051a5, 0xc3558be085e3f4bc})}, - {Sign::POS, -130, MType({0xb0a2d48672a051a5, 0xc3558be085e3f4bc})}, - {Sign::POS, -130, MType({0xacb2ca5d4ca1c10e, 0xc609a1bb4aa98f59})}, - {Sign::POS, -130, MType({0x43690b9e3cde0d02, 0xc8c1f3399ca7d33b})}, - {Sign::POS, -130, MType({0x18b1fd60383f7e5a, 0xcb7e8db1cfe04827})}, - {Sign::POS, -130, MType({0x248757e5f45af3d, 0xce3f7eb9a517c969})}, - {Sign::POS, -130, MType({0x7c4acd605be48bc1, 0xd104d427de7fbcc4})}, - {Sign::POS, -130, MType({0x7c4acd605be48bc1, 0xd104d427de7fbcc4})}, - {Sign::POS, -130, MType({0x58ff63629a92652d, 0xd3ce9c15e10ec927})}, - {Sign::POS, -130, MType({0x6b49be3bd8c89f10, 0xd69ce4e16303fcdd})}, - {Sign::POS, -130, MType({0xe6dd603a881e9060, 0xd96fbd2e2814c9cc})}, - {Sign::POS, -130, MType({0xe6dd603a881e9060, 0xd96fbd2e2814c9cc})}, - {Sign::POS, -130, MType({0x89e281c98c1d705c, 0xdc4733e7cbcbfc8c})}, - {Sign::POS, -130, MType({0xdc0db7cf0cce9f32, 0xdf2358439aa5dd12})}, - {Sign::POS, -130, MType({0xfdf1c5b846db9deb, 
0xe20439c27a7c01b8})}, - {Sign::POS, -130, MType({0xfdf1c5b846db9deb, 0xe20439c27a7c01b8})}, - {Sign::POS, -130, MType({0x3dd7eab48869c402, 0xe4e9e832e2da0c05})}, - {Sign::POS, -130, MType({0x4e8fcc900b41daef, 0xe7d473b2e5db8f2a})}, - {Sign::POS, -130, MType({0x4e8fcc900b41daef, 0xe7d473b2e5db8f2a})}, - {Sign::POS, -130, MType({0x7593e1a9e917359a, 0xeac3ecb24a3ac7b4})}, - {Sign::POS, -130, MType({0xe7741396b49e1ce5, 0xedb863f4b73f982d})}, - {Sign::POS, -130, MType({0xe7741396b49e1ce5, 0xedb863f4b73f982d})}, - {Sign::POS, -130, MType({0xc8ba4f8f47b85a5c, 0xf0b1ea93f34675a7})}, - {Sign::POS, -130, MType({0x7007c1276821b705, 0xf3b09202359f9787})}, - {Sign::POS, -130, MType({0x7007c1276821b705, 0xf3b09202359f9787})}, - {Sign::POS, -130, MType({0x7ee19afe6db7e324, 0xf6b46c0c8c8fdea1})}, - {Sign::POS, -130, MType({0xedf54f37f6d40420, 0xf9bd8add584687f0})}, - {Sign::POS, -130, MType({0xedf54f37f6d40420, 0xf9bd8add584687f0})}, - {Sign::POS, -130, MType({0xefe52ccf03e7dee1, 0xfccc00fedba4e6fb})}, - {Sign::POS, -130, MType({0x1be2585c279c50a5, 0xffdfe15de3c01bac})}, - {Sign::POS, -130, MType({0x1be2585c279c50a5, 0xffdfe15de3c01bac})}, - {Sign::POS, -129, MType({0xe0b571f5c91b0446, 0x817c9fa643880404})}, - {Sign::POS, -129, MType({0x7178594bef2def59, 0x830c17427ea55eca})}, - {Sign::POS, -129, MType({0x7178594bef2def59, 0x830c17427ea55eca})}, - {Sign::POS, -129, MType({0x9a741bb171158d2a, 0x849e6196487c1d1c})}, - {Sign::POS, -129, MType({0x9a741bb171158d2a, 0x849e6196487c1d1c})}, - {Sign::POS, -129, MType({0x1a618264446cb495, 0x863388eb55ebd295})}, - {Sign::POS, -129, MType({0x71dbdbbec51d7657, 0x87cb97c3ff9eac18})}, - {Sign::POS, -129, MType({0x71dbdbbec51d7657, 0x87cb97c3ff9eac18})}, - {Sign::POS, -129, MType({0xabe0b522230f7d14, 0x896698dce4cff76c})}, - {Sign::POS, -129, MType({0xabe0b522230f7d14, 0x896698dce4cff76c})}, - {Sign::POS, -129, MType({0xd28e8adafea703b4, 0x8b04972e9d4d3011})}, - {Sign::POS, -129, MType({0x208422d83be34b27, 0x8ca59def7b5cefc5})}, - {Sign::POS, 
-129, MType({0x208422d83be34b27, 0x8ca59def7b5cefc5})}, - {Sign::POS, -129, MType({0xc385cf49402af0e4, 0x8e49b8955e3ffb8a})}, - {Sign::POS, -129, MType({0xc385cf49402af0e4, 0x8e49b8955e3ffb8a})}, - {Sign::POS, -129, MType({0xda982a614e12c6dd, 0x8ff0f2d7960a075c})}, - {Sign::POS, -129, MType({0xda982a614e12c6dd, 0x8ff0f2d7960a075c})}, - {Sign::POS, -129, MType({0x38401fc1c1b5c2c, 0x919b58b0d999bbc8})}, - {Sign::POS, -129, MType({0x38401fc1c1b5c2c, 0x919b58b0d999bbc8})}, - {Sign::POS, -129, MType({0xa9b55d3f16da746a, 0x9348f6614f821394})}, - {Sign::POS, -129, MType({0xa9b55d3f16da746a, 0x9348f6614f821394})}, - {Sign::POS, -129, MType({0x88d2d1473d4f7f5, 0x94f9d870aac256a5})}, - {Sign::POS, -129, MType({0x88d2d1473d4f7f5, 0x94f9d870aac256a5})}, - {Sign::POS, -129, MType({0x7c1e117dea19e9e6, 0x96ae0bb05c35d5bd})}, - {Sign::POS, -129, MType({0x7c1e117dea19e9e6, 0x96ae0bb05c35d5bd})}, - {Sign::POS, -129, MType({0x336db0630f536fb9, 0x98659d3dd9b12532})}, - {Sign::POS, 0, MType(0)}, + {Sign::POS, 0, 0_u128}, + {Sign::POS, -136, 0xdf3b5ebb'da7e186b'65af394f'e05eafd3_u128}, + {Sign::POS, -135, 0xe01d4057'2f029c16'a8fb8d87'b30163b5_u128}, + {Sign::POS, -134, 0xa8c1263a'c3f57eb3'6bb0170e'5bb5d630_u128}, + {Sign::POS, -134, 0xe1e841bb'c26204e5'fc2ea6eb'0ea1370e_u128}, + {Sign::POS, -133, 0x8dc2eb02'274d6ff4'dc8a199a'4bb63382_u128}, + {Sign::POS, -133, 0xaacde920'361dd054'86b57ea6'10c7db33_u128}, + {Sign::POS, -133, 0xc81618eb'15421bab'5f034a40'e6a2f09d_u128}, + {Sign::POS, -133, 0xe59c7e66'c5fedb4b'594a31b2'c5cc891c_u128}, + {Sign::POS, -133, 0xf477584f'97b654de'221efda5'8221904b_u128}, + {Sign::POS, -132, 0x892e8219'75106e09'68a0dc47'567691c9_u128}, + {Sign::POS, -132, 0x9841c66e'17dfe7da'10bc94f4'4d216b49_u128}, + {Sign::POS, -132, 0x9fd7be33'18306cc5'e303ea7e'23c9d6fb_u128}, + {Sign::POS, -132, 0xaf1cb35b'f494a8dd'ce697dba'a00d4c7d_u128}, + {Sign::POS, -132, 0xbe8380a2'fa7eba5a'9c216079'dcf0ea96_u128}, + {Sign::POS, -132, 0xc643c775'8283a271'75278940'eecfc3a9_u128}, + 
{Sign::POS, -132, 0xd5de75ec'27e4fe68'2d3467d2'53e2d1fc_u128}, + {Sign::POS, -132, 0xddb904e8'f1272a95'ead4055d'cdec7b22_u128}, + {Sign::POS, -132, 0xed88f6bb'355fa196'e1e0dda0'b3d375a4_u128}, + {Sign::POS, -132, 0xf57e8281'ade9d92d'38dc40c4'fe11e608_u128}, + {Sign::POS, -131, 0x82c2941b'b20bbe1f'3bcdcfe7'b23976cd_u128}, + {Sign::POS, -131, 0x86cb3663'2807cdcd'456350b0'bda452a6_u128}, + {Sign::POS, -131, 0x8eeaa306'458b760a'78185dcc'37fda01a_u128}, + {Sign::POS, -131, 0x93018395'12fc1168'307643ad'bbbde1b3_u128}, + {Sign::POS, -131, 0x9b3dd1d5'50c41443'6c449d40'9f883fe3_u128}, + {Sign::POS, -131, 0x9f6356aa'03c34389'8ea7b30c'8b4ad886_u128}, + {Sign::POS, -131, 0xa7bd56cd'de5d76a2'961c6e69'0d8879b4_u128}, + {Sign::POS, -131, 0xabf1ea3e'1d7bd7cf'042643ce'd81ec14a_u128}, + {Sign::POS, -131, 0xb02b9af7'4c2f879e'4742fb3d'0b5cdd19_u128}, + {Sign::POS, -131, 0xb8ae8671'b3d7dd6c'f7e2ab36'f09e9014_u128}, + {Sign::POS, -131, 0xbcf7dabd'87c01afc'8d3fc634'85e7ff13_u128}, + {Sign::POS, -131, 0xc1467f69'4d10a581'f3edc493'75fbc5a5_u128}, + {Sign::POS, -131, 0xc9f3ef07'e1f3fc5e'5fcd7d0c'e937375f_u128}, + {Sign::POS, -131, 0xce52d50b'94fa253a'58252dad'a9f06111_u128}, + {Sign::POS, -131, 0xd2b74192'fae43777'62f01e5f'f43708ab_u128}, + {Sign::POS, -131, 0xd72142a8'4ca85abd'481d9b31'31f52639_u128}, + {Sign::POS, -131, 0xdb90e68b'8abf14af'b305ced1'419fe924_u128}, + {Sign::POS, -131, 0xe48150cf'32888b9c'849266a8'5513dc6d_u128}, + {Sign::POS, -131, 0xe90234c6'5a15e533'080ecf32'66b4dcf4_u128}, + {Sign::POS, -131, 0xed88f6bb'355fa196'e1e0dda0'b3d375a4_u128}, + {Sign::POS, -131, 0xf215a60b'6557943f'ce3537a3'a211b25b_u128}, + {Sign::POS, -131, 0xf6a85251'3757dfbd'5dab6830'7fedefcd_u128}, + {Sign::POS, -131, 0xffdfe15d'e3c01bac'1be2585c'279c50a5_u128}, + {Sign::POS, -130, 0x8242724a'155219f3'18aa3021'71017dcb_u128}, + {Sign::POS, -130, 0x849812d0'ccbb5cbd'abc7e698'502d43c0_u128}, + {Sign::POS, -130, 0x86f0dab1'ab5822b6'c339089a'51663370_u128}, + {Sign::POS, -130, 
0x894cd27d'9f182c63'26f70b34'ce5cf201_u128}, + {Sign::POS, -130, 0x8bac02e8'ac3e09ac'676f20a8'7ab433df_u128}, + {Sign::POS, -130, 0x8e0e74ca'ae062e24'6db4169c'c4b83bc3_u128}, + {Sign::POS, -130, 0x90743120'1c7f651a'cd3fdb2f'ad0d1fd6_u128}, + {Sign::POS, -130, 0x92dd410a'd7bfe103'49d03e16'3250d1d4_u128}, + {Sign::POS, -130, 0x9549add2'f8a3c7e0'9ec7dc02'd5e723b9_u128}, + {Sign::POS, -130, 0x97b980e7'a743d71c'34698d03'a5442573_u128}, + {Sign::POS, -130, 0x9a2cc3df'f7548556'0522904d'1e47f3de_u128}, + {Sign::POS, -130, 0x9ca3807b'ca9fe93f'791a7264'6c87b976_u128}, + {Sign::POS, -130, 0x9f1dc0a4'b9cea286'3826f190'd655d736_u128}, + {Sign::POS, -130, 0xa19b8e6f'03b60e45'544ab3e4'8199b299_u128}, + {Sign::POS, -130, 0xa41cf41a'83643487'be775fa8'2961114e_u128}, + {Sign::POS, -130, 0xa6a1fc13'ad241953'45798e50'19e6c082_u128}, + {Sign::POS, -130, 0xa92ab0f4'92b772bd'91fb1ed0'cdc4d1fb_u128}, + {Sign::POS, -130, 0xabb71d85'ef05380d'818b8b9c'bbd17b72_u128}, + {Sign::POS, -130, 0xae474cc0'397f0d4f'a50c2fea'60c5b3b2_u128}, + {Sign::POS, -130, 0xb0db49cc'c1823c8e'58ea3498'0ad8b720_u128}, + {Sign::POS, -130, 0xb3732006'd1fbbba5'4b5f7194'1be508a4_u128}, + {Sign::POS, -130, 0xb60edafc'dd99ad1d'9e405fb8'bcb1ff1e_u128}, + {Sign::POS, -130, 0xb60edafc'dd99ad1d'9e405fb8'bcb1ff1e_u128}, + {Sign::POS, -130, 0xb8ae8671'b3d7dd6c'f7e2ab36'f09e9014_u128}, + {Sign::POS, -130, 0xbb522e5d'bf37f63b'c6696396'40c305bb_u128}, + {Sign::POS, -130, 0xbdf9def0'4cf980ff'a3dc9e46'4e98764b_u128}, + {Sign::POS, -130, 0xc0a5a490'dea95b5e'ffd3256b'59fa9c59_u128}, + {Sign::POS, -130, 0xc3558be0'85e3f4bc'b0a2d486'72a051a5_u128}, + {Sign::POS, -130, 0xc3558be0'85e3f4bc'b0a2d486'72a051a5_u128}, + {Sign::POS, -130, 0xc609a1bb'4aa98f59'acb2ca5d'4ca1c10e_u128}, + {Sign::POS, -130, 0xc8c1f339'9ca7d33b'43690b9e'3cde0d02_u128}, + {Sign::POS, -130, 0xcb7e8db1'cfe04827'18b1fd60'383f7e5a_u128}, + {Sign::POS, -130, 0xce3f7eb9'a517c969'0248757e'5f45af3d_u128}, + {Sign::POS, -130, 0xd104d427'de7fbcc4'7c4acd60'5be48bc1_u128}, + 
{Sign::POS, -130, 0xd104d427'de7fbcc4'7c4acd60'5be48bc1_u128}, + {Sign::POS, -130, 0xd3ce9c15'e10ec927'58ff6362'9a92652d_u128}, + {Sign::POS, -130, 0xd69ce4e1'6303fcdd'6b49be3b'd8c89f10_u128}, + {Sign::POS, -130, 0xd96fbd2e'2814c9cc'e6dd603a'881e9060_u128}, + {Sign::POS, -130, 0xd96fbd2e'2814c9cc'e6dd603a'881e9060_u128}, + {Sign::POS, -130, 0xdc4733e7'cbcbfc8c'89e281c9'8c1d705c_u128}, + {Sign::POS, -130, 0xdf235843'9aa5dd12'dc0db7cf'0cce9f32_u128}, + {Sign::POS, -130, 0xe20439c2'7a7c01b8'fdf1c5b8'46db9deb_u128}, + {Sign::POS, -130, 0xe20439c2'7a7c01b8'fdf1c5b8'46db9deb_u128}, + {Sign::POS, -130, 0xe4e9e832'e2da0c05'3dd7eab4'8869c402_u128}, + {Sign::POS, -130, 0xe7d473b2'e5db8f2a'4e8fcc90'0b41daef_u128}, + {Sign::POS, -130, 0xe7d473b2'e5db8f2a'4e8fcc90'0b41daef_u128}, + {Sign::POS, -130, 0xeac3ecb2'4a3ac7b4'7593e1a9'e917359a_u128}, + {Sign::POS, -130, 0xedb863f4'b73f982d'e7741396'b49e1ce5_u128}, + {Sign::POS, -130, 0xedb863f4'b73f982d'e7741396'b49e1ce5_u128}, + {Sign::POS, -130, 0xf0b1ea93'f34675a7'c8ba4f8f'47b85a5c_u128}, + {Sign::POS, -130, 0xf3b09202'359f9787'7007c127'6821b705_u128}, + {Sign::POS, -130, 0xf3b09202'359f9787'7007c127'6821b705_u128}, + {Sign::POS, -130, 0xf6b46c0c'8c8fdea1'7ee19afe'6db7e324_u128}, + {Sign::POS, -130, 0xf9bd8add'584687f0'edf54f37'f6d40420_u128}, + {Sign::POS, -130, 0xf9bd8add'584687f0'edf54f37'f6d40420_u128}, + {Sign::POS, -130, 0xfccc00fe'dba4e6fb'efe52ccf'03e7dee1_u128}, + {Sign::POS, -130, 0xffdfe15d'e3c01bac'1be2585c'279c50a5_u128}, + {Sign::POS, -130, 0xffdfe15d'e3c01bac'1be2585c'279c50a5_u128}, + {Sign::POS, -129, 0x817c9fa6'43880404'e0b571f5'c91b0446_u128}, + {Sign::POS, -129, 0x830c1742'7ea55eca'7178594b'ef2def59_u128}, + {Sign::POS, -129, 0x830c1742'7ea55eca'7178594b'ef2def59_u128}, + {Sign::POS, -129, 0x849e6196'487c1d1c'9a741bb1'71158d2a_u128}, + {Sign::POS, -129, 0x849e6196'487c1d1c'9a741bb1'71158d2a_u128}, + {Sign::POS, -129, 0x863388eb'55ebd295'1a618264'446cb495_u128}, + {Sign::POS, -129, 
0x87cb97c3'ff9eac18'71dbdbbe'c51d7657_u128}, + {Sign::POS, -129, 0x87cb97c3'ff9eac18'71dbdbbe'c51d7657_u128}, + {Sign::POS, -129, 0x896698dc'e4cff76c'abe0b522'230f7d14_u128}, + {Sign::POS, -129, 0x896698dc'e4cff76c'abe0b522'230f7d14_u128}, + {Sign::POS, -129, 0x8b04972e'9d4d3011'd28e8ada'fea703b4_u128}, + {Sign::POS, -129, 0x8ca59def'7b5cefc5'208422d8'3be34b27_u128}, + {Sign::POS, -129, 0x8ca59def'7b5cefc5'208422d8'3be34b27_u128}, + {Sign::POS, -129, 0x8e49b895'5e3ffb8a'c385cf49'402af0e4_u128}, + {Sign::POS, -129, 0x8e49b895'5e3ffb8a'c385cf49'402af0e4_u128}, + {Sign::POS, -129, 0x8ff0f2d7'960a075c'da982a61'4e12c6dd_u128}, + {Sign::POS, -129, 0x8ff0f2d7'960a075c'da982a61'4e12c6dd_u128}, + {Sign::POS, -129, 0x919b58b0'd999bbc8'038401fc'1c1b5c2c_u128}, + {Sign::POS, -129, 0x919b58b0'd999bbc8'038401fc'1c1b5c2c_u128}, + {Sign::POS, -129, 0x9348f661'4f821394'a9b55d3f'16da746a_u128}, + {Sign::POS, -129, 0x9348f661'4f821394'a9b55d3f'16da746a_u128}, + {Sign::POS, -129, 0x94f9d870'aac256a5'088d2d14'73d4f7f5_u128}, + {Sign::POS, -129, 0x94f9d870'aac256a5'088d2d14'73d4f7f5_u128}, + {Sign::POS, -129, 0x96ae0bb0'5c35d5bd'7c1e117d'ea19e9e6_u128}, + {Sign::POS, -129, 0x96ae0bb0'5c35d5bd'7c1e117d'ea19e9e6_u128}, + {Sign::POS, -129, 0x98659d3d'd9b12532'336db063'0f536fb9_u128}, + {Sign::POS, 0, 0_u128}, }, // -log10(r) for the second step, generated by SageMath with: // @@ -188,513 +192,513 @@ const LogRR LOG10_TABLE = { // r = 2^-16 * round( 2^16 / (1 + i*2^(-14)) ); // s, m, e = RealField(128)(r).log10().sign_mantissa_exponent(); // print("{Sign::POS," if s == -1 else "{Sign::NEG,", e, ", - // MType({", hex(m % 2^64), ",", hex((m >> 64) % 2^64), "})},"); + // ", format_hex(m), "},"); /* .step_2 = */ { - {Sign::NEG, -137, MType({0x7f1ce002fa34131b, 0xdeca729013cd7c31})}, - {Sign::NEG, -137, MType({0x639afa085dd8b4c7, 0xdb5475b44946d986})}, - {Sign::NEG, -137, MType({0x5512632fe9a58cb, 0xd7de6b0e10cab7d2})}, - {Sign::NEG, -137, MType({0xb5380a9953117d07, 0xd468529cfc6fb395})}, - 
{Sign::NEG, -137, MType({0x70af2d7d53be1f31, 0xd0f22c609e474741})}, - {Sign::NEG, -137, MType({0xccd499c49b74cc2, 0xcd7bf858885dcae2})}, - {Sign::NEG, -137, MType({0x5b51ddc3987ebfb8, 0xca05b6844cba73cf})}, - {Sign::NEG, -137, MType({0x49375f5189b3782b, 0xc68f66e37d5f545a})}, - {Sign::NEG, -137, MType({0xf6e57738865c712f, 0xc3190975ac495b7a})}, - {Sign::NEG, -137, MType({0xca02b10a8c712acd, 0xbfa29e3a6b70547e})}, - {Sign::NEG, -137, MType({0x78e5038210208151, 0xbc2c25314cc6e6b6})}, - {Sign::NEG, -137, MType({0xfa099ecd71ee0ea, 0xb8b59e59e23a9524})}, - {Sign::NEG, -137, MType({0xeeb445ccb8fb09ed, 0xb53f09b3bdb3be28})}, - {Sign::NEG, -137, MType({0xc352fff18a1c02fb, 0xb1c8673e71159b33})}, - {Sign::NEG, -137, MType({0x7949e03ecf9b390b, 0xae51b6f98e3e406e})}, - {Sign::NEG, -137, MType({0x2681f33f30aadedc, 0xaadaf8e4a7069c6c})}, - {Sign::NEG, -137, MType({0xf01d5496eea213b3, 0xa7642cff4d4277d6})}, - {Sign::NEG, -137, MType({0xe92ef555ff1de975, 0xa3ed534912c0751d})}, - {Sign::NEG, -137, MType({0xeb0c7519b3e7c1e0, 0xa0766bc1894a1022})}, - {Sign::NEG, -137, MType({0xf60d204ff0fe5296, 0x9c21b6e91e7f03a3})}, - {Sign::NEG, -137, MType({0x125c19a4f057c18b, 0x98aab0491050bea8})}, - {Sign::NEG, -137, MType({0x7e9383ce1bdf9575, 0x95339bd64cd953e7})}, - {Sign::NEG, -137, MType({0xbf274f4d8f770253, 0x91bc799065cc57d6})}, - {Sign::NEG, -137, MType({0x656bd9b758fe44ba, 0x8e454976ecd836ad})}, - {Sign::NEG, -137, MType({0xbfdd2c7f388fc014, 0x8ace0b8973a63413})}, - {Sign::NEG, -137, MType({0x83fbf6ed936c493a, 0x8756bfc78bda6ad0})}, - {Sign::NEG, -137, MType({0x71bfa9a18bec01cc, 0x83df6630c713cc76})}, - {Sign::NEG, -137, MType({0xf09d19f56dbfef72, 0x8067fec4b6ec2111})}, - {Sign::NEG, -138, MType({0x4c422713b1642228, 0xf9e11305d9f00dad})}, - {Sign::NEG, -138, MType({0xc3c7c5699b7a0a4, 0xf2f20cd5f58de39a})}, - {Sign::NEG, -138, MType({0xb8db7c69e3fa0797, 0xec02eaf8e3c656ff})}, - {Sign::NEG, -138, MType({0xa083eb05506ff7ed, 0xe513ad6dc7a3a553})}, - {Sign::NEG, -138, 
MType({0xc21595e745f1fa15, 0xde245433c425b5c5})}, - {Sign::NEG, -138, MType({0xb9d5bcdbfe719389, 0xd734df49fc42189b})}, - {Sign::NEG, -138, MType({0xa17a1e85e93461f4, 0xd0454eaf92e4068b})}, - {Sign::NEG, -138, MType({0xe3537584da333fda, 0xc955a263aaec6016})}, - {Sign::NEG, -138, MType({0x963177f24682c2, 0xc265da656731ace5})}, - {Sign::NEG, -138, MType({0x4ac037347bcfc50e, 0xbb75f6b3ea801b1e})}, - {Sign::NEG, -138, MType({0x901a736a4364cdfd, 0xb485f74e57997ec6})}, - {Sign::NEG, -138, MType({0xbb550acc3b9d7247, 0xad95dc33d1355117})}, - {Sign::NEG, -138, MType({0x663cf2b27e8f1ffb, 0xa6a5a5637a00afdc})}, - {Sign::NEG, -138, MType({0x5f89bd08feb39952, 0x9fb552dc749e5cca})}, - {Sign::NEG, -138, MType({0x23c2623c73f494db, 0x98c4e49de3a6bcdd})}, - {Sign::NEG, -138, MType({0x4937d3b5485af61e, 0x91d45aa6e9a7d7b0})}, - {Sign::NEG, -138, MType({0xdf14214e7a6d8111, 0x8ae3b4f6a92556d9})}, - {Sign::NEG, -138, MType({0xbf7cfc14999fb4bc, 0x83f2f38c44988544})}, - {Sign::NEG, -139, MType({0xa990c0ee569a8d51, 0xfa042ccdbce09d15})}, - {Sign::NEG, -139, MType({0xa38463e9d941e1c2, 0xec223b0b32227c9e})}, - {Sign::NEG, -139, MType({0xba0324530edaa03f, 0xde4011cf2daaff31})}, - {Sign::NEG, -139, MType({0x5e997a02dad7ace7, 0xd05db117f419b857})}, - {Sign::NEG, -139, MType({0x4a14676d4d0f817e, 0xc27b18e3c9f977c7})}, - {Sign::NEG, -139, MType({0x857c002ee7a1e473, 0xb4984930f3c0481c})}, - {Sign::NEG, -139, MType({0x5923b2eb72d8012a, 0xa6b541fdb5cf6d89})}, - {Sign::NEG, -139, MType({0x21cde8f85ca1f9fd, 0x98d203485473648b})}, - {Sign::NEG, -139, MType({0xbe08e08b1d212d4, 0x8aee8d0f13e3e09e})}, - {Sign::NEG, -140, MType({0x695023998e6bd7b0, 0xfa15bea0708795e1})}, - {Sign::NEG, -140, MType({0x634cea6750617a92, 0xde4df4140b42822f})}, - {Sign::NEG, -140, MType({0xfbd7e970aef9dbb8, 0xc285ba757feb2781})}, - {Sign::NEG, -140, MType({0x9aedc1c1ba7d0695, 0xa6bd11c1564a8ace})}, - {Sign::NEG, -140, MType({0x8d306ba207233c44, 0x8af3f9f41600120a})}, - {Sign::NEG, -141, MType({0x856a0a3a00fcf3c1, 
0xde54e6148d030322})}, - {Sign::NEG, -141, MType({0xb3a2c1407cf6d38d, 0xa6c0fa00de35f314})}, - {Sign::NEG, -142, MType({0xd791cf6a70c3a504, 0xde585f4c5bbbcd3d})}, - {Sign::NEG, -143, MType({0x10a633f2c4a8ea22, 0xde5a1bf627b1f68f})}, - {Sign::NEG, 0, MType({0x0, 0x0})}, - {Sign::POS, -143, MType({0xed4a68e5e6e83ddf, 0xde5d95658a729eab})}, - {Sign::POS, -142, MType({0x3281f1872cdbee94, 0xde5f522b21e3e25a})}, - {Sign::POS, -141, MType({0xf1466edaa96e356e, 0xa6c8cb3b7e5bbbfd})}, - {Sign::POS, -141, MType({0x8a607fd695dfc3d9, 0xde62cbd21e895473})}, - {Sign::POS, -140, MType({0xc36b8713ceefe2de, 0x8afed57032bebc7c})}, - {Sign::POS, -140, MType({0x5c2e76c953e3e3e6, 0xa6ccb436a3c72fa4})}, - {Sign::POS, -140, MType({0x8e4950fa5c943bbf, 0xc29b023fdcb2dccf})}, - {Sign::POS, -140, MType({0x20fa8a73c585f634, 0xde69bf8f58005dfc})}, - {Sign::POS, -140, MType({0xaa106d9b0a9717a, 0xfa38ec28905810a3})}, - {Sign::POS, -139, MType({0x85d70e032de41aec, 0x8b04440780460c2a})}, - {Sign::POS, -139, MType({0xbeee21cbb82a9a78, 0x98ec49a311cc30ab})}, - {Sign::POS, -139, MType({0xabd7b0fdd8efe6f6, 0xa6d486e8ba5151a0})}, - {Sign::POS, -139, MType({0x3221c56e2c1aa912, 0xb4bcfbda377d31cc})}, - {Sign::POS, -139, MType({0x57b795a36d9c5f19, 0xc2a5a879470c7c37})}, - {Sign::POS, -139, MType({0x131ec142c053ac3b, 0xd08e8cc7a6d0c580})}, - {Sign::POS, -139, MType({0x35e3298f4bb2aa0a, 0xde77a8c714b08d28})}, - {Sign::POS, -139, MType({0x7133dafdfc44f160, 0xec60fc794ea73ee4})}, - {Sign::POS, -139, MType({0x74b37d23121c59d5, 0xfa4a87e012c533eb})}, - {Sign::POS, -138, MType({0x93bf5f4207da8a4c, 0x841a257e8f97da22})}, - {Sign::POS, -138, MType({0xfdb5990ec6057f4e, 0x8b0f22e919107c0c})}, - {Sign::POS, -138, MType({0x2d408a58b1b202fe, 0x92043c3084f41481})}, - {Sign::POS, -138, MType({0x1759381b61dfbf01, 0x98f97155b274b1ab})}, - {Sign::POS, -138, MType({0x41e90a054df4b9f1, 0x9feec25980cedbbe})}, - {Sign::POS, -138, MType({0xa1e66c6203725d50, 0xa6e42f3ccf49959d})}, - {Sign::POS, -138, MType({0x8693d36ab45bd7ce, 
0xadd9b8007d365d83})}, - {Sign::POS, -138, MType({0x91e25bb40ad3f098, 0xb4cf5ca569f12da9})}, - {Sign::POS, -138, MType({0xbdf94392c4cc7f6c, 0xbbc51d2c74e07cf0})}, - {Sign::POS, -138, MType({0x6fe37973354a82f9, 0xc2baf9967d753f89})}, - {Sign::POS, -138, MType({0x97647b4267bfd801, 0xc9b0f1e4632ae79b})}, - {Sign::POS, -138, MType({0xdbf5c32a454f7bdf, 0xd0a70617058765ee})}, - {Sign::POS, -138, MType({0xd6edfe04c37ba916, 0xd79d362f441b2a92})}, - {Sign::POS, -138, MType({0x5ad3480ccfbe9890, 0xde93822dfe812587})}, - {Sign::POS, -138, MType({0xc7d9ac765be7e325, 0xe589ea14145ec764})}, - {Sign::POS, -138, MType({0x6d8f24b9a3ca011b, 0xec806de265640204})}, - {Sign::POS, -138, MType({0xf9b654807dcdd5b2, 0xf3770d99d14b4928})}, - {Sign::POS, -138, MType({0xf4513f4745663028, 0xfa6dc93b37d99326})}, - {Sign::POS, -137, MType({0xa46e9a72d80da75f, 0x80b25063bc6f2cc6})}, - {Sign::POS, -137, MType({0xee60992b51ffac4b, 0x842dca1fba19cce6})}, - {Sign::POS, -137, MType({0x1977fa1c786886b3, 0x87a951d204deeaf3})}, - {Sign::POS, -137, MType({0xe5f7c52cdf119d5, 0x8b24e77b0cb60a84})}, - {Sign::POS, -137, MType({0x3bf9d70da1021a10, 0x8ea08b1b419bf221})}, - {Sign::POS, -137, MType({0xfd0406b07523b8e6, 0x921c3cb31392ab94})}, - {Sign::POS, -137, MType({0x453ee32c020f2a8, 0x9597fc42f2a18441})}, - {Sign::POS, -137, MType({0xcfb3ec22066bf7f6, 0x9913c9cb4ed50d72})}, - {Sign::POS, -137, MType({0x215c025bd493ecf9, 0x9c8fa54c983f1cb8})}, - {Sign::POS, -137, MType({0x39c116b7ee3a83ec, 0x9f2c93192e68232b})}, - {Sign::POS, -137, MType({0xf41f4b3ede2782f0, 0xa2a8870f24ac5f66})}, - {Sign::POS, -137, MType({0x61196927723eb75c, 0xa62488ff3c735799})}, - {Sign::POS, -137, MType({0xe615e836cb1edab, 0xa9a098e9e5e2a432})}, - {Sign::POS, -137, MType({0x6981331c5fc71cfc, 0xad1cb6cf91252372})}, - {Sign::POS, -137, MType({0x5f6a4faa054f11fa, 0xb098e2b0ae6af9c2})}, - {Sign::POS, -137, MType({0x2a68bc681a74c28, 0xb4151c8dade99205})}, - {Sign::POS, -137, MType({0x382ba24d90566403, 0xb7916466ffdb9ded})}, - {Sign::POS, -137, 
MType({0x6ad1abe51dd22e00, 0xbb0dba3d14811652})}, - {Sign::POS, -137, MType({0x456d3f7f59b13960, 0xbe8a1e105c1f3b85})}, - {Sign::POS, -137, MType({0x738dd8b7d66e9058, 0xc2068fe1470095a4})}, - {Sign::POS, -137, MType({0x68e123fed7ff11c6, 0xc5830fb04574f4f1})}, - {Sign::POS, -137, MType({0x2f3bd09780c3aa11, 0xc8ff9d7dc7d17225})}, - {Sign::POS, -137, MType({0x3b48887f1ce36935, 0xcc7c394a3e706ec5})}, - {Sign::POS, -137, MType({0x47ddae655ecc4633, 0xcff8e31619b19578})}, - {Sign::POS, -137, MType({0x37fa81eef4819c88, 0xd3759ae1c9f9da5b})}, - {Sign::POS, -137, MType({0xff6c4a8d747c65ed, 0xd6f260adbfb37b55})}, - {Sign::POS, -137, MType({0x921c29493a33318c, 0xda6f347a6b4e0070})}, - {Sign::POS, -137, MType({0xda0631eb65e731d8, 0xddec16483d3e3c27})}, - {Sign::POS, -137, MType({0xb3da6c07d110babc, 0xe1690617a5fe4bc2})}, - {Sign::POS, -137, MType({0xf2485c7868b8835a, 0xe4e603e9160d97a6})}, - {Sign::POS, -137, MType({0x67f5b7ed01344055, 0xe8630fbcfdf0d3ae})}, - {Sign::POS, -137, MType({0xf820df445b1d0622, 0xebe02993ce31ff7b})}, - {Sign::POS, -137, MType({0xadefc674b7eca5cd, 0xef5d516df76066d0})}, - {Sign::POS, -137, MType({0xda6be6dc057d3235, 0xf2da874bea10a1e0})}, - {Sign::POS, -137, MType({0x392bdde152ab5ff5, 0xf657cb2e16dc95a9})}, - {Sign::POS, -137, MType({0x1bab58e2ec99cf73, 0xf9d51d14ee637444})}, - {Sign::POS, -137, MType({0x9b51ef7e3388d692, 0xfd527d00e149bd3e})}, - {Sign::POS, -136, MType({0xe914c6a7f3f22fa2, 0x8067f579301c9ef6})}, - {Sign::POS, -136, MType({0xd22862eb2081c94, 0x8226b374edf088e2})}, - {Sign::POS, -136, MType({0x29ebd0b476cd8fd8, 0x83e57873e27ad153})}, - {Sign::POS, -136, MType({0x98feddc2806d01ed, 0x85a44476461854a0})}, - {Sign::POS, -136, MType({0x471bfc261a401854, 0x8763177c512896af})}, - {Sign::POS, -136, MType({0xb6f89c19b4cd1acd, 0x88b23a5b61430a16})}, - {Sign::POS, -136, MType({0xb39aaf34163fb099, 0x8a7119a85909ebe9})}, - {Sign::POS, -136, MType({0x1665f0f821541c36, 0x8c2ffff99357e887})}, - {Sign::POS, -136, MType({0xa5051754e049c1cb, 
0x8deeed4f489679a6})}, - {Sign::POS, -136, MType({0x8c5a9a1c57b2e986, 0x8fade1a9b131c159})}, - {Sign::POS, -136, MType({0x1d8448438a26a9ae, 0x916cdd0905988a35})}, - {Sign::POS, -136, MType({0x8e3a0913ecd2fd02, 0x932bdf6d7e3c477d})}, - {Sign::POS, -136, MType({0xbc881a45f47f1d36, 0x94eae8d753911550})}, - {Sign::POS, -136, MType({0xf5e51c05499b06d0, 0x96a9f946be0db8d0})}, - {Sign::POS, -136, MType({0xc1a43be81a243fde, 0x986910bbf62ba04f})}, - {Sign::POS, -136, MType({0xaec3cfebe971beb7, 0x9a282f373466e378})}, - {Sign::POS, -136, MType({0x2518b29328614989, 0x9be754b8b13e437c})}, - {Sign::POS, -136, MType({0x39d6b147cbe803a4, 0x9da68140a5332b3a})}, - {Sign::POS, -136, MType({0x87765e3004ae428d, 0x9f65b4cf48c9af6d})}, - {Sign::POS, -136, MType({0x8f896ab28245bac, 0xa124ef64d4888ed6})}, - {Sign::POS, -136, MType({0xf8880fb5ca630c87, 0xa2e4310180f93263})}, - {Sign::POS, -136, MType({0xb179397cf82e935c, 0xa4a379a586a7ad62})}, - {Sign::POS, -136, MType({0x95a8cb717197ad81, 0xa662c9511e22bda3})}, - {Sign::POS, -136, MType({0xf6394a34b7f9a4a4, 0xa82220047ffbcba8})}, - {Sign::POS, -136, MType({0xffafd8c2b57884e8, 0xa9e17dbfe4c6ead0})}, - {Sign::POS, -136, MType({0xa970a643b8a6ac2b, 0xaba0e283851ad980})}, - {Sign::POS, -136, MType({0xa89b49fb749d47e0, 0xad604e4f9991014e})}, - {Sign::POS, -136, MType({0x66475ed2ac983305, 0xaf1fc1245ac5772e})}, - {Sign::POS, -136, MType({0xb4fd6209364bb36f, 0xb06f5be1bf1918e7})}, - {Sign::POS, -136, MType({0x8b5ce79b0965962a, 0xb22edb0636da31d6})}, - {Sign::POS, -136, MType({0x6724232b07396427, 0xb3ee6133f7149769})}, - {Sign::POS, -136, MType({0x2f02b14dcad8a49c, 0xb5adee6b386e62ae})}, - {Sign::POS, -136, MType({0xbd6443a81f792e07, 0xb76d82ac339058db})}, - {Sign::POS, -136, MType({0xea1cd9625749939a, 0xb92d1df72125eb7c})}, - {Sign::POS, -136, MType({0x97775e3142198913, 0xbaecc04c39dd389b})}, - {Sign::POS, -136, MType({0xc2a701b809a2bc39, 0xbcac69abb6670aeb})}, - {Sign::POS, -136, MType({0x979b990f39e662e3, 0xbe6c1a15cf76d9f6})}, - {Sign::POS, 
-136, MType({0x88395c463ddd82b2, 0xc02bd18abdc2ca45})}, - {Sign::POS, -136, MType({0x66f451bd9ba5ed05, 0xc1eb900aba03ad8d})}, - {Sign::POS, -136, MType({0x84cfb9413f6437a6, 0xc3ab5595fcf502d9})}, - {Sign::POS, -136, MType({0xd2c1c8d32943ca42, 0xc56b222cbf54f6b6})}, - {Sign::POS, -136, MType({0x67c0d1fd95192e6, 0xc72af5cf39e4635f})}, - {Sign::POS, -136, MType({0xc298bf9edb6441f2, 0xc8ead07da566d0e3})}, - {Sign::POS, -136, MType({0xc22d646addde3910, 0xcaaab2383aa27559})}, - {Sign::POS, -136, MType({0x7c301e5c7d1ca40, 0xcc6a9aff32603504})}, - {Sign::POS, -136, MType({0xfb444464df02505, 0xce2a8ad2c56ba27f})}, - {Sign::POS, -136, MType({0x5f1df3591ae898f, 0xcfea81b32c92feec})}, - {Sign::POS, -136, MType({0xb43caf8e7b891066, 0xd13a7f7c07506f7d})}, - {Sign::POS, -136, MType({0x597fb13f0d0fdf19, 0xd2fa82b36a610c4f})}, - {Sign::POS, -136, MType({0x3c21f1c60a60b0d6, 0xd4ba8cf83dd2a06b})}, - {Sign::POS, -136, MType({0x2b7455909a0428a4, 0xd67a9e4aba7d7ce5})}, - {Sign::POS, -136, MType({0x1438b60573d2da10, 0xd83ab6ab193ca223})}, - {Sign::POS, -136, MType({0x49f86400c5ab2b11, 0xd9fad61992edc008})}, - {Sign::POS, -136, MType({0xd3c313d148a23c35, 0xdbbafc9660713620})}, - {Sign::POS, -136, MType({0xbc56852355e0f0d5, 0xdd7b2a21baaa13cc})}, + {Sign::NEG, -137, 0xdeca7290'13cd7c31'7f1ce002'fa34131b_u128}, + {Sign::NEG, -137, 0xdb5475b4'4946d986'639afa08'5dd8b4c7_u128}, + {Sign::NEG, -137, 0xd7de6b0e'10cab7d2'05512632'fe9a58cb_u128}, + {Sign::NEG, -137, 0xd468529c'fc6fb395'b5380a99'53117d07_u128}, + {Sign::NEG, -137, 0xd0f22c60'9e474741'70af2d7d'53be1f31_u128}, + {Sign::NEG, -137, 0xcd7bf858'885dcae2'0ccd499c'49b74cc2_u128}, + {Sign::NEG, -137, 0xca05b684'4cba73cf'5b51ddc3'987ebfb8_u128}, + {Sign::NEG, -137, 0xc68f66e3'7d5f545a'49375f51'89b3782b_u128}, + {Sign::NEG, -137, 0xc3190975'ac495b7a'f6e57738'865c712f_u128}, + {Sign::NEG, -137, 0xbfa29e3a'6b70547e'ca02b10a'8c712acd_u128}, + {Sign::NEG, -137, 0xbc2c2531'4cc6e6b6'78e50382'10208151_u128}, + {Sign::NEG, -137, 
0xb8b59e59'e23a9524'0fa099ec'd71ee0ea_u128}, + {Sign::NEG, -137, 0xb53f09b3'bdb3be28'eeb445cc'b8fb09ed_u128}, + {Sign::NEG, -137, 0xb1c8673e'71159b33'c352fff1'8a1c02fb_u128}, + {Sign::NEG, -137, 0xae51b6f9'8e3e406e'7949e03e'cf9b390b_u128}, + {Sign::NEG, -137, 0xaadaf8e4'a7069c6c'2681f33f'30aadedc_u128}, + {Sign::NEG, -137, 0xa7642cff'4d4277d6'f01d5496'eea213b3_u128}, + {Sign::NEG, -137, 0xa3ed5349'12c0751d'e92ef555'ff1de975_u128}, + {Sign::NEG, -137, 0xa0766bc1'894a1022'eb0c7519'b3e7c1e0_u128}, + {Sign::NEG, -137, 0x9c21b6e9'1e7f03a3'f60d204f'f0fe5296_u128}, + {Sign::NEG, -137, 0x98aab049'1050bea8'125c19a4'f057c18b_u128}, + {Sign::NEG, -137, 0x95339bd6'4cd953e7'7e9383ce'1bdf9575_u128}, + {Sign::NEG, -137, 0x91bc7990'65cc57d6'bf274f4d'8f770253_u128}, + {Sign::NEG, -137, 0x8e454976'ecd836ad'656bd9b7'58fe44ba_u128}, + {Sign::NEG, -137, 0x8ace0b89'73a63413'bfdd2c7f'388fc014_u128}, + {Sign::NEG, -137, 0x8756bfc7'8bda6ad0'83fbf6ed'936c493a_u128}, + {Sign::NEG, -137, 0x83df6630'c713cc76'71bfa9a1'8bec01cc_u128}, + {Sign::NEG, -137, 0x8067fec4'b6ec2111'f09d19f5'6dbfef72_u128}, + {Sign::NEG, -138, 0xf9e11305'd9f00dad'4c422713'b1642228_u128}, + {Sign::NEG, -138, 0xf2f20cd5'f58de39a'0c3c7c56'99b7a0a4_u128}, + {Sign::NEG, -138, 0xec02eaf8'e3c656ff'b8db7c69'e3fa0797_u128}, + {Sign::NEG, -138, 0xe513ad6d'c7a3a553'a083eb05'506ff7ed_u128}, + {Sign::NEG, -138, 0xde245433'c425b5c5'c21595e7'45f1fa15_u128}, + {Sign::NEG, -138, 0xd734df49'fc42189b'b9d5bcdb'fe719389_u128}, + {Sign::NEG, -138, 0xd0454eaf'92e4068b'a17a1e85'e93461f4_u128}, + {Sign::NEG, -138, 0xc955a263'aaec6016'e3537584'da333fda_u128}, + {Sign::NEG, -138, 0xc265da65'6731ace5'00963177'f24682c2_u128}, + {Sign::NEG, -138, 0xbb75f6b3'ea801b1e'4ac03734'7bcfc50e_u128}, + {Sign::NEG, -138, 0xb485f74e'57997ec6'901a736a'4364cdfd_u128}, + {Sign::NEG, -138, 0xad95dc33'd1355117'bb550acc'3b9d7247_u128}, + {Sign::NEG, -138, 0xa6a5a563'7a00afdc'663cf2b2'7e8f1ffb_u128}, + {Sign::NEG, -138, 0x9fb552dc'749e5cca'5f89bd08'feb39952_u128}, + 
{Sign::NEG, -138, 0x98c4e49d'e3a6bcdd'23c2623c'73f494db_u128}, + {Sign::NEG, -138, 0x91d45aa6'e9a7d7b0'4937d3b5'485af61e_u128}, + {Sign::NEG, -138, 0x8ae3b4f6'a92556d9'df14214e'7a6d8111_u128}, + {Sign::NEG, -138, 0x83f2f38c'44988544'bf7cfc14'999fb4bc_u128}, + {Sign::NEG, -139, 0xfa042ccd'bce09d15'a990c0ee'569a8d51_u128}, + {Sign::NEG, -139, 0xec223b0b'32227c9e'a38463e9'd941e1c2_u128}, + {Sign::NEG, -139, 0xde4011cf'2daaff31'ba032453'0edaa03f_u128}, + {Sign::NEG, -139, 0xd05db117'f419b857'5e997a02'dad7ace7_u128}, + {Sign::NEG, -139, 0xc27b18e3'c9f977c7'4a14676d'4d0f817e_u128}, + {Sign::NEG, -139, 0xb4984930'f3c0481c'857c002e'e7a1e473_u128}, + {Sign::NEG, -139, 0xa6b541fd'b5cf6d89'5923b2eb'72d8012a_u128}, + {Sign::NEG, -139, 0x98d20348'5473648b'21cde8f8'5ca1f9fd_u128}, + {Sign::NEG, -139, 0x8aee8d0f'13e3e09e'0be08e08'b1d212d4_u128}, + {Sign::NEG, -140, 0xfa15bea0'708795e1'69502399'8e6bd7b0_u128}, + {Sign::NEG, -140, 0xde4df414'0b42822f'634cea67'50617a92_u128}, + {Sign::NEG, -140, 0xc285ba75'7feb2781'fbd7e970'aef9dbb8_u128}, + {Sign::NEG, -140, 0xa6bd11c1'564a8ace'9aedc1c1'ba7d0695_u128}, + {Sign::NEG, -140, 0x8af3f9f4'1600120a'8d306ba2'07233c44_u128}, + {Sign::NEG, -141, 0xde54e614'8d030322'856a0a3a'00fcf3c1_u128}, + {Sign::NEG, -141, 0xa6c0fa00'de35f314'b3a2c140'7cf6d38d_u128}, + {Sign::NEG, -142, 0xde585f4c'5bbbcd3d'd791cf6a'70c3a504_u128}, + {Sign::NEG, -143, 0xde5a1bf6'27b1f68f'10a633f2'c4a8ea22_u128}, + {Sign::NEG, 0, 0_u128}, + {Sign::POS, -143, 0xde5d9565'8a729eab'ed4a68e5'e6e83ddf_u128}, + {Sign::POS, -142, 0xde5f522b'21e3e25a'3281f187'2cdbee94_u128}, + {Sign::POS, -141, 0xa6c8cb3b'7e5bbbfd'f1466eda'a96e356e_u128}, + {Sign::POS, -141, 0xde62cbd2'1e895473'8a607fd6'95dfc3d9_u128}, + {Sign::POS, -140, 0x8afed570'32bebc7c'c36b8713'ceefe2de_u128}, + {Sign::POS, -140, 0xa6ccb436'a3c72fa4'5c2e76c9'53e3e3e6_u128}, + {Sign::POS, -140, 0xc29b023f'dcb2dccf'8e4950fa'5c943bbf_u128}, + {Sign::POS, -140, 0xde69bf8f'58005dfc'20fa8a73'c585f634_u128}, + {Sign::POS, -140, 
0xfa38ec28'905810a3'0aa106d9'b0a9717a_u128}, + {Sign::POS, -139, 0x8b044407'80460c2a'85d70e03'2de41aec_u128}, + {Sign::POS, -139, 0x98ec49a3'11cc30ab'beee21cb'b82a9a78_u128}, + {Sign::POS, -139, 0xa6d486e8'ba5151a0'abd7b0fd'd8efe6f6_u128}, + {Sign::POS, -139, 0xb4bcfbda'377d31cc'3221c56e'2c1aa912_u128}, + {Sign::POS, -139, 0xc2a5a879'470c7c37'57b795a3'6d9c5f19_u128}, + {Sign::POS, -139, 0xd08e8cc7'a6d0c580'131ec142'c053ac3b_u128}, + {Sign::POS, -139, 0xde77a8c7'14b08d28'35e3298f'4bb2aa0a_u128}, + {Sign::POS, -139, 0xec60fc79'4ea73ee4'7133dafd'fc44f160_u128}, + {Sign::POS, -139, 0xfa4a87e0'12c533eb'74b37d23'121c59d5_u128}, + {Sign::POS, -138, 0x841a257e'8f97da22'93bf5f42'07da8a4c_u128}, + {Sign::POS, -138, 0x8b0f22e9'19107c0c'fdb5990e'c6057f4e_u128}, + {Sign::POS, -138, 0x92043c30'84f41481'2d408a58'b1b202fe_u128}, + {Sign::POS, -138, 0x98f97155'b274b1ab'1759381b'61dfbf01_u128}, + {Sign::POS, -138, 0x9feec259'80cedbbe'41e90a05'4df4b9f1_u128}, + {Sign::POS, -138, 0xa6e42f3c'cf49959d'a1e66c62'03725d50_u128}, + {Sign::POS, -138, 0xadd9b800'7d365d83'8693d36a'b45bd7ce_u128}, + {Sign::POS, -138, 0xb4cf5ca5'69f12da9'91e25bb4'0ad3f098_u128}, + {Sign::POS, -138, 0xbbc51d2c'74e07cf0'bdf94392'c4cc7f6c_u128}, + {Sign::POS, -138, 0xc2baf996'7d753f89'6fe37973'354a82f9_u128}, + {Sign::POS, -138, 0xc9b0f1e4'632ae79b'97647b42'67bfd801_u128}, + {Sign::POS, -138, 0xd0a70617'058765ee'dbf5c32a'454f7bdf_u128}, + {Sign::POS, -138, 0xd79d362f'441b2a92'd6edfe04'c37ba916_u128}, + {Sign::POS, -138, 0xde93822d'fe812587'5ad3480c'cfbe9890_u128}, + {Sign::POS, -138, 0xe589ea14'145ec764'c7d9ac76'5be7e325_u128}, + {Sign::POS, -138, 0xec806de2'65640204'6d8f24b9'a3ca011b_u128}, + {Sign::POS, -138, 0xf3770d99'd14b4928'f9b65480'7dcdd5b2_u128}, + {Sign::POS, -138, 0xfa6dc93b'37d99326'f4513f47'45663028_u128}, + {Sign::POS, -137, 0x80b25063'bc6f2cc6'a46e9a72'd80da75f_u128}, + {Sign::POS, -137, 0x842dca1f'ba19cce6'ee60992b'51ffac4b_u128}, + {Sign::POS, -137, 0x87a951d2'04deeaf3'1977fa1c'786886b3_u128}, + 
{Sign::POS, -137, 0x8b24e77b'0cb60a84'0e5f7c52'cdf119d5_u128}, + {Sign::POS, -137, 0x8ea08b1b'419bf221'3bf9d70d'a1021a10_u128}, + {Sign::POS, -137, 0x921c3cb3'1392ab94'fd0406b0'7523b8e6_u128}, + {Sign::POS, -137, 0x9597fc42'f2a18441'0453ee32'c020f2a8_u128}, + {Sign::POS, -137, 0x9913c9cb'4ed50d72'cfb3ec22'066bf7f6_u128}, + {Sign::POS, -137, 0x9c8fa54c'983f1cb8'215c025b'd493ecf9_u128}, + {Sign::POS, -137, 0x9f2c9319'2e68232b'39c116b7'ee3a83ec_u128}, + {Sign::POS, -137, 0xa2a8870f'24ac5f66'f41f4b3e'de2782f0_u128}, + {Sign::POS, -137, 0xa62488ff'3c735799'61196927'723eb75c_u128}, + {Sign::POS, -137, 0xa9a098e9'e5e2a432'0e615e83'6cb1edab_u128}, + {Sign::POS, -137, 0xad1cb6cf'91252372'6981331c'5fc71cfc_u128}, + {Sign::POS, -137, 0xb098e2b0'ae6af9c2'5f6a4faa'054f11fa_u128}, + {Sign::POS, -137, 0xb4151c8d'ade99205'02a68bc6'81a74c28_u128}, + {Sign::POS, -137, 0xb7916466'ffdb9ded'382ba24d'90566403_u128}, + {Sign::POS, -137, 0xbb0dba3d'14811652'6ad1abe5'1dd22e00_u128}, + {Sign::POS, -137, 0xbe8a1e10'5c1f3b85'456d3f7f'59b13960_u128}, + {Sign::POS, -137, 0xc2068fe1'470095a4'738dd8b7'd66e9058_u128}, + {Sign::POS, -137, 0xc5830fb0'4574f4f1'68e123fe'd7ff11c6_u128}, + {Sign::POS, -137, 0xc8ff9d7d'c7d17225'2f3bd097'80c3aa11_u128}, + {Sign::POS, -137, 0xcc7c394a'3e706ec5'3b48887f'1ce36935_u128}, + {Sign::POS, -137, 0xcff8e316'19b19578'47ddae65'5ecc4633_u128}, + {Sign::POS, -137, 0xd3759ae1'c9f9da5b'37fa81ee'f4819c88_u128}, + {Sign::POS, -137, 0xd6f260ad'bfb37b55'ff6c4a8d'747c65ed_u128}, + {Sign::POS, -137, 0xda6f347a'6b4e0070'921c2949'3a33318c_u128}, + {Sign::POS, -137, 0xddec1648'3d3e3c27'da0631eb'65e731d8_u128}, + {Sign::POS, -137, 0xe1690617'a5fe4bc2'b3da6c07'd110babc_u128}, + {Sign::POS, -137, 0xe4e603e9'160d97a6'f2485c78'68b8835a_u128}, + {Sign::POS, -137, 0xe8630fbc'fdf0d3ae'67f5b7ed'01344055_u128}, + {Sign::POS, -137, 0xebe02993'ce31ff7b'f820df44'5b1d0622_u128}, + {Sign::POS, -137, 0xef5d516d'f76066d0'adefc674'b7eca5cd_u128}, + {Sign::POS, -137, 
0xf2da874b'ea10a1e0'da6be6dc'057d3235_u128}, + {Sign::POS, -137, 0xf657cb2e'16dc95a9'392bdde1'52ab5ff5_u128}, + {Sign::POS, -137, 0xf9d51d14'ee637444'1bab58e2'ec99cf73_u128}, + {Sign::POS, -137, 0xfd527d00'e149bd3e'9b51ef7e'3388d692_u128}, + {Sign::POS, -136, 0x8067f579'301c9ef6'e914c6a7'f3f22fa2_u128}, + {Sign::POS, -136, 0x8226b374'edf088e2'0d22862e'b2081c94_u128}, + {Sign::POS, -136, 0x83e57873'e27ad153'29ebd0b4'76cd8fd8_u128}, + {Sign::POS, -136, 0x85a44476'461854a0'98feddc2'806d01ed_u128}, + {Sign::POS, -136, 0x8763177c'512896af'471bfc26'1a401854_u128}, + {Sign::POS, -136, 0x88b23a5b'61430a16'b6f89c19'b4cd1acd_u128}, + {Sign::POS, -136, 0x8a7119a8'5909ebe9'b39aaf34'163fb099_u128}, + {Sign::POS, -136, 0x8c2ffff9'9357e887'1665f0f8'21541c36_u128}, + {Sign::POS, -136, 0x8deeed4f'489679a6'a5051754'e049c1cb_u128}, + {Sign::POS, -136, 0x8fade1a9'b131c159'8c5a9a1c'57b2e986_u128}, + {Sign::POS, -136, 0x916cdd09'05988a35'1d844843'8a26a9ae_u128}, + {Sign::POS, -136, 0x932bdf6d'7e3c477d'8e3a0913'ecd2fd02_u128}, + {Sign::POS, -136, 0x94eae8d7'53911550'bc881a45'f47f1d36_u128}, + {Sign::POS, -136, 0x96a9f946'be0db8d0'f5e51c05'499b06d0_u128}, + {Sign::POS, -136, 0x986910bb'f62ba04f'c1a43be8'1a243fde_u128}, + {Sign::POS, -136, 0x9a282f37'3466e378'aec3cfeb'e971beb7_u128}, + {Sign::POS, -136, 0x9be754b8'b13e437c'2518b293'28614989_u128}, + {Sign::POS, -136, 0x9da68140'a5332b3a'39d6b147'cbe803a4_u128}, + {Sign::POS, -136, 0x9f65b4cf'48c9af6d'87765e30'04ae428d_u128}, + {Sign::POS, -136, 0xa124ef64'd4888ed6'08f896ab'28245bac_u128}, + {Sign::POS, -136, 0xa2e43101'80f93263'f8880fb5'ca630c87_u128}, + {Sign::POS, -136, 0xa4a379a5'86a7ad62'b179397c'f82e935c_u128}, + {Sign::POS, -136, 0xa662c951'1e22bda3'95a8cb71'7197ad81_u128}, + {Sign::POS, -136, 0xa8222004'7ffbcba8'f6394a34'b7f9a4a4_u128}, + {Sign::POS, -136, 0xa9e17dbf'e4c6ead0'ffafd8c2'b57884e8_u128}, + {Sign::POS, -136, 0xaba0e283'851ad980'a970a643'b8a6ac2b_u128}, + {Sign::POS, -136, 0xad604e4f'9991014e'a89b49fb'749d47e0_u128}, + 
{Sign::POS, -136, 0xaf1fc124'5ac5772e'66475ed2'ac983305_u128}, + {Sign::POS, -136, 0xb06f5be1'bf1918e7'b4fd6209'364bb36f_u128}, + {Sign::POS, -136, 0xb22edb06'36da31d6'8b5ce79b'0965962a_u128}, + {Sign::POS, -136, 0xb3ee6133'f7149769'6724232b'07396427_u128}, + {Sign::POS, -136, 0xb5adee6b'386e62ae'2f02b14d'cad8a49c_u128}, + {Sign::POS, -136, 0xb76d82ac'339058db'bd6443a8'1f792e07_u128}, + {Sign::POS, -136, 0xb92d1df7'2125eb7c'ea1cd962'5749939a_u128}, + {Sign::POS, -136, 0xbaecc04c'39dd389b'97775e31'42198913_u128}, + {Sign::POS, -136, 0xbcac69ab'b6670aeb'c2a701b8'09a2bc39_u128}, + {Sign::POS, -136, 0xbe6c1a15'cf76d9f6'979b990f'39e662e3_u128}, + {Sign::POS, -136, 0xc02bd18a'bdc2ca45'88395c46'3ddd82b2_u128}, + {Sign::POS, -136, 0xc1eb900a'ba03ad8d'66f451bd'9ba5ed05_u128}, + {Sign::POS, -136, 0xc3ab5595'fcf502d9'84cfb941'3f6437a6_u128}, + {Sign::POS, -136, 0xc56b222c'bf54f6b6'd2c1c8d3'2943ca42_u128}, + {Sign::POS, -136, 0xc72af5cf'39e4635f'067c0d1f'd95192e6_u128}, + {Sign::POS, -136, 0xc8ead07d'a566d0e3'c298bf9e'db6441f2_u128}, + {Sign::POS, -136, 0xcaaab238'3aa27559'c22d646a'ddde3910_u128}, + {Sign::POS, -136, 0xcc6a9aff'32603504'07c301e5'c7d1ca40_u128}, + {Sign::POS, -136, 0xce2a8ad2'c56ba27f'0fb44446'4df02505_u128}, + {Sign::POS, -136, 0xcfea81b3'2c92feec'05f1df35'91ae898f_u128}, + {Sign::POS, -136, 0xd13a7f7c'07506f7d'b43caf8e'7b891066_u128}, + {Sign::POS, -136, 0xd2fa82b3'6a610c4f'597fb13f'0d0fdf19_u128}, + {Sign::POS, -136, 0xd4ba8cf8'3dd2a06b'3c21f1c6'0a60b0d6_u128}, + {Sign::POS, -136, 0xd67a9e4a'ba7d7ce5'2b745590'9a0428a4_u128}, + {Sign::POS, -136, 0xd83ab6ab'193ca223'1438b605'73d2da10_u128}, + {Sign::POS, -136, 0xd9fad619'92edc008'49f86400'c5ab2b11_u128}, + {Sign::POS, -136, 0xdbbafc96'60713620'd3c313d1'48a23c35_u128}, + {Sign::POS, -136, 0xdd7b2a21'baaa13cc'bc568523'55e0f0d5_u128}, }, // -log10(r) for the third step, generated by SageMath with: // // for i in range(-80, 81): // r = 2^-21 * round( 2^21 / (1 + i*2^(-21)) ); // s, m, e = 
RealField(128)(r).log10().sign_mantissa_exponent(); - // print("{Sign::POS," if (s == -1) else "{Sign::NEG,", e, ", - // MType({", hex(m % 2^64), ",", hex((m >> 64) % 2^64), "})},"); + // print("{Sign::POS," if (s == -1) else "{Sign::NEG,", e, "," , + // format_hex(m), "},"); /* .step_3 = */ { - {Sign::NEG, -143, MType({0x54d7e49898ca0093, 0x8af8b9b322ba8c7d})}, - {Sign::NEG, -143, MType({0xc321bbf16665f29c, 0x893c06529deffc3d})}, - {Sign::NEG, -143, MType({0x8246df7140c3e4ae, 0x877f52e433ac7ec4})}, - {Sign::NEG, -143, MType({0x1deaa9e85780e4c1, 0x85c29f67e3ef35bc})}, - {Sign::NEG, -143, MType({0xcd8a5121a9162d0, 0x8405ebddaeb742cf})}, - {Sign::NEG, -143, MType({0xb10486fa4644308d, 0x824938459403c7a7})}, - {Sign::NEG, -143, MType({0x578a2f61eedd4be8, 0x808c849f93d3e5f0})}, - {Sign::NEG, -144, MType({0x715b4a491790e8a7, 0xfd9fa1d75c4d7ea6})}, - {Sign::NEG, -144, MType({0xefb6273a04c71573, 0xfa263a53c5f6eaf4})}, - {Sign::NEG, -144, MType({0x474d901560c17807, 0xf6acd2b464a25420})}, - {Sign::NEG, -144, MType({0x6b9a5deceb80ec57, 0xf3336af9384dfd7c})}, - {Sign::NEG, -144, MType({0x2665a32f7cc64f79, 0xefba032240f82a5d})}, - {Sign::NEG, -144, MType({0x17c8a67316659363, 0xec409b2f7e9f1e16})}, - {Sign::NEG, -144, MType({0xb62cdd3ef5c8673d, 0xe8c73320f1411bfa})}, - {Sign::NEG, -144, MType({0x4e4be6d5a4a07422, 0xe54dcaf698dc675e})}, - {Sign::NEG, -144, MType({0x32f86ff08c92e22, 0xe1d462b0756f4394})}, - {Sign::NEG, -144, MType({0xce31a0d27359396f, 0xde5afa4e86f7f3ee})}, - {Sign::NEG, -144, MType({0x7efc3180aee36373, 0xdae191d0cd74bbc1})}, - {Sign::NEG, -144, MType({0xbb894b1e0ce72fc4, 0xd768293748e3de5e})}, - {Sign::NEG, -144, MType({0x230f6c7270f8be, 0xd3eec081f9439f19})}, - {Sign::NEG, -144, MType({0x9f63aaa563e9a399, 0xd07557b0de924142})}, - {Sign::NEG, -144, MType({0xc2354e441015e7eb, 0xccfbeec3f8ce082d})}, - {Sign::NEG, -144, MType({0x67d22bcf5a452a4c, 0xc98285bb47f5372c})}, - {Sign::NEG, -144, MType({0x65c46fa3e3afea18, 0xc6091c96cc061190})}, - {Sign::NEG, -144, 
MType({0x67e63bbe1405c20d, 0xc28fb35684fedaab})}, - {Sign::NEG, -144, MType({0xf061a284212afbad, 0xbf1649fa72ddd5ce})}, - {Sign::NEG, -144, MType({0x57b0a1901625b539, 0xbb9ce08295a1464c})}, - {Sign::NEG, -144, MType({0xcc9d1c79d93a9a1e, 0xb82376eeed476f74})}, - {Sign::NEG, -144, MType({0x5440d7a131392da8, 0xb4aa0d3f79ce9499})}, - {Sign::NEG, -144, MType({0xca0572f7c9f7a7de, 0xb130a3743b34f90a})}, - {Sign::NEG, -144, MType({0xdfa464cb37fe6455, 0xadb7398d3178e019})}, - {Sign::NEG, -144, MType({0x1d26f48efb62e2e0, 0xaa3dcf8a5c988d17})}, - {Sign::NEG, -144, MType({0xe0e635a681d259e2, 0xa6c4656bbc924352})}, - {Sign::NEG, -144, MType({0x5f8b022f27cbda35, 0xa34afb315164461d})}, - {Sign::NEG, -144, MType({0xa40df5ca390a0465, 0x9fd190db1b0cd8c6})}, - {Sign::NEG, -144, MType({0x8fb76866f01c4f2d, 0x9c582669198a3e9e})}, - {Sign::NEG, -144, MType({0xda1f690c752fdeff, 0x98debbdb4cdabaf4})}, - {Sign::NEG, -144, MType({0x112db8a3dc07ee78, 0x95655131b4fc9119})}, - {Sign::NEG, -144, MType({0x9919c4c22125c79e, 0x91ebe66c51ee045a})}, - {Sign::NEG, -144, MType({0xac6aa27226204db3, 0x8e727b8b23ad5808})}, - {Sign::NEG, -144, MType({0x5bf708fead2b1780, 0x8af9108e2a38cf72})}, - {Sign::NEG, -144, MType({0x8ee54cbc53cd19ed, 0x877fa575658eade6})}, - {Sign::NEG, -144, MType({0x2ab59d38cc6e2c5, 0x84063a40d5ad36b4})}, - {Sign::NEG, -144, MType({0x4b0eaf0a99286378, 0x808ccef07a92ad29})}, - {Sign::NEG, -145, MType({0xa448b11f012c975c, 0xfa26c708a87aa929})}, - {Sign::NEG, -145, MType({0xb0a1d584117de73b, 0xf333eff8c556e089})}, - {Sign::NEG, -145, MType({0xe890f9fb57fdabb6, 0xec4118b14bb6870e})}, - {Sign::NEG, -145, MType({0x261d48c71e693130, 0xe54e41323b962355})}, - {Sign::NEG, -145, MType({0xefecdd48ed894c32, 0xde5b697b94f23bf7})}, - {Sign::NEG, -145, MType({0x7944b9957598a88a, 0xd768918d57c75792})}, - {Sign::NEG, -145, MType({0xa208bc0875093645, 0xd075b9678411fcbf})}, - {Sign::NEG, -145, MType({0xf6bb94d89da8b432, 0xc982e10a19ceb219})}, - {Sign::NEG, -145, MType({0xb07ebbab782457b0, 
0xc290087518f9fe3b})}, - {Sign::NEG, -145, MType({0xb512652945eb9165, 0xbb9d2fa8819067be})}, - {Sign::NEG, -145, MType({0x96d57890e171eea5, 0xb4aa56a4538e753c})}, - {Sign::NEG, -145, MType({0x94c5854b9cd01726, 0xadb77d688ef0ad4e})}, - {Sign::NEG, -145, MType({0x9a7eb8811ec3e6bb, 0xa6c4a3f533b3968d})}, - {Sign::NEG, -145, MType({0x403bd2ab3e0fa2d7, 0x9fd1ca4a41d3b792})}, - {Sign::NEG, -145, MType({0xcad61d29db384b6b, 0x98def067b94d96f4})}, - {Sign::NEG, -145, MType({0x2bc55fd6b8a306ec, 0x91ec164d9a1dbb4d})}, - {Sign::NEG, -145, MType({0x11fd6995111a927, 0x8af93bfbe440ab33})}, - {Sign::NEG, -145, MType({0x959a26faac7e5494, 0x8406617297b2ed3d})}, - {Sign::NEG, -146, MType({0xc10eab7266ac6bc0, 0xfa270d6368e21007})}, - {Sign::NEG, -146, MType({0xbb178b90026b2b2, 0xec41577274ef0439})}, - {Sign::NEG, -146, MType({0xac3bfd925e6b33e1, 0xde5ba1125385c43b})}, - {Sign::NEG, -146, MType({0x9d0a01a95b355319, 0xd075ea43049f5d3b})}, - {Sign::NEG, -146, MType({0x31b3b7b20a6a6496, 0xc29033048834dc64})}, - {Sign::NEG, -146, MType({0x170da891504620f4, 0xb4aa7b56de3f4ee0})}, - {Sign::NEG, -146, MType({0x53289e84744549cb, 0xa6c4c33a06b7c1d9})}, - {Sign::NEG, -146, MType({0x45519048b0ce7e7f, 0x98df0aae01974279})}, - {Sign::NEG, -146, MType({0xa6118c42bf99407e, 0x8af951b2ced6dde8})}, - {Sign::NEG, -147, MType({0xe5b474cc5a64cf6, 0xfa273090dcdf429f})}, - {Sign::NEG, -147, MType({0xa74dab3bd6067bc7, 0xde5bbcddc0b533aa})}, - {Sign::NEG, -147, MType({0x9f73f4e37357341b, 0xc290484c4921a941})}, - {Sign::NEG, -147, MType({0x31bf5d5f815220e7, 0xa6c4d2dc7616bdb0})}, - {Sign::NEG, -147, MType({0x4b987ca5fca242d7, 0x8af95c8e47868b41})}, - {Sign::NEG, -148, MType({0x19be3fabd93832c5, 0xde5bcac37ac6587d})}, - {Sign::NEG, -148, MType({0x8fd43f0c9ce444d3, 0xa6c4daadaf3d75e0})}, - {Sign::NEG, -149, MType({0x61cd853e796bc2c, 0xde5bd1b658ad4676})}, - {Sign::NEG, -150, MType({0x87d6afabfba0644f, 0xde5bd52fc7d8545f})}, - {Sign::POS, 0, MType({0x0, 0x0})}, - {Sign::POS, -150, MType({0xa9bf32001043629d, 
0xde5bdc22a69d9e19})}, - {Sign::POS, -149, MType({0x8014f0f360272d82, 0xde5bdf9c1637d9ef})}, - {Sign::POS, -148, MType({0xfe94a02fc639c0e3, 0xa6c4ea5024795bd2})}, - {Sign::POS, -148, MType({0xbee710a5ace7c8d4, 0xde5be68ef5db7f99})}, - {Sign::POS, -147, MType({0x1a778d8100437e4f, 0x8af972453faf11e8})}, - {Sign::POS, -147, MType({0x97d773f8992f7051, 0xa6c4f221608e89fe})}, - {Sign::POS, -147, MType({0xc9ee5841a3afa95, 0xc29072dbdd9a0dd5})}, - {Sign::POS, -147, MType({0x7b644b13993cf4ef, 0xde5bf474b6df8331})}, - {Sign::POS, -147, MType({0x3448f66e2bd7a0ca, 0xfa2776ebec6ccfdb})}, - {Sign::POS, -146, MType({0x6a7ca5f1a87a1a3c, 0x8af97d20bf27eccd})}, - {Sign::POS, -146, MType({0x245675fe3061108f, 0x98df3f3ab64b431d})}, - {Sign::POS, -146, MType({0x64136e97019d0a3b, 0xa6c501c3dba75dc2})}, - {Sign::POS, -146, MType({0x6cdadac4d6925bd4, 0xb4aac4bc2f432fa3})}, - {Sign::POS, -146, MType({0x2899e23791d29632, 0xc2908823b125aba7})}, - {Sign::POS, -146, MType({0x28039e1f0323a4c1, 0xd0764bfa6155c4b5})}, - {Sign::POS, -146, MType({0xa2912e03afc8cc28, 0xde5c10403fda6db5})}, - {Sign::POS, -146, MType({0x7681cc9f9e0d89f9, 0xec41d4f54cba9991})}, - {Sign::POS, -146, MType({0x28dae4b7241255e1, 0xfa279a1987fd3b32})}, - {Sign::POS, -145, MType({0xf2b412f8dceda28e, 0x8406afd678d4a2c0})}, - {Sign::POS, -145, MType({0xbf5dccd967504857, 0x8af992d7c4e2d5b5})}, - {Sign::POS, -145, MType({0x3716dbf950b07f85, 0x91ec7610a82cafed})}, - {Sign::POS, -145, MType({0x69eebe0b8e5b18e1, 0x98df598122b5aadd})}, - {Sign::POS, -145, MType({0xbb583ce65af56beb, 0x9fd23d2934813ffc})}, - {Sign::POS, -145, MType({0xe22978efa7a962a0, 0xa6c52108dd92e8c1})}, - {Sign::POS, -145, MType({0xe89bf3898ef27836, 0xadb805201dee1ea3})}, - {Sign::POS, -145, MType({0x2c4c997ec90bab0b, 0xb4aae96ef5965b1a})}, - {Sign::POS, -145, MType({0x5e3bcd6f21fe6224, 0xbb9dcdf5648f179c})}, - {Sign::POS, -145, MType({0x82cd723bf1524680, 0xc290b2b36adbcda2})}, - {Sign::POS, -145, MType({0xf1c8f574935e109b, 0xc98397a9087ff6a4})}, - {Sign::POS, 
-145, MType({0x565959c2e4394a59, 0xd0767cd63d7f0c1c})}, - {Sign::POS, -145, MType({0xaf0d4157bc4f05be, 0xd769623b09dc8781})}, - {Sign::POS, -145, MType({0x4dd6f8576e9188b8, 0xde5c47d76d9be24e})}, - {Sign::POS, -145, MType({0xd80c7f46484eee3d, 0xe54f2dab68c095fb})}, - {Sign::POS, -145, MType({0x4667957512a6bd26, 0xec4213b6fb4e1c04})}, - {Sign::POS, -145, MType({0xe505c36d95a074fa, 0xf334f9fa2547ede1})}, - {Sign::POS, -145, MType({0x5368655f1ce3110b, 0xfa27e074e6b1850f})}, - {Sign::POS, -144, MType({0xc23a5ac57f06c112, 0x808d63939fc72d83})}, - {Sign::POS, -144, MType({0xdf39eb5890580f93, 0x8406d70897f0f4a2})}, - {Sign::POS, -144, MType({0xcd896f3e43f38669, 0x87804a995bd7d4a2})}, - {Sign::POS, -144, MType({0x83b16ff7eecace8c, 0x8af9be45eb7d8a41})}, - {Sign::POS, -144, MType({0x21ec7ae8ffa1531d, 0x8e73320e46e3d23d})}, - {Sign::POS, -144, MType({0xf227268d464ae907, 0x91eca5f26e0c6953})}, - {Sign::POS, -144, MType({0x680017af3bbaf2d3, 0x956619f260f90c44})}, - {Sign::POS, -144, MType({0x20c8069e4ae400de, 0x98df8e0e1fab77cd})}, - {Sign::POS, -144, MType({0xe381c4651a67ee13, 0x9c590245aa2568ac})}, - {Sign::POS, -144, MType({0xa0e23fffd718794e, 0x9fd2769900689ba2})}, - {Sign::POS, -144, MType({0x73508b927f485b97, 0xa34beb082276cd6d})}, - {Sign::POS, -144, MType({0x9ee5e19f2eecdb55, 0xa6c55f931051bacc})}, - {Sign::POS, -144, MType({0x916daa3c6c8fdc9d, 0xaa3ed439c9fb207f})}, - {Sign::POS, -144, MType({0xe265804b77126ed3, 0xadb848fc4f74bb45})}, - {Sign::POS, -144, MType({0x52fd36ae943fd7b4, 0xb131bddaa0c047df})}, - {Sign::POS, -144, MType({0xce16dd7f60311bf6, 0xb4ab32d4bddf830b})}, - {Sign::POS, -144, MType({0x6846c7451d8105ac, 0xb824a7eaa6d4298b})}, - {Sign::POS, -144, MType({0x5fd38e2b0650a884, 0xbb9e1d1c5b9ff81e})}, - {Sign::POS, -144, MType({0x1cb619369e1c641f, 0xbf179269dc44ab85})}, - {Sign::POS, -144, MType({0x3099a17e0461648c, 0xc29107d328c40080})}, - {Sign::POS, -144, MType({0x56dbb75e4813a12b, 0xc60a7d58411fb3d0})}, - {Sign::POS, -144, MType({0x748c47b1bbe45a07, 
0xc983f2f925598236})}, - {Sign::POS, -144, MType({0x986da1064b5913e1, 0xccfd68b5d5732873})}, - {Sign::POS, -144, MType({0xfaf478d3d0b31300, 0xd076de8e516e6348})}, - {Sign::POS, -144, MType({0xfe47f0b26ba754ff, 0xd3f05482994cef77})}, - {Sign::POS, -144, MType({0x2e419b90d8e709b7, 0xd769ca92ad1089c2})}, - {Sign::POS, -144, MType({0x406d82eaca788b6f, 0xdae340be8cbaeee9})}, - {Sign::POS, -144, MType({0x140a2bff40e0d670, 0xde5cb706384ddbaf})}, - {Sign::POS, -144, MType({0xb2089d06e51d8034, 0xe1d62d69afcb0cd5})}, - {Sign::POS, -144, MType({0x4d0c626a636f2e4f, 0xe54fa3e8f3343f1f})}, - {Sign::POS, -144, MType({0x416b93f8c6f48d30, 0xe8c91a84028b2f4e})}, - {Sign::POS, -144, MType({0x152eda1dd615c6f5, 0xec42913addd19a25})}, - {Sign::POS, -144, MType({0x781173186fc07a66, 0xefbc080d85093c66})}, - {Sign::POS, -144, MType({0x43813830e974324d, 0xf3357efbf833d2d5})}, - {Sign::POS, -144, MType({0x7a9ea2ef6e1f5d41, 0xf6aef60637531a34})}, - {Sign::POS, -144, MType({0x4a3cd2525dccc623, 0xfa286d2c4268cf47})}, - {Sign::POS, -144, MType({0x8e19004ae218d5d, 0xfda1e46e1976aed1})}, - {Sign::POS, -143, MType({0x9b62aaca25d5d18a, 0x808dade5de3f3aca})}, - {Sign::POS, -143, MType({0xbee9a8d43e00613c, 0x824a69a295c0f02b})}, - {Sign::POS, -143, MType({0xd8d4b69c2056f729, 0x8407256d334155ed})}, - {Sign::POS, -143, MType({0xe7cc28605d7bb77e, 0x85c3e145b6c14a72})}, - {Sign::POS, -143, MType({0xff51b4bdc834a8f1, 0x87809d2c2041ac1c})}, - {Sign::POS, -143, MType({0x47c0774aa81c3561, 0x893d59206fc3594e})}, - {Sign::POS, -143, MType({0xfe4cf331ecb9eb62, 0x8afa1522a5473068})}, + {Sign::NEG, -143, 0x8af8b9b3'22ba8c7d'54d7e498'98ca0093_u128}, + {Sign::NEG, -143, 0x893c0652'9deffc3d'c321bbf1'6665f29c_u128}, + {Sign::NEG, -143, 0x877f52e4'33ac7ec4'8246df71'40c3e4ae_u128}, + {Sign::NEG, -143, 0x85c29f67'e3ef35bc'1deaa9e8'5780e4c1_u128}, + {Sign::NEG, -143, 0x8405ebdd'aeb742cf'0cd8a512'1a9162d0_u128}, + {Sign::NEG, -143, 0x82493845'9403c7a7'b10486fa'4644308d_u128}, + {Sign::NEG, -143, 
0x808c849f'93d3e5f0'578a2f61'eedd4be8_u128}, + {Sign::NEG, -144, 0xfd9fa1d7'5c4d7ea6'715b4a49'1790e8a7_u128}, + {Sign::NEG, -144, 0xfa263a53'c5f6eaf4'efb6273a'04c71573_u128}, + {Sign::NEG, -144, 0xf6acd2b4'64a25420'474d9015'60c17807_u128}, + {Sign::NEG, -144, 0xf3336af9'384dfd7c'6b9a5dec'eb80ec57_u128}, + {Sign::NEG, -144, 0xefba0322'40f82a5d'2665a32f'7cc64f79_u128}, + {Sign::NEG, -144, 0xec409b2f'7e9f1e16'17c8a673'16659363_u128}, + {Sign::NEG, -144, 0xe8c73320'f1411bfa'b62cdd3e'f5c8673d_u128}, + {Sign::NEG, -144, 0xe54dcaf6'98dc675e'4e4be6d5'a4a07422_u128}, + {Sign::NEG, -144, 0xe1d462b0'756f4394'032f86ff'08c92e22_u128}, + {Sign::NEG, -144, 0xde5afa4e'86f7f3ee'ce31a0d2'7359396f_u128}, + {Sign::NEG, -144, 0xdae191d0'cd74bbc1'7efc3180'aee36373_u128}, + {Sign::NEG, -144, 0xd7682937'48e3de5e'bb894b1e'0ce72fc4_u128}, + {Sign::NEG, -144, 0xd3eec081'f9439f19'00230f6c'7270f8be_u128}, + {Sign::NEG, -144, 0xd07557b0'de924142'9f63aaa5'63e9a399_u128}, + {Sign::NEG, -144, 0xccfbeec3'f8ce082d'c2354e44'1015e7eb_u128}, + {Sign::NEG, -144, 0xc98285bb'47f5372c'67d22bcf'5a452a4c_u128}, + {Sign::NEG, -144, 0xc6091c96'cc061190'65c46fa3'e3afea18_u128}, + {Sign::NEG, -144, 0xc28fb356'84fedaab'67e63bbe'1405c20d_u128}, + {Sign::NEG, -144, 0xbf1649fa'72ddd5ce'f061a284'212afbad_u128}, + {Sign::NEG, -144, 0xbb9ce082'95a1464c'57b0a190'1625b539_u128}, + {Sign::NEG, -144, 0xb82376ee'ed476f74'cc9d1c79'd93a9a1e_u128}, + {Sign::NEG, -144, 0xb4aa0d3f'79ce9499'5440d7a1'31392da8_u128}, + {Sign::NEG, -144, 0xb130a374'3b34f90a'ca0572f7'c9f7a7de_u128}, + {Sign::NEG, -144, 0xadb7398d'3178e019'dfa464cb'37fe6455_u128}, + {Sign::NEG, -144, 0xaa3dcf8a'5c988d17'1d26f48e'fb62e2e0_u128}, + {Sign::NEG, -144, 0xa6c4656b'bc924352'e0e635a6'81d259e2_u128}, + {Sign::NEG, -144, 0xa34afb31'5164461d'5f8b022f'27cbda35_u128}, + {Sign::NEG, -144, 0x9fd190db'1b0cd8c6'a40df5ca'390a0465_u128}, + {Sign::NEG, -144, 0x9c582669'198a3e9e'8fb76866'f01c4f2d_u128}, + {Sign::NEG, -144, 0x98debbdb'4cdabaf4'da1f690c'752fdeff_u128}, + 
{Sign::NEG, -144, 0x95655131'b4fc9119'112db8a3'dc07ee78_u128}, + {Sign::NEG, -144, 0x91ebe66c'51ee045a'9919c4c2'2125c79e_u128}, + {Sign::NEG, -144, 0x8e727b8b'23ad5808'ac6aa272'26204db3_u128}, + {Sign::NEG, -144, 0x8af9108e'2a38cf72'5bf708fe'ad2b1780_u128}, + {Sign::NEG, -144, 0x877fa575'658eade6'8ee54cbc'53cd19ed_u128}, + {Sign::NEG, -144, 0x84063a40'd5ad36b4'02ab59d3'8cc6e2c5_u128}, + {Sign::NEG, -144, 0x808ccef0'7a92ad29'4b0eaf0a'99286378_u128}, + {Sign::NEG, -145, 0xfa26c708'a87aa929'a448b11f'012c975c_u128}, + {Sign::NEG, -145, 0xf333eff8'c556e089'b0a1d584'117de73b_u128}, + {Sign::NEG, -145, 0xec4118b1'4bb6870e'e890f9fb'57fdabb6_u128}, + {Sign::NEG, -145, 0xe54e4132'3b962355'261d48c7'1e693130_u128}, + {Sign::NEG, -145, 0xde5b697b'94f23bf7'efecdd48'ed894c32_u128}, + {Sign::NEG, -145, 0xd768918d'57c75792'7944b995'7598a88a_u128}, + {Sign::NEG, -145, 0xd075b967'8411fcbf'a208bc08'75093645_u128}, + {Sign::NEG, -145, 0xc982e10a'19ceb219'f6bb94d8'9da8b432_u128}, + {Sign::NEG, -145, 0xc2900875'18f9fe3b'b07ebbab'782457b0_u128}, + {Sign::NEG, -145, 0xbb9d2fa8'819067be'b5126529'45eb9165_u128}, + {Sign::NEG, -145, 0xb4aa56a4'538e753c'96d57890'e171eea5_u128}, + {Sign::NEG, -145, 0xadb77d68'8ef0ad4e'94c5854b'9cd01726_u128}, + {Sign::NEG, -145, 0xa6c4a3f5'33b3968d'9a7eb881'1ec3e6bb_u128}, + {Sign::NEG, -145, 0x9fd1ca4a'41d3b792'403bd2ab'3e0fa2d7_u128}, + {Sign::NEG, -145, 0x98def067'b94d96f4'cad61d29'db384b6b_u128}, + {Sign::NEG, -145, 0x91ec164d'9a1dbb4d'2bc55fd6'b8a306ec_u128}, + {Sign::NEG, -145, 0x8af93bfb'e440ab33'011fd699'5111a927_u128}, + {Sign::NEG, -145, 0x84066172'97b2ed3d'959a26fa'ac7e5494_u128}, + {Sign::NEG, -146, 0xfa270d63'68e21007'c10eab72'66ac6bc0_u128}, + {Sign::NEG, -146, 0xec415772'74ef0439'0bb178b9'0026b2b2_u128}, + {Sign::NEG, -146, 0xde5ba112'5385c43b'ac3bfd92'5e6b33e1_u128}, + {Sign::NEG, -146, 0xd075ea43'049f5d3b'9d0a01a9'5b355319_u128}, + {Sign::NEG, -146, 0xc2903304'8834dc64'31b3b7b2'0a6a6496_u128}, + {Sign::NEG, -146, 
0xb4aa7b56'de3f4ee0'170da891'504620f4_u128}, + {Sign::NEG, -146, 0xa6c4c33a'06b7c1d9'53289e84'744549cb_u128}, + {Sign::NEG, -146, 0x98df0aae'01974279'45519048'b0ce7e7f_u128}, + {Sign::NEG, -146, 0x8af951b2'ced6dde8'a6118c42'bf99407e_u128}, + {Sign::NEG, -147, 0xfa273090'dcdf429f'0e5b474c'c5a64cf6_u128}, + {Sign::NEG, -147, 0xde5bbcdd'c0b533aa'a74dab3b'd6067bc7_u128}, + {Sign::NEG, -147, 0xc290484c'4921a941'9f73f4e3'7357341b_u128}, + {Sign::NEG, -147, 0xa6c4d2dc'7616bdb0'31bf5d5f'815220e7_u128}, + {Sign::NEG, -147, 0x8af95c8e'47868b41'4b987ca5'fca242d7_u128}, + {Sign::NEG, -148, 0xde5bcac3'7ac6587d'19be3fab'd93832c5_u128}, + {Sign::NEG, -148, 0xa6c4daad'af3d75e0'8fd43f0c'9ce444d3_u128}, + {Sign::NEG, -149, 0xde5bd1b6'58ad4676'061cd853'e796bc2c_u128}, + {Sign::NEG, -150, 0xde5bd52f'c7d8545f'87d6afab'fba0644f_u128}, + {Sign::POS, 0, 0_u128}, + {Sign::POS, -150, 0xde5bdc22'a69d9e19'a9bf3200'1043629d_u128}, + {Sign::POS, -149, 0xde5bdf9c'1637d9ef'8014f0f3'60272d82_u128}, + {Sign::POS, -148, 0xa6c4ea50'24795bd2'fe94a02f'c639c0e3_u128}, + {Sign::POS, -148, 0xde5be68e'f5db7f99'bee710a5'ace7c8d4_u128}, + {Sign::POS, -147, 0x8af97245'3faf11e8'1a778d81'00437e4f_u128}, + {Sign::POS, -147, 0xa6c4f221'608e89fe'97d773f8'992f7051_u128}, + {Sign::POS, -147, 0xc29072db'dd9a0dd5'0c9ee584'1a3afa95_u128}, + {Sign::POS, -147, 0xde5bf474'b6df8331'7b644b13'993cf4ef_u128}, + {Sign::POS, -147, 0xfa2776eb'ec6ccfdb'3448f66e'2bd7a0ca_u128}, + {Sign::POS, -146, 0x8af97d20'bf27eccd'6a7ca5f1'a87a1a3c_u128}, + {Sign::POS, -146, 0x98df3f3a'b64b431d'245675fe'3061108f_u128}, + {Sign::POS, -146, 0xa6c501c3'dba75dc2'64136e97'019d0a3b_u128}, + {Sign::POS, -146, 0xb4aac4bc'2f432fa3'6cdadac4'd6925bd4_u128}, + {Sign::POS, -146, 0xc2908823'b125aba7'2899e237'91d29632_u128}, + {Sign::POS, -146, 0xd0764bfa'6155c4b5'28039e1f'0323a4c1_u128}, + {Sign::POS, -146, 0xde5c1040'3fda6db5'a2912e03'afc8cc28_u128}, + {Sign::POS, -146, 0xec41d4f5'4cba9991'7681cc9f'9e0d89f9_u128}, + {Sign::POS, -146, 
0xfa279a19'87fd3b32'28dae4b7'241255e1_u128}, + {Sign::POS, -145, 0x8406afd6'78d4a2c0'f2b412f8'dceda28e_u128}, + {Sign::POS, -145, 0x8af992d7'c4e2d5b5'bf5dccd9'67504857_u128}, + {Sign::POS, -145, 0x91ec7610'a82cafed'3716dbf9'50b07f85_u128}, + {Sign::POS, -145, 0x98df5981'22b5aadd'69eebe0b'8e5b18e1_u128}, + {Sign::POS, -145, 0x9fd23d29'34813ffc'bb583ce6'5af56beb_u128}, + {Sign::POS, -145, 0xa6c52108'dd92e8c1'e22978ef'a7a962a0_u128}, + {Sign::POS, -145, 0xadb80520'1dee1ea3'e89bf389'8ef27836_u128}, + {Sign::POS, -145, 0xb4aae96e'f5965b1a'2c4c997e'c90bab0b_u128}, + {Sign::POS, -145, 0xbb9dcdf5'648f179c'5e3bcd6f'21fe6224_u128}, + {Sign::POS, -145, 0xc290b2b3'6adbcda2'82cd723b'f1524680_u128}, + {Sign::POS, -145, 0xc98397a9'087ff6a4'f1c8f574'935e109b_u128}, + {Sign::POS, -145, 0xd0767cd6'3d7f0c1c'565959c2'e4394a59_u128}, + {Sign::POS, -145, 0xd769623b'09dc8781'af0d4157'bc4f05be_u128}, + {Sign::POS, -145, 0xde5c47d7'6d9be24e'4dd6f857'6e9188b8_u128}, + {Sign::POS, -145, 0xe54f2dab'68c095fb'd80c7f46'484eee3d_u128}, + {Sign::POS, -145, 0xec4213b6'fb4e1c04'46679575'12a6bd26_u128}, + {Sign::POS, -145, 0xf334f9fa'2547ede1'e505c36d'95a074fa_u128}, + {Sign::POS, -145, 0xfa27e074'e6b1850f'5368655f'1ce3110b_u128}, + {Sign::POS, -144, 0x808d6393'9fc72d83'c23a5ac5'7f06c112_u128}, + {Sign::POS, -144, 0x8406d708'97f0f4a2'df39eb58'90580f93_u128}, + {Sign::POS, -144, 0x87804a99'5bd7d4a2'cd896f3e'43f38669_u128}, + {Sign::POS, -144, 0x8af9be45'eb7d8a41'83b16ff7'eecace8c_u128}, + {Sign::POS, -144, 0x8e73320e'46e3d23d'21ec7ae8'ffa1531d_u128}, + {Sign::POS, -144, 0x91eca5f2'6e0c6953'f227268d'464ae907_u128}, + {Sign::POS, -144, 0x956619f2'60f90c44'680017af'3bbaf2d3_u128}, + {Sign::POS, -144, 0x98df8e0e'1fab77cd'20c8069e'4ae400de_u128}, + {Sign::POS, -144, 0x9c590245'aa2568ac'e381c465'1a67ee13_u128}, + {Sign::POS, -144, 0x9fd27699'00689ba2'a0e23fff'd718794e_u128}, + {Sign::POS, -144, 0xa34beb08'2276cd6d'73508b92'7f485b97_u128}, + {Sign::POS, -144, 0xa6c55f93'1051bacc'9ee5e19f'2eecdb55_u128}, + 
{Sign::POS, -144, 0xaa3ed439'c9fb207f'916daa3c'6c8fdc9d_u128}, + {Sign::POS, -144, 0xadb848fc'4f74bb45'e265804b'77126ed3_u128}, + {Sign::POS, -144, 0xb131bdda'a0c047df'52fd36ae'943fd7b4_u128}, + {Sign::POS, -144, 0xb4ab32d4'bddf830b'ce16dd7f'60311bf6_u128}, + {Sign::POS, -144, 0xb824a7ea'a6d4298b'6846c745'1d8105ac_u128}, + {Sign::POS, -144, 0xbb9e1d1c'5b9ff81e'5fd38e2b'0650a884_u128}, + {Sign::POS, -144, 0xbf179269'dc44ab85'1cb61936'9e1c641f_u128}, + {Sign::POS, -144, 0xc29107d3'28c40080'3099a17e'0461648c_u128}, + {Sign::POS, -144, 0xc60a7d58'411fb3d0'56dbb75e'4813a12b_u128}, + {Sign::POS, -144, 0xc983f2f9'25598236'748c47b1'bbe45a07_u128}, + {Sign::POS, -144, 0xccfd68b5'd5732873'986da106'4b5913e1_u128}, + {Sign::POS, -144, 0xd076de8e'516e6348'faf478d3'd0b31300_u128}, + {Sign::POS, -144, 0xd3f05482'994cef77'fe47f0b2'6ba754ff_u128}, + {Sign::POS, -144, 0xd769ca92'ad1089c2'2e419b90'd8e709b7_u128}, + {Sign::POS, -144, 0xdae340be'8cbaeee9'406d82ea'ca788b6f_u128}, + {Sign::POS, -144, 0xde5cb706'384ddbaf'140a2bff'40e0d670_u128}, + {Sign::POS, -144, 0xe1d62d69'afcb0cd5'b2089d06'e51d8034_u128}, + {Sign::POS, -144, 0xe54fa3e8'f3343f1f'4d0c626a'636f2e4f_u128}, + {Sign::POS, -144, 0xe8c91a84'028b2f4e'416b93f8'c6f48d30_u128}, + {Sign::POS, -144, 0xec42913a'ddd19a25'152eda1d'd615c6f5_u128}, + {Sign::POS, -144, 0xefbc080d'85093c66'78117318'6fc07a66_u128}, + {Sign::POS, -144, 0xf3357efb'f833d2d5'43813830'e974324d_u128}, + {Sign::POS, -144, 0xf6aef606'37531a34'7a9ea2ef'6e1f5d41_u128}, + {Sign::POS, -144, 0xfa286d2c'4268cf47'4a3cd252'5dccc623_u128}, + {Sign::POS, -144, 0xfda1e46e'1976aed1'08e19004'ae218d5d_u128}, + {Sign::POS, -143, 0x808dade5'de3f3aca'9b62aaca'25d5d18a_u128}, + {Sign::POS, -143, 0x824a69a2'95c0f02b'bee9a8d4'3e00613c_u128}, + {Sign::POS, -143, 0x8407256d'334155ed'd8d4b69c'2056f729_u128}, + {Sign::POS, -143, 0x85c3e145'b6c14a72'e7cc2860'5d7bb77e_u128}, + {Sign::POS, -143, 0x87809d2c'2041ac1c'ff51b4bd'c834a8f1_u128}, + {Sign::POS, -143, 
0x893d5920'6fc3594e'47c0774a'a81c3561_u128}, + {Sign::POS, -143, 0x8afa1522'a5473068'fe4cf331'ecb9eb62_u128}, }, // -log10(r) for the fourth step, generated by SageMath with: // // for i in range(-65, 65): // r = 2^-28 * round( 2^28 / (1 + i*2^(-28)) ); // s, m, e = RealField(128)(r).log10().sign_mantissa_exponent(); - // print("{Sign::POS," if (s == -1) else "{Sign::NEG,", e, ", - // MType({", hex(m % 2^64), ",", hex((m >> 64) % 2^64), "})},"); + // print("{Sign::POS," if (s == -1) else "{Sign::NEG,", e, ",", + // format_hex(m), "},"); /* .step_4 = */ { - {Sign::NEG, -151, MType({0xe471a82bbedbe0ae, 0xe1d5464122cf95a4})}, - {Sign::NEG, -151, MType({0xaf6e93be8e4c1764, 0xde5bd6ec7f7bc110})}, - {Sign::NEG, -151, MType({0xe44848f0a5779499, 0xdae26797a490f80e})}, - {Sign::NEG, -151, MType({0x90205533f4e70566, 0xd768f842920f3a98})}, - {Sign::NEG, -151, MType({0xc01844ace3729e48, 0xd3ef88ed47f688a6})}, - {Sign::NEG, -151, MType({0x8151a2324e41c7c4, 0xd0761997c646e232})}, - {Sign::NEG, -151, MType({0xe0edf74d88cacafd, 0xccfcaa420d004734})}, - {Sign::NEG, -151, MType({0xec0ecc3a5cd27e58, 0xc9833aec1c22b7a6})}, - {Sign::NEG, -151, MType({0xafd5a7e70a6bf214, 0xc609cb95f3ae3381})}, - {Sign::NEG, -151, MType({0x39640ff447f81ceb, 0xc2905c3f93a2babe})}, - {Sign::NEG, -151, MType({0x95db88b5422588b1, 0xbf16ece8fc004d55})}, - {Sign::NEG, -151, MType({0xd25d952f9beffeec, 0xbb9d7d922cc6eb40})}, - {Sign::NEG, -151, MType({0xfc0bb71b6ea03578, 0xb8240e3b25f69478})}, - {Sign::NEG, -151, MType({0x20076ee349cb7b20, 0xb4aa9ee3e78f48f7})}, - {Sign::NEG, -151, MType({0x4b723ba43353643d, 0xb1312f8c719108b4})}, - {Sign::NEG, -151, MType({0x8b6d9b2da7657754, 0xadb7c034c3fbd3a9})}, - {Sign::NEG, -151, MType({0xed1b0a01987ad9b4, 0xaa3e50dcdecfa9cf})}, - {Sign::NEG, -151, MType({0x7d9c03546f57fc11, 0xa6c4e184c20c8b20})}, - {Sign::NEG, -151, MType({0x4a12010d0b0c4727, 0xa34b722c6db27794})}, - {Sign::NEG, -151, MType({0x5f9e7bc4c0f1c851, 0x9fd202d3e1c16f24})}, - {Sign::NEG, -151, 
MType({0xcb62eac75cacde29, 0x9c58937b1e3971c9})}, - {Sign::NEG, -151, MType({0x9a80c413202be52a, 0x98df2422231a7f7d})}, - {Sign::NEG, -151, MType({0xda197c58c3a6e445, 0x9565b4c8f0649838})}, - {Sign::NEG, -151, MType({0x974e86fb759f3988, 0x91ec456f8617bbf4})}, - {Sign::NEG, -151, MType({0xdf415610dadf46b3, 0x8e72d615e433eaa9})}, - {Sign::NEG, -151, MType({0xbf135a610e7a1ddc, 0x8af966bc0ab92451})}, - {Sign::NEG, -151, MType({0x43e60366a1cb2e09, 0x877ff761f9a768e5})}, - {Sign::NEG, -151, MType({0x7adabf4e9c75efce, 0x84068807b0feb85d})}, - {Sign::NEG, -151, MType({0x7112faf87c6591ee, 0x808d18ad30bf12b3})}, - {Sign::NEG, -152, MType({0x676043ec6b994be5, 0xfa2752a4f1d0efc0})}, - {Sign::NEG, -152, MType({0x9fa73d186649999d, 0xf33473ef12f5cfb9})}, - {Sign::NEG, -152, MType({0xa53db362aa5cc6f0, 0xec419538c4ecc544})}, - {Sign::NEG, -152, MType({0x9266761de5e05f13, 0xe54eb68207b5d053})}, - {Sign::NEG, -152, MType({0x81645201b36e17ba, 0xde5bd7cadb50f0d8})}, - {Sign::NEG, -152, MType({0x8c7a112a9a2b2a52, 0xd768f9133fbe26c5})}, - {Sign::NEG, -152, MType({0xcdea7b1a0dc7ad42, 0xd0761a5b34fd720c})}, - {Sign::NEG, -152, MType({0x5ff854b66e7ded1f, 0xc9833ba2bb0ed2a0})}, - {Sign::NEG, -152, MType({0x5ce6604b0911c5ed, 0xc2905ce9d1f24872})}, - {Sign::NEG, -152, MType({0xdef75d8816cffc59, 0xbb9d7e3079a7d374})}, - {Sign::NEG, -152, MType({0x6e0982bd8d96ef, 0xb4aa9f76b22f739a})}, - {Sign::NEG, -152, MType({0xdb8d1eb50fa7375c, 0xadb7c0bc7b8928d3})}, - {Sign::NEG, -152, MType({0x8a9754fe0c0073a7, 0xa6c4e201d5b4f314})}, - {Sign::NEG, -152, MType({0x27cf61a19e032f69, 0x9fd20346c0b2d24e})}, - {Sign::NEG, -152, MType({0xcd77f7489d9ef50b, 0x98df248b3c82c672})}, - {Sign::NEG, -152, MType({0x95d3c600cf484f03, 0x91ec45cf4924cf74})}, - {Sign::NEG, -152, MType({0x9b257b3ce3f82109, 0x8af96712e698ed45})}, - {Sign::NEG, -152, MType({0xf7afc1d4792b015a, 0x8406885614df1fd7})}, - {Sign::NEG, -153, MType({0x8b6a840831c123d8, 0xfa275331a7eece3b})}, - {Sign::NEG, -153, MType({0x3ef142da7335b35a, 
0xec4195b647c38612})}, - {Sign::NEG, -153, MType({0x3e79062c7cbb3b7d, 0xde5bd83a093c6718})}, - {Sign::NEG, -153, MType({0xbe870ed4ed5b755b, 0xd0761abcec597131})}, - {Sign::NEG, -153, MType({0xf3a098743d20fb64, 0xc2905d3ef11aa442})}, - {Sign::NEG, -153, MType({0x124ad974bd15fbca, 0xb4aa9fc017800030})}, - {Sign::NEG, -153, MType({0x4f0b030a9742eb00, 0xa6c4e2405f8984dd})}, - {Sign::NEG, -153, MType({0xde664133cead362d, 0x98df24bfc937322e})}, - {Sign::NEG, -153, MType({0xf4e1bab83f55f5a1, 0x8af9673e54890808})}, - {Sign::NEG, -154, MType({0x8e0522533c713e98, 0xfa27537802fe0c9f})}, - {Sign::NEG, -154, MType({0x129bc1c6f293726e, 0xde5bd871a03259cf})}, - {Sign::NEG, -154, MType({0xe09182166eeb17eb, 0xc2905d6980aef768})}, - {Sign::NEG, -154, MType({0x60f08720313daa3f, 0xa6c4e25fa473e535})}, - {Sign::NEG, -154, MType({0xfcc2ea566b3af38b, 0x8af967540b8122fc})}, - {Sign::NEG, -155, MType({0x3a25757e00f4e3a0, 0xde5bd88d6bad6110})}, - {Sign::NEG, -155, MType({0x55d3f9e70cf177b8, 0xa6c4e26f46e91b3e})}, - {Sign::NEG, -156, MType({0x3d4aac85125398d0, 0xde5bd89b516ae82a})}, - {Sign::NEG, -157, MType({0x9ab5a849a06f400d, 0xde5bd8a24449ac95})}, - {Sign::POS, 0, MType({0x0, 0x0})}, - {Sign::POS, -157, MType({0xd3cc88fd4ef34c2, 0xde5bd8b02a073729})}, - {Sign::POS, -156, MType({0x225916c2b3f33c90, 0xde5bd8b71ce5fd51})}, - {Sign::POS, -155, MType({0x17847f98acf08d54, 0xa6c4e28e8bd3930a})}, - {Sign::POS, -155, MType({0x44397830931fddd, 0xde5bd8c502a38b5e})}, - {Sign::POS, -154, MType({0xc2ab385913176984, 0x8af9677f79717409})}, - {Sign::POS, -154, MType({0xe454dec82bde52e5, 0xa6c4e29e2e48d4cc})}, - {Sign::POS, -154, MType({0xfe1522b0470d7d7f, 0xc2905dbe9fd7e82f})}, - {Sign::POS, -154, MType({0xa6e2721f2afc3cce, 0xde5bd8e0ce1eae6a})}, - {Sign::POS, -154, MType({0x75b3458eec3c106c, 0xfa275404b91d27b4})}, - {Sign::POS, -153, MType({0x80bf0ff2f6cd9f93, 0x8af967953069aa22})}, - {Sign::POS, -153, MType({0xf09cc73b7013b906, 0x98df2528e2a09a29})}, - {Sign::POS, -153, MType({0x55ee1480619827c4, 
0xa6c4e2bd7333640c})}, - {Sign::POS, -153, MType({0x7c2e48d772250b3c, 0xb4aaa052e22207e5})}, - {Sign::POS, -153, MType({0x2ed8ba8c6fa81c98, 0xc2905de92f6c85d1})}, - {Sign::POS, -153, MType({0x3968c5214f33fc4f, 0xd0761b805b12ddeb})}, - {Sign::POS, -153, MType({0x6759c94e2d017fad, 0xde5bd9186515104f})}, - {Sign::POS, -153, MType({0x84272d014c70fe58, 0xec4196b14d731d19})}, - {Sign::POS, -153, MType({0x5b4c5b5f180b9fe1, 0xfa27544b142d0465})}, - {Sign::POS, -152, MType({0x5c22626110c254a4, 0x840688f2dca16327})}, - {Sign::POS, -152, MType({0xb345ef5d90dd6545, 0x8af967c09e5a3178})}, - {Sign::POS, -152, MType({0x98ce92087c5cb614, 0x91ec468ecf40ed34})}, - {Sign::POS, -152, MType({0xf27a0a6056dcfe57, 0x98df255d6f559668})}, - {Sign::POS, -152, MType({0xa6061afeb7929f24, 0x9fd2042c7e982d23})}, - {Sign::POS, -152, MType({0x99308918494a4a20, 0xa6c4e2fbfd08b172})}, - {Sign::POS, -152, MType({0xb1b71c7cca69a844, 0xadb7c1cbeaa72363})}, - {Sign::POS, -152, MType({0xd5579f970cf000a9, 0xb4aaa09c47738304})}, - {Sign::POS, -152, MType({0xe9cfdf6cf676df42, 0xbb9d7f6d136dd063})}, - {Sign::POS, -152, MType({0xd4ddab9f8032bbab, 0xc2905e3e4e960b8e})}, - {Sign::POS, -152, MType({0x7c3ed66ab6f39fe9, 0xc9833d0ff8ec3493})}, - {Sign::POS, -152, MType({0xc5b134a5bb25cf2e, 0xd0761be212704b7f})}, - {Sign::POS, -152, MType({0x96f29dc2c0d26ca0, 0xd768fab49b225061})}, - {Sign::POS, -152, MType({0xd5c0ebcf0fa0221e, 0xde5bd98793024346})}, - {Sign::POS, -152, MType({0x67d9fb7302d3c705, 0xe54eb85afa10243d})}, - {Sign::POS, -152, MType({0x32fbabf2095106f1, 0xec41972ed04bf353})}, - {Sign::POS, -152, MType({0x1ce3df2aa59b0889, 0xf334760315b5b096})}, - {Sign::POS, -152, MType({0xb5079966dd5143e, 0xfa2754d7ca4d5c14})}, - {Sign::POS, -151, MType({0x71ffb12505e19d89, 0x808d19d677097aed})}, - {Sign::POS, -151, MType({0x4657417a9e657eae, 0x8406894140833efc})}, - {Sign::POS, -151, MType({0x758de3f168f9f8c9, 0x877ff8ac4193fa3d})}, - {Sign::POS, -151, MType({0xf2828ffc57f43581, 0x8af968177a3bacb7})}, - {Sign::POS, 
-151, MType({0xb0143e5be77b1053, 0x8e72d782ea7a5672})}, - {Sign::POS, -151, MType({0xa121e91e1d8769ef, 0x91ec46ee924ff774})}, - {Sign::POS, -151, MType({0xb88a8b9e89e47b9c, 0x9565b65a71bc8fc4})}, - {Sign::POS, -151, MType({0xe92d228646302a9c, 0x98df25c688c01f69})}, - {Sign::POS, -151, MType({0x25e8abcbf5db5b8c, 0x9c589532d75aa66b})}, - {Sign::POS, -151, MType({0x619c26b3c62a45c8, 0x9fd2049f5d8c24cf})}, - {Sign::POS, -151, MType({0x8f2693cf6e34c6cc, 0xa34b740c1b549a9d})}, - {Sign::POS, -151, MType({0xa166f4fe2ee6b59a, 0xa6c4e37910b407dc})}, - {Sign::POS, -151, MType({0x8b3c4d6cd3003616, 0xaa3e52e63daa6c93})}, - {Sign::POS, -151, MType({0x3f85a195af160c71, 0xadb7c253a237c8c9})}, - {Sign::POS, -151, MType({0xb121f740a191f084, 0xb13131c13e5c1c84})}, - {Sign::POS, -151, MType({0xd2f0558312b2e136, 0xb4aaa12f121767cc})}, - {Sign::POS, -151, MType({0x97cfc4bff48d77de, 0xb824109d1d69aaa8})}, - {Sign::POS, -151, MType({0xf29f4ea7c30c3ba5, 0xbb9d800b6052e51e})}, - {Sign::POS, -151, MType({0xd63dfe3883eff4e9, 0xbf16ef79dad31736})}, - {Sign::POS, -151, MType({0x358adfbdc6d0009f, 0xc2905ee88cea40f7})}, - {Sign::POS, -151, MType({0x36500d0a51aa3b6, 0xc609ce5776986267})}, - {Sign::POS, -151, MType({0x32ab7057c2155e78, 0xc9833dc697dd7b8d})}, - {Sign::POS, -151, MType({0xb63d3e874add3ff0, 0xccfcad35f0b98c70})}, - {Sign::POS, -151, MType({0x80f97ce0f6673948, 0xd0761ca5812c9518})}, - {Sign::POS, -151, MType({0x85bf3e340580712d, 0xd3ef8c154936958b})}, - {Sign::POS, -151, MType({0xb76d969d42ce9734, 0xd768fb8548d78dd0})}, - {Sign::POS, -151, MType({0x8e39b8702d0373a, 0xdae26af5800f7def})}, - {Sign::POS, -151, MType({0x6d0063a923dd0cc6, 0xde5bda65eede65ed})}, + {Sign::NEG, -151, 0xe1d54641'22cf95a4'e471a82b'bedbe0ae_u128}, + {Sign::NEG, -151, 0xde5bd6ec'7f7bc110'af6e93be'8e4c1764_u128}, + {Sign::NEG, -151, 0xdae26797'a490f80e'e44848f0'a5779499_u128}, + {Sign::NEG, -151, 0xd768f842'920f3a98'90205533'f4e70566_u128}, + {Sign::NEG, -151, 0xd3ef88ed'47f688a6'c01844ac'e3729e48_u128}, + 
{Sign::NEG, -151, 0xd0761997'c646e232'8151a232'4e41c7c4_u128}, + {Sign::NEG, -151, 0xccfcaa42'0d004734'e0edf74d'88cacafd_u128}, + {Sign::NEG, -151, 0xc9833aec'1c22b7a6'ec0ecc3a'5cd27e58_u128}, + {Sign::NEG, -151, 0xc609cb95'f3ae3381'afd5a7e7'0a6bf214_u128}, + {Sign::NEG, -151, 0xc2905c3f'93a2babe'39640ff4'47f81ceb_u128}, + {Sign::NEG, -151, 0xbf16ece8'fc004d55'95db88b5'422588b1_u128}, + {Sign::NEG, -151, 0xbb9d7d92'2cc6eb40'd25d952f'9beffeec_u128}, + {Sign::NEG, -151, 0xb8240e3b'25f69478'fc0bb71b'6ea03578_u128}, + {Sign::NEG, -151, 0xb4aa9ee3'e78f48f7'20076ee3'49cb7b20_u128}, + {Sign::NEG, -151, 0xb1312f8c'719108b4'4b723ba4'3353643d_u128}, + {Sign::NEG, -151, 0xadb7c034'c3fbd3a9'8b6d9b2d'a7657754_u128}, + {Sign::NEG, -151, 0xaa3e50dc'decfa9cf'ed1b0a01'987ad9b4_u128}, + {Sign::NEG, -151, 0xa6c4e184'c20c8b20'7d9c0354'6f57fc11_u128}, + {Sign::NEG, -151, 0xa34b722c'6db27794'4a12010d'0b0c4727_u128}, + {Sign::NEG, -151, 0x9fd202d3'e1c16f24'5f9e7bc4'c0f1c851_u128}, + {Sign::NEG, -151, 0x9c58937b'1e3971c9'cb62eac7'5cacde29_u128}, + {Sign::NEG, -151, 0x98df2422'231a7f7d'9a80c413'202be52a_u128}, + {Sign::NEG, -151, 0x9565b4c8'f0649838'da197c58'c3a6e445_u128}, + {Sign::NEG, -151, 0x91ec456f'8617bbf4'974e86fb'759f3988_u128}, + {Sign::NEG, -151, 0x8e72d615'e433eaa9'df415610'dadf46b3_u128}, + {Sign::NEG, -151, 0x8af966bc'0ab92451'bf135a61'0e7a1ddc_u128}, + {Sign::NEG, -151, 0x877ff761'f9a768e5'43e60366'a1cb2e09_u128}, + {Sign::NEG, -151, 0x84068807'b0feb85d'7adabf4e'9c75efce_u128}, + {Sign::NEG, -151, 0x808d18ad'30bf12b3'7112faf8'7c6591ee_u128}, + {Sign::NEG, -152, 0xfa2752a4'f1d0efc0'676043ec'6b994be5_u128}, + {Sign::NEG, -152, 0xf33473ef'12f5cfb9'9fa73d18'6649999d_u128}, + {Sign::NEG, -152, 0xec419538'c4ecc544'a53db362'aa5cc6f0_u128}, + {Sign::NEG, -152, 0xe54eb682'07b5d053'9266761d'e5e05f13_u128}, + {Sign::NEG, -152, 0xde5bd7ca'db50f0d8'81645201'b36e17ba_u128}, + {Sign::NEG, -152, 0xd768f913'3fbe26c5'8c7a112a'9a2b2a52_u128}, + {Sign::NEG, -152, 
0xd0761a5b'34fd720c'cdea7b1a'0dc7ad42_u128}, + {Sign::NEG, -152, 0xc9833ba2'bb0ed2a0'5ff854b6'6e7ded1f_u128}, + {Sign::NEG, -152, 0xc2905ce9'd1f24872'5ce6604b'0911c5ed_u128}, + {Sign::NEG, -152, 0xbb9d7e30'79a7d374'def75d88'16cffc59_u128}, + {Sign::NEG, -152, 0xb4aa9f76'b22f739a'006e0982'bd8d96ef_u128}, + {Sign::NEG, -152, 0xadb7c0bc'7b8928d3'db8d1eb5'0fa7375c_u128}, + {Sign::NEG, -152, 0xa6c4e201'd5b4f314'8a9754fe'0c0073a7_u128}, + {Sign::NEG, -152, 0x9fd20346'c0b2d24e'27cf61a1'9e032f69_u128}, + {Sign::NEG, -152, 0x98df248b'3c82c672'cd77f748'9d9ef50b_u128}, + {Sign::NEG, -152, 0x91ec45cf'4924cf74'95d3c600'cf484f03_u128}, + {Sign::NEG, -152, 0x8af96712'e698ed45'9b257b3c'e3f82109_u128}, + {Sign::NEG, -152, 0x84068856'14df1fd7'f7afc1d4'792b015a_u128}, + {Sign::NEG, -153, 0xfa275331'a7eece3b'8b6a8408'31c123d8_u128}, + {Sign::NEG, -153, 0xec4195b6'47c38612'3ef142da'7335b35a_u128}, + {Sign::NEG, -153, 0xde5bd83a'093c6718'3e79062c'7cbb3b7d_u128}, + {Sign::NEG, -153, 0xd0761abc'ec597131'be870ed4'ed5b755b_u128}, + {Sign::NEG, -153, 0xc2905d3e'f11aa442'f3a09874'3d20fb64_u128}, + {Sign::NEG, -153, 0xb4aa9fc0'17800030'124ad974'bd15fbca_u128}, + {Sign::NEG, -153, 0xa6c4e240'5f8984dd'4f0b030a'9742eb00_u128}, + {Sign::NEG, -153, 0x98df24bf'c937322e'de664133'cead362d_u128}, + {Sign::NEG, -153, 0x8af9673e'54890808'f4e1bab8'3f55f5a1_u128}, + {Sign::NEG, -154, 0xfa275378'02fe0c9f'8e052253'3c713e98_u128}, + {Sign::NEG, -154, 0xde5bd871'a03259cf'129bc1c6'f293726e_u128}, + {Sign::NEG, -154, 0xc2905d69'80aef768'e0918216'6eeb17eb_u128}, + {Sign::NEG, -154, 0xa6c4e25f'a473e535'60f08720'313daa3f_u128}, + {Sign::NEG, -154, 0x8af96754'0b8122fc'fcc2ea56'6b3af38b_u128}, + {Sign::NEG, -155, 0xde5bd88d'6bad6110'3a25757e'00f4e3a0_u128}, + {Sign::NEG, -155, 0xa6c4e26f'46e91b3e'55d3f9e7'0cf177b8_u128}, + {Sign::NEG, -156, 0xde5bd89b'516ae82a'3d4aac85'125398d0_u128}, + {Sign::NEG, -157, 0xde5bd8a2'4449ac95'9ab5a849'a06f400d_u128}, + {Sign::POS, 0, 0_u128}, + {Sign::POS, -157, 
0xde5bd8b0'2a073729'0d3cc88f'd4ef34c2_u128}, + {Sign::POS, -156, 0xde5bd8b7'1ce5fd51'225916c2'b3f33c90_u128}, + {Sign::POS, -155, 0xa6c4e28e'8bd3930a'17847f98'acf08d54_u128}, + {Sign::POS, -155, 0xde5bd8c5'02a38b5e'04439783'0931fddd_u128}, + {Sign::POS, -154, 0x8af9677f'79717409'c2ab3859'13176984_u128}, + {Sign::POS, -154, 0xa6c4e29e'2e48d4cc'e454dec8'2bde52e5_u128}, + {Sign::POS, -154, 0xc2905dbe'9fd7e82f'fe1522b0'470d7d7f_u128}, + {Sign::POS, -154, 0xde5bd8e0'ce1eae6a'a6e2721f'2afc3cce_u128}, + {Sign::POS, -154, 0xfa275404'b91d27b4'75b3458e'ec3c106c_u128}, + {Sign::POS, -153, 0x8af96795'3069aa22'80bf0ff2'f6cd9f93_u128}, + {Sign::POS, -153, 0x98df2528'e2a09a29'f09cc73b'7013b906_u128}, + {Sign::POS, -153, 0xa6c4e2bd'7333640c'55ee1480'619827c4_u128}, + {Sign::POS, -153, 0xb4aaa052'e22207e5'7c2e48d7'72250b3c_u128}, + {Sign::POS, -153, 0xc2905de9'2f6c85d1'2ed8ba8c'6fa81c98_u128}, + {Sign::POS, -153, 0xd0761b80'5b12ddeb'3968c521'4f33fc4f_u128}, + {Sign::POS, -153, 0xde5bd918'6515104f'6759c94e'2d017fad_u128}, + {Sign::POS, -153, 0xec4196b1'4d731d19'84272d01'4c70fe58_u128}, + {Sign::POS, -153, 0xfa27544b'142d0465'5b4c5b5f'180b9fe1_u128}, + {Sign::POS, -152, 0x840688f2'dca16327'5c226261'10c254a4_u128}, + {Sign::POS, -152, 0x8af967c0'9e5a3178'b345ef5d'90dd6545_u128}, + {Sign::POS, -152, 0x91ec468e'cf40ed34'98ce9208'7c5cb614_u128}, + {Sign::POS, -152, 0x98df255d'6f559668'f27a0a60'56dcfe57_u128}, + {Sign::POS, -152, 0x9fd2042c'7e982d23'a6061afe'b7929f24_u128}, + {Sign::POS, -152, 0xa6c4e2fb'fd08b172'99308918'494a4a20_u128}, + {Sign::POS, -152, 0xadb7c1cb'eaa72363'b1b71c7c'ca69a844_u128}, + {Sign::POS, -152, 0xb4aaa09c'47738304'd5579f97'0cf000a9_u128}, + {Sign::POS, -152, 0xbb9d7f6d'136dd063'e9cfdf6c'f676df42_u128}, + {Sign::POS, -152, 0xc2905e3e'4e960b8e'd4ddab9f'8032bbab_u128}, + {Sign::POS, -152, 0xc9833d0f'f8ec3493'7c3ed66a'b6f39fe9_u128}, + {Sign::POS, -152, 0xd0761be2'12704b7f'c5b134a5'bb25cf2e_u128}, + {Sign::POS, -152, 0xd768fab4'9b225061'96f29dc2'c0d26ca0_u128}, + 
{Sign::POS, -152, 0xde5bd987'93024346'd5c0ebcf'0fa0221e_u128}, + {Sign::POS, -152, 0xe54eb85a'fa10243d'67d9fb73'02d3c705_u128}, + {Sign::POS, -152, 0xec41972e'd04bf353'32fbabf2'095106f1_u128}, + {Sign::POS, -152, 0xf3347603'15b5b096'1ce3df2a'a59b0889_u128}, + {Sign::POS, -152, 0xfa2754d7'ca4d5c14'0b507996'6dd5143e_u128}, + {Sign::POS, -151, 0x808d19d6'77097aed'71ffb125'05e19d89_u128}, + {Sign::POS, -151, 0x84068941'40833efc'4657417a'9e657eae_u128}, + {Sign::POS, -151, 0x877ff8ac'4193fa3d'758de3f1'68f9f8c9_u128}, + {Sign::POS, -151, 0x8af96817'7a3bacb7'f2828ffc'57f43581_u128}, + {Sign::POS, -151, 0x8e72d782'ea7a5672'b0143e5b'e77b1053_u128}, + {Sign::POS, -151, 0x91ec46ee'924ff774'a121e91e'1d8769ef_u128}, + {Sign::POS, -151, 0x9565b65a'71bc8fc4'b88a8b9e'89e47b9c_u128}, + {Sign::POS, -151, 0x98df25c6'88c01f69'e92d2286'46302a9c_u128}, + {Sign::POS, -151, 0x9c589532'd75aa66b'25e8abcb'f5db5b8c_u128}, + {Sign::POS, -151, 0x9fd2049f'5d8c24cf'619c26b3'c62a45c8_u128}, + {Sign::POS, -151, 0xa34b740c'1b549a9d'8f2693cf'6e34c6cc_u128}, + {Sign::POS, -151, 0xa6c4e379'10b407dc'a166f4fe'2ee6b59a_u128}, + {Sign::POS, -151, 0xaa3e52e6'3daa6c93'8b3c4d6c'd3003616_u128}, + {Sign::POS, -151, 0xadb7c253'a237c8c9'3f85a195'af160c71_u128}, + {Sign::POS, -151, 0xb13131c1'3e5c1c84'b121f740'a191f084_u128}, + {Sign::POS, -151, 0xb4aaa12f'121767cc'd2f05583'12b2e136_u128}, + {Sign::POS, -151, 0xb824109d'1d69aaa8'97cfc4bf'f48d77de_u128}, + {Sign::POS, -151, 0xbb9d800b'6052e51e'f29f4ea7'c30c3ba5_u128}, + {Sign::POS, -151, 0xbf16ef79'dad31736'd63dfe38'83eff4e9_u128}, + {Sign::POS, -151, 0xc2905ee8'8cea40f7'358adfbd'c6d0009f_u128}, + {Sign::POS, -151, 0xc609ce57'76986267'036500d0'a51aa3b6_u128}, + {Sign::POS, -151, 0xc9833dc6'97dd7b8d'32ab7057'c2155e78_u128}, + {Sign::POS, -151, 0xccfcad35'f0b98c70'b63d3e87'4add3ff0_u128}, + {Sign::POS, -151, 0xd0761ca5'812c9518'80f97ce0'f6673948_u128}, + {Sign::POS, -151, 0xd3ef8c15'4936958b'85bf3e34'0580712d_u128}, + {Sign::POS, -151, 
0xd768fb85'48d78dd0'b76d969d'42ce9734_u128}, + {Sign::POS, -151, 0xdae26af5'800f7def'08e39b87'02d0373a_u128}, + {Sign::POS, -151, 0xde5bda65'eede65ed'6d0063a9'23dd0cc6_u128}, }}; // > P = fpminimax(log10(1 + x)/x, 3, [|128...|], [-0x1.0002143p-29 , 0x1p-29]); @@ -702,10 +706,10 @@ const LogRR LOG10_TABLE = { // > dirtyinfnorm(log10(1 + x)/x - P, [-0x1.0002143p-29 , 0x1p-29]); // 0x1.64fb8...p-123 const Float128 BIG_COEFFS[4]{ - {Sign::NEG, -131, MType({0x6903c4ce1582517d, 0xde5bd8a9373f89a7})}, - {Sign::POS, -130, MType({0xb8a21791624e2e8a, 0x943d3b1b7a1af679})}, - {Sign::NEG, -130, MType({0x355baaafabc25990, 0xde5bd8a937287195})}, - {Sign::POS, -129, MType({0x355baaafad33dbd9, 0xde5bd8a937287195})}, + {Sign::NEG, -131, 0xde5bd8a9'373f89a7'6903c4ce'1582517d_u128}, + {Sign::POS, -130, 0x943d3b1b'7a1af679'b8a21791'624e2e8a_u128}, + {Sign::NEG, -130, 0xde5bd8a9'37287195'355baaaf'abc25990_u128}, + {Sign::POS, -129, 0xde5bd8a9'37287195'355baaaf'ad33dbd9_u128}, }; // Reuse the output of the fast pass range reduction. diff --git a/libc/src/math/generic/log1p.cpp b/libc/src/math/generic/log1p.cpp index 0edab70124c955..12710cfe0de21a 100644 --- a/libc/src/math/generic/log1p.cpp +++ b/libc/src/math/generic/log1p.cpp @@ -14,6 +14,7 @@ #include "src/__support/FPUtil/dyadic_float.h" #include "src/__support/FPUtil/multiply_add.h" #include "src/__support/common.h" +#include "src/__support/integer_literals.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include "common_constants.h" @@ -22,19 +23,24 @@ namespace LIBC_NAMESPACE { // 128-bit precision dyadic floating point numbers. using Float128 = typename fputil::DyadicFloat<128>; -using MType = typename Float128::MantissaType; using Sign = fputil::Sign; +using LIBC_NAMESPACE::operator""_u128; namespace { // Extra errors from P is from using x^2 to reduce evaluation latency. 
constexpr double P_ERR = 0x1.0p-50; -// log(2) with 128-bit prepcision generated by SageMath with: -// sage: (s, m, e) = RealField(128)(2).log().sign_mantissa_exponent(); -// sage: print("MType({", hex(m % 2^64), ",", hex((m >> 64) % 2^64), "})"); +// log(2) with 128-bit precision generated by SageMath with: +// def format_hex(value): +// l = hex(value)[2:] +// n = 8 +// x = [l[i:i + n] for i in range(0, len(l), n)] +// return "0x" + "'".join(x) + "_uint128" +// (s, m, e) = RealField(128)(2).log().sign_mantissa_exponent(); +// print(format_hex(m)); const Float128 LOG_2(Sign::POS, /*exponent=*/-128, /*mantissa=*/ - MType({0xc9e3b39803f2f6af, 0xb17217f7d1cf79ab})); + 0xb17217f7'd1cf79ab'c9e3b398'03f2f6af_u128); // R1[i] = 2^-8 * nearestint( 2^8 / (1 + i * 2^-7) ) constexpr double R1[129] = { @@ -245,139 +251,137 @@ constexpr double P_COEFFS[6] = {-0x1p-1, // for i in range(129): // r = 2^-8 * round( 2^8 / (1 + i*2^(-7)) ); // s, m, e = RealField(128)(r).log().sign_mantissa_exponent(); -// print("{Sign::POS,", e, ", MType({", hex(m % 2^64), ",", hex((m >> 64) % -// 2^64), -// "})},"); +// print("{Sign::POS,", e, ", format_hex(m), "},"); const Float128 LOG_R1[129] = { - {Sign::POS, 0, MType(0)}, - {Sign::POS, -134, MType({0x662d417ced007a46, 0x8080abac46f38946})}, - {Sign::POS, -133, MType({0x91d082dce3ddcd38, 0x8102b2c49ac23a4f})}, - {Sign::POS, -133, MType({0xda5f3cc0b3251dbd, 0xc24929464655f45c})}, - {Sign::POS, -132, MType({0xb9e3aea6c444ef07, 0x820aec4f3a222380})}, - {Sign::POS, -132, MType({0x521016bd904dc968, 0xa33576a16f1f4c64})}, - {Sign::POS, -132, MType({0x27cca0bcc06c2f92, 0xb3e4a796a5dac208})}, - {Sign::POS, -132, MType({0xa9dda17056e45ed5, 0xd5779687d887e0d1})}, - {Sign::POS, -132, MType({0x606d89093278a939, 0xf7518e0035c3dd83})}, - {Sign::POS, -131, MType({0xa7c9859530a45153, 0x8cb9de8a32ab368a})}, - {Sign::POS, -131, MType({0x976d3b5b45f6ca0b, 0x9defad3e8f73217a})}, - {Sign::POS, -131, MType({0x3e858f08597b3a69, 0xa6988ae903f562ed})}, - {Sign::POS, 
-131, MType({0x6a677b4c8bec22e1, 0xb8069857560707a3})}, - {Sign::POS, -131, MType({0xeaf51f66692844ba, 0xc99af2eaca4c4570})}, - {Sign::POS, -131, MType({0x46bbf837b4d320c6, 0xd273b2058de1bd49})}, - {Sign::POS, -131, MType({0x196ab34ce0bccd12, 0xe442c00de2591b47})}, - {Sign::POS, -131, MType({0x3f4e2e660317d55f, 0xed393b1c22351280})}, - {Sign::POS, -131, MType({0xc17bd40d8d9291ec, 0xff4489cedeab2ca6})}, - {Sign::POS, -130, MType({0x9c5a0fe396f40f1e, 0x88bc74113f23def1})}, - {Sign::POS, -130, MType({0x88713268840cbcc0, 0x8d515bf11fb94f1c})}, - {Sign::POS, -130, MType({0x65c0da506a088484, 0x968b08643409ceb6})}, - {Sign::POS, -130, MType({0x411a5b944aca8708, 0x9b2fe580ac80b17d})}, - {Sign::POS, -130, MType({0xa9fb6cf0ecb411b7, 0xa489ec199dab06f2})}, - {Sign::POS, -130, MType({0xcad2fb8d48054ae0, 0xa93f2f250dac67d1})}, - {Sign::POS, -130, MType({0x149767e410316d2c, 0xadfa035aa1ed8fdc})}, - {Sign::POS, -130, MType({0x34c7bc3d32750fde, 0xb780945bab55dce4})}, - {Sign::POS, -130, MType({0x8f6ebcfb2016a439, 0xbc4c6c2a226399ef})}, - {Sign::POS, -130, MType({0xaa8b6997a402bf30, 0xc5f57f59c7f46155})}, - {Sign::POS, -130, MType({0x2c507fb7a3d0bf6a, 0xcad2d6e7b80bf914})}, - {Sign::POS, -130, MType({0xd0cb02f33f79c16c, 0xcfb6203844b3209a})}, - {Sign::POS, -130, MType({0x58a98f2ad65bee9b, 0xd98ec2bade71e539})}, - {Sign::POS, -130, MType({0x4d57da945b5d0aaa, 0xde8439c1dec56877})}, - {Sign::POS, -130, MType({0x4e9a750b6b68781d, 0xe37fde37807b84e3})}, - {Sign::POS, -130, MType({0xc524848e3443e040, 0xe881bf932af3dac0})}, - {Sign::POS, -130, MType({0x3b020fa1820c9492, 0xf29877ff38809091})}, - {Sign::POS, -130, MType({0x54d2238f75f969b1, 0xf7ad6f26e7ff2ef7})}, - {Sign::POS, -130, MType({0xca0cdf301431b60f, 0xfcc8e3659d9bcbec})}, - {Sign::POS, -129, MType({0xf5bd0b5b3479d5f4, 0x80f572b1363487b9})}, - {Sign::POS, -129, MType({0x163ceae88f720f1e, 0x86216b3b0b17188b})}, - {Sign::POS, -129, MType({0x9c5a0fe396f40f1e, 0x88bc74113f23def1})}, - {Sign::POS, -129, MType({0xf7a5168126a58b9a, 
0x8b5ae65d67db9acd})}, - {Sign::POS, -129, MType({0x5147bdb6ddcaf59c, 0x8dfccb1ad35ca6ed})}, - {Sign::POS, -129, MType({0xae91aeba609c8877, 0x90a22b6875c6a1f7})}, - {Sign::POS, -129, MType({0xdf5bb3b60554e152, 0x934b1089a6dc93c1})}, - {Sign::POS, -129, MType({0x4a5004f3ef063313, 0x95f783e6e49a9cfa})}, - {Sign::POS, -129, MType({0xd878bbe3d392be25, 0x9b5b3bb5f088b766})}, - {Sign::POS, -129, MType({0x5b035eae273a855f, 0x9e1293b9998c1daa})}, - {Sign::POS, -129, MType({0xbb2438273918db7e, 0xa0cda11eaf46390d})}, - {Sign::POS, -129, MType({0xf698298adddd7f32, 0xa38c6e138e20d831})}, - {Sign::POS, -129, MType({0xe4f5275c2d15c21f, 0xa64f04f0b961df76})}, - {Sign::POS, -129, MType({0x8164c759686a2209, 0xa9157039c51ebe70})}, - {Sign::POS, -129, MType({0xf72ea07749ce6bd3, 0xabdfba9e468fd6f6})}, - {Sign::POS, -129, MType({0x7dd6e688ebb13b03, 0xaeadeefacaf97d35})}, - {Sign::POS, -129, MType({0x18ce51fff99479cd, 0xb1801859d56249dc})}, - {Sign::POS, -129, MType({0x2756eba00bc33978, 0xb45641f4e350a0d3})}, - {Sign::POS, -129, MType({0xbe1116c3466beb6d, 0xb730773578cb90b2})}, - {Sign::POS, -129, MType({0x49dc60b2b059a60b, 0xba0ec3b633dd8b09})}, - {Sign::POS, -129, MType({0x2efd17781bb3afec, 0xbcf13343e7d9ec7d})}, - {Sign::POS, -129, MType({0x37eda996244bccb0, 0xbfd7d1dec0a8df6f})}, - {Sign::POS, -129, MType({0x33337789d592e296, 0xc2c2abbb6e5fd56f})}, - {Sign::POS, -129, MType({0x1a18fb8f9f9ef280, 0xc5b1cd44596fa51e})}, - {Sign::POS, -129, MType({0x688ce7c1a75e341a, 0xc8a5431adfb44ca5})}, - {Sign::POS, -129, MType({0x2d7e9307c70c0668, 0xcb9d1a189ab56e76})}, - {Sign::POS, -129, MType({0x2d7e9307c70c0668, 0xcb9d1a189ab56e76})}, - {Sign::POS, -129, MType({0xef2f3f4f861ad6a9, 0xce995f50af69d861})}, - {Sign::POS, -129, MType({0x7f9d79f51dcc7301, 0xd19a201127d3c645})}, - {Sign::POS, -129, MType({0x5f53bd2e406e66e7, 0xd49f69e456cf1b79})}, - {Sign::POS, -129, MType({0xad88bba7d0cee8e0, 0xd7a94a92466e833a})}, - {Sign::POS, -129, MType({0x96c20cca6efe2ac5, 0xdab7d02231484a92})}, - {Sign::POS, 
-129, MType({0xf40a666c87842843, 0xddcb08dc0717d85b})}, - {Sign::POS, -129, MType({0x7fe8e1802aba24d6, 0xe0e30349fd1cec80})}, - {Sign::POS, -129, MType({0x7fe8e1802aba24d6, 0xe0e30349fd1cec80})}, - {Sign::POS, -129, MType({0x3eadb651b49ac53a, 0xe3ffce3a2aa64922})}, - {Sign::POS, -129, MType({0x304e1653e71d9973, 0xe72178c0323a1a0f})}, - {Sign::POS, -129, MType({0xe9a767a80d6d97e8, 0xea481236f7d35baf})}, - {Sign::POS, -129, MType({0x4f91cf4b33e42998, 0xed73aa4264b0ade9})}, - {Sign::POS, -129, MType({0x4f91cf4b33e42998, 0xed73aa4264b0ade9})}, - {Sign::POS, -129, MType({0xfc66eb6408ff6433, 0xf0a450d139366ca6})}, - {Sign::POS, -129, MType({0xac8d42f78d3e65d3, 0xf3da161eed6b9aaf})}, - {Sign::POS, -129, MType({0x5a470250d40ebe90, 0xf7150ab5a09f27f4})}, - {Sign::POS, -129, MType({0x5a470250d40ebe90, 0xf7150ab5a09f27f4})}, - {Sign::POS, -129, MType({0xb780a545a1b54dcf, 0xfa553f7018c966f2})}, - {Sign::POS, -129, MType({0x8f05924d258c14c5, 0xfd9ac57bd244217e})}, - {Sign::POS, -128, MType({0x89d1b09c70c4010a, 0x8072d72d903d588b})}, - {Sign::POS, -128, MType({0x89d1b09c70c4010a, 0x8072d72d903d588b})}, - {Sign::POS, -128, MType({0x30d58c3f7e2ea1f, 0x821b05f3b01d6774})}, - {Sign::POS, -128, MType({0x20f6fafe8fbb68b9, 0x83c5f8299e2b4091})}, - {Sign::POS, -128, MType({0xe21f9f89c1ab80b2, 0x8573b71682a7d21a})}, - {Sign::POS, -128, MType({0xe21f9f89c1ab80b2, 0x8573b71682a7d21a})}, - {Sign::POS, -128, MType({0x1e005d06dbfa8f8, 0x87244c308e670a66})}, - {Sign::POS, -128, MType({0x223111a707b6de2c, 0x88d7c11e3ad53cdc})}, - {Sign::POS, -128, MType({0x223111a707b6de2c, 0x88d7c11e3ad53cdc})}, - {Sign::POS, -128, MType({0x2eb628dba173c82d, 0x8a8e1fb794b09134})}, - {Sign::POS, -128, MType({0xbe2ad19415fe25a5, 0x8c47720791e53313})}, - {Sign::POS, -128, MType({0xbe2ad19415fe25a5, 0x8c47720791e53313})}, - {Sign::POS, -128, MType({0xbddae1ccce247838, 0x8e03c24d73003959})}, - {Sign::POS, -128, MType({0x9b00bf167e95da67, 0x8fc31afe30b2c6de})}, - {Sign::POS, -128, MType({0x9b00bf167e95da67, 
0x8fc31afe30b2c6de})}, - {Sign::POS, -128, MType({0x9b92199ed1a4bab1, 0x918586c5f5e4bf01})}, - {Sign::POS, -128, MType({0xdf5bb3b60554e152, 0x934b1089a6dc93c1})}, - {Sign::POS, -128, MType({0xdf5bb3b60554e152, 0x934b1089a6dc93c1})}, - {Sign::POS, -128, MType({0xf3cbc416a2418012, 0x9513c36876083695})}, - {Sign::POS, -128, MType({0xbe1188fbc94e2f15, 0x96dfaabd86fa1646})}, - {Sign::POS, -128, MType({0xbe1188fbc94e2f15, 0x96dfaabd86fa1646})}, - {Sign::POS, -128, MType({0x1d2f89321647b358, 0x98aed221a03458b6})}, - {Sign::POS, -128, MType({0x1d2f89321647b358, 0x98aed221a03458b6})}, - {Sign::POS, -128, MType({0xe549f9aaea3cb5e1, 0x9a81456cec642e0f})}, - {Sign::POS, -128, MType({0xa2554b2dd4619e63, 0x9c5710b8cbb73a42})}, - {Sign::POS, -128, MType({0xa2554b2dd4619e63, 0x9c5710b8cbb73a42})}, - {Sign::POS, -128, MType({0x30603d87b6df81ad, 0x9e304061b5fda919})}, - {Sign::POS, -128, MType({0x30603d87b6df81ad, 0x9e304061b5fda919})}, - {Sign::POS, -128, MType({0x67879c5a30cd1242, 0xa00ce1092e5498c3})}, - {Sign::POS, -128, MType({0x67879c5a30cd1242, 0xa00ce1092e5498c3})}, - {Sign::POS, -128, MType({0xb7efae08e597e16, 0xa1ecff97c91e267b})}, - {Sign::POS, -128, MType({0x83594fab088c0d65, 0xa3d0a93f45169a4a})}, - {Sign::POS, -128, MType({0x83594fab088c0d65, 0xa3d0a93f45169a4a})}, - {Sign::POS, -128, MType({0xaf6a62a0dec6e073, 0xa5b7eb7cb860fb88})}, - {Sign::POS, -128, MType({0xaf6a62a0dec6e073, 0xa5b7eb7cb860fb88})}, - {Sign::POS, -128, MType({0x49362382a768847a, 0xa7a2d41ad270c9d7})}, - {Sign::POS, -128, MType({0x49362382a768847a, 0xa7a2d41ad270c9d7})}, - {Sign::POS, -128, MType({0x8ba4aea614d05701, 0xa991713433c2b998})}, - {Sign::POS, -128, MType({0x8ba4aea614d05701, 0xa991713433c2b998})}, - {Sign::POS, -128, MType({0x7fe6607ba902ef3c, 0xab83d135dc633301})}, - {Sign::POS, -128, MType({0x7fe6607ba902ef3c, 0xab83d135dc633301})}, - {Sign::POS, -128, MType({0xd60864fd949b4bd3, 0xad7a02e1b24efd31})}, - {Sign::POS, -128, MType({0xd60864fd949b4bd3, 0xad7a02e1b24efd31})}, - {Sign::POS, 
-128, MType({0x66d235ee63073dd, 0xaf74155120c9011c})}, - {Sign::POS, -128, MType({0x66d235ee63073dd, 0xaf74155120c9011c})}, - {Sign::POS, 0, MType(0)}, + {Sign::POS, 0, 0_u128}, + {Sign::POS, -134, 0x8080abac'46f38946'662d417c'ed007a46_u128}, + {Sign::POS, -133, 0x8102b2c4'9ac23a4f'91d082dc'e3ddcd38_u128}, + {Sign::POS, -133, 0xc2492946'4655f45c'da5f3cc0'b3251dbd_u128}, + {Sign::POS, -132, 0x820aec4f'3a222380'b9e3aea6'c444ef07_u128}, + {Sign::POS, -132, 0xa33576a1'6f1f4c64'521016bd'904dc968_u128}, + {Sign::POS, -132, 0xb3e4a796'a5dac208'27cca0bc'c06c2f92_u128}, + {Sign::POS, -132, 0xd5779687'd887e0d1'a9dda170'56e45ed5_u128}, + {Sign::POS, -132, 0xf7518e00'35c3dd83'606d8909'3278a939_u128}, + {Sign::POS, -131, 0x8cb9de8a'32ab368a'a7c98595'30a45153_u128}, + {Sign::POS, -131, 0x9defad3e'8f73217a'976d3b5b'45f6ca0b_u128}, + {Sign::POS, -131, 0xa6988ae9'03f562ed'3e858f08'597b3a69_u128}, + {Sign::POS, -131, 0xb8069857'560707a3'6a677b4c'8bec22e1_u128}, + {Sign::POS, -131, 0xc99af2ea'ca4c4570'eaf51f66'692844ba_u128}, + {Sign::POS, -131, 0xd273b205'8de1bd49'46bbf837'b4d320c6_u128}, + {Sign::POS, -131, 0xe442c00d'e2591b47'196ab34c'e0bccd12_u128}, + {Sign::POS, -131, 0xed393b1c'22351280'3f4e2e66'0317d55f_u128}, + {Sign::POS, -131, 0xff4489ce'deab2ca6'c17bd40d'8d9291ec_u128}, + {Sign::POS, -130, 0x88bc7411'3f23def1'9c5a0fe3'96f40f1e_u128}, + {Sign::POS, -130, 0x8d515bf1'1fb94f1c'88713268'840cbcc0_u128}, + {Sign::POS, -130, 0x968b0864'3409ceb6'65c0da50'6a088484_u128}, + {Sign::POS, -130, 0x9b2fe580'ac80b17d'411a5b94'4aca8708_u128}, + {Sign::POS, -130, 0xa489ec19'9dab06f2'a9fb6cf0'ecb411b7_u128}, + {Sign::POS, -130, 0xa93f2f25'0dac67d1'cad2fb8d'48054ae0_u128}, + {Sign::POS, -130, 0xadfa035a'a1ed8fdc'149767e4'10316d2c_u128}, + {Sign::POS, -130, 0xb780945b'ab55dce4'34c7bc3d'32750fde_u128}, + {Sign::POS, -130, 0xbc4c6c2a'226399ef'8f6ebcfb'2016a439_u128}, + {Sign::POS, -130, 0xc5f57f59'c7f46155'aa8b6997'a402bf30_u128}, + {Sign::POS, -130, 0xcad2d6e7'b80bf914'2c507fb7'a3d0bf6a_u128}, + 
{Sign::POS, -130, 0xcfb62038'44b3209a'd0cb02f3'3f79c16c_u128}, + {Sign::POS, -130, 0xd98ec2ba'de71e539'58a98f2a'd65bee9b_u128}, + {Sign::POS, -130, 0xde8439c1'dec56877'4d57da94'5b5d0aaa_u128}, + {Sign::POS, -130, 0xe37fde37'807b84e3'4e9a750b'6b68781d_u128}, + {Sign::POS, -130, 0xe881bf93'2af3dac0'c524848e'3443e040_u128}, + {Sign::POS, -130, 0xf29877ff'38809091'3b020fa1'820c9492_u128}, + {Sign::POS, -130, 0xf7ad6f26'e7ff2ef7'54d2238f'75f969b1_u128}, + {Sign::POS, -130, 0xfcc8e365'9d9bcbec'ca0cdf30'1431b60f_u128}, + {Sign::POS, -129, 0x80f572b1'363487b9'f5bd0b5b'3479d5f4_u128}, + {Sign::POS, -129, 0x86216b3b'0b17188b'163ceae8'8f720f1e_u128}, + {Sign::POS, -129, 0x88bc7411'3f23def1'9c5a0fe3'96f40f1e_u128}, + {Sign::POS, -129, 0x8b5ae65d'67db9acd'f7a51681'26a58b9a_u128}, + {Sign::POS, -129, 0x8dfccb1a'd35ca6ed'5147bdb6'ddcaf59c_u128}, + {Sign::POS, -129, 0x90a22b68'75c6a1f7'ae91aeba'609c8877_u128}, + {Sign::POS, -129, 0x934b1089'a6dc93c1'df5bb3b6'0554e152_u128}, + {Sign::POS, -129, 0x95f783e6'e49a9cfa'4a5004f3'ef063313_u128}, + {Sign::POS, -129, 0x9b5b3bb5'f088b766'd878bbe3'd392be25_u128}, + {Sign::POS, -129, 0x9e1293b9'998c1daa'5b035eae'273a855f_u128}, + {Sign::POS, -129, 0xa0cda11e'af46390d'bb243827'3918db7e_u128}, + {Sign::POS, -129, 0xa38c6e13'8e20d831'f698298a'dddd7f32_u128}, + {Sign::POS, -129, 0xa64f04f0'b961df76'e4f5275c'2d15c21f_u128}, + {Sign::POS, -129, 0xa9157039'c51ebe70'8164c759'686a2209_u128}, + {Sign::POS, -129, 0xabdfba9e'468fd6f6'f72ea077'49ce6bd3_u128}, + {Sign::POS, -129, 0xaeadeefa'caf97d35'7dd6e688'ebb13b03_u128}, + {Sign::POS, -129, 0xb1801859'd56249dc'18ce51ff'f99479cd_u128}, + {Sign::POS, -129, 0xb45641f4'e350a0d3'2756eba0'0bc33978_u128}, + {Sign::POS, -129, 0xb7307735'78cb90b2'be1116c3'466beb6d_u128}, + {Sign::POS, -129, 0xba0ec3b6'33dd8b09'49dc60b2'b059a60b_u128}, + {Sign::POS, -129, 0xbcf13343'e7d9ec7d'2efd1778'1bb3afec_u128}, + {Sign::POS, -129, 0xbfd7d1de'c0a8df6f'37eda996'244bccb0_u128}, + {Sign::POS, -129, 
0xc2c2abbb'6e5fd56f'33337789'd592e296_u128}, + {Sign::POS, -129, 0xc5b1cd44'596fa51e'1a18fb8f'9f9ef280_u128}, + {Sign::POS, -129, 0xc8a5431a'dfb44ca5'688ce7c1'a75e341a_u128}, + {Sign::POS, -129, 0xcb9d1a18'9ab56e76'2d7e9307'c70c0668_u128}, + {Sign::POS, -129, 0xcb9d1a18'9ab56e76'2d7e9307'c70c0668_u128}, + {Sign::POS, -129, 0xce995f50'af69d861'ef2f3f4f'861ad6a9_u128}, + {Sign::POS, -129, 0xd19a2011'27d3c645'7f9d79f5'1dcc7301_u128}, + {Sign::POS, -129, 0xd49f69e4'56cf1b79'5f53bd2e'406e66e7_u128}, + {Sign::POS, -129, 0xd7a94a92'466e833a'ad88bba7'd0cee8e0_u128}, + {Sign::POS, -129, 0xdab7d022'31484a92'96c20cca'6efe2ac5_u128}, + {Sign::POS, -129, 0xddcb08dc'0717d85b'f40a666c'87842843_u128}, + {Sign::POS, -129, 0xe0e30349'fd1cec80'7fe8e180'2aba24d6_u128}, + {Sign::POS, -129, 0xe0e30349'fd1cec80'7fe8e180'2aba24d6_u128}, + {Sign::POS, -129, 0xe3ffce3a'2aa64922'3eadb651'b49ac53a_u128}, + {Sign::POS, -129, 0xe72178c0'323a1a0f'304e1653'e71d9973_u128}, + {Sign::POS, -129, 0xea481236'f7d35baf'e9a767a8'0d6d97e8_u128}, + {Sign::POS, -129, 0xed73aa42'64b0ade9'4f91cf4b'33e42998_u128}, + {Sign::POS, -129, 0xed73aa42'64b0ade9'4f91cf4b'33e42998_u128}, + {Sign::POS, -129, 0xf0a450d1'39366ca6'fc66eb64'08ff6433_u128}, + {Sign::POS, -129, 0xf3da161e'ed6b9aaf'ac8d42f7'8d3e65d3_u128}, + {Sign::POS, -129, 0xf7150ab5'a09f27f4'5a470250'd40ebe90_u128}, + {Sign::POS, -129, 0xf7150ab5'a09f27f4'5a470250'd40ebe90_u128}, + {Sign::POS, -129, 0xfa553f70'18c966f2'b780a545'a1b54dcf_u128}, + {Sign::POS, -129, 0xfd9ac57b'd244217e'8f05924d'258c14c5_u128}, + {Sign::POS, -128, 0x8072d72d'903d588b'89d1b09c'70c4010a_u128}, + {Sign::POS, -128, 0x8072d72d'903d588b'89d1b09c'70c4010a_u128}, + {Sign::POS, -128, 0x821b05f3'b01d6774'030d58c3'f7e2ea1f_u128}, + {Sign::POS, -128, 0x83c5f829'9e2b4091'20f6fafe'8fbb68b9_u128}, + {Sign::POS, -128, 0x8573b716'82a7d21a'e21f9f89'c1ab80b2_u128}, + {Sign::POS, -128, 0x8573b716'82a7d21a'e21f9f89'c1ab80b2_u128}, + {Sign::POS, -128, 0x87244c30'8e670a66'01e005d0'6dbfa8f8_u128}, + 
{Sign::POS, -128, 0x88d7c11e'3ad53cdc'223111a7'07b6de2c_u128}, + {Sign::POS, -128, 0x88d7c11e'3ad53cdc'223111a7'07b6de2c_u128}, + {Sign::POS, -128, 0x8a8e1fb7'94b09134'2eb628db'a173c82d_u128}, + {Sign::POS, -128, 0x8c477207'91e53313'be2ad194'15fe25a5_u128}, + {Sign::POS, -128, 0x8c477207'91e53313'be2ad194'15fe25a5_u128}, + {Sign::POS, -128, 0x8e03c24d'73003959'bddae1cc'ce247838_u128}, + {Sign::POS, -128, 0x8fc31afe'30b2c6de'9b00bf16'7e95da67_u128}, + {Sign::POS, -128, 0x8fc31afe'30b2c6de'9b00bf16'7e95da67_u128}, + {Sign::POS, -128, 0x918586c5'f5e4bf01'9b92199e'd1a4bab1_u128}, + {Sign::POS, -128, 0x934b1089'a6dc93c1'df5bb3b6'0554e152_u128}, + {Sign::POS, -128, 0x934b1089'a6dc93c1'df5bb3b6'0554e152_u128}, + {Sign::POS, -128, 0x9513c368'76083695'f3cbc416'a2418012_u128}, + {Sign::POS, -128, 0x96dfaabd'86fa1646'be1188fb'c94e2f15_u128}, + {Sign::POS, -128, 0x96dfaabd'86fa1646'be1188fb'c94e2f15_u128}, + {Sign::POS, -128, 0x98aed221'a03458b6'1d2f8932'1647b358_u128}, + {Sign::POS, -128, 0x98aed221'a03458b6'1d2f8932'1647b358_u128}, + {Sign::POS, -128, 0x9a81456c'ec642e0f'e549f9aa'ea3cb5e1_u128}, + {Sign::POS, -128, 0x9c5710b8'cbb73a42'a2554b2d'd4619e63_u128}, + {Sign::POS, -128, 0x9c5710b8'cbb73a42'a2554b2d'd4619e63_u128}, + {Sign::POS, -128, 0x9e304061'b5fda919'30603d87'b6df81ad_u128}, + {Sign::POS, -128, 0x9e304061'b5fda919'30603d87'b6df81ad_u128}, + {Sign::POS, -128, 0xa00ce109'2e5498c3'67879c5a'30cd1242_u128}, + {Sign::POS, -128, 0xa00ce109'2e5498c3'67879c5a'30cd1242_u128}, + {Sign::POS, -128, 0xa1ecff97'c91e267b'0b7efae0'8e597e16_u128}, + {Sign::POS, -128, 0xa3d0a93f'45169a4a'83594fab'088c0d65_u128}, + {Sign::POS, -128, 0xa3d0a93f'45169a4a'83594fab'088c0d65_u128}, + {Sign::POS, -128, 0xa5b7eb7c'b860fb88'af6a62a0'dec6e073_u128}, + {Sign::POS, -128, 0xa5b7eb7c'b860fb88'af6a62a0'dec6e073_u128}, + {Sign::POS, -128, 0xa7a2d41a'd270c9d7'49362382'a768847a_u128}, + {Sign::POS, -128, 0xa7a2d41a'd270c9d7'49362382'a768847a_u128}, + {Sign::POS, -128, 
0xa9917134'33c2b998'8ba4aea6'14d05701_u128}, + {Sign::POS, -128, 0xa9917134'33c2b998'8ba4aea6'14d05701_u128}, + {Sign::POS, -128, 0xab83d135'dc633301'7fe6607b'a902ef3c_u128}, + {Sign::POS, -128, 0xab83d135'dc633301'7fe6607b'a902ef3c_u128}, + {Sign::POS, -128, 0xad7a02e1'b24efd31'd60864fd'949b4bd3_u128}, + {Sign::POS, -128, 0xad7a02e1'b24efd31'd60864fd'949b4bd3_u128}, + {Sign::POS, -128, 0xaf741551'20c9011c'066d235e'e63073dd_u128}, + {Sign::POS, -128, 0xaf741551'20c9011c'066d235e'e63073dd_u128}, + {Sign::POS, 0, 0_u128}, }; // Logarithm range reduction - Step 2: @@ -431,196 +435,196 @@ constexpr double S2[198] = { // r = 2^-18 * round( 2^18 / (1 + i*2^(-14)) ); // s, m, e = RealField(128)(r).log().sign_mantissa_exponent(); // print("{Sign::POS," if (s == -1) else "{Sign::NEG,", e, ", -// MType({", hex(m % 2^64), ",", hex((m >> 64) % 2^64), "})},"); +// format_hex(m), "},"); const Float128 LOG_R2[198] = { - {Sign::NEG, -135, MType({0xa0e061c5f7431c5e, 0xb67dab2a1a5742a4})}, - {Sign::NEG, -135, MType({0x5d5bfe7b969ed6ec, 0xb4807f24af682939})}, - {Sign::NEG, -135, MType({0x4d08702ddfabc23f, 0xb2834b35b4d54d5f})}, - {Sign::NEG, -135, MType({0xd4d366508b9953df, 0xb0860f5ceba9be95})}, - {Sign::NEG, -135, MType({0xac18a289f8f214a9, 0xae68f71aa09e8847})}, - {Sign::NEG, -135, MType({0xd5b42054abb88c45, 0xac6baaeed676e8f1})}, - {Sign::NEG, -135, MType({0x9809d58ee484964, 0xaa6e56d87cd632d6})}, - {Sign::NEG, -135, MType({0xb9e6fc7c72f06d73, 0xa870fad754bb8791})}, - {Sign::NEG, -135, MType({0x6f78d6d0105c00e2, 0xa67396eb1f231892})}, - {Sign::NEG, -135, MType({0x28f712629209148, 0xa4762b139d0626e7})}, - {Sign::NEG, -135, MType({0xc98d898ef172df02, 0xa258dfd10aedaa67})}, - {Sign::NEG, -135, MType({0xfcc37c3c3062bfa1, 0xa05b63a373e60a83})}, - {Sign::NEG, -135, MType({0x3eb450db05763c36, 0x9e5ddf89cf42f501})}, - {Sign::NEG, -135, MType({0x7146a86fd458b775, 0x9c605383ddf1b88c})}, - {Sign::NEG, -135, MType({0xc20a0c9281474436, 0x9a62bf9160dcb286})}, - {Sign::NEG, -135, 
MType({0xcdc57316ec4aebc3, 0x986523b218eb4ed6})}, - {Sign::NEG, -135, MType({0xc060dad74cef4273, 0x96677fe5c70207b9})}, - {Sign::NEG, -135, MType({0xed8def1a3e433499, 0x9449f92d2ff44633})}, - {Sign::NEG, -135, MType({0x3ce7a1f85c27b4fc, 0x924c45073220b5e0})}, - {Sign::NEG, -135, MType({0xf2ca893449f7f2cb, 0x904e88f368fea63f})}, - {Sign::NEG, -135, MType({0x8d77d9fabd2853cf, 0x8e50c4f1956699ed})}, - {Sign::NEG, -135, MType({0x93e828d75b58ded4, 0x8c52f901782e20ec})}, - {Sign::NEG, -135, MType({0x9f9605b053c5acf0, 0x8a552522d227d87a})}, - {Sign::NEG, -135, MType({0x62a149393bca7241, 0x8857495564236ae0})}, - {Sign::NEG, -135, MType({0xaea6b56ce89203d4, 0x86398719b66bac7c})}, - {Sign::NEG, -135, MType({0x242bd86d00609b2, 0x843b9aef044e4dcc})}, - {Sign::NEG, -135, MType({0xdaabf92774bac84e, 0x823da6d4c89c6927})}, - {Sign::NEG, -135, MType({0xa1c6f3fc242ef8d0, 0x803faacac419abf2})}, - {Sign::NEG, -136, MType({0xa225ebc02e6d9dd4, 0xfc834da16f0d9f57})}, - {Sign::NEG, -136, MType({0xc33f6ad340ae18a9, 0xf88735ccc7433381})}, - {Sign::NEG, -136, MType({0x70b2a4d38a242244, 0xf48b0e171249b6bc})}, - {Sign::NEG, -136, MType({0x1d54819048b811b0, 0xf08ed67fd190e280})}, - {Sign::NEG, -136, MType({0x9c21b650afe9ede0, 0xec52ca07ed95f236})}, - {Sign::NEG, -136, MType({0x935519c96d30e463, 0xe85671adecd28aac})}, - {Sign::NEG, -136, MType({0xba88f6f2e2672cfe, 0xe45a0970dc912ca7})}, - {Sign::NEG, -136, MType({0xb1a8b84657ae069, 0xe05d91503e298bc8})}, - {Sign::NEG, -136, MType({0xea3bff8d197b20a1, 0xdc61094b92ed70ef})}, - {Sign::NEG, -136, MType({0xcdbb931d6fecc249, 0xd86471625c28b9e5})}, - {Sign::NEG, -136, MType({0xd971d560d5f00820, 0xd467c9941b2158f5})}, - {Sign::NEG, -136, MType({0x75563561244c090b, 0xd06b11e051175493})}, - {Sign::NEG, -136, MType({0xdc393c9a3f3b380f, 0xcc6e4a467f44c6fa})}, - {Sign::NEG, -136, MType({0xe6abe6e9e4ee2096, 0xc831a4c6f6fa709d})}, - {Sign::NEG, -136, MType({0x3ce3c8228583a66e, 0xc434bc6124a0f16e})}, - {Sign::NEG, -136, MType({0xb96a79f5c5a4963a, 
0xc037c413c61bfd93})}, - {Sign::NEG, -136, MType({0xaaef27337008679f, 0xbc3abbde5c8d9bde})}, - {Sign::NEG, -136, MType({0xa49a3fcaddc8bc5a, 0xb83da3c06911e509})}, - {Sign::NEG, -136, MType({0xe0254feb785362fa, 0xb4407bb96cbf035a})}, - {Sign::NEG, -136, MType({0x9893a4e25ab9dc95, 0xb04343c8e8a53245})}, - {Sign::NEG, -136, MType({0x5d8b0f40a3708915, 0xac45fbee5dcebe0b})}, - {Sign::NEG, -136, MType({0x5f4c11c2c7a58c69, 0xa848a4294d40035d})}, - {Sign::NEG, -136, MType({0xb348cc5df706ffba, 0xa44b3c7937f76efd})}, - {Sign::NEG, -136, MType({0x9159f2c55a18befd, 0xa04dc4dd9eed7d60})}, - {Sign::NEG, -136, MType({0xbdfdee41fe6a5a02, 0x9c1064563058bef3})}, - {Sign::NEG, -136, MType({0x4580ddf89853254d, 0x9812cbe346475a24})}, - {Sign::NEG, -136, MType({0xac75e10d61fc3ee8, 0x9415238353489ffb})}, - {Sign::NEG, -136, MType({0xcad9b30b29736155, 0x90176b35d83ce8e2})}, - {Sign::NEG, -136, MType({0x6f881deb98fc45f3, 0x8c19a2fa55fe9b14})}, - {Sign::NEG, -136, MType({0x70a04b63b7248c96, 0x881bcad04d622a3e})}, - {Sign::NEG, -136, MType({0xb4823fb48035eddd, 0x841de2b73f361722})}, - {Sign::NEG, -136, MType({0x3364ccb5b13cd47f, 0x801feaaeac42ef38})}, - {Sign::NEG, -137, MType({0xe306977b049f0ad5, 0xf843c56c2a969897})}, - {Sign::NEG, -137, MType({0xe3c4d9e9619bc045, 0xf0479599f617a843})}, - {Sign::NEG, -137, MType({0x4356d525b5e6432d, 0xe84b45e5bc76702c})}, - {Sign::NEG, -137, MType({0x7839dcd7989339ab, 0xe04ed64e7f14697a})}, - {Sign::NEG, -137, MType({0x4e21f045ecb76f23, 0xd85246d33f47230b})}, - {Sign::NEG, -137, MType({0x902e248dd4ba9b28, 0xd0559772fe5840b0})}, - {Sign::NEG, -137, MType({0xa44449067ef92e01, 0xc858c82cbd857a72})}, - {Sign::NEG, -137, MType({0x17926207cc22e4e6, 0xc05bd8ff7e009bd2})}, - {Sign::NEG, -137, MType({0x1c349622f3fa5d82, 0xb85ec9ea40ef8309})}, - {Sign::NEG, -137, MType({0x97fa2fd0c9dc723e, 0xafe1c6ece1a058dd})}, - {Sign::NEG, -137, MType({0x983e80897cf1e60f, 0xa7e47606048b1a65})}, - {Sign::NEG, -137, MType({0x7199cd06ae5d39b3, 0x9fe705341d236102})}, - {Sign::NEG, 
-137, MType({0x43cd18a72a051a96, 0x97e974762c5e8f58})}, - {Sign::NEG, -137, MType({0x7b6d1248c3e1fd40, 0x8febc3cb332616ff})}, - {Sign::NEG, -137, MType({0xf5572a8814c703af, 0x87edf332325777c5})}, - {Sign::NEG, -138, MType({0x26828c92649a3a39, 0xffe0055455887de0})}, - {Sign::NEG, -138, MType({0x82c550bd1216d82a, 0xefe3e4643a640cf3})}, - {Sign::NEG, -138, MType({0xda6959f7f0e01bf0, 0xdfe7839214b4e8ae})}, - {Sign::NEG, -138, MType({0xda93e2fa85a8f214, 0xcfeae2dbe5d6736d})}, - {Sign::NEG, -138, MType({0xb47505bfa5a03b06, 0xbfee023faf0c2480})}, - {Sign::NEG, -138, MType({0xb1475a5180a43520, 0xaff0e1bb718186ad})}, - {Sign::NEG, -138, MType({0xa8740b91c95df537, 0x9ff3814d2e4a36b2})}, - {Sign::NEG, -138, MType({0x57d895d35921b59c, 0x8ff5e0f2e661e1c6})}, - {Sign::NEG, -139, MType({0x3c56c598c659c2a3, 0xfff0015535588833})}, - {Sign::NEG, -139, MType({0x2ef8ec33ed9d782a, 0xdff3c0e497ea4eb1})}, - {Sign::NEG, -139, MType({0x379eba7e6465ff63, 0xbff7008ff5e0c257})}, - {Sign::NEG, -139, MType({0x3f972b783fcab757, 0x9ff9c0535073a370})}, - {Sign::NEG, -140, MType({0xde026e271ee0549d, 0xfff8005551558885})}, - {Sign::NEG, -140, MType({0xeceb47ea01f6c632, 0xbffb8023febc0c25})}, - {Sign::NEG, -141, MType({0x7333c57857e1ed52, 0xfffc001554d55888})}, - {Sign::NEG, -142, MType({0x87dde026fa704374, 0xfffe000555455588})}, - {Sign::POS, 0, MType({0x0, 0x0})}, - {Sign::POS, -141, MType({0x44999abe2fe2cc65, 0x80010002aab2aac4})}, - {Sign::POS, -140, MType({0x4eef381581464ccb, 0x8002000aaaeaac44})}, - {Sign::POS, -140, MType({0xdfeb485085f6f454, 0xc004802401440c26})}, - {Sign::POS, -139, MType({0x99abe3be3a1c6e93, 0x8004002aacaac445})}, - {Sign::POS, -139, MType({0x6bc1e20eac8448b4, 0xa00640535a37a37a})}, - {Sign::POS, -139, MType({0x979eedc064c242fd, 0xc00900900a20c275})}, - {Sign::POS, -139, MType({0xc72446cc1bf728bd, 0xe00c40e4bd6e4efd})}, - {Sign::POS, -138, MType({0xf381b821bbb569e5, 0x800800aabaac446e})}, - {Sign::POS, -138, MType({0x569b26aaa485ea5c, 0x900a20f319a3e273})}, - {Sign::POS, 
-138, MType({0x2dcf56c83c80b028, 0xa00c814d7c6a37f8})}, - {Sign::POS, -138, MType({0x5f69768284463b9b, 0xb00f21bbe3e388ee})}, - {Sign::POS, -138, MType({0xb48ea6c05e2773a1, 0xc0120240510c284c})}, - {Sign::POS, -138, MType({0x14d9d76196d8043a, 0xd01522dcc4f87991})}, - {Sign::POS, -138, MType({0xe016a611a4415d72, 0xe018839340d4f241})}, - {Sign::POS, -138, MType({0x661e135f49a47c40, 0xf01c2465c5e61b6f})}, - {Sign::POS, -137, MType({0xbe6bf0fa435e8383, 0x801002ab2ac4499a})}, - {Sign::POS, -137, MType({0x9a31ba0cbc030353, 0x881213337898871e})}, - {Sign::POS, -137, MType({0x54b57dfe0c4c840f, 0x901443cccd362c9f})}, - {Sign::POS, -137, MType({0x7ad1e9c315328f7e, 0x98169478296fad41})}, - {Sign::POS, -137, MType({0x1f3f686cf3d6be22, 0xa01905368e2389b3})}, - {Sign::POS, -137, MType({0xf105b66ec4703ede, 0xa81b9608fc3c50ec})}, - {Sign::POS, -137, MType({0x610848c68df4d233, 0xb01e46f074b0a0f3})}, - {Sign::POS, -137, MType({0x2e0efddf33a20464, 0xb7a0e9ed7613acb0})}, - {Sign::POS, -137, MType({0xc2cdb3c750f127b4, 0xbfa3d9008e042ffb})}, - {Sign::POS, -137, MType({0xbd9533786d3f4c49, 0xc7a6e82ba36a7073})}, - {Sign::POS, -137, MType({0x82e237c9a4d450e3, 0xcfaa176fb76c8eb1})}, - {Sign::POS, -137, MType({0xc00b46a4d0e3dfd0, 0xd7ad66cdcb3cbe14})}, - {Sign::POS, -137, MType({0xea999c0df8546710, 0xdfb0d646e0194584})}, - {Sign::POS, -137, MType({0xcec6c2a9ad974f4f, 0xe7b465dbf74c8032})}, - {Sign::POS, -137, MType({0x2d2045da1570a07c, 0xefb8158e122cde5a})}, - {Sign::POS, -137, MType({0x6752e9b2381e3edc, 0xf7bbe55e321ce603})}, - {Sign::POS, -137, MType({0x3c1ed52728e00e40, 0xffbfd54d588b33c5})}, - {Sign::POS, -136, MType({0x493b0d873fb9a340, 0x83e1f2ae43793dc3})}, - {Sign::POS, -136, MType({0x29e38750c9d26893, 0x87e40ac65f6cc4a0})}, - {Sign::POS, -136, MType({0xaab9e8327258ac3f, 0x8be632ef80e9a0df})}, - {Sign::POS, -136, MType({0x28bc403d8a5f3c63, 0x8fe86b2a28bf51b3})}, - {Sign::POS, -136, MType({0xf720c1c97227fcdc, 0x93eab376d7c36377})}, - {Sign::POS, -136, MType({0x6ad9a3e3d11b66c1, 
0x97ed0bd60ed17018})}, - {Sign::POS, -136, MType({0xedb27b79c90b4019, 0x9bef74484ecb1f6c})}, - {Sign::POS, -136, MType({0xa092a0d7ab21722a, 0x9fb1c4cd27012e19})}, - {Sign::POS, -136, MType({0x535d52f0939a4d02, 0xa3b44c65b71c2d85})}, - {Sign::POS, -136, MType({0x90a57e11edc1864e, 0xa7b6e412cadcb3dc})}, - {Sign::POS, -136, MType({0x68e9c90160031159, 0xabb98bd4e33c4381})}, - {Sign::POS, -136, MType({0xbf60594f929adeb8, 0xafbc43ac813a6ea3})}, - {Sign::POS, -136, MType({0x8a42158886775205, 0xb3bf0b9a25dcd7a2})}, - {Sign::POS, -136, MType({0x1ab45417663dee9e, 0xb7c1e39e522f316d})}, - {Sign::POS, -136, MType({0x6c51ae3ce1aea68a, 0xbbc4cbb987433fe4})}, - {Sign::POS, -136, MType({0x7c52ae8b40ebabb7, 0xbfc7c3ec4630d83c})}, - {Sign::POS, -136, MType({0xa857126f7cfaaa67, 0xc3cacc371015e15d})}, - {Sign::POS, -136, MType({0x14d05662cd29464a, 0xc7cde49a66165446})}, - {Sign::POS, -136, MType({0x8379db06ef3cd6bb, 0xcb90da1644d29bb7})}, - {Sign::POS, -136, MType({0x9025f4c67dd38bb6, 0xcf9411aa99ddb7de})}, - {Sign::POS, -136, MType({0xd6f8a61c892032ee, 0xd3975958f681086d})}, - {Sign::POS, -136, MType({0x9a2f20b4e2332d47, 0xd79ab121dbf8714c})}, - {Sign::POS, -136, MType({0x3c767d61f51d375b, 0xdb9e1905cb85ea59})}, - {Sign::POS, -136, MType({0xd4b2bd65bb25493c, 0xdfa1910546717fca})}, - {Sign::POS, -136, MType({0xc96c1254a30ef91f, 0xe3a51920ce095292})}, - {Sign::POS, -136, MType({0x73e324ce0946b214, 0xe7a8b158e3a198be})}, - {Sign::POS, -136, MType({0xcacd125a12bac62c, 0xebac59ae08949dd8})}, - {Sign::POS, -136, MType({0xcafdc27227b71eaa, 0xef6fd620b2b7a503})}, - {Sign::POS, -136, MType({0x688d4282f6026aa3, 0xf3739daf959aaafc})}, - {Sign::POS, -136, MType({0xe54e9e3804464cdd, 0xf777755d03f4e0b6})}, - {Sign::POS, -136, MType({0xcb78b383f4b59dce, 0xfb7b5d297f388a12})}, - {Sign::POS, -136, MType({0xee055fc515062c04, 0xff7f551588de024f})}, - {Sign::POS, -135, MType({0x207812b43382acdd, 0x81c1ae90d131de38})}, - {Sign::POS, -135, MType({0xdc90c4c4b61f3a87, 0x83c3baa726a721cc})}, - {Sign::POS, 
-135, MType({0x1a03f13fb2c978b1, 0x85c5cece05941dbc})}, - {Sign::POS, -135, MType({0xb36f282e83a7dc36, 0x87c7eb05aec1304f})}, - {Sign::POS, -135, MType({0xd82a46616d4c393f, 0x89a9eccd56a980c0})}, - {Sign::POS, -135, MType({0xbc6ff84713c9babd, 0x8bac18a640185360})}, - {Sign::POS, -135, MType({0x9f7942a516fc2d8a, 0x8dae4c90b22574f4})}, - {Sign::POS, -135, MType({0x15e50cfd9b29b427, 0x8fb0888ceda546ab})}, - {Sign::POS, -135, MType({0x9f465296ae7dd49a, 0x91b2cc9b336f3718})}, - {Sign::POS, -135, MType({0xb49c1eb9b348e6e4, 0x93b518bbc45dc268})}, - {Sign::POS, -135, MType({0xdaa320cd64c9d9c7, 0x95b76ceee14e728e})}, - {Sign::POS, -135, MType({0x75a91950ffe1e3b5, 0x9799a333de49b963})}, - {Sign::POS, -135, MType({0x5c6abcbf43f03f14, 0x999c070ba32068cd})}, - {Sign::POS, -135, MType({0x5a9e7f265d1ed157, 0x9b9e72f6b295ad4f})}, - {Sign::POS, -135, MType({0xefeb98d02a195c17, 0x9da0e6f54d9318fd})}, - {Sign::POS, -135, MType({0x2aa503a3110ab5a7, 0x9fa36307b5054ca8})}, - {Sign::POS, -135, MType({0xd0fe7e05869eb825, 0xa1a5e72e29dbf808})}, - {Sign::POS, -135, MType({0xe80a28f4e1e500d2, 0xa3884a68a750cb10})}, - {Sign::POS, -135, MType({0x531064151ca6e30b, 0xa58ade36aeef9f0b})}, - {Sign::POS, -135, MType({0x27c01ffa8e2e3c4b, 0xa78d7a1982c4b08f})}, - {Sign::POS, -135, MType({0x7ba9408dc857d568, 0xa9901e1163cbbbf5})}, - {Sign::POS, -135, MType({0x104d1e3331d3b4fa, 0xab92ca1e93038d76})}, - {Sign::POS, -135, MType({0x9343c846fcdf9137, 0xad957e41516e0158})}, - {Sign::POS, -135, MType({0x3977e89aec59bfa2, 0xaf780e79b2514889})}, - {Sign::POS, -135, MType({0x913d4e3dc55c3e6e, 0xb17ad246ef3713bc})}, - {Sign::POS, -135, MType({0x777b52a9e70d8bcc, 0xb37d9e2a7a56b09d})}, - {Sign::POS, -135, MType({0x55de916fd30591de, 0xb580722494be0c91})}, - {Sign::POS, -135, MType({0xe79cfb37be2861e4, 0xb7834e357f7e2600})}, - {Sign::POS, -135, MType({0x90983104d3805389, 0xb986325d7bab0c89})}, - {Sign::POS, -135, MType({0x59e3b2ec71ce64f4, 0xbb68ef9c254aa378})}, - {Sign::POS, -135, MType({0xe83183bf3dd612ef, 
0xbd6be3718c77636f})}, - {Sign::POS, -135, MType({0xc4e3b0ac2fd52b7f, 0xbf6edf5ec44d9d35})}, + {Sign::NEG, -135, 0xb67dab2a'1a5742a4'a0e061c5'f7431c5e_u128}, + {Sign::NEG, -135, 0xb4807f24'af682939'5d5bfe7b'969ed6ec_u128}, + {Sign::NEG, -135, 0xb2834b35'b4d54d5f'4d08702d'dfabc23f_u128}, + {Sign::NEG, -135, 0xb0860f5c'eba9be95'd4d36650'8b9953df_u128}, + {Sign::NEG, -135, 0xae68f71a'a09e8847'ac18a289'f8f214a9_u128}, + {Sign::NEG, -135, 0xac6baaee'd676e8f1'd5b42054'abb88c45_u128}, + {Sign::NEG, -135, 0xaa6e56d8'7cd632d6'09809d58'ee484964_u128}, + {Sign::NEG, -135, 0xa870fad7'54bb8791'b9e6fc7c'72f06d73_u128}, + {Sign::NEG, -135, 0xa67396eb'1f231892'6f78d6d0'105c00e2_u128}, + {Sign::NEG, -135, 0xa4762b13'9d0626e7'028f7126'29209148_u128}, + {Sign::NEG, -135, 0xa258dfd1'0aedaa67'c98d898e'f172df02_u128}, + {Sign::NEG, -135, 0xa05b63a3'73e60a83'fcc37c3c'3062bfa1_u128}, + {Sign::NEG, -135, 0x9e5ddf89'cf42f501'3eb450db'05763c36_u128}, + {Sign::NEG, -135, 0x9c605383'ddf1b88c'7146a86f'd458b775_u128}, + {Sign::NEG, -135, 0x9a62bf91'60dcb286'c20a0c92'81474436_u128}, + {Sign::NEG, -135, 0x986523b2'18eb4ed6'cdc57316'ec4aebc3_u128}, + {Sign::NEG, -135, 0x96677fe5'c70207b9'c060dad7'4cef4273_u128}, + {Sign::NEG, -135, 0x9449f92d'2ff44633'ed8def1a'3e433499_u128}, + {Sign::NEG, -135, 0x924c4507'3220b5e0'3ce7a1f8'5c27b4fc_u128}, + {Sign::NEG, -135, 0x904e88f3'68fea63f'f2ca8934'49f7f2cb_u128}, + {Sign::NEG, -135, 0x8e50c4f1'956699ed'8d77d9fa'bd2853cf_u128}, + {Sign::NEG, -135, 0x8c52f901'782e20ec'93e828d7'5b58ded4_u128}, + {Sign::NEG, -135, 0x8a552522'd227d87a'9f9605b0'53c5acf0_u128}, + {Sign::NEG, -135, 0x88574955'64236ae0'62a14939'3bca7241_u128}, + {Sign::NEG, -135, 0x86398719'b66bac7c'aea6b56c'e89203d4_u128}, + {Sign::NEG, -135, 0x843b9aef'044e4dcc'0242bd86'd00609b2_u128}, + {Sign::NEG, -135, 0x823da6d4'c89c6927'daabf927'74bac84e_u128}, + {Sign::NEG, -135, 0x803faaca'c419abf2'a1c6f3fc'242ef8d0_u128}, + {Sign::NEG, -136, 0xfc834da1'6f0d9f57'a225ebc0'2e6d9dd4_u128}, + {Sign::NEG, -136, 
0xf88735cc'c7433381'c33f6ad3'40ae18a9_u128}, + {Sign::NEG, -136, 0xf48b0e17'1249b6bc'70b2a4d3'8a242244_u128}, + {Sign::NEG, -136, 0xf08ed67f'd190e280'1d548190'48b811b0_u128}, + {Sign::NEG, -136, 0xec52ca07'ed95f236'9c21b650'afe9ede0_u128}, + {Sign::NEG, -136, 0xe85671ad'ecd28aac'935519c9'6d30e463_u128}, + {Sign::NEG, -136, 0xe45a0970'dc912ca7'ba88f6f2'e2672cfe_u128}, + {Sign::NEG, -136, 0xe05d9150'3e298bc8'0b1a8b84'657ae069_u128}, + {Sign::NEG, -136, 0xdc61094b'92ed70ef'ea3bff8d'197b20a1_u128}, + {Sign::NEG, -136, 0xd8647162'5c28b9e5'cdbb931d'6fecc249_u128}, + {Sign::NEG, -136, 0xd467c994'1b2158f5'd971d560'd5f00820_u128}, + {Sign::NEG, -136, 0xd06b11e0'51175493'75563561'244c090b_u128}, + {Sign::NEG, -136, 0xcc6e4a46'7f44c6fa'dc393c9a'3f3b380f_u128}, + {Sign::NEG, -136, 0xc831a4c6'f6fa709d'e6abe6e9'e4ee2096_u128}, + {Sign::NEG, -136, 0xc434bc61'24a0f16e'3ce3c822'8583a66e_u128}, + {Sign::NEG, -136, 0xc037c413'c61bfd93'b96a79f5'c5a4963a_u128}, + {Sign::NEG, -136, 0xbc3abbde'5c8d9bde'aaef2733'7008679f_u128}, + {Sign::NEG, -136, 0xb83da3c0'6911e509'a49a3fca'ddc8bc5a_u128}, + {Sign::NEG, -136, 0xb4407bb9'6cbf035a'e0254feb'785362fa_u128}, + {Sign::NEG, -136, 0xb04343c8'e8a53245'9893a4e2'5ab9dc95_u128}, + {Sign::NEG, -136, 0xac45fbee'5dcebe0b'5d8b0f40'a3708915_u128}, + {Sign::NEG, -136, 0xa848a429'4d40035d'5f4c11c2'c7a58c69_u128}, + {Sign::NEG, -136, 0xa44b3c79'37f76efd'b348cc5d'f706ffba_u128}, + {Sign::NEG, -136, 0xa04dc4dd'9eed7d60'9159f2c5'5a18befd_u128}, + {Sign::NEG, -136, 0x9c106456'3058bef3'bdfdee41'fe6a5a02_u128}, + {Sign::NEG, -136, 0x9812cbe3'46475a24'4580ddf8'9853254d_u128}, + {Sign::NEG, -136, 0x94152383'53489ffb'ac75e10d'61fc3ee8_u128}, + {Sign::NEG, -136, 0x90176b35'd83ce8e2'cad9b30b'29736155_u128}, + {Sign::NEG, -136, 0x8c19a2fa'55fe9b14'6f881deb'98fc45f3_u128}, + {Sign::NEG, -136, 0x881bcad0'4d622a3e'70a04b63'b7248c96_u128}, + {Sign::NEG, -136, 0x841de2b7'3f361722'b4823fb4'8035eddd_u128}, + {Sign::NEG, -136, 0x801feaae'ac42ef38'3364ccb5'b13cd47f_u128}, + 
{Sign::NEG, -137, 0xf843c56c'2a969897'e306977b'049f0ad5_u128}, + {Sign::NEG, -137, 0xf0479599'f617a843'e3c4d9e9'619bc045_u128}, + {Sign::NEG, -137, 0xe84b45e5'bc76702c'4356d525'b5e6432d_u128}, + {Sign::NEG, -137, 0xe04ed64e'7f14697a'7839dcd7'989339ab_u128}, + {Sign::NEG, -137, 0xd85246d3'3f47230b'4e21f045'ecb76f23_u128}, + {Sign::NEG, -137, 0xd0559772'fe5840b0'902e248d'd4ba9b28_u128}, + {Sign::NEG, -137, 0xc858c82c'bd857a72'a4444906'7ef92e01_u128}, + {Sign::NEG, -137, 0xc05bd8ff'7e009bd2'17926207'cc22e4e6_u128}, + {Sign::NEG, -137, 0xb85ec9ea'40ef8309'1c349622'f3fa5d82_u128}, + {Sign::NEG, -137, 0xafe1c6ec'e1a058dd'97fa2fd0'c9dc723e_u128}, + {Sign::NEG, -137, 0xa7e47606'048b1a65'983e8089'7cf1e60f_u128}, + {Sign::NEG, -137, 0x9fe70534'1d236102'7199cd06'ae5d39b3_u128}, + {Sign::NEG, -137, 0x97e97476'2c5e8f58'43cd18a7'2a051a96_u128}, + {Sign::NEG, -137, 0x8febc3cb'332616ff'7b6d1248'c3e1fd40_u128}, + {Sign::NEG, -137, 0x87edf332'325777c5'f5572a88'14c703af_u128}, + {Sign::NEG, -138, 0xffe00554'55887de0'26828c92'649a3a39_u128}, + {Sign::NEG, -138, 0xefe3e464'3a640cf3'82c550bd'1216d82a_u128}, + {Sign::NEG, -138, 0xdfe78392'14b4e8ae'da6959f7'f0e01bf0_u128}, + {Sign::NEG, -138, 0xcfeae2db'e5d6736d'da93e2fa'85a8f214_u128}, + {Sign::NEG, -138, 0xbfee023f'af0c2480'b47505bf'a5a03b06_u128}, + {Sign::NEG, -138, 0xaff0e1bb'718186ad'b1475a51'80a43520_u128}, + {Sign::NEG, -138, 0x9ff3814d'2e4a36b2'a8740b91'c95df537_u128}, + {Sign::NEG, -138, 0x8ff5e0f2'e661e1c6'57d895d3'5921b59c_u128}, + {Sign::NEG, -139, 0xfff00155'35588833'3c56c598'c659c2a3_u128}, + {Sign::NEG, -139, 0xdff3c0e4'97ea4eb1'2ef8ec33'ed9d782a_u128}, + {Sign::NEG, -139, 0xbff7008f'f5e0c257'379eba7e'6465ff63_u128}, + {Sign::NEG, -139, 0x9ff9c053'5073a370'3f972b78'3fcab757_u128}, + {Sign::NEG, -140, 0xfff80055'51558885'de026e27'1ee0549d_u128}, + {Sign::NEG, -140, 0xbffb8023'febc0c25'eceb47ea'01f6c632_u128}, + {Sign::NEG, -141, 0xfffc0015'54d55888'7333c578'57e1ed52_u128}, + {Sign::NEG, -142, 
0xfffe0005'55455588'87dde026'fa704374_u128}, + {Sign::POS, 0, 0_u128}, + {Sign::POS, -141, 0x80010002'aab2aac4'44999abe'2fe2cc65_u128}, + {Sign::POS, -140, 0x8002000a'aaeaac44'4eef3815'81464ccb_u128}, + {Sign::POS, -140, 0xc0048024'01440c26'dfeb4850'85f6f454_u128}, + {Sign::POS, -139, 0x8004002a'acaac445'99abe3be'3a1c6e93_u128}, + {Sign::POS, -139, 0xa0064053'5a37a37a'6bc1e20e'ac8448b4_u128}, + {Sign::POS, -139, 0xc0090090'0a20c275'979eedc0'64c242fd_u128}, + {Sign::POS, -139, 0xe00c40e4'bd6e4efd'c72446cc'1bf728bd_u128}, + {Sign::POS, -138, 0x800800aa'baac446e'f381b821'bbb569e5_u128}, + {Sign::POS, -138, 0x900a20f3'19a3e273'569b26aa'a485ea5c_u128}, + {Sign::POS, -138, 0xa00c814d'7c6a37f8'2dcf56c8'3c80b028_u128}, + {Sign::POS, -138, 0xb00f21bb'e3e388ee'5f697682'84463b9b_u128}, + {Sign::POS, -138, 0xc0120240'510c284c'b48ea6c0'5e2773a1_u128}, + {Sign::POS, -138, 0xd01522dc'c4f87991'14d9d761'96d8043a_u128}, + {Sign::POS, -138, 0xe0188393'40d4f241'e016a611'a4415d72_u128}, + {Sign::POS, -138, 0xf01c2465'c5e61b6f'661e135f'49a47c40_u128}, + {Sign::POS, -137, 0x801002ab'2ac4499a'be6bf0fa'435e8383_u128}, + {Sign::POS, -137, 0x88121333'7898871e'9a31ba0c'bc030353_u128}, + {Sign::POS, -137, 0x901443cc'cd362c9f'54b57dfe'0c4c840f_u128}, + {Sign::POS, -137, 0x98169478'296fad41'7ad1e9c3'15328f7e_u128}, + {Sign::POS, -137, 0xa0190536'8e2389b3'1f3f686c'f3d6be22_u128}, + {Sign::POS, -137, 0xa81b9608'fc3c50ec'f105b66e'c4703ede_u128}, + {Sign::POS, -137, 0xb01e46f0'74b0a0f3'610848c6'8df4d233_u128}, + {Sign::POS, -137, 0xb7a0e9ed'7613acb0'2e0efddf'33a20464_u128}, + {Sign::POS, -137, 0xbfa3d900'8e042ffb'c2cdb3c7'50f127b4_u128}, + {Sign::POS, -137, 0xc7a6e82b'a36a7073'bd953378'6d3f4c49_u128}, + {Sign::POS, -137, 0xcfaa176f'b76c8eb1'82e237c9'a4d450e3_u128}, + {Sign::POS, -137, 0xd7ad66cd'cb3cbe14'c00b46a4'd0e3dfd0_u128}, + {Sign::POS, -137, 0xdfb0d646'e0194584'ea999c0d'f8546710_u128}, + {Sign::POS, -137, 0xe7b465db'f74c8032'cec6c2a9'ad974f4f_u128}, + {Sign::POS, -137, 
0xefb8158e'122cde5a'2d2045da'1570a07c_u128}, + {Sign::POS, -137, 0xf7bbe55e'321ce603'6752e9b2'381e3edc_u128}, + {Sign::POS, -137, 0xffbfd54d'588b33c5'3c1ed527'28e00e40_u128}, + {Sign::POS, -136, 0x83e1f2ae'43793dc3'493b0d87'3fb9a340_u128}, + {Sign::POS, -136, 0x87e40ac6'5f6cc4a0'29e38750'c9d26893_u128}, + {Sign::POS, -136, 0x8be632ef'80e9a0df'aab9e832'7258ac3f_u128}, + {Sign::POS, -136, 0x8fe86b2a'28bf51b3'28bc403d'8a5f3c63_u128}, + {Sign::POS, -136, 0x93eab376'd7c36377'f720c1c9'7227fcdc_u128}, + {Sign::POS, -136, 0x97ed0bd6'0ed17018'6ad9a3e3'd11b66c1_u128}, + {Sign::POS, -136, 0x9bef7448'4ecb1f6c'edb27b79'c90b4019_u128}, + {Sign::POS, -136, 0x9fb1c4cd'27012e19'a092a0d7'ab21722a_u128}, + {Sign::POS, -136, 0xa3b44c65'b71c2d85'535d52f0'939a4d02_u128}, + {Sign::POS, -136, 0xa7b6e412'cadcb3dc'90a57e11'edc1864e_u128}, + {Sign::POS, -136, 0xabb98bd4'e33c4381'68e9c901'60031159_u128}, + {Sign::POS, -136, 0xafbc43ac'813a6ea3'bf60594f'929adeb8_u128}, + {Sign::POS, -136, 0xb3bf0b9a'25dcd7a2'8a421588'86775205_u128}, + {Sign::POS, -136, 0xb7c1e39e'522f316d'1ab45417'663dee9e_u128}, + {Sign::POS, -136, 0xbbc4cbb9'87433fe4'6c51ae3c'e1aea68a_u128}, + {Sign::POS, -136, 0xbfc7c3ec'4630d83c'7c52ae8b'40ebabb7_u128}, + {Sign::POS, -136, 0xc3cacc37'1015e15d'a857126f'7cfaaa67_u128}, + {Sign::POS, -136, 0xc7cde49a'66165446'14d05662'cd29464a_u128}, + {Sign::POS, -136, 0xcb90da16'44d29bb7'8379db06'ef3cd6bb_u128}, + {Sign::POS, -136, 0xcf9411aa'99ddb7de'9025f4c6'7dd38bb6_u128}, + {Sign::POS, -136, 0xd3975958'f681086d'd6f8a61c'892032ee_u128}, + {Sign::POS, -136, 0xd79ab121'dbf8714c'9a2f20b4'e2332d47_u128}, + {Sign::POS, -136, 0xdb9e1905'cb85ea59'3c767d61'f51d375b_u128}, + {Sign::POS, -136, 0xdfa19105'46717fca'd4b2bd65'bb25493c_u128}, + {Sign::POS, -136, 0xe3a51920'ce095292'c96c1254'a30ef91f_u128}, + {Sign::POS, -136, 0xe7a8b158'e3a198be'73e324ce'0946b214_u128}, + {Sign::POS, -136, 0xebac59ae'08949dd8'cacd125a'12bac62c_u128}, + {Sign::POS, -136, 0xef6fd620'b2b7a503'cafdc272'27b71eaa_u128}, + 
{Sign::POS, -136, 0xf3739daf'959aaafc'688d4282'f6026aa3_u128}, + {Sign::POS, -136, 0xf777755d'03f4e0b6'e54e9e38'04464cdd_u128}, + {Sign::POS, -136, 0xfb7b5d29'7f388a12'cb78b383'f4b59dce_u128}, + {Sign::POS, -136, 0xff7f5515'88de024f'ee055fc5'15062c04_u128}, + {Sign::POS, -135, 0x81c1ae90'd131de38'207812b4'3382acdd_u128}, + {Sign::POS, -135, 0x83c3baa7'26a721cc'dc90c4c4'b61f3a87_u128}, + {Sign::POS, -135, 0x85c5cece'05941dbc'1a03f13f'b2c978b1_u128}, + {Sign::POS, -135, 0x87c7eb05'aec1304f'b36f282e'83a7dc36_u128}, + {Sign::POS, -135, 0x89a9eccd'56a980c0'd82a4661'6d4c393f_u128}, + {Sign::POS, -135, 0x8bac18a6'40185360'bc6ff847'13c9babd_u128}, + {Sign::POS, -135, 0x8dae4c90'b22574f4'9f7942a5'16fc2d8a_u128}, + {Sign::POS, -135, 0x8fb0888c'eda546ab'15e50cfd'9b29b427_u128}, + {Sign::POS, -135, 0x91b2cc9b'336f3718'9f465296'ae7dd49a_u128}, + {Sign::POS, -135, 0x93b518bb'c45dc268'b49c1eb9'b348e6e4_u128}, + {Sign::POS, -135, 0x95b76cee'e14e728e'daa320cd'64c9d9c7_u128}, + {Sign::POS, -135, 0x9799a333'de49b963'75a91950'ffe1e3b5_u128}, + {Sign::POS, -135, 0x999c070b'a32068cd'5c6abcbf'43f03f14_u128}, + {Sign::POS, -135, 0x9b9e72f6'b295ad4f'5a9e7f26'5d1ed157_u128}, + {Sign::POS, -135, 0x9da0e6f5'4d9318fd'efeb98d0'2a195c17_u128}, + {Sign::POS, -135, 0x9fa36307'b5054ca8'2aa503a3'110ab5a7_u128}, + {Sign::POS, -135, 0xa1a5e72e'29dbf808'd0fe7e05'869eb825_u128}, + {Sign::POS, -135, 0xa3884a68'a750cb10'e80a28f4'e1e500d2_u128}, + {Sign::POS, -135, 0xa58ade36'aeef9f0b'53106415'1ca6e30b_u128}, + {Sign::POS, -135, 0xa78d7a19'82c4b08f'27c01ffa'8e2e3c4b_u128}, + {Sign::POS, -135, 0xa9901e11'63cbbbf5'7ba9408d'c857d568_u128}, + {Sign::POS, -135, 0xab92ca1e'93038d76'104d1e33'31d3b4fa_u128}, + {Sign::POS, -135, 0xad957e41'516e0158'9343c846'fcdf9137_u128}, + {Sign::POS, -135, 0xaf780e79'b2514889'3977e89a'ec59bfa2_u128}, + {Sign::POS, -135, 0xb17ad246'ef3713bc'913d4e3d'c55c3e6e_u128}, + {Sign::POS, -135, 0xb37d9e2a'7a56b09d'777b52a9'e70d8bcc_u128}, + {Sign::POS, -135, 
0xb5807224'94be0c91'55de916f'd30591de_u128}, + {Sign::POS, -135, 0xb7834e35'7f7e2600'e79cfb37'be2861e4_u128}, + {Sign::POS, -135, 0xb986325d'7bab0c89'90983104'd3805389_u128}, + {Sign::POS, -135, 0xbb68ef9c'254aa378'59e3b2ec'71ce64f4_u128}, + {Sign::POS, -135, 0xbd6be371'8c77636f'e83183bf'3dd612ef_u128}, + {Sign::POS, -135, 0xbf6edf5e'c44d9d35'c4e3b0ac'2fd52b7f_u128}, }; // Logarithm range reduction - Step 3: @@ -660,147 +664,147 @@ constexpr double S3[139] = { // r = 2^-21 * round( 2^21 / (1 + i*2^(-21)) ); // s, m, e = RealField(128)(r).log().sign_mantissa_exponent(); // print("{Sign::POS," if (s == -1) else "{Sign::NEG,", e, ", -// MType({", hex(m % 2^64), ",", hex((m >> 64) % 2^64), "})},"); +// format_hex(m), "},"); const Float128 LOG_R3[139] = { - {Sign::NEG, -142, MType({0xe39d3faf42340ed7, 0x89ff6b38d5de2622})}, - {Sign::NEG, -142, MType({0x7ff3326682c02485, 0x87ff6f80ccb40f16})}, - {Sign::NEG, -142, MType({0x5caf4fbe343cf928, 0x85ff73b8c3cdf731})}, - {Sign::NEG, -142, MType({0xcdb6e554348f7fe8, 0x83ff77e0bb2ade79})}, - {Sign::NEG, -142, MType({0xef009c2457de25d, 0x81ff7bf8b2c9c4f6})}, - {Sign::NEG, -143, MType({0x8883333c57b57c74, 0xffff000155535558})}, - {Sign::NEG, -143, MType({0xf32668f39c70d183, 0xfbff07f145931f44})}, - {Sign::NEG, -143, MType({0x459a73c6a6486fe3, 0xf7ff0fc13650e7bd})}, - {Sign::NEG, -143, MType({0x37b18cca7dd3a29f, 0xf3ff1771278aaecd})}, - {Sign::NEG, -143, MType({0x513f610d21bcfc78, 0xefff1f01193e7480})}, - {Sign::NEG, -143, MType({0xea190b95c0690b7b, 0xebff26710b6a38e1})}, - {Sign::NEG, -143, MType({0x2a150f64f0ad1743, 0xe7ff2dc0fe0bfbfd})}, - {Sign::NEG, -143, MType({0x90b5174e995e9d1, 0xe3ff34f0f121bddd})}, - {Sign::NEG, -143, MType({0x4ed512b9b93ea2bf, 0xdfff3c00e4a97e8c})}, - {Sign::NEG, -143, MType({0x934cea217ab794a2, 0xdbff42f0d8a13e15})}, - {Sign::NEG, -143, MType({0x3e4ebe948afd2c76, 0xd7ff49c0cd06fc83})}, - {Sign::NEG, -143, MType({0x87b7c0f5bcfee2e1, 0xd3ff5070c1d8b9df})}, - {Sign::NEG, -143, MType({0x776666228cb6371b, 
0xcfff5700b7147634})}, - {Sign::NEG, -143, MType({0xe53a60f3514db358, 0xcbff5d70acb8318b})}, - {Sign::NEG, -143, MType({0x79149c3b6e57fa86, 0xc7ff63c0a2c1ebef})}, - {Sign::NEG, -143, MType({0xaad734c98416df2a, 0xc3ff69f0992fa568})}, - {Sign::NEG, -143, MType({0xc26573679ed28334, 0xbfff70008fff5e00})}, - {Sign::NEG, -143, MType({0xd7a3c6db6540809f, 0xbbff75f0872f15c0})}, - {Sign::NEG, -143, MType({0xd277bde645fb1aad, 0xb7ff7bc07ebcccb1})}, - {Sign::NEG, -143, MType({0x6ac80145a4087793, 0xb3ff817076a682dc})}, - {Sign::NEG, -143, MType({0x287c4db30271e265, 0xafff87006eea3849})}, - {Sign::NEG, -143, MType({0x637d6de42eeb151e, 0xabff8c706785ed00})}, - {Sign::NEG, -143, MType({0x43b5348b6b898a8c, 0xa7ff91c06077a10a})}, - {Sign::NEG, -143, MType({0xc10e7657978bd7f6, 0xa3ff96f059bd546e})}, - {Sign::NEG, -143, MType({0xa37503f457310e59, 0x9fff9c0053550735})}, - {Sign::NEG, -143, MType({0x82d5a40a3aa022ff, 0x9bffa0f04d3cb966})}, - {Sign::NEG, -143, MType({0xc71e0d3ee3df5f4d, 0x97ffa5c047726b08})}, - {Sign::NEG, -143, MType({0xa83ce0352bdbd79b, 0x93ffaa7041f41c23})}, - {Sign::NEG, -143, MType({0x2e21a18d4680e8e4, 0x8fffaf003cbfccbe})}, - {Sign::NEG, -143, MType({0x30bcb3e4e5dfbd28, 0x8bffb37037d37cdf})}, - {Sign::NEG, -143, MType({0x57ff51d75c66d64a, 0x87ffb7c0332d2c8d})}, - {Sign::NEG, -143, MType({0x1bdb87fdbe299f43, 0x83ffbbf02ecadbcf})}, - {Sign::NEG, -144, MType({0x88885dde02700703, 0xffff800055551555})}, - {Sign::NEG, -144, MType({0xd259ca803a0c1870, 0xf7ff87e04d94724c})}, - {Sign::NEG, -144, MType({0xe514130851c7070a, 0xefff8f80464fce8f})}, - {Sign::NEG, -144, MType({0x30a16898f3073a64, 0xe7ff96e03f832a2a})}, - {Sign::NEG, -144, MType({0xc4ed64517b2949ce, 0xdfff9e00392a8526})}, - {Sign::NEG, -144, MType({0x51e4fb4e32cf6350, 0xd7ffa4e03341df90})}, - {Sign::NEG, -144, MType({0x277672a88350bcce, 0xcfffab802dc53971})}, - {Sign::NEG, -144, MType({0x359153772a490f06, 0xc7ffb1e028b092d3})}, - {Sign::NEG, -144, MType({0xc265ece6b481a0e, 0xbfffb80023ffebc0})}, - {Sign::NEG, 
-144, MType({0xdb2781c03fa132f6, 0xb7ffbde01faf4440})}, - {Sign::NEG, -144, MType({0x7287c95c845ada33, 0xafffc3801bba9c5e})}, - {Sign::NEG, -144, MType({0x423b56b1263e5a77, 0xa7ffc8e0181df421})}, - {Sign::NEG, -144, MType({0x5a3752ca4c076fa3, 0x9fffce0014d54b91})}, - {Sign::NEG, -144, MType({0x6a71e2b27eb3f573, 0x97ffd2e011dca2b6})}, - {Sign::NEG, -144, MType({0xc2e21b72cff39d8f, 0x8fffd7800f2ff997})}, - {Sign::NEG, -144, MType({0x537ff612feb7ac9e, 0x87ffdbe00ccb503c})}, - {Sign::NEG, -145, MType({0x5888873333c57c18, 0xffffc00015554d55})}, - {Sign::NEG, -145, MType({0xfa51421842311c42, 0xefffc7c01193f9d1})}, - {Sign::NEG, -145, MType({0x2c4ed6de475b942c, 0xdfffcf000e4aa5fa})}, - {Sign::NEG, -145, MType({0xce77678cbb6fcb88, 0xcfffd5c00b7151d8})}, - {Sign::NEG, -145, MType({0xc26629a679ed3b, 0xbfffdc0008fffd78})}, - {Sign::NEG, -145, MType({0x23287cb9d3072728, 0xafffe1c006eea8e1})}, - {Sign::NEG, -145, MType({0xd5a37540fd057315, 0x9fffe7000535541c})}, - {Sign::NEG, -145, MType({0xf82e21c1fce36810, 0x8fffebc003cbff32})}, - {Sign::NEG, -146, MType({0x5588887ddde02702, 0xffffe00005555455})}, - {Sign::NEG, -146, MType({0x9ac4ed72adf5b295, 0xdfffe7800392aa14})}, - {Sign::NEG, -146, MType({0xc26648066b482, 0xbfffee00023fffaf})}, - {Sign::NEG, -146, MType({0x455a3754b292c077, 0x9ffff380014d552e})}, - {Sign::NEG, -147, MType({0x5558888833333c58, 0xfffff00001555535})}, - {Sign::NEG, -147, MType({0xe000c2665736679f, 0xbffff700008ffff5})}, - {Sign::NEG, -148, MType({0x5555888885ddde02, 0xfffff80000555551})}, - {Sign::NEG, -149, MType({0xd555588888733334, 0xfffffc0000155554})}, - {Sign::POS, 0, MType({0x0, 0x0})}, - {Sign::POS, -148, MType({0xeaaaac44444eeeef, 0x80000200000aaaaa})}, - {Sign::POS, -147, MType({0xaaaac444459999ac, 0x80000400002aaaac})}, - {Sign::POS, -147, MType({0x2000c2667596679f, 0xc00009000090000a})}, - {Sign::POS, -146, MType({0xaaac44446eeef381, 0x8000080000aaaaba})}, - {Sign::POS, -146, MType({0x655a3755f81815cc, 0xa0000c80014d557c})}, - {Sign::POS, -146, 
MType({0xc26684c66b482, 0xc000120002400051})}, - {Sign::POS, -146, MType({0xbac4ed7c40fb07eb, 0xe00018800392ab40})}, - {Sign::POS, -145, MType({0xaac44449999abe2c, 0x8000100002aaab2a})}, - {Sign::POS, -145, MType({0x82e21d79cbb6812, 0x9000144003cc00cd})}, - {Sign::POS, -145, MType({0xd5a37569adb01dc3, 0xa00019000535568d})}, - {Sign::POS, -145, MType({0x33287d01e8c9d1d9, 0xb0001e4006eeac74})}, - {Sign::POS, -145, MType({0xc266a32679ed48, 0xc000240009000288})}, - {Sign::POS, -145, MType({0xde77685122b2764b, 0xd0002a400b7158d1})}, - {Sign::POS, -145, MType({0x2c4ed810a8063f03, 0xe00031000e4aaf5b})}, - {Sign::POS, -145, MType({0xa5143e7be891c8f, 0xf00038401194062e})}, - {Sign::POS, -144, MType({0xac4444eeef3813a1, 0x800020000aaaaeaa})}, - {Sign::POS, -144, MType({0x5b7ff7fe1339025b, 0x880024200ccb5a6e})}, - {Sign::POS, -144, MType({0x42e21e26caf39e33, 0x900028800f300668})}, - {Sign::POS, -144, MType({0xf271e66fa5554bc6, 0x98002d2011dcb29e})}, - {Sign::POS, -144, MType({0x5a3757e0615cc676, 0xa000320014d55f19})}, - {Sign::POS, -144, MType({0xca3b5d8210ca5cab, 0xa8003720181e0bde})}, - {Sign::POS, -144, MType({0xf287d25f3cb032bb, 0xb0003c801bbab8f6})}, - {Sign::POS, -144, MType({0xe3278d840be28cdb, 0xb80042201faf6669})}, - {Sign::POS, -144, MType({0xc266dfe6b482076, 0xc000480024001440})}, - {Sign::POS, -144, MType({0x3d9166de380a6d3d, 0xc8004e2028b0c282})}, - {Sign::POS, -144, MType({0xa7768b356ba61e4b, 0xd00054802dc57139})}, - {Sign::POS, -144, MType({0xd9e51a1849db73c1, 0xd8005b203342206f})}, - {Sign::POS, -144, MType({0xc4ed8a9d907eb521, 0xe0006200392ad02e})}, - {Sign::POS, -144, MType({0xb8a197dea928acd7, 0xe80069203f838080})}, - {Sign::POS, -144, MType({0x65144cf7dcc72d3b, 0xf000708046503170})}, - {Sign::POS, -144, MType({0xda5a1108890d9f6a, 0xf80078204d94e308})}, - {Sign::POS, -143, MType({0xc4445999abe2ce2c, 0x800040002aaacaaa})}, - {Sign::POS, -143, MType({0x1fdbbb4f3bffc832, 0x840044102ecb2431})}, - {Sign::POS, -143, MType({0x97ff8f39ec91b4ee, 
0x88004840332d7e1d})}, - {Sign::POS, -143, MType({0x74bcfcf0b3f0a95d, 0x8c004c9037d3d876})}, - {Sign::POS, -143, MType({0x2e21f80ca6813aff, 0x900051003cc03342})}, - {Sign::POS, -143, MType({0x6c3d4629170ce87f, 0x9400559041f48e87})}, - {Sign::POS, -143, MType({0x71e84e3b80a8881, 0x98005a404772ea4d})}, - {Sign::POS, -143, MType({0x6d62fdcbdd6bec3, 0x9c005f104d3d469a})}, - {Sign::POS, -143, MType({0xa375a6b701dc77c0, 0xa00064005355a375})}, - {Sign::POS, -143, MType({0x450f331826ad6b05, 0xa400691059be00e7})}, - {Sign::POS, -143, MType({0x83b60ea8bd0aa459, 0xa8006e4060785ef6})}, - {Sign::POS, -143, MType({0x277e691469dd13f5, 0xac0073906786bdab})}, - {Sign::POS, -143, MType({0x287d6e0a0d1e25eb, 0xb00079006eeb1d0d})}, - {Sign::POS, -143, MType({0xaec94b3be9b060f5, 0xb4007e9076a77d24})}, - {Sign::POS, -143, MType({0x1279365fce280cce, 0xb80084407ebdddfa})}, - {Sign::POS, -143, MType({0xdba5732f3e83e04a, 0xbc008a1087303f95})}, - {Sign::POS, -143, MType({0xc26759679ed5b754, 0xc00090009000a200})}, - {Sign::POS, -143, MType({0xaed95aca5edb5109, 0xc400961099310543})}, - {Sign::POS, -143, MType({0xb917091d2687160f, 0xc8009c40a2c36967})}, - {Sign::POS, -143, MType({0x293d1c2a0378e75d, 0xcc00a290acb9ce76})}, - {Sign::POS, -143, MType({0x776977bf9766f5a7, 0xd000a900b7163478})}, - {Sign::POS, -143, MType({0x4bbb31b14776a18b, 0xd400af90c1da9b78})}, - {Sign::POS, -143, MType({0x7e5297d76c8564ba, 0xd800b640cd09037f})}, - {Sign::POS, -143, MType({0x1751360f8461c447, 0xdc00bd10d8a36c98})}, - {Sign::POS, -143, MType({0x4ed9dc3c63f44c41, 0xe000c400e4abd6cc})}, - {Sign::POS, -143, MType({0x8d10a4466a5894d5, 0xe400cb10f1244226})}, - {Sign::POS, -143, MType({0x6a1af81bb4e6510e, 0xe800d240fe0eaeb1})}, - {Sign::POS, -143, MType({0xae1f97b0542a677a, 0xec00d9910b6d1c77})}, - {Sign::POS, -143, MType({0x51469efe81d014cc, 0xf000e10119418b84})}, - {Sign::POS, -143, MType({0x7bb98c06d77a18b4, 0xf400e891278dfbe2})}, - {Sign::POS, -143, MType({0x85a344d0868bed17, 0xf800f04136546d9d})}, - {Sign::POS, 
-143, MType({0xf7301d6990e307cc, 0xfc00f8114596e0c0})}, - {Sign::POS, -142, MType({0x4446eef38140138f, 0x80008000aaabaaac})}, - {Sign::POS, -142, MType({0x10f5e43296105497, 0x82008408b2cbe5b8})}, - {Sign::POS, -142, MType({0xedbd4f83ef63f730, 0x84008820bb2d2189})}, - {Sign::POS, -142, MType({0xfeb654fd541c638e, 0x86008c48c3d05e27})}, - {Sign::POS, -142, MType({0x7ffadeb8882f7674, 0x88009080ccb69b98})}, - {Sign::POS, -142, MType({0xc5a59fd36bd44397, 0x8a0094c8d5e0d9e1})}, + {Sign::NEG, -142, 0x89ff6b38'd5de2622'e39d3faf'42340ed7_u128}, + {Sign::NEG, -142, 0x87ff6f80'ccb40f16'7ff33266'82c02485_u128}, + {Sign::NEG, -142, 0x85ff73b8'c3cdf731'5caf4fbe'343cf928_u128}, + {Sign::NEG, -142, 0x83ff77e0'bb2ade79'cdb6e554'348f7fe8_u128}, + {Sign::NEG, -142, 0x81ff7bf8'b2c9c4f6'0ef009c2'457de25d_u128}, + {Sign::NEG, -143, 0xffff0001'55535558'8883333c'57b57c74_u128}, + {Sign::NEG, -143, 0xfbff07f1'45931f44'f32668f3'9c70d183_u128}, + {Sign::NEG, -143, 0xf7ff0fc1'3650e7bd'459a73c6'a6486fe3_u128}, + {Sign::NEG, -143, 0xf3ff1771'278aaecd'37b18cca'7dd3a29f_u128}, + {Sign::NEG, -143, 0xefff1f01'193e7480'513f610d'21bcfc78_u128}, + {Sign::NEG, -143, 0xebff2671'0b6a38e1'ea190b95'c0690b7b_u128}, + {Sign::NEG, -143, 0xe7ff2dc0'fe0bfbfd'2a150f64'f0ad1743_u128}, + {Sign::NEG, -143, 0xe3ff34f0'f121bddd'090b5174'e995e9d1_u128}, + {Sign::NEG, -143, 0xdfff3c00'e4a97e8c'4ed512b9'b93ea2bf_u128}, + {Sign::NEG, -143, 0xdbff42f0'd8a13e15'934cea21'7ab794a2_u128}, + {Sign::NEG, -143, 0xd7ff49c0'cd06fc83'3e4ebe94'8afd2c76_u128}, + {Sign::NEG, -143, 0xd3ff5070'c1d8b9df'87b7c0f5'bcfee2e1_u128}, + {Sign::NEG, -143, 0xcfff5700'b7147634'77666622'8cb6371b_u128}, + {Sign::NEG, -143, 0xcbff5d70'acb8318b'e53a60f3'514db358_u128}, + {Sign::NEG, -143, 0xc7ff63c0'a2c1ebef'79149c3b'6e57fa86_u128}, + {Sign::NEG, -143, 0xc3ff69f0'992fa568'aad734c9'8416df2a_u128}, + {Sign::NEG, -143, 0xbfff7000'8fff5e00'c2657367'9ed28334_u128}, + {Sign::NEG, -143, 0xbbff75f0'872f15c0'd7a3c6db'6540809f_u128}, + {Sign::NEG, -143, 
0xb7ff7bc0'7ebcccb1'd277bde6'45fb1aad_u128}, + {Sign::NEG, -143, 0xb3ff8170'76a682dc'6ac80145'a4087793_u128}, + {Sign::NEG, -143, 0xafff8700'6eea3849'287c4db3'0271e265_u128}, + {Sign::NEG, -143, 0xabff8c70'6785ed00'637d6de4'2eeb151e_u128}, + {Sign::NEG, -143, 0xa7ff91c0'6077a10a'43b5348b'6b898a8c_u128}, + {Sign::NEG, -143, 0xa3ff96f0'59bd546e'c10e7657'978bd7f6_u128}, + {Sign::NEG, -143, 0x9fff9c00'53550735'a37503f4'57310e59_u128}, + {Sign::NEG, -143, 0x9bffa0f0'4d3cb966'82d5a40a'3aa022ff_u128}, + {Sign::NEG, -143, 0x97ffa5c0'47726b08'c71e0d3e'e3df5f4d_u128}, + {Sign::NEG, -143, 0x93ffaa70'41f41c23'a83ce035'2bdbd79b_u128}, + {Sign::NEG, -143, 0x8fffaf00'3cbfccbe'2e21a18d'4680e8e4_u128}, + {Sign::NEG, -143, 0x8bffb370'37d37cdf'30bcb3e4'e5dfbd28_u128}, + {Sign::NEG, -143, 0x87ffb7c0'332d2c8d'57ff51d7'5c66d64a_u128}, + {Sign::NEG, -143, 0x83ffbbf0'2ecadbcf'1bdb87fd'be299f43_u128}, + {Sign::NEG, -144, 0xffff8000'55551555'88885dde'02700703_u128}, + {Sign::NEG, -144, 0xf7ff87e0'4d94724c'd259ca80'3a0c1870_u128}, + {Sign::NEG, -144, 0xefff8f80'464fce8f'e5141308'51c7070a_u128}, + {Sign::NEG, -144, 0xe7ff96e0'3f832a2a'30a16898'f3073a64_u128}, + {Sign::NEG, -144, 0xdfff9e00'392a8526'c4ed6451'7b2949ce_u128}, + {Sign::NEG, -144, 0xd7ffa4e0'3341df90'51e4fb4e'32cf6350_u128}, + {Sign::NEG, -144, 0xcfffab80'2dc53971'277672a8'8350bcce_u128}, + {Sign::NEG, -144, 0xc7ffb1e0'28b092d3'35915377'2a490f06_u128}, + {Sign::NEG, -144, 0xbfffb800'23ffebc0'0c265ece'6b481a0e_u128}, + {Sign::NEG, -144, 0xb7ffbde0'1faf4440'db2781c0'3fa132f6_u128}, + {Sign::NEG, -144, 0xafffc380'1bba9c5e'7287c95c'845ada33_u128}, + {Sign::NEG, -144, 0xa7ffc8e0'181df421'423b56b1'263e5a77_u128}, + {Sign::NEG, -144, 0x9fffce00'14d54b91'5a3752ca'4c076fa3_u128}, + {Sign::NEG, -144, 0x97ffd2e0'11dca2b6'6a71e2b2'7eb3f573_u128}, + {Sign::NEG, -144, 0x8fffd780'0f2ff997'c2e21b72'cff39d8f_u128}, + {Sign::NEG, -144, 0x87ffdbe0'0ccb503c'537ff612'feb7ac9e_u128}, + {Sign::NEG, -145, 0xffffc000'15554d55'58888733'33c57c18_u128}, + 
{Sign::NEG, -145, 0xefffc7c0'1193f9d1'fa514218'42311c42_u128}, + {Sign::NEG, -145, 0xdfffcf00'0e4aa5fa'2c4ed6de'475b942c_u128}, + {Sign::NEG, -145, 0xcfffd5c0'0b7151d8'ce77678c'bb6fcb88_u128}, + {Sign::NEG, -145, 0xbfffdc00'08fffd78'00c26629'a679ed3b_u128}, + {Sign::NEG, -145, 0xafffe1c0'06eea8e1'23287cb9'd3072728_u128}, + {Sign::NEG, -145, 0x9fffe700'0535541c'd5a37540'fd057315_u128}, + {Sign::NEG, -145, 0x8fffebc0'03cbff32'f82e21c1'fce36810_u128}, + {Sign::NEG, -146, 0xffffe000'05555455'5588887d'dde02702_u128}, + {Sign::NEG, -146, 0xdfffe780'0392aa14'9ac4ed72'adf5b295_u128}, + {Sign::NEG, -146, 0xbfffee00'023fffaf'000c2664'8066b482_u128}, + {Sign::NEG, -146, 0x9ffff380'014d552e'455a3754'b292c077_u128}, + {Sign::NEG, -147, 0xfffff000'01555535'55588888'33333c58_u128}, + {Sign::NEG, -147, 0xbffff700'008ffff5'e000c266'5736679f_u128}, + {Sign::NEG, -148, 0xfffff800'00555551'55558888'85ddde02_u128}, + {Sign::NEG, -149, 0xfffffc00'00155554'd5555888'88733334_u128}, + {Sign::POS, 0, 0_u128}, + {Sign::POS, -148, 0x80000200'000aaaaa'eaaaac44'444eeeef_u128}, + {Sign::POS, -147, 0x80000400'002aaaac'aaaac444'459999ac_u128}, + {Sign::POS, -147, 0xc0000900'0090000a'2000c266'7596679f_u128}, + {Sign::POS, -146, 0x80000800'00aaaaba'aaac4444'6eeef381_u128}, + {Sign::POS, -146, 0xa0000c80'014d557c'655a3755'f81815cc_u128}, + {Sign::POS, -146, 0xc0001200'02400051'000c2668'4c66b482_u128}, + {Sign::POS, -146, 0xe0001880'0392ab40'bac4ed7c'40fb07eb_u128}, + {Sign::POS, -145, 0x80001000'02aaab2a'aac44449'999abe2c_u128}, + {Sign::POS, -145, 0x90001440'03cc00cd'082e21d7'9cbb6812_u128}, + {Sign::POS, -145, 0xa0001900'0535568d'd5a37569'adb01dc3_u128}, + {Sign::POS, -145, 0xb0001e40'06eeac74'33287d01'e8c9d1d9_u128}, + {Sign::POS, -145, 0xc0002400'09000288'00c266a3'2679ed48_u128}, + {Sign::POS, -145, 0xd0002a40'0b7158d1'de776851'22b2764b_u128}, + {Sign::POS, -145, 0xe0003100'0e4aaf5b'2c4ed810'a8063f03_u128}, + {Sign::POS, -145, 0xf0003840'1194062e'0a5143e7'be891c8f_u128}, + {Sign::POS, -144, 
0x80002000'0aaaaeaa'ac4444ee'ef3813a1_u128}, + {Sign::POS, -144, 0x88002420'0ccb5a6e'5b7ff7fe'1339025b_u128}, + {Sign::POS, -144, 0x90002880'0f300668'42e21e26'caf39e33_u128}, + {Sign::POS, -144, 0x98002d20'11dcb29e'f271e66f'a5554bc6_u128}, + {Sign::POS, -144, 0xa0003200'14d55f19'5a3757e0'615cc676_u128}, + {Sign::POS, -144, 0xa8003720'181e0bde'ca3b5d82'10ca5cab_u128}, + {Sign::POS, -144, 0xb0003c80'1bbab8f6'f287d25f'3cb032bb_u128}, + {Sign::POS, -144, 0xb8004220'1faf6669'e3278d84'0be28cdb_u128}, + {Sign::POS, -144, 0xc0004800'24001440'0c266dfe'6b482076_u128}, + {Sign::POS, -144, 0xc8004e20'28b0c282'3d9166de'380a6d3d_u128}, + {Sign::POS, -144, 0xd0005480'2dc57139'a7768b35'6ba61e4b_u128}, + {Sign::POS, -144, 0xd8005b20'3342206f'd9e51a18'49db73c1_u128}, + {Sign::POS, -144, 0xe0006200'392ad02e'c4ed8a9d'907eb521_u128}, + {Sign::POS, -144, 0xe8006920'3f838080'b8a197de'a928acd7_u128}, + {Sign::POS, -144, 0xf0007080'46503170'65144cf7'dcc72d3b_u128}, + {Sign::POS, -144, 0xf8007820'4d94e308'da5a1108'890d9f6a_u128}, + {Sign::POS, -143, 0x80004000'2aaacaaa'c4445999'abe2ce2c_u128}, + {Sign::POS, -143, 0x84004410'2ecb2431'1fdbbb4f'3bffc832_u128}, + {Sign::POS, -143, 0x88004840'332d7e1d'97ff8f39'ec91b4ee_u128}, + {Sign::POS, -143, 0x8c004c90'37d3d876'74bcfcf0'b3f0a95d_u128}, + {Sign::POS, -143, 0x90005100'3cc03342'2e21f80c'a6813aff_u128}, + {Sign::POS, -143, 0x94005590'41f48e87'6c3d4629'170ce87f_u128}, + {Sign::POS, -143, 0x98005a40'4772ea4d'071e84e3'b80a8881_u128}, + {Sign::POS, -143, 0x9c005f10'4d3d469a'06d62fdc'bdd6bec3_u128}, + {Sign::POS, -143, 0xa0006400'5355a375'a375a6b7'01dc77c0_u128}, + {Sign::POS, -143, 0xa4006910'59be00e7'450f3318'26ad6b05_u128}, + {Sign::POS, -143, 0xa8006e40'60785ef6'83b60ea8'bd0aa459_u128}, + {Sign::POS, -143, 0xac007390'6786bdab'277e6914'69dd13f5_u128}, + {Sign::POS, -143, 0xb0007900'6eeb1d0d'287d6e0a'0d1e25eb_u128}, + {Sign::POS, -143, 0xb4007e90'76a77d24'aec94b3b'e9b060f5_u128}, + {Sign::POS, -143, 0xb8008440'7ebdddfa'1279365f'ce280cce_u128}, + 
{Sign::POS, -143, 0xbc008a10'87303f95'dba5732f'3e83e04a_u128}, + {Sign::POS, -143, 0xc0009000'9000a200'c2675967'9ed5b754_u128}, + {Sign::POS, -143, 0xc4009610'99310543'aed95aca'5edb5109_u128}, + {Sign::POS, -143, 0xc8009c40'a2c36967'b917091d'2687160f_u128}, + {Sign::POS, -143, 0xcc00a290'acb9ce76'293d1c2a'0378e75d_u128}, + {Sign::POS, -143, 0xd000a900'b7163478'776977bf'9766f5a7_u128}, + {Sign::POS, -143, 0xd400af90'c1da9b78'4bbb31b1'4776a18b_u128}, + {Sign::POS, -143, 0xd800b640'cd09037f'7e5297d7'6c8564ba_u128}, + {Sign::POS, -143, 0xdc00bd10'd8a36c98'1751360f'8461c447_u128}, + {Sign::POS, -143, 0xe000c400'e4abd6cc'4ed9dc3c'63f44c41_u128}, + {Sign::POS, -143, 0xe400cb10'f1244226'8d10a446'6a5894d5_u128}, + {Sign::POS, -143, 0xe800d240'fe0eaeb1'6a1af81b'b4e6510e_u128}, + {Sign::POS, -143, 0xec00d991'0b6d1c77'ae1f97b0'542a677a_u128}, + {Sign::POS, -143, 0xf000e101'19418b84'51469efe'81d014cc_u128}, + {Sign::POS, -143, 0xf400e891'278dfbe2'7bb98c06'd77a18b4_u128}, + {Sign::POS, -143, 0xf800f041'36546d9d'85a344d0'868bed17_u128}, + {Sign::POS, -143, 0xfc00f811'4596e0c0'f7301d69'90e307cc_u128}, + {Sign::POS, -142, 0x80008000'aaabaaac'4446eef3'8140138f_u128}, + {Sign::POS, -142, 0x82008408'b2cbe5b8'10f5e432'96105497_u128}, + {Sign::POS, -142, 0x84008820'bb2d2189'edbd4f83'ef63f730_u128}, + {Sign::POS, -142, 0x86008c48'c3d05e27'feb654fd'541c638e_u128}, + {Sign::POS, -142, 0x88009080'ccb69b98'7ffadeb8'882f7674_u128}, + {Sign::POS, -142, 0x8a0094c8'd5e0d9e1'c5a59fd3'6bd44397_u128}, }; // Minimax polynomial generated by Sollya with: @@ -810,10 +814,10 @@ const Float128 LOG_R3[139] = { // > dirtyinfnorm(log(1 + x)/x - 1 - x*P, [-0x1.01928p-22 , 0x1p-22]); // 0x1.ce1e...p-116 const Float128 BIG_COEFFS[4]{ - {Sign::POS, -130, MType({0x7ed78465d460315b, 0xccccccd74818e397})}, - {Sign::NEG, -129, MType({0xc6388a23871ce156, 0x80000000000478b0})}, - {Sign::POS, -129, MType({0xaa807bd867763262, 0xaaaaaaaaaaaaaaaa})}, - {Sign::NEG, -128, MType({0x0, 0x8000000000000000})}, + {Sign::POS, 
-130, 0xccccccd7'4818e397'7ed78465'd460315b_u128}, + {Sign::NEG, -129, 0x80000000'000478b0'c6388a23'871ce156_u128}, + {Sign::POS, -129, 0xaaaaaaaa'aaaaaaaa'aa807bd8'67763262_u128}, + {Sign::NEG, -128, 0x80000000'00000000'00000000'00000000_u128}, }; LIBC_INLINE double log1p_accurate(int e_x, int index, diff --git a/libc/src/math/generic/log2.cpp b/libc/src/math/generic/log2.cpp index ab392166475c70..9657b344ffbd7c 100644 --- a/libc/src/math/generic/log2.cpp +++ b/libc/src/math/generic/log2.cpp @@ -14,6 +14,7 @@ #include "src/__support/FPUtil/dyadic_float.h" #include "src/__support/FPUtil/multiply_add.h" #include "src/__support/common.h" +#include "src/__support/integer_literals.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include "common_constants.h" @@ -23,8 +24,8 @@ namespace LIBC_NAMESPACE { // 128-bit precision dyadic floating point numbers. using Float128 = typename fputil::DyadicFloat<128>; -using MType = typename Float128::MantissaType; using Sign = fputil::Sign; +using LIBC_NAMESPACE::operator""_u128; namespace { @@ -167,142 +168,144 @@ const fputil::DoubleDouble LOG_R1[128] = { const LogRR LOG2_TABLE = { // -log2(r) with 128-bit precision generated by SageMath with: - // + // def format_hex(value): + // l = hex(value)[2:] + // n = 8 + // x = [l[i:i + n] for i in range(0, len(l), n)] + // return "0x" + "'".join(x) + "_u128" // for i in range(1, 127): // r = 2^-8 * ceil( 2^8 * (1 - 2^(-8)) / (1 + i*2^(-7)) ); // s, m, e = RealField(128)(r).log2().sign_mantissa_exponent(); - // print("{Sign::POS,", e, ", MType({", hex(m % 2^64), ",", hex((m >> 64) - // % 2^64), - // "})},"); + // print("{Sign::POS,", e, ",", format_hex(m), "},"); /* .step_1 = */ { - {Sign::POS, 0, MType(0)}, - {Sign::POS, -134, MType({0xe8c251630adb856a, 0xb963dd107b993ada})}, - {Sign::POS, -133, MType({0xa41b08fbe05f82d0, 0xba1f7430f9aab1b2})}, - {Sign::POS, -132, MType({0x1f06c085bc1b865d, 0x8c25c7262b57c149})}, - {Sign::POS, -132, MType({0x2e1c07f0438ebac0, 
0xbb9ca64ecac6aaef})}, - {Sign::POS, -132, MType({0xaacc0e21d6541224, 0xeb75e8f8ff5ff022})}, - {Sign::POS, -131, MType({0x31514aef39ce6303, 0x8dd9953002a4e866})}, - {Sign::POS, -131, MType({0x50799beaaab2940c, 0xa62b07f3457c4070})}, - {Sign::POS, -131, MType({0xda288fc615a727dc, 0xbeb024b67dda6339})}, - {Sign::POS, -131, MType({0x22dbbaced44516ce, 0xcb0657cd5dbe4f6f})}, - {Sign::POS, -131, MType({0xd939dceecdd9ce05, 0xe3da945b878e27d0})}, - {Sign::POS, -131, MType({0x9596a8e2e84c8f45, 0xfce4aee0e88b2749})}, - {Sign::POS, -130, MType({0x243efd9325954cfe, 0x84bf1c673032495d})}, - {Sign::POS, -130, MType({0x91d79938e7226384, 0x916d6e1559a4b696})}, - {Sign::POS, -130, MType({0x22563c9ed9462091, 0x9e37db2866f2850b})}, - {Sign::POS, -130, MType({0x3a53ca1181015ada, 0xa4a7c31dc6f9a5d5})}, - {Sign::POS, -130, MType({0x3eb8023eed65d601, 0xb19d45fa1be70855})}, - {Sign::POS, -130, MType({0xce5cabbd2d753d9b, 0xb823018e3cfc25f0})}, - {Sign::POS, -130, MType({0x54dbf16fb0695ee3, 0xc544c055fde99333})}, - {Sign::POS, -130, MType({0x5196a85a067c6739, 0xcbe0e589e3f6042d})}, - {Sign::POS, -130, MType({0xf349845e48955078, 0xd930124bea9a2c66})}, - {Sign::POS, -130, MType({0x815ef705cfaef035, 0xdfe33d3fffa66037})}, - {Sign::POS, -130, MType({0x2ba704dcaa76f41d, 0xed61169f220e97f2})}, - {Sign::POS, -130, MType({0x2062f36bc14d0d93, 0xf42be9e9b09b3def})}, - {Sign::POS, -129, MType({0x132880194144b02b, 0x80ecdde7d30ea2ed})}, - {Sign::POS, -129, MType({0x54880de63812fd49, 0x845e706cafd1bf61})}, - {Sign::POS, -129, MType({0xa87c02eaf36e2c29, 0x8b4e029b1f8ac391})}, - {Sign::POS, -129, MType({0x9804237ec8d9431d, 0x8ecc164ea93841ae})}, - {Sign::POS, -129, MType({0x20f81ca95d9e7968, 0x924e69589e6b6268})}, - {Sign::POS, -129, MType({0x124bc6f1acf95dc4, 0x995ff71b8773432d})}, - {Sign::POS, -129, MType({0x5a5e8e21bff3336b, 0x9cef470aacfb7bf9})}, - {Sign::POS, -129, MType({0x4e53fa3329f65894, 0xa08300be1f651473})}, - {Sign::POS, -129, MType({0x2742d7296a39eed6, 0xa7b7dd96762cc3c7})}, - {Sign::POS, 
-129, MType({0xf359c5544bc5e134, 0xab591735abc724e4})}, - {Sign::POS, -129, MType({0x6b6c874dd96e1d75, 0xaefee78f75707221})}, - {Sign::POS, -129, MType({0x21006678c0a5c390, 0xb2a95a4cc313bb59})}, - {Sign::POS, -129, MType({0x6d40900b25024b32, 0xb6587b432e47501b})}, - {Sign::POS, -129, MType({0x89e2eb553b279b3d, 0xbdc4f8167955698f})}, - {Sign::POS, -129, MType({0xd58525aad392ca50, 0xc1826c8608fe9951})}, - {Sign::POS, -129, MType({0x54dbf16fb0695ee3, 0xc544c055fde99333})}, - {Sign::POS, -129, MType({0x88d5eae3326327bb, 0xc90c004926e9dbfb})}, - {Sign::POS, -129, MType({0x46dfa05bddfded8c, 0xccd83954b6359379})}, - {Sign::POS, -129, MType({0xbfe9dbebf2e8a45e, 0xd47fcb8c0852f0c0})}, - {Sign::POS, -129, MType({0x7b11f1c5160c515c, 0xd85b3fa7a3407fa8})}, - {Sign::POS, -129, MType({0x1339e5677ec44dd0, 0xdc3be2bd8d837f7f})}, - {Sign::POS, -129, MType({0xea2b8c7bb0ee9c8b, 0xe021c2cf17ed9bdb})}, - {Sign::POS, -129, MType({0xaec562332791fe38, 0xe40cee16a2ff21c4})}, - {Sign::POS, -129, MType({0x71682ebacca79cfa, 0xe7fd7308d6895b14})}, - {Sign::POS, -129, MType({0xa5ad5ce9fb5a7bb6, 0xebf36055e1abc61e})}, - {Sign::POS, -129, MType({0x3225190531a852c5, 0xefeec4eac371584e})}, - {Sign::POS, -129, MType({0xda8ad649da21eab0, 0xf3efaff29c559a77})}, - {Sign::POS, -129, MType({0x4c3e2ea7c15c3d1e, 0xf7f630d808fc2ada})}, - {Sign::POS, -129, MType({0xbcb9bfa9852e0d35, 0xfc02574686680cc6})}, - {Sign::POS, -128, MType({0xce032f41d1e774e8, 0x800a1995f0019518})}, - {Sign::POS, -128, MType({0x9b39ffeebc29372a, 0x8215ea5cd3e4c4c7})}, - {Sign::POS, -128, MType({0x87f95f1befb6f806, 0x8424a6335c777e0b})}, - {Sign::POS, -128, MType({0xb987b42e3bb332a1, 0x8636557862acb7ce})}, - {Sign::POS, -128, MType({0x139a7ba83bf2d136, 0x884b00aef726cec5})}, - {Sign::POS, -128, MType({0x50799beaaab2941, 0x8a62b07f3457c407})}, - {Sign::POS, -128, MType({0x8bd744617e9b7d52, 0x8c7d6db7169e0cda})}, - {Sign::POS, -128, MType({0x46ad444333ceb10, 0x8e9b414b5a92a606})}, - {Sign::POS, -128, MType({0xef4c737fba4f5d66, 
0x90bc345861bf3d52})}, - {Sign::POS, -128, MType({0xae441c09d761c549, 0x92e050231df57d6f})}, - {Sign::POS, -128, MType({0x6e36aa9ce90a3879, 0x95079e1a0382dc79})}, - {Sign::POS, -128, MType({0xefca1a184e93809, 0x973227d6027ebd8a})}, - {Sign::POS, -128, MType({0xefca1a184e93809, 0x973227d6027ebd8a})}, - {Sign::POS, -128, MType({0x124bc6f1acf95dc4, 0x995ff71b8773432d})}, - {Sign::POS, -128, MType({0x352bea51e58ea9e8, 0x9b9115db83a3dd2d})}, - {Sign::POS, -128, MType({0x266d6cdc959153bc, 0x9dc58e347d37696d})}, - {Sign::POS, -128, MType({0x4527d82c8214ddca, 0x9ffd6a73a78eaf35})}, - {Sign::POS, -128, MType({0x404cabb76d600e3c, 0xa238b5160413106e})}, - {Sign::POS, -128, MType({0x404cabb76d600e3c, 0xa238b5160413106e})}, - {Sign::POS, -128, MType({0xcab7d2ec23f0eef3, 0xa47778c98bcc86a1})}, - {Sign::POS, -128, MType({0x761c48dd859de2d3, 0xa6b9c06e6211646b})}, - {Sign::POS, -128, MType({0x7fd3b7d7e5d148bb, 0xa8ff971810a5e181})}, - {Sign::POS, -128, MType({0xc27c6780d92b4d11, 0xab49080ecda53208})}, - {Sign::POS, -128, MType({0xdb502402c94092cd, 0xad961ed0cb91d406})}, - {Sign::POS, -128, MType({0xdb502402c94092cd, 0xad961ed0cb91d406})}, - {Sign::POS, -128, MType({0x3432ef6b732b6843, 0xafe6e71393eeda29})}, - {Sign::POS, -128, MType({0xbb324da7e046e792, 0xb23b6cc56cc84c99})}, - {Sign::POS, -128, MType({0xb21709ce430c8e24, 0xb493bc0ec9954243})}, - {Sign::POS, -128, MType({0xb21709ce430c8e24, 0xb493bc0ec9954243})}, - {Sign::POS, -128, MType({0xe91ad16ecff10111, 0xb6efe153c7e319f6})}, - {Sign::POS, -128, MType({0xce31e481cd797e79, 0xb94fe935b83e3eb5})}, - {Sign::POS, -128, MType({0xda3e961a96c580fa, 0xbbb3e094b3d228d3})}, - {Sign::POS, -128, MType({0xda3e961a96c580fa, 0xbbb3e094b3d228d3})}, - {Sign::POS, -128, MType({0xf396598aae91499a, 0xbe1bd4913f3fda43})}, - {Sign::POS, -128, MType({0xae4cceb0f621941b, 0xc087d28dfb2febb8})}, - {Sign::POS, -128, MType({0xae4cceb0f621941b, 0xc087d28dfb2febb8})}, - {Sign::POS, -128, MType({0x6c1855c42078f81b, 0xc2f7e831632b6670})}, - {Sign::POS, 
-128, MType({0x169535fb8bf577c8, 0xc56c23679b4d206e})}, - {Sign::POS, -128, MType({0x169535fb8bf577c8, 0xc56c23679b4d206e})}, - {Sign::POS, -128, MType({0x3b24cecc60217942, 0xc7e492644d64237e})}, - {Sign::POS, -128, MType({0x3dc2687fcf939696, 0xca6143a49626d820})}, - {Sign::POS, -128, MType({0x3dc2687fcf939696, 0xca6143a49626d820})}, - {Sign::POS, -128, MType({0xa62e6add1a901a0, 0xcce245f1031e41fa})}, - {Sign::POS, -128, MType({0x5bb6e23138ad51e1, 0xcf67a85fa1f89a04})}, - {Sign::POS, -128, MType({0x5bb6e23138ad51e1, 0xcf67a85fa1f89a04})}, - {Sign::POS, -128, MType({0x7fc60a5103092bae, 0xd1f17a5621fb01ac})}, - {Sign::POS, -128, MType({0xbfe9dbebf2e8a45e, 0xd47fcb8c0852f0c0})}, - {Sign::POS, -128, MType({0xbfe9dbebf2e8a45e, 0xd47fcb8c0852f0c0})}, - {Sign::POS, -128, MType({0x8e2d7d378127d823, 0xd712ac0cf811659d})}, - {Sign::POS, -128, MType({0x5c1a7f14b168b365, 0xd9aa2c3b0ea3cbc1})}, - {Sign::POS, -128, MType({0x5c1a7f14b168b365, 0xd9aa2c3b0ea3cbc1})}, - {Sign::POS, -128, MType({0xb7579f0f8d3d514b, 0xdc465cd155a90942})}, - {Sign::POS, -128, MType({0xb7579f0f8d3d514b, 0xdc465cd155a90942})}, - {Sign::POS, -128, MType({0xb087205eb55aea85, 0xdee74ee64b0c38d3})}, - {Sign::POS, -128, MType({0x424a2623d60dfb16, 0xe18d13ee805a4de3})}, - {Sign::POS, -128, MType({0x424a2623d60dfb16, 0xe18d13ee805a4de3})}, - {Sign::POS, -128, MType({0x4d3a591ae6854787, 0xe437bdbf5254459c})}, - {Sign::POS, -128, MType({0x4d3a591ae6854787, 0xe437bdbf5254459c})}, - {Sign::POS, -128, MType({0x8dcdb6b24c5c5cdf, 0xe6e75e91b9cca551})}, - {Sign::POS, -128, MType({0x33ac7d9ebba8a53c, 0xe99c090536ece983})}, - {Sign::POS, -128, MType({0x33ac7d9ebba8a53c, 0xe99c090536ece983})}, - {Sign::POS, -128, MType({0xfb2eede4b59d8959, 0xec55d022d80e3d27})}, - {Sign::POS, -128, MType({0xfb2eede4b59d8959, 0xec55d022d80e3d27})}, - {Sign::POS, -128, MType({0x308b454666de8f99, 0xef14c7605d60654c})}, - {Sign::POS, -128, MType({0x308b454666de8f99, 0xef14c7605d60654c})}, - {Sign::POS, -128, MType({0x8383cb0ce23bebd4, 
0xf1d902a37aaa5085})}, - {Sign::POS, -128, MType({0x8383cb0ce23bebd4, 0xf1d902a37aaa5085})}, - {Sign::POS, -128, MType({0x64fc87b4a41f7b70, 0xf4a2964538813c67})}, - {Sign::POS, -128, MType({0x64fc87b4a41f7b70, 0xf4a2964538813c67})}, - {Sign::POS, -128, MType({0x3f5d7d82b65c5686, 0xf77197157665f689})}, - {Sign::POS, -128, MType({0x3f5d7d82b65c5686, 0xf77197157665f689})}, - {Sign::POS, -128, MType({0x6476077b9fbd41ae, 0xfa461a5e8f4b759d})}, - {Sign::POS, -128, MType({0x6476077b9fbd41ae, 0xfa461a5e8f4b759d})}, - {Sign::POS, -128, MType({0xe3909ffd0d61778, 0xfd2035e9221ef5d0})}, - {Sign::POS, 0, MType(0)}, + {Sign::POS, 0, 0_u128}, + {Sign::POS, -134, 0xb963dd10'7b993ada'e8c25163'0adb856a_u128}, + {Sign::POS, -133, 0xba1f7430'f9aab1b2'a41b08fb'e05f82d0_u128}, + {Sign::POS, -132, 0x8c25c726'2b57c149'1f06c085'bc1b865d_u128}, + {Sign::POS, -132, 0xbb9ca64e'cac6aaef'2e1c07f0'438ebac0_u128}, + {Sign::POS, -132, 0xeb75e8f8'ff5ff022'aacc0e21'd6541224_u128}, + {Sign::POS, -131, 0x8dd99530'02a4e866'31514aef'39ce6303_u128}, + {Sign::POS, -131, 0xa62b07f3'457c4070'50799bea'aab2940c_u128}, + {Sign::POS, -131, 0xbeb024b6'7dda6339'da288fc6'15a727dc_u128}, + {Sign::POS, -131, 0xcb0657cd'5dbe4f6f'22dbbace'd44516ce_u128}, + {Sign::POS, -131, 0xe3da945b'878e27d0'd939dcee'cdd9ce05_u128}, + {Sign::POS, -131, 0xfce4aee0'e88b2749'9596a8e2'e84c8f45_u128}, + {Sign::POS, -130, 0x84bf1c67'3032495d'243efd93'25954cfe_u128}, + {Sign::POS, -130, 0x916d6e15'59a4b696'91d79938'e7226384_u128}, + {Sign::POS, -130, 0x9e37db28'66f2850b'22563c9e'd9462091_u128}, + {Sign::POS, -130, 0xa4a7c31d'c6f9a5d5'3a53ca11'81015ada_u128}, + {Sign::POS, -130, 0xb19d45fa'1be70855'3eb8023e'ed65d601_u128}, + {Sign::POS, -130, 0xb823018e'3cfc25f0'ce5cabbd'2d753d9b_u128}, + {Sign::POS, -130, 0xc544c055'fde99333'54dbf16f'b0695ee3_u128}, + {Sign::POS, -130, 0xcbe0e589'e3f6042d'5196a85a'067c6739_u128}, + {Sign::POS, -130, 0xd930124b'ea9a2c66'f349845e'48955078_u128}, + {Sign::POS, -130, 
0xdfe33d3f'ffa66037'815ef705'cfaef035_u128}, + {Sign::POS, -130, 0xed61169f'220e97f2'2ba704dc'aa76f41d_u128}, + {Sign::POS, -130, 0xf42be9e9'b09b3def'2062f36b'c14d0d93_u128}, + {Sign::POS, -129, 0x80ecdde7'd30ea2ed'13288019'4144b02b_u128}, + {Sign::POS, -129, 0x845e706c'afd1bf61'54880de6'3812fd49_u128}, + {Sign::POS, -129, 0x8b4e029b'1f8ac391'a87c02ea'f36e2c29_u128}, + {Sign::POS, -129, 0x8ecc164e'a93841ae'9804237e'c8d9431d_u128}, + {Sign::POS, -129, 0x924e6958'9e6b6268'20f81ca9'5d9e7968_u128}, + {Sign::POS, -129, 0x995ff71b'8773432d'124bc6f1'acf95dc4_u128}, + {Sign::POS, -129, 0x9cef470a'acfb7bf9'5a5e8e21'bff3336b_u128}, + {Sign::POS, -129, 0xa08300be'1f651473'4e53fa33'29f65894_u128}, + {Sign::POS, -129, 0xa7b7dd96'762cc3c7'2742d729'6a39eed6_u128}, + {Sign::POS, -129, 0xab591735'abc724e4'f359c554'4bc5e134_u128}, + {Sign::POS, -129, 0xaefee78f'75707221'6b6c874d'd96e1d75_u128}, + {Sign::POS, -129, 0xb2a95a4c'c313bb59'21006678'c0a5c390_u128}, + {Sign::POS, -129, 0xb6587b43'2e47501b'6d40900b'25024b32_u128}, + {Sign::POS, -129, 0xbdc4f816'7955698f'89e2eb55'3b279b3d_u128}, + {Sign::POS, -129, 0xc1826c86'08fe9951'd58525aa'd392ca50_u128}, + {Sign::POS, -129, 0xc544c055'fde99333'54dbf16f'b0695ee3_u128}, + {Sign::POS, -129, 0xc90c0049'26e9dbfb'88d5eae3'326327bb_u128}, + {Sign::POS, -129, 0xccd83954'b6359379'46dfa05b'ddfded8c_u128}, + {Sign::POS, -129, 0xd47fcb8c'0852f0c0'bfe9dbeb'f2e8a45e_u128}, + {Sign::POS, -129, 0xd85b3fa7'a3407fa8'7b11f1c5'160c515c_u128}, + {Sign::POS, -129, 0xdc3be2bd'8d837f7f'1339e567'7ec44dd0_u128}, + {Sign::POS, -129, 0xe021c2cf'17ed9bdb'ea2b8c7b'b0ee9c8b_u128}, + {Sign::POS, -129, 0xe40cee16'a2ff21c4'aec56233'2791fe38_u128}, + {Sign::POS, -129, 0xe7fd7308'd6895b14'71682eba'cca79cfa_u128}, + {Sign::POS, -129, 0xebf36055'e1abc61e'a5ad5ce9'fb5a7bb6_u128}, + {Sign::POS, -129, 0xefeec4ea'c371584e'32251905'31a852c5_u128}, + {Sign::POS, -129, 0xf3efaff2'9c559a77'da8ad649'da21eab0_u128}, + {Sign::POS, -129, 0xf7f630d8'08fc2ada'4c3e2ea7'c15c3d1e_u128}, + 
{Sign::POS, -129, 0xfc025746'86680cc6'bcb9bfa9'852e0d35_u128}, + {Sign::POS, -128, 0x800a1995'f0019518'ce032f41'd1e774e8_u128}, + {Sign::POS, -128, 0x8215ea5c'd3e4c4c7'9b39ffee'bc29372a_u128}, + {Sign::POS, -128, 0x8424a633'5c777e0b'87f95f1b'efb6f806_u128}, + {Sign::POS, -128, 0x86365578'62acb7ce'b987b42e'3bb332a1_u128}, + {Sign::POS, -128, 0x884b00ae'f726cec5'139a7ba8'3bf2d136_u128}, + {Sign::POS, -128, 0x8a62b07f'3457c407'050799be'aaab2941_u128}, + {Sign::POS, -128, 0x8c7d6db7'169e0cda'8bd74461'7e9b7d52_u128}, + {Sign::POS, -128, 0x8e9b414b'5a92a606'046ad444'333ceb10_u128}, + {Sign::POS, -128, 0x90bc3458'61bf3d52'ef4c737f'ba4f5d66_u128}, + {Sign::POS, -128, 0x92e05023'1df57d6f'ae441c09'd761c549_u128}, + {Sign::POS, -128, 0x95079e1a'0382dc79'6e36aa9c'e90a3879_u128}, + {Sign::POS, -128, 0x973227d6'027ebd8a'0efca1a1'84e93809_u128}, + {Sign::POS, -128, 0x973227d6'027ebd8a'0efca1a1'84e93809_u128}, + {Sign::POS, -128, 0x995ff71b'8773432d'124bc6f1'acf95dc4_u128}, + {Sign::POS, -128, 0x9b9115db'83a3dd2d'352bea51'e58ea9e8_u128}, + {Sign::POS, -128, 0x9dc58e34'7d37696d'266d6cdc'959153bc_u128}, + {Sign::POS, -128, 0x9ffd6a73'a78eaf35'4527d82c'8214ddca_u128}, + {Sign::POS, -128, 0xa238b516'0413106e'404cabb7'6d600e3c_u128}, + {Sign::POS, -128, 0xa238b516'0413106e'404cabb7'6d600e3c_u128}, + {Sign::POS, -128, 0xa47778c9'8bcc86a1'cab7d2ec'23f0eef3_u128}, + {Sign::POS, -128, 0xa6b9c06e'6211646b'761c48dd'859de2d3_u128}, + {Sign::POS, -128, 0xa8ff9718'10a5e181'7fd3b7d7'e5d148bb_u128}, + {Sign::POS, -128, 0xab49080e'cda53208'c27c6780'd92b4d11_u128}, + {Sign::POS, -128, 0xad961ed0'cb91d406'db502402'c94092cd_u128}, + {Sign::POS, -128, 0xad961ed0'cb91d406'db502402'c94092cd_u128}, + {Sign::POS, -128, 0xafe6e713'93eeda29'3432ef6b'732b6843_u128}, + {Sign::POS, -128, 0xb23b6cc5'6cc84c99'bb324da7'e046e792_u128}, + {Sign::POS, -128, 0xb493bc0e'c9954243'b21709ce'430c8e24_u128}, + {Sign::POS, -128, 0xb493bc0e'c9954243'b21709ce'430c8e24_u128}, + {Sign::POS, -128, 
0xb6efe153'c7e319f6'e91ad16e'cff10111_u128}, + {Sign::POS, -128, 0xb94fe935'b83e3eb5'ce31e481'cd797e79_u128}, + {Sign::POS, -128, 0xbbb3e094'b3d228d3'da3e961a'96c580fa_u128}, + {Sign::POS, -128, 0xbbb3e094'b3d228d3'da3e961a'96c580fa_u128}, + {Sign::POS, -128, 0xbe1bd491'3f3fda43'f396598a'ae91499a_u128}, + {Sign::POS, -128, 0xc087d28d'fb2febb8'ae4cceb0'f621941b_u128}, + {Sign::POS, -128, 0xc087d28d'fb2febb8'ae4cceb0'f621941b_u128}, + {Sign::POS, -128, 0xc2f7e831'632b6670'6c1855c4'2078f81b_u128}, + {Sign::POS, -128, 0xc56c2367'9b4d206e'169535fb'8bf577c8_u128}, + {Sign::POS, -128, 0xc56c2367'9b4d206e'169535fb'8bf577c8_u128}, + {Sign::POS, -128, 0xc7e49264'4d64237e'3b24cecc'60217942_u128}, + {Sign::POS, -128, 0xca6143a4'9626d820'3dc2687f'cf939696_u128}, + {Sign::POS, -128, 0xca6143a4'9626d820'3dc2687f'cf939696_u128}, + {Sign::POS, -128, 0xcce245f1'031e41fa'0a62e6ad'd1a901a0_u128}, + {Sign::POS, -128, 0xcf67a85f'a1f89a04'5bb6e231'38ad51e1_u128}, + {Sign::POS, -128, 0xcf67a85f'a1f89a04'5bb6e231'38ad51e1_u128}, + {Sign::POS, -128, 0xd1f17a56'21fb01ac'7fc60a51'03092bae_u128}, + {Sign::POS, -128, 0xd47fcb8c'0852f0c0'bfe9dbeb'f2e8a45e_u128}, + {Sign::POS, -128, 0xd47fcb8c'0852f0c0'bfe9dbeb'f2e8a45e_u128}, + {Sign::POS, -128, 0xd712ac0c'f811659d'8e2d7d37'8127d823_u128}, + {Sign::POS, -128, 0xd9aa2c3b'0ea3cbc1'5c1a7f14'b168b365_u128}, + {Sign::POS, -128, 0xd9aa2c3b'0ea3cbc1'5c1a7f14'b168b365_u128}, + {Sign::POS, -128, 0xdc465cd1'55a90942'b7579f0f'8d3d514b_u128}, + {Sign::POS, -128, 0xdc465cd1'55a90942'b7579f0f'8d3d514b_u128}, + {Sign::POS, -128, 0xdee74ee6'4b0c38d3'b087205e'b55aea85_u128}, + {Sign::POS, -128, 0xe18d13ee'805a4de3'424a2623'd60dfb16_u128}, + {Sign::POS, -128, 0xe18d13ee'805a4de3'424a2623'd60dfb16_u128}, + {Sign::POS, -128, 0xe437bdbf'5254459c'4d3a591a'e6854787_u128}, + {Sign::POS, -128, 0xe437bdbf'5254459c'4d3a591a'e6854787_u128}, + {Sign::POS, -128, 0xe6e75e91'b9cca551'8dcdb6b2'4c5c5cdf_u128}, + {Sign::POS, -128, 0xe99c0905'36ece983'33ac7d9e'bba8a53c_u128}, + 
{Sign::POS, -128, 0xe99c0905'36ece983'33ac7d9e'bba8a53c_u128}, + {Sign::POS, -128, 0xec55d022'd80e3d27'fb2eede4'b59d8959_u128}, + {Sign::POS, -128, 0xec55d022'd80e3d27'fb2eede4'b59d8959_u128}, + {Sign::POS, -128, 0xef14c760'5d60654c'308b4546'66de8f99_u128}, + {Sign::POS, -128, 0xef14c760'5d60654c'308b4546'66de8f99_u128}, + {Sign::POS, -128, 0xf1d902a3'7aaa5085'8383cb0c'e23bebd4_u128}, + {Sign::POS, -128, 0xf1d902a3'7aaa5085'8383cb0c'e23bebd4_u128}, + {Sign::POS, -128, 0xf4a29645'38813c67'64fc87b4'a41f7b70_u128}, + {Sign::POS, -128, 0xf4a29645'38813c67'64fc87b4'a41f7b70_u128}, + {Sign::POS, -128, 0xf7719715'7665f689'3f5d7d82'b65c5686_u128}, + {Sign::POS, -128, 0xf7719715'7665f689'3f5d7d82'b65c5686_u128}, + {Sign::POS, -128, 0xfa461a5e'8f4b759d'6476077b'9fbd41ae_u128}, + {Sign::POS, -128, 0xfa461a5e'8f4b759d'6476077b'9fbd41ae_u128}, + {Sign::POS, -128, 0xfd2035e9'221ef5d0'0e3909ff'd0d61778_u128}, + {Sign::POS, 0, 0_u128}, }, // -log2(r) for the second step, generated by SageMath with: // @@ -310,202 +313,202 @@ const LogRR LOG2_TABLE = { // r = 2^-16 * round( 2^16 / (1 + i*2^(-14)) ); // s, m, e = RealField(128)(r).log2().sign_mantissa_exponent(); // print("{Sign::NEG," if s == 1 else "{Sign::POS,", e, ", - // MType({", hex(m % 2^64), ",", hex((m >> 64) % 2^64), "})},"); + // format_hex(m), "},"); /* .step_2 = */ { - {Sign::NEG, -135, MType({0xb5cfed58337e848a, 0xb906155918954401})}, - {Sign::NEG, -135, MType({0xffaf2ac1b1d20910, 0xb6264958a3c7fa2b})}, - {Sign::NEG, -135, MType({0x52521a3950ea2ed8, 0xb34671e439aa448e})}, - {Sign::NEG, -135, MType({0xf87e1abdee10fd95, 0xb0668efb7ef48ab7})}, - {Sign::NEG, -135, MType({0xfbd43bbcc24c5e43, 0xad86a09e185af0e8})}, - {Sign::NEG, -135, MType({0x2f4f5d48f9796742, 0xaaa6a6cbaa8d57ce})}, - {Sign::NEG, -135, MType({0x3477fd67c1cab6b3, 0xa7c6a183da375c3d})}, - {Sign::NEG, -135, MType({0x7b4d33eb381fe558, 0xa4e690c64c0056f0})}, - {Sign::NEG, -135, MType({0x3ce25e48cb498dea, 0xa2067492a48b5c43})}, - {Sign::NEG, -135, 
MType({0x70b0fcc9e4330983, 0x9f264ce888773bed})}, - {Sign::NEG, -135, MType({0xbc9e4267d3189b22, 0x9c4619c79c5e80bf})}, - {Sign::NEG, -135, MType({0x5fb3d896326615c4, 0x9965db2f84d7705f})}, - {Sign::NEG, -135, MType({0x178b58311e96d323, 0x9685911fe6740b02})}, - {Sign::NEG, -135, MType({0x6bf8b6cf73d847, 0x93a53b9865c20b2a})}, - {Sign::NEG, -135, MType({0x7019f6e64a580a02, 0x90c4da98a74ae561})}, - {Sign::NEG, -135, MType({0xcb5733cf0eb4191d, 0x8de46e204f93c7f6})}, - {Sign::NEG, -135, MType({0x56148d4fc5e415b6, 0x8b03f62f031d9ab8})}, - {Sign::NEG, -135, MType({0xfe5370f425872623, 0x882372c46664feaf})}, - {Sign::NEG, -135, MType({0x21b72a1457ee70d6, 0x8542e3e01de24ddf})}, - {Sign::NEG, -135, MType({0xabff4f89968bed0b, 0x81aa211f1e332fcf})}, - {Sign::NEG, -136, MType({0x86410a676480a5a7, 0xfd92f0cf88d75f24})}, - {Sign::NEG, -136, MType({0x44280889021970e4, 0xf7d1886b2a876289})}, - {Sign::NEG, -136, MType({0x32eb139d9812090d, 0xf21009106a42bc14})}, - {Sign::NEG, -136, MType({0xbef9dd41e8e42810, 0xec4e72be90cd2d2d})}, - {Sign::NEG, -136, MType({0x689d08ca6c7c3eb1, 0xe68cc574e6e1e5d7})}, - {Sign::NEG, -136, MType({0x1ef259a7f69821d, 0xe0cb0132b5338423})}, - {Sign::NEG, -136, MType({0xe22cea71b7bb8467, 0xdb0925f7446c13a9})}, - {Sign::NEG, -136, MType({0xe5bb27303f542fe, 0xd54733c1dd2d0d04})}, - {Sign::NEG, -136, MType({0x57453c8d5dc64ce1, 0xcf852a91c80f553f})}, - {Sign::NEG, -136, MType({0x6cc7add1fc09ef92, 0xc9c30a664da33d56})}, - {Sign::NEG, -136, MType({0xe678d7280de1c07f, 0xc400d33eb67081a7})}, - {Sign::NEG, -136, MType({0x419bbeb2239bdc39, 0xbe3e851a4af6496d})}, - {Sign::NEG, -136, MType({0xd4676d1d81755809, 0xb87c1ff853ab2631})}, - {Sign::NEG, -136, MType({0xb69dfef7ac2e2890, 0xb2b9a3d818fd1349})}, - {Sign::NEG, -136, MType({0x9f72fa0a8fccabc0, 0xacf710b8e3517548})}, - {Sign::NEG, -136, MType({0xb8bfe6a3addb988e, 0xa7346699fb051978})}, - {Sign::NEG, -136, MType({0x67862c8ec9dcd60d, 0xa171a57aa86c3551})}, - {Sign::NEG, -136, MType({0x9bd3370909e28a6, 
0x9baecd5a33d265ee})}, - {Sign::NEG, -136, MType({0xa96bc611b991419b, 0x95ebde37e57aaf84})}, - {Sign::NEG, -136, MType({0xa50bb80f203f0d62, 0x9028d813059f7cdc})}, - {Sign::NEG, -136, MType({0x4d36cd474f65a317, 0x8a65baeadc729ec5})}, - {Sign::NEG, -136, MType({0x779be241ef4874a3, 0x84a286beb21d4b8c})}, - {Sign::NEG, -137, MType({0xe76a962fa65ace3, 0xfdbe771b9d803cea})}, - {Sign::NEG, -137, MType({0xd3d35627464a5267, 0xf237b2aef4e62e5a})}, - {Sign::NEG, -137, MType({0x162ef4b0e838c363, 0xe6b0c035fa8b328c})}, - {Sign::NEG, -137, MType({0x77bb10b976b3b9ca, 0xdb299faf3e7cd74f})}, - {Sign::NEG, -137, MType({0x209853cee70bc58b, 0xcfa2511950b77014})}, - {Sign::NEG, -137, MType({0x63f9b57cbaf2e58d, 0xc41ad472c12614d3})}, - {Sign::NEG, -137, MType({0x4fca1c931bd6e6d6, 0xb89329ba1fa2a0fd})}, - {Sign::NEG, -137, MType({0x26d26e434a53490a, 0xad0b50edfbf5b265})}, - {Sign::NEG, -137, MType({0xc55e079078dc86a0, 0xa1834a0ce5d6a82d})}, - {Sign::NEG, -137, MType({0xf05b9d5bd28f540b, 0x95fb15156ceba1b5})}, - {Sign::NEG, -137, MType({0x8ef87f1a11cdb727, 0x8a72b20620c97d84})}, - {Sign::NEG, -138, MType({0x9d6870114c1183cf, 0xfdd441bb21e7b069})}, - {Sign::NEG, -138, MType({0x63d514fff97e86f3, 0xe6c2c33499ba16c4})}, - {Sign::NEG, -138, MType({0x11a381901eadd883, 0xcfb0e875c7cc5929})}, - {Sign::NEG, -138, MType({0xa9d69d37bc0a5bac, 0xb89eb17bcabe1857})}, - {Sign::NEG, -138, MType({0x2dc97c9ffefd2497, 0xa18c1e43c10c6898})}, - {Sign::NEG, -138, MType({0xdcdc8afcb2ac09a, 0x8a792ecac911cf92})}, - {Sign::NEG, -139, MType({0xdd454eb3a1489470, 0xe6cbc61c020c8446})}, - {Sign::NEG, -139, MType({0x878035864d84b319, 0xb8a476150dfe4470})}, - {Sign::NEG, -139, MType({0x7ce595cc53b8342c, 0x8a7c6d7af1de7942})}, - {Sign::NEG, -140, MType({0x4710b59049899141, 0xb8a7588fd29b1baa})}, - {Sign::NEG, -141, MType({0x5957f633309d74e3, 0xb8a8c9d8be9ae994})}, - {Sign::POS, 0, MType({0x0, 0x0})}, - {Sign::POS, -141, MType({0x8268aba030b1adf6, 0xb8abac81ab576f3b})}, - {Sign::POS, -140, MType({0x1511cba2fb213a10, 
0xb8ad1de1ac9ea6a5})}, - {Sign::POS, -139, MType({0x6379fb9fd9bc6235, 0x8a82eb7708262500})}, - {Sign::POS, -139, MType({0xb6fe1bf601ee27d5, 0xb8b000b8c65957cc})}, - {Sign::POS, -139, MType({0x8c6e60693a14e6d0, 0xe6ddcebbd72d3f7f})}, - {Sign::POS, -138, MType({0xe9bcfd0c62eaa2ca, 0x8a862ac30095c084})}, - {Sign::POS, -138, MType({0x73b214209a5234a7, 0xa19dca8e85918b6d})}, - {Sign::POS, -138, MType({0x347d4ca3109fe4db, 0xb8b5c6c35e142a9b})}, - {Sign::POS, -138, MType({0x37a62c48783bb066, 0xcfce1f646dca7745})}, - {Sign::POS, -138, MType({0x794b6437fb56344, 0xe6e6d4749883fbe3})}, - {Sign::POS, -138, MType({0x1cb9a45ed90318e6, 0xfdffe5f6c232f658})}, - {Sign::POS, -137, MType({0xbc118e5dbbef7dbc, 0x8a8ca9f6e7762d0f})}, - {Sign::POS, -137, MType({0xb4c0fb9535907cf8, 0x96198f2e5173e93b})}, - {Sign::POS, -137, MType({0xc051d2c5f00a9bb9, 0xa1a6a2a3113fe246})}, - {Sign::POS, -137, MType({0x553269878c1e5110, 0xad33e4569918a8d5})}, - {Sign::POS, -137, MType({0xbc906750b0ce372c, 0xb8c1544a5b4e2caf})}, - {Sign::POS, -137, MType({0x4c50eaa63be294b6, 0xc44ef27fca41bdd8})}, - {Sign::POS, -137, MType({0xb6cb28db8c065b44, 0xcfdcbef858660da1})}, - {Sign::POS, -137, MType({0x70479336830ceb05, 0xdb6ab9b5783f2fc5})}, - {Sign::POS, -137, MType({0x2a458c831f6aeb49, 0xe6f8e2b89c629b7a})}, - {Sign::POS, -137, MType({0x6489ba5bd391e206, 0xf2873a0337772c8a})}, - {Sign::POS, -137, MType({0x13f6fda510aeec3b, 0xfe15bf96bc35246b})}, - {Sign::POS, -136, MType({0x2f9a0ef9e8250836, 0x84d239ba4eb315a9})}, - {Sign::POS, -136, MType({0x389019e822b70f1e, 0x8a99aacf26f2a8a7})}, - {Sign::POS, -136, MType({0x308beeffa12cf669, 0x9061330aa04f87ae})}, - {Sign::POS, -136, MType({0x9886a71b25a2085d, 0x9628d26d7448a43f})}, - {Sign::POS, -136, MType({0x70ba9cebe0b969c3, 0x9bf088f85c65a56b})}, - {Sign::POS, -136, MType({0xcd855dc705ea2bea, 0xa1b856ac1236e85b})}, - {Sign::POS, -136, MType({0x7736196b11afb331, 0xa7803b894f5580e0})}, - {Sign::POS, -136, MType({0x94c99761b8eab3d8, 0xad483790cd6339fa})}, - {Sign::POS, 
-136, MType({0x6194b8c040814736, 0xb3104ac3460a9668})}, - {Sign::POS, -136, MType({0xedde8d24c7a999cc, 0xb8d8752172fed130})}, - {Sign::POS, -136, MType({0xea6b01ebde42f1d0, 0xbea0b6ac0dfbde2f})}, - {Sign::POS, -136, MType({0x7ef732b69334cf50, 0xc4690f63d0c66aa1})}, - {Sign::POS, -136, MType({0x2ba86275fcfc2d72, 0xca317f49752bddae})}, - {Sign::POS, -136, MType({0xb56ea44e185bf99f, 0xcffa065db50258f6})}, - {Sign::POS, -136, MType({0x1d5c3bbeb6902bfe, 0xd5c2a4a14a28b920})}, - {Sign::POS, -136, MType({0xa2f2bb9e156b0f37, 0xdb8b5a14ee86965f})}, - {Sign::POS, -136, MType({0xd166eb8da06ab5ef, 0xe15426b95c0c4506})}, - {Sign::POS, -136, MType({0x97dc7bae4219de0f, 0xe71d0a8f4cb2d60f})}, - {Sign::POS, -136, MType({0x6c9a8e7698f416c4, 0xece605977a7c17a8})}, - {Sign::POS, -136, MType({0x7b3a20aa5289695e, 0xf2af17d29f7295c0})}, - {Sign::POS, -136, MType({0xddcf578ee2c2897b, 0xf878414175a99a93})}, - {Sign::POS, -136, MType({0xe10ebd96c3ec30ec, 0xfe4181e4b73d2f37})}, - {Sign::POS, -135, MType({0xa9b7baecb34ba577, 0x82056cde8f290e13})}, - {Sign::POS, -135, MType({0x2da910dc61c182da, 0x8430f56d5e1edfd1})}, - {Sign::POS, -135, MType({0xfaca09dc7e0ba8b5, 0x8715b5a8f27bed90})}, - {Sign::POS, -135, MType({0xd723876173c0947, 0x89fa818019a2cace})}, - {Sign::POS, -135, MType({0x4e6651df154e8f8c, 0x8cdf58f330b64515})}, - {Sign::POS, -135, MType({0xee54b77d3bc34b6d, 0x8fc43c0294dd8af3})}, - {Sign::POS, -135, MType({0xad07dde9b5f92cce, 0x92a92aaea3442c3d})}, - {Sign::POS, -135, MType({0x261aacf944b638f0, 0x958e24f7b91a1a53})}, - {Sign::POS, -135, MType({0x232f5d64a85b219d, 0x98732ade3393a868})}, - {Sign::POS, -135, MType({0xf3a958bb706093fc, 0x9b583c626fe98bc9})}, - {Sign::POS, -135, MType({0xc9eaa059e7b0333a, 0x9e3d5984cb58dc25})}, - {Sign::POS, -135, MType({0x1e154029663243c0, 0xa1228245a32313cf})}, - {Sign::POS, -135, MType({0x16515200e283d006, 0xa407b6a5548e1006})}, - {Sign::POS, -135, MType({0xf498168a3337ca4f, 0xa6ecf6a43ce4113d})}, - {Sign::POS, -135, MType({0x8a04a89f0548a10f, 
0xa9d24242b973bb63})}, - {Sign::POS, -135, MType({0xafaad01f25772805, 0xacb7998127901623})}, - {Sign::POS, -135, MType({0xc4f47950543fe0b8, 0xaf9cfc5fe4908d31})}, - {Sign::POS, -135, MType({0x338655e677d0d3ec, 0xb2826adf4dd0f08e})}, - {Sign::POS, -135, MType({0xf8ac2ce19d009541, 0xb567e4ffc0b174cc})}, - {Sign::POS, -135, MType({0x344d5e7dd7b2f465, 0xb84d6ac19a96b35c})}, - {Sign::POS, -135, MType({0xbd6a217fb4598ec7, 0xbb32fc2538e9aaca})}, - {Sign::POS, -135, MType({0xbc21ff368f562b75, 0xbe18992af917bf0e})}, - {Sign::POS, -135, MType({0x4944139ccbf2cb9a, 0xc0fe41d33892b9cc})}, - {Sign::POS, -135, MType({0x1369970c8b67e6b5, 0xc3e3f61e54d0ca9c})}, - {Sign::POS, -135, MType({0x99b370e2d04a530, 0xc6c9b60cab4c8752})}, - {Sign::POS, -135, MType({0xb81c3d48aff589f, 0xc9af819e9984ec44})}, - {Sign::POS, -135, MType({0x9f22b80993be311b, 0xcc9558d47cfd5c90})}, - {Sign::POS, -135, MType({0xac29209c8d8985ae, 0xcf7b3baeb33da265})}, - {Sign::POS, -135, MType({0x3cbb6a520292351d, 0xd2612a2d99d1ef47})}, - {Sign::POS, -135, MType({0x43de9ae40507ef24, 0xd54724518e4adc56})}, - {Sign::POS, -135, MType({0x69677b902ea4df3a, 0xd82d2a1aee3d6a97})}, - {Sign::POS, -135, MType({0xdb7a3aff74967bd5, 0xdb133b8a17430339})}, - {Sign::POS, -135, MType({0x25990c82a0066ac6, 0xddf9589f66f977de})}, - {Sign::POS, -135, MType({0xd424aacf4babf55, 0xe0df815b3b0302dd})}, - {Sign::POS, -135, MType({0xf8e3e7eb5a7bdebb, 0xe30c278d9936c595})}, - {Sign::POS, -135, MType({0x5ef8bf5adf5deebe, 0xe5f264adb62d5810})}, - {Sign::POS, -135, MType({0x331d19965368fc82, 0xe8d8ad75590bdf92})}, - {Sign::POS, -135, MType({0x901c30c427e358b8, 0xebbf01e4df85219e})}, - {Sign::POS, -135, MType({0xaeac7e9857253b06, 0xeea561fca7504dc1})}, - {Sign::POS, -135, MType({0xe2113e5893ab5b40, 0xf18bcdbd0e28fdd7})}, - {Sign::POS, -135, MType({0x9a4efc80ae977826, 0xf472452671cf3654})}, - {Sign::POS, -135, MType({0x6bf3ba8319332c9f, 0xf758c83930076689})}, - {Sign::POS, -135, MType({0x1d732d302e75018b, 0xfa3f56f5a69a68ed})}, - {Sign::POS, -135, 
MType({0xba179c5dbcceec01, 0xfd25f15c33558362})}, - {Sign::POS, -134, MType({0x5543f53b8ad85039, 0x80064bb69a0533c0})}, - {Sign::POS, -134, MType({0xe971a5565b93cb67, 0x8179a4948347996b})}, - {Sign::POS, -134, MType({0x5b399644ba714691, 0x82ed0348045f379d})}, - {Sign::POS, -134, MType({0x5079f1e0ec4b8496, 0x846067d14c3b8982})}, - {Sign::POS, -134, MType({0x6aba4990a32e8873, 0x85d3d23089ce40b0})}, - {Sign::POS, -134, MType({0xe16770c3a404291c, 0x87474265ec0b4548})}, - {Sign::POS, -134, MType({0x1edb7ffb1d6b3eab, 0x88bab871a1e8b61c})}, - {Sign::POS, -134, MType({0x603243e1ba7c7865, 0x8a2e3453da5ee8cd})}, - {Sign::POS, -134, MType({0x57ea5c03ea4621dd, 0x8ba1b60cc46869f6})}, - {Sign::POS, -134, MType({0xd3534cbf43bd7fd8, 0x8d153d9c8f01fd4a})}, - {Sign::POS, -134, MType({0x62c8c8075dc91cd5, 0x8e88cb03692a9dbc})}, - {Sign::POS, -134, MType({0x4bb70a5e3db7b85, 0x8ffc5e4181e37d9e})}, - {Sign::POS, -134, MType({0xd3875ba32159547a, 0x916ff757083006c7})}, - {Sign::POS, -134, MType({0x5c94c80e7a8f66b1, 0x9286adfca91ba28d})}, - {Sign::POS, -134, MType({0x52d313c47b4f91db, 0x93fa514ba0517623})}, - {Sign::POS, -134, MType({0x80829e9f3957a4c3, 0x956dfa72866fc57d})}, - {Sign::POS, -134, MType({0x1cd4917972015ae7, 0x96e1a9718a824be5})}, - {Sign::POS, -134, MType({0x1af23c29ef3032da, 0x98555e48db96fcd2})}, - {Sign::POS, -134, MType({0xe7f7bf240be67b80, 0x99c918f8a8be040e})}, - {Sign::POS, -134, MType({0x2bbe3cd4f7d868fa, 0x9b3cd9812109c5dc})}, - {Sign::POS, -134, MType({0x8c75d6a4c5ae460d, 0x9cb09fe2738edf14})}, - {Sign::POS, -134, MType({0x750fb989c9a06186, 0x9e246c1ccf642550})}, - {Sign::POS, -134, MType({0xde787e244901bdf9, 0x9f983e3063a2a709})}, - {Sign::POS, -134, MType({0x1ba3205ff729efa4, 0xa10c161d5f65abc0})}, - {Sign::POS, -134, MType({0xa864d2a038fb19cd, 0xa27ff3e3f1cab41b})}, - {Sign::POS, -134, MType({0xfb21f083a5fec56d, 0xa3f3d78449f17a11})}, - {Sign::POS, -134, MType({0x594c5552bcc377f5, 0xa567c0fe96fbf109})}, - {Sign::POS, -134, MType({0xaeb35a353fc5a503, 
0xa6dbb053080e45fc})}, - {Sign::POS, -134, MType({0x67a5c05130c0f330, 0xa84fa581cc4edf9f})}, - {Sign::POS, -134, MType({0x4de5cafde1caf46f, 0xa9c3a08b12e65e81})}, - {Sign::POS, -134, MType({0x686fce3d160e88fd, 0xab37a16f0aff9d32})}, - {Sign::POS, -134, MType({0xde1375b3af6749a6, 0xacaba82de3c7b066})}, - {Sign::POS, -134, MType({0x243569048ac4affe, 0xadc2b114c632da56})}, - {Sign::POS, -134, MType({0xd6796227dcd39551, 0xaf36c21319b80ea2})}, - {Sign::POS, -134, MType({0xabc9265386172074, 0xb0aad8eccfb38d51})}, - {Sign::POS, -134, MType({0xcaac9f17896f2ce, 0xb21ef5a2175ac65e})}, - {Sign::POS, -134, MType({0x1c65a3c7f828972b, 0xb39318331fe56492})}, - {Sign::POS, -134, MType({0xabdc66446a4286d9, 0xb50740a0188d4daa})}, - {Sign::POS, -134, MType({0x2f3bbe8e8d72abec, 0xb67b6ee9308ea27b})}, - {Sign::POS, -134, MType({0xb67dbdd7f03d168c, 0xb7efa30e9727bf11})}, + {Sign::NEG, -135, 0xb9061559'18954401'b5cfed58'337e848a_u128}, + {Sign::NEG, -135, 0xb6264958'a3c7fa2b'ffaf2ac1'b1d20910_u128}, + {Sign::NEG, -135, 0xb34671e4'39aa448e'52521a39'50ea2ed8_u128}, + {Sign::NEG, -135, 0xb0668efb'7ef48ab7'f87e1abd'ee10fd95_u128}, + {Sign::NEG, -135, 0xad86a09e'185af0e8'fbd43bbc'c24c5e43_u128}, + {Sign::NEG, -135, 0xaaa6a6cb'aa8d57ce'2f4f5d48'f9796742_u128}, + {Sign::NEG, -135, 0xa7c6a183'da375c3d'3477fd67'c1cab6b3_u128}, + {Sign::NEG, -135, 0xa4e690c6'4c0056f0'7b4d33eb'381fe558_u128}, + {Sign::NEG, -135, 0xa2067492'a48b5c43'3ce25e48'cb498dea_u128}, + {Sign::NEG, -135, 0x9f264ce8'88773bed'70b0fcc9'e4330983_u128}, + {Sign::NEG, -135, 0x9c4619c7'9c5e80bf'bc9e4267'd3189b22_u128}, + {Sign::NEG, -135, 0x9965db2f'84d7705f'5fb3d896'326615c4_u128}, + {Sign::NEG, -135, 0x9685911f'e6740b02'178b5831'1e96d323_u128}, + {Sign::NEG, -135, 0x93a53b98'65c20b2a'006bf8b6'cf73d847_u128}, + {Sign::NEG, -135, 0x90c4da98'a74ae561'7019f6e6'4a580a02_u128}, + {Sign::NEG, -135, 0x8de46e20'4f93c7f6'cb5733cf'0eb4191d_u128}, + {Sign::NEG, -135, 0x8b03f62f'031d9ab8'56148d4f'c5e415b6_u128}, + {Sign::NEG, -135, 
0x882372c4'6664feaf'fe5370f4'25872623_u128}, + {Sign::NEG, -135, 0x8542e3e0'1de24ddf'21b72a14'57ee70d6_u128}, + {Sign::NEG, -135, 0x81aa211f'1e332fcf'abff4f89'968bed0b_u128}, + {Sign::NEG, -136, 0xfd92f0cf'88d75f24'86410a67'6480a5a7_u128}, + {Sign::NEG, -136, 0xf7d1886b'2a876289'44280889'021970e4_u128}, + {Sign::NEG, -136, 0xf2100910'6a42bc14'32eb139d'9812090d_u128}, + {Sign::NEG, -136, 0xec4e72be'90cd2d2d'bef9dd41'e8e42810_u128}, + {Sign::NEG, -136, 0xe68cc574'e6e1e5d7'689d08ca'6c7c3eb1_u128}, + {Sign::NEG, -136, 0xe0cb0132'b5338423'01ef259a'7f69821d_u128}, + {Sign::NEG, -136, 0xdb0925f7'446c13a9'e22cea71'b7bb8467_u128}, + {Sign::NEG, -136, 0xd54733c1'dd2d0d04'0e5bb273'03f542fe_u128}, + {Sign::NEG, -136, 0xcf852a91'c80f553f'57453c8d'5dc64ce1_u128}, + {Sign::NEG, -136, 0xc9c30a66'4da33d56'6cc7add1'fc09ef92_u128}, + {Sign::NEG, -136, 0xc400d33e'b67081a7'e678d728'0de1c07f_u128}, + {Sign::NEG, -136, 0xbe3e851a'4af6496d'419bbeb2'239bdc39_u128}, + {Sign::NEG, -136, 0xb87c1ff8'53ab2631'd4676d1d'81755809_u128}, + {Sign::NEG, -136, 0xb2b9a3d8'18fd1349'b69dfef7'ac2e2890_u128}, + {Sign::NEG, -136, 0xacf710b8'e3517548'9f72fa0a'8fccabc0_u128}, + {Sign::NEG, -136, 0xa7346699'fb051978'b8bfe6a3'addb988e_u128}, + {Sign::NEG, -136, 0xa171a57a'a86c3551'67862c8e'c9dcd60d_u128}, + {Sign::NEG, -136, 0x9baecd5a'33d265ee'09bd3370'909e28a6_u128}, + {Sign::NEG, -136, 0x95ebde37'e57aaf84'a96bc611'b991419b_u128}, + {Sign::NEG, -136, 0x9028d813'059f7cdc'a50bb80f'203f0d62_u128}, + {Sign::NEG, -136, 0x8a65baea'dc729ec5'4d36cd47'4f65a317_u128}, + {Sign::NEG, -136, 0x84a286be'b21d4b8c'779be241'ef4874a3_u128}, + {Sign::NEG, -137, 0xfdbe771b'9d803cea'0e76a962'fa65ace3_u128}, + {Sign::NEG, -137, 0xf237b2ae'f4e62e5a'd3d35627'464a5267_u128}, + {Sign::NEG, -137, 0xe6b0c035'fa8b328c'162ef4b0'e838c363_u128}, + {Sign::NEG, -137, 0xdb299faf'3e7cd74f'77bb10b9'76b3b9ca_u128}, + {Sign::NEG, -137, 0xcfa25119'50b77014'209853ce'e70bc58b_u128}, + {Sign::NEG, -137, 0xc41ad472'c12614d3'63f9b57c'baf2e58d_u128}, + 
{Sign::NEG, -137, 0xb89329ba'1fa2a0fd'4fca1c93'1bd6e6d6_u128}, + {Sign::NEG, -137, 0xad0b50ed'fbf5b265'26d26e43'4a53490a_u128}, + {Sign::NEG, -137, 0xa1834a0c'e5d6a82d'c55e0790'78dc86a0_u128}, + {Sign::NEG, -137, 0x95fb1515'6ceba1b5'f05b9d5b'd28f540b_u128}, + {Sign::NEG, -137, 0x8a72b206'20c97d84'8ef87f1a'11cdb727_u128}, + {Sign::NEG, -138, 0xfdd441bb'21e7b069'9d687011'4c1183cf_u128}, + {Sign::NEG, -138, 0xe6c2c334'99ba16c4'63d514ff'f97e86f3_u128}, + {Sign::NEG, -138, 0xcfb0e875'c7cc5929'11a38190'1eadd883_u128}, + {Sign::NEG, -138, 0xb89eb17b'cabe1857'a9d69d37'bc0a5bac_u128}, + {Sign::NEG, -138, 0xa18c1e43'c10c6898'2dc97c9f'fefd2497_u128}, + {Sign::NEG, -138, 0x8a792eca'c911cf92'0dcdc8af'cb2ac09a_u128}, + {Sign::NEG, -139, 0xe6cbc61c'020c8446'dd454eb3'a1489470_u128}, + {Sign::NEG, -139, 0xb8a47615'0dfe4470'87803586'4d84b319_u128}, + {Sign::NEG, -139, 0x8a7c6d7a'f1de7942'7ce595cc'53b8342c_u128}, + {Sign::NEG, -140, 0xb8a7588f'd29b1baa'4710b590'49899141_u128}, + {Sign::NEG, -141, 0xb8a8c9d8'be9ae994'5957f633'309d74e3_u128}, + {Sign::POS, 0, 0_u128}, + {Sign::POS, -141, 0xb8abac81'ab576f3b'8268aba0'30b1adf6_u128}, + {Sign::POS, -140, 0xb8ad1de1'ac9ea6a5'1511cba2'fb213a10_u128}, + {Sign::POS, -139, 0x8a82eb77'08262500'6379fb9f'd9bc6235_u128}, + {Sign::POS, -139, 0xb8b000b8'c65957cc'b6fe1bf6'01ee27d5_u128}, + {Sign::POS, -139, 0xe6ddcebb'd72d3f7f'8c6e6069'3a14e6d0_u128}, + {Sign::POS, -138, 0x8a862ac3'0095c084'e9bcfd0c'62eaa2ca_u128}, + {Sign::POS, -138, 0xa19dca8e'85918b6d'73b21420'9a5234a7_u128}, + {Sign::POS, -138, 0xb8b5c6c3'5e142a9b'347d4ca3'109fe4db_u128}, + {Sign::POS, -138, 0xcfce1f64'6dca7745'37a62c48'783bb066_u128}, + {Sign::POS, -138, 0xe6e6d474'9883fbe3'0794b643'7fb56344_u128}, + {Sign::POS, -138, 0xfdffe5f6'c232f658'1cb9a45e'd90318e6_u128}, + {Sign::POS, -137, 0x8a8ca9f6'e7762d0f'bc118e5d'bbef7dbc_u128}, + {Sign::POS, -137, 0x96198f2e'5173e93b'b4c0fb95'35907cf8_u128}, + {Sign::POS, -137, 0xa1a6a2a3'113fe246'c051d2c5'f00a9bb9_u128}, + {Sign::POS, -137, 
0xad33e456'9918a8d5'55326987'8c1e5110_u128}, + {Sign::POS, -137, 0xb8c1544a'5b4e2caf'bc906750'b0ce372c_u128}, + {Sign::POS, -137, 0xc44ef27f'ca41bdd8'4c50eaa6'3be294b6_u128}, + {Sign::POS, -137, 0xcfdcbef8'58660da1'b6cb28db'8c065b44_u128}, + {Sign::POS, -137, 0xdb6ab9b5'783f2fc5'70479336'830ceb05_u128}, + {Sign::POS, -137, 0xe6f8e2b8'9c629b7a'2a458c83'1f6aeb49_u128}, + {Sign::POS, -137, 0xf2873a03'37772c8a'6489ba5b'd391e206_u128}, + {Sign::POS, -137, 0xfe15bf96'bc35246b'13f6fda5'10aeec3b_u128}, + {Sign::POS, -136, 0x84d239ba'4eb315a9'2f9a0ef9'e8250836_u128}, + {Sign::POS, -136, 0x8a99aacf'26f2a8a7'389019e8'22b70f1e_u128}, + {Sign::POS, -136, 0x9061330a'a04f87ae'308beeff'a12cf669_u128}, + {Sign::POS, -136, 0x9628d26d'7448a43f'9886a71b'25a2085d_u128}, + {Sign::POS, -136, 0x9bf088f8'5c65a56b'70ba9ceb'e0b969c3_u128}, + {Sign::POS, -136, 0xa1b856ac'1236e85b'cd855dc7'05ea2bea_u128}, + {Sign::POS, -136, 0xa7803b89'4f5580e0'7736196b'11afb331_u128}, + {Sign::POS, -136, 0xad483790'cd6339fa'94c99761'b8eab3d8_u128}, + {Sign::POS, -136, 0xb3104ac3'460a9668'6194b8c0'40814736_u128}, + {Sign::POS, -136, 0xb8d87521'72fed130'edde8d24'c7a999cc_u128}, + {Sign::POS, -136, 0xbea0b6ac'0dfbde2f'ea6b01eb'de42f1d0_u128}, + {Sign::POS, -136, 0xc4690f63'd0c66aa1'7ef732b6'9334cf50_u128}, + {Sign::POS, -136, 0xca317f49'752bddae'2ba86275'fcfc2d72_u128}, + {Sign::POS, -136, 0xcffa065d'b50258f6'b56ea44e'185bf99f_u128}, + {Sign::POS, -136, 0xd5c2a4a1'4a28b920'1d5c3bbe'b6902bfe_u128}, + {Sign::POS, -136, 0xdb8b5a14'ee86965f'a2f2bb9e'156b0f37_u128}, + {Sign::POS, -136, 0xe15426b9'5c0c4506'd166eb8d'a06ab5ef_u128}, + {Sign::POS, -136, 0xe71d0a8f'4cb2d60f'97dc7bae'4219de0f_u128}, + {Sign::POS, -136, 0xece60597'7a7c17a8'6c9a8e76'98f416c4_u128}, + {Sign::POS, -136, 0xf2af17d2'9f7295c0'7b3a20aa'5289695e_u128}, + {Sign::POS, -136, 0xf8784141'75a99a93'ddcf578e'e2c2897b_u128}, + {Sign::POS, -136, 0xfe4181e4'b73d2f37'e10ebd96'c3ec30ec_u128}, + {Sign::POS, -135, 0x82056cde'8f290e13'a9b7baec'b34ba577_u128}, + 
{Sign::POS, -135, 0x8430f56d'5e1edfd1'2da910dc'61c182da_u128}, + {Sign::POS, -135, 0x8715b5a8'f27bed90'faca09dc'7e0ba8b5_u128}, + {Sign::POS, -135, 0x89fa8180'19a2cace'0d723876'173c0947_u128}, + {Sign::POS, -135, 0x8cdf58f3'30b64515'4e6651df'154e8f8c_u128}, + {Sign::POS, -135, 0x8fc43c02'94dd8af3'ee54b77d'3bc34b6d_u128}, + {Sign::POS, -135, 0x92a92aae'a3442c3d'ad07dde9'b5f92cce_u128}, + {Sign::POS, -135, 0x958e24f7'b91a1a53'261aacf9'44b638f0_u128}, + {Sign::POS, -135, 0x98732ade'3393a868'232f5d64'a85b219d_u128}, + {Sign::POS, -135, 0x9b583c62'6fe98bc9'f3a958bb'706093fc_u128}, + {Sign::POS, -135, 0x9e3d5984'cb58dc25'c9eaa059'e7b0333a_u128}, + {Sign::POS, -135, 0xa1228245'a32313cf'1e154029'663243c0_u128}, + {Sign::POS, -135, 0xa407b6a5'548e1006'16515200'e283d006_u128}, + {Sign::POS, -135, 0xa6ecf6a4'3ce4113d'f498168a'3337ca4f_u128}, + {Sign::POS, -135, 0xa9d24242'b973bb63'8a04a89f'0548a10f_u128}, + {Sign::POS, -135, 0xacb79981'27901623'afaad01f'25772805_u128}, + {Sign::POS, -135, 0xaf9cfc5f'e4908d31'c4f47950'543fe0b8_u128}, + {Sign::POS, -135, 0xb2826adf'4dd0f08e'338655e6'77d0d3ec_u128}, + {Sign::POS, -135, 0xb567e4ff'c0b174cc'f8ac2ce1'9d009541_u128}, + {Sign::POS, -135, 0xb84d6ac1'9a96b35c'344d5e7d'd7b2f465_u128}, + {Sign::POS, -135, 0xbb32fc25'38e9aaca'bd6a217f'b4598ec7_u128}, + {Sign::POS, -135, 0xbe18992a'f917bf0e'bc21ff36'8f562b75_u128}, + {Sign::POS, -135, 0xc0fe41d3'3892b9cc'4944139c'cbf2cb9a_u128}, + {Sign::POS, -135, 0xc3e3f61e'54d0ca9c'1369970c'8b67e6b5_u128}, + {Sign::POS, -135, 0xc6c9b60c'ab4c8752'099b370e'2d04a530_u128}, + {Sign::POS, -135, 0xc9af819e'9984ec44'0b81c3d4'8aff589f_u128}, + {Sign::POS, -135, 0xcc9558d4'7cfd5c90'9f22b809'93be311b_u128}, + {Sign::POS, -135, 0xcf7b3bae'b33da265'ac29209c'8d8985ae_u128}, + {Sign::POS, -135, 0xd2612a2d'99d1ef47'3cbb6a52'0292351d_u128}, + {Sign::POS, -135, 0xd5472451'8e4adc56'43de9ae4'0507ef24_u128}, + {Sign::POS, -135, 0xd82d2a1a'ee3d6a97'69677b90'2ea4df3a_u128}, + {Sign::POS, -135, 
0xdb133b8a'17430339'db7a3aff'74967bd5_u128}, + {Sign::POS, -135, 0xddf9589f'66f977de'25990c82'a0066ac6_u128}, + {Sign::POS, -135, 0xe0df815b'3b0302dd'0d424aac'f4babf55_u128}, + {Sign::POS, -135, 0xe30c278d'9936c595'f8e3e7eb'5a7bdebb_u128}, + {Sign::POS, -135, 0xe5f264ad'b62d5810'5ef8bf5a'df5deebe_u128}, + {Sign::POS, -135, 0xe8d8ad75'590bdf92'331d1996'5368fc82_u128}, + {Sign::POS, -135, 0xebbf01e4'df85219e'901c30c4'27e358b8_u128}, + {Sign::POS, -135, 0xeea561fc'a7504dc1'aeac7e98'57253b06_u128}, + {Sign::POS, -135, 0xf18bcdbd'0e28fdd7'e2113e58'93ab5b40_u128}, + {Sign::POS, -135, 0xf4724526'71cf3654'9a4efc80'ae977826_u128}, + {Sign::POS, -135, 0xf758c839'30076689'6bf3ba83'19332c9f_u128}, + {Sign::POS, -135, 0xfa3f56f5'a69a68ed'1d732d30'2e75018b_u128}, + {Sign::POS, -135, 0xfd25f15c'33558362'ba179c5d'bcceec01_u128}, + {Sign::POS, -134, 0x80064bb6'9a0533c0'5543f53b'8ad85039_u128}, + {Sign::POS, -134, 0x8179a494'8347996b'e971a556'5b93cb67_u128}, + {Sign::POS, -134, 0x82ed0348'045f379d'5b399644'ba714691_u128}, + {Sign::POS, -134, 0x846067d1'4c3b8982'5079f1e0'ec4b8496_u128}, + {Sign::POS, -134, 0x85d3d230'89ce40b0'6aba4990'a32e8873_u128}, + {Sign::POS, -134, 0x87474265'ec0b4548'e16770c3'a404291c_u128}, + {Sign::POS, -134, 0x88bab871'a1e8b61c'1edb7ffb'1d6b3eab_u128}, + {Sign::POS, -134, 0x8a2e3453'da5ee8cd'603243e1'ba7c7865_u128}, + {Sign::POS, -134, 0x8ba1b60c'c46869f6'57ea5c03'ea4621dd_u128}, + {Sign::POS, -134, 0x8d153d9c'8f01fd4a'd3534cbf'43bd7fd8_u128}, + {Sign::POS, -134, 0x8e88cb03'692a9dbc'62c8c807'5dc91cd5_u128}, + {Sign::POS, -134, 0x8ffc5e41'81e37d9e'04bb70a5'e3db7b85_u128}, + {Sign::POS, -134, 0x916ff757'083006c7'd3875ba3'2159547a_u128}, + {Sign::POS, -134, 0x9286adfc'a91ba28d'5c94c80e'7a8f66b1_u128}, + {Sign::POS, -134, 0x93fa514b'a0517623'52d313c4'7b4f91db_u128}, + {Sign::POS, -134, 0x956dfa72'866fc57d'80829e9f'3957a4c3_u128}, + {Sign::POS, -134, 0x96e1a971'8a824be5'1cd49179'72015ae7_u128}, + {Sign::POS, -134, 0x98555e48'db96fcd2'1af23c29'ef3032da_u128}, + 
{Sign::POS, -134, 0x99c918f8'a8be040e'e7f7bf24'0be67b80_u128}, + {Sign::POS, -134, 0x9b3cd981'2109c5dc'2bbe3cd4'f7d868fa_u128}, + {Sign::POS, -134, 0x9cb09fe2'738edf14'8c75d6a4'c5ae460d_u128}, + {Sign::POS, -134, 0x9e246c1c'cf642550'750fb989'c9a06186_u128}, + {Sign::POS, -134, 0x9f983e30'63a2a709'de787e24'4901bdf9_u128}, + {Sign::POS, -134, 0xa10c161d'5f65abc0'1ba3205f'f729efa4_u128}, + {Sign::POS, -134, 0xa27ff3e3'f1cab41b'a864d2a0'38fb19cd_u128}, + {Sign::POS, -134, 0xa3f3d784'49f17a11'fb21f083'a5fec56d_u128}, + {Sign::POS, -134, 0xa567c0fe'96fbf109'594c5552'bcc377f5_u128}, + {Sign::POS, -134, 0xa6dbb053'080e45fc'aeb35a35'3fc5a503_u128}, + {Sign::POS, -134, 0xa84fa581'cc4edf9f'67a5c051'30c0f330_u128}, + {Sign::POS, -134, 0xa9c3a08b'12e65e81'4de5cafd'e1caf46f_u128}, + {Sign::POS, -134, 0xab37a16f'0aff9d32'686fce3d'160e88fd_u128}, + {Sign::POS, -134, 0xacaba82d'e3c7b066'de1375b3'af6749a6_u128}, + {Sign::POS, -134, 0xadc2b114'c632da56'24356904'8ac4affe_u128}, + {Sign::POS, -134, 0xaf36c213'19b80ea2'd6796227'dcd39551_u128}, + {Sign::POS, -134, 0xb0aad8ec'cfb38d51'abc92653'86172074_u128}, + {Sign::POS, -134, 0xb21ef5a2'175ac65e'0caac9f1'7896f2ce_u128}, + {Sign::POS, -134, 0xb3931833'1fe56492'1c65a3c7'f828972b_u128}, + {Sign::POS, -134, 0xb50740a0'188d4daa'abdc6644'6a4286d9_u128}, + {Sign::POS, -134, 0xb67b6ee9'308ea27b'2f3bbe8e'8d72abec_u128}, + {Sign::POS, -134, 0xb7efa30e'9727bf11'b67dbdd7'f03d168c_u128}, }, // -log2(r) for the third step, generated by SageMath with: // @@ -513,170 +516,170 @@ const LogRR LOG2_TABLE = { // r = 2^-21 * round( 2^21 / (1 + i*2^(-21)) ); // s, m, e = RealField(128)(r).log2().sign_mantissa_exponent(); // print("{Sign::NEG," if (s == 1) else "{Sign::POS,", e, ", - // MType({", hex(m % 2^64), ",", hex((m >> 64) % 2^64), "})},"); + // format_hex(m), "},"); /* .step_3 = */ { - {Sign::NEG, -142, MType({0x26f2c63c0827ccbb, 0xe6d3a96b978fc16e})}, - {Sign::NEG, -142, MType({0x4b56fe667c8ec091, 0xe3f107a9fbfc50ca})}, - {Sign::NEG, -142, 
MType({0x647d76181aec10fc, 0xe10e65d14b937265})}, - {Sign::NEG, -142, MType({0x99e8f4d5379eca79, 0xde2bc3e18653b4f5})}, - {Sign::NEG, -142, MType({0xf07da89990c20623, 0xdb4921daac3ba730})}, - {Sign::NEG, -142, MType({0x4a8121848531851a, 0xd8667fbcbd49d7cd})}, - {Sign::NEG, -142, MType({0x679a4d854ae13619, 0xd583dd87b97cd580})}, - {Sign::NEG, -142, MType({0xe4d174072487a514, 0xd2a13b3ba0d32eff})}, - {Sign::NEG, -142, MType({0x3c90319d969b54be, 0xcfbe98d8734b7301})}, - {Sign::NEG, -142, MType({0xc6a173b09ba301e6, 0xccdbf65e30e43039})}, - {Sign::NEG, -142, MType({0xb8317428d7d8d06b, 0xc9f953ccd99bf55e})}, - {Sign::NEG, -142, MType({0x23cdb51bcc2061cd, 0xc716b1246d715125})}, - {Sign::NEG, -142, MType({0xf964fc78084fd515, 0xc4340e64ec62d241})}, - {Sign::NEG, -142, MType({0x6474fb15ccbb015, 0xc1516b8e566f076a})}, - {Sign::NEG, -142, MType({0xf525ef6d0b75b1c3, 0xbe6ec8a0ab947f51})}, - {Sign::NEG, -142, MType({0x4e13532df7ee8da7, 0xbb8c259bebd1c8ae})}, - {Sign::NEG, -142, MType({0x76832500d72a9027, 0xb8a9828017257233})}, - {Sign::NEG, -142, MType({0xb14a3d285e592ba0, 0xb5c6df4d2d8e0a95})}, - {Sign::NEG, -142, MType({0x1e9e9dc9711f6e20, 0xb2e43c032f0a2089})}, - {Sign::NEG, -142, MType({0xbc176e974f255fac, 0xb00198a21b9842c1})}, - {Sign::NEG, -142, MType({0x64acf87fc0f648e6, 0xad1ef529f336fff3})}, - {Sign::NEG, -142, MType({0xd0b8a1574433e1f8, 0xaa3c519ab5e4e6d1})}, - {Sign::NEG, -142, MType({0x95f4e785371c69a9, 0xa759adf463a08610})}, - {Sign::NEG, -142, MType({0x277d5db00363a46f, 0xa4770a36fc686c63})}, - {Sign::NEG, -142, MType({0xd5cea669485ec36c, 0xa1946662803b287c})}, - {Sign::NEG, -142, MType({0xcec66fda04833322, 0x9eb1c276ef174910})}, - {Sign::NEG, -142, MType({0x1da36f6ebe3851db, 0x9bcf1e7448fb5cd2})}, - {Sign::NEG, -142, MType({0xab055d83abfc0d82, 0x98ec7a5a8de5f273})}, - {Sign::NEG, -142, MType({0x3cecf110dbda68e9, 0x9609d629bdd598a8})}, - {Sign::NEG, -142, MType({0x76bbdb565a37e84b, 0x932731e1d8c8de22})}, - {Sign::NEG, -142, MType({0xd934c38857eee4f3, 
0x90448d82debe5194})}, - {Sign::NEG, -142, MType({0xc27b427b4fbfc7db, 0x8d61e90ccfb481b1})}, - {Sign::NEG, -142, MType({0x6e13de502b142b39, 0x8a7f447faba9fd2b})}, - {Sign::NEG, -142, MType({0xf4e406206614e2ba, 0x879c9fdb729d52b3})}, - {Sign::NEG, -142, MType({0x4d320daa3312ea6c, 0x84b9fb20248d10fd})}, - {Sign::NEG, -142, MType({0x4aa528fc9d433c1a, 0x81d7564dc177c6b9})}, - {Sign::NEG, -143, MType({0x3c8ad047559b1622, 0xfde962c892b80533})}, - {Sign::NEG, -143, MType({0xacf765a8fc5bcc31, 0xf82418c77870a69f})}, - {Sign::NEG, -143, MType({0xbe238832edd27f20, 0xf25ece9834168f1a})}, - {Sign::NEG, -143, MType({0x2644bfca329b708, 0xec99843ac5a6dc07})}, - {Sign::NEG, -143, MType({0xc6d05a788e614744, 0xe6d439af2d1eaac6})}, - {Sign::NEG, -143, MType({0x133fe9cc57a8c1d0, 0xe10eeef56a7b18bc})}, - {Sign::NEG, -143, MType({0xaa4cb429195fb5dd, 0xdb49a40d7db94348})}, - {Sign::NEG, -143, MType({0x951ef239abbb959, 0xd58458f766d647ce})}, - {Sign::NEG, -143, MType({0x686c430c89143d35, 0xcfbf0db325cf43ad})}, - {Sign::NEG, -143, MType({0xba79c248afd42c12, 0xc9f9c240baa15447})}, - {Sign::NEG, -143, MType({0xad19e0a92f115327, 0xc43476a0254996fd})}, - {Sign::NEG, -143, MType({0xa8ad6ac3b0c99520, 0xbe6f2ad165c5292f})}, - {Sign::NEG, -143, MType({0xd0567d4a9cc5e6a1, 0xb8a9ded47c11283d})}, - {Sign::NEG, -143, MType({0x1f87c654b231443, 0xb2e492a9682ab188})}, - {Sign::NEG, -143, MType({0xd6380b08358051bc, 0xad1f46502a0ee26d})}, - {Sign::NEG, -143, MType({0xa07b024d26d391f6, 0xa759f9c8c1bad84e})}, - {Sign::NEG, -143, MType({0x6ee868cb69e3a7d8, 0xa194ad132f2bb089})}, - {Sign::NEG, -143, MType({0xa6869eff6682f73, 0x9bcf602f725e887d})}, - {Sign::NEG, -143, MType({0xf6a44d559ccf3f61, 0x960a131d8b507d87})}, - {Sign::NEG, -143, MType({0x72066e1d30a8e210, 0x9044c5dd79fead08})}, - {Sign::NEG, -143, MType({0x75ba3245b1b856af, 0x8a7f786f3e66345c})}, - {Sign::NEG, -143, MType({0xb5ac020473ab198f, 0x84ba2ad2d88430e1})}, - {Sign::NEG, -144, MType({0x41127e3a88eb6741, 0xfde9ba1090ab7feb})}, - {Sign::NEG, -144, 
MType({0xbf80787522aca1c4, 0xf25f1e1f1baffdea})}, - {Sign::NEG, -144, MType({0xaf00688b14fa3adc, 0xe6d481d15210167b})}, - {Sign::NEG, -144, MType({0x4d72837c8ab4d1e5, 0xdb49e52733c60457})}, - {Sign::NEG, -144, MType({0x4e38ac27bb252090, 0xcfbf4820c0cc0236})}, - {Sign::NEG, -144, MType({0xda3661f9292f59e8, 0xc434aabdf91c4ad0})}, - {Sign::NEG, -144, MType({0x8fd0af9bdfd21488, 0xb8aa0cfedcb118de})}, - {Sign::NEG, -144, MType({0x82ee19a9abf0bfa5, 0xad1f6ee36b84a716})}, - {Sign::NEG, -144, MType({0x3cf68d5b5369a251, 0xa194d06ba591302f})}, - {Sign::NEG, -144, MType({0xbcd34f38c977647e, 0x960a31978ad0eede})}, - {Sign::NEG, -144, MType({0x76eee9c9605e2143, 0x8a7f92671b3e1dda})}, - {Sign::NEG, -145, MType({0xaa6a3887f0c803ab, 0xfde9e5b4ada5efae})}, - {Sign::NEG, -145, MType({0x6e25927e582ac191, 0xe6d4a5e27b136f13})}, - {Sign::NEG, -145, MType({0xe2ebcac2f3a8e9eb, 0xcfbf65579eb92f4a})}, - {Sign::NEG, -145, MType({0x9d9acc22d5690751, 0xb8aa2414188ba5bb})}, - {Sign::NEG, -145, MType({0x1e12604b6d4132ef, 0xa194e217e87f47cb})}, - {Sign::NEG, -145, MType({0xcf340d2acb9b92a9, 0x8a7f9f630e888add})}, - {Sign::NEG, -146, MType({0xdc5e49fbde3c520, 0xe6d4b7eb1537c8ae})}, - {Sign::NEG, -146, MType({0xc074c9557c01188, 0xb8aa2f9eb95b9332})}, - {Sign::NEG, -146, MType({0xf0f82818ff9b654f, 0x8a7fa5e109656009})}, - {Sign::NEG, -147, MType({0xd4cd612078bbe9b0, 0xb8aa35640a7c33eb})}, - {Sign::NEG, -148, MType({0xf08cf68f42e09fa0, 0xb8aa3846b33aaecf})}, - {Sign::POS, 0, MType({0x0, 0x0})}, - {Sign::POS, -148, MType({0x68bd0facdf0ddaaf, 0xb8aa3e0c0513f9b1})}, - {Sign::POS, -147, MType({0x192af653dd41575b, 0xb8aa40eeae2ec9b3})}, - {Sign::POS, -146, MType({0x3b5c89842e540a51, 0x8a7fb2dd018e4892})}, - {Sign::POS, -146, MType({0x34ad8ebdd8b2750c, 0xb8aa46b400c0bee3})}, - {Sign::POS, -146, MType({0x70b12bd698e5be74, 0xe6d4dbfc54c5dd1b})}, - {Sign::POS, -145, MType({0x8c7e424efbd90e1, 0x8a7fb95afeda5c46})}, - {Sign::POS, -145, MType({0x31b8eba774a1de77, 0xa19505707dd23344})}, - {Sign::POS, -145, 
MType({0xee400e8c68838733, 0xb8aa523ea755fe32})}, - {Sign::POS, -145, MType({0xe71fa0b5603bc2f, 0xcfbf9fc57b7147be})}, - {Sign::POS, -145, MType({0x7763c919d8ac65f1, 0xe6d4ee04fa2f9a92})}, - {Sign::POS, -145, MType({0x232b270bb6046ec1, 0xfdea3cfd239c815e})}, - {Sign::POS, -144, MType({0x106f39197e068972, 0x8a7fc656fbe1c368})}, - {Sign::POS, -144, MType({0x4a4a6f4012941bd9, 0x960a6e8bbb581acc})}, - {Sign::POS, -144, MType({0x5bb34c1120b3e54b, 0xa195171cd0370c34})}, - {Sign::POS, -144, MType({0x6bb6731392a3147a, 0xad1fc00a3a845cf9})}, - {Sign::POS, -144, MType({0x2be1268dcee3c8fc, 0xb8aa6953fa45d275})}, - {Sign::POS, -144, MType({0xd84158d5d50251a9, 0xc43512fa0f813201})}, - {Sign::POS, -144, MType({0x3765bda15d0ef0fa, 0xcfbfbcfc7a3c40fa})}, - {Sign::POS, -144, MType({0x9a5ddb55f9cc27d9, 0xdb4a675b3a7cc4b9})}, - {Sign::POS, -144, MType({0xdcba1c593d918775, 0xe6d512165048829b})}, - {Sign::POS, -144, MType({0x648be060e1e30a95, 0xf25fbd2dbba53ffd})}, - {Sign::POS, -144, MType({0x22658dc2f1bcf6e8, 0xfdea68a17c98c23b})}, - {Sign::POS, -143, MType({0x48ad5162fb4a236e, 0x84ba8a38c9946759})}, - {Sign::POS, -143, MType({0xdb7fe3789405ce3a, 0x8a7fe04effad9560})}, - {Sign::POS, -143, MType({0x91b56e2e4f2e5ed8, 0x90453693609acde3})}, - {Sign::POS, -143, MType({0xf8998880c3bb4d76, 0x960a8d05ec5ef390})}, - {Sign::POS, -143, MType({0xe2b878052f67efee, 0x9bcfe3a6a2fce918})}, - {Sign::POS, -143, MType({0x67df399193f707c0, 0xa1953a758477912b})}, - {Sign::POS, -143, MType({0xe51b89e4d5d095e1, 0xa75a917290d1ce78})}, - {Sign::POS, -143, MType({0xfcbbee4edbf9f47d, 0xad1fe89dc80e83b1})}, - {Sign::POS, -143, MType({0x964fbd58b168371b, 0xb2e53ff72a309387})}, - {Sign::POS, -143, MType({0xdea7276ca7acd135, 0xb8aa977eb73ae0aa})}, - {Sign::POS, -143, MType({0x47d33f7e7afc83a6, 0xbe6fef346f304dcd})}, - {Sign::POS, -143, MType({0x892603b377909123, 0xc43547185213bda0})}, - {Sign::POS, -143, MType({0x9f32660aa06239fb, 0xc9fa9f2a5fe812d6})}, - {Sign::POS, -143, MType({0xcbcc5504d7407f6c, 
0xcfbff76a98b03021})}, - {Sign::POS, -143, MType({0x9608c44d06402ebe, 0xd5854fd8fc6ef834})}, - {Sign::POS, -143, MType({0xca3db5604a863477, 0xdb4aa8758b274dc1})}, - {Sign::POS, -143, MType({0x7a024036206c37d6, 0xe110014044dc137c})}, - {Sign::POS, -143, MType({0xfc2e9be890ff7ee3, 0xe6d55a3929902c17})}, - {Sign::POS, -143, MType({0xecdc275c60da1b53, 0xec9ab36039467a47})}, - {Sign::POS, -143, MType({0x2d6571e94056607f, 0xf2600cb57401e0c0})}, - {Sign::POS, -143, MType({0xe4664401fd1ca2a7, 0xf8256638d9c54234})}, - {Sign::POS, -143, MType({0x7dbba7dcb50b3fd7, 0xfdeabfea6a93815a})}, - {Sign::POS, -142, MType({0xd541f90d853c794b, 0x81d80ce51337c072})}, - {Sign::POS, -142, MType({0xb08f65392ce8b75b, 0x84bab9ec06ae11c5})}, - {Sign::POS, -142, MType({0x6e969a29f8462436, 0x879d670a0fae2600})}, - {Sign::POS, -142, MType({0xcfc8cbcaa2bf130c, 0x8a80143f2e396e7d})}, - {Sign::POS, -142, MType({0xb737e48c19421e68, 0x8d62c18b62515c98})}, - {Sign::POS, -142, MType({0x2a9689b997c50c0b, 0x90456eeeabf761ac})}, - {Sign::POS, -142, MType({0x52381fccc774d66b, 0x93281c690b2cef13})}, - {Sign::POS, -142, MType({0x7910cec1dd92dc10, 0x960ac9fa7ff37629})}, - {Sign::POS, -142, MType({0xcb5866bbaff34cb, 0x98ed77a30a4c684a})}, - {Sign::POS, -142, MType({0x9d5c02c80c702d11, 0x9bd02562aa3936d0})}, - {Sign::POS, -142, MType({0xdddad0536b56e775, 0x9eb2d3395fbb5318})}, - {Sign::POS, -142, MType({0xa3a9505d7f71247a, 0xa19581272ad42e7e})}, - {Sign::POS, -142, MType({0xe6dfbd5d210830d7, 0xa4782f2c0b853a5d})}, - {Sign::POS, -142, MType({0xc2372f447bdcfa45, 0xa75add4801cfe812})}, - {Sign::POS, -142, MType({0x73099fd532c14b05, 0xaa3d8b7b0db5a8f9})}, - {Sign::POS, -142, MType({0x5951eef483de2c37, 0xad2039c52f37ee6e})}, - {Sign::POS, -142, MType({0xf7abe6ff6da76f1e, 0xb002e826665829cd})}, - {Sign::POS, -142, MType({0xf354411ed47c5d7b, 0xb2e5969eb317cc74})}, - {Sign::POS, -142, MType({0x1428a99ba8f5911f, 0xb5c8452e157847c0})}, - {Sign::POS, -142, MType({0x44a7c4330edff2c8, 0xb8aaf3d48d7b0d0c})}, - {Sign::POS, 
-142, MType({0x91f1306a84e4e07b, 0xbb8da2921b218db6})}, - {Sign::POS, -142, MType({0x2bc58de40cdf7b6a, 0xbe705166be6d3b1c})}, - {Sign::POS, -142, MType({0x648680b254df1d99, 0xc1530052775f869a})}, - {Sign::POS, -142, MType({0xb136b5ace0d6f74d, 0xc435af5545f9e18e})}, - {Sign::POS, -142, MType({0xa979e6c434fad480, 0xc7185e6f2a3dbd56})}, - {Sign::POS, -142, MType({0x794df5600c90a5a, 0xc9fb0da0242c8b50})}, - {Sign::POS, -142, MType({0xa86d80814ac18cf1, 0xccddbce833c7bcd8})}, - {Sign::POS, -142, MType({0x8b8ac57a9cca2d56, 0xcfc06c475910c34e})}, - {Sign::POS, -142, MType({0xd314c7e03140001f, 0xd2a31bbd9409100f})}, - {Sign::POS, -142, MType({0xc3d4c40e20b5ec89, 0xd585cb4ae4b2147a})}, - {Sign::POS, -142, MType({0xc5351d729060644e, 0xd8687aef4b0d41ed})}, - {Sign::POS, -142, MType({0x614162e1e12e445d, 0xdb4b2aaac71c09c7})}, - {Sign::POS, -142, MType({0x44a652eadf8ede85, 0xde2dda7d58dfdd66})}, - {Sign::POS, -142, MType({0x3eb1e02af3e52c3c, 0xe1108a67005a2e29})}, - {Sign::POS, -142, MType({0x415335a253a82aa2, 0xe3f33a67bd8c6d6f})}, - {Sign::POS, -142, MType({0x611abb0833305fe1, 0xe6d5ea7f90780c97})}, + {Sign::NEG, -142, 0xe6d3a96b'978fc16e'26f2c63c'0827ccbb_u128}, + {Sign::NEG, -142, 0xe3f107a9'fbfc50ca'4b56fe66'7c8ec091_u128}, + {Sign::NEG, -142, 0xe10e65d1'4b937265'647d7618'1aec10fc_u128}, + {Sign::NEG, -142, 0xde2bc3e1'8653b4f5'99e8f4d5'379eca79_u128}, + {Sign::NEG, -142, 0xdb4921da'ac3ba730'f07da899'90c20623_u128}, + {Sign::NEG, -142, 0xd8667fbc'bd49d7cd'4a812184'8531851a_u128}, + {Sign::NEG, -142, 0xd583dd87'b97cd580'679a4d85'4ae13619_u128}, + {Sign::NEG, -142, 0xd2a13b3b'a0d32eff'e4d17407'2487a514_u128}, + {Sign::NEG, -142, 0xcfbe98d8'734b7301'3c90319d'969b54be_u128}, + {Sign::NEG, -142, 0xccdbf65e'30e43039'c6a173b0'9ba301e6_u128}, + {Sign::NEG, -142, 0xc9f953cc'd99bf55e'b8317428'd7d8d06b_u128}, + {Sign::NEG, -142, 0xc716b124'6d715125'23cdb51b'cc2061cd_u128}, + {Sign::NEG, -142, 0xc4340e64'ec62d241'f964fc78'084fd515_u128}, + {Sign::NEG, -142, 
0xc1516b8e'566f076a'06474fb1'5ccbb015_u128}, + {Sign::NEG, -142, 0xbe6ec8a0'ab947f51'f525ef6d'0b75b1c3_u128}, + {Sign::NEG, -142, 0xbb8c259b'ebd1c8ae'4e13532d'f7ee8da7_u128}, + {Sign::NEG, -142, 0xb8a98280'17257233'76832500'd72a9027_u128}, + {Sign::NEG, -142, 0xb5c6df4d'2d8e0a95'b14a3d28'5e592ba0_u128}, + {Sign::NEG, -142, 0xb2e43c03'2f0a2089'1e9e9dc9'711f6e20_u128}, + {Sign::NEG, -142, 0xb00198a2'1b9842c1'bc176e97'4f255fac_u128}, + {Sign::NEG, -142, 0xad1ef529'f336fff3'64acf87f'c0f648e6_u128}, + {Sign::NEG, -142, 0xaa3c519a'b5e4e6d1'd0b8a157'4433e1f8_u128}, + {Sign::NEG, -142, 0xa759adf4'63a08610'95f4e785'371c69a9_u128}, + {Sign::NEG, -142, 0xa4770a36'fc686c63'277d5db0'0363a46f_u128}, + {Sign::NEG, -142, 0xa1946662'803b287c'd5cea669'485ec36c_u128}, + {Sign::NEG, -142, 0x9eb1c276'ef174910'cec66fda'04833322_u128}, + {Sign::NEG, -142, 0x9bcf1e74'48fb5cd2'1da36f6e'be3851db_u128}, + {Sign::NEG, -142, 0x98ec7a5a'8de5f273'ab055d83'abfc0d82_u128}, + {Sign::NEG, -142, 0x9609d629'bdd598a8'3cecf110'dbda68e9_u128}, + {Sign::NEG, -142, 0x932731e1'd8c8de22'76bbdb56'5a37e84b_u128}, + {Sign::NEG, -142, 0x90448d82'debe5194'd934c388'57eee4f3_u128}, + {Sign::NEG, -142, 0x8d61e90c'cfb481b1'c27b427b'4fbfc7db_u128}, + {Sign::NEG, -142, 0x8a7f447f'aba9fd2b'6e13de50'2b142b39_u128}, + {Sign::NEG, -142, 0x879c9fdb'729d52b3'f4e40620'6614e2ba_u128}, + {Sign::NEG, -142, 0x84b9fb20'248d10fd'4d320daa'3312ea6c_u128}, + {Sign::NEG, -142, 0x81d7564d'c177c6b9'4aa528fc'9d433c1a_u128}, + {Sign::NEG, -143, 0xfde962c8'92b80533'3c8ad047'559b1622_u128}, + {Sign::NEG, -143, 0xf82418c7'7870a69f'acf765a8'fc5bcc31_u128}, + {Sign::NEG, -143, 0xf25ece98'34168f1a'be238832'edd27f20_u128}, + {Sign::NEG, -143, 0xec99843a'c5a6dc07'02644bfc'a329b708_u128}, + {Sign::NEG, -143, 0xe6d439af'2d1eaac6'c6d05a78'8e614744_u128}, + {Sign::NEG, -143, 0xe10eeef5'6a7b18bc'133fe9cc'57a8c1d0_u128}, + {Sign::NEG, -143, 0xdb49a40d'7db94348'aa4cb429'195fb5dd_u128}, + {Sign::NEG, -143, 0xd58458f7'66d647ce'0951ef23'9abbb959_u128}, + 
{Sign::NEG, -143, 0xcfbf0db3'25cf43ad'686c430c'89143d35_u128}, + {Sign::NEG, -143, 0xc9f9c240'baa15447'ba79c248'afd42c12_u128}, + {Sign::NEG, -143, 0xc43476a0'254996fd'ad19e0a9'2f115327_u128}, + {Sign::NEG, -143, 0xbe6f2ad1'65c5292f'a8ad6ac3'b0c99520_u128}, + {Sign::NEG, -143, 0xb8a9ded4'7c11283d'd0567d4a'9cc5e6a1_u128}, + {Sign::NEG, -143, 0xb2e492a9'682ab188'01f87c65'4b231443_u128}, + {Sign::NEG, -143, 0xad1f4650'2a0ee26d'd6380b08'358051bc_u128}, + {Sign::NEG, -143, 0xa759f9c8'c1bad84e'a07b024d'26d391f6_u128}, + {Sign::NEG, -143, 0xa194ad13'2f2bb089'6ee868cb'69e3a7d8_u128}, + {Sign::NEG, -143, 0x9bcf602f'725e887d'0a6869ef'f6682f73_u128}, + {Sign::NEG, -143, 0x960a131d'8b507d87'f6a44d55'9ccf3f61_u128}, + {Sign::NEG, -143, 0x9044c5dd'79fead08'72066e1d'30a8e210_u128}, + {Sign::NEG, -143, 0x8a7f786f'3e66345c'75ba3245'b1b856af_u128}, + {Sign::NEG, -143, 0x84ba2ad2'd88430e1'b5ac0204'73ab198f_u128}, + {Sign::NEG, -144, 0xfde9ba10'90ab7feb'41127e3a'88eb6741_u128}, + {Sign::NEG, -144, 0xf25f1e1f'1baffdea'bf807875'22aca1c4_u128}, + {Sign::NEG, -144, 0xe6d481d1'5210167b'af00688b'14fa3adc_u128}, + {Sign::NEG, -144, 0xdb49e527'33c60457'4d72837c'8ab4d1e5_u128}, + {Sign::NEG, -144, 0xcfbf4820'c0cc0236'4e38ac27'bb252090_u128}, + {Sign::NEG, -144, 0xc434aabd'f91c4ad0'da3661f9'292f59e8_u128}, + {Sign::NEG, -144, 0xb8aa0cfe'dcb118de'8fd0af9b'dfd21488_u128}, + {Sign::NEG, -144, 0xad1f6ee3'6b84a716'82ee19a9'abf0bfa5_u128}, + {Sign::NEG, -144, 0xa194d06b'a591302f'3cf68d5b'5369a251_u128}, + {Sign::NEG, -144, 0x960a3197'8ad0eede'bcd34f38'c977647e_u128}, + {Sign::NEG, -144, 0x8a7f9267'1b3e1dda'76eee9c9'605e2143_u128}, + {Sign::NEG, -145, 0xfde9e5b4'ada5efae'aa6a3887'f0c803ab_u128}, + {Sign::NEG, -145, 0xe6d4a5e2'7b136f13'6e25927e'582ac191_u128}, + {Sign::NEG, -145, 0xcfbf6557'9eb92f4a'e2ebcac2'f3a8e9eb_u128}, + {Sign::NEG, -145, 0xb8aa2414'188ba5bb'9d9acc22'd5690751_u128}, + {Sign::NEG, -145, 0xa194e217'e87f47cb'1e12604b'6d4132ef_u128}, + {Sign::NEG, -145, 
0x8a7f9f63'0e888add'cf340d2a'cb9b92a9_u128}, + {Sign::NEG, -146, 0xe6d4b7eb'1537c8ae'0dc5e49f'bde3c520_u128}, + {Sign::NEG, -146, 0xb8aa2f9e'b95b9332'0c074c95'57c01188_u128}, + {Sign::NEG, -146, 0x8a7fa5e1'09656009'f0f82818'ff9b654f_u128}, + {Sign::NEG, -147, 0xb8aa3564'0a7c33eb'd4cd6120'78bbe9b0_u128}, + {Sign::NEG, -148, 0xb8aa3846'b33aaecf'f08cf68f'42e09fa0_u128}, + {Sign::POS, 0, 0_u128}, + {Sign::POS, -148, 0xb8aa3e0c'0513f9b1'68bd0fac'df0ddaaf_u128}, + {Sign::POS, -147, 0xb8aa40ee'ae2ec9b3'192af653'dd41575b_u128}, + {Sign::POS, -146, 0x8a7fb2dd'018e4892'3b5c8984'2e540a51_u128}, + {Sign::POS, -146, 0xb8aa46b4'00c0bee3'34ad8ebd'd8b2750c_u128}, + {Sign::POS, -146, 0xe6d4dbfc'54c5dd1b'70b12bd6'98e5be74_u128}, + {Sign::POS, -145, 0x8a7fb95a'feda5c46'08c7e424'efbd90e1_u128}, + {Sign::POS, -145, 0xa1950570'7dd23344'31b8eba7'74a1de77_u128}, + {Sign::POS, -145, 0xb8aa523e'a755fe32'ee400e8c'68838733_u128}, + {Sign::POS, -145, 0xcfbf9fc5'7b7147be'0e71fa0b'5603bc2f_u128}, + {Sign::POS, -145, 0xe6d4ee04'fa2f9a92'7763c919'd8ac65f1_u128}, + {Sign::POS, -145, 0xfdea3cfd'239c815e'232b270b'b6046ec1_u128}, + {Sign::POS, -144, 0x8a7fc656'fbe1c368'106f3919'7e068972_u128}, + {Sign::POS, -144, 0x960a6e8b'bb581acc'4a4a6f40'12941bd9_u128}, + {Sign::POS, -144, 0xa195171c'd0370c34'5bb34c11'20b3e54b_u128}, + {Sign::POS, -144, 0xad1fc00a'3a845cf9'6bb67313'92a3147a_u128}, + {Sign::POS, -144, 0xb8aa6953'fa45d275'2be1268d'cee3c8fc_u128}, + {Sign::POS, -144, 0xc43512fa'0f813201'd84158d5'd50251a9_u128}, + {Sign::POS, -144, 0xcfbfbcfc'7a3c40fa'3765bda1'5d0ef0fa_u128}, + {Sign::POS, -144, 0xdb4a675b'3a7cc4b9'9a5ddb55'f9cc27d9_u128}, + {Sign::POS, -144, 0xe6d51216'5048829b'dcba1c59'3d918775_u128}, + {Sign::POS, -144, 0xf25fbd2d'bba53ffd'648be060'e1e30a95_u128}, + {Sign::POS, -144, 0xfdea68a1'7c98c23b'22658dc2'f1bcf6e8_u128}, + {Sign::POS, -143, 0x84ba8a38'c9946759'48ad5162'fb4a236e_u128}, + {Sign::POS, -143, 0x8a7fe04e'ffad9560'db7fe378'9405ce3a_u128}, + {Sign::POS, -143, 
0x90453693'609acde3'91b56e2e'4f2e5ed8_u128}, + {Sign::POS, -143, 0x960a8d05'ec5ef390'f8998880'c3bb4d76_u128}, + {Sign::POS, -143, 0x9bcfe3a6'a2fce918'e2b87805'2f67efee_u128}, + {Sign::POS, -143, 0xa1953a75'8477912b'67df3991'93f707c0_u128}, + {Sign::POS, -143, 0xa75a9172'90d1ce78'e51b89e4'd5d095e1_u128}, + {Sign::POS, -143, 0xad1fe89d'c80e83b1'fcbbee4e'dbf9f47d_u128}, + {Sign::POS, -143, 0xb2e53ff7'2a309387'964fbd58'b168371b_u128}, + {Sign::POS, -143, 0xb8aa977e'b73ae0aa'dea7276c'a7acd135_u128}, + {Sign::POS, -143, 0xbe6fef34'6f304dcd'47d33f7e'7afc83a6_u128}, + {Sign::POS, -143, 0xc4354718'5213bda0'892603b3'77909123_u128}, + {Sign::POS, -143, 0xc9fa9f2a'5fe812d6'9f32660a'a06239fb_u128}, + {Sign::POS, -143, 0xcfbff76a'98b03021'cbcc5504'd7407f6c_u128}, + {Sign::POS, -143, 0xd5854fd8'fc6ef834'9608c44d'06402ebe_u128}, + {Sign::POS, -143, 0xdb4aa875'8b274dc1'ca3db560'4a863477_u128}, + {Sign::POS, -143, 0xe1100140'44dc137c'7a024036'206c37d6_u128}, + {Sign::POS, -143, 0xe6d55a39'29902c17'fc2e9be8'90ff7ee3_u128}, + {Sign::POS, -143, 0xec9ab360'39467a47'ecdc275c'60da1b53_u128}, + {Sign::POS, -143, 0xf2600cb5'7401e0c0'2d6571e9'4056607f_u128}, + {Sign::POS, -143, 0xf8256638'd9c54234'e4664401'fd1ca2a7_u128}, + {Sign::POS, -143, 0xfdeabfea'6a93815a'7dbba7dc'b50b3fd7_u128}, + {Sign::POS, -142, 0x81d80ce5'1337c072'd541f90d'853c794b_u128}, + {Sign::POS, -142, 0x84bab9ec'06ae11c5'b08f6539'2ce8b75b_u128}, + {Sign::POS, -142, 0x879d670a'0fae2600'6e969a29'f8462436_u128}, + {Sign::POS, -142, 0x8a80143f'2e396e7d'cfc8cbca'a2bf130c_u128}, + {Sign::POS, -142, 0x8d62c18b'62515c98'b737e48c'19421e68_u128}, + {Sign::POS, -142, 0x90456eee'abf761ac'2a9689b9'97c50c0b_u128}, + {Sign::POS, -142, 0x93281c69'0b2cef13'52381fcc'c774d66b_u128}, + {Sign::POS, -142, 0x960ac9fa'7ff37629'7910cec1'dd92dc10_u128}, + {Sign::POS, -142, 0x98ed77a3'0a4c684a'0cb5866b'baff34cb_u128}, + {Sign::POS, -142, 0x9bd02562'aa3936d0'9d5c02c8'0c702d11_u128}, + {Sign::POS, -142, 0x9eb2d339'5fbb5318'dddad053'6b56e775_u128}, + 
{Sign::POS, -142, 0xa1958127'2ad42e7e'a3a9505d'7f71247a_u128}, + {Sign::POS, -142, 0xa4782f2c'0b853a5d'e6dfbd5d'210830d7_u128}, + {Sign::POS, -142, 0xa75add48'01cfe812'c2372f44'7bdcfa45_u128}, + {Sign::POS, -142, 0xaa3d8b7b'0db5a8f9'73099fd5'32c14b05_u128}, + {Sign::POS, -142, 0xad2039c5'2f37ee6e'5951eef4'83de2c37_u128}, + {Sign::POS, -142, 0xb002e826'665829cd'f7abe6ff'6da76f1e_u128}, + {Sign::POS, -142, 0xb2e5969e'b317cc74'f354411e'd47c5d7b_u128}, + {Sign::POS, -142, 0xb5c8452e'157847c0'1428a99b'a8f5911f_u128}, + {Sign::POS, -142, 0xb8aaf3d4'8d7b0d0c'44a7c433'0edff2c8_u128}, + {Sign::POS, -142, 0xbb8da292'1b218db6'91f1306a'84e4e07b_u128}, + {Sign::POS, -142, 0xbe705166'be6d3b1c'2bc58de4'0cdf7b6a_u128}, + {Sign::POS, -142, 0xc1530052'775f869a'648680b2'54df1d99_u128}, + {Sign::POS, -142, 0xc435af55'45f9e18e'b136b5ac'e0d6f74d_u128}, + {Sign::POS, -142, 0xc7185e6f'2a3dbd56'a979e6c4'34fad480_u128}, + {Sign::POS, -142, 0xc9fb0da0'242c8b50'0794df56'00c90a5a_u128}, + {Sign::POS, -142, 0xccddbce8'33c7bcd8'a86d8081'4ac18cf1_u128}, + {Sign::POS, -142, 0xcfc06c47'5910c34e'8b8ac57a'9cca2d56_u128}, + {Sign::POS, -142, 0xd2a31bbd'9409100f'd314c7e0'3140001f_u128}, + {Sign::POS, -142, 0xd585cb4a'e4b2147a'c3d4c40e'20b5ec89_u128}, + {Sign::POS, -142, 0xd8687aef'4b0d41ed'c5351d72'9060644e_u128}, + {Sign::POS, -142, 0xdb4b2aaa'c71c09c7'614162e1'e12e445d_u128}, + {Sign::POS, -142, 0xde2dda7d'58dfdd66'44a652ea'df8ede85_u128}, + {Sign::POS, -142, 0xe1108a67'005a2e29'3eb1e02a'f3e52c3c_u128}, + {Sign::POS, -142, 0xe3f33a67'bd8c6d6f'415335a2'53a82aa2_u128}, + {Sign::POS, -142, 0xe6d5ea7f'90780c97'611abb08'33305fe1_u128}, }, // -log2(r) for the fourth step, generated by SageMath with: // @@ -684,139 +687,139 @@ const LogRR LOG2_TABLE = { // r = 2^-28 * round( 2^28 / (1 + i*2^(-28)) ); // s, m, e = RealField(128)(r).log2().sign_mantissa_exponent(); // print("{Sign::NEG," if (s == 1) else "{Sign::POS,", e, ", - // MType({", hex(m % 2^64), ",", hex((m >> 64) % 2^64), "})},"); + // 
format_hex(m), "},"); /* .step_4 = */ { - {Sign::NEG, -149, MType({0xef1bffe565ce0a46, 0xbb8ce2990b5d0b90})}, - {Sign::NEG, -149, MType({0xbea3244560ca3d99, 0xb8aa39b807a576e4})}, - {Sign::NEG, -149, MType({0x8b91f71ceefa31a2, 0xb5c790d6d5c354df})}, - {Sign::NEG, -149, MType({0x9096e3d684001c0e, 0xb2e4e7f575b6a57b})}, - {Sign::NEG, -149, MType({0x86054c794367f36, 0xb0023f13e77f68b3})}, - {Sign::NEG, -149, MType({0x2d9cb33094afe4de, 0xad1f96322b1d9e80})}, - {Sign::NEG, -149, MType({0x3afa673cfb3698f3, 0xaa3ced50409146dd})}, - {Sign::NEG, -149, MType({0x6b27d8033e4c6450, 0xa75a446e27da61c4})}, - {Sign::NEG, -149, MType({0xf8d36b84d52a477b, 0xa4779b8be0f8ef2f})}, - {Sign::NEG, -149, MType({0x1eab86ae37c03565, 0xa194f2a96becef1a})}, - {Sign::NEG, -149, MType({0x175e8d56deb4ce2c, 0x9eb249c6c8b6617d})}, - {Sign::NEG, -149, MType({0x1d9ae241436519da, 0x9bcfa0e3f7554653})}, - {Sign::NEG, -149, MType({0x6c0ee71adfe44325, 0x98ecf800f7c99d96})}, - {Sign::NEG, -149, MType({0x3d68fc7c2efb522f, 0x960a4f1dca136741})}, - {Sign::NEG, -149, MType({0xcc5781e8ac28e749, 0x9327a63a6e32a34d})}, - {Sign::NEG, -149, MType({0x5388d5ced3a0f5af, 0x9044fd56e42751b6})}, - {Sign::NEG, -149, MType({0xdab5588224c7e4a, 0x8d6254732bf17275})}, - {Sign::NEG, -149, MType({0x356d5d5915c94a70, 0x8a7fab8f45910584})}, - {Sign::NEG, -149, MType({0x57d48712c69a6a7, 0x879d02ab31060ade})}, - {Sign::NEG, -149, MType({0xb88970eae5341d60, 0x84ba59c6ee50827c})}, - {Sign::NEG, -149, MType({0x89402fcbbfe331bb, 0x81d7b0e27d706c5a})}, - {Sign::NEG, -150, MType({0x649fba0879ca348b, 0xfdea0ffbbccb90e3})}, - {Sign::NEG, -150, MType({0xdccd9edfbab6f777, 0xf824be3222612d78})}, - {Sign::NEG, -150, MType({0xf066b9aa4636478e, 0xf25f6c682ba1ae69})}, - {Sign::NEG, -150, MType({0x14c7b3cb21578781, 0xec9a1a9dd88d13ab})}, - {Sign::NEG, -150, MType({0xbf4d347b528f56e1, 0xe6d4c8d329235d30})}, - {Sign::NEG, -150, MType({0x6553e0c9e1b70799, 0xe10f77081d648aef})}, - {Sign::NEG, -150, MType({0x7c385b9bd80c1375, 0xdb4a253cb5509cdb})}, - 
{Sign::NEG, -150, MType({0x795745ac402f919d, 0xd584d370f0e792e9})}, - {Sign::NEG, -150, MType({0xd20d3d8c2625ac1b, 0xcfbf81a4d0296d0d})}, - {Sign::NEG, -150, MType({0xfbb6dfa297551554, 0xc9fa2fd853162b3c})}, - {Sign::NEG, -150, MType({0x6bb0c62ca2867d91, 0xc434de0b79adcd6b})}, - {Sign::NEG, -150, MType({0x9757893d57e40877, 0xbe6f8c3e43f0538d})}, - {Sign::NEG, -150, MType({0xf407bebdc8f8c28e, 0xb8aa3a70b1ddbd97})}, - {Sign::NEG, -150, MType({0xf71dfa6d08b016be, 0xb2e4e8a2c3760b7e})}, - {Sign::NEG, -150, MType({0x15f6cde02b5543ce, 0xad1f96d478b93d37})}, - {Sign::NEG, -150, MType({0xc5eec8824692d1e9, 0xa75a4505d1a752b4})}, - {Sign::NEG, -150, MType({0x7c6277947172081a, 0xa194f336ce404bec})}, - {Sign::NEG, -150, MType({0xaeae662dc45a61ce, 0x9bcfa1676e8428d2})}, - {Sign::NEG, -150, MType({0xd22f1d3b59110455, 0x960a4f97b272e95b})}, - {Sign::NEG, -150, MType({0x5c4123804ab83462, 0x9044fdc79a0c8d7c})}, - {Sign::NEG, -150, MType({0xc240fd95b5cecb89, 0x8a7fabf725511528})}, - {Sign::NEG, -150, MType({0x798b2deab82fadc4, 0x84ba5a2654408055})}, - {Sign::NEG, -151, MType({0xeef86988e2227ddb, 0xfdea10aa4db59ded})}, - {Sign::NEG, -151, MType({0x62e1207c0209b090, 0xf25f6d073a400203})}, - {Sign::NEG, -151, MType({0x3989789113ec7bee, 0xe6d4c9636e202cd4})}, - {Sign::NEG, -151, MType({0x5daa65565e562909, 0xdb4a25bee9561e49})}, - {Sign::NEG, -151, MType({0xb9fcd6062a84acbd, 0xcfbf8219abe1d64b})}, - {Sign::NEG, -151, MType({0x3939b586c46792b3, 0xc434de73b5c354c4})}, - {Sign::NEG, -151, MType({0xc619ea6a7a9ee85e, 0xb8aa3acd06fa999b})}, - {Sign::NEG, -151, MType({0x4b5656ef9e7a27fd, 0xad1f97259f87a4bb})}, - {Sign::NEG, -151, MType({0xb3a7d90083f7239c, 0xa194f37d7f6a760b})}, - {Sign::NEG, -151, MType({0xe9c74a3381c0f016, 0x960a4fd4a6a30d75})}, - {Sign::NEG, -151, MType({0xd86d7fcaf12ed012, 0x8a7fac2b15316ae2})}, - {Sign::NEG, -152, MType({0xd4a6956a5c863e0f, 0xfdea1101962b1c76})}, - {Sign::NEG, -152, MType({0x1462ef192f547877, 0xe6d4c9ab909eeed1})}, - {Sign::NEG, -152, 
MType({0x45819d2f1d72eb8b, 0xcfbf825419be4ca6})}, - {Sign::NEG, -152, MType({0x3d742790eedbe719, 0xb8aa3afb318935c8})}, - {Sign::NEG, -152, MType({0xd1ac0d7b70d74492, 0xa194f3a0d7ffaa08})}, - {Sign::NEG, -152, MType({0xd79ac58375f83d0c, 0x8a7fac450d21a939})}, - {Sign::NEG, -153, MType({0x49637b2bac367e87, 0xe6d4c9cfa1de665a})}, - {Sign::NEG, -153, MType({0x1cc4b5eedcc78b35, 0xb8aa3b1246d08f69})}, - {Sign::NEG, -153, MType({0xd43bf48a42745836, 0x8a7fac520919cd43})}, - {Sign::NEG, -154, MType({0x3557bdcf592619eb, 0xb8aa3b1dd1743f1c})}, - {Sign::NEG, -155, MType({0x6bdc2e83d3ebb0c4, 0xb8aa3b2396c617ae})}, - {Sign::POS, 0, MType({0x0, 0x0})}, - {Sign::POS, -155, MType({0x2d5b40050e44e8ab, 0xb8aa3b2f2169ca44})}, - {Sign::POS, -154, MType({0xb8560371b8f04afe, 0xb8aa3b34e6bba447})}, - {Sign::POS, -153, MType({0xc79a43ccc70459cc, 0x8a7fac6c010a1f14})}, - {Sign::POS, -153, MType({0x22c25632f519f77f, 0xb8aa3b40715f59c0})}, - {Sign::POS, -153, MType({0x42c10a314e35fb9e, 0xe6d4ca17c45d8282})}, - {Sign::POS, -152, MType({0xbe5a212ed7b949e4, 0x8a7fac78fd024cdb})}, - {Sign::POS, -152, MType({0x12dcf94ef5c5b918, 0xa194f3e7892a4fde})}, - {Sign::POS, -152, MType({0x49781013e57110ce, 0xb8aa3b5786a6ca76})}, - {Sign::POS, -152, MType({0x8cba70c085c12cb3, 0xcfbf82c8f577bcd2})}, - {Sign::POS, -152, MType({0x7332f3fb09328b8, 0xe6d4ca3bd59d2721})}, - {Sign::POS, -152, MType({0xe37168243a9d8b14, 0xfdea11b02717098f})}, - {Sign::POS, -151, MType({0xa602205479b93722, 0x8a7fac92f4f2b226})}, - {Sign::POS, -151, MType({0xb5bd735852c0d583, 0x960a504e8f041bc3})}, - {Sign::POS, -151, MType({0x363248630b0d812d, 0xa194f40ae1bfc1b6})}, - {Sign::POS, -151, MType({0x3ca83f0e02b823c0, 0xad1f97c7ed25a415})}, - {Sign::POS, -151, MType({0xde66fb46974bc4fd, 0xb8aa3b85b135c2f7})}, - {Sign::POS, -151, MType({0x30b6254e23c69fc2, 0xc434df442df01e75})}, - {Sign::POS, -151, MType({0x48dd69ba009b370c, 0xcfbf83036354b6a4})}, - {Sign::POS, -151, MType({0x3c24797383b16af5, 0xdb4a26c351638b9c})}, - {Sign::POS, -151, 
MType({0x1fd309b800678db7, 0xe6d4ca83f81c9d74})}, - {Sign::POS, -151, MType({0x930d418c79378a3, 0xf25f6e45577fec43})}, - {Sign::POS, -151, MType({0xd85967b2783a12c, 0xfdea12076f8d7820})}, - {Sign::POS, -150, MType({0x210c898c360016ed, 0x84ba5ae52022a091})}, - {Sign::POS, -150, MType({0x5e19883eef2605ab, 0x8a7facc6e4d3a3b0})}, - {Sign::POS, -150, MType({0x488dacc6629300ae, 0x9044fea905d9c579})}, - {Sign::POS, -150, MType({0x6b0cdebd3264e3e3, 0x960a508b833505f7})}, - {Sign::POS, -150, MType({0x503b07e7ff788dc2, 0x9bcfa26e5ce56536})}, - {Sign::POS, -150, MType({0x82bc1435696a69d1, 0xa194f45192eae341})}, - {Sign::POS, -150, MType({0x8d33f1be0e96fb1f, 0xa75a463525458024})}, - {Sign::POS, -150, MType({0xfa4690c48c1b66c9, 0xad1f981913f53bea})}, - {Sign::POS, -150, MType({0x5497e3b57dd5fe75, 0xb2e4e9fd5efa16a0})}, - {Sign::POS, -150, MType({0x26cbdf277e66cad5, 0xb8aa3be206541050})}, - {Sign::POS, -150, MType({0xfb8679db27301625, 0xbe6f8dc70a032905})}, - {Sign::POS, -150, MType({0x5d6bacbb1056f6aa, 0xc434dfac6a0760cd})}, - {Sign::POS, -150, MType({0xd71f72dbd0c3d936, 0xc9fa31922660b7b1})}, - {Sign::POS, -150, MType({0xf345c97bfe230ba2, 0xcfbf83783f0f2dbe})}, - {Sign::POS, -150, MType({0x3c82b0042ce54751, 0xd584d55eb412c300})}, - {Sign::POS, -150, MType({0x3d7a2806f0403bae, 0xdb4a2745856b7781})}, - {Sign::POS, -150, MType({0x80d03540da2f18ae, 0xe10f792cb3194b4d})}, - {Sign::POS, -150, MType({0x9128dd987b73194f, 0xe6d4cb143d1c3e70})}, - {Sign::POS, -150, MType({0xf928291e63940e14, 0xec9a1cfc237450f5})}, - {Sign::POS, -150, MType({0x4372220d20e0e78a, 0xf25f6ee4662182e9})}, - {Sign::POS, -150, MType({0xfaaad4c9407040c7, 0xf824c0cd0523d455})}, - {Sign::POS, -150, MType({0xa9764fe14e20e9e4, 0xfdea12b6007b4547})}, - {Sign::POS, -149, MType({0xed3c5206ea4d3942, 0x81d7b24fac13eae4})}, - {Sign::POS, -149, MType({0xc2af218aea6da27, 0x84ba5b448614c2f4})}, - {Sign::POS, -149, MType({0xf6d912ac383aaeba, 0x879d04398e402ad6})}, - {Sign::POS, -149, MType({0x7298bf5cca8b3d95, 
0x8a7fad2ec4962293})}, - {Sign::POS, -149, MType({0x44bc04daa8808214, 0x8d6256242916aa2f})}, - {Sign::POS, -149, MType({0x3294f0eb14683198, 0x9044ff19bbc1c1b0})}, - {Sign::POS, -149, MType({0x17592684ff600c3, 0x9327a80f7c97691c})}, - {Sign::POS, -149, MType({0x76aff9419c43e8b9, 0x960a51056b97a078})}, - {Sign::POS, -149, MType({0x5796367b39d26c63, 0x98ecf9fb88c267cb})}, - {Sign::POS, -149, MType({0x697a5c2e6888ddaa, 0x9bcfa2f1d417bf1a})}, - {Sign::POS, -149, MType({0x71ae7d8967b5a2b7, 0x9eb24be84d97a66b})}, - {Sign::POS, -149, MType({0x3584aecf760e7b39, 0xa194f4def5421dc4})}, - {Sign::POS, -149, MType({0x7a4f0558d1b0c59e, 0xa4779dd5cb17252a})}, - {Sign::POS, -149, MType({0x55f9792b821c455, 0xa75a46cccf16bca4})}, - {Sign::POS, -149, MType({0x9c087cff664ee311, 0xaa3cefc40140e436})}, - {Sign::POS, -149, MType({0x39bce36188dfc04, 0xad1f98bb61959be8})}, - {Sign::POS, -149, MType({0x16ba4e30a9d9d21, 0xb00241b2f014e3be})}, - {Sign::POS, -149, MType({0x5aca1bc777a54d5e, 0xb2e4eaaaacbebbbe})}, - {Sign::POS, -149, MType({0xd5094eb99a35d1f0, 0xb5c793a2979323ee})}, - {Sign::POS, -149, MType({0x357b5aa4ac49738d, 0xb8aa3c9ab0921c55})}, + {Sign::NEG, -149, 0xbb8ce299'0b5d0b90'ef1bffe5'65ce0a46_u128}, + {Sign::NEG, -149, 0xb8aa39b8'07a576e4'bea32445'60ca3d99_u128}, + {Sign::NEG, -149, 0xb5c790d6'd5c354df'8b91f71c'eefa31a2_u128}, + {Sign::NEG, -149, 0xb2e4e7f5'75b6a57b'9096e3d6'84001c0e_u128}, + {Sign::NEG, -149, 0xb0023f13'e77f68b3'086054c7'94367f36_u128}, + {Sign::NEG, -149, 0xad1f9632'2b1d9e80'2d9cb330'94afe4de_u128}, + {Sign::NEG, -149, 0xaa3ced50'409146dd'3afa673c'fb3698f3_u128}, + {Sign::NEG, -149, 0xa75a446e'27da61c4'6b27d803'3e4c6450_u128}, + {Sign::NEG, -149, 0xa4779b8b'e0f8ef2f'f8d36b84'd52a477b_u128}, + {Sign::NEG, -149, 0xa194f2a9'6becef1a'1eab86ae'37c03565_u128}, + {Sign::NEG, -149, 0x9eb249c6'c8b6617d'175e8d56'deb4ce2c_u128}, + {Sign::NEG, -149, 0x9bcfa0e3'f7554653'1d9ae241'436519da_u128}, + {Sign::NEG, -149, 0x98ecf800'f7c99d96'6c0ee71a'dfe44325_u128}, + {Sign::NEG, 
-149, 0x960a4f1d'ca136741'3d68fc7c'2efb522f_u128}, + {Sign::NEG, -149, 0x9327a63a'6e32a34d'cc5781e8'ac28e749_u128}, + {Sign::NEG, -149, 0x9044fd56'e42751b6'5388d5ce'd3a0f5af_u128}, + {Sign::NEG, -149, 0x8d625473'2bf17275'0dab5588'224c7e4a_u128}, + {Sign::NEG, -149, 0x8a7fab8f'45910584'356d5d59'15c94a70_u128}, + {Sign::NEG, -149, 0x879d02ab'31060ade'057d4871'2c69a6a7_u128}, + {Sign::NEG, -149, 0x84ba59c6'ee50827c'b88970ea'e5341d60_u128}, + {Sign::NEG, -149, 0x81d7b0e2'7d706c5a'89402fcb'bfe331bb_u128}, + {Sign::NEG, -150, 0xfdea0ffb'bccb90e3'649fba08'79ca348b_u128}, + {Sign::NEG, -150, 0xf824be32'22612d78'dccd9edf'bab6f777_u128}, + {Sign::NEG, -150, 0xf25f6c68'2ba1ae69'f066b9aa'4636478e_u128}, + {Sign::NEG, -150, 0xec9a1a9d'd88d13ab'14c7b3cb'21578781_u128}, + {Sign::NEG, -150, 0xe6d4c8d3'29235d30'bf4d347b'528f56e1_u128}, + {Sign::NEG, -150, 0xe10f7708'1d648aef'6553e0c9'e1b70799_u128}, + {Sign::NEG, -150, 0xdb4a253c'b5509cdb'7c385b9b'd80c1375_u128}, + {Sign::NEG, -150, 0xd584d370'f0e792e9'795745ac'402f919d_u128}, + {Sign::NEG, -150, 0xcfbf81a4'd0296d0d'd20d3d8c'2625ac1b_u128}, + {Sign::NEG, -150, 0xc9fa2fd8'53162b3c'fbb6dfa2'97551554_u128}, + {Sign::NEG, -150, 0xc434de0b'79adcd6b'6bb0c62c'a2867d91_u128}, + {Sign::NEG, -150, 0xbe6f8c3e'43f0538d'9757893d'57e40877_u128}, + {Sign::NEG, -150, 0xb8aa3a70'b1ddbd97'f407bebd'c8f8c28e_u128}, + {Sign::NEG, -150, 0xb2e4e8a2'c3760b7e'f71dfa6d'08b016be_u128}, + {Sign::NEG, -150, 0xad1f96d4'78b93d37'15f6cde0'2b5543ce_u128}, + {Sign::NEG, -150, 0xa75a4505'd1a752b4'c5eec882'4692d1e9_u128}, + {Sign::NEG, -150, 0xa194f336'ce404bec'7c627794'7172081a_u128}, + {Sign::NEG, -150, 0x9bcfa167'6e8428d2'aeae662d'c45a61ce_u128}, + {Sign::NEG, -150, 0x960a4f97'b272e95b'd22f1d3b'59110455_u128}, + {Sign::NEG, -150, 0x9044fdc7'9a0c8d7c'5c412380'4ab83462_u128}, + {Sign::NEG, -150, 0x8a7fabf7'25511528'c240fd95'b5cecb89_u128}, + {Sign::NEG, -150, 0x84ba5a26'54408055'798b2dea'b82fadc4_u128}, + {Sign::NEG, -151, 
0xfdea10aa'4db59ded'eef86988'e2227ddb_u128}, + {Sign::NEG, -151, 0xf25f6d07'3a400203'62e1207c'0209b090_u128}, + {Sign::NEG, -151, 0xe6d4c963'6e202cd4'39897891'13ec7bee_u128}, + {Sign::NEG, -151, 0xdb4a25be'e9561e49'5daa6556'5e562909_u128}, + {Sign::NEG, -151, 0xcfbf8219'abe1d64b'b9fcd606'2a84acbd_u128}, + {Sign::NEG, -151, 0xc434de73'b5c354c4'3939b586'c46792b3_u128}, + {Sign::NEG, -151, 0xb8aa3acd'06fa999b'c619ea6a'7a9ee85e_u128}, + {Sign::NEG, -151, 0xad1f9725'9f87a4bb'4b5656ef'9e7a27fd_u128}, + {Sign::NEG, -151, 0xa194f37d'7f6a760b'b3a7d900'83f7239c_u128}, + {Sign::NEG, -151, 0x960a4fd4'a6a30d75'e9c74a33'81c0f016_u128}, + {Sign::NEG, -151, 0x8a7fac2b'15316ae2'd86d7fca'f12ed012_u128}, + {Sign::NEG, -152, 0xfdea1101'962b1c76'd4a6956a'5c863e0f_u128}, + {Sign::NEG, -152, 0xe6d4c9ab'909eeed1'1462ef19'2f547877_u128}, + {Sign::NEG, -152, 0xcfbf8254'19be4ca6'45819d2f'1d72eb8b_u128}, + {Sign::NEG, -152, 0xb8aa3afb'318935c8'3d742790'eedbe719_u128}, + {Sign::NEG, -152, 0xa194f3a0'd7ffaa08'd1ac0d7b'70d74492_u128}, + {Sign::NEG, -152, 0x8a7fac45'0d21a939'd79ac583'75f83d0c_u128}, + {Sign::NEG, -153, 0xe6d4c9cf'a1de665a'49637b2b'ac367e87_u128}, + {Sign::NEG, -153, 0xb8aa3b12'46d08f69'1cc4b5ee'dcc78b35_u128}, + {Sign::NEG, -153, 0x8a7fac52'0919cd43'd43bf48a'42745836_u128}, + {Sign::NEG, -154, 0xb8aa3b1d'd1743f1c'3557bdcf'592619eb_u128}, + {Sign::NEG, -155, 0xb8aa3b23'96c617ae'6bdc2e83'd3ebb0c4_u128}, + {Sign::POS, 0, 0_u128}, + {Sign::POS, -155, 0xb8aa3b2f'2169ca44'2d5b4005'0e44e8ab_u128}, + {Sign::POS, -154, 0xb8aa3b34'e6bba447'b8560371'b8f04afe_u128}, + {Sign::POS, -153, 0x8a7fac6c'010a1f14'c79a43cc'c70459cc_u128}, + {Sign::POS, -153, 0xb8aa3b40'715f59c0'22c25632'f519f77f_u128}, + {Sign::POS, -153, 0xe6d4ca17'c45d8282'42c10a31'4e35fb9e_u128}, + {Sign::POS, -152, 0x8a7fac78'fd024cdb'be5a212e'd7b949e4_u128}, + {Sign::POS, -152, 0xa194f3e7'892a4fde'12dcf94e'f5c5b918_u128}, + {Sign::POS, -152, 0xb8aa3b57'86a6ca76'49781013'e57110ce_u128}, + {Sign::POS, -152, 
0xcfbf82c8'f577bcd2'8cba70c0'85c12cb3_u128}, + {Sign::POS, -152, 0xe6d4ca3b'd59d2721'07332f3f'b09328b8_u128}, + {Sign::POS, -152, 0xfdea11b0'2717098f'e3716824'3a9d8b14_u128}, + {Sign::POS, -151, 0x8a7fac92'f4f2b226'a6022054'79b93722_u128}, + {Sign::POS, -151, 0x960a504e'8f041bc3'b5bd7358'52c0d583_u128}, + {Sign::POS, -151, 0xa194f40a'e1bfc1b6'36324863'0b0d812d_u128}, + {Sign::POS, -151, 0xad1f97c7'ed25a415'3ca83f0e'02b823c0_u128}, + {Sign::POS, -151, 0xb8aa3b85'b135c2f7'de66fb46'974bc4fd_u128}, + {Sign::POS, -151, 0xc434df44'2df01e75'30b6254e'23c69fc2_u128}, + {Sign::POS, -151, 0xcfbf8303'6354b6a4'48dd69ba'009b370c_u128}, + {Sign::POS, -151, 0xdb4a26c3'51638b9c'3c247973'83b16af5_u128}, + {Sign::POS, -151, 0xe6d4ca83'f81c9d74'1fd309b8'00678db7_u128}, + {Sign::POS, -151, 0xf25f6e45'577fec43'0930d418'c79378a3_u128}, + {Sign::POS, -151, 0xfdea1207'6f8d7820'0d85967b'2783a12c_u128}, + {Sign::POS, -150, 0x84ba5ae5'2022a091'210c898c'360016ed_u128}, + {Sign::POS, -150, 0x8a7facc6'e4d3a3b0'5e19883e'ef2605ab_u128}, + {Sign::POS, -150, 0x9044fea9'05d9c579'488dacc6'629300ae_u128}, + {Sign::POS, -150, 0x960a508b'833505f7'6b0cdebd'3264e3e3_u128}, + {Sign::POS, -150, 0x9bcfa26e'5ce56536'503b07e7'ff788dc2_u128}, + {Sign::POS, -150, 0xa194f451'92eae341'82bc1435'696a69d1_u128}, + {Sign::POS, -150, 0xa75a4635'25458024'8d33f1be'0e96fb1f_u128}, + {Sign::POS, -150, 0xad1f9819'13f53bea'fa4690c4'8c1b66c9_u128}, + {Sign::POS, -150, 0xb2e4e9fd'5efa16a0'5497e3b5'7dd5fe75_u128}, + {Sign::POS, -150, 0xb8aa3be2'06541050'26cbdf27'7e66cad5_u128}, + {Sign::POS, -150, 0xbe6f8dc7'0a032905'fb8679db'27301625_u128}, + {Sign::POS, -150, 0xc434dfac'6a0760cd'5d6bacbb'1056f6aa_u128}, + {Sign::POS, -150, 0xc9fa3192'2660b7b1'd71f72db'd0c3d936_u128}, + {Sign::POS, -150, 0xcfbf8378'3f0f2dbe'f345c97b'fe230ba2_u128}, + {Sign::POS, -150, 0xd584d55e'b412c300'3c82b004'2ce54751_u128}, + {Sign::POS, -150, 0xdb4a2745'856b7781'3d7a2806'f0403bae_u128}, + {Sign::POS, -150, 0xe10f792c'b3194b4d'80d03540'da2f18ae_u128}, + 
{Sign::POS, -150, 0xe6d4cb14'3d1c3e70'9128dd98'7b73194f_u128}, + {Sign::POS, -150, 0xec9a1cfc'237450f5'f928291e'63940e14_u128}, + {Sign::POS, -150, 0xf25f6ee4'662182e9'4372220d'20e0e78a_u128}, + {Sign::POS, -150, 0xf824c0cd'0523d455'faaad4c9'407040c7_u128}, + {Sign::POS, -150, 0xfdea12b6'007b4547'a9764fe1'4e20e9e4_u128}, + {Sign::POS, -149, 0x81d7b24f'ac13eae4'ed3c5206'ea4d3942_u128}, + {Sign::POS, -149, 0x84ba5b44'8614c2f4'0c2af218'aea6da27_u128}, + {Sign::POS, -149, 0x879d0439'8e402ad6'f6d912ac'383aaeba_u128}, + {Sign::POS, -149, 0x8a7fad2e'c4962293'7298bf5c'ca8b3d95_u128}, + {Sign::POS, -149, 0x8d625624'2916aa2f'44bc04da'a8808214_u128}, + {Sign::POS, -149, 0x9044ff19'bbc1c1b0'3294f0eb'14683198_u128}, + {Sign::POS, -149, 0x9327a80f'7c97691c'01759268'4ff600c3_u128}, + {Sign::POS, -149, 0x960a5105'6b97a078'76aff941'9c43e8b9_u128}, + {Sign::POS, -149, 0x98ecf9fb'88c267cb'5796367b'39d26c63_u128}, + {Sign::POS, -149, 0x9bcfa2f1'd417bf1a'697a5c2e'6888ddaa_u128}, + {Sign::POS, -149, 0x9eb24be8'4d97a66b'71ae7d89'67b5a2b7_u128}, + {Sign::POS, -149, 0xa194f4de'f5421dc4'3584aecf'760e7b39_u128}, + {Sign::POS, -149, 0xa4779dd5'cb17252a'7a4f0558'd1b0c59e_u128}, + {Sign::POS, -149, 0xa75a46cc'cf16bca4'055f9792'b821c455_u128}, + {Sign::POS, -149, 0xaa3cefc4'0140e436'9c087cff'664ee311_u128}, + {Sign::POS, -149, 0xad1f98bb'61959be8'039bce36'188dfc04_u128}, + {Sign::POS, -149, 0xb00241b2'f014e3be'016ba4e3'0a9d9d21_u128}, + {Sign::POS, -149, 0xb2e4eaaa'acbebbbe'5aca1bc7'77a54d5e_u128}, + {Sign::POS, -149, 0xb5c793a2'979323ee'd5094eb9'9a35d1f0_u128}, + {Sign::POS, -149, 0xb8aa3c9a'b0921c55'357b5aa4'ac49738d_u128}, }}; // > P = fpminimax(log2(1 + x)/x, 3, [|128...|], [-0x1.0002143p-29 , 0x1p-29]); @@ -824,10 +827,10 @@ const LogRR LOG2_TABLE = { // > dirtyinfnorm(log2(1 + x)/x - P, [-0x1.0002143p-29 , 0x1p-29]); // 0x1.27ad5...p-121 const Float128 BIG_COEFFS[4]{ - {Sign::NEG, -129, MType({0x3eccf6940d66bbcc, 0xb8aa3b295c2b21e3})}, - {Sign::POS, -129, MType({0xee39a6d649394bb1, 
0xf6384ee1d01febc9})}, - {Sign::NEG, -128, MType({0xbe87fed067ea2ad5, 0xb8aa3b295c17f0bb})}, - {Sign::POS, -127, MType({0xbe87fed0691d3e3f, 0xb8aa3b295c17f0bb})}, + {Sign::NEG, -129, 0xb8aa3b29'5c2b21e3'3eccf694'0d66bbcc_u128}, + {Sign::POS, -129, 0xf6384ee1'd01febc9'ee39a6d6'49394bb1_u128}, + {Sign::NEG, -128, 0xb8aa3b29'5c17f0bb'be87fed0'67ea2ad5_u128}, + {Sign::POS, -127, 0xb8aa3b29'5c17f0bb'be87fed0'691d3e3f_u128}, }; // Reuse the output of the fast pass range reduction. diff --git a/libc/test/src/__support/CMakeLists.txt b/libc/test/src/__support/CMakeLists.txt index 231b01e0ee50a7..6dfd0f196b1088 100644 --- a/libc/test/src/__support/CMakeLists.txt +++ b/libc/test/src/__support/CMakeLists.txt @@ -55,6 +55,7 @@ add_libc_test( str_to_double_test.cpp str_to_long_double_test.cpp DEPENDS + libc.src.__support.integer_literals libc.src.__support.str_to_float libc.src.__support.uint128 libc.src.errno.errno @@ -67,8 +68,9 @@ add_libc_test( SRCS integer_to_string_test.cpp DEPENDS - libc.src.__support.integer_to_string libc.src.__support.CPP.string_view + libc.src.__support.integer_literals + libc.src.__support.integer_to_string libc.src.__support.uint libc.src.__support.uint128 ) diff --git a/libc/test/src/__support/FPUtil/fpbits_test.cpp b/libc/test/src/__support/FPUtil/fpbits_test.cpp index 1c8a1c5b9d4cee..46f7d250596873 100644 --- a/libc/test/src/__support/FPUtil/fpbits_test.cpp +++ b/libc/test/src/__support/FPUtil/fpbits_test.cpp @@ -452,7 +452,7 @@ TEST(LlvmLibcFPBitsTest, X86LongDoubleType) { EXPECT_TRUE(negzero.is_neg()); EXPECT_EQ(negzero.get_biased_exponent(), 0_u16); EXPECT_EQ(negzero.get_mantissa(), 0_u128); - EXPECT_EQ(negzero.uintval(), 0x80000000000000000000_u128); + EXPECT_EQ(negzero.uintval(), 0x8000'00000000'00000000_u128); EXPECT_STREQ( LIBC_NAMESPACE::str(negzero).c_str(), "0x00000000000080000000000000000000 = " @@ -462,7 +462,7 @@ TEST(LlvmLibcFPBitsTest, X86LongDoubleType) { EXPECT_TRUE(one.is_pos()); EXPECT_EQ(one.get_biased_exponent(), 
0x3FFF_u16); EXPECT_EQ(one.get_mantissa(), 0_u128); - EXPECT_EQ(one.uintval(), 0x3FFF8000000000000000_u128); + EXPECT_EQ(one.uintval(), 0x3FFF'80000000'00000000_u128); EXPECT_STREQ( LIBC_NAMESPACE::str(one).c_str(), "0x0000000000003FFF8000000000000000 = " @@ -472,7 +472,7 @@ TEST(LlvmLibcFPBitsTest, X86LongDoubleType) { EXPECT_TRUE(negone.is_neg()); EXPECT_EQ(negone.get_biased_exponent(), 0x3FFF_u16); EXPECT_EQ(negone.get_mantissa(), 0_u128); - EXPECT_EQ(negone.uintval(), 0xBFFF8000000000000000_u128); + EXPECT_EQ(negone.uintval(), 0xBFFF'80000000'00000000_u128); EXPECT_STREQ( LIBC_NAMESPACE::str(negone).c_str(), "0x000000000000BFFF8000000000000000 = " @@ -481,8 +481,8 @@ TEST(LlvmLibcFPBitsTest, X86LongDoubleType) { LongDoubleBits num(1.125l); EXPECT_TRUE(num.is_pos()); EXPECT_EQ(num.get_biased_exponent(), 0x3FFF_u16); - EXPECT_EQ(num.get_mantissa(), 0x1000000000000000_u128); - EXPECT_EQ(num.uintval(), 0x3FFF9000000000000000_u128); + EXPECT_EQ(num.get_mantissa(), 0x10000000'00000000_u128); + EXPECT_EQ(num.uintval(), 0x3FFF'90000000'00000000_u128); EXPECT_STREQ( LIBC_NAMESPACE::str(num).c_str(), "0x0000000000003FFF9000000000000000 = " @@ -491,8 +491,8 @@ TEST(LlvmLibcFPBitsTest, X86LongDoubleType) { LongDoubleBits negnum(-1.125l); EXPECT_TRUE(negnum.is_neg()); EXPECT_EQ(negnum.get_biased_exponent(), 0x3FFF_u16); - EXPECT_EQ(negnum.get_mantissa(), 0x1000000000000000_u128); - EXPECT_EQ(negnum.uintval(), 0xBFFF9000000000000000_u128); + EXPECT_EQ(negnum.get_mantissa(), 0x10000000'00000000_u128); + EXPECT_EQ(negnum.uintval(), 0xBFFF'90000000'00000000_u128); EXPECT_STREQ( LIBC_NAMESPACE::str(negnum).c_str(), "0x000000000000BFFF9000000000000000 = " @@ -528,7 +528,7 @@ TEST(LlvmLibcFPBitsTest, LongDoubleType) { EXPECT_TRUE(negzero.is_neg()); EXPECT_EQ(negzero.get_biased_exponent(), 0_u16); EXPECT_EQ(negzero.get_mantissa(), 0_u128); - EXPECT_EQ(negzero.uintval(), 0x80000000000000000000000000000000_u128); + EXPECT_EQ(negzero.uintval(), 
0x80000000'00000000'00000000'00000000_u128); EXPECT_STREQ(LIBC_NAMESPACE::str(negzero).c_str(), "0x80000000000000000000000000000000 = " "(S: 1, E: 0x0000, M: 0x00000000000000000000000000000000)"); @@ -537,7 +537,7 @@ TEST(LlvmLibcFPBitsTest, LongDoubleType) { EXPECT_TRUE(one.is_pos()); EXPECT_EQ(one.get_biased_exponent(), 0x3FFF_u16); EXPECT_EQ(one.get_mantissa(), 0_u128); - EXPECT_EQ(one.uintval(), 0x3FFF0000000000000000000000000000_u128); + EXPECT_EQ(one.uintval(), 0x3FFF0000'00000000'00000000'00000000_u128); EXPECT_STREQ(LIBC_NAMESPACE::str(one).c_str(), "0x3FFF0000000000000000000000000000 = " "(S: 0, E: 0x3FFF, M: 0x00000000000000000000000000000000)"); @@ -546,7 +546,7 @@ TEST(LlvmLibcFPBitsTest, LongDoubleType) { EXPECT_TRUE(negone.is_neg()); EXPECT_EQ(negone.get_biased_exponent(), 0x3FFF_u16); EXPECT_EQ(negone.get_mantissa(), 0_u128); - EXPECT_EQ(negone.uintval(), 0xBFFF0000000000000000000000000000_u128); + EXPECT_EQ(negone.uintval(), 0xBFFF0000'00000000'00000000'00000000_u128); EXPECT_STREQ(LIBC_NAMESPACE::str(negone).c_str(), "0xBFFF0000000000000000000000000000 = " "(S: 1, E: 0x3FFF, M: 0x00000000000000000000000000000000)"); @@ -554,8 +554,8 @@ TEST(LlvmLibcFPBitsTest, LongDoubleType) { LongDoubleBits num(1.125l); EXPECT_TRUE(num.is_pos()); EXPECT_EQ(num.get_biased_exponent(), 0x3FFF_u16); - EXPECT_EQ(num.get_mantissa(), 0x2000000000000000000000000000_u128); - EXPECT_EQ(num.uintval(), 0x3FFF2000000000000000000000000000_u128); + EXPECT_EQ(num.get_mantissa(), 0x2000'00000000'00000000'00000000_u128); + EXPECT_EQ(num.uintval(), 0x3FFF2000'00000000'00000000'00000000_u128); EXPECT_STREQ(LIBC_NAMESPACE::str(num).c_str(), "0x3FFF2000000000000000000000000000 = " "(S: 0, E: 0x3FFF, M: 0x00002000000000000000000000000000)"); @@ -563,8 +563,8 @@ TEST(LlvmLibcFPBitsTest, LongDoubleType) { LongDoubleBits negnum(-1.125l); EXPECT_TRUE(negnum.is_neg()); EXPECT_EQ(negnum.get_biased_exponent(), 0x3FFF_u16); - EXPECT_EQ(negnum.get_mantissa(), 
0x2000000000000000000000000000_u128); - EXPECT_EQ(negnum.uintval(), 0xBFFF2000000000000000000000000000_u128); + EXPECT_EQ(negnum.get_mantissa(), 0x2000'00000000'00000000'00000000_u128); + EXPECT_EQ(negnum.uintval(), 0xBFFF2000'00000000'00000000'00000000_u128); EXPECT_STREQ(LIBC_NAMESPACE::str(negnum).c_str(), "0xBFFF2000000000000000000000000000 = " "(S: 1, E: 0x3FFF, M: 0x00002000000000000000000000000000)"); @@ -599,7 +599,7 @@ TEST(LlvmLibcFPBitsTest, Float128Type) { EXPECT_TRUE(negzero.is_neg()); EXPECT_EQ(negzero.get_biased_exponent(), 0_u16); EXPECT_EQ(negzero.get_mantissa(), 0_u128); - EXPECT_EQ(negzero.uintval(), 0x80000000000000000000000000000000_u128); + EXPECT_EQ(negzero.uintval(), 0x80000000'00000000'00000000'00000000_u128); EXPECT_STREQ(LIBC_NAMESPACE::str(negzero).c_str(), "0x80000000000000000000000000000000 = " "(S: 1, E: 0x0000, M: 0x00000000000000000000000000000000)"); @@ -608,7 +608,7 @@ TEST(LlvmLibcFPBitsTest, Float128Type) { EXPECT_TRUE(one.is_pos()); EXPECT_EQ(one.get_biased_exponent(), 0x3FFF_u16); EXPECT_EQ(one.get_mantissa(), 0_u128); - EXPECT_EQ(one.uintval(), 0x3FFF0000000000000000000000000000_u128); + EXPECT_EQ(one.uintval(), 0x3FFF0000'00000000'00000000'00000000_u128); EXPECT_STREQ(LIBC_NAMESPACE::str(one).c_str(), "0x3FFF0000000000000000000000000000 = " "(S: 0, E: 0x3FFF, M: 0x00000000000000000000000000000000)"); @@ -617,7 +617,7 @@ TEST(LlvmLibcFPBitsTest, Float128Type) { EXPECT_TRUE(negone.is_neg()); EXPECT_EQ(negone.get_biased_exponent(), 0x3FFF_u16); EXPECT_EQ(negone.get_mantissa(), 0_u128); - EXPECT_EQ(negone.uintval(), 0xBFFF0000000000000000000000000000_u128); + EXPECT_EQ(negone.uintval(), 0xBFFF0000'00000000'00000000'00000000_u128); EXPECT_STREQ(LIBC_NAMESPACE::str(negone).c_str(), "0xBFFF0000000000000000000000000000 = " "(S: 1, E: 0x3FFF, M: 0x00000000000000000000000000000000)"); @@ -625,8 +625,8 @@ TEST(LlvmLibcFPBitsTest, Float128Type) { Float128Bits num(float128(1.125)); EXPECT_TRUE(num.is_pos()); 
EXPECT_EQ(num.get_biased_exponent(), 0x3FFF_u16); - EXPECT_EQ(num.get_mantissa(), 0x2000000000000000000000000000_u128); - EXPECT_EQ(num.uintval(), 0x3FFF2000000000000000000000000000_u128); + EXPECT_EQ(num.get_mantissa(), 0x2000'00000000'00000000'00000000_u128); + EXPECT_EQ(num.uintval(), 0x3FFF2000'00000000'00000000'00000000_u128); EXPECT_STREQ(LIBC_NAMESPACE::str(num).c_str(), "0x3FFF2000000000000000000000000000 = " "(S: 0, E: 0x3FFF, M: 0x00002000000000000000000000000000)"); @@ -634,8 +634,8 @@ TEST(LlvmLibcFPBitsTest, Float128Type) { Float128Bits negnum(float128(-1.125)); EXPECT_TRUE(negnum.is_neg()); EXPECT_EQ(negnum.get_biased_exponent(), 0x3FFF_u16); - EXPECT_EQ(negnum.get_mantissa(), 0x2000000000000000000000000000_u128); - EXPECT_EQ(negnum.uintval(), 0xBFFF2000000000000000000000000000_u128); + EXPECT_EQ(negnum.get_mantissa(), 0x2000'00000000'00000000'00000000_u128); + EXPECT_EQ(negnum.uintval(), 0xBFFF2000'00000000'00000000'00000000_u128); EXPECT_STREQ(LIBC_NAMESPACE::str(negnum).c_str(), "0xBFFF2000000000000000000000000000 = " "(S: 1, E: 0x3FFF, M: 0x00002000000000000000000000000000)"); diff --git a/libc/test/src/__support/integer_to_string_test.cpp b/libc/test/src/__support/integer_to_string_test.cpp index c8913bf461bb36..2a19c5bf7549c6 100644 --- a/libc/test/src/__support/integer_to_string_test.cpp +++ b/libc/test/src/__support/integer_to_string_test.cpp @@ -10,6 +10,7 @@ #include "src/__support/CPP/string_view.h" #include "src/__support/UInt.h" #include "src/__support/UInt128.h" +#include "src/__support/integer_literals.h" #include "src/__support/integer_to_string.h" #include "test/UnitTest/Test.h" @@ -24,6 +25,8 @@ using LIBC_NAMESPACE::radix::Custom; using LIBC_NAMESPACE::radix::Dec; using LIBC_NAMESPACE::radix::Hex; using LIBC_NAMESPACE::radix::Oct; +using LIBC_NAMESPACE::operator""_u128; +using LIBC_NAMESPACE::operator""_u256; #define EXPECT(type, value, string_value) \ { \ @@ -204,11 +207,11 @@ TEST(LlvmLibcIntegerToStringTest, UINT128_Base_16) { 
using type = IntegerToString>; EXPECT(type, 0, "00000000000000000000000000000000"); EXPECT(type, 0x12345, "00000000000000000000000000012345"); - EXPECT(type, static_cast(0x1234) << 112, + EXPECT(type, 0x12340000'00000000'00000000'00000000_u128, "12340000000000000000000000000000"); - EXPECT(type, static_cast(0x1234) << 48, + EXPECT(type, 0x00000000'00000000'12340000'00000000_u128, "00000000000000001234000000000000"); - EXPECT(type, static_cast(0x1234) << 52, + EXPECT(type, 0x00000000'00000001'23400000'00000000_u128, "00000000000000012340000000000000"); } @@ -227,16 +230,26 @@ TEST(LlvmLibcIntegerToStringTest, UINT64_Base_36) { TEST(LlvmLibcIntegerToStringTest, UINT256_Base_16) { using UInt256 = LIBC_NAMESPACE::cpp::UInt<256>; using type = IntegerToString>; - EXPECT(type, static_cast(0), - "0000000000000000000000000000000000000000000000000000000000000000"); - EXPECT(type, static_cast(0x12345), - "0000000000000000000000000000000000000000000000000000000000012345"); - EXPECT(type, static_cast(0x1234) << 112, - "0000000000000000000000000000000012340000000000000000000000000000"); - EXPECT(type, static_cast(0x1234) << 116, - "0000000000000000000000000000000123400000000000000000000000000000"); - EXPECT(type, static_cast(0x1234) << 240, - "1234000000000000000000000000000000000000000000000000000000000000"); + EXPECT( + type, + 0x0000000000000000000000000000000000000000000000000000000000000000_u256, + "0000000000000000000000000000000000000000000000000000000000000000"); + EXPECT( + type, + 0x0000000000000000000000000000000000000000000000000000000000012345_u256, + "0000000000000000000000000000000000000000000000000000000000012345"); + EXPECT( + type, + 0x0000000000000000000000000000000012340000000000000000000000000000_u256, + "0000000000000000000000000000000012340000000000000000000000000000"); + EXPECT( + type, + 0x0000000000000000000000000000000123400000000000000000000000000000_u256, + "0000000000000000000000000000000123400000000000000000000000000000"); + EXPECT( + type, + 
0x1234000000000000000000000000000000000000000000000000000000000000_u256, + "1234000000000000000000000000000000000000000000000000000000000000"); } TEST(LlvmLibcIntegerToStringTest, NegativeInterpretedAsPositive) { diff --git a/libc/test/src/__support/str_to_long_double_test.cpp b/libc/test/src/__support/str_to_long_double_test.cpp index 6fefc89ac3a945..c4686cfba3317e 100644 --- a/libc/test/src/__support/str_to_long_double_test.cpp +++ b/libc/test/src/__support/str_to_long_double_test.cpp @@ -1,8 +1,11 @@ #include "str_to_fp_test.h" +#include "src/__support/integer_literals.h" + namespace LIBC_NAMESPACE { using LlvmLibcStrToLongDblTest = LlvmLibcStrToFloatTest; +using LIBC_NAMESPACE::operator""_u128; #if defined(LIBC_LONG_DOUBLE_IS_FLOAT64) @@ -18,15 +21,12 @@ TEST_F(LlvmLibcStrToLongDblTest, EiselLemireFloat80Simple) { } TEST_F(LlvmLibcStrToLongDblTest, EiselLemireFloat80LongerMantissa) { - eisel_lemire_test((UInt128(0x1234567812345678) << 64) + - UInt128(0x1234567812345678), - 0, 0x91a2b3c091a2b3c1, 16507); - eisel_lemire_test((UInt128(0x1234567812345678) << 64) + - UInt128(0x1234567812345678), - 300, 0xd97757de56adb65c, 17503); - eisel_lemire_test((UInt128(0x1234567812345678) << 64) + - UInt128(0x1234567812345678), - -300, 0xc30feb9a7618457d, 15510); + eisel_lemire_test(0x12345678'12345678'12345678'12345678_u128, 0, + 0x91a2b3c091a2b3c1, 16507); + eisel_lemire_test(0x12345678'12345678'12345678'12345678_u128, 300, + 0xd97757de56adb65c, 17503); + eisel_lemire_test(0x12345678'12345678'12345678'12345678_u128, -300, + 0xc30feb9a7618457d, 15510); } // These tests check numbers at the edge of the DETAILED_POWERS_OF_TEN table. 
@@ -57,30 +57,24 @@ TEST_F(LlvmLibcStrToLongDblTest, EiselLemireFloat80Fallback) { #else // Quad precision long double TEST_F(LlvmLibcStrToLongDblTest, EiselLemireFloat128Simple) { - eisel_lemire_test(123, 0, (UInt128(0x1ec0000000000) << 64), 16389); - eisel_lemire_test( - 12345678901234568192u, 0, - (UInt128(0x156a95319d63e) << 64) + UInt128(0x1800000000000000), 16446); + eisel_lemire_test(123, 0, 0x1ec00'00000000'00000000'00000000_u128, 16389); + eisel_lemire_test(12345678901234568192u, 0, + 0x156a9'5319d63e'18000000'00000000_u128, 16446); } TEST_F(LlvmLibcStrToLongDblTest, EiselLemireFloat128LongerMantissa) { - eisel_lemire_test( - (UInt128(0x1234567812345678) << 64) + UInt128(0x1234567812345678), 0, - (UInt128(0x1234567812345) << 64) + UInt128(0x6781234567812345), 16507); - eisel_lemire_test( - (UInt128(0x1234567812345678) << 64) + UInt128(0x1234567812345678), 300, - (UInt128(0x1b2eeafbcad5b) << 64) + UInt128(0x6cb8b4451dfcde19), 17503); - eisel_lemire_test( - (UInt128(0x1234567812345678) << 64) + UInt128(0x1234567812345678), -300, - (UInt128(0x1861fd734ec30) << 64) + UInt128(0x8afa7189f0f7595f), 15510); + eisel_lemire_test(0x12345678'12345678'12345678'12345678_u128, 0, + 0x12345'67812345'67812345'67812345_u128, 16507); + eisel_lemire_test(0x12345678'12345678'12345678'12345678_u128, 300, + 0x1b2ee'afbcad5b'6cb8b445'1dfcde19_u128, 17503); + eisel_lemire_test(0x12345678'12345678'12345678'12345678_u128, -300, + 0x1861f'd734ec30'8afa7189'f0f7595f_u128, 15510); } TEST_F(LlvmLibcStrToLongDblTest, EiselLemireFloat128Fallback) { - ASSERT_FALSE( - internal::eisel_lemire( - {(UInt128(0x5ce0e9a56015fec5) << 64) + UInt128(0xaadfa328ae39b333), - 1}) - .has_value()); + ASSERT_FALSE(internal::eisel_lemire( + {0x5ce0e9a5'6015fec5'aadfa328'ae39b333_u128, 1}) + .has_value()); } #endif diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index fde2bac746f4f8..c6f01f435e12c5 100644 --- 
a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -1283,6 +1283,7 @@ libc_math_function( ":__support_fputil_polyeval", ":__support_fputil_rounding_mode", ":__support_fputil_triple_double", + ":__support_integer_literals", ":__support_macros_optimization", ":common_constants", ":explogxf", @@ -1313,6 +1314,7 @@ libc_math_function( ":__support_fputil_polyeval", ":__support_fputil_rounding_mode", ":__support_fputil_triple_double", + ":__support_integer_literals", ":__support_macros_optimization", ":common_constants", ":explogxf", @@ -1342,6 +1344,7 @@ libc_math_function( ":__support_fputil_polyeval", ":__support_fputil_rounding_mode", ":__support_fputil_triple_double", + ":__support_integer_literals", ":__support_macros_optimization", ":common_constants", ":explogxf", @@ -1365,6 +1368,7 @@ libc_math_function( ":__support_fputil_polyeval", ":__support_fputil_rounding_mode", ":__support_fputil_triple_double", + ":__support_integer_literals", ":__support_macros_optimization", ":common_constants", ":explogxf", @@ -1428,11 +1432,12 @@ libc_math_function( libc_math_function( name = "log", additional_deps = [ + ":__support_fputil_double_double", + ":__support_fputil_dyadic_float", ":__support_fputil_fma", ":__support_fputil_multiply_add", ":__support_fputil_polyeval", - ":__support_fputil_double_double", - ":__support_fputil_dyadic_float", + ":__support_integer_literals", ":__support_macros_optimization", ":__support_macros_properties_cpu_features", ":common_constants", @@ -1443,11 +1448,12 @@ libc_math_function( libc_math_function( name = "log2", additional_deps = [ + ":__support_fputil_double_double", + ":__support_fputil_dyadic_float", ":__support_fputil_fma", ":__support_fputil_multiply_add", ":__support_fputil_polyeval", - ":__support_fputil_double_double", - ":__support_fputil_dyadic_float", + ":__support_integer_literals", ":__support_macros_optimization", ":__support_macros_properties_cpu_features", 
":common_constants", @@ -1458,11 +1464,12 @@ libc_math_function( libc_math_function( name = "log10", additional_deps = [ + ":__support_fputil_double_double", + ":__support_fputil_dyadic_float", ":__support_fputil_fma", ":__support_fputil_multiply_add", ":__support_fputil_polyeval", - ":__support_fputil_double_double", - ":__support_fputil_dyadic_float", + ":__support_integer_literals", ":__support_macros_optimization", ":__support_macros_properties_cpu_features", ":common_constants", @@ -1473,11 +1480,12 @@ libc_math_function( libc_math_function( name = "log1p", additional_deps = [ + ":__support_fputil_double_double", + ":__support_fputil_dyadic_float", ":__support_fputil_fma", ":__support_fputil_multiply_add", ":__support_fputil_polyeval", - ":__support_fputil_double_double", - ":__support_fputil_dyadic_float", + ":__support_integer_literals", ":__support_macros_optimization", ":__support_macros_properties_cpu_features", ":common_constants", diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel index e691d3c3d2ebdd..8e94a84f586f4c 100644 --- a/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel @@ -50,6 +50,7 @@ libc_test( ], deps = [ "//libc:__support_fputil_fp_bits", + "//libc:__support_integer_literals", "//libc:__support_str_to_float", "//libc:__support_uint128", ], @@ -61,6 +62,7 @@ libc_test( deps = [ "//libc:__support_cpp_span", "//libc:__support_cpp_string_view", + "//libc:__support_integer_literals", "//libc:__support_integer_to_string", "//libc:__support_uint", "//libc:__support_uint128", From 0f33c54854c4c7ef73ec56a881f63089c504a7bf Mon Sep 17 00:00:00 2001 From: Karthika Devi C Date: Wed, 14 Feb 2024 23:22:47 +0530 Subject: [PATCH 158/240] [polly][ScheduleOptimizer] Use IslMaxOperationsGuard helper instead of explicit restoration (#79303) To fix long compile time 
issue of Schedule optimizer, patch #77280 sets the upper cap on max ISL operations. In case of bailing out when ISL quota is hit, error handling behavior was restored manually. This commit replaces the restoration code with IslMaxOperationsGuard helper and also removes redundant early return. --- polly/lib/Transform/ScheduleOptimizer.cpp | 23 ++++++------------- .../ScheduleOptimizer/schedule_computeout.ll | 4 ++-- 2 files changed, 9 insertions(+), 18 deletions(-) diff --git a/polly/lib/Transform/ScheduleOptimizer.cpp b/polly/lib/Transform/ScheduleOptimizer.cpp index 8ee2b66339adbc..5a0ea3b4067542 100644 --- a/polly/lib/Transform/ScheduleOptimizer.cpp +++ b/polly/lib/Transform/ScheduleOptimizer.cpp @@ -868,23 +868,14 @@ static void runIslScheduleOptimizer( SC = SC.set_validity(Validity); SC = SC.set_coincidence(Validity); - // Save error handling behavior - long MaxOperations = isl_ctx_get_max_operations(Ctx); - isl_ctx_set_max_operations(Ctx, ScheduleComputeOut); - Schedule = SC.compute_schedule(); - bool ScheduleQuota = false; - if (isl_ctx_last_error(Ctx) == isl_error_quota) { - isl_ctx_reset_error(Ctx); - LLVM_DEBUG( - dbgs() << "Schedule optimizer calculation exceeds ISL quota\n"); - ScheduleQuota = true; - } - isl_options_set_on_error(Ctx, ISL_ON_ERROR_ABORT); - isl_ctx_reset_operations(Ctx); - isl_ctx_set_max_operations(Ctx, MaxOperations); + { + IslMaxOperationsGuard MaxOpGuard(Ctx, ScheduleComputeOut); + Schedule = SC.compute_schedule(); - if (ScheduleQuota) - return; + if (MaxOpGuard.hasQuotaExceeded()) + LLVM_DEBUG( + dbgs() << "Schedule optimizer calculation exceeds ISL quota\n"); + } isl_options_set_on_error(Ctx, OnErrorStatus); diff --git a/polly/test/ScheduleOptimizer/schedule_computeout.ll b/polly/test/ScheduleOptimizer/schedule_computeout.ll index eb59f0e36ac64c..acc8601a31a839 100644 --- a/polly/test/ScheduleOptimizer/schedule_computeout.ll +++ b/polly/test/ScheduleOptimizer/schedule_computeout.ll @@ -1,8 +1,8 @@ -; RUN: opt %loadPolly -S 
-polly-optree -polly-delicm -polly-opt-isl -polly-schedule-computeout=100000 -debug-only="polly-opt-isl" < %s 2>&1 | FileCheck %s +; RUN: opt %loadPolly -S -polly-optree -polly-delicm -polly-opt-isl -polly-schedule-computeout=10000 -debug-only="polly-opt-isl" < %s 2>&1 | FileCheck %s ; REQUIRES: asserts ; Bailout if the computations of schedule compute exceeds the max scheduling quota. -; Max compute out is initialized to 300000, Here it is set to 100000 for test purpose. +; Max compute out is initialized to 300000, Here it is set to 10000 for test purpose. target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64-unknown-linux-gnu" From 78d401b02a2dc1ed5446546a149030184f24bee0 Mon Sep 17 00:00:00 2001 From: Guillaume Chatelet Date: Wed, 14 Feb 2024 18:52:56 +0100 Subject: [PATCH 159/240] Revert "[libc][NFC] Use user defined literals to build 128 and 256 bit constants." (#81771) Reverts llvm/llvm-project#81746 --- libc/src/__support/integer_literals.h | 10 +- libc/src/math/generic/CMakeLists.txt | 40 +- libc/src/math/generic/exp.cpp | 24 +- libc/src/math/generic/exp10.cpp | 20 +- libc/src/math/generic/exp2.cpp | 20 +- libc/src/math/generic/expm1.cpp | 26 +- libc/src/math/generic/log.cpp | 1257 ++++++++-------- libc/src/math/generic/log10.cpp | 1262 ++++++++--------- libc/src/math/generic/log1p.cpp | 944 ++++++------ libc/src/math/generic/log2.cpp | 1251 ++++++++-------- libc/test/src/__support/CMakeLists.txt | 4 +- .../test/src/__support/FPUtil/fpbits_test.cpp | 42 +- .../src/__support/integer_to_string_test.cpp | 39 +- .../src/__support/str_to_long_double_test.cpp | 48 +- .../llvm-project-overlay/libc/BUILD.bazel | 24 +- .../libc/test/src/__support/BUILD.bazel | 2 - 16 files changed, 2490 insertions(+), 2523 deletions(-) diff --git a/libc/src/__support/integer_literals.h b/libc/src/__support/integer_literals.h index ae09c5462a435b..c8e965c1a03a05 100644 --- a/libc/src/__support/integer_literals.h +++ 
b/libc/src/__support/integer_literals.h @@ -22,19 +22,19 @@ namespace LIBC_NAMESPACE { LIBC_INLINE constexpr uint8_t operator""_u8(unsigned long long value) { - return static_cast(value); + return value; } LIBC_INLINE constexpr uint16_t operator""_u16(unsigned long long value) { - return static_cast(value); + return value; } LIBC_INLINE constexpr uint32_t operator""_u32(unsigned long long value) { - return static_cast(value); + return value; } LIBC_INLINE constexpr uint64_t operator""_u64(unsigned long long value) { - return static_cast(value); + return value; } namespace internal { @@ -76,7 +76,7 @@ template struct DigitBuffer { // Returns the digit for a particular character. // Returns 255 if the character is invalid. LIBC_INLINE static constexpr uint8_t get_digit_value(const char c) { - const auto to_lower = [](char c) -> uint8_t { return c | 32; }; + const auto to_lower = [](char c) { return c | 32; }; const auto is_digit = [](char c) { return c >= '0' && c <= '9'; }; const auto is_alpha = [](char c) { return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'); diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index ab7f5a91ab7bac..fdf383f070697e 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -622,8 +622,6 @@ add_entrypoint_object( DEPENDS .common_constants .explogxf - libc.include.errno - libc.include.math libc.src.__support.CPP.bit libc.src.__support.CPP.optional libc.src.__support.FPUtil.dyadic_float @@ -634,9 +632,10 @@ add_entrypoint_object( libc.src.__support.FPUtil.polyeval libc.src.__support.FPUtil.rounding_mode libc.src.__support.FPUtil.triple_double - libc.src.__support.integer_literals libc.src.__support.macros.optimization + libc.include.errno libc.src.errno.errno + libc.include.math COMPILE_OPTIONS -O3 ) @@ -673,8 +672,6 @@ add_entrypoint_object( DEPENDS .common_constants .explogxf - libc.include.errno - libc.include.math libc.src.__support.CPP.bit 
libc.src.__support.CPP.optional libc.src.__support.FPUtil.dyadic_float @@ -685,9 +682,10 @@ add_entrypoint_object( libc.src.__support.FPUtil.polyeval libc.src.__support.FPUtil.rounding_mode libc.src.__support.FPUtil.triple_double - libc.src.__support.integer_literals libc.src.__support.macros.optimization + libc.include.errno libc.src.errno.errno + libc.include.math COMPILE_OPTIONS -O3 ) @@ -733,8 +731,6 @@ add_entrypoint_object( DEPENDS .common_constants .explogxf - libc.include.errno - libc.include.math libc.src.__support.CPP.bit libc.src.__support.CPP.optional libc.src.__support.FPUtil.dyadic_float @@ -745,9 +741,10 @@ add_entrypoint_object( libc.src.__support.FPUtil.polyeval libc.src.__support.FPUtil.rounding_mode libc.src.__support.FPUtil.triple_double - libc.src.__support.integer_literals libc.src.__support.macros.optimization + libc.include.errno libc.src.errno.errno + libc.include.math COMPILE_OPTIONS -O3 ) @@ -794,8 +791,6 @@ add_entrypoint_object( DEPENDS .common_constants .explogxf - libc.include.errno - libc.include.math libc.src.__support.CPP.bit libc.src.__support.CPP.optional libc.src.__support.FPUtil.dyadic_float @@ -806,9 +801,10 @@ add_entrypoint_object( libc.src.__support.FPUtil.polyeval libc.src.__support.FPUtil.rounding_mode libc.src.__support.FPUtil.triple_double - libc.src.__support.integer_literals libc.src.__support.macros.optimization + libc.include.errno libc.src.errno.errno + libc.include.math COMPILE_OPTIONS -O3 ) @@ -1065,13 +1061,12 @@ add_entrypoint_object( DEPENDS .common_constants .log_range_reduction - libc.src.__support.FPUtil.double_double - libc.src.__support.FPUtil.dyadic_float libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.multiply_add libc.src.__support.FPUtil.polyeval - libc.src.__support.integer_literals + libc.src.__support.FPUtil.double_double + libc.src.__support.FPUtil.dyadic_float libc.src.__support.macros.optimization COMPILE_OPTIONS -O3 @@ -1102,13 +1097,12 @@ 
add_entrypoint_object( ../log1p.h DEPENDS .common_constants - libc.src.__support.FPUtil.double_double - libc.src.__support.FPUtil.dyadic_float libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.multiply_add libc.src.__support.FPUtil.polyeval - libc.src.__support.integer_literals + libc.src.__support.FPUtil.double_double + libc.src.__support.FPUtil.dyadic_float libc.src.__support.macros.optimization COMPILE_OPTIONS -O3 @@ -1141,13 +1135,12 @@ add_entrypoint_object( DEPENDS .common_constants .log_range_reduction - libc.src.__support.FPUtil.double_double - libc.src.__support.FPUtil.dyadic_float libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.multiply_add libc.src.__support.FPUtil.polyeval - libc.src.__support.integer_literals + libc.src.__support.FPUtil.double_double + libc.src.__support.FPUtil.dyadic_float libc.src.__support.macros.optimization COMPILE_OPTIONS -O3 @@ -1180,13 +1173,12 @@ add_entrypoint_object( DEPENDS .common_constants .log_range_reduction - libc.src.__support.FPUtil.double_double - libc.src.__support.FPUtil.dyadic_float libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.multiply_add libc.src.__support.FPUtil.polyeval - libc.src.__support.integer_literals + libc.src.__support.FPUtil.double_double + libc.src.__support.FPUtil.dyadic_float libc.src.__support.macros.optimization COMPILE_OPTIONS -O3 diff --git a/libc/src/math/generic/exp.cpp b/libc/src/math/generic/exp.cpp index 42a4491131a04e..f23170f8ed4259 100644 --- a/libc/src/math/generic/exp.cpp +++ b/libc/src/math/generic/exp.cpp @@ -21,7 +21,6 @@ #include "src/__support/FPUtil/rounding_mode.h" #include "src/__support/FPUtil/triple_double.h" #include "src/__support/common.h" -#include "src/__support/integer_literals.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include @@ -32,7 +31,6 @@ using fputil::DoubleDouble; using fputil::TripleDouble; 
using Float128 = typename fputil::DyadicFloat<128>; using Sign = fputil::Sign; -using LIBC_NAMESPACE::operator""_u128; // log2(e) constexpr double LOG2_E = 0x1.71547652b82fep+0; @@ -99,15 +97,21 @@ DoubleDouble poly_approx_dd(const DoubleDouble &dx) { // For |dx| < 2^-13 + 2^-30: // | output - exp(dx) | < 2^-126. Float128 poly_approx_f128(const Float128 &dx) { + using MType = typename Float128::MantissaType; + constexpr Float128 COEFFS_128[]{ - {Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128}, // 1.0 - {Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128}, // 1.0 - {Sign::POS, -128, 0x80000000'00000000'00000000'00000000_u128}, // 0.5 - {Sign::POS, -130, 0xaaaaaaaa'aaaaaaaa'aaaaaaaa'aaaaaaab_u128}, // 1/6 - {Sign::POS, -132, 0xaaaaaaaa'aaaaaaaa'aaaaaaaa'aaaaaaab_u128}, // 1/24 - {Sign::POS, -134, 0x88888888'88888888'88888888'88888889_u128}, // 1/120 - {Sign::POS, -137, 0xb60b60b6'0b60b60b'60b60b60'b60b60b6_u128}, // 1/720 - {Sign::POS, -140, 0xd00d00d0'0d00d00d'00d00d00'd00d00d0_u128}, // 1/5040 + {Sign::POS, -127, MType({0, 0x8000000000000000})}, // 1.0 + {Sign::POS, -127, MType({0, 0x8000000000000000})}, // 1.0 + {Sign::POS, -128, MType({0, 0x8000000000000000})}, // 0.5 + {Sign::POS, -130, MType({0xaaaaaaaaaaaaaaab, 0xaaaaaaaaaaaaaaaa})}, // 1/6 + {Sign::POS, -132, + MType({0xaaaaaaaaaaaaaaab, 0xaaaaaaaaaaaaaaaa})}, // 1/24 + {Sign::POS, -134, + MType({0x8888888888888889, 0x8888888888888888})}, // 1/120 + {Sign::POS, -137, + MType({0x60b60b60b60b60b6, 0xb60b60b60b60b60b})}, // 1/720 + {Sign::POS, -140, + MType({0x00d00d00d00d00d0, 0xd00d00d00d00d00d})}, // 1/5040 }; Float128 p = fputil::polyeval(dx, COEFFS_128[0], COEFFS_128[1], COEFFS_128[2], diff --git a/libc/src/math/generic/exp10.cpp b/libc/src/math/generic/exp10.cpp index 72ece669765688..6b40f5561845d8 100644 --- a/libc/src/math/generic/exp10.cpp +++ b/libc/src/math/generic/exp10.cpp @@ -21,7 +21,6 @@ #include "src/__support/FPUtil/rounding_mode.h" #include 
"src/__support/FPUtil/triple_double.h" #include "src/__support/common.h" -#include "src/__support/integer_literals.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include @@ -32,7 +31,6 @@ using fputil::DoubleDouble; using fputil::TripleDouble; using Float128 = typename fputil::DyadicFloat<128>; using Sign = fputil::Sign; -using LIBC_NAMESPACE::operator""_u128; // log2(10) constexpr double LOG2_10 = 0x1.a934f0979a371p+1; @@ -101,15 +99,17 @@ DoubleDouble poly_approx_dd(const DoubleDouble &dx) { // For |dx| < 2^-14: // | output - 10^dx | < 1.5 * 2^-124. Float128 poly_approx_f128(const Float128 &dx) { + using MType = typename Float128::MantissaType; + constexpr Float128 COEFFS_128[]{ - {Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128}, // 1.0 - {Sign::POS, -126, 0x935d8ddd'aaa8ac16'ea56d62b'82d30a2d_u128}, - {Sign::POS, -126, 0xa9a92639'e753443a'80a99ce7'5f4d5bdb_u128}, - {Sign::POS, -126, 0x82382c8e'f1652304'6a4f9d7d'bf6c9635_u128}, - {Sign::POS, -124, 0x12bd7609'fd98c44c'34578701'9216c7af_u128}, - {Sign::POS, -127, 0x450a7ff4'7535d889'cc41ed7e'0d27aee5_u128}, - {Sign::POS, -130, 0xd3f6b844'702d636b'8326bb91'a6e7601d_u128}, - {Sign::POS, -130, 0x45b937f0'd05bb1cd'fa7b46df'314112a9_u128}, + {Sign::POS, -127, MType({0, 0x8000000000000000})}, // 1.0 + {Sign::POS, -126, MType({0xea56d62b82d30a2d, 0x935d8dddaaa8ac16})}, + {Sign::POS, -126, MType({0x80a99ce75f4d5bdb, 0xa9a92639e753443a})}, + {Sign::POS, -126, MType({0x6a4f9d7dbf6c9635, 0x82382c8ef1652304})}, + {Sign::POS, -124, MType({0x345787019216c7af, 0x12bd7609fd98c44c})}, + {Sign::POS, -127, MType({0xcc41ed7e0d27aee5, 0x450a7ff47535d889})}, + {Sign::POS, -130, MType({0x8326bb91a6e7601d, 0xd3f6b844702d636b})}, + {Sign::POS, -130, MType({0xfa7b46df314112a9, 0x45b937f0d05bb1cd})}, }; Float128 p = fputil::polyeval(dx, COEFFS_128[0], COEFFS_128[1], COEFFS_128[2], diff --git a/libc/src/math/generic/exp2.cpp b/libc/src/math/generic/exp2.cpp index 83f545eb116bd3..01e66d1ae00f70 100644 --- 
a/libc/src/math/generic/exp2.cpp +++ b/libc/src/math/generic/exp2.cpp @@ -21,7 +21,6 @@ #include "src/__support/FPUtil/rounding_mode.h" #include "src/__support/FPUtil/triple_double.h" #include "src/__support/common.h" -#include "src/__support/integer_literals.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include @@ -32,7 +31,6 @@ using fputil::DoubleDouble; using fputil::TripleDouble; using Float128 = typename fputil::DyadicFloat<128>; using Sign = fputil::Sign; -using LIBC_NAMESPACE::operator""_u128; // Error bounds: // Errors when using double precision. @@ -90,15 +88,17 @@ DoubleDouble poly_approx_dd(const DoubleDouble &dx) { // For |dx| < 2^-13 + 2^-30: // | output - exp(dx) | < 2^-126. Float128 poly_approx_f128(const Float128 &dx) { + using MType = typename Float128::MantissaType; + constexpr Float128 COEFFS_128[]{ - {Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128}, // 1.0 - {Sign::POS, -128, 0xb17217f7'd1cf79ab'c9e3b398'03f2f6af_u128}, - {Sign::POS, -128, 0x3d7f7bff'058b1d50'de2d60dd'9c9a1d9f_u128}, - {Sign::POS, -132, 0xe35846b8'2505fc59'9d3b15d9'e7fb6897_u128}, - {Sign::POS, -134, 0x9d955b7d'd273b94e'184462f6'bcd2b9e7_u128}, - {Sign::POS, -137, 0xaec3ff3c'53398883'39ea1bb9'64c51a89_u128}, - {Sign::POS, -138, 0x2861225f'345c396a'842c5341'8fa8ae61_u128}, - {Sign::POS, -144, 0xffe5fe2d'109a319d'7abeb5ab'd5ad2079_u128}, + {Sign::POS, -127, MType({0, 0x8000000000000000})}, // 1.0 + {Sign::POS, -128, MType({0xc9e3b39803f2f6af, 0xb17217f7d1cf79ab})}, + {Sign::POS, -128, MType({0xde2d60dd9c9a1d9f, 0x3d7f7bff058b1d50})}, + {Sign::POS, -132, MType({0x9d3b15d9e7fb6897, 0xe35846b82505fc59})}, + {Sign::POS, -134, MType({0x184462f6bcd2b9e7, 0x9d955b7dd273b94e})}, + {Sign::POS, -137, MType({0x39ea1bb964c51a89, 0xaec3ff3c53398883})}, + {Sign::POS, -138, MType({0x842c53418fa8ae61, 0x2861225f345c396a})}, + {Sign::POS, -144, MType({0x7abeb5abd5ad2079, 0xffe5fe2d109a319d})}, }; Float128 p = fputil::polyeval(dx, COEFFS_128[0], COEFFS_128[1], 
COEFFS_128[2], diff --git a/libc/src/math/generic/expm1.cpp b/libc/src/math/generic/expm1.cpp index 9f14a8c2068ec1..c1fb80309d7b46 100644 --- a/libc/src/math/generic/expm1.cpp +++ b/libc/src/math/generic/expm1.cpp @@ -22,7 +22,6 @@ #include "src/__support/FPUtil/rounding_mode.h" #include "src/__support/FPUtil/triple_double.h" #include "src/__support/common.h" -#include "src/__support/integer_literals.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include @@ -40,7 +39,6 @@ using fputil::DoubleDouble; using fputil::TripleDouble; using Float128 = typename fputil::DyadicFloat<128>; using Sign = fputil::Sign; -using LIBC_NAMESPACE::operator""_u128; // log2(e) constexpr double LOG2_E = 0x1.71547652b82fep+0; @@ -109,14 +107,20 @@ DoubleDouble poly_approx_dd(const DoubleDouble &dx) { // For |dx| < 2^-13 + 2^-30: // | output - exp(dx) | < 2^-126. Float128 poly_approx_f128(const Float128 &dx) { + using MType = typename Float128::MantissaType; + constexpr Float128 COEFFS_128[]{ - {Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128}, // 1.0 - {Sign::POS, -128, 0x80000000'00000000'00000000'00000000_u128}, // 0.5 - {Sign::POS, -130, 0xaaaaaaaa'aaaaaaaa'aaaaaaaa'aaaaaaab_u128}, // 1/6 - {Sign::POS, -132, 0xaaaaaaaa'aaaaaaaa'aaaaaaaa'aaaaaaab_u128}, // 1/24 - {Sign::POS, -134, 0x88888888'88888888'88888888'88888889_u128}, // 1/120 - {Sign::POS, -137, 0xb60b60b6'0b60b60b'60b60b60'b60b60b6_u128}, // 1/720 - {Sign::POS, -140, 0xd00d00d0'0d00d00d'00d00d00'd00d00d0_u128}, // 1/5040 + {Sign::POS, -127, MType({0, 0x8000000000000000})}, // 1.0 + {Sign::POS, -128, MType({0, 0x8000000000000000})}, // 0.5 + {Sign::POS, -130, MType({0xaaaaaaaaaaaaaaab, 0xaaaaaaaaaaaaaaaa})}, // 1/6 + {Sign::POS, -132, + MType({0xaaaaaaaaaaaaaaab, 0xaaaaaaaaaaaaaaaa})}, // 1/24 + {Sign::POS, -134, + MType({0x8888888888888889, 0x8888888888888888})}, // 1/120 + {Sign::POS, -137, + MType({0x60b60b60b60b60b6, 0xb60b60b60b60b60b})}, // 1/720 + {Sign::POS, -140, + 
MType({0x00d00d00d00d00d0, 0xd00d00d00d00d00d})}, // 1/5040 }; Float128 p = fputil::polyeval(dx, COEFFS_128[0], COEFFS_128[1], COEFFS_128[2], @@ -142,6 +146,7 @@ std::ostream &operator<<(std::ostream &OS, const DoubleDouble &r) { // TODO(lntue): investigate triple-double precision implementation for this // step. Float128 expm1_f128(double x, double kd, int idx1, int idx2) { + using MType = typename Float128::MantissaType; // Recalculate dx: double t1 = fputil::multiply_add(kd, MLOG_2_EXP2_M12_HI, x); // exact @@ -165,8 +170,7 @@ Float128 expm1_f128(double x, double kd, int idx1, int idx2) { Float128 exp_mid = fputil::quick_mul(exp_mid1, exp_mid2); int hi = static_cast(kd) >> 12; - Float128 minus_one{Sign::NEG, -127 - hi, - 0x80000000'00000000'00000000'00000000_u128}; + Float128 minus_one{Sign::NEG, -127 - hi, MType({0, 0x8000000000000000})}; Float128 exp_mid_m1 = fputil::quick_add(exp_mid, minus_one); diff --git a/libc/src/math/generic/log.cpp b/libc/src/math/generic/log.cpp index d770112599a98f..9edc52b8a8e24c 100644 --- a/libc/src/math/generic/log.cpp +++ b/libc/src/math/generic/log.cpp @@ -14,7 +14,6 @@ #include "src/__support/FPUtil/dyadic_float.h" #include "src/__support/FPUtil/multiply_add.h" #include "src/__support/common.h" -#include "src/__support/integer_literals.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include "common_constants.h" @@ -24,8 +23,8 @@ namespace LIBC_NAMESPACE { // 128-bit precision dyadic floating point numbers. using Float128 = typename fputil::DyadicFloat<128>; +using MType = typename Float128::MantissaType; using Sign = fputil::Sign; -using LIBC_NAMESPACE::operator""_u128; namespace { @@ -35,152 +34,150 @@ constexpr double HI_ERR = 0x1.0p-85; // Extra errors from P is from using x^2 to reduce evaluation latency. 
constexpr double P_ERR = 0x1.0p-50; -// log(2) with 128-bit precision generated by SageMath with: -// def format_hex(value): -// l = hex(value)[2:] -// n = 8 -// x = [l[i:i + n] for i in range(0, len(l), n)] -// return "0x" + "'".join(x) + "_uint128" -// (s, m, e) = RealField(128)(2).log().sign_mantissa_exponent(); -// print(format_hex(m)); +// log(2) with 128-bit prepcision generated by SageMath with: +// sage: (s, m, e) = RealField(128)(2).log().sign_mantissa_exponent(); +// sage: print("MType({", hex(m % 2^64), ",", hex((m >> 64) % 2^64), "})"); const Float128 LOG_2(Sign::POS, /*exponent=*/-128, /*mantissa=*/ - 0xb17217f7'd1cf79ab'c9e3b398'03f2f6af_u128); + MType({0xc9e3b39803f2f6af, 0xb17217f7d1cf79ab})); alignas(64) const LogRR LOG_TABLE = { // -log(r) with 128-bit precision generated by SageMath with: + // // for i in range(128): // r = 2^-8 * ceil( 2^8 * (1 - 2^(-8)) / (1 + i*2^(-7)) ); // s, m, e = RealField(128)(r).log().sign_mantissa_exponent(); - // print("{Sign::POS,", e, ", format_hex(m), "},"); + // print("{Sign::POS,", e, ", MType({", hex(m % 2^64), ",", hex((m >> 64) + // % 2^64), + // "})},"); /* .step_1= */ { - {Sign::POS, 0, 0_u128}, - {Sign::POS, -134, 0x8080abac'46f38946'662d417c'ed007a46_u128}, - {Sign::POS, -133, 0x8102b2c4'9ac23a4f'91d082dc'e3ddcd38_u128}, - {Sign::POS, -133, 0xc2492946'4655f45c'da5f3cc0'b3251dbd_u128}, - {Sign::POS, -132, 0x820aec4f'3a222380'b9e3aea6'c444ef07_u128}, - {Sign::POS, -132, 0xa33576a1'6f1f4c64'521016bd'904dc968_u128}, - {Sign::POS, -132, 0xc4a550a4'fd9a19a8'be97660a'23cc540d_u128}, - {Sign::POS, -132, 0xe65b9e6e'ed965c36'e09f5fe2'058d6006_u128}, - {Sign::POS, -131, 0x842cc5ac'f1d03445'1fecdfa8'19b96098_u128}, - {Sign::POS, -131, 0x8cb9de8a'32ab368a'a7c98595'30a45153_u128}, - {Sign::POS, -131, 0x9defad3e'8f73217a'976d3b5b'45f6ca0b_u128}, - {Sign::POS, -131, 0xaf4ad26c'bc8e5be7'0e8b8b88'a14ff0ce_u128}, - {Sign::POS, -131, 0xb8069857'560707a3'6a677b4c'8bec22e1_u128}, - {Sign::POS, -131, 
0xc99af2ea'ca4c4570'eaf51f66'692844ba_u128}, - {Sign::POS, -131, 0xdb56446d'6ad8deff'a8112e35'a60e6375_u128}, - {Sign::POS, -131, 0xe442c00d'e2591b47'196ab34c'e0bccd12_u128}, - {Sign::POS, -131, 0xf639cc18'5088fe5d'4066e87f'2c0f7340_u128}, - {Sign::POS, -131, 0xff4489ce'deab2ca6'c17bd40d'8d9291ec_u128}, - {Sign::POS, -130, 0x88bc7411'3f23def1'9c5a0fe3'96f40f1e_u128}, - {Sign::POS, -130, 0x8d515bf1'1fb94f1c'88713268'840cbcc0_u128}, - {Sign::POS, -130, 0x968b0864'3409ceb6'65c0da50'6a088484_u128}, - {Sign::POS, -130, 0x9b2fe580'ac80b17d'411a5b94'4aca8708_u128}, - {Sign::POS, -130, 0xa489ec19'9dab06f2'a9fb6cf0'ecb411b7_u128}, - {Sign::POS, -130, 0xa93f2f25'0dac67d1'cad2fb8d'48054ae0_u128}, - {Sign::POS, -130, 0xb2ba75f4'6099cf8b'2c3c2e77'904afa78_u128}, - {Sign::POS, -130, 0xb780945b'ab55dce4'34c7bc3d'32750fde_u128}, - {Sign::POS, -130, 0xc11e0b2a'8d1e0ddb'9a631e83'0fd30904_u128}, - {Sign::POS, -130, 0xc5f57f59'c7f46155'aa8b6997'a402bf30_u128}, - {Sign::POS, -130, 0xcad2d6e7'b80bf914'2c507fb7'a3d0bf6a_u128}, - {Sign::POS, -130, 0xd49f69e4'56cf1b79'5f53bd2e'406e66e7_u128}, - {Sign::POS, -130, 0xd98ec2ba'de71e539'58a98f2a'd65bee9b_u128}, - {Sign::POS, -130, 0xde8439c1'dec56877'4d57da94'5b5d0aaa_u128}, - {Sign::POS, -130, 0xe881bf93'2af3dac0'c524848e'3443e040_u128}, - {Sign::POS, -130, 0xed89ed86'a44a01aa'11d49f96'cb88317b_u128}, - {Sign::POS, -130, 0xf29877ff'38809091'3b020fa1'820c9492_u128}, - {Sign::POS, -130, 0xf7ad6f26'e7ff2ef7'54d2238f'75f969b1_u128}, - {Sign::POS, -130, 0xfcc8e365'9d9bcbec'ca0cdf30'1431b60f_u128}, - {Sign::POS, -129, 0x8389c302'6ac3139b'62dda9d2'270fa1f4_u128}, - {Sign::POS, -129, 0x86216b3b'0b17188b'163ceae8'8f720f1e_u128}, - {Sign::POS, -129, 0x88bc7411'3f23def1'9c5a0fe3'96f40f1e_u128}, - {Sign::POS, -129, 0x8b5ae65d'67db9acd'f7a51681'26a58b9a_u128}, - {Sign::POS, -129, 0x8dfccb1a'd35ca6ed'5147bdb6'ddcaf59c_u128}, - {Sign::POS, -129, 0x934b1089'a6dc93c1'df5bb3b6'0554e152_u128}, - {Sign::POS, -129, 0x95f783e6'e49a9cfa'4a5004f3'ef063313_u128}, - 
{Sign::POS, -129, 0x98a78f0e'9ae71d85'2cdec347'84707839_u128}, - {Sign::POS, -129, 0x9b5b3bb5'f088b766'd878bbe3'd392be25_u128}, - {Sign::POS, -129, 0x9e1293b9'998c1daa'5b035eae'273a855f_u128}, - {Sign::POS, -129, 0xa0cda11e'af46390d'bb243827'3918db7e_u128}, - {Sign::POS, -129, 0xa38c6e13'8e20d831'f698298a'dddd7f32_u128}, - {Sign::POS, -129, 0xa64f04f0'b961df76'e4f5275c'2d15c21f_u128}, - {Sign::POS, -129, 0xa9157039'c51ebe70'8164c759'686a2209_u128}, - {Sign::POS, -129, 0xabdfba9e'468fd6f6'f72ea077'49ce6bd3_u128}, - {Sign::POS, -129, 0xaeadeefa'caf97d35'7dd6e688'ebb13b03_u128}, - {Sign::POS, -129, 0xb1801859'd56249dc'18ce51ff'f99479cd_u128}, - {Sign::POS, -129, 0xb45641f4'e350a0d3'2756eba0'0bc33978_u128}, - {Sign::POS, -129, 0xb7307735'78cb90b2'be1116c3'466beb6d_u128}, - {Sign::POS, -129, 0xba0ec3b6'33dd8b09'49dc60b2'b059a60b_u128}, - {Sign::POS, -129, 0xbcf13343'e7d9ec7d'2efd1778'1bb3afec_u128}, - {Sign::POS, -129, 0xbfd7d1de'c0a8df6f'37eda996'244bccb0_u128}, - {Sign::POS, -129, 0xc2c2abbb'6e5fd56f'33337789'd592e296_u128}, - {Sign::POS, -129, 0xc5b1cd44'596fa51e'1a18fb8f'9f9ef280_u128}, - {Sign::POS, -129, 0xc8a5431a'dfb44ca5'688ce7c1'a75e341a_u128}, - {Sign::POS, -129, 0xcb9d1a18'9ab56e76'2d7e9307'c70c0668_u128}, - {Sign::POS, -129, 0xce995f50'af69d861'ef2f3f4f'861ad6a9_u128}, - {Sign::POS, -129, 0xd19a2011'27d3c645'7f9d79f5'1dcc7301_u128}, - {Sign::POS, -129, 0xd19a2011'27d3c645'7f9d79f5'1dcc7301_u128}, - {Sign::POS, -129, 0xd49f69e4'56cf1b79'5f53bd2e'406e66e7_u128}, - {Sign::POS, -129, 0xd7a94a92'466e833a'ad88bba7'd0cee8e0_u128}, - {Sign::POS, -129, 0xdab7d022'31484a92'96c20cca'6efe2ac5_u128}, - {Sign::POS, -129, 0xddcb08dc'0717d85b'f40a666c'87842843_u128}, - {Sign::POS, -129, 0xe0e30349'fd1cec80'7fe8e180'2aba24d6_u128}, - {Sign::POS, -129, 0xe0e30349'fd1cec80'7fe8e180'2aba24d6_u128}, - {Sign::POS, -129, 0xe3ffce3a'2aa64922'3eadb651'b49ac53a_u128}, - {Sign::POS, -129, 0xe72178c0'323a1a0f'304e1653'e71d9973_u128}, - {Sign::POS, -129, 
0xea481236'f7d35baf'e9a767a8'0d6d97e8_u128}, - {Sign::POS, -129, 0xed73aa42'64b0ade9'4f91cf4b'33e42998_u128}, - {Sign::POS, -129, 0xf0a450d1'39366ca6'fc66eb64'08ff6433_u128}, - {Sign::POS, -129, 0xf0a450d1'39366ca6'fc66eb64'08ff6433_u128}, - {Sign::POS, -129, 0xf3da161e'ed6b9aaf'ac8d42f7'8d3e65d3_u128}, - {Sign::POS, -129, 0xf7150ab5'a09f27f4'5a470250'd40ebe90_u128}, - {Sign::POS, -129, 0xfa553f70'18c966f2'b780a545'a1b54dcf_u128}, - {Sign::POS, -129, 0xfa553f70'18c966f2'b780a545'a1b54dcf_u128}, - {Sign::POS, -129, 0xfd9ac57b'd244217e'8f05924d'258c14c5_u128}, - {Sign::POS, -128, 0x8072d72d'903d588b'89d1b09c'70c4010a_u128}, - {Sign::POS, -128, 0x821b05f3'b01d6774'030d58c3'f7e2ea1f_u128}, - {Sign::POS, -128, 0x821b05f3'b01d6774'030d58c3'f7e2ea1f_u128}, - {Sign::POS, -128, 0x83c5f829'9e2b4091'20f6fafe'8fbb68b9_u128}, - {Sign::POS, -128, 0x8573b716'82a7d21a'e21f9f89'c1ab80b2_u128}, - {Sign::POS, -128, 0x8573b716'82a7d21a'e21f9f89'c1ab80b2_u128}, - {Sign::POS, -128, 0x87244c30'8e670a66'01e005d0'6dbfa8f8_u128}, - {Sign::POS, -128, 0x88d7c11e'3ad53cdc'223111a7'07b6de2c_u128}, - {Sign::POS, -128, 0x88d7c11e'3ad53cdc'223111a7'07b6de2c_u128}, - {Sign::POS, -128, 0x8a8e1fb7'94b09134'2eb628db'a173c82d_u128}, - {Sign::POS, -128, 0x8c477207'91e53313'be2ad194'15fe25a5_u128}, - {Sign::POS, -128, 0x8c477207'91e53313'be2ad194'15fe25a5_u128}, - {Sign::POS, -128, 0x8e03c24d'73003959'bddae1cc'ce247838_u128}, - {Sign::POS, -128, 0x8fc31afe'30b2c6de'9b00bf16'7e95da67_u128}, - {Sign::POS, -128, 0x8fc31afe'30b2c6de'9b00bf16'7e95da67_u128}, - {Sign::POS, -128, 0x918586c5'f5e4bf01'9b92199e'd1a4bab1_u128}, - {Sign::POS, -128, 0x934b1089'a6dc93c1'df5bb3b6'0554e152_u128}, - {Sign::POS, -128, 0x934b1089'a6dc93c1'df5bb3b6'0554e152_u128}, - {Sign::POS, -128, 0x9513c368'76083695'f3cbc416'a2418012_u128}, - {Sign::POS, -128, 0x96dfaabd'86fa1646'be1188fb'c94e2f15_u128}, - {Sign::POS, -128, 0x96dfaabd'86fa1646'be1188fb'c94e2f15_u128}, - {Sign::POS, -128, 0x98aed221'a03458b6'1d2f8932'1647b358_u128}, - 
{Sign::POS, -128, 0x98aed221'a03458b6'1d2f8932'1647b358_u128}, - {Sign::POS, -128, 0x9a81456c'ec642e0f'e549f9aa'ea3cb5e1_u128}, - {Sign::POS, -128, 0x9c5710b8'cbb73a42'a2554b2d'd4619e63_u128}, - {Sign::POS, -128, 0x9c5710b8'cbb73a42'a2554b2d'd4619e63_u128}, - {Sign::POS, -128, 0x9e304061'b5fda919'30603d87'b6df81ad_u128}, - {Sign::POS, -128, 0x9e304061'b5fda919'30603d87'b6df81ad_u128}, - {Sign::POS, -128, 0xa00ce109'2e5498c3'67879c5a'30cd1242_u128}, - {Sign::POS, -128, 0xa1ecff97'c91e267b'0b7efae0'8e597e16_u128}, - {Sign::POS, -128, 0xa1ecff97'c91e267b'0b7efae0'8e597e16_u128}, - {Sign::POS, -128, 0xa3d0a93f'45169a4a'83594fab'088c0d65_u128}, - {Sign::POS, -128, 0xa3d0a93f'45169a4a'83594fab'088c0d65_u128}, - {Sign::POS, -128, 0xa5b7eb7c'b860fb88'af6a62a0'dec6e073_u128}, - {Sign::POS, -128, 0xa5b7eb7c'b860fb88'af6a62a0'dec6e073_u128}, - {Sign::POS, -128, 0xa7a2d41a'd270c9d7'49362382'a768847a_u128}, - {Sign::POS, -128, 0xa7a2d41a'd270c9d7'49362382'a768847a_u128}, - {Sign::POS, -128, 0xa9917134'33c2b998'8ba4aea6'14d05701_u128}, - {Sign::POS, -128, 0xa9917134'33c2b998'8ba4aea6'14d05701_u128}, - {Sign::POS, -128, 0xab83d135'dc633301'7fe6607b'a902ef3c_u128}, - {Sign::POS, -128, 0xab83d135'dc633301'7fe6607b'a902ef3c_u128}, - {Sign::POS, -128, 0xad7a02e1'b24efd31'd60864fd'949b4bd3_u128}, - {Sign::POS, -128, 0xad7a02e1'b24efd31'd60864fd'949b4bd3_u128}, - {Sign::POS, -128, 0xaf741551'20c9011c'066d235e'e63073dd_u128}, - {Sign::POS, 0, 0_u128}, + {Sign::POS, 0, MType(0)}, + {Sign::POS, -134, MType({0x662d417ced007a46, 0x8080abac46f38946})}, + {Sign::POS, -133, MType({0x91d082dce3ddcd38, 0x8102b2c49ac23a4f})}, + {Sign::POS, -133, MType({0xda5f3cc0b3251dbd, 0xc24929464655f45c})}, + {Sign::POS, -132, MType({0xb9e3aea6c444ef07, 0x820aec4f3a222380})}, + {Sign::POS, -132, MType({0x521016bd904dc968, 0xa33576a16f1f4c64})}, + {Sign::POS, -132, MType({0xbe97660a23cc540d, 0xc4a550a4fd9a19a8})}, + {Sign::POS, -132, MType({0xe09f5fe2058d6006, 0xe65b9e6eed965c36})}, + {Sign::POS, -131, 
MType({0x1fecdfa819b96098, 0x842cc5acf1d03445})}, + {Sign::POS, -131, MType({0xa7c9859530a45153, 0x8cb9de8a32ab368a})}, + {Sign::POS, -131, MType({0x976d3b5b45f6ca0b, 0x9defad3e8f73217a})}, + {Sign::POS, -131, MType({0xe8b8b88a14ff0ce, 0xaf4ad26cbc8e5be7})}, + {Sign::POS, -131, MType({0x6a677b4c8bec22e1, 0xb8069857560707a3})}, + {Sign::POS, -131, MType({0xeaf51f66692844ba, 0xc99af2eaca4c4570})}, + {Sign::POS, -131, MType({0xa8112e35a60e6375, 0xdb56446d6ad8deff})}, + {Sign::POS, -131, MType({0x196ab34ce0bccd12, 0xe442c00de2591b47})}, + {Sign::POS, -131, MType({0x4066e87f2c0f7340, 0xf639cc185088fe5d})}, + {Sign::POS, -131, MType({0xc17bd40d8d9291ec, 0xff4489cedeab2ca6})}, + {Sign::POS, -130, MType({0x9c5a0fe396f40f1e, 0x88bc74113f23def1})}, + {Sign::POS, -130, MType({0x88713268840cbcc0, 0x8d515bf11fb94f1c})}, + {Sign::POS, -130, MType({0x65c0da506a088484, 0x968b08643409ceb6})}, + {Sign::POS, -130, MType({0x411a5b944aca8708, 0x9b2fe580ac80b17d})}, + {Sign::POS, -130, MType({0xa9fb6cf0ecb411b7, 0xa489ec199dab06f2})}, + {Sign::POS, -130, MType({0xcad2fb8d48054ae0, 0xa93f2f250dac67d1})}, + {Sign::POS, -130, MType({0x2c3c2e77904afa78, 0xb2ba75f46099cf8b})}, + {Sign::POS, -130, MType({0x34c7bc3d32750fde, 0xb780945bab55dce4})}, + {Sign::POS, -130, MType({0x9a631e830fd30904, 0xc11e0b2a8d1e0ddb})}, + {Sign::POS, -130, MType({0xaa8b6997a402bf30, 0xc5f57f59c7f46155})}, + {Sign::POS, -130, MType({0x2c507fb7a3d0bf6a, 0xcad2d6e7b80bf914})}, + {Sign::POS, -130, MType({0x5f53bd2e406e66e7, 0xd49f69e456cf1b79})}, + {Sign::POS, -130, MType({0x58a98f2ad65bee9b, 0xd98ec2bade71e539})}, + {Sign::POS, -130, MType({0x4d57da945b5d0aaa, 0xde8439c1dec56877})}, + {Sign::POS, -130, MType({0xc524848e3443e040, 0xe881bf932af3dac0})}, + {Sign::POS, -130, MType({0x11d49f96cb88317b, 0xed89ed86a44a01aa})}, + {Sign::POS, -130, MType({0x3b020fa1820c9492, 0xf29877ff38809091})}, + {Sign::POS, -130, MType({0x54d2238f75f969b1, 0xf7ad6f26e7ff2ef7})}, + {Sign::POS, -130, MType({0xca0cdf301431b60f, 
0xfcc8e3659d9bcbec})}, + {Sign::POS, -129, MType({0x62dda9d2270fa1f4, 0x8389c3026ac3139b})}, + {Sign::POS, -129, MType({0x163ceae88f720f1e, 0x86216b3b0b17188b})}, + {Sign::POS, -129, MType({0x9c5a0fe396f40f1e, 0x88bc74113f23def1})}, + {Sign::POS, -129, MType({0xf7a5168126a58b9a, 0x8b5ae65d67db9acd})}, + {Sign::POS, -129, MType({0x5147bdb6ddcaf59c, 0x8dfccb1ad35ca6ed})}, + {Sign::POS, -129, MType({0xdf5bb3b60554e152, 0x934b1089a6dc93c1})}, + {Sign::POS, -129, MType({0x4a5004f3ef063313, 0x95f783e6e49a9cfa})}, + {Sign::POS, -129, MType({0x2cdec34784707839, 0x98a78f0e9ae71d85})}, + {Sign::POS, -129, MType({0xd878bbe3d392be25, 0x9b5b3bb5f088b766})}, + {Sign::POS, -129, MType({0x5b035eae273a855f, 0x9e1293b9998c1daa})}, + {Sign::POS, -129, MType({0xbb2438273918db7e, 0xa0cda11eaf46390d})}, + {Sign::POS, -129, MType({0xf698298adddd7f32, 0xa38c6e138e20d831})}, + {Sign::POS, -129, MType({0xe4f5275c2d15c21f, 0xa64f04f0b961df76})}, + {Sign::POS, -129, MType({0x8164c759686a2209, 0xa9157039c51ebe70})}, + {Sign::POS, -129, MType({0xf72ea07749ce6bd3, 0xabdfba9e468fd6f6})}, + {Sign::POS, -129, MType({0x7dd6e688ebb13b03, 0xaeadeefacaf97d35})}, + {Sign::POS, -129, MType({0x18ce51fff99479cd, 0xb1801859d56249dc})}, + {Sign::POS, -129, MType({0x2756eba00bc33978, 0xb45641f4e350a0d3})}, + {Sign::POS, -129, MType({0xbe1116c3466beb6d, 0xb730773578cb90b2})}, + {Sign::POS, -129, MType({0x49dc60b2b059a60b, 0xba0ec3b633dd8b09})}, + {Sign::POS, -129, MType({0x2efd17781bb3afec, 0xbcf13343e7d9ec7d})}, + {Sign::POS, -129, MType({0x37eda996244bccb0, 0xbfd7d1dec0a8df6f})}, + {Sign::POS, -129, MType({0x33337789d592e296, 0xc2c2abbb6e5fd56f})}, + {Sign::POS, -129, MType({0x1a18fb8f9f9ef280, 0xc5b1cd44596fa51e})}, + {Sign::POS, -129, MType({0x688ce7c1a75e341a, 0xc8a5431adfb44ca5})}, + {Sign::POS, -129, MType({0x2d7e9307c70c0668, 0xcb9d1a189ab56e76})}, + {Sign::POS, -129, MType({0xef2f3f4f861ad6a9, 0xce995f50af69d861})}, + {Sign::POS, -129, MType({0x7f9d79f51dcc7301, 0xd19a201127d3c645})}, + {Sign::POS, 
-129, MType({0x7f9d79f51dcc7301, 0xd19a201127d3c645})}, + {Sign::POS, -129, MType({0x5f53bd2e406e66e7, 0xd49f69e456cf1b79})}, + {Sign::POS, -129, MType({0xad88bba7d0cee8e0, 0xd7a94a92466e833a})}, + {Sign::POS, -129, MType({0x96c20cca6efe2ac5, 0xdab7d02231484a92})}, + {Sign::POS, -129, MType({0xf40a666c87842843, 0xddcb08dc0717d85b})}, + {Sign::POS, -129, MType({0x7fe8e1802aba24d6, 0xe0e30349fd1cec80})}, + {Sign::POS, -129, MType({0x7fe8e1802aba24d6, 0xe0e30349fd1cec80})}, + {Sign::POS, -129, MType({0x3eadb651b49ac53a, 0xe3ffce3a2aa64922})}, + {Sign::POS, -129, MType({0x304e1653e71d9973, 0xe72178c0323a1a0f})}, + {Sign::POS, -129, MType({0xe9a767a80d6d97e8, 0xea481236f7d35baf})}, + {Sign::POS, -129, MType({0x4f91cf4b33e42998, 0xed73aa4264b0ade9})}, + {Sign::POS, -129, MType({0xfc66eb6408ff6433, 0xf0a450d139366ca6})}, + {Sign::POS, -129, MType({0xfc66eb6408ff6433, 0xf0a450d139366ca6})}, + {Sign::POS, -129, MType({0xac8d42f78d3e65d3, 0xf3da161eed6b9aaf})}, + {Sign::POS, -129, MType({0x5a470250d40ebe90, 0xf7150ab5a09f27f4})}, + {Sign::POS, -129, MType({0xb780a545a1b54dcf, 0xfa553f7018c966f2})}, + {Sign::POS, -129, MType({0xb780a545a1b54dcf, 0xfa553f7018c966f2})}, + {Sign::POS, -129, MType({0x8f05924d258c14c5, 0xfd9ac57bd244217e})}, + {Sign::POS, -128, MType({0x89d1b09c70c4010a, 0x8072d72d903d588b})}, + {Sign::POS, -128, MType({0x30d58c3f7e2ea1f, 0x821b05f3b01d6774})}, + {Sign::POS, -128, MType({0x30d58c3f7e2ea1f, 0x821b05f3b01d6774})}, + {Sign::POS, -128, MType({0x20f6fafe8fbb68b9, 0x83c5f8299e2b4091})}, + {Sign::POS, -128, MType({0xe21f9f89c1ab80b2, 0x8573b71682a7d21a})}, + {Sign::POS, -128, MType({0xe21f9f89c1ab80b2, 0x8573b71682a7d21a})}, + {Sign::POS, -128, MType({0x1e005d06dbfa8f8, 0x87244c308e670a66})}, + {Sign::POS, -128, MType({0x223111a707b6de2c, 0x88d7c11e3ad53cdc})}, + {Sign::POS, -128, MType({0x223111a707b6de2c, 0x88d7c11e3ad53cdc})}, + {Sign::POS, -128, MType({0x2eb628dba173c82d, 0x8a8e1fb794b09134})}, + {Sign::POS, -128, MType({0xbe2ad19415fe25a5, 
0x8c47720791e53313})}, + {Sign::POS, -128, MType({0xbe2ad19415fe25a5, 0x8c47720791e53313})}, + {Sign::POS, -128, MType({0xbddae1ccce247838, 0x8e03c24d73003959})}, + {Sign::POS, -128, MType({0x9b00bf167e95da67, 0x8fc31afe30b2c6de})}, + {Sign::POS, -128, MType({0x9b00bf167e95da67, 0x8fc31afe30b2c6de})}, + {Sign::POS, -128, MType({0x9b92199ed1a4bab1, 0x918586c5f5e4bf01})}, + {Sign::POS, -128, MType({0xdf5bb3b60554e152, 0x934b1089a6dc93c1})}, + {Sign::POS, -128, MType({0xdf5bb3b60554e152, 0x934b1089a6dc93c1})}, + {Sign::POS, -128, MType({0xf3cbc416a2418012, 0x9513c36876083695})}, + {Sign::POS, -128, MType({0xbe1188fbc94e2f15, 0x96dfaabd86fa1646})}, + {Sign::POS, -128, MType({0xbe1188fbc94e2f15, 0x96dfaabd86fa1646})}, + {Sign::POS, -128, MType({0x1d2f89321647b358, 0x98aed221a03458b6})}, + {Sign::POS, -128, MType({0x1d2f89321647b358, 0x98aed221a03458b6})}, + {Sign::POS, -128, MType({0xe549f9aaea3cb5e1, 0x9a81456cec642e0f})}, + {Sign::POS, -128, MType({0xa2554b2dd4619e63, 0x9c5710b8cbb73a42})}, + {Sign::POS, -128, MType({0xa2554b2dd4619e63, 0x9c5710b8cbb73a42})}, + {Sign::POS, -128, MType({0x30603d87b6df81ad, 0x9e304061b5fda919})}, + {Sign::POS, -128, MType({0x30603d87b6df81ad, 0x9e304061b5fda919})}, + {Sign::POS, -128, MType({0x67879c5a30cd1242, 0xa00ce1092e5498c3})}, + {Sign::POS, -128, MType({0xb7efae08e597e16, 0xa1ecff97c91e267b})}, + {Sign::POS, -128, MType({0xb7efae08e597e16, 0xa1ecff97c91e267b})}, + {Sign::POS, -128, MType({0x83594fab088c0d65, 0xa3d0a93f45169a4a})}, + {Sign::POS, -128, MType({0x83594fab088c0d65, 0xa3d0a93f45169a4a})}, + {Sign::POS, -128, MType({0xaf6a62a0dec6e073, 0xa5b7eb7cb860fb88})}, + {Sign::POS, -128, MType({0xaf6a62a0dec6e073, 0xa5b7eb7cb860fb88})}, + {Sign::POS, -128, MType({0x49362382a768847a, 0xa7a2d41ad270c9d7})}, + {Sign::POS, -128, MType({0x49362382a768847a, 0xa7a2d41ad270c9d7})}, + {Sign::POS, -128, MType({0x8ba4aea614d05701, 0xa991713433c2b998})}, + {Sign::POS, -128, MType({0x8ba4aea614d05701, 0xa991713433c2b998})}, + {Sign::POS, 
-128, MType({0x7fe6607ba902ef3c, 0xab83d135dc633301})}, + {Sign::POS, -128, MType({0x7fe6607ba902ef3c, 0xab83d135dc633301})}, + {Sign::POS, -128, MType({0xd60864fd949b4bd3, 0xad7a02e1b24efd31})}, + {Sign::POS, -128, MType({0xd60864fd949b4bd3, 0xad7a02e1b24efd31})}, + {Sign::POS, -128, MType({0x66d235ee63073dd, 0xaf74155120c9011c})}, + {Sign::POS, 0, MType(0)}, }, // -log(r) for the second step, generated by SageMath with: // @@ -188,202 +185,202 @@ alignas(64) const LogRR LOG_TABLE = { // r = 2^-16 * round( 2^16 / (1 + i*2^(-14)) ); // s, m, e = RealField(128)(r).log().sign_mantissa_exponent(); // print("{Sign::POS," if s == -1 else "{Sign::NEG,", e, ", - // format_hex(m), "},"); + // MType({", hex(m % 2^64), ",", hex((m >> 64) % 2^64), "})},"); /* .step_2 = */ { - {Sign::NEG, -135, 0x803faaca'c419abf2'a1c6f3fc'242ef8d0_u128}, - {Sign::NEG, -136, 0xfc834da1'6f0d9f57'a225ebc0'2e6d9dd4_u128}, - {Sign::NEG, -136, 0xf88735cc'c7433381'c33f6ad3'40ae18a9_u128}, - {Sign::NEG, -136, 0xf48b0e17'1249b6bc'70b2a4d3'8a242244_u128}, - {Sign::NEG, -136, 0xf08ed67f'd190e280'1d548190'48b811b0_u128}, - {Sign::NEG, -136, 0xec928f06'86828706'aee59837'01d2a02b_u128}, - {Sign::NEG, -136, 0xe89637aa'b2828aed'40abb8ab'72afa2d2_u128}, - {Sign::NEG, -136, 0xe499d06b'd6eeead5'deb547a0'd4a26ef9_u128}, - {Sign::NEG, -136, 0xe09d5949'751fb909'39c5bdfb'cf6087a0_u128}, - {Sign::NEG, -136, 0xdca0d243'0e671d18'53ea9bf1'52de635f_u128}, - {Sign::NEG, -136, 0xd8a43b58'2411537e'25b82043'6f5f4352_u128}, - {Sign::NEG, -136, 0xd4a79488'3764ad41'3c2d13ea'1d0be058_u128}, - {Sign::NEG, -136, 0xd0aaddd2'c9a18f95'4f3cfa62'bcb3ce3a_u128}, - {Sign::NEG, -136, 0xccae1737'5c02737c'd0fff6cd'f14a86c7_u128}, - {Sign::NEG, -136, 0xc8b140b5'6fbbe56a'7587b5f0'453ac3d2_u128}, - {Sign::NEG, -136, 0xc4b45a4c'85fc84e2'b358ad16'dfd0d085_u128}, - {Sign::NEG, -136, 0xc0b763fc'1fed041d'3c86fdce'5dbe7314_u128}, - {Sign::NEG, -136, 0xbcba5dc3'beb027a6'70764e46'ac18a96d_u128}, - {Sign::NEG, -136, 
0xb8bd47a2'e362c600'c63be62b'8f285882_u128}, - {Sign::NEG, -136, 0xb3c0d59a'244325a4'72e7b5a3'86e5e31b_u128}, - {Sign::NEG, -136, 0xafc39bac'66434f27'c3ea2cd9'3f316b34_u128}, - {Sign::NEG, -136, 0xabc651d4'91a7b438'1dfb11a7'cc892843_u128}, - {Sign::NEG, -136, 0xa7c8f812'2773f38d'fc679a28'e9d9f212_u128}, - {Sign::NEG, -136, 0xa3cb8e64'a8a5bbe6'e7bc977e'eec42254_u128}, - {Sign::NEG, -136, 0x9fce14cb'9634cba6'b20f215b'd3b58c61_u128}, - {Sign::NEG, -136, 0x9bd08b46'7112f078'abe28625'08d67a98_u128}, - {Sign::NEG, -136, 0x97d2f1d4'ba2c06f0'd1aacedc'efe9d377_u128}, - {Sign::NEG, -136, 0x93d54875'f265fa2c'f1eb25e7'7d05f58d_u128}, - {Sign::NEG, -136, 0x8fd78f29'9aa0c375'cbef6fac'33691e95_u128}, - {Sign::NEG, -136, 0x8bd9c5ef'33b669e0'27206404'62a0f8ad_u128}, - {Sign::NEG, -136, 0x87dbecc6'3e7b01ed'e2f17751'34c8da75_u128}, - {Sign::NEG, -136, 0x83de03ae'3bbcad2e'ff67e201'c8c50d67_u128}, - {Sign::NEG, -137, 0xffc0154d'588733c5'3c742a7c'76356396_u128}, - {Sign::NEG, -137, 0xf7c4035e'21a4052f'f90dd6b2'4aa686ec_u128}, - {Sign::NEG, -137, 0xefc7d18d'd4485b9e'ca47c52b'7d7ffce2_u128}, - {Sign::NEG, -137, 0xe7cb7fdb'71e0db36'3703617a'd3d8311f_u128}, - {Sign::NEG, -137, 0xdfcf0e45'fbce3e80'7e4cfbd8'30393b88_u128}, - {Sign::NEG, -137, 0xd7d27ccc'736555af'4f7a29cf'0fc2c38e_u128}, - {Sign::NEG, -137, 0xcfd5cb6d'd9ef05dd'7370ae83'f9e72748_u128}, - {Sign::NEG, -137, 0xc7d8fa29'30a84850'671486eb'4cd76f65_u128}, - {Sign::NEG, -137, 0xbfdc08fd'78c229b9'e6dbb624'f9739782_u128}, - {Sign::NEG, -137, 0xb7def7e9'b361c979'6b866e09'e57d9079_u128}, - {Sign::NEG, -137, 0xafe1c6ec'e1a058dd'97fa2fd0'c9dc723e_u128}, - {Sign::NEG, -137, 0xa7e47606'048b1a65'983e8089'7cf1e60f_u128}, - {Sign::NEG, -137, 0x9fe70534'1d236102'7199cd06'ae5d39b3_u128}, - {Sign::NEG, -137, 0x97e97476'2c5e8f58'43cd18a7'2a051a96_u128}, - {Sign::NEG, -137, 0x8febc3cb'332616ff'7b6d1248'c3e1fd40_u128}, - {Sign::NEG, -137, 0x87edf332'325777c5'f5572a88'14c703af_u128}, - {Sign::NEG, -138, 0xffe00554'55887de0'26828c92'649a3a39_u128}, - 
{Sign::NEG, -138, 0xefe3e464'3a640cf3'82c550bd'1216d82a_u128}, - {Sign::NEG, -138, 0xdfe78392'14b4e8ae'da6959f7'f0e01bf0_u128}, - {Sign::NEG, -138, 0xcfeae2db'e5d6736d'da93e2fa'85a8f214_u128}, - {Sign::NEG, -138, 0xbfee023f'af0c2480'b47505bf'a5a03b06_u128}, - {Sign::NEG, -138, 0xaff0e1bb'718186ad'b1475a51'80a43520_u128}, - {Sign::NEG, -138, 0x9ff3814d'2e4a36b2'a8740b91'c95df537_u128}, - {Sign::NEG, -138, 0x8ff5e0f2'e661e1c6'57d895d3'5921b59c_u128}, - {Sign::NEG, -139, 0xfff00155'35588833'3c56c598'c659c2a3_u128}, - {Sign::NEG, -139, 0xdff3c0e4'97ea4eb1'2ef8ec33'ed9d782a_u128}, - {Sign::NEG, -139, 0xbff7008f'f5e0c257'379eba7e'6465ff63_u128}, - {Sign::NEG, -139, 0x9ff9c053'5073a370'3f972b78'3fcab757_u128}, - {Sign::NEG, -140, 0xfff80055'51558885'de026e27'1ee0549d_u128}, - {Sign::NEG, -140, 0xbffb8023'febc0c25'eceb47ea'01f6c632_u128}, - {Sign::NEG, -141, 0xfffc0015'54d55888'7333c578'57e1ed52_u128}, - {Sign::NEG, -142, 0xfffe0005'55455588'87dde026'fa704374_u128}, - {Sign::NEG, 0, 0_u128}, - {Sign::POS, -141, 0x80010002'aab2aac4'44999abe'2fe2cc65_u128}, - {Sign::POS, -140, 0x8002000a'aaeaac44'4eef3815'81464ccb_u128}, - {Sign::POS, -140, 0xc0048024'01440c26'dfeb4850'85f6f454_u128}, - {Sign::POS, -139, 0x8004002a'acaac445'99abe3be'3a1c6e93_u128}, - {Sign::POS, -139, 0xa0064053'5a37a37a'6bc1e20e'ac8448b4_u128}, - {Sign::POS, -139, 0xc0090090'0a20c275'979eedc0'64c242fd_u128}, - {Sign::POS, -139, 0xe00c40e4'bd6e4efd'c72446cc'1bf728bd_u128}, - {Sign::POS, -138, 0x800800aa'baac446e'f381b821'bbb569e5_u128}, - {Sign::POS, -138, 0x900a20f3'19a3e273'569b26aa'a485ea5c_u128}, - {Sign::POS, -138, 0xa00c814d'7c6a37f8'2dcf56c8'3c80b028_u128}, - {Sign::POS, -138, 0xb00f21bb'e3e388ee'5f697682'84463b9b_u128}, - {Sign::POS, -138, 0xc0120240'510c284c'b48ea6c0'5e2773a1_u128}, - {Sign::POS, -138, 0xd01522dc'c4f87991'14d9d761'96d8043a_u128}, - {Sign::POS, -138, 0xe0188393'40d4f241'e016a611'a4415d72_u128}, - {Sign::POS, -138, 0xf01c2465'c5e61b6f'661e135f'49a47c40_u128}, - {Sign::POS, -137, 
0x801002ab'2ac4499a'be6bf0fa'435e8383_u128}, - {Sign::POS, -137, 0x88121333'7898871e'9a31ba0c'bc030353_u128}, - {Sign::POS, -137, 0x901443cc'cd362c9f'54b57dfe'0c4c840f_u128}, - {Sign::POS, -137, 0x98169478'296fad41'7ad1e9c3'15328f7e_u128}, - {Sign::POS, -137, 0xa0190536'8e2389b3'1f3f686c'f3d6be22_u128}, - {Sign::POS, -137, 0xa81b9608'fc3c50ec'f105b66e'c4703ede_u128}, - {Sign::POS, -137, 0xb01e46f0'74b0a0f3'610848c6'8df4d233_u128}, - {Sign::POS, -137, 0xb82117ed'f8832797'd6aef30c'd312169a_u128}, - {Sign::POS, -137, 0xc0240902'88c2a339'f3ac3796'08053d9d_u128}, - {Sign::POS, -137, 0xc8271a2f'2689e388'e6e2acf8'f4d4c24a_u128}, - {Sign::POS, -137, 0xd02a4b74'd2ffca44'ce6ae474'd860359f_u128}, - {Sign::POS, -137, 0xd82d9cd4'8f574c00'28bb3cd9'f2a65fb5_u128}, - {Sign::POS, -137, 0xe0310e4f'5ccf70e1'54f30dbe'f38a8066_u128}, - {Sign::POS, -137, 0xe8349fe6'3cb35564'224a96f5'a7471c46_u128}, - {Sign::POS, -137, 0xf038519a'305a2b1b'6ea92059'1aa02e1b_u128}, - {Sign::POS, -137, 0xf83c236c'39273972'd462b637'56c87e80_u128}, - {Sign::POS, -136, 0x80200aae'ac44ef38'338f7760'5fe77f2a_u128}, - {Sign::POS, -136, 0x842213b7'47fec7bb'3ff51287'882500ed_u128}, - {Sign::POS, -136, 0x88242cd0'7084ed02'cc394b3e'f0ebeb12_u128}, - {Sign::POS, -136, 0x8c2655fa'a6a1323f'1ab9679b'55f78a6b_u128}, - {Sign::POS, -136, 0x90288f36'6b237771'7025697d'10af0436_u128}, - {Sign::POS, -136, 0x942ad884'3ee1a9cd'17e4b7ac'6c600cb4_u128}, - {Sign::POS, -136, 0x982d31e4'a2b7c418'7013925a'9a8da7f3_u128}, - {Sign::POS, -136, 0x9c2f9b58'1787cf0d'fd1a09c8'48e3950e_u128}, - {Sign::POS, -136, 0xa03214df'1e39e1bd'84dd2de6'e3d90a37_u128}, - {Sign::POS, -136, 0xa4349e7a'37bc21ed'318b2ddd'9d0a33b4_u128}, - {Sign::POS, -136, 0xa8373829'e502c47a'bc031e6f'5acfd4a8_u128}, - {Sign::POS, -136, 0xac39e1ee'a7080dbc'9dd91e52'c79fd070_u128}, - {Sign::POS, -136, 0xb03c9bc8'fecc51e3'4af78fa1'cb48a12d_u128}, - {Sign::POS, -136, 0xb43f65b9'6d55f55a'72de1d99'ce252efd_u128}, - {Sign::POS, -136, 0xb74187bc'8ccffa84'efb1dbe7'21934877_u128}, - 
{Sign::POS, -136, 0xbb446dd4'd9bca499'b4b080f2'30c87598_u128}, - {Sign::POS, -136, 0xbf476404'a05f88f2'da6a7cd1'9c7fa4f2_u128}, - {Sign::POS, -136, 0xc34a6a4c'61d5cc3c'df00e378'3b50ecfb_u128}, - {Sign::POS, -136, 0xc74d80ac'9f42a52d'da2e5e02'ab4e183c_u128}, - {Sign::POS, -136, 0xcb50a725'd9cf5ce6'ea5f6ee9'9d30c626_u128}, - {Sign::POS, -136, 0xcf53ddb8'92ab4f55'a96d5956'531d7d8b_u128}, - {Sign::POS, -136, 0xd3572465'4b0beb95'a8fc636e'b36afa75_u128}, - {Sign::POS, -136, 0xd75a7b2c'842cb451'f67e2b82'7bfc4421_u128}, - {Sign::POS, -136, 0xdb5de20e'bf4f4026'a6d8c817'516303e6_u128}, - {Sign::POS, -136, 0xdf61590c'7dbb3a02'69b36ae5'962e85f4_u128}, - {Sign::POS, -136, 0xe364e026'40be6188'24693eec'2a831cc3_u128}, - {Sign::POS, -136, 0xe768775c'89ac8b70'94a339d5'6a55ab4a_u128}, - {Sign::POS, -136, 0xeb6c1eaf'd9dfa1eb'fa9998fb'f9703bf4_u128}, - {Sign::POS, -136, 0xef6fd620'b2b7a503'cafdc272'27b71eaa_u128}, - {Sign::POS, -136, 0xf3739daf'959aaafc'688d4282'f6026aa3_u128}, - {Sign::POS, -136, 0xf777755d'03f4e0b6'e54e9e38'04464cdd_u128}, - {Sign::POS, -136, 0xfb7b5d29'7f388a12'cb78b383'f4b59dce_u128}, - {Sign::POS, -136, 0xff7f5515'88de024f'ee055fc5'15062c04_u128}, - {Sign::POS, -135, 0x81c1ae90'd131de38'207812b4'3382acdd_u128}, - {Sign::POS, -135, 0x83c3baa7'26a721cc'dc90c4c4'b61f3a87_u128}, - {Sign::POS, -135, 0x85c5cece'05941dbc'1a03f13f'b2c978b1_u128}, - {Sign::POS, -135, 0x87c7eb05'aec1304f'b36f282e'83a7dc36_u128}, - {Sign::POS, -135, 0x89ca0f4e'62f9c476'6ad14c3d'fa414391_u128}, - {Sign::POS, -135, 0x8bcc3ba8'630c51f4'e8dd4ea0'd48b88e5_u128}, - {Sign::POS, -135, 0x8dce7013'efca5d96'c02515af'e8caeb90_u128}, - {Sign::POS, -135, 0x8fd0ac91'4a08795f'741ceaf3'349f3cf1_u128}, - {Sign::POS, -135, 0x91d2f120'b29e44bb'83f7cd49'29d2c28c_u128}, - {Sign::POS, -135, 0x93d53dc2'6a666cb1'795d03eb'c2fd03fa_u128}, - {Sign::POS, -135, 0x95d79276'b23eac12'faf74f1d'1ad16acc_u128}, - {Sign::POS, -135, 0x97d9ef3d'cb07cbad'e2de134f'72fee429_u128}, - {Sign::POS, -135, 
0x99dc5417'f5a5a27d'58d8dba6'cadac5d5_u128}, - {Sign::POS, -135, 0x9bdec105'72ff15da'f07d90bc'5aae40a4_u128}, - {Sign::POS, -135, 0x9d609804'6659ea6b'1deaf79d'9fc40374_u128}, - {Sign::POS, -135, 0x9f631314'50b07988'7ba63e67'69b81999_u128}, - {Sign::POS, -135, 0xa1659638'404d5f92'59ebfc93'35094e59_u128}, - {Sign::POS, -135, 0xa3682170'7622f97a'16aae012'b5026f71_u128}, - {Sign::POS, -135, 0xa56ab4bd'3326b378'ff5d4f2c'0e4b9cae_u128}, - {Sign::POS, -135, 0xa76d501e'b8510941'855838b5'119dcb28_u128}, - {Sign::POS, -135, 0xa96ff395'469d8630'75f70cbb'e9cf1603_u128}, - {Sign::POS, -135, 0xab729f21'1f0ac57e'36a53ad4'd5541cc9_u128}, - {Sign::POS, -135, 0xad7552c2'829a7270'04c5934e'c32d20d9_u128}, - {Sign::POS, -135, 0xaf780e79'b2514889'3977e89a'ec59bfa2_u128}, - {Sign::POS, -135, 0xb17ad246'ef3713bc'913d4e3d'c55c3e6e_u128}, - {Sign::POS, -135, 0xb37d9e2a'7a56b09d'777b52a9'e70d8bcc_u128}, - {Sign::POS, -135, 0xb5807224'94be0c91'55de916f'd30591de_u128}, - {Sign::POS, -135, 0xb7834e35'7f7e2600'e79cfb37'be2861e4_u128}, - {Sign::POS, -135, 0xb986325d'7bab0c89'90983104'd3805389_u128}, - {Sign::POS, -135, 0xbb891e9c'ca5be12e'b860504b'aa6f984d_u128}, - {Sign::POS, -135, 0xbd8c12f3'acaad68b'29178d6f'f5712b96_u128}, - {Sign::POS, -135, 0xbf8f0f62'63b53102'7236fa47'ba19a198_u128}, - {Sign::POS, -135, 0xc19213e9'309b46f2'4f34d64c'afcc50e3_u128}, - {Sign::POS, -135, 0xc3952088'548080e4'120cc62e'b0a8db3e_u128}, - {Sign::POS, -135, 0xc5983540'108b59be'11aa5084'779060e3_u128}, - {Sign::POS, -135, 0xc79b5210'a5e55ef5'1c35fd62'36c8dcf1_u128}, - {Sign::POS, -135, 0xc99e76fa'55bb30bd'ed4576a7'e4b878fe_u128}, - {Sign::POS, -135, 0xcb20d7fa'3a336081'6caf4bb8'fd2c1131_u128}, - {Sign::POS, -135, 0xcd240b10'753e78de'3f24a6cb'b09c654f_u128}, - {Sign::POS, -135, 0xcf274640'7e0ff09f'78bc003b'b81e40f3_u128}, - {Sign::POS, -135, 0xd12a898a'95dff002'56647301'edfd8e8b_u128}, - {Sign::POS, -135, 0xd32dd4ee'fde9b2ef'28fe1c4d'04ca4ed9_u128}, - {Sign::POS, -135, 0xd531286d'f76b892a'e1ea9ea6'cbf57379_u128}, - 
{Sign::POS, -135, 0xd7348407'c3a6d688'a3832028'141a5cc2_u128}, - {Sign::POS, -135, 0xd937e7bc'a3e0131b'557421dd'379d3ead_u128}, - {Sign::POS, -135, 0xdb3b538c'd95ecb67'3cff8e87'a99bcaf0_u128}, - {Sign::POS, -135, 0xdd3ec778'a56da093'99255ef3'4bd0801f_u128}, - {Sign::POS, -135, 0xdf424380'495a489c'42b33220'abfa15cd_u128}, - {Sign::POS, -135, 0xe145c7a4'06758e83'503b378f'aa97dbc0_u128}, - {Sign::POS, -135, 0xe34953e4'1e135282'bdf2ca00'6f59b544_u128}, - {Sign::POS, -135, 0xe54ce840'd18a8a3e'1979190a'f37ed16f_u128}, - {Sign::POS, -135, 0xe75084ba'623540f4'31863ff7'cf898c9c_u128}, - {Sign::POS, -135, 0xe9542951'117097b0'c983284f'60293647_u128}, - {Sign::POS, -135, 0xeb57d605'209cc57e'510a969e'be03f804_u128}, - {Sign::POS, -135, 0xed5b8ad6'd11d1797'9f53bffc'6d23fe30_u128}, - {Sign::POS, -135, 0xef5f47c6'6457f199'b286c6e1'13337886_u128}, - {Sign::POS, -135, 0xf0e21acd'd6e7d412'b6ed8085'2ae6fd63_u128}, - {Sign::POS, -135, 0xf2e5e5f2'5450c5a2'df437fb0'f616082d_u128}, - {Sign::POS, -135, 0xf4e9b935'685dbe0b'f237cff1'acb306b3_u128}, - {Sign::POS, -135, 0xf6ed9497'5480b696'52dbfafb'4121a092_u128}, - {Sign::POS, -135, 0xf8f17818'5a2ebfd9'0d816482'49cece4c_u128}, - {Sign::POS, -135, 0xfaf563b8'bae001eb'ad95e6b0'b96903d3_u128}, - {Sign::POS, -135, 0xfcf95778'b80fbc98'176cd568'87ac7fe9_u128}, - {Sign::POS, -135, 0xfefd5358'933c478c'65f4c739'7f1f478d_u128}, + {Sign::NEG, -135, MType({0xa1c6f3fc242ef8d0, 0x803faacac419abf2})}, + {Sign::NEG, -136, MType({0xa225ebc02e6d9dd4, 0xfc834da16f0d9f57})}, + {Sign::NEG, -136, MType({0xc33f6ad340ae18a9, 0xf88735ccc7433381})}, + {Sign::NEG, -136, MType({0x70b2a4d38a242244, 0xf48b0e171249b6bc})}, + {Sign::NEG, -136, MType({0x1d54819048b811b0, 0xf08ed67fd190e280})}, + {Sign::NEG, -136, MType({0xaee5983701d2a02b, 0xec928f0686828706})}, + {Sign::NEG, -136, MType({0x40abb8ab72afa2d2, 0xe89637aab2828aed})}, + {Sign::NEG, -136, MType({0xdeb547a0d4a26ef9, 0xe499d06bd6eeead5})}, + {Sign::NEG, -136, MType({0x39c5bdfbcf6087a0, 0xe09d5949751fb909})}, + 
{Sign::NEG, -136, MType({0x53ea9bf152de635f, 0xdca0d2430e671d18})}, + {Sign::NEG, -136, MType({0x25b820436f5f4352, 0xd8a43b582411537e})}, + {Sign::NEG, -136, MType({0x3c2d13ea1d0be058, 0xd4a794883764ad41})}, + {Sign::NEG, -136, MType({0x4f3cfa62bcb3ce3a, 0xd0aaddd2c9a18f95})}, + {Sign::NEG, -136, MType({0xd0fff6cdf14a86c7, 0xccae17375c02737c})}, + {Sign::NEG, -136, MType({0x7587b5f0453ac3d2, 0xc8b140b56fbbe56a})}, + {Sign::NEG, -136, MType({0xb358ad16dfd0d085, 0xc4b45a4c85fc84e2})}, + {Sign::NEG, -136, MType({0x3c86fdce5dbe7314, 0xc0b763fc1fed041d})}, + {Sign::NEG, -136, MType({0x70764e46ac18a96d, 0xbcba5dc3beb027a6})}, + {Sign::NEG, -136, MType({0xc63be62b8f285882, 0xb8bd47a2e362c600})}, + {Sign::NEG, -136, MType({0x72e7b5a386e5e31b, 0xb3c0d59a244325a4})}, + {Sign::NEG, -136, MType({0xc3ea2cd93f316b34, 0xafc39bac66434f27})}, + {Sign::NEG, -136, MType({0x1dfb11a7cc892843, 0xabc651d491a7b438})}, + {Sign::NEG, -136, MType({0xfc679a28e9d9f212, 0xa7c8f8122773f38d})}, + {Sign::NEG, -136, MType({0xe7bc977eeec42254, 0xa3cb8e64a8a5bbe6})}, + {Sign::NEG, -136, MType({0xb20f215bd3b58c61, 0x9fce14cb9634cba6})}, + {Sign::NEG, -136, MType({0xabe2862508d67a98, 0x9bd08b467112f078})}, + {Sign::NEG, -136, MType({0xd1aacedcefe9d377, 0x97d2f1d4ba2c06f0})}, + {Sign::NEG, -136, MType({0xf1eb25e77d05f58d, 0x93d54875f265fa2c})}, + {Sign::NEG, -136, MType({0xcbef6fac33691e95, 0x8fd78f299aa0c375})}, + {Sign::NEG, -136, MType({0x2720640462a0f8ad, 0x8bd9c5ef33b669e0})}, + {Sign::NEG, -136, MType({0xe2f1775134c8da75, 0x87dbecc63e7b01ed})}, + {Sign::NEG, -136, MType({0xff67e201c8c50d67, 0x83de03ae3bbcad2e})}, + {Sign::NEG, -137, MType({0x3c742a7c76356396, 0xffc0154d588733c5})}, + {Sign::NEG, -137, MType({0xf90dd6b24aa686ec, 0xf7c4035e21a4052f})}, + {Sign::NEG, -137, MType({0xca47c52b7d7ffce2, 0xefc7d18dd4485b9e})}, + {Sign::NEG, -137, MType({0x3703617ad3d8311f, 0xe7cb7fdb71e0db36})}, + {Sign::NEG, -137, MType({0x7e4cfbd830393b88, 0xdfcf0e45fbce3e80})}, + {Sign::NEG, -137, 
MType({0x4f7a29cf0fc2c38e, 0xd7d27ccc736555af})}, + {Sign::NEG, -137, MType({0x7370ae83f9e72748, 0xcfd5cb6dd9ef05dd})}, + {Sign::NEG, -137, MType({0x671486eb4cd76f65, 0xc7d8fa2930a84850})}, + {Sign::NEG, -137, MType({0xe6dbb624f9739782, 0xbfdc08fd78c229b9})}, + {Sign::NEG, -137, MType({0x6b866e09e57d9079, 0xb7def7e9b361c979})}, + {Sign::NEG, -137, MType({0x97fa2fd0c9dc723e, 0xafe1c6ece1a058dd})}, + {Sign::NEG, -137, MType({0x983e80897cf1e60f, 0xa7e47606048b1a65})}, + {Sign::NEG, -137, MType({0x7199cd06ae5d39b3, 0x9fe705341d236102})}, + {Sign::NEG, -137, MType({0x43cd18a72a051a96, 0x97e974762c5e8f58})}, + {Sign::NEG, -137, MType({0x7b6d1248c3e1fd40, 0x8febc3cb332616ff})}, + {Sign::NEG, -137, MType({0xf5572a8814c703af, 0x87edf332325777c5})}, + {Sign::NEG, -138, MType({0x26828c92649a3a39, 0xffe0055455887de0})}, + {Sign::NEG, -138, MType({0x82c550bd1216d82a, 0xefe3e4643a640cf3})}, + {Sign::NEG, -138, MType({0xda6959f7f0e01bf0, 0xdfe7839214b4e8ae})}, + {Sign::NEG, -138, MType({0xda93e2fa85a8f214, 0xcfeae2dbe5d6736d})}, + {Sign::NEG, -138, MType({0xb47505bfa5a03b06, 0xbfee023faf0c2480})}, + {Sign::NEG, -138, MType({0xb1475a5180a43520, 0xaff0e1bb718186ad})}, + {Sign::NEG, -138, MType({0xa8740b91c95df537, 0x9ff3814d2e4a36b2})}, + {Sign::NEG, -138, MType({0x57d895d35921b59c, 0x8ff5e0f2e661e1c6})}, + {Sign::NEG, -139, MType({0x3c56c598c659c2a3, 0xfff0015535588833})}, + {Sign::NEG, -139, MType({0x2ef8ec33ed9d782a, 0xdff3c0e497ea4eb1})}, + {Sign::NEG, -139, MType({0x379eba7e6465ff63, 0xbff7008ff5e0c257})}, + {Sign::NEG, -139, MType({0x3f972b783fcab757, 0x9ff9c0535073a370})}, + {Sign::NEG, -140, MType({0xde026e271ee0549d, 0xfff8005551558885})}, + {Sign::NEG, -140, MType({0xeceb47ea01f6c632, 0xbffb8023febc0c25})}, + {Sign::NEG, -141, MType({0x7333c57857e1ed52, 0xfffc001554d55888})}, + {Sign::NEG, -142, MType({0x87dde026fa704374, 0xfffe000555455588})}, + {Sign::NEG, 0, MType({0x0, 0x0})}, + {Sign::POS, -141, MType({0x44999abe2fe2cc65, 0x80010002aab2aac4})}, + {Sign::POS, -140, 
MType({0x4eef381581464ccb, 0x8002000aaaeaac44})}, + {Sign::POS, -140, MType({0xdfeb485085f6f454, 0xc004802401440c26})}, + {Sign::POS, -139, MType({0x99abe3be3a1c6e93, 0x8004002aacaac445})}, + {Sign::POS, -139, MType({0x6bc1e20eac8448b4, 0xa00640535a37a37a})}, + {Sign::POS, -139, MType({0x979eedc064c242fd, 0xc00900900a20c275})}, + {Sign::POS, -139, MType({0xc72446cc1bf728bd, 0xe00c40e4bd6e4efd})}, + {Sign::POS, -138, MType({0xf381b821bbb569e5, 0x800800aabaac446e})}, + {Sign::POS, -138, MType({0x569b26aaa485ea5c, 0x900a20f319a3e273})}, + {Sign::POS, -138, MType({0x2dcf56c83c80b028, 0xa00c814d7c6a37f8})}, + {Sign::POS, -138, MType({0x5f69768284463b9b, 0xb00f21bbe3e388ee})}, + {Sign::POS, -138, MType({0xb48ea6c05e2773a1, 0xc0120240510c284c})}, + {Sign::POS, -138, MType({0x14d9d76196d8043a, 0xd01522dcc4f87991})}, + {Sign::POS, -138, MType({0xe016a611a4415d72, 0xe018839340d4f241})}, + {Sign::POS, -138, MType({0x661e135f49a47c40, 0xf01c2465c5e61b6f})}, + {Sign::POS, -137, MType({0xbe6bf0fa435e8383, 0x801002ab2ac4499a})}, + {Sign::POS, -137, MType({0x9a31ba0cbc030353, 0x881213337898871e})}, + {Sign::POS, -137, MType({0x54b57dfe0c4c840f, 0x901443cccd362c9f})}, + {Sign::POS, -137, MType({0x7ad1e9c315328f7e, 0x98169478296fad41})}, + {Sign::POS, -137, MType({0x1f3f686cf3d6be22, 0xa01905368e2389b3})}, + {Sign::POS, -137, MType({0xf105b66ec4703ede, 0xa81b9608fc3c50ec})}, + {Sign::POS, -137, MType({0x610848c68df4d233, 0xb01e46f074b0a0f3})}, + {Sign::POS, -137, MType({0xd6aef30cd312169a, 0xb82117edf8832797})}, + {Sign::POS, -137, MType({0xf3ac379608053d9d, 0xc024090288c2a339})}, + {Sign::POS, -137, MType({0xe6e2acf8f4d4c24a, 0xc8271a2f2689e388})}, + {Sign::POS, -137, MType({0xce6ae474d860359f, 0xd02a4b74d2ffca44})}, + {Sign::POS, -137, MType({0x28bb3cd9f2a65fb5, 0xd82d9cd48f574c00})}, + {Sign::POS, -137, MType({0x54f30dbef38a8066, 0xe0310e4f5ccf70e1})}, + {Sign::POS, -137, MType({0x224a96f5a7471c46, 0xe8349fe63cb35564})}, + {Sign::POS, -137, MType({0x6ea920591aa02e1b, 
0xf038519a305a2b1b})}, + {Sign::POS, -137, MType({0xd462b63756c87e80, 0xf83c236c39273972})}, + {Sign::POS, -136, MType({0x338f77605fe77f2a, 0x80200aaeac44ef38})}, + {Sign::POS, -136, MType({0x3ff51287882500ed, 0x842213b747fec7bb})}, + {Sign::POS, -136, MType({0xcc394b3ef0ebeb12, 0x88242cd07084ed02})}, + {Sign::POS, -136, MType({0x1ab9679b55f78a6b, 0x8c2655faa6a1323f})}, + {Sign::POS, -136, MType({0x7025697d10af0436, 0x90288f366b237771})}, + {Sign::POS, -136, MType({0x17e4b7ac6c600cb4, 0x942ad8843ee1a9cd})}, + {Sign::POS, -136, MType({0x7013925a9a8da7f3, 0x982d31e4a2b7c418})}, + {Sign::POS, -136, MType({0xfd1a09c848e3950e, 0x9c2f9b581787cf0d})}, + {Sign::POS, -136, MType({0x84dd2de6e3d90a37, 0xa03214df1e39e1bd})}, + {Sign::POS, -136, MType({0x318b2ddd9d0a33b4, 0xa4349e7a37bc21ed})}, + {Sign::POS, -136, MType({0xbc031e6f5acfd4a8, 0xa8373829e502c47a})}, + {Sign::POS, -136, MType({0x9dd91e52c79fd070, 0xac39e1eea7080dbc})}, + {Sign::POS, -136, MType({0x4af78fa1cb48a12d, 0xb03c9bc8fecc51e3})}, + {Sign::POS, -136, MType({0x72de1d99ce252efd, 0xb43f65b96d55f55a})}, + {Sign::POS, -136, MType({0xefb1dbe721934877, 0xb74187bc8ccffa84})}, + {Sign::POS, -136, MType({0xb4b080f230c87598, 0xbb446dd4d9bca499})}, + {Sign::POS, -136, MType({0xda6a7cd19c7fa4f2, 0xbf476404a05f88f2})}, + {Sign::POS, -136, MType({0xdf00e3783b50ecfb, 0xc34a6a4c61d5cc3c})}, + {Sign::POS, -136, MType({0xda2e5e02ab4e183c, 0xc74d80ac9f42a52d})}, + {Sign::POS, -136, MType({0xea5f6ee99d30c626, 0xcb50a725d9cf5ce6})}, + {Sign::POS, -136, MType({0xa96d5956531d7d8b, 0xcf53ddb892ab4f55})}, + {Sign::POS, -136, MType({0xa8fc636eb36afa75, 0xd35724654b0beb95})}, + {Sign::POS, -136, MType({0xf67e2b827bfc4421, 0xd75a7b2c842cb451})}, + {Sign::POS, -136, MType({0xa6d8c817516303e6, 0xdb5de20ebf4f4026})}, + {Sign::POS, -136, MType({0x69b36ae5962e85f4, 0xdf61590c7dbb3a02})}, + {Sign::POS, -136, MType({0x24693eec2a831cc3, 0xe364e02640be6188})}, + {Sign::POS, -136, MType({0x94a339d56a55ab4a, 0xe768775c89ac8b70})}, + {Sign::POS, 
-136, MType({0xfa9998fbf9703bf4, 0xeb6c1eafd9dfa1eb})}, + {Sign::POS, -136, MType({0xcafdc27227b71eaa, 0xef6fd620b2b7a503})}, + {Sign::POS, -136, MType({0x688d4282f6026aa3, 0xf3739daf959aaafc})}, + {Sign::POS, -136, MType({0xe54e9e3804464cdd, 0xf777755d03f4e0b6})}, + {Sign::POS, -136, MType({0xcb78b383f4b59dce, 0xfb7b5d297f388a12})}, + {Sign::POS, -136, MType({0xee055fc515062c04, 0xff7f551588de024f})}, + {Sign::POS, -135, MType({0x207812b43382acdd, 0x81c1ae90d131de38})}, + {Sign::POS, -135, MType({0xdc90c4c4b61f3a87, 0x83c3baa726a721cc})}, + {Sign::POS, -135, MType({0x1a03f13fb2c978b1, 0x85c5cece05941dbc})}, + {Sign::POS, -135, MType({0xb36f282e83a7dc36, 0x87c7eb05aec1304f})}, + {Sign::POS, -135, MType({0x6ad14c3dfa414391, 0x89ca0f4e62f9c476})}, + {Sign::POS, -135, MType({0xe8dd4ea0d48b88e5, 0x8bcc3ba8630c51f4})}, + {Sign::POS, -135, MType({0xc02515afe8caeb90, 0x8dce7013efca5d96})}, + {Sign::POS, -135, MType({0x741ceaf3349f3cf1, 0x8fd0ac914a08795f})}, + {Sign::POS, -135, MType({0x83f7cd4929d2c28c, 0x91d2f120b29e44bb})}, + {Sign::POS, -135, MType({0x795d03ebc2fd03fa, 0x93d53dc26a666cb1})}, + {Sign::POS, -135, MType({0xfaf74f1d1ad16acc, 0x95d79276b23eac12})}, + {Sign::POS, -135, MType({0xe2de134f72fee429, 0x97d9ef3dcb07cbad})}, + {Sign::POS, -135, MType({0x58d8dba6cadac5d5, 0x99dc5417f5a5a27d})}, + {Sign::POS, -135, MType({0xf07d90bc5aae40a4, 0x9bdec10572ff15da})}, + {Sign::POS, -135, MType({0x1deaf79d9fc40374, 0x9d6098046659ea6b})}, + {Sign::POS, -135, MType({0x7ba63e6769b81999, 0x9f63131450b07988})}, + {Sign::POS, -135, MType({0x59ebfc9335094e59, 0xa1659638404d5f92})}, + {Sign::POS, -135, MType({0x16aae012b5026f71, 0xa36821707622f97a})}, + {Sign::POS, -135, MType({0xff5d4f2c0e4b9cae, 0xa56ab4bd3326b378})}, + {Sign::POS, -135, MType({0x855838b5119dcb28, 0xa76d501eb8510941})}, + {Sign::POS, -135, MType({0x75f70cbbe9cf1603, 0xa96ff395469d8630})}, + {Sign::POS, -135, MType({0x36a53ad4d5541cc9, 0xab729f211f0ac57e})}, + {Sign::POS, -135, MType({0x4c5934ec32d20d9, 
0xad7552c2829a7270})}, + {Sign::POS, -135, MType({0x3977e89aec59bfa2, 0xaf780e79b2514889})}, + {Sign::POS, -135, MType({0x913d4e3dc55c3e6e, 0xb17ad246ef3713bc})}, + {Sign::POS, -135, MType({0x777b52a9e70d8bcc, 0xb37d9e2a7a56b09d})}, + {Sign::POS, -135, MType({0x55de916fd30591de, 0xb580722494be0c91})}, + {Sign::POS, -135, MType({0xe79cfb37be2861e4, 0xb7834e357f7e2600})}, + {Sign::POS, -135, MType({0x90983104d3805389, 0xb986325d7bab0c89})}, + {Sign::POS, -135, MType({0xb860504baa6f984d, 0xbb891e9cca5be12e})}, + {Sign::POS, -135, MType({0x29178d6ff5712b96, 0xbd8c12f3acaad68b})}, + {Sign::POS, -135, MType({0x7236fa47ba19a198, 0xbf8f0f6263b53102})}, + {Sign::POS, -135, MType({0x4f34d64cafcc50e3, 0xc19213e9309b46f2})}, + {Sign::POS, -135, MType({0x120cc62eb0a8db3e, 0xc3952088548080e4})}, + {Sign::POS, -135, MType({0x11aa5084779060e3, 0xc5983540108b59be})}, + {Sign::POS, -135, MType({0x1c35fd6236c8dcf1, 0xc79b5210a5e55ef5})}, + {Sign::POS, -135, MType({0xed4576a7e4b878fe, 0xc99e76fa55bb30bd})}, + {Sign::POS, -135, MType({0x6caf4bb8fd2c1131, 0xcb20d7fa3a336081})}, + {Sign::POS, -135, MType({0x3f24a6cbb09c654f, 0xcd240b10753e78de})}, + {Sign::POS, -135, MType({0x78bc003bb81e40f3, 0xcf2746407e0ff09f})}, + {Sign::POS, -135, MType({0x56647301edfd8e8b, 0xd12a898a95dff002})}, + {Sign::POS, -135, MType({0x28fe1c4d04ca4ed9, 0xd32dd4eefde9b2ef})}, + {Sign::POS, -135, MType({0xe1ea9ea6cbf57379, 0xd531286df76b892a})}, + {Sign::POS, -135, MType({0xa3832028141a5cc2, 0xd7348407c3a6d688})}, + {Sign::POS, -135, MType({0x557421dd379d3ead, 0xd937e7bca3e0131b})}, + {Sign::POS, -135, MType({0x3cff8e87a99bcaf0, 0xdb3b538cd95ecb67})}, + {Sign::POS, -135, MType({0x99255ef34bd0801f, 0xdd3ec778a56da093})}, + {Sign::POS, -135, MType({0x42b33220abfa15cd, 0xdf424380495a489c})}, + {Sign::POS, -135, MType({0x503b378faa97dbc0, 0xe145c7a406758e83})}, + {Sign::POS, -135, MType({0xbdf2ca006f59b544, 0xe34953e41e135282})}, + {Sign::POS, -135, MType({0x1979190af37ed16f, 0xe54ce840d18a8a3e})}, + {Sign::POS, 
-135, MType({0x31863ff7cf898c9c, 0xe75084ba623540f4})}, + {Sign::POS, -135, MType({0xc983284f60293647, 0xe9542951117097b0})}, + {Sign::POS, -135, MType({0x510a969ebe03f804, 0xeb57d605209cc57e})}, + {Sign::POS, -135, MType({0x9f53bffc6d23fe30, 0xed5b8ad6d11d1797})}, + {Sign::POS, -135, MType({0xb286c6e113337886, 0xef5f47c66457f199})}, + {Sign::POS, -135, MType({0xb6ed80852ae6fd63, 0xf0e21acdd6e7d412})}, + {Sign::POS, -135, MType({0xdf437fb0f616082d, 0xf2e5e5f25450c5a2})}, + {Sign::POS, -135, MType({0xf237cff1acb306b3, 0xf4e9b935685dbe0b})}, + {Sign::POS, -135, MType({0x52dbfafb4121a092, 0xf6ed94975480b696})}, + {Sign::POS, -135, MType({0xd81648249cece4c, 0xf8f178185a2ebfd9})}, + {Sign::POS, -135, MType({0xad95e6b0b96903d3, 0xfaf563b8bae001eb})}, + {Sign::POS, -135, MType({0x176cd56887ac7fe9, 0xfcf95778b80fbc98})}, + {Sign::POS, -135, MType({0x65f4c7397f1f478d, 0xfefd5358933c478c})}, }, // -log(r) for the third step, generated by SageMath with: // @@ -391,170 +388,170 @@ alignas(64) const LogRR LOG_TABLE = { // r = 2^-21 * round( 2^21 / (1 + i*2^(-21)) ); // s, m, e = RealField(128)(r).log().sign_mantissa_exponent(); // print("{Sign::POS," if (s == -1) else "{Sign::NEG,", e, ", - // format_hex(m), "},"); + // MType({", hex(m % 2^64), ",", hex((m >> 64) % 2^64), "})},"); /* .step_3 = */ { - {Sign::NEG, -142, 0x9fff3801'4d52e45a'374b2940'76d669c3_u128}, - {Sign::NEG, -142, 0x9dff3cf9'40fad85a'7f6f05dc'dbeb776e_u128}, - {Sign::NEG, -142, 0x9bff41e1'34f1cb36'3d55e21d'41bbadf9_u128}, - {Sign::NEG, -142, 0x99ff46b9'2936bcf4'ccdba2d5'4aadbc5c_u128}, - {Sign::NEG, -142, 0x97ff4b81'1dc8ad9d'71dd16d3'073f79b2_u128}, - {Sign::NEG, -142, 0x95ff5039'12a69d37'5837f3df'1a58dd48_u128}, - {Sign::NEG, -142, 0x93ff54e1'07cf8bc9'93cad3bc'dd26fd6d_u128}, - {Sign::NEG, -142, 0x91ff5978'fd42795b'2075312a'827f14fa_u128}, - {Sign::NEG, -142, 0x8fff5e00'f2fe65f2'e21764e1'39c98f60_u128}, - {Sign::NEG, -142, 0x8dff6278'e9025197'a492a295'51751b4c_u128}, - {Sign::NEG, -142, 
0x8bff66e0'df4d3c50'1bc8f5f6'58f1c3a2_u128}, - {Sign::NEG, -142, 0x89ff6b38'd5de2622'e39d3faf'42340ed7_u128}, - {Sign::NEG, -142, 0x87ff6f80'ccb40f16'7ff33266'82c02485_u128}, - {Sign::NEG, -142, 0x85ff73b8'c3cdf731'5caf4fbe'343cf928_u128}, - {Sign::NEG, -142, 0x83ff77e0'bb2ade79'cdb6e554'348f7fe8_u128}, - {Sign::NEG, -142, 0x81ff7bf8'b2c9c4f6'0ef009c2'457de25d_u128}, - {Sign::NEG, -143, 0xffff0001'55535558'8883333c'57b57c74_u128}, - {Sign::NEG, -143, 0xfbff07f1'45931f44'f32668f3'9c70d183_u128}, - {Sign::NEG, -143, 0xf7ff0fc1'3650e7bd'459a73c6'a6486fe3_u128}, - {Sign::NEG, -143, 0xf3ff1771'278aaecd'37b18cca'7dd3a29f_u128}, - {Sign::NEG, -143, 0xefff1f01'193e7480'513f610d'21bcfc78_u128}, - {Sign::NEG, -143, 0xebff2671'0b6a38e1'ea190b95'c0690b7b_u128}, - {Sign::NEG, -143, 0xe7ff2dc0'fe0bfbfd'2a150f64'f0ad1743_u128}, - {Sign::NEG, -143, 0xe3ff34f0'f121bddd'090b5174'e995e9d1_u128}, - {Sign::NEG, -143, 0xdfff3c00'e4a97e8c'4ed512b9'b93ea2bf_u128}, - {Sign::NEG, -143, 0xdbff42f0'd8a13e15'934cea21'7ab794a2_u128}, - {Sign::NEG, -143, 0xd7ff49c0'cd06fc83'3e4ebe94'8afd2c76_u128}, - {Sign::NEG, -143, 0xd3ff5070'c1d8b9df'87b7c0f5'bcfee2e1_u128}, - {Sign::NEG, -143, 0xcfff5700'b7147634'77666622'8cb6371b_u128}, - {Sign::NEG, -143, 0xcbff5d70'acb8318b'e53a60f3'514db358_u128}, - {Sign::NEG, -143, 0xc7ff63c0'a2c1ebef'79149c3b'6e57fa86_u128}, - {Sign::NEG, -143, 0xc3ff69f0'992fa568'aad734c9'8416df2a_u128}, - {Sign::NEG, -143, 0xbfff7000'8fff5e00'c2657367'9ed28334_u128}, - {Sign::NEG, -143, 0xbbff75f0'872f15c0'd7a3c6db'6540809f_u128}, - {Sign::NEG, -143, 0xb7ff7bc0'7ebcccb1'd277bde6'45fb1aad_u128}, - {Sign::NEG, -143, 0xb3ff8170'76a682dc'6ac80145'a4087793_u128}, - {Sign::NEG, -143, 0xafff8700'6eea3849'287c4db3'0271e265_u128}, - {Sign::NEG, -143, 0xabff8c70'6785ed00'637d6de4'2eeb151e_u128}, - {Sign::NEG, -143, 0xa7ff91c0'6077a10a'43b5348b'6b898a8c_u128}, - {Sign::NEG, -143, 0xa3ff96f0'59bd546e'c10e7657'978bd7f6_u128}, - {Sign::NEG, -143, 0x9fff9c00'53550735'a37503f4'57310e59_u128}, - 
{Sign::NEG, -143, 0x9bffa0f0'4d3cb966'82d5a40a'3aa022ff_u128}, - {Sign::NEG, -143, 0x97ffa5c0'47726b08'c71e0d3e'e3df5f4d_u128}, - {Sign::NEG, -143, 0x93ffaa70'41f41c23'a83ce035'2bdbd79b_u128}, - {Sign::NEG, -143, 0x8fffaf00'3cbfccbe'2e21a18d'4680e8e4_u128}, - {Sign::NEG, -143, 0x8bffb370'37d37cdf'30bcb3e4'e5dfbd28_u128}, - {Sign::NEG, -143, 0x87ffb7c0'332d2c8d'57ff51d7'5c66d64a_u128}, - {Sign::NEG, -143, 0x83ffbbf0'2ecadbcf'1bdb87fd'be299f43_u128}, - {Sign::NEG, -144, 0xffff8000'55551555'88885dde'02700703_u128}, - {Sign::NEG, -144, 0xf7ff87e0'4d94724c'd259ca80'3a0c1870_u128}, - {Sign::NEG, -144, 0xefff8f80'464fce8f'e5141308'51c7070a_u128}, - {Sign::NEG, -144, 0xe7ff96e0'3f832a2a'30a16898'f3073a64_u128}, - {Sign::NEG, -144, 0xdfff9e00'392a8526'c4ed6451'7b2949ce_u128}, - {Sign::NEG, -144, 0xd7ffa4e0'3341df90'51e4fb4e'32cf6350_u128}, - {Sign::NEG, -144, 0xcfffab80'2dc53971'277672a8'8350bcce_u128}, - {Sign::NEG, -144, 0xc7ffb1e0'28b092d3'35915377'2a490f06_u128}, - {Sign::NEG, -144, 0xbfffb800'23ffebc0'0c265ece'6b481a0e_u128}, - {Sign::NEG, -144, 0xb7ffbde0'1faf4440'db2781c0'3fa132f6_u128}, - {Sign::NEG, -144, 0xafffc380'1bba9c5e'7287c95c'845ada33_u128}, - {Sign::NEG, -144, 0xa7ffc8e0'181df421'423b56b1'263e5a77_u128}, - {Sign::NEG, -144, 0x9fffce00'14d54b91'5a3752ca'4c076fa3_u128}, - {Sign::NEG, -144, 0x97ffd2e0'11dca2b6'6a71e2b2'7eb3f573_u128}, - {Sign::NEG, -144, 0x8fffd780'0f2ff997'c2e21b72'cff39d8f_u128}, - {Sign::NEG, -144, 0x87ffdbe0'0ccb503c'537ff612'feb7ac9e_u128}, - {Sign::NEG, -145, 0xffffc000'15554d55'58888733'33c57c18_u128}, - {Sign::NEG, -145, 0xefffc7c0'1193f9d1'fa514218'42311c42_u128}, - {Sign::NEG, -145, 0xdfffcf00'0e4aa5fa'2c4ed6de'475b942c_u128}, - {Sign::NEG, -145, 0xcfffd5c0'0b7151d8'ce77678c'bb6fcb88_u128}, - {Sign::NEG, -145, 0xbfffdc00'08fffd78'00c26629'a679ed3b_u128}, - {Sign::NEG, -145, 0xafffe1c0'06eea8e1'23287cb9'd3072728_u128}, - {Sign::NEG, -145, 0x9fffe700'0535541c'd5a37540'fd057315_u128}, - {Sign::NEG, -145, 
0x8fffebc0'03cbff32'f82e21c1'fce36810_u128}, - {Sign::NEG, -146, 0xffffe000'05555455'5588887d'dde02702_u128}, - {Sign::NEG, -146, 0xdfffe780'0392aa14'9ac4ed72'adf5b295_u128}, - {Sign::NEG, -146, 0xbfffee00'023fffaf'000c2664'8066b482_u128}, - {Sign::NEG, -146, 0x9ffff380'014d552e'455a3754'b292c077_u128}, - {Sign::NEG, -147, 0xfffff000'01555535'55588888'33333c58_u128}, - {Sign::NEG, -147, 0xbffff700'008ffff5'e000c266'5736679f_u128}, - {Sign::NEG, -148, 0xfffff800'00555551'55558888'85ddde02_u128}, - {Sign::NEG, -149, 0xfffffc00'00155554'd5555888'88733334_u128}, - {Sign::POS, 0, 0_u128}, - {Sign::POS, -148, 0x80000200'000aaaaa'eaaaac44'444eeeef_u128}, - {Sign::POS, -147, 0x80000400'002aaaac'aaaac444'459999ac_u128}, - {Sign::POS, -147, 0xc0000900'0090000a'2000c266'7596679f_u128}, - {Sign::POS, -146, 0x80000800'00aaaaba'aaac4444'6eeef381_u128}, - {Sign::POS, -146, 0xa0000c80'014d557c'655a3755'f81815cc_u128}, - {Sign::POS, -146, 0xc0001200'02400051'000c2668'4c66b482_u128}, - {Sign::POS, -146, 0xe0001880'0392ab40'bac4ed7c'40fb07eb_u128}, - {Sign::POS, -145, 0x80001000'02aaab2a'aac44449'999abe2c_u128}, - {Sign::POS, -145, 0x90001440'03cc00cd'082e21d7'9cbb6812_u128}, - {Sign::POS, -145, 0xa0001900'0535568d'd5a37569'adb01dc3_u128}, - {Sign::POS, -145, 0xb0001e40'06eeac74'33287d01'e8c9d1d9_u128}, - {Sign::POS, -145, 0xc0002400'09000288'00c266a3'2679ed48_u128}, - {Sign::POS, -145, 0xd0002a40'0b7158d1'de776851'22b2764b_u128}, - {Sign::POS, -145, 0xe0003100'0e4aaf5b'2c4ed810'a8063f03_u128}, - {Sign::POS, -145, 0xf0003840'1194062e'0a5143e7'be891c8f_u128}, - {Sign::POS, -144, 0x80002000'0aaaaeaa'ac4444ee'ef3813a1_u128}, - {Sign::POS, -144, 0x88002420'0ccb5a6e'5b7ff7fe'1339025b_u128}, - {Sign::POS, -144, 0x90002880'0f300668'42e21e26'caf39e33_u128}, - {Sign::POS, -144, 0x98002d20'11dcb29e'f271e66f'a5554bc6_u128}, - {Sign::POS, -144, 0xa0003200'14d55f19'5a3757e0'615cc676_u128}, - {Sign::POS, -144, 0xa8003720'181e0bde'ca3b5d82'10ca5cab_u128}, - {Sign::POS, -144, 
0xb0003c80'1bbab8f6'f287d25f'3cb032bb_u128}, - {Sign::POS, -144, 0xb8004220'1faf6669'e3278d84'0be28cdb_u128}, - {Sign::POS, -144, 0xc0004800'24001440'0c266dfe'6b482076_u128}, - {Sign::POS, -144, 0xc8004e20'28b0c282'3d9166de'380a6d3d_u128}, - {Sign::POS, -144, 0xd0005480'2dc57139'a7768b35'6ba61e4b_u128}, - {Sign::POS, -144, 0xd8005b20'3342206f'd9e51a18'49db73c1_u128}, - {Sign::POS, -144, 0xe0006200'392ad02e'c4ed8a9d'907eb521_u128}, - {Sign::POS, -144, 0xe8006920'3f838080'b8a197de'a928acd7_u128}, - {Sign::POS, -144, 0xf0007080'46503170'65144cf7'dcc72d3b_u128}, - {Sign::POS, -144, 0xf8007820'4d94e308'da5a1108'890d9f6a_u128}, - {Sign::POS, -143, 0x80004000'2aaacaaa'c4445999'abe2ce2c_u128}, - {Sign::POS, -143, 0x84004410'2ecb2431'1fdbbb4f'3bffc832_u128}, - {Sign::POS, -143, 0x88004840'332d7e1d'97ff8f39'ec91b4ee_u128}, - {Sign::POS, -143, 0x8c004c90'37d3d876'74bcfcf0'b3f0a95d_u128}, - {Sign::POS, -143, 0x90005100'3cc03342'2e21f80c'a6813aff_u128}, - {Sign::POS, -143, 0x94005590'41f48e87'6c3d4629'170ce87f_u128}, - {Sign::POS, -143, 0x98005a40'4772ea4d'071e84e3'b80a8881_u128}, - {Sign::POS, -143, 0x9c005f10'4d3d469a'06d62fdc'bdd6bec3_u128}, - {Sign::POS, -143, 0xa0006400'5355a375'a375a6b7'01dc77c0_u128}, - {Sign::POS, -143, 0xa4006910'59be00e7'450f3318'26ad6b05_u128}, - {Sign::POS, -143, 0xa8006e40'60785ef6'83b60ea8'bd0aa459_u128}, - {Sign::POS, -143, 0xac007390'6786bdab'277e6914'69dd13f5_u128}, - {Sign::POS, -143, 0xb0007900'6eeb1d0d'287d6e0a'0d1e25eb_u128}, - {Sign::POS, -143, 0xb4007e90'76a77d24'aec94b3b'e9b060f5_u128}, - {Sign::POS, -143, 0xb8008440'7ebdddfa'1279365f'ce280cce_u128}, - {Sign::POS, -143, 0xbc008a10'87303f95'dba5732f'3e83e04a_u128}, - {Sign::POS, -143, 0xc0009000'9000a200'c2675967'9ed5b754_u128}, - {Sign::POS, -143, 0xc4009610'99310543'aed95aca'5edb5109_u128}, - {Sign::POS, -143, 0xc8009c40'a2c36967'b917091d'2687160f_u128}, - {Sign::POS, -143, 0xcc00a290'acb9ce76'293d1c2a'0378e75d_u128}, - {Sign::POS, -143, 0xd000a900'b7163478'776977bf'9766f5a7_u128}, - 
{Sign::POS, -143, 0xd400af90'c1da9b78'4bbb31b1'4776a18b_u128}, - {Sign::POS, -143, 0xd800b640'cd09037f'7e5297d7'6c8564ba_u128}, - {Sign::POS, -143, 0xdc00bd10'd8a36c98'1751360f'8461c447_u128}, - {Sign::POS, -143, 0xe000c400'e4abd6cc'4ed9dc3c'63f44c41_u128}, - {Sign::POS, -143, 0xe400cb10'f1244226'8d10a446'6a5894d5_u128}, - {Sign::POS, -143, 0xe800d240'fe0eaeb1'6a1af81b'b4e6510e_u128}, - {Sign::POS, -143, 0xec00d991'0b6d1c77'ae1f97b0'542a677a_u128}, - {Sign::POS, -143, 0xf000e101'19418b84'51469efe'81d014cc_u128}, - {Sign::POS, -143, 0xf400e891'278dfbe2'7bb98c06'd77a18b4_u128}, - {Sign::POS, -143, 0xf800f041'36546d9d'85a344d0'868bed17_u128}, - {Sign::POS, -143, 0xfc00f811'4596e0c0'f7301d69'90e307cc_u128}, - {Sign::POS, -142, 0x80008000'aaabaaac'4446eef3'8140138f_u128}, - {Sign::POS, -142, 0x82008408'b2cbe5b8'10f5e432'96105497_u128}, - {Sign::POS, -142, 0x84008820'bb2d2189'edbd4f83'ef63f730_u128}, - {Sign::POS, -142, 0x86008c48'c3d05e27'feb654fd'541c638e_u128}, - {Sign::POS, -142, 0x88009080'ccb69b98'7ffadeb8'882f7674_u128}, - {Sign::POS, -142, 0x8a0094c8'd5e0d9e1'c5a59fd3'6bd44397_u128}, - {Sign::POS, -142, 0x8c009920'df50190a'3bd21770'1b27dddb_u128}, - {Sign::POS, -142, 0x8e009d88'e9055918'669c93b5'0e4a2595_u128}, - {Sign::POS, -142, 0x9000a200'f3019a12'e22234cd'39f29cd4_u128}, - {Sign::POS, -142, 0x9200a688'fd45dc00'6280efe8'307d41d9_u128}, - {Sign::POS, -142, 0x9400ab21'07d31ee7'b3d7923a'436f6fc4_u128}, - {Sign::POS, -142, 0x9600afc9'12aa62cf'ba45c3fc'a574c5a0_u128}, - {Sign::POS, -142, 0x9800b481'1dcca7bf'71ec0b6d'8cd413d1_u128}, - {Sign::POS, -142, 0x9a00b949'293aedbd'eeebcfd0'565c5006_u128}, - {Sign::POS, -142, 0x9c00be21'34f634d2'5d675c6d'a8c98fc3_u128}, - {Sign::POS, -142, 0x9e00c309'40ff7d04'0181e393'98a2099a_u128}, - {Sign::POS, -142, 0xa000c801'4d57c65a'375f8195'cc8b1d29_u128}, + {Sign::NEG, -142, MType({0x374b294076d669c3, 0x9fff38014d52e45a})}, + {Sign::NEG, -142, MType({0x7f6f05dcdbeb776e, 0x9dff3cf940fad85a})}, + {Sign::NEG, -142, 
MType({0x3d55e21d41bbadf9, 0x9bff41e134f1cb36})}, + {Sign::NEG, -142, MType({0xccdba2d54aadbc5c, 0x99ff46b92936bcf4})}, + {Sign::NEG, -142, MType({0x71dd16d3073f79b2, 0x97ff4b811dc8ad9d})}, + {Sign::NEG, -142, MType({0x5837f3df1a58dd48, 0x95ff503912a69d37})}, + {Sign::NEG, -142, MType({0x93cad3bcdd26fd6d, 0x93ff54e107cf8bc9})}, + {Sign::NEG, -142, MType({0x2075312a827f14fa, 0x91ff5978fd42795b})}, + {Sign::NEG, -142, MType({0xe21764e139c98f60, 0x8fff5e00f2fe65f2})}, + {Sign::NEG, -142, MType({0xa492a29551751b4c, 0x8dff6278e9025197})}, + {Sign::NEG, -142, MType({0x1bc8f5f658f1c3a2, 0x8bff66e0df4d3c50})}, + {Sign::NEG, -142, MType({0xe39d3faf42340ed7, 0x89ff6b38d5de2622})}, + {Sign::NEG, -142, MType({0x7ff3326682c02485, 0x87ff6f80ccb40f16})}, + {Sign::NEG, -142, MType({0x5caf4fbe343cf928, 0x85ff73b8c3cdf731})}, + {Sign::NEG, -142, MType({0xcdb6e554348f7fe8, 0x83ff77e0bb2ade79})}, + {Sign::NEG, -142, MType({0xef009c2457de25d, 0x81ff7bf8b2c9c4f6})}, + {Sign::NEG, -143, MType({0x8883333c57b57c74, 0xffff000155535558})}, + {Sign::NEG, -143, MType({0xf32668f39c70d183, 0xfbff07f145931f44})}, + {Sign::NEG, -143, MType({0x459a73c6a6486fe3, 0xf7ff0fc13650e7bd})}, + {Sign::NEG, -143, MType({0x37b18cca7dd3a29f, 0xf3ff1771278aaecd})}, + {Sign::NEG, -143, MType({0x513f610d21bcfc78, 0xefff1f01193e7480})}, + {Sign::NEG, -143, MType({0xea190b95c0690b7b, 0xebff26710b6a38e1})}, + {Sign::NEG, -143, MType({0x2a150f64f0ad1743, 0xe7ff2dc0fe0bfbfd})}, + {Sign::NEG, -143, MType({0x90b5174e995e9d1, 0xe3ff34f0f121bddd})}, + {Sign::NEG, -143, MType({0x4ed512b9b93ea2bf, 0xdfff3c00e4a97e8c})}, + {Sign::NEG, -143, MType({0x934cea217ab794a2, 0xdbff42f0d8a13e15})}, + {Sign::NEG, -143, MType({0x3e4ebe948afd2c76, 0xd7ff49c0cd06fc83})}, + {Sign::NEG, -143, MType({0x87b7c0f5bcfee2e1, 0xd3ff5070c1d8b9df})}, + {Sign::NEG, -143, MType({0x776666228cb6371b, 0xcfff5700b7147634})}, + {Sign::NEG, -143, MType({0xe53a60f3514db358, 0xcbff5d70acb8318b})}, + {Sign::NEG, -143, MType({0x79149c3b6e57fa86, 
0xc7ff63c0a2c1ebef})}, + {Sign::NEG, -143, MType({0xaad734c98416df2a, 0xc3ff69f0992fa568})}, + {Sign::NEG, -143, MType({0xc26573679ed28334, 0xbfff70008fff5e00})}, + {Sign::NEG, -143, MType({0xd7a3c6db6540809f, 0xbbff75f0872f15c0})}, + {Sign::NEG, -143, MType({0xd277bde645fb1aad, 0xb7ff7bc07ebcccb1})}, + {Sign::NEG, -143, MType({0x6ac80145a4087793, 0xb3ff817076a682dc})}, + {Sign::NEG, -143, MType({0x287c4db30271e265, 0xafff87006eea3849})}, + {Sign::NEG, -143, MType({0x637d6de42eeb151e, 0xabff8c706785ed00})}, + {Sign::NEG, -143, MType({0x43b5348b6b898a8c, 0xa7ff91c06077a10a})}, + {Sign::NEG, -143, MType({0xc10e7657978bd7f6, 0xa3ff96f059bd546e})}, + {Sign::NEG, -143, MType({0xa37503f457310e59, 0x9fff9c0053550735})}, + {Sign::NEG, -143, MType({0x82d5a40a3aa022ff, 0x9bffa0f04d3cb966})}, + {Sign::NEG, -143, MType({0xc71e0d3ee3df5f4d, 0x97ffa5c047726b08})}, + {Sign::NEG, -143, MType({0xa83ce0352bdbd79b, 0x93ffaa7041f41c23})}, + {Sign::NEG, -143, MType({0x2e21a18d4680e8e4, 0x8fffaf003cbfccbe})}, + {Sign::NEG, -143, MType({0x30bcb3e4e5dfbd28, 0x8bffb37037d37cdf})}, + {Sign::NEG, -143, MType({0x57ff51d75c66d64a, 0x87ffb7c0332d2c8d})}, + {Sign::NEG, -143, MType({0x1bdb87fdbe299f43, 0x83ffbbf02ecadbcf})}, + {Sign::NEG, -144, MType({0x88885dde02700703, 0xffff800055551555})}, + {Sign::NEG, -144, MType({0xd259ca803a0c1870, 0xf7ff87e04d94724c})}, + {Sign::NEG, -144, MType({0xe514130851c7070a, 0xefff8f80464fce8f})}, + {Sign::NEG, -144, MType({0x30a16898f3073a64, 0xe7ff96e03f832a2a})}, + {Sign::NEG, -144, MType({0xc4ed64517b2949ce, 0xdfff9e00392a8526})}, + {Sign::NEG, -144, MType({0x51e4fb4e32cf6350, 0xd7ffa4e03341df90})}, + {Sign::NEG, -144, MType({0x277672a88350bcce, 0xcfffab802dc53971})}, + {Sign::NEG, -144, MType({0x359153772a490f06, 0xc7ffb1e028b092d3})}, + {Sign::NEG, -144, MType({0xc265ece6b481a0e, 0xbfffb80023ffebc0})}, + {Sign::NEG, -144, MType({0xdb2781c03fa132f6, 0xb7ffbde01faf4440})}, + {Sign::NEG, -144, MType({0x7287c95c845ada33, 0xafffc3801bba9c5e})}, + {Sign::NEG, 
-144, MType({0x423b56b1263e5a77, 0xa7ffc8e0181df421})}, + {Sign::NEG, -144, MType({0x5a3752ca4c076fa3, 0x9fffce0014d54b91})}, + {Sign::NEG, -144, MType({0x6a71e2b27eb3f573, 0x97ffd2e011dca2b6})}, + {Sign::NEG, -144, MType({0xc2e21b72cff39d8f, 0x8fffd7800f2ff997})}, + {Sign::NEG, -144, MType({0x537ff612feb7ac9e, 0x87ffdbe00ccb503c})}, + {Sign::NEG, -145, MType({0x5888873333c57c18, 0xffffc00015554d55})}, + {Sign::NEG, -145, MType({0xfa51421842311c42, 0xefffc7c01193f9d1})}, + {Sign::NEG, -145, MType({0x2c4ed6de475b942c, 0xdfffcf000e4aa5fa})}, + {Sign::NEG, -145, MType({0xce77678cbb6fcb88, 0xcfffd5c00b7151d8})}, + {Sign::NEG, -145, MType({0xc26629a679ed3b, 0xbfffdc0008fffd78})}, + {Sign::NEG, -145, MType({0x23287cb9d3072728, 0xafffe1c006eea8e1})}, + {Sign::NEG, -145, MType({0xd5a37540fd057315, 0x9fffe7000535541c})}, + {Sign::NEG, -145, MType({0xf82e21c1fce36810, 0x8fffebc003cbff32})}, + {Sign::NEG, -146, MType({0x5588887ddde02702, 0xffffe00005555455})}, + {Sign::NEG, -146, MType({0x9ac4ed72adf5b295, 0xdfffe7800392aa14})}, + {Sign::NEG, -146, MType({0xc26648066b482, 0xbfffee00023fffaf})}, + {Sign::NEG, -146, MType({0x455a3754b292c077, 0x9ffff380014d552e})}, + {Sign::NEG, -147, MType({0x5558888833333c58, 0xfffff00001555535})}, + {Sign::NEG, -147, MType({0xe000c2665736679f, 0xbffff700008ffff5})}, + {Sign::NEG, -148, MType({0x5555888885ddde02, 0xfffff80000555551})}, + {Sign::NEG, -149, MType({0xd555588888733334, 0xfffffc0000155554})}, + {Sign::POS, 0, MType({0x0, 0x0})}, + {Sign::POS, -148, MType({0xeaaaac44444eeeef, 0x80000200000aaaaa})}, + {Sign::POS, -147, MType({0xaaaac444459999ac, 0x80000400002aaaac})}, + {Sign::POS, -147, MType({0x2000c2667596679f, 0xc00009000090000a})}, + {Sign::POS, -146, MType({0xaaac44446eeef381, 0x8000080000aaaaba})}, + {Sign::POS, -146, MType({0x655a3755f81815cc, 0xa0000c80014d557c})}, + {Sign::POS, -146, MType({0xc26684c66b482, 0xc000120002400051})}, + {Sign::POS, -146, MType({0xbac4ed7c40fb07eb, 0xe00018800392ab40})}, + {Sign::POS, -145, 
MType({0xaac44449999abe2c, 0x8000100002aaab2a})}, + {Sign::POS, -145, MType({0x82e21d79cbb6812, 0x9000144003cc00cd})}, + {Sign::POS, -145, MType({0xd5a37569adb01dc3, 0xa00019000535568d})}, + {Sign::POS, -145, MType({0x33287d01e8c9d1d9, 0xb0001e4006eeac74})}, + {Sign::POS, -145, MType({0xc266a32679ed48, 0xc000240009000288})}, + {Sign::POS, -145, MType({0xde77685122b2764b, 0xd0002a400b7158d1})}, + {Sign::POS, -145, MType({0x2c4ed810a8063f03, 0xe00031000e4aaf5b})}, + {Sign::POS, -145, MType({0xa5143e7be891c8f, 0xf00038401194062e})}, + {Sign::POS, -144, MType({0xac4444eeef3813a1, 0x800020000aaaaeaa})}, + {Sign::POS, -144, MType({0x5b7ff7fe1339025b, 0x880024200ccb5a6e})}, + {Sign::POS, -144, MType({0x42e21e26caf39e33, 0x900028800f300668})}, + {Sign::POS, -144, MType({0xf271e66fa5554bc6, 0x98002d2011dcb29e})}, + {Sign::POS, -144, MType({0x5a3757e0615cc676, 0xa000320014d55f19})}, + {Sign::POS, -144, MType({0xca3b5d8210ca5cab, 0xa8003720181e0bde})}, + {Sign::POS, -144, MType({0xf287d25f3cb032bb, 0xb0003c801bbab8f6})}, + {Sign::POS, -144, MType({0xe3278d840be28cdb, 0xb80042201faf6669})}, + {Sign::POS, -144, MType({0xc266dfe6b482076, 0xc000480024001440})}, + {Sign::POS, -144, MType({0x3d9166de380a6d3d, 0xc8004e2028b0c282})}, + {Sign::POS, -144, MType({0xa7768b356ba61e4b, 0xd00054802dc57139})}, + {Sign::POS, -144, MType({0xd9e51a1849db73c1, 0xd8005b203342206f})}, + {Sign::POS, -144, MType({0xc4ed8a9d907eb521, 0xe0006200392ad02e})}, + {Sign::POS, -144, MType({0xb8a197dea928acd7, 0xe80069203f838080})}, + {Sign::POS, -144, MType({0x65144cf7dcc72d3b, 0xf000708046503170})}, + {Sign::POS, -144, MType({0xda5a1108890d9f6a, 0xf80078204d94e308})}, + {Sign::POS, -143, MType({0xc4445999abe2ce2c, 0x800040002aaacaaa})}, + {Sign::POS, -143, MType({0x1fdbbb4f3bffc832, 0x840044102ecb2431})}, + {Sign::POS, -143, MType({0x97ff8f39ec91b4ee, 0x88004840332d7e1d})}, + {Sign::POS, -143, MType({0x74bcfcf0b3f0a95d, 0x8c004c9037d3d876})}, + {Sign::POS, -143, MType({0x2e21f80ca6813aff, 
0x900051003cc03342})}, + {Sign::POS, -143, MType({0x6c3d4629170ce87f, 0x9400559041f48e87})}, + {Sign::POS, -143, MType({0x71e84e3b80a8881, 0x98005a404772ea4d})}, + {Sign::POS, -143, MType({0x6d62fdcbdd6bec3, 0x9c005f104d3d469a})}, + {Sign::POS, -143, MType({0xa375a6b701dc77c0, 0xa00064005355a375})}, + {Sign::POS, -143, MType({0x450f331826ad6b05, 0xa400691059be00e7})}, + {Sign::POS, -143, MType({0x83b60ea8bd0aa459, 0xa8006e4060785ef6})}, + {Sign::POS, -143, MType({0x277e691469dd13f5, 0xac0073906786bdab})}, + {Sign::POS, -143, MType({0x287d6e0a0d1e25eb, 0xb00079006eeb1d0d})}, + {Sign::POS, -143, MType({0xaec94b3be9b060f5, 0xb4007e9076a77d24})}, + {Sign::POS, -143, MType({0x1279365fce280cce, 0xb80084407ebdddfa})}, + {Sign::POS, -143, MType({0xdba5732f3e83e04a, 0xbc008a1087303f95})}, + {Sign::POS, -143, MType({0xc26759679ed5b754, 0xc00090009000a200})}, + {Sign::POS, -143, MType({0xaed95aca5edb5109, 0xc400961099310543})}, + {Sign::POS, -143, MType({0xb917091d2687160f, 0xc8009c40a2c36967})}, + {Sign::POS, -143, MType({0x293d1c2a0378e75d, 0xcc00a290acb9ce76})}, + {Sign::POS, -143, MType({0x776977bf9766f5a7, 0xd000a900b7163478})}, + {Sign::POS, -143, MType({0x4bbb31b14776a18b, 0xd400af90c1da9b78})}, + {Sign::POS, -143, MType({0x7e5297d76c8564ba, 0xd800b640cd09037f})}, + {Sign::POS, -143, MType({0x1751360f8461c447, 0xdc00bd10d8a36c98})}, + {Sign::POS, -143, MType({0x4ed9dc3c63f44c41, 0xe000c400e4abd6cc})}, + {Sign::POS, -143, MType({0x8d10a4466a5894d5, 0xe400cb10f1244226})}, + {Sign::POS, -143, MType({0x6a1af81bb4e6510e, 0xe800d240fe0eaeb1})}, + {Sign::POS, -143, MType({0xae1f97b0542a677a, 0xec00d9910b6d1c77})}, + {Sign::POS, -143, MType({0x51469efe81d014cc, 0xf000e10119418b84})}, + {Sign::POS, -143, MType({0x7bb98c06d77a18b4, 0xf400e891278dfbe2})}, + {Sign::POS, -143, MType({0x85a344d0868bed17, 0xf800f04136546d9d})}, + {Sign::POS, -143, MType({0xf7301d6990e307cc, 0xfc00f8114596e0c0})}, + {Sign::POS, -142, MType({0x4446eef38140138f, 0x80008000aaabaaac})}, + {Sign::POS, 
-142, MType({0x10f5e43296105497, 0x82008408b2cbe5b8})}, + {Sign::POS, -142, MType({0xedbd4f83ef63f730, 0x84008820bb2d2189})}, + {Sign::POS, -142, MType({0xfeb654fd541c638e, 0x86008c48c3d05e27})}, + {Sign::POS, -142, MType({0x7ffadeb8882f7674, 0x88009080ccb69b98})}, + {Sign::POS, -142, MType({0xc5a59fd36bd44397, 0x8a0094c8d5e0d9e1})}, + {Sign::POS, -142, MType({0x3bd217701b27dddb, 0x8c009920df50190a})}, + {Sign::POS, -142, MType({0x669c93b50e4a2595, 0x8e009d88e9055918})}, + {Sign::POS, -142, MType({0xe22234cd39f29cd4, 0x9000a200f3019a12})}, + {Sign::POS, -142, MType({0x6280efe8307d41d9, 0x9200a688fd45dc00})}, + {Sign::POS, -142, MType({0xb3d7923a436f6fc4, 0x9400ab2107d31ee7})}, + {Sign::POS, -142, MType({0xba45c3fca574c5a0, 0x9600afc912aa62cf})}, + {Sign::POS, -142, MType({0x71ec0b6d8cd413d1, 0x9800b4811dcca7bf})}, + {Sign::POS, -142, MType({0xeeebcfd0565c5006, 0x9a00b949293aedbd})}, + {Sign::POS, -142, MType({0x5d675c6da8c98fc3, 0x9c00be2134f634d2})}, + {Sign::POS, -142, MType({0x181e39398a2099a, 0x9e00c30940ff7d04})}, + {Sign::POS, -142, MType({0x375f8195cc8b1d29, 0xa000c8014d57c65a})}, }, // -log(r) for the fourth step, generated by SageMath with: @@ -563,139 +560,139 @@ alignas(64) const LogRR LOG_TABLE = { // r = 2^-28 * round( 2^28 / (1 + i*2^(-28)) ); // s, m, e = RealField(128)(r).log().sign_mantissa_exponent(); // print("{Sign::POS," if (s == -1) else "{Sign::NEG,", e, ", - // format_hex(m), "},"); + // MType({", hex(m % 2^64), ",", hex((m >> 64) % 2^64), "})},"); /* .step_4 = */ { - {Sign::NEG, -149, 0x81fffef7'f002cb2b'4cd24d68'ff2f11ae_u128}, - {Sign::NEG, -150, 0xfffffe00'00055555'45555588'8887ddde_u128}, - {Sign::NEG, -150, 0xfbfffe0f'e0051653'f0fa101f'52b3971f_u128}, - {Sign::NEG, -150, 0xf7fffe1f'8004d94a'9c9329d6'59ed3734_u128}, - {Sign::NEG, -150, 0xf3fffe2e'e0049e31'4821006d'9b58462e_u128}, - {Sign::NEG, -150, 0xeffffe3e'000464ff'f3a3f025'142f8c21_u128}, - {Sign::NEG, -150, 0xebfffe4c'e0042dae'9f1c53bc'c1c4b11c_u128}, - {Sign::NEG, -150, 
0xe7fffe5b'8003f835'4a8a8474'a17fdd30_u128}, - {Sign::NEG, -150, 0xe3fffe69'e003c48b'f5eeda0c'b0df586d_u128}, - {Sign::NEG, -150, 0xdffffe78'000392aa'a149aac4'ed772adf_u128}, - {Sign::NEG, -150, 0xdbfffe85'e0036289'4c9b4b5d'54f0bc96_u128}, - {Sign::NEG, -150, 0xd7fffe93'8003341f'f7e40f15'e50a759f_u128}, - {Sign::NEG, -150, 0xd3fffea0'e0030766'a32447ae'9b975e05_u128}, - {Sign::NEG, -150, 0xcffffeae'0002dc55'4e5c4567'767ebdd5_u128}, - {Sign::NEG, -150, 0xcbfffeba'e002b2e3'f98c5700'73bbbd19_u128}, - {Sign::NEG, -150, 0xc7fffec7'80028b0a'a4b4c9b9'915d03dd_u128}, - {Sign::NEG, -150, 0xc3fffed3'e00264c1'4fd5e952'cd845a28_u128}, - {Sign::NEG, -150, 0xbffffee0'00023fff'faf0000c'26664806_u128}, - {Sign::NEG, -150, 0xbbfffeeb'e0021cbe'a60356a5'9a49b57f_u128}, - {Sign::NEG, -150, 0xb7fffef7'8001faf5'5110345f'27878a9b_u128}, - {Sign::NEG, -150, 0xb3ffff02'e001da9b'fc16def8'cc8a4f61_u128}, - {Sign::NEG, -150, 0xafffff0e'0001bbaa'a7179ab2'87cdcbd8_u128}, - {Sign::NEG, -150, 0xabffff18'e0019e19'5212aa4c'57dea809_u128}, - {Sign::NEG, -150, 0xa7ffff23'800181df'fd084f06'3b5a0bf8_u128}, - {Sign::NEG, -150, 0xa3ffff2d'e00166f6'a7f8c8a0'30ed3fab_u128}, - {Sign::NEG, -150, 0x9fffff38'00014d55'52e4555a'37554b29_u128}, - {Sign::NEG, -150, 0x9bffff41'e00134f3'fdcb31f4'4d5e9676_u128}, - {Sign::NEG, -150, 0x97ffff4b'80011dca'a8ad99ae'71e48997_u128}, - {Sign::NEG, -150, 0x93ffff54'e00107d1'538bc648'a3d12c90_u128}, - {Sign::NEG, -150, 0x8fffff5e'0000f2ff'fe65f002'e21cc765_u128}, - {Sign::NEG, -150, 0x8bffff66'e000df4e'a93c4d9d'2bcd821a_u128}, - {Sign::NEG, -150, 0x87ffff6f'8000ccb5'540f1457'7ff704b2_u128}, - {Sign::NEG, -150, 0x83ffff77'e000bb2b'fede77f1'ddba1731_u128}, - {Sign::NEG, -151, 0xffffff00'00015555'53555558'88888333_u128}, - {Sign::NEG, -151, 0xf7ffff0f'c0013652'a8e7ba8d'659ed7dc_u128}, - {Sign::NEG, -151, 0xefffff1f'0001193f'fe747e02'5142fc61_u128}, - {Sign::NEG, -151, 0xe7ffff2d'c000fe0d'53fbfb37'4a1800c7_u128}, - {Sign::NEG, -151, 0xdfffff3c'0000e4aa'a97e8aac'4ed77513_u128}, - 
{Sign::NEG, -151, 0xd7ffff49'c000cd07'fefc81e1'5e50a947_u128}, - {Sign::NEG, -151, 0xcfffff57'0000b715'54763356'7767ed66_u128}, - {Sign::NEG, -151, 0xc7ffff63'c000a2c2'a9ebee8b'9915d174_u128}, - {Sign::NEG, -151, 0xbfffff70'00008fff'ff5e0000'c2666573_u128}, - {Sign::NEG, -151, 0xb7ffff7b'c0007ebd'54ccb135'f2787966_u128}, - {Sign::NEG, -151, 0xafffff87'00006eea'aa3848ab'287cdd4e_u128}, - {Sign::NEG, -151, 0xa7ffff91'c0006077'ffa109e0'63b5a12d_u128}, - {Sign::NEG, -151, 0x9fffff9c'00005355'55073555'a3755504_u128}, - {Sign::NEG, -151, 0x97ffffa5'c0004772'aa6b088a'e71e48d5_u128}, - {Sign::NEG, -151, 0x8fffffaf'00003cbf'ffccbe00'2e21cca2_u128}, - {Sign::NEG, -151, 0x87ffffb7'c000332d'552c8d35'77ff706a_u128}, - {Sign::NEG, -152, 0xffffff80'00005555'55155555'8888885e_u128}, - {Sign::NEG, -152, 0xefffff8f'8000464f'ffce8fc0'25142fe3_u128}, - {Sign::NEG, -152, 0xdfffff9e'0000392a'aa8526aa'c4ed7764_u128}, - {Sign::NEG, -152, 0xcfffffab'80002dc5'55397115'67767ee3_u128}, - {Sign::NEG, -152, 0xbfffffb8'000023ff'ffebc000'0c26665f_u128}, - {Sign::NEG, -152, 0xafffffc3'80001bba'aa9c5e6a'b287cdd9_u128}, - {Sign::NEG, -152, 0x9fffffce'000014d5'554b9155'5a375553_u128}, - {Sign::NEG, -152, 0x8fffffd7'80000f2f'fff997c0'02e21ccb_u128}, - {Sign::NEG, -153, 0xffffffc0'00001555'554d5555'58888887_u128}, - {Sign::NEG, -153, 0xdfffffcf'00000e4a'aaa5fa2a'ac4ed777_u128}, - {Sign::NEG, -153, 0xbfffffdc'000008ff'fffd7800'00c26666_u128}, - {Sign::NEG, -153, 0x9fffffe7'00000535'55541cd5'55a37555_u128}, - {Sign::NEG, -154, 0xffffffe0'00000555'55545555'55888888_u128}, - {Sign::NEG, -154, 0xbfffffee'0000023f'ffffaf00'000c2666_u128}, - {Sign::NEG, -155, 0xfffffff0'00000155'55553555'55588889_u128}, - {Sign::NEG, -156, 0xfffffff8'00000055'55555155'55558889_u128}, - {Sign::POS, 0, 0_u128}, - {Sign::POS, -155, 0x80000004'0000002a'aaaaacaa'aaaac444_u128}, - {Sign::POS, -154, 0x80000008'000000aa'aaaabaaa'aaac4444_u128}, - {Sign::POS, -154, 0xc0000012'00000240'00005100'000c2666_u128}, - {Sign::POS, -153, 
0x80000010'000002aa'aaab2aaa'aac44444_u128}, - {Sign::POS, -153, 0xa0000019'00000535'55568dd5'55a37555_u128}, - {Sign::POS, -153, 0xc0000024'00000900'00028800'00c26667_u128}, - {Sign::POS, -153, 0xe0000031'00000e4a'aaaf5b2a'ac4ed778_u128}, - {Sign::POS, -152, 0x80000020'00000aaa'aaaeaaaa'ac444445_u128}, - {Sign::POS, -152, 0x90000028'80000f30'00066840'02e21cce_u128}, - {Sign::POS, -152, 0xa0000032'000014d5'555f1955'5a375558_u128}, - {Sign::POS, -152, 0xb000003c'80001bba'aab8f6ea'b287cde2_u128}, - {Sign::POS, -152, 0xc0000048'00002400'00144000'0c26666e_u128}, - {Sign::POS, -152, 0xd0000054'80002dc5'55713995'67767efb_u128}, - {Sign::POS, -152, 0xe0000062'0000392a'aad02eaa'c4ed778b_u128}, - {Sign::POS, -152, 0xf0000070'80004650'00317040'2514301d_u128}, - {Sign::POS, -151, 0x80000040'00002aaa'aacaaaaa'c444445a_u128}, - {Sign::POS, -151, 0x88000048'4000332d'557e1d75'77ff70a7_u128}, - {Sign::POS, -151, 0x90000051'00003cc0'00334200'2e21ccf8_u128}, - {Sign::POS, -151, 0x9800005a'40004772'aaea4cca'e71e494d_u128}, - {Sign::POS, -151, 0xa0000064'00005355'55a37555'a37555a7_u128}, - {Sign::POS, -151, 0xa800006e'40006078'005ef620'63b5a207_u128}, - {Sign::POS, -151, 0xb0000079'00006eea'ab1d0cab'287cde6e_u128}, - {Sign::POS, -151, 0xb8000084'40007ebd'55ddf975'f2787ade_u128}, - {Sign::POS, -151, 0xc0000090'00009000'00a20000'c2666759_u128}, - {Sign::POS, -151, 0xc800009c'4000a2c2'ab6966cb'9915d3e1_u128}, - {Sign::POS, -151, 0xd00000a9'0000b715'56347756'7767f078_u128}, - {Sign::POS, -151, 0xd80000b6'4000cd08'01037e21'5e50ad20_u128}, - {Sign::POS, -151, 0xe00000c4'0000e4aa'abd6caac'4ed779dc_u128}, - {Sign::POS, -151, 0xe80000d2'4000fe0d'56aeaf77'4a1806b0_u128}, - {Sign::POS, -151, 0xf00000e1'00011940'018b8202'5143039f_u128}, - {Sign::POS, -151, 0xf80000f0'40013652'ac6d9acd'659ee0ad_u128}, - {Sign::POS, -150, 0x80000080'0000aaaa'abaaaaac'444446ef_u128}, - {Sign::POS, -150, 0x84000088'2000bb2c'01218811'ddba1d9b_u128}, - {Sign::POS, -150, 0x88000090'8000ccb5'569b9657'7ff70c5f_u128}, - 
{Sign::POS, -150, 0x8c000099'2000df4e'ac1907bd'2bcd8b3b_u128}, - {Sign::POS, -150, 0x900000a2'0000f300'019a1002'e21cd235_u128}, - {Sign::POS, -150, 0x940000ab'200107d1'571ee468'a3d1394e_u128}, - {Sign::POS, -150, 0x980000b4'80011dca'aca7bbae'71e4988b_u128}, - {Sign::POS, -150, 0x9c0000be'200134f4'0234ce14'4d5ea7f0_u128}, - {Sign::POS, -150, 0xa00000c8'00014d55'57c6555a'37555f82_u128}, - {Sign::POS, -150, 0xa40000d2'200166f6'ad5c8cc0'30ed5744_u128}, - {Sign::POS, -150, 0xa80000dc'800181e0'02f7b106'3b5a273b_u128}, - {Sign::POS, -150, 0xac0000e7'20019e19'5898006c'57dec76f_u128}, - {Sign::POS, -150, 0xb00000f2'0001bbaa'ae3dbab2'87cdefe3_u128}, - {Sign::POS, -150, 0xb40000fd'2001da9c'03e92118'cc8a789f_u128}, - {Sign::POS, -150, 0xb8000108'8001faf5'599a765f'2787b9aa_u128}, - {Sign::POS, -150, 0xbc000114'20021cbe'af51fec5'9a49eb0a_u128}, - {Sign::POS, -150, 0xc0000120'00024000'0510000c'266684c6_u128}, - {Sign::POS, -150, 0xc400012c'200264c1'5ad4c172'cd849ee9_u128}, - {Sign::POS, -150, 0xc8000138'80028b0a'b0a08bb9'915d5179_u128}, - {Sign::POS, -150, 0xcc000145'2002b2e4'0673a920'73bc1480_u128}, - {Sign::POS, -150, 0xd0000152'0002dc55'5c4e6567'767f2009_u128}, - {Sign::POS, -150, 0xd400015f'20030766'b2310dce'9b97cc1d_u128}, - {Sign::POS, -150, 0xd800016c'80033420'081bf115'e50af0c7_u128}, - {Sign::POS, -150, 0xdc00017a'20036289'5e0f5f7d'54f14614_u128}, - {Sign::POS, -150, 0xe0000188'000392aa'b40baac4'ed77c410_u128}, - {Sign::POS, -150, 0xe4000196'2003c48c'0a11262c'b0e002c7_u128}, - {Sign::POS, -150, 0xe80001a4'8003f835'60202674'a1809a47_u128}, - {Sign::POS, -150, 0xec0001b3'20042dae'b63901dc'c1c582a0_u128}, - {Sign::POS, -150, 0xf00001c2'00046500'0c5c1025'143073df_u128}, - {Sign::POS, -150, 0xf40001d1'20049e31'6289aa8d'9b594616_u128}, - {Sign::POS, -150, 0xf80001e0'8004d94a'b8c22bd6'59ee5155_u128}, - {Sign::POS, -150, 0xfc0001f0'20051654'0f05f03f'52b4cdae_u128}, - {Sign::POS, -149, 0x80000100'0002aaaa'b2aaaac4'4444999a_u128}, + {Sign::NEG, -149, MType({0x4cd24d68ff2f11ae, 
0x81fffef7f002cb2b})}, + {Sign::NEG, -150, MType({0x455555888887ddde, 0xfffffe0000055555})}, + {Sign::NEG, -150, MType({0xf0fa101f52b3971f, 0xfbfffe0fe0051653})}, + {Sign::NEG, -150, MType({0x9c9329d659ed3734, 0xf7fffe1f8004d94a})}, + {Sign::NEG, -150, MType({0x4821006d9b58462e, 0xf3fffe2ee0049e31})}, + {Sign::NEG, -150, MType({0xf3a3f025142f8c21, 0xeffffe3e000464ff})}, + {Sign::NEG, -150, MType({0x9f1c53bcc1c4b11c, 0xebfffe4ce0042dae})}, + {Sign::NEG, -150, MType({0x4a8a8474a17fdd30, 0xe7fffe5b8003f835})}, + {Sign::NEG, -150, MType({0xf5eeda0cb0df586d, 0xe3fffe69e003c48b})}, + {Sign::NEG, -150, MType({0xa149aac4ed772adf, 0xdffffe78000392aa})}, + {Sign::NEG, -150, MType({0x4c9b4b5d54f0bc96, 0xdbfffe85e0036289})}, + {Sign::NEG, -150, MType({0xf7e40f15e50a759f, 0xd7fffe938003341f})}, + {Sign::NEG, -150, MType({0xa32447ae9b975e05, 0xd3fffea0e0030766})}, + {Sign::NEG, -150, MType({0x4e5c4567767ebdd5, 0xcffffeae0002dc55})}, + {Sign::NEG, -150, MType({0xf98c570073bbbd19, 0xcbfffebae002b2e3})}, + {Sign::NEG, -150, MType({0xa4b4c9b9915d03dd, 0xc7fffec780028b0a})}, + {Sign::NEG, -150, MType({0x4fd5e952cd845a28, 0xc3fffed3e00264c1})}, + {Sign::NEG, -150, MType({0xfaf0000c26664806, 0xbffffee000023fff})}, + {Sign::NEG, -150, MType({0xa60356a59a49b57f, 0xbbfffeebe0021cbe})}, + {Sign::NEG, -150, MType({0x5110345f27878a9b, 0xb7fffef78001faf5})}, + {Sign::NEG, -150, MType({0xfc16def8cc8a4f61, 0xb3ffff02e001da9b})}, + {Sign::NEG, -150, MType({0xa7179ab287cdcbd8, 0xafffff0e0001bbaa})}, + {Sign::NEG, -150, MType({0x5212aa4c57dea809, 0xabffff18e0019e19})}, + {Sign::NEG, -150, MType({0xfd084f063b5a0bf8, 0xa7ffff23800181df})}, + {Sign::NEG, -150, MType({0xa7f8c8a030ed3fab, 0xa3ffff2de00166f6})}, + {Sign::NEG, -150, MType({0x52e4555a37554b29, 0x9fffff3800014d55})}, + {Sign::NEG, -150, MType({0xfdcb31f44d5e9676, 0x9bffff41e00134f3})}, + {Sign::NEG, -150, MType({0xa8ad99ae71e48997, 0x97ffff4b80011dca})}, + {Sign::NEG, -150, MType({0x538bc648a3d12c90, 0x93ffff54e00107d1})}, + {Sign::NEG, 
-150, MType({0xfe65f002e21cc765, 0x8fffff5e0000f2ff})}, + {Sign::NEG, -150, MType({0xa93c4d9d2bcd821a, 0x8bffff66e000df4e})}, + {Sign::NEG, -150, MType({0x540f14577ff704b2, 0x87ffff6f8000ccb5})}, + {Sign::NEG, -150, MType({0xfede77f1ddba1731, 0x83ffff77e000bb2b})}, + {Sign::NEG, -151, MType({0x5355555888888333, 0xffffff0000015555})}, + {Sign::NEG, -151, MType({0xa8e7ba8d659ed7dc, 0xf7ffff0fc0013652})}, + {Sign::NEG, -151, MType({0xfe747e025142fc61, 0xefffff1f0001193f})}, + {Sign::NEG, -151, MType({0x53fbfb374a1800c7, 0xe7ffff2dc000fe0d})}, + {Sign::NEG, -151, MType({0xa97e8aac4ed77513, 0xdfffff3c0000e4aa})}, + {Sign::NEG, -151, MType({0xfefc81e15e50a947, 0xd7ffff49c000cd07})}, + {Sign::NEG, -151, MType({0x547633567767ed66, 0xcfffff570000b715})}, + {Sign::NEG, -151, MType({0xa9ebee8b9915d174, 0xc7ffff63c000a2c2})}, + {Sign::NEG, -151, MType({0xff5e0000c2666573, 0xbfffff7000008fff})}, + {Sign::NEG, -151, MType({0x54ccb135f2787966, 0xb7ffff7bc0007ebd})}, + {Sign::NEG, -151, MType({0xaa3848ab287cdd4e, 0xafffff8700006eea})}, + {Sign::NEG, -151, MType({0xffa109e063b5a12d, 0xa7ffff91c0006077})}, + {Sign::NEG, -151, MType({0x55073555a3755504, 0x9fffff9c00005355})}, + {Sign::NEG, -151, MType({0xaa6b088ae71e48d5, 0x97ffffa5c0004772})}, + {Sign::NEG, -151, MType({0xffccbe002e21cca2, 0x8fffffaf00003cbf})}, + {Sign::NEG, -151, MType({0x552c8d3577ff706a, 0x87ffffb7c000332d})}, + {Sign::NEG, -152, MType({0x551555558888885e, 0xffffff8000005555})}, + {Sign::NEG, -152, MType({0xffce8fc025142fe3, 0xefffff8f8000464f})}, + {Sign::NEG, -152, MType({0xaa8526aac4ed7764, 0xdfffff9e0000392a})}, + {Sign::NEG, -152, MType({0x5539711567767ee3, 0xcfffffab80002dc5})}, + {Sign::NEG, -152, MType({0xffebc0000c26665f, 0xbfffffb8000023ff})}, + {Sign::NEG, -152, MType({0xaa9c5e6ab287cdd9, 0xafffffc380001bba})}, + {Sign::NEG, -152, MType({0x554b91555a375553, 0x9fffffce000014d5})}, + {Sign::NEG, -152, MType({0xfff997c002e21ccb, 0x8fffffd780000f2f})}, + {Sign::NEG, -153, MType({0x554d555558888887, 
0xffffffc000001555})}, + {Sign::NEG, -153, MType({0xaaa5fa2aac4ed777, 0xdfffffcf00000e4a})}, + {Sign::NEG, -153, MType({0xfffd780000c26666, 0xbfffffdc000008ff})}, + {Sign::NEG, -153, MType({0x55541cd555a37555, 0x9fffffe700000535})}, + {Sign::NEG, -154, MType({0x5554555555888888, 0xffffffe000000555})}, + {Sign::NEG, -154, MType({0xffffaf00000c2666, 0xbfffffee0000023f})}, + {Sign::NEG, -155, MType({0x5555355555588889, 0xfffffff000000155})}, + {Sign::NEG, -156, MType({0x5555515555558889, 0xfffffff800000055})}, + {Sign::POS, 0, MType({0x0, 0x0})}, + {Sign::POS, -155, MType({0xaaaaacaaaaaac444, 0x800000040000002a})}, + {Sign::POS, -154, MType({0xaaaabaaaaaac4444, 0x80000008000000aa})}, + {Sign::POS, -154, MType({0x5100000c2666, 0xc000001200000240})}, + {Sign::POS, -153, MType({0xaaab2aaaaac44444, 0x80000010000002aa})}, + {Sign::POS, -153, MType({0x55568dd555a37555, 0xa000001900000535})}, + {Sign::POS, -153, MType({0x2880000c26667, 0xc000002400000900})}, + {Sign::POS, -153, MType({0xaaaf5b2aac4ed778, 0xe000003100000e4a})}, + {Sign::POS, -152, MType({0xaaaeaaaaac444445, 0x8000002000000aaa})}, + {Sign::POS, -152, MType({0x6684002e21cce, 0x9000002880000f30})}, + {Sign::POS, -152, MType({0x555f19555a375558, 0xa0000032000014d5})}, + {Sign::POS, -152, MType({0xaab8f6eab287cde2, 0xb000003c80001bba})}, + {Sign::POS, -152, MType({0x1440000c26666e, 0xc000004800002400})}, + {Sign::POS, -152, MType({0x5571399567767efb, 0xd000005480002dc5})}, + {Sign::POS, -152, MType({0xaad02eaac4ed778b, 0xe00000620000392a})}, + {Sign::POS, -152, MType({0x3170402514301d, 0xf000007080004650})}, + {Sign::POS, -151, MType({0xaacaaaaac444445a, 0x8000004000002aaa})}, + {Sign::POS, -151, MType({0x557e1d7577ff70a7, 0x880000484000332d})}, + {Sign::POS, -151, MType({0x3342002e21ccf8, 0x9000005100003cc0})}, + {Sign::POS, -151, MType({0xaaea4ccae71e494d, 0x9800005a40004772})}, + {Sign::POS, -151, MType({0x55a37555a37555a7, 0xa000006400005355})}, + {Sign::POS, -151, MType({0x5ef62063b5a207, 
0xa800006e40006078})}, + {Sign::POS, -151, MType({0xab1d0cab287cde6e, 0xb000007900006eea})}, + {Sign::POS, -151, MType({0x55ddf975f2787ade, 0xb800008440007ebd})}, + {Sign::POS, -151, MType({0xa20000c2666759, 0xc000009000009000})}, + {Sign::POS, -151, MType({0xab6966cb9915d3e1, 0xc800009c4000a2c2})}, + {Sign::POS, -151, MType({0x563477567767f078, 0xd00000a90000b715})}, + {Sign::POS, -151, MType({0x1037e215e50ad20, 0xd80000b64000cd08})}, + {Sign::POS, -151, MType({0xabd6caac4ed779dc, 0xe00000c40000e4aa})}, + {Sign::POS, -151, MType({0x56aeaf774a1806b0, 0xe80000d24000fe0d})}, + {Sign::POS, -151, MType({0x18b82025143039f, 0xf00000e100011940})}, + {Sign::POS, -151, MType({0xac6d9acd659ee0ad, 0xf80000f040013652})}, + {Sign::POS, -150, MType({0xabaaaaac444446ef, 0x800000800000aaaa})}, + {Sign::POS, -150, MType({0x1218811ddba1d9b, 0x840000882000bb2c})}, + {Sign::POS, -150, MType({0x569b96577ff70c5f, 0x880000908000ccb5})}, + {Sign::POS, -150, MType({0xac1907bd2bcd8b3b, 0x8c0000992000df4e})}, + {Sign::POS, -150, MType({0x19a1002e21cd235, 0x900000a20000f300})}, + {Sign::POS, -150, MType({0x571ee468a3d1394e, 0x940000ab200107d1})}, + {Sign::POS, -150, MType({0xaca7bbae71e4988b, 0x980000b480011dca})}, + {Sign::POS, -150, MType({0x234ce144d5ea7f0, 0x9c0000be200134f4})}, + {Sign::POS, -150, MType({0x57c6555a37555f82, 0xa00000c800014d55})}, + {Sign::POS, -150, MType({0xad5c8cc030ed5744, 0xa40000d2200166f6})}, + {Sign::POS, -150, MType({0x2f7b1063b5a273b, 0xa80000dc800181e0})}, + {Sign::POS, -150, MType({0x5898006c57dec76f, 0xac0000e720019e19})}, + {Sign::POS, -150, MType({0xae3dbab287cdefe3, 0xb00000f20001bbaa})}, + {Sign::POS, -150, MType({0x3e92118cc8a789f, 0xb40000fd2001da9c})}, + {Sign::POS, -150, MType({0x599a765f2787b9aa, 0xb80001088001faf5})}, + {Sign::POS, -150, MType({0xaf51fec59a49eb0a, 0xbc00011420021cbe})}, + {Sign::POS, -150, MType({0x510000c266684c6, 0xc000012000024000})}, + {Sign::POS, -150, MType({0x5ad4c172cd849ee9, 0xc400012c200264c1})}, + {Sign::POS, -150, 
MType({0xb0a08bb9915d5179, 0xc800013880028b0a})}, + {Sign::POS, -150, MType({0x673a92073bc1480, 0xcc0001452002b2e4})}, + {Sign::POS, -150, MType({0x5c4e6567767f2009, 0xd00001520002dc55})}, + {Sign::POS, -150, MType({0xb2310dce9b97cc1d, 0xd400015f20030766})}, + {Sign::POS, -150, MType({0x81bf115e50af0c7, 0xd800016c80033420})}, + {Sign::POS, -150, MType({0x5e0f5f7d54f14614, 0xdc00017a20036289})}, + {Sign::POS, -150, MType({0xb40baac4ed77c410, 0xe0000188000392aa})}, + {Sign::POS, -150, MType({0xa11262cb0e002c7, 0xe40001962003c48c})}, + {Sign::POS, -150, MType({0x60202674a1809a47, 0xe80001a48003f835})}, + {Sign::POS, -150, MType({0xb63901dcc1c582a0, 0xec0001b320042dae})}, + {Sign::POS, -150, MType({0xc5c1025143073df, 0xf00001c200046500})}, + {Sign::POS, -150, MType({0x6289aa8d9b594616, 0xf40001d120049e31})}, + {Sign::POS, -150, MType({0xb8c22bd659ee5155, 0xf80001e08004d94a})}, + {Sign::POS, -150, MType({0xf05f03f52b4cdae, 0xfc0001f020051654})}, + {Sign::POS, -149, MType({0xb2aaaac44444999a, 0x800001000002aaaa})}, }}; // > P = fpminimax((log(1 + x) - x)/x^2, 2, [|1, 128...|], @@ -704,9 +701,9 @@ alignas(64) const LogRR LOG_TABLE = { // > dirtyinfnorm(log(1 + x)/x - x*P, [-0x1.0002143p-29 , 0x1p-29]); // 0x1.99a3...p-121 const Float128 BIG_COEFFS[3]{ - {Sign::NEG, -129, 0x80000000'0006a710'b59c58e5'554d581c_u128}, - {Sign::POS, -129, 0xaaaaaaaa'aaaaaabd'de05c7c9'4ae9cbae_u128}, - {Sign::NEG, -128, 0x80000000'00000000'00000000'00000000_u128}, + {Sign::NEG, -129, MType({0xb59c58e5554d581c, 0x800000000006a710})}, + {Sign::POS, -129, MType({0xde05c7c94ae9cbae, 0xaaaaaaaaaaaaaabd})}, + {Sign::NEG, -128, MType({0x0, 0x8000000000000000})}, }; // Reuse the output of the fast pass range reduction. 
diff --git a/libc/src/math/generic/log10.cpp b/libc/src/math/generic/log10.cpp index 66a2e455cf59b3..b3dc8982a7fb98 100644 --- a/libc/src/math/generic/log10.cpp +++ b/libc/src/math/generic/log10.cpp @@ -14,7 +14,6 @@ #include "src/__support/FPUtil/dyadic_float.h" #include "src/__support/FPUtil/multiply_add.h" #include "src/__support/common.h" -#include "src/__support/integer_literals.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include "common_constants.h" @@ -24,8 +23,8 @@ namespace LIBC_NAMESPACE { // 128-bit precision dyadic floating point numbers. using Float128 = typename fputil::DyadicFloat<128>; +using MType = typename Float128::MantissaType; using Sign = fputil::Sign; -using LIBC_NAMESPACE::operator""_u128; namespace { @@ -38,16 +37,11 @@ constexpr double HI_ERR = 0x1.0p-85; // Extra errors from P is from using x^2 to reduce evaluation latency. constexpr double P_ERR = 0x1.0p-51; -// log10(2) with 128-bit precision generated by SageMath with: -// def format_hex(value): -// l = hex(value)[2:] -// n = 8 -// x = [l[i:i + n] for i in range(0, len(l), n)] -// return "0x" + "'".join(x) + "_uint128" -// (s, m, e) = RealField(128)(2).log10().sign_exponent_mantissa(); -// print(format_hex(m)); +// log10(2) with 128-bit precision generated by SageMath with: +// sage: (s, m, e) = RealField(128)(2).log10().sign_mantissa_exponent(); +// sage: print("MType({", hex(m % 2^64), ",", hex((m >> 64) % 2^64), "})"); const Float128 LOG10_2(Sign::POS, /*exponent=*/-129, /*mantissa=*/ - 0x9a209a84'fbcff798'8f8959ac'0b7c9178_u128); + MType({0x8f8959ac0b7c9178, 0x9a209a84fbcff798})); const LogRR LOG10_TABLE = { // -log10(r) with 128-bit precision generated by SageMath with: @@ -55,136 +49,138 @@ const LogRR LOG10_TABLE = { // for i in range(128): // r = 2^-8 * ceil( 2^8 * (1 - 2^(-8)) / (1 + i*2^(-7)) ); // s, m, e = RealField(128)(r).log10().sign_mantissa_exponent(); - // print("{Sign::POS,", e, ", format_hex(m), "},"); + // print("{Sign::POS,", e, ",
MType({", hex(m % 2^64), ",", hex((m >> 64) + // % 2^64), + // "})},"); /* .step_1 = */ { - {Sign::POS, 0, 0_u128}, - {Sign::POS, -136, 0xdf3b5ebb'da7e186b'65af394f'e05eafd3_u128}, - {Sign::POS, -135, 0xe01d4057'2f029c16'a8fb8d87'b30163b5_u128}, - {Sign::POS, -134, 0xa8c1263a'c3f57eb3'6bb0170e'5bb5d630_u128}, - {Sign::POS, -134, 0xe1e841bb'c26204e5'fc2ea6eb'0ea1370e_u128}, - {Sign::POS, -133, 0x8dc2eb02'274d6ff4'dc8a199a'4bb63382_u128}, - {Sign::POS, -133, 0xaacde920'361dd054'86b57ea6'10c7db33_u128}, - {Sign::POS, -133, 0xc81618eb'15421bab'5f034a40'e6a2f09d_u128}, - {Sign::POS, -133, 0xe59c7e66'c5fedb4b'594a31b2'c5cc891c_u128}, - {Sign::POS, -133, 0xf477584f'97b654de'221efda5'8221904b_u128}, - {Sign::POS, -132, 0x892e8219'75106e09'68a0dc47'567691c9_u128}, - {Sign::POS, -132, 0x9841c66e'17dfe7da'10bc94f4'4d216b49_u128}, - {Sign::POS, -132, 0x9fd7be33'18306cc5'e303ea7e'23c9d6fb_u128}, - {Sign::POS, -132, 0xaf1cb35b'f494a8dd'ce697dba'a00d4c7d_u128}, - {Sign::POS, -132, 0xbe8380a2'fa7eba5a'9c216079'dcf0ea96_u128}, - {Sign::POS, -132, 0xc643c775'8283a271'75278940'eecfc3a9_u128}, - {Sign::POS, -132, 0xd5de75ec'27e4fe68'2d3467d2'53e2d1fc_u128}, - {Sign::POS, -132, 0xddb904e8'f1272a95'ead4055d'cdec7b22_u128}, - {Sign::POS, -132, 0xed88f6bb'355fa196'e1e0dda0'b3d375a4_u128}, - {Sign::POS, -132, 0xf57e8281'ade9d92d'38dc40c4'fe11e608_u128}, - {Sign::POS, -131, 0x82c2941b'b20bbe1f'3bcdcfe7'b23976cd_u128}, - {Sign::POS, -131, 0x86cb3663'2807cdcd'456350b0'bda452a6_u128}, - {Sign::POS, -131, 0x8eeaa306'458b760a'78185dcc'37fda01a_u128}, - {Sign::POS, -131, 0x93018395'12fc1168'307643ad'bbbde1b3_u128}, - {Sign::POS, -131, 0x9b3dd1d5'50c41443'6c449d40'9f883fe3_u128}, - {Sign::POS, -131, 0x9f6356aa'03c34389'8ea7b30c'8b4ad886_u128}, - {Sign::POS, -131, 0xa7bd56cd'de5d76a2'961c6e69'0d8879b4_u128}, - {Sign::POS, -131, 0xabf1ea3e'1d7bd7cf'042643ce'd81ec14a_u128}, - {Sign::POS, -131, 0xb02b9af7'4c2f879e'4742fb3d'0b5cdd19_u128}, - {Sign::POS, -131, 
0xb8ae8671'b3d7dd6c'f7e2ab36'f09e9014_u128}, - {Sign::POS, -131, 0xbcf7dabd'87c01afc'8d3fc634'85e7ff13_u128}, - {Sign::POS, -131, 0xc1467f69'4d10a581'f3edc493'75fbc5a5_u128}, - {Sign::POS, -131, 0xc9f3ef07'e1f3fc5e'5fcd7d0c'e937375f_u128}, - {Sign::POS, -131, 0xce52d50b'94fa253a'58252dad'a9f06111_u128}, - {Sign::POS, -131, 0xd2b74192'fae43777'62f01e5f'f43708ab_u128}, - {Sign::POS, -131, 0xd72142a8'4ca85abd'481d9b31'31f52639_u128}, - {Sign::POS, -131, 0xdb90e68b'8abf14af'b305ced1'419fe924_u128}, - {Sign::POS, -131, 0xe48150cf'32888b9c'849266a8'5513dc6d_u128}, - {Sign::POS, -131, 0xe90234c6'5a15e533'080ecf32'66b4dcf4_u128}, - {Sign::POS, -131, 0xed88f6bb'355fa196'e1e0dda0'b3d375a4_u128}, - {Sign::POS, -131, 0xf215a60b'6557943f'ce3537a3'a211b25b_u128}, - {Sign::POS, -131, 0xf6a85251'3757dfbd'5dab6830'7fedefcd_u128}, - {Sign::POS, -131, 0xffdfe15d'e3c01bac'1be2585c'279c50a5_u128}, - {Sign::POS, -130, 0x8242724a'155219f3'18aa3021'71017dcb_u128}, - {Sign::POS, -130, 0x849812d0'ccbb5cbd'abc7e698'502d43c0_u128}, - {Sign::POS, -130, 0x86f0dab1'ab5822b6'c339089a'51663370_u128}, - {Sign::POS, -130, 0x894cd27d'9f182c63'26f70b34'ce5cf201_u128}, - {Sign::POS, -130, 0x8bac02e8'ac3e09ac'676f20a8'7ab433df_u128}, - {Sign::POS, -130, 0x8e0e74ca'ae062e24'6db4169c'c4b83bc3_u128}, - {Sign::POS, -130, 0x90743120'1c7f651a'cd3fdb2f'ad0d1fd6_u128}, - {Sign::POS, -130, 0x92dd410a'd7bfe103'49d03e16'3250d1d4_u128}, - {Sign::POS, -130, 0x9549add2'f8a3c7e0'9ec7dc02'd5e723b9_u128}, - {Sign::POS, -130, 0x97b980e7'a743d71c'34698d03'a5442573_u128}, - {Sign::POS, -130, 0x9a2cc3df'f7548556'0522904d'1e47f3de_u128}, - {Sign::POS, -130, 0x9ca3807b'ca9fe93f'791a7264'6c87b976_u128}, - {Sign::POS, -130, 0x9f1dc0a4'b9cea286'3826f190'd655d736_u128}, - {Sign::POS, -130, 0xa19b8e6f'03b60e45'544ab3e4'8199b299_u128}, - {Sign::POS, -130, 0xa41cf41a'83643487'be775fa8'2961114e_u128}, - {Sign::POS, -130, 0xa6a1fc13'ad241953'45798e50'19e6c082_u128}, - {Sign::POS, -130, 0xa92ab0f4'92b772bd'91fb1ed0'cdc4d1fb_u128}, - 
{Sign::POS, -130, 0xabb71d85'ef05380d'818b8b9c'bbd17b72_u128}, - {Sign::POS, -130, 0xae474cc0'397f0d4f'a50c2fea'60c5b3b2_u128}, - {Sign::POS, -130, 0xb0db49cc'c1823c8e'58ea3498'0ad8b720_u128}, - {Sign::POS, -130, 0xb3732006'd1fbbba5'4b5f7194'1be508a4_u128}, - {Sign::POS, -130, 0xb60edafc'dd99ad1d'9e405fb8'bcb1ff1e_u128}, - {Sign::POS, -130, 0xb60edafc'dd99ad1d'9e405fb8'bcb1ff1e_u128}, - {Sign::POS, -130, 0xb8ae8671'b3d7dd6c'f7e2ab36'f09e9014_u128}, - {Sign::POS, -130, 0xbb522e5d'bf37f63b'c6696396'40c305bb_u128}, - {Sign::POS, -130, 0xbdf9def0'4cf980ff'a3dc9e46'4e98764b_u128}, - {Sign::POS, -130, 0xc0a5a490'dea95b5e'ffd3256b'59fa9c59_u128}, - {Sign::POS, -130, 0xc3558be0'85e3f4bc'b0a2d486'72a051a5_u128}, - {Sign::POS, -130, 0xc3558be0'85e3f4bc'b0a2d486'72a051a5_u128}, - {Sign::POS, -130, 0xc609a1bb'4aa98f59'acb2ca5d'4ca1c10e_u128}, - {Sign::POS, -130, 0xc8c1f339'9ca7d33b'43690b9e'3cde0d02_u128}, - {Sign::POS, -130, 0xcb7e8db1'cfe04827'18b1fd60'383f7e5a_u128}, - {Sign::POS, -130, 0xce3f7eb9'a517c969'0248757e'5f45af3d_u128}, - {Sign::POS, -130, 0xd104d427'de7fbcc4'7c4acd60'5be48bc1_u128}, - {Sign::POS, -130, 0xd104d427'de7fbcc4'7c4acd60'5be48bc1_u128}, - {Sign::POS, -130, 0xd3ce9c15'e10ec927'58ff6362'9a92652d_u128}, - {Sign::POS, -130, 0xd69ce4e1'6303fcdd'6b49be3b'd8c89f10_u128}, - {Sign::POS, -130, 0xd96fbd2e'2814c9cc'e6dd603a'881e9060_u128}, - {Sign::POS, -130, 0xd96fbd2e'2814c9cc'e6dd603a'881e9060_u128}, - {Sign::POS, -130, 0xdc4733e7'cbcbfc8c'89e281c9'8c1d705c_u128}, - {Sign::POS, -130, 0xdf235843'9aa5dd12'dc0db7cf'0cce9f32_u128}, - {Sign::POS, -130, 0xe20439c2'7a7c01b8'fdf1c5b8'46db9deb_u128}, - {Sign::POS, -130, 0xe20439c2'7a7c01b8'fdf1c5b8'46db9deb_u128}, - {Sign::POS, -130, 0xe4e9e832'e2da0c05'3dd7eab4'8869c402_u128}, - {Sign::POS, -130, 0xe7d473b2'e5db8f2a'4e8fcc90'0b41daef_u128}, - {Sign::POS, -130, 0xe7d473b2'e5db8f2a'4e8fcc90'0b41daef_u128}, - {Sign::POS, -130, 0xeac3ecb2'4a3ac7b4'7593e1a9'e917359a_u128}, - {Sign::POS, -130, 
0xedb863f4'b73f982d'e7741396'b49e1ce5_u128}, - {Sign::POS, -130, 0xedb863f4'b73f982d'e7741396'b49e1ce5_u128}, - {Sign::POS, -130, 0xf0b1ea93'f34675a7'c8ba4f8f'47b85a5c_u128}, - {Sign::POS, -130, 0xf3b09202'359f9787'7007c127'6821b705_u128}, - {Sign::POS, -130, 0xf3b09202'359f9787'7007c127'6821b705_u128}, - {Sign::POS, -130, 0xf6b46c0c'8c8fdea1'7ee19afe'6db7e324_u128}, - {Sign::POS, -130, 0xf9bd8add'584687f0'edf54f37'f6d40420_u128}, - {Sign::POS, -130, 0xf9bd8add'584687f0'edf54f37'f6d40420_u128}, - {Sign::POS, -130, 0xfccc00fe'dba4e6fb'efe52ccf'03e7dee1_u128}, - {Sign::POS, -130, 0xffdfe15d'e3c01bac'1be2585c'279c50a5_u128}, - {Sign::POS, -130, 0xffdfe15d'e3c01bac'1be2585c'279c50a5_u128}, - {Sign::POS, -129, 0x817c9fa6'43880404'e0b571f5'c91b0446_u128}, - {Sign::POS, -129, 0x830c1742'7ea55eca'7178594b'ef2def59_u128}, - {Sign::POS, -129, 0x830c1742'7ea55eca'7178594b'ef2def59_u128}, - {Sign::POS, -129, 0x849e6196'487c1d1c'9a741bb1'71158d2a_u128}, - {Sign::POS, -129, 0x849e6196'487c1d1c'9a741bb1'71158d2a_u128}, - {Sign::POS, -129, 0x863388eb'55ebd295'1a618264'446cb495_u128}, - {Sign::POS, -129, 0x87cb97c3'ff9eac18'71dbdbbe'c51d7657_u128}, - {Sign::POS, -129, 0x87cb97c3'ff9eac18'71dbdbbe'c51d7657_u128}, - {Sign::POS, -129, 0x896698dc'e4cff76c'abe0b522'230f7d14_u128}, - {Sign::POS, -129, 0x896698dc'e4cff76c'abe0b522'230f7d14_u128}, - {Sign::POS, -129, 0x8b04972e'9d4d3011'd28e8ada'fea703b4_u128}, - {Sign::POS, -129, 0x8ca59def'7b5cefc5'208422d8'3be34b27_u128}, - {Sign::POS, -129, 0x8ca59def'7b5cefc5'208422d8'3be34b27_u128}, - {Sign::POS, -129, 0x8e49b895'5e3ffb8a'c385cf49'402af0e4_u128}, - {Sign::POS, -129, 0x8e49b895'5e3ffb8a'c385cf49'402af0e4_u128}, - {Sign::POS, -129, 0x8ff0f2d7'960a075c'da982a61'4e12c6dd_u128}, - {Sign::POS, -129, 0x8ff0f2d7'960a075c'da982a61'4e12c6dd_u128}, - {Sign::POS, -129, 0x919b58b0'd999bbc8'038401fc'1c1b5c2c_u128}, - {Sign::POS, -129, 0x919b58b0'd999bbc8'038401fc'1c1b5c2c_u128}, - {Sign::POS, -129, 0x9348f661'4f821394'a9b55d3f'16da746a_u128}, - 
{Sign::POS, -129, 0x9348f661'4f821394'a9b55d3f'16da746a_u128}, - {Sign::POS, -129, 0x94f9d870'aac256a5'088d2d14'73d4f7f5_u128}, - {Sign::POS, -129, 0x94f9d870'aac256a5'088d2d14'73d4f7f5_u128}, - {Sign::POS, -129, 0x96ae0bb0'5c35d5bd'7c1e117d'ea19e9e6_u128}, - {Sign::POS, -129, 0x96ae0bb0'5c35d5bd'7c1e117d'ea19e9e6_u128}, - {Sign::POS, -129, 0x98659d3d'd9b12532'336db063'0f536fb9_u128}, - {Sign::POS, 0, 0_u128}, + {Sign::POS, 0, MType(0)}, + {Sign::POS, -136, MType({0x65af394fe05eafd3, 0xdf3b5ebbda7e186b})}, + {Sign::POS, -135, MType({0xa8fb8d87b30163b5, 0xe01d40572f029c16})}, + {Sign::POS, -134, MType({0x6bb0170e5bb5d630, 0xa8c1263ac3f57eb3})}, + {Sign::POS, -134, MType({0xfc2ea6eb0ea1370e, 0xe1e841bbc26204e5})}, + {Sign::POS, -133, MType({0xdc8a199a4bb63382, 0x8dc2eb02274d6ff4})}, + {Sign::POS, -133, MType({0x86b57ea610c7db33, 0xaacde920361dd054})}, + {Sign::POS, -133, MType({0x5f034a40e6a2f09d, 0xc81618eb15421bab})}, + {Sign::POS, -133, MType({0x594a31b2c5cc891c, 0xe59c7e66c5fedb4b})}, + {Sign::POS, -133, MType({0x221efda58221904b, 0xf477584f97b654de})}, + {Sign::POS, -132, MType({0x68a0dc47567691c9, 0x892e821975106e09})}, + {Sign::POS, -132, MType({0x10bc94f44d216b49, 0x9841c66e17dfe7da})}, + {Sign::POS, -132, MType({0xe303ea7e23c9d6fb, 0x9fd7be3318306cc5})}, + {Sign::POS, -132, MType({0xce697dbaa00d4c7d, 0xaf1cb35bf494a8dd})}, + {Sign::POS, -132, MType({0x9c216079dcf0ea96, 0xbe8380a2fa7eba5a})}, + {Sign::POS, -132, MType({0x75278940eecfc3a9, 0xc643c7758283a271})}, + {Sign::POS, -132, MType({0x2d3467d253e2d1fc, 0xd5de75ec27e4fe68})}, + {Sign::POS, -132, MType({0xead4055dcdec7b22, 0xddb904e8f1272a95})}, + {Sign::POS, -132, MType({0xe1e0dda0b3d375a4, 0xed88f6bb355fa196})}, + {Sign::POS, -132, MType({0x38dc40c4fe11e608, 0xf57e8281ade9d92d})}, + {Sign::POS, -131, MType({0x3bcdcfe7b23976cd, 0x82c2941bb20bbe1f})}, + {Sign::POS, -131, MType({0x456350b0bda452a6, 0x86cb36632807cdcd})}, + {Sign::POS, -131, MType({0x78185dcc37fda01a, 0x8eeaa306458b760a})}, + {Sign::POS, 
-131, MType({0x307643adbbbde1b3, 0x9301839512fc1168})}, + {Sign::POS, -131, MType({0x6c449d409f883fe3, 0x9b3dd1d550c41443})}, + {Sign::POS, -131, MType({0x8ea7b30c8b4ad886, 0x9f6356aa03c34389})}, + {Sign::POS, -131, MType({0x961c6e690d8879b4, 0xa7bd56cdde5d76a2})}, + {Sign::POS, -131, MType({0x42643ced81ec14a, 0xabf1ea3e1d7bd7cf})}, + {Sign::POS, -131, MType({0x4742fb3d0b5cdd19, 0xb02b9af74c2f879e})}, + {Sign::POS, -131, MType({0xf7e2ab36f09e9014, 0xb8ae8671b3d7dd6c})}, + {Sign::POS, -131, MType({0x8d3fc63485e7ff13, 0xbcf7dabd87c01afc})}, + {Sign::POS, -131, MType({0xf3edc49375fbc5a5, 0xc1467f694d10a581})}, + {Sign::POS, -131, MType({0x5fcd7d0ce937375f, 0xc9f3ef07e1f3fc5e})}, + {Sign::POS, -131, MType({0x58252dada9f06111, 0xce52d50b94fa253a})}, + {Sign::POS, -131, MType({0x62f01e5ff43708ab, 0xd2b74192fae43777})}, + {Sign::POS, -131, MType({0x481d9b3131f52639, 0xd72142a84ca85abd})}, + {Sign::POS, -131, MType({0xb305ced1419fe924, 0xdb90e68b8abf14af})}, + {Sign::POS, -131, MType({0x849266a85513dc6d, 0xe48150cf32888b9c})}, + {Sign::POS, -131, MType({0x80ecf3266b4dcf4, 0xe90234c65a15e533})}, + {Sign::POS, -131, MType({0xe1e0dda0b3d375a4, 0xed88f6bb355fa196})}, + {Sign::POS, -131, MType({0xce3537a3a211b25b, 0xf215a60b6557943f})}, + {Sign::POS, -131, MType({0x5dab68307fedefcd, 0xf6a852513757dfbd})}, + {Sign::POS, -131, MType({0x1be2585c279c50a5, 0xffdfe15de3c01bac})}, + {Sign::POS, -130, MType({0x18aa302171017dcb, 0x8242724a155219f3})}, + {Sign::POS, -130, MType({0xabc7e698502d43c0, 0x849812d0ccbb5cbd})}, + {Sign::POS, -130, MType({0xc339089a51663370, 0x86f0dab1ab5822b6})}, + {Sign::POS, -130, MType({0x26f70b34ce5cf201, 0x894cd27d9f182c63})}, + {Sign::POS, -130, MType({0x676f20a87ab433df, 0x8bac02e8ac3e09ac})}, + {Sign::POS, -130, MType({0x6db4169cc4b83bc3, 0x8e0e74caae062e24})}, + {Sign::POS, -130, MType({0xcd3fdb2fad0d1fd6, 0x907431201c7f651a})}, + {Sign::POS, -130, MType({0x49d03e163250d1d4, 0x92dd410ad7bfe103})}, + {Sign::POS, -130, MType({0x9ec7dc02d5e723b9, 
0x9549add2f8a3c7e0})}, + {Sign::POS, -130, MType({0x34698d03a5442573, 0x97b980e7a743d71c})}, + {Sign::POS, -130, MType({0x522904d1e47f3de, 0x9a2cc3dff7548556})}, + {Sign::POS, -130, MType({0x791a72646c87b976, 0x9ca3807bca9fe93f})}, + {Sign::POS, -130, MType({0x3826f190d655d736, 0x9f1dc0a4b9cea286})}, + {Sign::POS, -130, MType({0x544ab3e48199b299, 0xa19b8e6f03b60e45})}, + {Sign::POS, -130, MType({0xbe775fa82961114e, 0xa41cf41a83643487})}, + {Sign::POS, -130, MType({0x45798e5019e6c082, 0xa6a1fc13ad241953})}, + {Sign::POS, -130, MType({0x91fb1ed0cdc4d1fb, 0xa92ab0f492b772bd})}, + {Sign::POS, -130, MType({0x818b8b9cbbd17b72, 0xabb71d85ef05380d})}, + {Sign::POS, -130, MType({0xa50c2fea60c5b3b2, 0xae474cc0397f0d4f})}, + {Sign::POS, -130, MType({0x58ea34980ad8b720, 0xb0db49ccc1823c8e})}, + {Sign::POS, -130, MType({0x4b5f71941be508a4, 0xb3732006d1fbbba5})}, + {Sign::POS, -130, MType({0x9e405fb8bcb1ff1e, 0xb60edafcdd99ad1d})}, + {Sign::POS, -130, MType({0x9e405fb8bcb1ff1e, 0xb60edafcdd99ad1d})}, + {Sign::POS, -130, MType({0xf7e2ab36f09e9014, 0xb8ae8671b3d7dd6c})}, + {Sign::POS, -130, MType({0xc669639640c305bb, 0xbb522e5dbf37f63b})}, + {Sign::POS, -130, MType({0xa3dc9e464e98764b, 0xbdf9def04cf980ff})}, + {Sign::POS, -130, MType({0xffd3256b59fa9c59, 0xc0a5a490dea95b5e})}, + {Sign::POS, -130, MType({0xb0a2d48672a051a5, 0xc3558be085e3f4bc})}, + {Sign::POS, -130, MType({0xb0a2d48672a051a5, 0xc3558be085e3f4bc})}, + {Sign::POS, -130, MType({0xacb2ca5d4ca1c10e, 0xc609a1bb4aa98f59})}, + {Sign::POS, -130, MType({0x43690b9e3cde0d02, 0xc8c1f3399ca7d33b})}, + {Sign::POS, -130, MType({0x18b1fd60383f7e5a, 0xcb7e8db1cfe04827})}, + {Sign::POS, -130, MType({0x248757e5f45af3d, 0xce3f7eb9a517c969})}, + {Sign::POS, -130, MType({0x7c4acd605be48bc1, 0xd104d427de7fbcc4})}, + {Sign::POS, -130, MType({0x7c4acd605be48bc1, 0xd104d427de7fbcc4})}, + {Sign::POS, -130, MType({0x58ff63629a92652d, 0xd3ce9c15e10ec927})}, + {Sign::POS, -130, MType({0x6b49be3bd8c89f10, 0xd69ce4e16303fcdd})}, + {Sign::POS, 
-130, MType({0xe6dd603a881e9060, 0xd96fbd2e2814c9cc})}, + {Sign::POS, -130, MType({0xe6dd603a881e9060, 0xd96fbd2e2814c9cc})}, + {Sign::POS, -130, MType({0x89e281c98c1d705c, 0xdc4733e7cbcbfc8c})}, + {Sign::POS, -130, MType({0xdc0db7cf0cce9f32, 0xdf2358439aa5dd12})}, + {Sign::POS, -130, MType({0xfdf1c5b846db9deb, 0xe20439c27a7c01b8})}, + {Sign::POS, -130, MType({0xfdf1c5b846db9deb, 0xe20439c27a7c01b8})}, + {Sign::POS, -130, MType({0x3dd7eab48869c402, 0xe4e9e832e2da0c05})}, + {Sign::POS, -130, MType({0x4e8fcc900b41daef, 0xe7d473b2e5db8f2a})}, + {Sign::POS, -130, MType({0x4e8fcc900b41daef, 0xe7d473b2e5db8f2a})}, + {Sign::POS, -130, MType({0x7593e1a9e917359a, 0xeac3ecb24a3ac7b4})}, + {Sign::POS, -130, MType({0xe7741396b49e1ce5, 0xedb863f4b73f982d})}, + {Sign::POS, -130, MType({0xe7741396b49e1ce5, 0xedb863f4b73f982d})}, + {Sign::POS, -130, MType({0xc8ba4f8f47b85a5c, 0xf0b1ea93f34675a7})}, + {Sign::POS, -130, MType({0x7007c1276821b705, 0xf3b09202359f9787})}, + {Sign::POS, -130, MType({0x7007c1276821b705, 0xf3b09202359f9787})}, + {Sign::POS, -130, MType({0x7ee19afe6db7e324, 0xf6b46c0c8c8fdea1})}, + {Sign::POS, -130, MType({0xedf54f37f6d40420, 0xf9bd8add584687f0})}, + {Sign::POS, -130, MType({0xedf54f37f6d40420, 0xf9bd8add584687f0})}, + {Sign::POS, -130, MType({0xefe52ccf03e7dee1, 0xfccc00fedba4e6fb})}, + {Sign::POS, -130, MType({0x1be2585c279c50a5, 0xffdfe15de3c01bac})}, + {Sign::POS, -130, MType({0x1be2585c279c50a5, 0xffdfe15de3c01bac})}, + {Sign::POS, -129, MType({0xe0b571f5c91b0446, 0x817c9fa643880404})}, + {Sign::POS, -129, MType({0x7178594bef2def59, 0x830c17427ea55eca})}, + {Sign::POS, -129, MType({0x7178594bef2def59, 0x830c17427ea55eca})}, + {Sign::POS, -129, MType({0x9a741bb171158d2a, 0x849e6196487c1d1c})}, + {Sign::POS, -129, MType({0x9a741bb171158d2a, 0x849e6196487c1d1c})}, + {Sign::POS, -129, MType({0x1a618264446cb495, 0x863388eb55ebd295})}, + {Sign::POS, -129, MType({0x71dbdbbec51d7657, 0x87cb97c3ff9eac18})}, + {Sign::POS, -129, MType({0x71dbdbbec51d7657, 
0x87cb97c3ff9eac18})}, + {Sign::POS, -129, MType({0xabe0b522230f7d14, 0x896698dce4cff76c})}, + {Sign::POS, -129, MType({0xabe0b522230f7d14, 0x896698dce4cff76c})}, + {Sign::POS, -129, MType({0xd28e8adafea703b4, 0x8b04972e9d4d3011})}, + {Sign::POS, -129, MType({0x208422d83be34b27, 0x8ca59def7b5cefc5})}, + {Sign::POS, -129, MType({0x208422d83be34b27, 0x8ca59def7b5cefc5})}, + {Sign::POS, -129, MType({0xc385cf49402af0e4, 0x8e49b8955e3ffb8a})}, + {Sign::POS, -129, MType({0xc385cf49402af0e4, 0x8e49b8955e3ffb8a})}, + {Sign::POS, -129, MType({0xda982a614e12c6dd, 0x8ff0f2d7960a075c})}, + {Sign::POS, -129, MType({0xda982a614e12c6dd, 0x8ff0f2d7960a075c})}, + {Sign::POS, -129, MType({0x38401fc1c1b5c2c, 0x919b58b0d999bbc8})}, + {Sign::POS, -129, MType({0x38401fc1c1b5c2c, 0x919b58b0d999bbc8})}, + {Sign::POS, -129, MType({0xa9b55d3f16da746a, 0x9348f6614f821394})}, + {Sign::POS, -129, MType({0xa9b55d3f16da746a, 0x9348f6614f821394})}, + {Sign::POS, -129, MType({0x88d2d1473d4f7f5, 0x94f9d870aac256a5})}, + {Sign::POS, -129, MType({0x88d2d1473d4f7f5, 0x94f9d870aac256a5})}, + {Sign::POS, -129, MType({0x7c1e117dea19e9e6, 0x96ae0bb05c35d5bd})}, + {Sign::POS, -129, MType({0x7c1e117dea19e9e6, 0x96ae0bb05c35d5bd})}, + {Sign::POS, -129, MType({0x336db0630f536fb9, 0x98659d3dd9b12532})}, + {Sign::POS, 0, MType(0)}, }, // -log10(r) for the second step, generated by SageMath with: // @@ -192,513 +188,513 @@ const LogRR LOG10_TABLE = { // r = 2^-16 * round( 2^16 / (1 + i*2^(-14)) ); // s, m, e = RealField(128)(r).log10().sign_mantissa_exponent(); // print("{Sign::POS," if s == -1 else "{Sign::NEG,", e, ", - // format_hex(m), "},"); + // MType({", hex(m % 2^64), ",", hex((m >> 64) % 2^64), "})},"); /* .step_2 = */ { - {Sign::NEG, -137, 0xdeca7290'13cd7c31'7f1ce002'fa34131b_u128}, - {Sign::NEG, -137, 0xdb5475b4'4946d986'639afa08'5dd8b4c7_u128}, - {Sign::NEG, -137, 0xd7de6b0e'10cab7d2'05512632'fe9a58cb_u128}, - {Sign::NEG, -137, 0xd468529c'fc6fb395'b5380a99'53117d07_u128}, - {Sign::NEG, -137, 
0xd0f22c60'9e474741'70af2d7d'53be1f31_u128}, - {Sign::NEG, -137, 0xcd7bf858'885dcae2'0ccd499c'49b74cc2_u128}, - {Sign::NEG, -137, 0xca05b684'4cba73cf'5b51ddc3'987ebfb8_u128}, - {Sign::NEG, -137, 0xc68f66e3'7d5f545a'49375f51'89b3782b_u128}, - {Sign::NEG, -137, 0xc3190975'ac495b7a'f6e57738'865c712f_u128}, - {Sign::NEG, -137, 0xbfa29e3a'6b70547e'ca02b10a'8c712acd_u128}, - {Sign::NEG, -137, 0xbc2c2531'4cc6e6b6'78e50382'10208151_u128}, - {Sign::NEG, -137, 0xb8b59e59'e23a9524'0fa099ec'd71ee0ea_u128}, - {Sign::NEG, -137, 0xb53f09b3'bdb3be28'eeb445cc'b8fb09ed_u128}, - {Sign::NEG, -137, 0xb1c8673e'71159b33'c352fff1'8a1c02fb_u128}, - {Sign::NEG, -137, 0xae51b6f9'8e3e406e'7949e03e'cf9b390b_u128}, - {Sign::NEG, -137, 0xaadaf8e4'a7069c6c'2681f33f'30aadedc_u128}, - {Sign::NEG, -137, 0xa7642cff'4d4277d6'f01d5496'eea213b3_u128}, - {Sign::NEG, -137, 0xa3ed5349'12c0751d'e92ef555'ff1de975_u128}, - {Sign::NEG, -137, 0xa0766bc1'894a1022'eb0c7519'b3e7c1e0_u128}, - {Sign::NEG, -137, 0x9c21b6e9'1e7f03a3'f60d204f'f0fe5296_u128}, - {Sign::NEG, -137, 0x98aab049'1050bea8'125c19a4'f057c18b_u128}, - {Sign::NEG, -137, 0x95339bd6'4cd953e7'7e9383ce'1bdf9575_u128}, - {Sign::NEG, -137, 0x91bc7990'65cc57d6'bf274f4d'8f770253_u128}, - {Sign::NEG, -137, 0x8e454976'ecd836ad'656bd9b7'58fe44ba_u128}, - {Sign::NEG, -137, 0x8ace0b89'73a63413'bfdd2c7f'388fc014_u128}, - {Sign::NEG, -137, 0x8756bfc7'8bda6ad0'83fbf6ed'936c493a_u128}, - {Sign::NEG, -137, 0x83df6630'c713cc76'71bfa9a1'8bec01cc_u128}, - {Sign::NEG, -137, 0x8067fec4'b6ec2111'f09d19f5'6dbfef72_u128}, - {Sign::NEG, -138, 0xf9e11305'd9f00dad'4c422713'b1642228_u128}, - {Sign::NEG, -138, 0xf2f20cd5'f58de39a'0c3c7c56'99b7a0a4_u128}, - {Sign::NEG, -138, 0xec02eaf8'e3c656ff'b8db7c69'e3fa0797_u128}, - {Sign::NEG, -138, 0xe513ad6d'c7a3a553'a083eb05'506ff7ed_u128}, - {Sign::NEG, -138, 0xde245433'c425b5c5'c21595e7'45f1fa15_u128}, - {Sign::NEG, -138, 0xd734df49'fc42189b'b9d5bcdb'fe719389_u128}, - {Sign::NEG, -138, 0xd0454eaf'92e4068b'a17a1e85'e93461f4_u128}, - 
{Sign::NEG, -138, 0xc955a263'aaec6016'e3537584'da333fda_u128}, - {Sign::NEG, -138, 0xc265da65'6731ace5'00963177'f24682c2_u128}, - {Sign::NEG, -138, 0xbb75f6b3'ea801b1e'4ac03734'7bcfc50e_u128}, - {Sign::NEG, -138, 0xb485f74e'57997ec6'901a736a'4364cdfd_u128}, - {Sign::NEG, -138, 0xad95dc33'd1355117'bb550acc'3b9d7247_u128}, - {Sign::NEG, -138, 0xa6a5a563'7a00afdc'663cf2b2'7e8f1ffb_u128}, - {Sign::NEG, -138, 0x9fb552dc'749e5cca'5f89bd08'feb39952_u128}, - {Sign::NEG, -138, 0x98c4e49d'e3a6bcdd'23c2623c'73f494db_u128}, - {Sign::NEG, -138, 0x91d45aa6'e9a7d7b0'4937d3b5'485af61e_u128}, - {Sign::NEG, -138, 0x8ae3b4f6'a92556d9'df14214e'7a6d8111_u128}, - {Sign::NEG, -138, 0x83f2f38c'44988544'bf7cfc14'999fb4bc_u128}, - {Sign::NEG, -139, 0xfa042ccd'bce09d15'a990c0ee'569a8d51_u128}, - {Sign::NEG, -139, 0xec223b0b'32227c9e'a38463e9'd941e1c2_u128}, - {Sign::NEG, -139, 0xde4011cf'2daaff31'ba032453'0edaa03f_u128}, - {Sign::NEG, -139, 0xd05db117'f419b857'5e997a02'dad7ace7_u128}, - {Sign::NEG, -139, 0xc27b18e3'c9f977c7'4a14676d'4d0f817e_u128}, - {Sign::NEG, -139, 0xb4984930'f3c0481c'857c002e'e7a1e473_u128}, - {Sign::NEG, -139, 0xa6b541fd'b5cf6d89'5923b2eb'72d8012a_u128}, - {Sign::NEG, -139, 0x98d20348'5473648b'21cde8f8'5ca1f9fd_u128}, - {Sign::NEG, -139, 0x8aee8d0f'13e3e09e'0be08e08'b1d212d4_u128}, - {Sign::NEG, -140, 0xfa15bea0'708795e1'69502399'8e6bd7b0_u128}, - {Sign::NEG, -140, 0xde4df414'0b42822f'634cea67'50617a92_u128}, - {Sign::NEG, -140, 0xc285ba75'7feb2781'fbd7e970'aef9dbb8_u128}, - {Sign::NEG, -140, 0xa6bd11c1'564a8ace'9aedc1c1'ba7d0695_u128}, - {Sign::NEG, -140, 0x8af3f9f4'1600120a'8d306ba2'07233c44_u128}, - {Sign::NEG, -141, 0xde54e614'8d030322'856a0a3a'00fcf3c1_u128}, - {Sign::NEG, -141, 0xa6c0fa00'de35f314'b3a2c140'7cf6d38d_u128}, - {Sign::NEG, -142, 0xde585f4c'5bbbcd3d'd791cf6a'70c3a504_u128}, - {Sign::NEG, -143, 0xde5a1bf6'27b1f68f'10a633f2'c4a8ea22_u128}, - {Sign::NEG, 0, 0_u128}, - {Sign::POS, -143, 0xde5d9565'8a729eab'ed4a68e5'e6e83ddf_u128}, - {Sign::POS, -142, 
0xde5f522b'21e3e25a'3281f187'2cdbee94_u128}, - {Sign::POS, -141, 0xa6c8cb3b'7e5bbbfd'f1466eda'a96e356e_u128}, - {Sign::POS, -141, 0xde62cbd2'1e895473'8a607fd6'95dfc3d9_u128}, - {Sign::POS, -140, 0x8afed570'32bebc7c'c36b8713'ceefe2de_u128}, - {Sign::POS, -140, 0xa6ccb436'a3c72fa4'5c2e76c9'53e3e3e6_u128}, - {Sign::POS, -140, 0xc29b023f'dcb2dccf'8e4950fa'5c943bbf_u128}, - {Sign::POS, -140, 0xde69bf8f'58005dfc'20fa8a73'c585f634_u128}, - {Sign::POS, -140, 0xfa38ec28'905810a3'0aa106d9'b0a9717a_u128}, - {Sign::POS, -139, 0x8b044407'80460c2a'85d70e03'2de41aec_u128}, - {Sign::POS, -139, 0x98ec49a3'11cc30ab'beee21cb'b82a9a78_u128}, - {Sign::POS, -139, 0xa6d486e8'ba5151a0'abd7b0fd'd8efe6f6_u128}, - {Sign::POS, -139, 0xb4bcfbda'377d31cc'3221c56e'2c1aa912_u128}, - {Sign::POS, -139, 0xc2a5a879'470c7c37'57b795a3'6d9c5f19_u128}, - {Sign::POS, -139, 0xd08e8cc7'a6d0c580'131ec142'c053ac3b_u128}, - {Sign::POS, -139, 0xde77a8c7'14b08d28'35e3298f'4bb2aa0a_u128}, - {Sign::POS, -139, 0xec60fc79'4ea73ee4'7133dafd'fc44f160_u128}, - {Sign::POS, -139, 0xfa4a87e0'12c533eb'74b37d23'121c59d5_u128}, - {Sign::POS, -138, 0x841a257e'8f97da22'93bf5f42'07da8a4c_u128}, - {Sign::POS, -138, 0x8b0f22e9'19107c0c'fdb5990e'c6057f4e_u128}, - {Sign::POS, -138, 0x92043c30'84f41481'2d408a58'b1b202fe_u128}, - {Sign::POS, -138, 0x98f97155'b274b1ab'1759381b'61dfbf01_u128}, - {Sign::POS, -138, 0x9feec259'80cedbbe'41e90a05'4df4b9f1_u128}, - {Sign::POS, -138, 0xa6e42f3c'cf49959d'a1e66c62'03725d50_u128}, - {Sign::POS, -138, 0xadd9b800'7d365d83'8693d36a'b45bd7ce_u128}, - {Sign::POS, -138, 0xb4cf5ca5'69f12da9'91e25bb4'0ad3f098_u128}, - {Sign::POS, -138, 0xbbc51d2c'74e07cf0'bdf94392'c4cc7f6c_u128}, - {Sign::POS, -138, 0xc2baf996'7d753f89'6fe37973'354a82f9_u128}, - {Sign::POS, -138, 0xc9b0f1e4'632ae79b'97647b42'67bfd801_u128}, - {Sign::POS, -138, 0xd0a70617'058765ee'dbf5c32a'454f7bdf_u128}, - {Sign::POS, -138, 0xd79d362f'441b2a92'd6edfe04'c37ba916_u128}, - {Sign::POS, -138, 0xde93822d'fe812587'5ad3480c'cfbe9890_u128}, - 
{Sign::POS, -138, 0xe589ea14'145ec764'c7d9ac76'5be7e325_u128}, - {Sign::POS, -138, 0xec806de2'65640204'6d8f24b9'a3ca011b_u128}, - {Sign::POS, -138, 0xf3770d99'd14b4928'f9b65480'7dcdd5b2_u128}, - {Sign::POS, -138, 0xfa6dc93b'37d99326'f4513f47'45663028_u128}, - {Sign::POS, -137, 0x80b25063'bc6f2cc6'a46e9a72'd80da75f_u128}, - {Sign::POS, -137, 0x842dca1f'ba19cce6'ee60992b'51ffac4b_u128}, - {Sign::POS, -137, 0x87a951d2'04deeaf3'1977fa1c'786886b3_u128}, - {Sign::POS, -137, 0x8b24e77b'0cb60a84'0e5f7c52'cdf119d5_u128}, - {Sign::POS, -137, 0x8ea08b1b'419bf221'3bf9d70d'a1021a10_u128}, - {Sign::POS, -137, 0x921c3cb3'1392ab94'fd0406b0'7523b8e6_u128}, - {Sign::POS, -137, 0x9597fc42'f2a18441'0453ee32'c020f2a8_u128}, - {Sign::POS, -137, 0x9913c9cb'4ed50d72'cfb3ec22'066bf7f6_u128}, - {Sign::POS, -137, 0x9c8fa54c'983f1cb8'215c025b'd493ecf9_u128}, - {Sign::POS, -137, 0x9f2c9319'2e68232b'39c116b7'ee3a83ec_u128}, - {Sign::POS, -137, 0xa2a8870f'24ac5f66'f41f4b3e'de2782f0_u128}, - {Sign::POS, -137, 0xa62488ff'3c735799'61196927'723eb75c_u128}, - {Sign::POS, -137, 0xa9a098e9'e5e2a432'0e615e83'6cb1edab_u128}, - {Sign::POS, -137, 0xad1cb6cf'91252372'6981331c'5fc71cfc_u128}, - {Sign::POS, -137, 0xb098e2b0'ae6af9c2'5f6a4faa'054f11fa_u128}, - {Sign::POS, -137, 0xb4151c8d'ade99205'02a68bc6'81a74c28_u128}, - {Sign::POS, -137, 0xb7916466'ffdb9ded'382ba24d'90566403_u128}, - {Sign::POS, -137, 0xbb0dba3d'14811652'6ad1abe5'1dd22e00_u128}, - {Sign::POS, -137, 0xbe8a1e10'5c1f3b85'456d3f7f'59b13960_u128}, - {Sign::POS, -137, 0xc2068fe1'470095a4'738dd8b7'd66e9058_u128}, - {Sign::POS, -137, 0xc5830fb0'4574f4f1'68e123fe'd7ff11c6_u128}, - {Sign::POS, -137, 0xc8ff9d7d'c7d17225'2f3bd097'80c3aa11_u128}, - {Sign::POS, -137, 0xcc7c394a'3e706ec5'3b48887f'1ce36935_u128}, - {Sign::POS, -137, 0xcff8e316'19b19578'47ddae65'5ecc4633_u128}, - {Sign::POS, -137, 0xd3759ae1'c9f9da5b'37fa81ee'f4819c88_u128}, - {Sign::POS, -137, 0xd6f260ad'bfb37b55'ff6c4a8d'747c65ed_u128}, - {Sign::POS, -137, 
0xda6f347a'6b4e0070'921c2949'3a33318c_u128}, - {Sign::POS, -137, 0xddec1648'3d3e3c27'da0631eb'65e731d8_u128}, - {Sign::POS, -137, 0xe1690617'a5fe4bc2'b3da6c07'd110babc_u128}, - {Sign::POS, -137, 0xe4e603e9'160d97a6'f2485c78'68b8835a_u128}, - {Sign::POS, -137, 0xe8630fbc'fdf0d3ae'67f5b7ed'01344055_u128}, - {Sign::POS, -137, 0xebe02993'ce31ff7b'f820df44'5b1d0622_u128}, - {Sign::POS, -137, 0xef5d516d'f76066d0'adefc674'b7eca5cd_u128}, - {Sign::POS, -137, 0xf2da874b'ea10a1e0'da6be6dc'057d3235_u128}, - {Sign::POS, -137, 0xf657cb2e'16dc95a9'392bdde1'52ab5ff5_u128}, - {Sign::POS, -137, 0xf9d51d14'ee637444'1bab58e2'ec99cf73_u128}, - {Sign::POS, -137, 0xfd527d00'e149bd3e'9b51ef7e'3388d692_u128}, - {Sign::POS, -136, 0x8067f579'301c9ef6'e914c6a7'f3f22fa2_u128}, - {Sign::POS, -136, 0x8226b374'edf088e2'0d22862e'b2081c94_u128}, - {Sign::POS, -136, 0x83e57873'e27ad153'29ebd0b4'76cd8fd8_u128}, - {Sign::POS, -136, 0x85a44476'461854a0'98feddc2'806d01ed_u128}, - {Sign::POS, -136, 0x8763177c'512896af'471bfc26'1a401854_u128}, - {Sign::POS, -136, 0x88b23a5b'61430a16'b6f89c19'b4cd1acd_u128}, - {Sign::POS, -136, 0x8a7119a8'5909ebe9'b39aaf34'163fb099_u128}, - {Sign::POS, -136, 0x8c2ffff9'9357e887'1665f0f8'21541c36_u128}, - {Sign::POS, -136, 0x8deeed4f'489679a6'a5051754'e049c1cb_u128}, - {Sign::POS, -136, 0x8fade1a9'b131c159'8c5a9a1c'57b2e986_u128}, - {Sign::POS, -136, 0x916cdd09'05988a35'1d844843'8a26a9ae_u128}, - {Sign::POS, -136, 0x932bdf6d'7e3c477d'8e3a0913'ecd2fd02_u128}, - {Sign::POS, -136, 0x94eae8d7'53911550'bc881a45'f47f1d36_u128}, - {Sign::POS, -136, 0x96a9f946'be0db8d0'f5e51c05'499b06d0_u128}, - {Sign::POS, -136, 0x986910bb'f62ba04f'c1a43be8'1a243fde_u128}, - {Sign::POS, -136, 0x9a282f37'3466e378'aec3cfeb'e971beb7_u128}, - {Sign::POS, -136, 0x9be754b8'b13e437c'2518b293'28614989_u128}, - {Sign::POS, -136, 0x9da68140'a5332b3a'39d6b147'cbe803a4_u128}, - {Sign::POS, -136, 0x9f65b4cf'48c9af6d'87765e30'04ae428d_u128}, - {Sign::POS, -136, 0xa124ef64'd4888ed6'08f896ab'28245bac_u128}, - 
{Sign::POS, -136, 0xa2e43101'80f93263'f8880fb5'ca630c87_u128}, - {Sign::POS, -136, 0xa4a379a5'86a7ad62'b179397c'f82e935c_u128}, - {Sign::POS, -136, 0xa662c951'1e22bda3'95a8cb71'7197ad81_u128}, - {Sign::POS, -136, 0xa8222004'7ffbcba8'f6394a34'b7f9a4a4_u128}, - {Sign::POS, -136, 0xa9e17dbf'e4c6ead0'ffafd8c2'b57884e8_u128}, - {Sign::POS, -136, 0xaba0e283'851ad980'a970a643'b8a6ac2b_u128}, - {Sign::POS, -136, 0xad604e4f'9991014e'a89b49fb'749d47e0_u128}, - {Sign::POS, -136, 0xaf1fc124'5ac5772e'66475ed2'ac983305_u128}, - {Sign::POS, -136, 0xb06f5be1'bf1918e7'b4fd6209'364bb36f_u128}, - {Sign::POS, -136, 0xb22edb06'36da31d6'8b5ce79b'0965962a_u128}, - {Sign::POS, -136, 0xb3ee6133'f7149769'6724232b'07396427_u128}, - {Sign::POS, -136, 0xb5adee6b'386e62ae'2f02b14d'cad8a49c_u128}, - {Sign::POS, -136, 0xb76d82ac'339058db'bd6443a8'1f792e07_u128}, - {Sign::POS, -136, 0xb92d1df7'2125eb7c'ea1cd962'5749939a_u128}, - {Sign::POS, -136, 0xbaecc04c'39dd389b'97775e31'42198913_u128}, - {Sign::POS, -136, 0xbcac69ab'b6670aeb'c2a701b8'09a2bc39_u128}, - {Sign::POS, -136, 0xbe6c1a15'cf76d9f6'979b990f'39e662e3_u128}, - {Sign::POS, -136, 0xc02bd18a'bdc2ca45'88395c46'3ddd82b2_u128}, - {Sign::POS, -136, 0xc1eb900a'ba03ad8d'66f451bd'9ba5ed05_u128}, - {Sign::POS, -136, 0xc3ab5595'fcf502d9'84cfb941'3f6437a6_u128}, - {Sign::POS, -136, 0xc56b222c'bf54f6b6'd2c1c8d3'2943ca42_u128}, - {Sign::POS, -136, 0xc72af5cf'39e4635f'067c0d1f'd95192e6_u128}, - {Sign::POS, -136, 0xc8ead07d'a566d0e3'c298bf9e'db6441f2_u128}, - {Sign::POS, -136, 0xcaaab238'3aa27559'c22d646a'ddde3910_u128}, - {Sign::POS, -136, 0xcc6a9aff'32603504'07c301e5'c7d1ca40_u128}, - {Sign::POS, -136, 0xce2a8ad2'c56ba27f'0fb44446'4df02505_u128}, - {Sign::POS, -136, 0xcfea81b3'2c92feec'05f1df35'91ae898f_u128}, - {Sign::POS, -136, 0xd13a7f7c'07506f7d'b43caf8e'7b891066_u128}, - {Sign::POS, -136, 0xd2fa82b3'6a610c4f'597fb13f'0d0fdf19_u128}, - {Sign::POS, -136, 0xd4ba8cf8'3dd2a06b'3c21f1c6'0a60b0d6_u128}, - {Sign::POS, -136, 
0xd67a9e4a'ba7d7ce5'2b745590'9a0428a4_u128}, - {Sign::POS, -136, 0xd83ab6ab'193ca223'1438b605'73d2da10_u128}, - {Sign::POS, -136, 0xd9fad619'92edc008'49f86400'c5ab2b11_u128}, - {Sign::POS, -136, 0xdbbafc96'60713620'd3c313d1'48a23c35_u128}, - {Sign::POS, -136, 0xdd7b2a21'baaa13cc'bc568523'55e0f0d5_u128}, + {Sign::NEG, -137, MType({0x7f1ce002fa34131b, 0xdeca729013cd7c31})}, + {Sign::NEG, -137, MType({0x639afa085dd8b4c7, 0xdb5475b44946d986})}, + {Sign::NEG, -137, MType({0x5512632fe9a58cb, 0xd7de6b0e10cab7d2})}, + {Sign::NEG, -137, MType({0xb5380a9953117d07, 0xd468529cfc6fb395})}, + {Sign::NEG, -137, MType({0x70af2d7d53be1f31, 0xd0f22c609e474741})}, + {Sign::NEG, -137, MType({0xccd499c49b74cc2, 0xcd7bf858885dcae2})}, + {Sign::NEG, -137, MType({0x5b51ddc3987ebfb8, 0xca05b6844cba73cf})}, + {Sign::NEG, -137, MType({0x49375f5189b3782b, 0xc68f66e37d5f545a})}, + {Sign::NEG, -137, MType({0xf6e57738865c712f, 0xc3190975ac495b7a})}, + {Sign::NEG, -137, MType({0xca02b10a8c712acd, 0xbfa29e3a6b70547e})}, + {Sign::NEG, -137, MType({0x78e5038210208151, 0xbc2c25314cc6e6b6})}, + {Sign::NEG, -137, MType({0xfa099ecd71ee0ea, 0xb8b59e59e23a9524})}, + {Sign::NEG, -137, MType({0xeeb445ccb8fb09ed, 0xb53f09b3bdb3be28})}, + {Sign::NEG, -137, MType({0xc352fff18a1c02fb, 0xb1c8673e71159b33})}, + {Sign::NEG, -137, MType({0x7949e03ecf9b390b, 0xae51b6f98e3e406e})}, + {Sign::NEG, -137, MType({0x2681f33f30aadedc, 0xaadaf8e4a7069c6c})}, + {Sign::NEG, -137, MType({0xf01d5496eea213b3, 0xa7642cff4d4277d6})}, + {Sign::NEG, -137, MType({0xe92ef555ff1de975, 0xa3ed534912c0751d})}, + {Sign::NEG, -137, MType({0xeb0c7519b3e7c1e0, 0xa0766bc1894a1022})}, + {Sign::NEG, -137, MType({0xf60d204ff0fe5296, 0x9c21b6e91e7f03a3})}, + {Sign::NEG, -137, MType({0x125c19a4f057c18b, 0x98aab0491050bea8})}, + {Sign::NEG, -137, MType({0x7e9383ce1bdf9575, 0x95339bd64cd953e7})}, + {Sign::NEG, -137, MType({0xbf274f4d8f770253, 0x91bc799065cc57d6})}, + {Sign::NEG, -137, MType({0x656bd9b758fe44ba, 0x8e454976ecd836ad})}, + {Sign::NEG, 
-137, MType({0xbfdd2c7f388fc014, 0x8ace0b8973a63413})}, + {Sign::NEG, -137, MType({0x83fbf6ed936c493a, 0x8756bfc78bda6ad0})}, + {Sign::NEG, -137, MType({0x71bfa9a18bec01cc, 0x83df6630c713cc76})}, + {Sign::NEG, -137, MType({0xf09d19f56dbfef72, 0x8067fec4b6ec2111})}, + {Sign::NEG, -138, MType({0x4c422713b1642228, 0xf9e11305d9f00dad})}, + {Sign::NEG, -138, MType({0xc3c7c5699b7a0a4, 0xf2f20cd5f58de39a})}, + {Sign::NEG, -138, MType({0xb8db7c69e3fa0797, 0xec02eaf8e3c656ff})}, + {Sign::NEG, -138, MType({0xa083eb05506ff7ed, 0xe513ad6dc7a3a553})}, + {Sign::NEG, -138, MType({0xc21595e745f1fa15, 0xde245433c425b5c5})}, + {Sign::NEG, -138, MType({0xb9d5bcdbfe719389, 0xd734df49fc42189b})}, + {Sign::NEG, -138, MType({0xa17a1e85e93461f4, 0xd0454eaf92e4068b})}, + {Sign::NEG, -138, MType({0xe3537584da333fda, 0xc955a263aaec6016})}, + {Sign::NEG, -138, MType({0x963177f24682c2, 0xc265da656731ace5})}, + {Sign::NEG, -138, MType({0x4ac037347bcfc50e, 0xbb75f6b3ea801b1e})}, + {Sign::NEG, -138, MType({0x901a736a4364cdfd, 0xb485f74e57997ec6})}, + {Sign::NEG, -138, MType({0xbb550acc3b9d7247, 0xad95dc33d1355117})}, + {Sign::NEG, -138, MType({0x663cf2b27e8f1ffb, 0xa6a5a5637a00afdc})}, + {Sign::NEG, -138, MType({0x5f89bd08feb39952, 0x9fb552dc749e5cca})}, + {Sign::NEG, -138, MType({0x23c2623c73f494db, 0x98c4e49de3a6bcdd})}, + {Sign::NEG, -138, MType({0x4937d3b5485af61e, 0x91d45aa6e9a7d7b0})}, + {Sign::NEG, -138, MType({0xdf14214e7a6d8111, 0x8ae3b4f6a92556d9})}, + {Sign::NEG, -138, MType({0xbf7cfc14999fb4bc, 0x83f2f38c44988544})}, + {Sign::NEG, -139, MType({0xa990c0ee569a8d51, 0xfa042ccdbce09d15})}, + {Sign::NEG, -139, MType({0xa38463e9d941e1c2, 0xec223b0b32227c9e})}, + {Sign::NEG, -139, MType({0xba0324530edaa03f, 0xde4011cf2daaff31})}, + {Sign::NEG, -139, MType({0x5e997a02dad7ace7, 0xd05db117f419b857})}, + {Sign::NEG, -139, MType({0x4a14676d4d0f817e, 0xc27b18e3c9f977c7})}, + {Sign::NEG, -139, MType({0x857c002ee7a1e473, 0xb4984930f3c0481c})}, + {Sign::NEG, -139, MType({0x5923b2eb72d8012a, 
0xa6b541fdb5cf6d89})}, + {Sign::NEG, -139, MType({0x21cde8f85ca1f9fd, 0x98d203485473648b})}, + {Sign::NEG, -139, MType({0xbe08e08b1d212d4, 0x8aee8d0f13e3e09e})}, + {Sign::NEG, -140, MType({0x695023998e6bd7b0, 0xfa15bea0708795e1})}, + {Sign::NEG, -140, MType({0x634cea6750617a92, 0xde4df4140b42822f})}, + {Sign::NEG, -140, MType({0xfbd7e970aef9dbb8, 0xc285ba757feb2781})}, + {Sign::NEG, -140, MType({0x9aedc1c1ba7d0695, 0xa6bd11c1564a8ace})}, + {Sign::NEG, -140, MType({0x8d306ba207233c44, 0x8af3f9f41600120a})}, + {Sign::NEG, -141, MType({0x856a0a3a00fcf3c1, 0xde54e6148d030322})}, + {Sign::NEG, -141, MType({0xb3a2c1407cf6d38d, 0xa6c0fa00de35f314})}, + {Sign::NEG, -142, MType({0xd791cf6a70c3a504, 0xde585f4c5bbbcd3d})}, + {Sign::NEG, -143, MType({0x10a633f2c4a8ea22, 0xde5a1bf627b1f68f})}, + {Sign::NEG, 0, MType({0x0, 0x0})}, + {Sign::POS, -143, MType({0xed4a68e5e6e83ddf, 0xde5d95658a729eab})}, + {Sign::POS, -142, MType({0x3281f1872cdbee94, 0xde5f522b21e3e25a})}, + {Sign::POS, -141, MType({0xf1466edaa96e356e, 0xa6c8cb3b7e5bbbfd})}, + {Sign::POS, -141, MType({0x8a607fd695dfc3d9, 0xde62cbd21e895473})}, + {Sign::POS, -140, MType({0xc36b8713ceefe2de, 0x8afed57032bebc7c})}, + {Sign::POS, -140, MType({0x5c2e76c953e3e3e6, 0xa6ccb436a3c72fa4})}, + {Sign::POS, -140, MType({0x8e4950fa5c943bbf, 0xc29b023fdcb2dccf})}, + {Sign::POS, -140, MType({0x20fa8a73c585f634, 0xde69bf8f58005dfc})}, + {Sign::POS, -140, MType({0xaa106d9b0a9717a, 0xfa38ec28905810a3})}, + {Sign::POS, -139, MType({0x85d70e032de41aec, 0x8b04440780460c2a})}, + {Sign::POS, -139, MType({0xbeee21cbb82a9a78, 0x98ec49a311cc30ab})}, + {Sign::POS, -139, MType({0xabd7b0fdd8efe6f6, 0xa6d486e8ba5151a0})}, + {Sign::POS, -139, MType({0x3221c56e2c1aa912, 0xb4bcfbda377d31cc})}, + {Sign::POS, -139, MType({0x57b795a36d9c5f19, 0xc2a5a879470c7c37})}, + {Sign::POS, -139, MType({0x131ec142c053ac3b, 0xd08e8cc7a6d0c580})}, + {Sign::POS, -139, MType({0x35e3298f4bb2aa0a, 0xde77a8c714b08d28})}, + {Sign::POS, -139, MType({0x7133dafdfc44f160, 
0xec60fc794ea73ee4})}, + {Sign::POS, -139, MType({0x74b37d23121c59d5, 0xfa4a87e012c533eb})}, + {Sign::POS, -138, MType({0x93bf5f4207da8a4c, 0x841a257e8f97da22})}, + {Sign::POS, -138, MType({0xfdb5990ec6057f4e, 0x8b0f22e919107c0c})}, + {Sign::POS, -138, MType({0x2d408a58b1b202fe, 0x92043c3084f41481})}, + {Sign::POS, -138, MType({0x1759381b61dfbf01, 0x98f97155b274b1ab})}, + {Sign::POS, -138, MType({0x41e90a054df4b9f1, 0x9feec25980cedbbe})}, + {Sign::POS, -138, MType({0xa1e66c6203725d50, 0xa6e42f3ccf49959d})}, + {Sign::POS, -138, MType({0x8693d36ab45bd7ce, 0xadd9b8007d365d83})}, + {Sign::POS, -138, MType({0x91e25bb40ad3f098, 0xb4cf5ca569f12da9})}, + {Sign::POS, -138, MType({0xbdf94392c4cc7f6c, 0xbbc51d2c74e07cf0})}, + {Sign::POS, -138, MType({0x6fe37973354a82f9, 0xc2baf9967d753f89})}, + {Sign::POS, -138, MType({0x97647b4267bfd801, 0xc9b0f1e4632ae79b})}, + {Sign::POS, -138, MType({0xdbf5c32a454f7bdf, 0xd0a70617058765ee})}, + {Sign::POS, -138, MType({0xd6edfe04c37ba916, 0xd79d362f441b2a92})}, + {Sign::POS, -138, MType({0x5ad3480ccfbe9890, 0xde93822dfe812587})}, + {Sign::POS, -138, MType({0xc7d9ac765be7e325, 0xe589ea14145ec764})}, + {Sign::POS, -138, MType({0x6d8f24b9a3ca011b, 0xec806de265640204})}, + {Sign::POS, -138, MType({0xf9b654807dcdd5b2, 0xf3770d99d14b4928})}, + {Sign::POS, -138, MType({0xf4513f4745663028, 0xfa6dc93b37d99326})}, + {Sign::POS, -137, MType({0xa46e9a72d80da75f, 0x80b25063bc6f2cc6})}, + {Sign::POS, -137, MType({0xee60992b51ffac4b, 0x842dca1fba19cce6})}, + {Sign::POS, -137, MType({0x1977fa1c786886b3, 0x87a951d204deeaf3})}, + {Sign::POS, -137, MType({0xe5f7c52cdf119d5, 0x8b24e77b0cb60a84})}, + {Sign::POS, -137, MType({0x3bf9d70da1021a10, 0x8ea08b1b419bf221})}, + {Sign::POS, -137, MType({0xfd0406b07523b8e6, 0x921c3cb31392ab94})}, + {Sign::POS, -137, MType({0x453ee32c020f2a8, 0x9597fc42f2a18441})}, + {Sign::POS, -137, MType({0xcfb3ec22066bf7f6, 0x9913c9cb4ed50d72})}, + {Sign::POS, -137, MType({0x215c025bd493ecf9, 0x9c8fa54c983f1cb8})}, + {Sign::POS, 
-137, MType({0x39c116b7ee3a83ec, 0x9f2c93192e68232b})}, + {Sign::POS, -137, MType({0xf41f4b3ede2782f0, 0xa2a8870f24ac5f66})}, + {Sign::POS, -137, MType({0x61196927723eb75c, 0xa62488ff3c735799})}, + {Sign::POS, -137, MType({0xe615e836cb1edab, 0xa9a098e9e5e2a432})}, + {Sign::POS, -137, MType({0x6981331c5fc71cfc, 0xad1cb6cf91252372})}, + {Sign::POS, -137, MType({0x5f6a4faa054f11fa, 0xb098e2b0ae6af9c2})}, + {Sign::POS, -137, MType({0x2a68bc681a74c28, 0xb4151c8dade99205})}, + {Sign::POS, -137, MType({0x382ba24d90566403, 0xb7916466ffdb9ded})}, + {Sign::POS, -137, MType({0x6ad1abe51dd22e00, 0xbb0dba3d14811652})}, + {Sign::POS, -137, MType({0x456d3f7f59b13960, 0xbe8a1e105c1f3b85})}, + {Sign::POS, -137, MType({0x738dd8b7d66e9058, 0xc2068fe1470095a4})}, + {Sign::POS, -137, MType({0x68e123fed7ff11c6, 0xc5830fb04574f4f1})}, + {Sign::POS, -137, MType({0x2f3bd09780c3aa11, 0xc8ff9d7dc7d17225})}, + {Sign::POS, -137, MType({0x3b48887f1ce36935, 0xcc7c394a3e706ec5})}, + {Sign::POS, -137, MType({0x47ddae655ecc4633, 0xcff8e31619b19578})}, + {Sign::POS, -137, MType({0x37fa81eef4819c88, 0xd3759ae1c9f9da5b})}, + {Sign::POS, -137, MType({0xff6c4a8d747c65ed, 0xd6f260adbfb37b55})}, + {Sign::POS, -137, MType({0x921c29493a33318c, 0xda6f347a6b4e0070})}, + {Sign::POS, -137, MType({0xda0631eb65e731d8, 0xddec16483d3e3c27})}, + {Sign::POS, -137, MType({0xb3da6c07d110babc, 0xe1690617a5fe4bc2})}, + {Sign::POS, -137, MType({0xf2485c7868b8835a, 0xe4e603e9160d97a6})}, + {Sign::POS, -137, MType({0x67f5b7ed01344055, 0xe8630fbcfdf0d3ae})}, + {Sign::POS, -137, MType({0xf820df445b1d0622, 0xebe02993ce31ff7b})}, + {Sign::POS, -137, MType({0xadefc674b7eca5cd, 0xef5d516df76066d0})}, + {Sign::POS, -137, MType({0xda6be6dc057d3235, 0xf2da874bea10a1e0})}, + {Sign::POS, -137, MType({0x392bdde152ab5ff5, 0xf657cb2e16dc95a9})}, + {Sign::POS, -137, MType({0x1bab58e2ec99cf73, 0xf9d51d14ee637444})}, + {Sign::POS, -137, MType({0x9b51ef7e3388d692, 0xfd527d00e149bd3e})}, + {Sign::POS, -136, MType({0xe914c6a7f3f22fa2, 
0x8067f579301c9ef6})}, + {Sign::POS, -136, MType({0xd22862eb2081c94, 0x8226b374edf088e2})}, + {Sign::POS, -136, MType({0x29ebd0b476cd8fd8, 0x83e57873e27ad153})}, + {Sign::POS, -136, MType({0x98feddc2806d01ed, 0x85a44476461854a0})}, + {Sign::POS, -136, MType({0x471bfc261a401854, 0x8763177c512896af})}, + {Sign::POS, -136, MType({0xb6f89c19b4cd1acd, 0x88b23a5b61430a16})}, + {Sign::POS, -136, MType({0xb39aaf34163fb099, 0x8a7119a85909ebe9})}, + {Sign::POS, -136, MType({0x1665f0f821541c36, 0x8c2ffff99357e887})}, + {Sign::POS, -136, MType({0xa5051754e049c1cb, 0x8deeed4f489679a6})}, + {Sign::POS, -136, MType({0x8c5a9a1c57b2e986, 0x8fade1a9b131c159})}, + {Sign::POS, -136, MType({0x1d8448438a26a9ae, 0x916cdd0905988a35})}, + {Sign::POS, -136, MType({0x8e3a0913ecd2fd02, 0x932bdf6d7e3c477d})}, + {Sign::POS, -136, MType({0xbc881a45f47f1d36, 0x94eae8d753911550})}, + {Sign::POS, -136, MType({0xf5e51c05499b06d0, 0x96a9f946be0db8d0})}, + {Sign::POS, -136, MType({0xc1a43be81a243fde, 0x986910bbf62ba04f})}, + {Sign::POS, -136, MType({0xaec3cfebe971beb7, 0x9a282f373466e378})}, + {Sign::POS, -136, MType({0x2518b29328614989, 0x9be754b8b13e437c})}, + {Sign::POS, -136, MType({0x39d6b147cbe803a4, 0x9da68140a5332b3a})}, + {Sign::POS, -136, MType({0x87765e3004ae428d, 0x9f65b4cf48c9af6d})}, + {Sign::POS, -136, MType({0x8f896ab28245bac, 0xa124ef64d4888ed6})}, + {Sign::POS, -136, MType({0xf8880fb5ca630c87, 0xa2e4310180f93263})}, + {Sign::POS, -136, MType({0xb179397cf82e935c, 0xa4a379a586a7ad62})}, + {Sign::POS, -136, MType({0x95a8cb717197ad81, 0xa662c9511e22bda3})}, + {Sign::POS, -136, MType({0xf6394a34b7f9a4a4, 0xa82220047ffbcba8})}, + {Sign::POS, -136, MType({0xffafd8c2b57884e8, 0xa9e17dbfe4c6ead0})}, + {Sign::POS, -136, MType({0xa970a643b8a6ac2b, 0xaba0e283851ad980})}, + {Sign::POS, -136, MType({0xa89b49fb749d47e0, 0xad604e4f9991014e})}, + {Sign::POS, -136, MType({0x66475ed2ac983305, 0xaf1fc1245ac5772e})}, + {Sign::POS, -136, MType({0xb4fd6209364bb36f, 0xb06f5be1bf1918e7})}, + {Sign::POS, 
-136, MType({0x8b5ce79b0965962a, 0xb22edb0636da31d6})}, + {Sign::POS, -136, MType({0x6724232b07396427, 0xb3ee6133f7149769})}, + {Sign::POS, -136, MType({0x2f02b14dcad8a49c, 0xb5adee6b386e62ae})}, + {Sign::POS, -136, MType({0xbd6443a81f792e07, 0xb76d82ac339058db})}, + {Sign::POS, -136, MType({0xea1cd9625749939a, 0xb92d1df72125eb7c})}, + {Sign::POS, -136, MType({0x97775e3142198913, 0xbaecc04c39dd389b})}, + {Sign::POS, -136, MType({0xc2a701b809a2bc39, 0xbcac69abb6670aeb})}, + {Sign::POS, -136, MType({0x979b990f39e662e3, 0xbe6c1a15cf76d9f6})}, + {Sign::POS, -136, MType({0x88395c463ddd82b2, 0xc02bd18abdc2ca45})}, + {Sign::POS, -136, MType({0x66f451bd9ba5ed05, 0xc1eb900aba03ad8d})}, + {Sign::POS, -136, MType({0x84cfb9413f6437a6, 0xc3ab5595fcf502d9})}, + {Sign::POS, -136, MType({0xd2c1c8d32943ca42, 0xc56b222cbf54f6b6})}, + {Sign::POS, -136, MType({0x67c0d1fd95192e6, 0xc72af5cf39e4635f})}, + {Sign::POS, -136, MType({0xc298bf9edb6441f2, 0xc8ead07da566d0e3})}, + {Sign::POS, -136, MType({0xc22d646addde3910, 0xcaaab2383aa27559})}, + {Sign::POS, -136, MType({0x7c301e5c7d1ca40, 0xcc6a9aff32603504})}, + {Sign::POS, -136, MType({0xfb444464df02505, 0xce2a8ad2c56ba27f})}, + {Sign::POS, -136, MType({0x5f1df3591ae898f, 0xcfea81b32c92feec})}, + {Sign::POS, -136, MType({0xb43caf8e7b891066, 0xd13a7f7c07506f7d})}, + {Sign::POS, -136, MType({0x597fb13f0d0fdf19, 0xd2fa82b36a610c4f})}, + {Sign::POS, -136, MType({0x3c21f1c60a60b0d6, 0xd4ba8cf83dd2a06b})}, + {Sign::POS, -136, MType({0x2b7455909a0428a4, 0xd67a9e4aba7d7ce5})}, + {Sign::POS, -136, MType({0x1438b60573d2da10, 0xd83ab6ab193ca223})}, + {Sign::POS, -136, MType({0x49f86400c5ab2b11, 0xd9fad61992edc008})}, + {Sign::POS, -136, MType({0xd3c313d148a23c35, 0xdbbafc9660713620})}, + {Sign::POS, -136, MType({0xbc56852355e0f0d5, 0xdd7b2a21baaa13cc})}, }, // -log10(r) for the third step, generated by SageMath with: // // for i in range(-80, 81): // r = 2^-21 * round( 2^21 / (1 + i*2^(-21)) ); // s, m, e = 
RealField(128)(r).log10().sign_mantissa_exponent(); - // print("{Sign::POS," if (s == -1) else "{Sign::NEG,", e, "," , - // format_hex(m), "},"); + // print("{Sign::POS," if (s == -1) else "{Sign::NEG,", e, ", + // MType({", hex(m % 2^64), ",", hex((m >> 64) % 2^64), "})},"); /* .step_3 = */ { - {Sign::NEG, -143, 0x8af8b9b3'22ba8c7d'54d7e498'98ca0093_u128}, - {Sign::NEG, -143, 0x893c0652'9deffc3d'c321bbf1'6665f29c_u128}, - {Sign::NEG, -143, 0x877f52e4'33ac7ec4'8246df71'40c3e4ae_u128}, - {Sign::NEG, -143, 0x85c29f67'e3ef35bc'1deaa9e8'5780e4c1_u128}, - {Sign::NEG, -143, 0x8405ebdd'aeb742cf'0cd8a512'1a9162d0_u128}, - {Sign::NEG, -143, 0x82493845'9403c7a7'b10486fa'4644308d_u128}, - {Sign::NEG, -143, 0x808c849f'93d3e5f0'578a2f61'eedd4be8_u128}, - {Sign::NEG, -144, 0xfd9fa1d7'5c4d7ea6'715b4a49'1790e8a7_u128}, - {Sign::NEG, -144, 0xfa263a53'c5f6eaf4'efb6273a'04c71573_u128}, - {Sign::NEG, -144, 0xf6acd2b4'64a25420'474d9015'60c17807_u128}, - {Sign::NEG, -144, 0xf3336af9'384dfd7c'6b9a5dec'eb80ec57_u128}, - {Sign::NEG, -144, 0xefba0322'40f82a5d'2665a32f'7cc64f79_u128}, - {Sign::NEG, -144, 0xec409b2f'7e9f1e16'17c8a673'16659363_u128}, - {Sign::NEG, -144, 0xe8c73320'f1411bfa'b62cdd3e'f5c8673d_u128}, - {Sign::NEG, -144, 0xe54dcaf6'98dc675e'4e4be6d5'a4a07422_u128}, - {Sign::NEG, -144, 0xe1d462b0'756f4394'032f86ff'08c92e22_u128}, - {Sign::NEG, -144, 0xde5afa4e'86f7f3ee'ce31a0d2'7359396f_u128}, - {Sign::NEG, -144, 0xdae191d0'cd74bbc1'7efc3180'aee36373_u128}, - {Sign::NEG, -144, 0xd7682937'48e3de5e'bb894b1e'0ce72fc4_u128}, - {Sign::NEG, -144, 0xd3eec081'f9439f19'00230f6c'7270f8be_u128}, - {Sign::NEG, -144, 0xd07557b0'de924142'9f63aaa5'63e9a399_u128}, - {Sign::NEG, -144, 0xccfbeec3'f8ce082d'c2354e44'1015e7eb_u128}, - {Sign::NEG, -144, 0xc98285bb'47f5372c'67d22bcf'5a452a4c_u128}, - {Sign::NEG, -144, 0xc6091c96'cc061190'65c46fa3'e3afea18_u128}, - {Sign::NEG, -144, 0xc28fb356'84fedaab'67e63bbe'1405c20d_u128}, - {Sign::NEG, -144, 0xbf1649fa'72ddd5ce'f061a284'212afbad_u128}, - {Sign::NEG, 
-144, 0xbb9ce082'95a1464c'57b0a190'1625b539_u128}, - {Sign::NEG, -144, 0xb82376ee'ed476f74'cc9d1c79'd93a9a1e_u128}, - {Sign::NEG, -144, 0xb4aa0d3f'79ce9499'5440d7a1'31392da8_u128}, - {Sign::NEG, -144, 0xb130a374'3b34f90a'ca0572f7'c9f7a7de_u128}, - {Sign::NEG, -144, 0xadb7398d'3178e019'dfa464cb'37fe6455_u128}, - {Sign::NEG, -144, 0xaa3dcf8a'5c988d17'1d26f48e'fb62e2e0_u128}, - {Sign::NEG, -144, 0xa6c4656b'bc924352'e0e635a6'81d259e2_u128}, - {Sign::NEG, -144, 0xa34afb31'5164461d'5f8b022f'27cbda35_u128}, - {Sign::NEG, -144, 0x9fd190db'1b0cd8c6'a40df5ca'390a0465_u128}, - {Sign::NEG, -144, 0x9c582669'198a3e9e'8fb76866'f01c4f2d_u128}, - {Sign::NEG, -144, 0x98debbdb'4cdabaf4'da1f690c'752fdeff_u128}, - {Sign::NEG, -144, 0x95655131'b4fc9119'112db8a3'dc07ee78_u128}, - {Sign::NEG, -144, 0x91ebe66c'51ee045a'9919c4c2'2125c79e_u128}, - {Sign::NEG, -144, 0x8e727b8b'23ad5808'ac6aa272'26204db3_u128}, - {Sign::NEG, -144, 0x8af9108e'2a38cf72'5bf708fe'ad2b1780_u128}, - {Sign::NEG, -144, 0x877fa575'658eade6'8ee54cbc'53cd19ed_u128}, - {Sign::NEG, -144, 0x84063a40'd5ad36b4'02ab59d3'8cc6e2c5_u128}, - {Sign::NEG, -144, 0x808ccef0'7a92ad29'4b0eaf0a'99286378_u128}, - {Sign::NEG, -145, 0xfa26c708'a87aa929'a448b11f'012c975c_u128}, - {Sign::NEG, -145, 0xf333eff8'c556e089'b0a1d584'117de73b_u128}, - {Sign::NEG, -145, 0xec4118b1'4bb6870e'e890f9fb'57fdabb6_u128}, - {Sign::NEG, -145, 0xe54e4132'3b962355'261d48c7'1e693130_u128}, - {Sign::NEG, -145, 0xde5b697b'94f23bf7'efecdd48'ed894c32_u128}, - {Sign::NEG, -145, 0xd768918d'57c75792'7944b995'7598a88a_u128}, - {Sign::NEG, -145, 0xd075b967'8411fcbf'a208bc08'75093645_u128}, - {Sign::NEG, -145, 0xc982e10a'19ceb219'f6bb94d8'9da8b432_u128}, - {Sign::NEG, -145, 0xc2900875'18f9fe3b'b07ebbab'782457b0_u128}, - {Sign::NEG, -145, 0xbb9d2fa8'819067be'b5126529'45eb9165_u128}, - {Sign::NEG, -145, 0xb4aa56a4'538e753c'96d57890'e171eea5_u128}, - {Sign::NEG, -145, 0xadb77d68'8ef0ad4e'94c5854b'9cd01726_u128}, - {Sign::NEG, -145, 
0xa6c4a3f5'33b3968d'9a7eb881'1ec3e6bb_u128}, - {Sign::NEG, -145, 0x9fd1ca4a'41d3b792'403bd2ab'3e0fa2d7_u128}, - {Sign::NEG, -145, 0x98def067'b94d96f4'cad61d29'db384b6b_u128}, - {Sign::NEG, -145, 0x91ec164d'9a1dbb4d'2bc55fd6'b8a306ec_u128}, - {Sign::NEG, -145, 0x8af93bfb'e440ab33'011fd699'5111a927_u128}, - {Sign::NEG, -145, 0x84066172'97b2ed3d'959a26fa'ac7e5494_u128}, - {Sign::NEG, -146, 0xfa270d63'68e21007'c10eab72'66ac6bc0_u128}, - {Sign::NEG, -146, 0xec415772'74ef0439'0bb178b9'0026b2b2_u128}, - {Sign::NEG, -146, 0xde5ba112'5385c43b'ac3bfd92'5e6b33e1_u128}, - {Sign::NEG, -146, 0xd075ea43'049f5d3b'9d0a01a9'5b355319_u128}, - {Sign::NEG, -146, 0xc2903304'8834dc64'31b3b7b2'0a6a6496_u128}, - {Sign::NEG, -146, 0xb4aa7b56'de3f4ee0'170da891'504620f4_u128}, - {Sign::NEG, -146, 0xa6c4c33a'06b7c1d9'53289e84'744549cb_u128}, - {Sign::NEG, -146, 0x98df0aae'01974279'45519048'b0ce7e7f_u128}, - {Sign::NEG, -146, 0x8af951b2'ced6dde8'a6118c42'bf99407e_u128}, - {Sign::NEG, -147, 0xfa273090'dcdf429f'0e5b474c'c5a64cf6_u128}, - {Sign::NEG, -147, 0xde5bbcdd'c0b533aa'a74dab3b'd6067bc7_u128}, - {Sign::NEG, -147, 0xc290484c'4921a941'9f73f4e3'7357341b_u128}, - {Sign::NEG, -147, 0xa6c4d2dc'7616bdb0'31bf5d5f'815220e7_u128}, - {Sign::NEG, -147, 0x8af95c8e'47868b41'4b987ca5'fca242d7_u128}, - {Sign::NEG, -148, 0xde5bcac3'7ac6587d'19be3fab'd93832c5_u128}, - {Sign::NEG, -148, 0xa6c4daad'af3d75e0'8fd43f0c'9ce444d3_u128}, - {Sign::NEG, -149, 0xde5bd1b6'58ad4676'061cd853'e796bc2c_u128}, - {Sign::NEG, -150, 0xde5bd52f'c7d8545f'87d6afab'fba0644f_u128}, - {Sign::POS, 0, 0_u128}, - {Sign::POS, -150, 0xde5bdc22'a69d9e19'a9bf3200'1043629d_u128}, - {Sign::POS, -149, 0xde5bdf9c'1637d9ef'8014f0f3'60272d82_u128}, - {Sign::POS, -148, 0xa6c4ea50'24795bd2'fe94a02f'c639c0e3_u128}, - {Sign::POS, -148, 0xde5be68e'f5db7f99'bee710a5'ace7c8d4_u128}, - {Sign::POS, -147, 0x8af97245'3faf11e8'1a778d81'00437e4f_u128}, - {Sign::POS, -147, 0xa6c4f221'608e89fe'97d773f8'992f7051_u128}, - {Sign::POS, -147, 
0xc29072db'dd9a0dd5'0c9ee584'1a3afa95_u128}, - {Sign::POS, -147, 0xde5bf474'b6df8331'7b644b13'993cf4ef_u128}, - {Sign::POS, -147, 0xfa2776eb'ec6ccfdb'3448f66e'2bd7a0ca_u128}, - {Sign::POS, -146, 0x8af97d20'bf27eccd'6a7ca5f1'a87a1a3c_u128}, - {Sign::POS, -146, 0x98df3f3a'b64b431d'245675fe'3061108f_u128}, - {Sign::POS, -146, 0xa6c501c3'dba75dc2'64136e97'019d0a3b_u128}, - {Sign::POS, -146, 0xb4aac4bc'2f432fa3'6cdadac4'd6925bd4_u128}, - {Sign::POS, -146, 0xc2908823'b125aba7'2899e237'91d29632_u128}, - {Sign::POS, -146, 0xd0764bfa'6155c4b5'28039e1f'0323a4c1_u128}, - {Sign::POS, -146, 0xde5c1040'3fda6db5'a2912e03'afc8cc28_u128}, - {Sign::POS, -146, 0xec41d4f5'4cba9991'7681cc9f'9e0d89f9_u128}, - {Sign::POS, -146, 0xfa279a19'87fd3b32'28dae4b7'241255e1_u128}, - {Sign::POS, -145, 0x8406afd6'78d4a2c0'f2b412f8'dceda28e_u128}, - {Sign::POS, -145, 0x8af992d7'c4e2d5b5'bf5dccd9'67504857_u128}, - {Sign::POS, -145, 0x91ec7610'a82cafed'3716dbf9'50b07f85_u128}, - {Sign::POS, -145, 0x98df5981'22b5aadd'69eebe0b'8e5b18e1_u128}, - {Sign::POS, -145, 0x9fd23d29'34813ffc'bb583ce6'5af56beb_u128}, - {Sign::POS, -145, 0xa6c52108'dd92e8c1'e22978ef'a7a962a0_u128}, - {Sign::POS, -145, 0xadb80520'1dee1ea3'e89bf389'8ef27836_u128}, - {Sign::POS, -145, 0xb4aae96e'f5965b1a'2c4c997e'c90bab0b_u128}, - {Sign::POS, -145, 0xbb9dcdf5'648f179c'5e3bcd6f'21fe6224_u128}, - {Sign::POS, -145, 0xc290b2b3'6adbcda2'82cd723b'f1524680_u128}, - {Sign::POS, -145, 0xc98397a9'087ff6a4'f1c8f574'935e109b_u128}, - {Sign::POS, -145, 0xd0767cd6'3d7f0c1c'565959c2'e4394a59_u128}, - {Sign::POS, -145, 0xd769623b'09dc8781'af0d4157'bc4f05be_u128}, - {Sign::POS, -145, 0xde5c47d7'6d9be24e'4dd6f857'6e9188b8_u128}, - {Sign::POS, -145, 0xe54f2dab'68c095fb'd80c7f46'484eee3d_u128}, - {Sign::POS, -145, 0xec4213b6'fb4e1c04'46679575'12a6bd26_u128}, - {Sign::POS, -145, 0xf334f9fa'2547ede1'e505c36d'95a074fa_u128}, - {Sign::POS, -145, 0xfa27e074'e6b1850f'5368655f'1ce3110b_u128}, - {Sign::POS, -144, 0x808d6393'9fc72d83'c23a5ac5'7f06c112_u128}, - 
{Sign::POS, -144, 0x8406d708'97f0f4a2'df39eb58'90580f93_u128}, - {Sign::POS, -144, 0x87804a99'5bd7d4a2'cd896f3e'43f38669_u128}, - {Sign::POS, -144, 0x8af9be45'eb7d8a41'83b16ff7'eecace8c_u128}, - {Sign::POS, -144, 0x8e73320e'46e3d23d'21ec7ae8'ffa1531d_u128}, - {Sign::POS, -144, 0x91eca5f2'6e0c6953'f227268d'464ae907_u128}, - {Sign::POS, -144, 0x956619f2'60f90c44'680017af'3bbaf2d3_u128}, - {Sign::POS, -144, 0x98df8e0e'1fab77cd'20c8069e'4ae400de_u128}, - {Sign::POS, -144, 0x9c590245'aa2568ac'e381c465'1a67ee13_u128}, - {Sign::POS, -144, 0x9fd27699'00689ba2'a0e23fff'd718794e_u128}, - {Sign::POS, -144, 0xa34beb08'2276cd6d'73508b92'7f485b97_u128}, - {Sign::POS, -144, 0xa6c55f93'1051bacc'9ee5e19f'2eecdb55_u128}, - {Sign::POS, -144, 0xaa3ed439'c9fb207f'916daa3c'6c8fdc9d_u128}, - {Sign::POS, -144, 0xadb848fc'4f74bb45'e265804b'77126ed3_u128}, - {Sign::POS, -144, 0xb131bdda'a0c047df'52fd36ae'943fd7b4_u128}, - {Sign::POS, -144, 0xb4ab32d4'bddf830b'ce16dd7f'60311bf6_u128}, - {Sign::POS, -144, 0xb824a7ea'a6d4298b'6846c745'1d8105ac_u128}, - {Sign::POS, -144, 0xbb9e1d1c'5b9ff81e'5fd38e2b'0650a884_u128}, - {Sign::POS, -144, 0xbf179269'dc44ab85'1cb61936'9e1c641f_u128}, - {Sign::POS, -144, 0xc29107d3'28c40080'3099a17e'0461648c_u128}, - {Sign::POS, -144, 0xc60a7d58'411fb3d0'56dbb75e'4813a12b_u128}, - {Sign::POS, -144, 0xc983f2f9'25598236'748c47b1'bbe45a07_u128}, - {Sign::POS, -144, 0xccfd68b5'd5732873'986da106'4b5913e1_u128}, - {Sign::POS, -144, 0xd076de8e'516e6348'faf478d3'd0b31300_u128}, - {Sign::POS, -144, 0xd3f05482'994cef77'fe47f0b2'6ba754ff_u128}, - {Sign::POS, -144, 0xd769ca92'ad1089c2'2e419b90'd8e709b7_u128}, - {Sign::POS, -144, 0xdae340be'8cbaeee9'406d82ea'ca788b6f_u128}, - {Sign::POS, -144, 0xde5cb706'384ddbaf'140a2bff'40e0d670_u128}, - {Sign::POS, -144, 0xe1d62d69'afcb0cd5'b2089d06'e51d8034_u128}, - {Sign::POS, -144, 0xe54fa3e8'f3343f1f'4d0c626a'636f2e4f_u128}, - {Sign::POS, -144, 0xe8c91a84'028b2f4e'416b93f8'c6f48d30_u128}, - {Sign::POS, -144, 
0xec42913a'ddd19a25'152eda1d'd615c6f5_u128}, - {Sign::POS, -144, 0xefbc080d'85093c66'78117318'6fc07a66_u128}, - {Sign::POS, -144, 0xf3357efb'f833d2d5'43813830'e974324d_u128}, - {Sign::POS, -144, 0xf6aef606'37531a34'7a9ea2ef'6e1f5d41_u128}, - {Sign::POS, -144, 0xfa286d2c'4268cf47'4a3cd252'5dccc623_u128}, - {Sign::POS, -144, 0xfda1e46e'1976aed1'08e19004'ae218d5d_u128}, - {Sign::POS, -143, 0x808dade5'de3f3aca'9b62aaca'25d5d18a_u128}, - {Sign::POS, -143, 0x824a69a2'95c0f02b'bee9a8d4'3e00613c_u128}, - {Sign::POS, -143, 0x8407256d'334155ed'd8d4b69c'2056f729_u128}, - {Sign::POS, -143, 0x85c3e145'b6c14a72'e7cc2860'5d7bb77e_u128}, - {Sign::POS, -143, 0x87809d2c'2041ac1c'ff51b4bd'c834a8f1_u128}, - {Sign::POS, -143, 0x893d5920'6fc3594e'47c0774a'a81c3561_u128}, - {Sign::POS, -143, 0x8afa1522'a5473068'fe4cf331'ecb9eb62_u128}, + {Sign::NEG, -143, MType({0x54d7e49898ca0093, 0x8af8b9b322ba8c7d})}, + {Sign::NEG, -143, MType({0xc321bbf16665f29c, 0x893c06529deffc3d})}, + {Sign::NEG, -143, MType({0x8246df7140c3e4ae, 0x877f52e433ac7ec4})}, + {Sign::NEG, -143, MType({0x1deaa9e85780e4c1, 0x85c29f67e3ef35bc})}, + {Sign::NEG, -143, MType({0xcd8a5121a9162d0, 0x8405ebddaeb742cf})}, + {Sign::NEG, -143, MType({0xb10486fa4644308d, 0x824938459403c7a7})}, + {Sign::NEG, -143, MType({0x578a2f61eedd4be8, 0x808c849f93d3e5f0})}, + {Sign::NEG, -144, MType({0x715b4a491790e8a7, 0xfd9fa1d75c4d7ea6})}, + {Sign::NEG, -144, MType({0xefb6273a04c71573, 0xfa263a53c5f6eaf4})}, + {Sign::NEG, -144, MType({0x474d901560c17807, 0xf6acd2b464a25420})}, + {Sign::NEG, -144, MType({0x6b9a5deceb80ec57, 0xf3336af9384dfd7c})}, + {Sign::NEG, -144, MType({0x2665a32f7cc64f79, 0xefba032240f82a5d})}, + {Sign::NEG, -144, MType({0x17c8a67316659363, 0xec409b2f7e9f1e16})}, + {Sign::NEG, -144, MType({0xb62cdd3ef5c8673d, 0xe8c73320f1411bfa})}, + {Sign::NEG, -144, MType({0x4e4be6d5a4a07422, 0xe54dcaf698dc675e})}, + {Sign::NEG, -144, MType({0x32f86ff08c92e22, 0xe1d462b0756f4394})}, + {Sign::NEG, -144, MType({0xce31a0d27359396f, 
0xde5afa4e86f7f3ee})}, + {Sign::NEG, -144, MType({0x7efc3180aee36373, 0xdae191d0cd74bbc1})}, + {Sign::NEG, -144, MType({0xbb894b1e0ce72fc4, 0xd768293748e3de5e})}, + {Sign::NEG, -144, MType({0x230f6c7270f8be, 0xd3eec081f9439f19})}, + {Sign::NEG, -144, MType({0x9f63aaa563e9a399, 0xd07557b0de924142})}, + {Sign::NEG, -144, MType({0xc2354e441015e7eb, 0xccfbeec3f8ce082d})}, + {Sign::NEG, -144, MType({0x67d22bcf5a452a4c, 0xc98285bb47f5372c})}, + {Sign::NEG, -144, MType({0x65c46fa3e3afea18, 0xc6091c96cc061190})}, + {Sign::NEG, -144, MType({0x67e63bbe1405c20d, 0xc28fb35684fedaab})}, + {Sign::NEG, -144, MType({0xf061a284212afbad, 0xbf1649fa72ddd5ce})}, + {Sign::NEG, -144, MType({0x57b0a1901625b539, 0xbb9ce08295a1464c})}, + {Sign::NEG, -144, MType({0xcc9d1c79d93a9a1e, 0xb82376eeed476f74})}, + {Sign::NEG, -144, MType({0x5440d7a131392da8, 0xb4aa0d3f79ce9499})}, + {Sign::NEG, -144, MType({0xca0572f7c9f7a7de, 0xb130a3743b34f90a})}, + {Sign::NEG, -144, MType({0xdfa464cb37fe6455, 0xadb7398d3178e019})}, + {Sign::NEG, -144, MType({0x1d26f48efb62e2e0, 0xaa3dcf8a5c988d17})}, + {Sign::NEG, -144, MType({0xe0e635a681d259e2, 0xa6c4656bbc924352})}, + {Sign::NEG, -144, MType({0x5f8b022f27cbda35, 0xa34afb315164461d})}, + {Sign::NEG, -144, MType({0xa40df5ca390a0465, 0x9fd190db1b0cd8c6})}, + {Sign::NEG, -144, MType({0x8fb76866f01c4f2d, 0x9c582669198a3e9e})}, + {Sign::NEG, -144, MType({0xda1f690c752fdeff, 0x98debbdb4cdabaf4})}, + {Sign::NEG, -144, MType({0x112db8a3dc07ee78, 0x95655131b4fc9119})}, + {Sign::NEG, -144, MType({0x9919c4c22125c79e, 0x91ebe66c51ee045a})}, + {Sign::NEG, -144, MType({0xac6aa27226204db3, 0x8e727b8b23ad5808})}, + {Sign::NEG, -144, MType({0x5bf708fead2b1780, 0x8af9108e2a38cf72})}, + {Sign::NEG, -144, MType({0x8ee54cbc53cd19ed, 0x877fa575658eade6})}, + {Sign::NEG, -144, MType({0x2ab59d38cc6e2c5, 0x84063a40d5ad36b4})}, + {Sign::NEG, -144, MType({0x4b0eaf0a99286378, 0x808ccef07a92ad29})}, + {Sign::NEG, -145, MType({0xa448b11f012c975c, 0xfa26c708a87aa929})}, + {Sign::NEG, -145, 
MType({0xb0a1d584117de73b, 0xf333eff8c556e089})}, + {Sign::NEG, -145, MType({0xe890f9fb57fdabb6, 0xec4118b14bb6870e})}, + {Sign::NEG, -145, MType({0x261d48c71e693130, 0xe54e41323b962355})}, + {Sign::NEG, -145, MType({0xefecdd48ed894c32, 0xde5b697b94f23bf7})}, + {Sign::NEG, -145, MType({0x7944b9957598a88a, 0xd768918d57c75792})}, + {Sign::NEG, -145, MType({0xa208bc0875093645, 0xd075b9678411fcbf})}, + {Sign::NEG, -145, MType({0xf6bb94d89da8b432, 0xc982e10a19ceb219})}, + {Sign::NEG, -145, MType({0xb07ebbab782457b0, 0xc290087518f9fe3b})}, + {Sign::NEG, -145, MType({0xb512652945eb9165, 0xbb9d2fa8819067be})}, + {Sign::NEG, -145, MType({0x96d57890e171eea5, 0xb4aa56a4538e753c})}, + {Sign::NEG, -145, MType({0x94c5854b9cd01726, 0xadb77d688ef0ad4e})}, + {Sign::NEG, -145, MType({0x9a7eb8811ec3e6bb, 0xa6c4a3f533b3968d})}, + {Sign::NEG, -145, MType({0x403bd2ab3e0fa2d7, 0x9fd1ca4a41d3b792})}, + {Sign::NEG, -145, MType({0xcad61d29db384b6b, 0x98def067b94d96f4})}, + {Sign::NEG, -145, MType({0x2bc55fd6b8a306ec, 0x91ec164d9a1dbb4d})}, + {Sign::NEG, -145, MType({0x11fd6995111a927, 0x8af93bfbe440ab33})}, + {Sign::NEG, -145, MType({0x959a26faac7e5494, 0x8406617297b2ed3d})}, + {Sign::NEG, -146, MType({0xc10eab7266ac6bc0, 0xfa270d6368e21007})}, + {Sign::NEG, -146, MType({0xbb178b90026b2b2, 0xec41577274ef0439})}, + {Sign::NEG, -146, MType({0xac3bfd925e6b33e1, 0xde5ba1125385c43b})}, + {Sign::NEG, -146, MType({0x9d0a01a95b355319, 0xd075ea43049f5d3b})}, + {Sign::NEG, -146, MType({0x31b3b7b20a6a6496, 0xc29033048834dc64})}, + {Sign::NEG, -146, MType({0x170da891504620f4, 0xb4aa7b56de3f4ee0})}, + {Sign::NEG, -146, MType({0x53289e84744549cb, 0xa6c4c33a06b7c1d9})}, + {Sign::NEG, -146, MType({0x45519048b0ce7e7f, 0x98df0aae01974279})}, + {Sign::NEG, -146, MType({0xa6118c42bf99407e, 0x8af951b2ced6dde8})}, + {Sign::NEG, -147, MType({0xe5b474cc5a64cf6, 0xfa273090dcdf429f})}, + {Sign::NEG, -147, MType({0xa74dab3bd6067bc7, 0xde5bbcddc0b533aa})}, + {Sign::NEG, -147, MType({0x9f73f4e37357341b, 
0xc290484c4921a941})}, + {Sign::NEG, -147, MType({0x31bf5d5f815220e7, 0xa6c4d2dc7616bdb0})}, + {Sign::NEG, -147, MType({0x4b987ca5fca242d7, 0x8af95c8e47868b41})}, + {Sign::NEG, -148, MType({0x19be3fabd93832c5, 0xde5bcac37ac6587d})}, + {Sign::NEG, -148, MType({0x8fd43f0c9ce444d3, 0xa6c4daadaf3d75e0})}, + {Sign::NEG, -149, MType({0x61cd853e796bc2c, 0xde5bd1b658ad4676})}, + {Sign::NEG, -150, MType({0x87d6afabfba0644f, 0xde5bd52fc7d8545f})}, + {Sign::POS, 0, MType({0x0, 0x0})}, + {Sign::POS, -150, MType({0xa9bf32001043629d, 0xde5bdc22a69d9e19})}, + {Sign::POS, -149, MType({0x8014f0f360272d82, 0xde5bdf9c1637d9ef})}, + {Sign::POS, -148, MType({0xfe94a02fc639c0e3, 0xa6c4ea5024795bd2})}, + {Sign::POS, -148, MType({0xbee710a5ace7c8d4, 0xde5be68ef5db7f99})}, + {Sign::POS, -147, MType({0x1a778d8100437e4f, 0x8af972453faf11e8})}, + {Sign::POS, -147, MType({0x97d773f8992f7051, 0xa6c4f221608e89fe})}, + {Sign::POS, -147, MType({0xc9ee5841a3afa95, 0xc29072dbdd9a0dd5})}, + {Sign::POS, -147, MType({0x7b644b13993cf4ef, 0xde5bf474b6df8331})}, + {Sign::POS, -147, MType({0x3448f66e2bd7a0ca, 0xfa2776ebec6ccfdb})}, + {Sign::POS, -146, MType({0x6a7ca5f1a87a1a3c, 0x8af97d20bf27eccd})}, + {Sign::POS, -146, MType({0x245675fe3061108f, 0x98df3f3ab64b431d})}, + {Sign::POS, -146, MType({0x64136e97019d0a3b, 0xa6c501c3dba75dc2})}, + {Sign::POS, -146, MType({0x6cdadac4d6925bd4, 0xb4aac4bc2f432fa3})}, + {Sign::POS, -146, MType({0x2899e23791d29632, 0xc2908823b125aba7})}, + {Sign::POS, -146, MType({0x28039e1f0323a4c1, 0xd0764bfa6155c4b5})}, + {Sign::POS, -146, MType({0xa2912e03afc8cc28, 0xde5c10403fda6db5})}, + {Sign::POS, -146, MType({0x7681cc9f9e0d89f9, 0xec41d4f54cba9991})}, + {Sign::POS, -146, MType({0x28dae4b7241255e1, 0xfa279a1987fd3b32})}, + {Sign::POS, -145, MType({0xf2b412f8dceda28e, 0x8406afd678d4a2c0})}, + {Sign::POS, -145, MType({0xbf5dccd967504857, 0x8af992d7c4e2d5b5})}, + {Sign::POS, -145, MType({0x3716dbf950b07f85, 0x91ec7610a82cafed})}, + {Sign::POS, -145, MType({0x69eebe0b8e5b18e1, 
0x98df598122b5aadd})}, + {Sign::POS, -145, MType({0xbb583ce65af56beb, 0x9fd23d2934813ffc})}, + {Sign::POS, -145, MType({0xe22978efa7a962a0, 0xa6c52108dd92e8c1})}, + {Sign::POS, -145, MType({0xe89bf3898ef27836, 0xadb805201dee1ea3})}, + {Sign::POS, -145, MType({0x2c4c997ec90bab0b, 0xb4aae96ef5965b1a})}, + {Sign::POS, -145, MType({0x5e3bcd6f21fe6224, 0xbb9dcdf5648f179c})}, + {Sign::POS, -145, MType({0x82cd723bf1524680, 0xc290b2b36adbcda2})}, + {Sign::POS, -145, MType({0xf1c8f574935e109b, 0xc98397a9087ff6a4})}, + {Sign::POS, -145, MType({0x565959c2e4394a59, 0xd0767cd63d7f0c1c})}, + {Sign::POS, -145, MType({0xaf0d4157bc4f05be, 0xd769623b09dc8781})}, + {Sign::POS, -145, MType({0x4dd6f8576e9188b8, 0xde5c47d76d9be24e})}, + {Sign::POS, -145, MType({0xd80c7f46484eee3d, 0xe54f2dab68c095fb})}, + {Sign::POS, -145, MType({0x4667957512a6bd26, 0xec4213b6fb4e1c04})}, + {Sign::POS, -145, MType({0xe505c36d95a074fa, 0xf334f9fa2547ede1})}, + {Sign::POS, -145, MType({0x5368655f1ce3110b, 0xfa27e074e6b1850f})}, + {Sign::POS, -144, MType({0xc23a5ac57f06c112, 0x808d63939fc72d83})}, + {Sign::POS, -144, MType({0xdf39eb5890580f93, 0x8406d70897f0f4a2})}, + {Sign::POS, -144, MType({0xcd896f3e43f38669, 0x87804a995bd7d4a2})}, + {Sign::POS, -144, MType({0x83b16ff7eecace8c, 0x8af9be45eb7d8a41})}, + {Sign::POS, -144, MType({0x21ec7ae8ffa1531d, 0x8e73320e46e3d23d})}, + {Sign::POS, -144, MType({0xf227268d464ae907, 0x91eca5f26e0c6953})}, + {Sign::POS, -144, MType({0x680017af3bbaf2d3, 0x956619f260f90c44})}, + {Sign::POS, -144, MType({0x20c8069e4ae400de, 0x98df8e0e1fab77cd})}, + {Sign::POS, -144, MType({0xe381c4651a67ee13, 0x9c590245aa2568ac})}, + {Sign::POS, -144, MType({0xa0e23fffd718794e, 0x9fd2769900689ba2})}, + {Sign::POS, -144, MType({0x73508b927f485b97, 0xa34beb082276cd6d})}, + {Sign::POS, -144, MType({0x9ee5e19f2eecdb55, 0xa6c55f931051bacc})}, + {Sign::POS, -144, MType({0x916daa3c6c8fdc9d, 0xaa3ed439c9fb207f})}, + {Sign::POS, -144, MType({0xe265804b77126ed3, 0xadb848fc4f74bb45})}, + {Sign::POS, 
-144, MType({0x52fd36ae943fd7b4, 0xb131bddaa0c047df})}, + {Sign::POS, -144, MType({0xce16dd7f60311bf6, 0xb4ab32d4bddf830b})}, + {Sign::POS, -144, MType({0x6846c7451d8105ac, 0xb824a7eaa6d4298b})}, + {Sign::POS, -144, MType({0x5fd38e2b0650a884, 0xbb9e1d1c5b9ff81e})}, + {Sign::POS, -144, MType({0x1cb619369e1c641f, 0xbf179269dc44ab85})}, + {Sign::POS, -144, MType({0x3099a17e0461648c, 0xc29107d328c40080})}, + {Sign::POS, -144, MType({0x56dbb75e4813a12b, 0xc60a7d58411fb3d0})}, + {Sign::POS, -144, MType({0x748c47b1bbe45a07, 0xc983f2f925598236})}, + {Sign::POS, -144, MType({0x986da1064b5913e1, 0xccfd68b5d5732873})}, + {Sign::POS, -144, MType({0xfaf478d3d0b31300, 0xd076de8e516e6348})}, + {Sign::POS, -144, MType({0xfe47f0b26ba754ff, 0xd3f05482994cef77})}, + {Sign::POS, -144, MType({0x2e419b90d8e709b7, 0xd769ca92ad1089c2})}, + {Sign::POS, -144, MType({0x406d82eaca788b6f, 0xdae340be8cbaeee9})}, + {Sign::POS, -144, MType({0x140a2bff40e0d670, 0xde5cb706384ddbaf})}, + {Sign::POS, -144, MType({0xb2089d06e51d8034, 0xe1d62d69afcb0cd5})}, + {Sign::POS, -144, MType({0x4d0c626a636f2e4f, 0xe54fa3e8f3343f1f})}, + {Sign::POS, -144, MType({0x416b93f8c6f48d30, 0xe8c91a84028b2f4e})}, + {Sign::POS, -144, MType({0x152eda1dd615c6f5, 0xec42913addd19a25})}, + {Sign::POS, -144, MType({0x781173186fc07a66, 0xefbc080d85093c66})}, + {Sign::POS, -144, MType({0x43813830e974324d, 0xf3357efbf833d2d5})}, + {Sign::POS, -144, MType({0x7a9ea2ef6e1f5d41, 0xf6aef60637531a34})}, + {Sign::POS, -144, MType({0x4a3cd2525dccc623, 0xfa286d2c4268cf47})}, + {Sign::POS, -144, MType({0x8e19004ae218d5d, 0xfda1e46e1976aed1})}, + {Sign::POS, -143, MType({0x9b62aaca25d5d18a, 0x808dade5de3f3aca})}, + {Sign::POS, -143, MType({0xbee9a8d43e00613c, 0x824a69a295c0f02b})}, + {Sign::POS, -143, MType({0xd8d4b69c2056f729, 0x8407256d334155ed})}, + {Sign::POS, -143, MType({0xe7cc28605d7bb77e, 0x85c3e145b6c14a72})}, + {Sign::POS, -143, MType({0xff51b4bdc834a8f1, 0x87809d2c2041ac1c})}, + {Sign::POS, -143, MType({0x47c0774aa81c3561, 
0x893d59206fc3594e})}, + {Sign::POS, -143, MType({0xfe4cf331ecb9eb62, 0x8afa1522a5473068})}, }, // -log10(r) for the fourth step, generated by SageMath with: // // for i in range(-65, 65): // r = 2^-28 * round( 2^28 / (1 + i*2^(-28)) ); // s, m, e = RealField(128)(r).log10().sign_mantissa_exponent(); - // print("{Sign::POS," if (s == -1) else "{Sign::NEG,", e, ",", - // format_hex(m), "},"); + // print("{Sign::POS," if (s == -1) else "{Sign::NEG,", e, ", + // MType({", hex(m % 2^64), ",", hex((m >> 64) % 2^64), "})},"); /* .step_4 = */ { - {Sign::NEG, -151, 0xe1d54641'22cf95a4'e471a82b'bedbe0ae_u128}, - {Sign::NEG, -151, 0xde5bd6ec'7f7bc110'af6e93be'8e4c1764_u128}, - {Sign::NEG, -151, 0xdae26797'a490f80e'e44848f0'a5779499_u128}, - {Sign::NEG, -151, 0xd768f842'920f3a98'90205533'f4e70566_u128}, - {Sign::NEG, -151, 0xd3ef88ed'47f688a6'c01844ac'e3729e48_u128}, - {Sign::NEG, -151, 0xd0761997'c646e232'8151a232'4e41c7c4_u128}, - {Sign::NEG, -151, 0xccfcaa42'0d004734'e0edf74d'88cacafd_u128}, - {Sign::NEG, -151, 0xc9833aec'1c22b7a6'ec0ecc3a'5cd27e58_u128}, - {Sign::NEG, -151, 0xc609cb95'f3ae3381'afd5a7e7'0a6bf214_u128}, - {Sign::NEG, -151, 0xc2905c3f'93a2babe'39640ff4'47f81ceb_u128}, - {Sign::NEG, -151, 0xbf16ece8'fc004d55'95db88b5'422588b1_u128}, - {Sign::NEG, -151, 0xbb9d7d92'2cc6eb40'd25d952f'9beffeec_u128}, - {Sign::NEG, -151, 0xb8240e3b'25f69478'fc0bb71b'6ea03578_u128}, - {Sign::NEG, -151, 0xb4aa9ee3'e78f48f7'20076ee3'49cb7b20_u128}, - {Sign::NEG, -151, 0xb1312f8c'719108b4'4b723ba4'3353643d_u128}, - {Sign::NEG, -151, 0xadb7c034'c3fbd3a9'8b6d9b2d'a7657754_u128}, - {Sign::NEG, -151, 0xaa3e50dc'decfa9cf'ed1b0a01'987ad9b4_u128}, - {Sign::NEG, -151, 0xa6c4e184'c20c8b20'7d9c0354'6f57fc11_u128}, - {Sign::NEG, -151, 0xa34b722c'6db27794'4a12010d'0b0c4727_u128}, - {Sign::NEG, -151, 0x9fd202d3'e1c16f24'5f9e7bc4'c0f1c851_u128}, - {Sign::NEG, -151, 0x9c58937b'1e3971c9'cb62eac7'5cacde29_u128}, - {Sign::NEG, -151, 0x98df2422'231a7f7d'9a80c413'202be52a_u128}, - {Sign::NEG, -151, 
0x9565b4c8'f0649838'da197c58'c3a6e445_u128}, - {Sign::NEG, -151, 0x91ec456f'8617bbf4'974e86fb'759f3988_u128}, - {Sign::NEG, -151, 0x8e72d615'e433eaa9'df415610'dadf46b3_u128}, - {Sign::NEG, -151, 0x8af966bc'0ab92451'bf135a61'0e7a1ddc_u128}, - {Sign::NEG, -151, 0x877ff761'f9a768e5'43e60366'a1cb2e09_u128}, - {Sign::NEG, -151, 0x84068807'b0feb85d'7adabf4e'9c75efce_u128}, - {Sign::NEG, -151, 0x808d18ad'30bf12b3'7112faf8'7c6591ee_u128}, - {Sign::NEG, -152, 0xfa2752a4'f1d0efc0'676043ec'6b994be5_u128}, - {Sign::NEG, -152, 0xf33473ef'12f5cfb9'9fa73d18'6649999d_u128}, - {Sign::NEG, -152, 0xec419538'c4ecc544'a53db362'aa5cc6f0_u128}, - {Sign::NEG, -152, 0xe54eb682'07b5d053'9266761d'e5e05f13_u128}, - {Sign::NEG, -152, 0xde5bd7ca'db50f0d8'81645201'b36e17ba_u128}, - {Sign::NEG, -152, 0xd768f913'3fbe26c5'8c7a112a'9a2b2a52_u128}, - {Sign::NEG, -152, 0xd0761a5b'34fd720c'cdea7b1a'0dc7ad42_u128}, - {Sign::NEG, -152, 0xc9833ba2'bb0ed2a0'5ff854b6'6e7ded1f_u128}, - {Sign::NEG, -152, 0xc2905ce9'd1f24872'5ce6604b'0911c5ed_u128}, - {Sign::NEG, -152, 0xbb9d7e30'79a7d374'def75d88'16cffc59_u128}, - {Sign::NEG, -152, 0xb4aa9f76'b22f739a'006e0982'bd8d96ef_u128}, - {Sign::NEG, -152, 0xadb7c0bc'7b8928d3'db8d1eb5'0fa7375c_u128}, - {Sign::NEG, -152, 0xa6c4e201'd5b4f314'8a9754fe'0c0073a7_u128}, - {Sign::NEG, -152, 0x9fd20346'c0b2d24e'27cf61a1'9e032f69_u128}, - {Sign::NEG, -152, 0x98df248b'3c82c672'cd77f748'9d9ef50b_u128}, - {Sign::NEG, -152, 0x91ec45cf'4924cf74'95d3c600'cf484f03_u128}, - {Sign::NEG, -152, 0x8af96712'e698ed45'9b257b3c'e3f82109_u128}, - {Sign::NEG, -152, 0x84068856'14df1fd7'f7afc1d4'792b015a_u128}, - {Sign::NEG, -153, 0xfa275331'a7eece3b'8b6a8408'31c123d8_u128}, - {Sign::NEG, -153, 0xec4195b6'47c38612'3ef142da'7335b35a_u128}, - {Sign::NEG, -153, 0xde5bd83a'093c6718'3e79062c'7cbb3b7d_u128}, - {Sign::NEG, -153, 0xd0761abc'ec597131'be870ed4'ed5b755b_u128}, - {Sign::NEG, -153, 0xc2905d3e'f11aa442'f3a09874'3d20fb64_u128}, - {Sign::NEG, -153, 0xb4aa9fc0'17800030'124ad974'bd15fbca_u128}, - 
{Sign::NEG, -153, 0xa6c4e240'5f8984dd'4f0b030a'9742eb00_u128}, - {Sign::NEG, -153, 0x98df24bf'c937322e'de664133'cead362d_u128}, - {Sign::NEG, -153, 0x8af9673e'54890808'f4e1bab8'3f55f5a1_u128}, - {Sign::NEG, -154, 0xfa275378'02fe0c9f'8e052253'3c713e98_u128}, - {Sign::NEG, -154, 0xde5bd871'a03259cf'129bc1c6'f293726e_u128}, - {Sign::NEG, -154, 0xc2905d69'80aef768'e0918216'6eeb17eb_u128}, - {Sign::NEG, -154, 0xa6c4e25f'a473e535'60f08720'313daa3f_u128}, - {Sign::NEG, -154, 0x8af96754'0b8122fc'fcc2ea56'6b3af38b_u128}, - {Sign::NEG, -155, 0xde5bd88d'6bad6110'3a25757e'00f4e3a0_u128}, - {Sign::NEG, -155, 0xa6c4e26f'46e91b3e'55d3f9e7'0cf177b8_u128}, - {Sign::NEG, -156, 0xde5bd89b'516ae82a'3d4aac85'125398d0_u128}, - {Sign::NEG, -157, 0xde5bd8a2'4449ac95'9ab5a849'a06f400d_u128}, - {Sign::POS, 0, 0_u128}, - {Sign::POS, -157, 0xde5bd8b0'2a073729'0d3cc88f'd4ef34c2_u128}, - {Sign::POS, -156, 0xde5bd8b7'1ce5fd51'225916c2'b3f33c90_u128}, - {Sign::POS, -155, 0xa6c4e28e'8bd3930a'17847f98'acf08d54_u128}, - {Sign::POS, -155, 0xde5bd8c5'02a38b5e'04439783'0931fddd_u128}, - {Sign::POS, -154, 0x8af9677f'79717409'c2ab3859'13176984_u128}, - {Sign::POS, -154, 0xa6c4e29e'2e48d4cc'e454dec8'2bde52e5_u128}, - {Sign::POS, -154, 0xc2905dbe'9fd7e82f'fe1522b0'470d7d7f_u128}, - {Sign::POS, -154, 0xde5bd8e0'ce1eae6a'a6e2721f'2afc3cce_u128}, - {Sign::POS, -154, 0xfa275404'b91d27b4'75b3458e'ec3c106c_u128}, - {Sign::POS, -153, 0x8af96795'3069aa22'80bf0ff2'f6cd9f93_u128}, - {Sign::POS, -153, 0x98df2528'e2a09a29'f09cc73b'7013b906_u128}, - {Sign::POS, -153, 0xa6c4e2bd'7333640c'55ee1480'619827c4_u128}, - {Sign::POS, -153, 0xb4aaa052'e22207e5'7c2e48d7'72250b3c_u128}, - {Sign::POS, -153, 0xc2905de9'2f6c85d1'2ed8ba8c'6fa81c98_u128}, - {Sign::POS, -153, 0xd0761b80'5b12ddeb'3968c521'4f33fc4f_u128}, - {Sign::POS, -153, 0xde5bd918'6515104f'6759c94e'2d017fad_u128}, - {Sign::POS, -153, 0xec4196b1'4d731d19'84272d01'4c70fe58_u128}, - {Sign::POS, -153, 0xfa27544b'142d0465'5b4c5b5f'180b9fe1_u128}, - {Sign::POS, -152, 
0x840688f2'dca16327'5c226261'10c254a4_u128}, - {Sign::POS, -152, 0x8af967c0'9e5a3178'b345ef5d'90dd6545_u128}, - {Sign::POS, -152, 0x91ec468e'cf40ed34'98ce9208'7c5cb614_u128}, - {Sign::POS, -152, 0x98df255d'6f559668'f27a0a60'56dcfe57_u128}, - {Sign::POS, -152, 0x9fd2042c'7e982d23'a6061afe'b7929f24_u128}, - {Sign::POS, -152, 0xa6c4e2fb'fd08b172'99308918'494a4a20_u128}, - {Sign::POS, -152, 0xadb7c1cb'eaa72363'b1b71c7c'ca69a844_u128}, - {Sign::POS, -152, 0xb4aaa09c'47738304'd5579f97'0cf000a9_u128}, - {Sign::POS, -152, 0xbb9d7f6d'136dd063'e9cfdf6c'f676df42_u128}, - {Sign::POS, -152, 0xc2905e3e'4e960b8e'd4ddab9f'8032bbab_u128}, - {Sign::POS, -152, 0xc9833d0f'f8ec3493'7c3ed66a'b6f39fe9_u128}, - {Sign::POS, -152, 0xd0761be2'12704b7f'c5b134a5'bb25cf2e_u128}, - {Sign::POS, -152, 0xd768fab4'9b225061'96f29dc2'c0d26ca0_u128}, - {Sign::POS, -152, 0xde5bd987'93024346'd5c0ebcf'0fa0221e_u128}, - {Sign::POS, -152, 0xe54eb85a'fa10243d'67d9fb73'02d3c705_u128}, - {Sign::POS, -152, 0xec41972e'd04bf353'32fbabf2'095106f1_u128}, - {Sign::POS, -152, 0xf3347603'15b5b096'1ce3df2a'a59b0889_u128}, - {Sign::POS, -152, 0xfa2754d7'ca4d5c14'0b507996'6dd5143e_u128}, - {Sign::POS, -151, 0x808d19d6'77097aed'71ffb125'05e19d89_u128}, - {Sign::POS, -151, 0x84068941'40833efc'4657417a'9e657eae_u128}, - {Sign::POS, -151, 0x877ff8ac'4193fa3d'758de3f1'68f9f8c9_u128}, - {Sign::POS, -151, 0x8af96817'7a3bacb7'f2828ffc'57f43581_u128}, - {Sign::POS, -151, 0x8e72d782'ea7a5672'b0143e5b'e77b1053_u128}, - {Sign::POS, -151, 0x91ec46ee'924ff774'a121e91e'1d8769ef_u128}, - {Sign::POS, -151, 0x9565b65a'71bc8fc4'b88a8b9e'89e47b9c_u128}, - {Sign::POS, -151, 0x98df25c6'88c01f69'e92d2286'46302a9c_u128}, - {Sign::POS, -151, 0x9c589532'd75aa66b'25e8abcb'f5db5b8c_u128}, - {Sign::POS, -151, 0x9fd2049f'5d8c24cf'619c26b3'c62a45c8_u128}, - {Sign::POS, -151, 0xa34b740c'1b549a9d'8f2693cf'6e34c6cc_u128}, - {Sign::POS, -151, 0xa6c4e379'10b407dc'a166f4fe'2ee6b59a_u128}, - {Sign::POS, -151, 0xaa3e52e6'3daa6c93'8b3c4d6c'd3003616_u128}, - 
{Sign::POS, -151, 0xadb7c253'a237c8c9'3f85a195'af160c71_u128}, - {Sign::POS, -151, 0xb13131c1'3e5c1c84'b121f740'a191f084_u128}, - {Sign::POS, -151, 0xb4aaa12f'121767cc'd2f05583'12b2e136_u128}, - {Sign::POS, -151, 0xb824109d'1d69aaa8'97cfc4bf'f48d77de_u128}, - {Sign::POS, -151, 0xbb9d800b'6052e51e'f29f4ea7'c30c3ba5_u128}, - {Sign::POS, -151, 0xbf16ef79'dad31736'd63dfe38'83eff4e9_u128}, - {Sign::POS, -151, 0xc2905ee8'8cea40f7'358adfbd'c6d0009f_u128}, - {Sign::POS, -151, 0xc609ce57'76986267'036500d0'a51aa3b6_u128}, - {Sign::POS, -151, 0xc9833dc6'97dd7b8d'32ab7057'c2155e78_u128}, - {Sign::POS, -151, 0xccfcad35'f0b98c70'b63d3e87'4add3ff0_u128}, - {Sign::POS, -151, 0xd0761ca5'812c9518'80f97ce0'f6673948_u128}, - {Sign::POS, -151, 0xd3ef8c15'4936958b'85bf3e34'0580712d_u128}, - {Sign::POS, -151, 0xd768fb85'48d78dd0'b76d969d'42ce9734_u128}, - {Sign::POS, -151, 0xdae26af5'800f7def'08e39b87'02d0373a_u128}, - {Sign::POS, -151, 0xde5bda65'eede65ed'6d0063a9'23dd0cc6_u128}, + {Sign::NEG, -151, MType({0xe471a82bbedbe0ae, 0xe1d5464122cf95a4})}, + {Sign::NEG, -151, MType({0xaf6e93be8e4c1764, 0xde5bd6ec7f7bc110})}, + {Sign::NEG, -151, MType({0xe44848f0a5779499, 0xdae26797a490f80e})}, + {Sign::NEG, -151, MType({0x90205533f4e70566, 0xd768f842920f3a98})}, + {Sign::NEG, -151, MType({0xc01844ace3729e48, 0xd3ef88ed47f688a6})}, + {Sign::NEG, -151, MType({0x8151a2324e41c7c4, 0xd0761997c646e232})}, + {Sign::NEG, -151, MType({0xe0edf74d88cacafd, 0xccfcaa420d004734})}, + {Sign::NEG, -151, MType({0xec0ecc3a5cd27e58, 0xc9833aec1c22b7a6})}, + {Sign::NEG, -151, MType({0xafd5a7e70a6bf214, 0xc609cb95f3ae3381})}, + {Sign::NEG, -151, MType({0x39640ff447f81ceb, 0xc2905c3f93a2babe})}, + {Sign::NEG, -151, MType({0x95db88b5422588b1, 0xbf16ece8fc004d55})}, + {Sign::NEG, -151, MType({0xd25d952f9beffeec, 0xbb9d7d922cc6eb40})}, + {Sign::NEG, -151, MType({0xfc0bb71b6ea03578, 0xb8240e3b25f69478})}, + {Sign::NEG, -151, MType({0x20076ee349cb7b20, 0xb4aa9ee3e78f48f7})}, + {Sign::NEG, -151, MType({0x4b723ba43353643d, 
0xb1312f8c719108b4})}, + {Sign::NEG, -151, MType({0x8b6d9b2da7657754, 0xadb7c034c3fbd3a9})}, + {Sign::NEG, -151, MType({0xed1b0a01987ad9b4, 0xaa3e50dcdecfa9cf})}, + {Sign::NEG, -151, MType({0x7d9c03546f57fc11, 0xa6c4e184c20c8b20})}, + {Sign::NEG, -151, MType({0x4a12010d0b0c4727, 0xa34b722c6db27794})}, + {Sign::NEG, -151, MType({0x5f9e7bc4c0f1c851, 0x9fd202d3e1c16f24})}, + {Sign::NEG, -151, MType({0xcb62eac75cacde29, 0x9c58937b1e3971c9})}, + {Sign::NEG, -151, MType({0x9a80c413202be52a, 0x98df2422231a7f7d})}, + {Sign::NEG, -151, MType({0xda197c58c3a6e445, 0x9565b4c8f0649838})}, + {Sign::NEG, -151, MType({0x974e86fb759f3988, 0x91ec456f8617bbf4})}, + {Sign::NEG, -151, MType({0xdf415610dadf46b3, 0x8e72d615e433eaa9})}, + {Sign::NEG, -151, MType({0xbf135a610e7a1ddc, 0x8af966bc0ab92451})}, + {Sign::NEG, -151, MType({0x43e60366a1cb2e09, 0x877ff761f9a768e5})}, + {Sign::NEG, -151, MType({0x7adabf4e9c75efce, 0x84068807b0feb85d})}, + {Sign::NEG, -151, MType({0x7112faf87c6591ee, 0x808d18ad30bf12b3})}, + {Sign::NEG, -152, MType({0x676043ec6b994be5, 0xfa2752a4f1d0efc0})}, + {Sign::NEG, -152, MType({0x9fa73d186649999d, 0xf33473ef12f5cfb9})}, + {Sign::NEG, -152, MType({0xa53db362aa5cc6f0, 0xec419538c4ecc544})}, + {Sign::NEG, -152, MType({0x9266761de5e05f13, 0xe54eb68207b5d053})}, + {Sign::NEG, -152, MType({0x81645201b36e17ba, 0xde5bd7cadb50f0d8})}, + {Sign::NEG, -152, MType({0x8c7a112a9a2b2a52, 0xd768f9133fbe26c5})}, + {Sign::NEG, -152, MType({0xcdea7b1a0dc7ad42, 0xd0761a5b34fd720c})}, + {Sign::NEG, -152, MType({0x5ff854b66e7ded1f, 0xc9833ba2bb0ed2a0})}, + {Sign::NEG, -152, MType({0x5ce6604b0911c5ed, 0xc2905ce9d1f24872})}, + {Sign::NEG, -152, MType({0xdef75d8816cffc59, 0xbb9d7e3079a7d374})}, + {Sign::NEG, -152, MType({0x6e0982bd8d96ef, 0xb4aa9f76b22f739a})}, + {Sign::NEG, -152, MType({0xdb8d1eb50fa7375c, 0xadb7c0bc7b8928d3})}, + {Sign::NEG, -152, MType({0x8a9754fe0c0073a7, 0xa6c4e201d5b4f314})}, + {Sign::NEG, -152, MType({0x27cf61a19e032f69, 0x9fd20346c0b2d24e})}, + {Sign::NEG, 
-152, MType({0xcd77f7489d9ef50b, 0x98df248b3c82c672})}, + {Sign::NEG, -152, MType({0x95d3c600cf484f03, 0x91ec45cf4924cf74})}, + {Sign::NEG, -152, MType({0x9b257b3ce3f82109, 0x8af96712e698ed45})}, + {Sign::NEG, -152, MType({0xf7afc1d4792b015a, 0x8406885614df1fd7})}, + {Sign::NEG, -153, MType({0x8b6a840831c123d8, 0xfa275331a7eece3b})}, + {Sign::NEG, -153, MType({0x3ef142da7335b35a, 0xec4195b647c38612})}, + {Sign::NEG, -153, MType({0x3e79062c7cbb3b7d, 0xde5bd83a093c6718})}, + {Sign::NEG, -153, MType({0xbe870ed4ed5b755b, 0xd0761abcec597131})}, + {Sign::NEG, -153, MType({0xf3a098743d20fb64, 0xc2905d3ef11aa442})}, + {Sign::NEG, -153, MType({0x124ad974bd15fbca, 0xb4aa9fc017800030})}, + {Sign::NEG, -153, MType({0x4f0b030a9742eb00, 0xa6c4e2405f8984dd})}, + {Sign::NEG, -153, MType({0xde664133cead362d, 0x98df24bfc937322e})}, + {Sign::NEG, -153, MType({0xf4e1bab83f55f5a1, 0x8af9673e54890808})}, + {Sign::NEG, -154, MType({0x8e0522533c713e98, 0xfa27537802fe0c9f})}, + {Sign::NEG, -154, MType({0x129bc1c6f293726e, 0xde5bd871a03259cf})}, + {Sign::NEG, -154, MType({0xe09182166eeb17eb, 0xc2905d6980aef768})}, + {Sign::NEG, -154, MType({0x60f08720313daa3f, 0xa6c4e25fa473e535})}, + {Sign::NEG, -154, MType({0xfcc2ea566b3af38b, 0x8af967540b8122fc})}, + {Sign::NEG, -155, MType({0x3a25757e00f4e3a0, 0xde5bd88d6bad6110})}, + {Sign::NEG, -155, MType({0x55d3f9e70cf177b8, 0xa6c4e26f46e91b3e})}, + {Sign::NEG, -156, MType({0x3d4aac85125398d0, 0xde5bd89b516ae82a})}, + {Sign::NEG, -157, MType({0x9ab5a849a06f400d, 0xde5bd8a24449ac95})}, + {Sign::POS, 0, MType({0x0, 0x0})}, + {Sign::POS, -157, MType({0xd3cc88fd4ef34c2, 0xde5bd8b02a073729})}, + {Sign::POS, -156, MType({0x225916c2b3f33c90, 0xde5bd8b71ce5fd51})}, + {Sign::POS, -155, MType({0x17847f98acf08d54, 0xa6c4e28e8bd3930a})}, + {Sign::POS, -155, MType({0x44397830931fddd, 0xde5bd8c502a38b5e})}, + {Sign::POS, -154, MType({0xc2ab385913176984, 0x8af9677f79717409})}, + {Sign::POS, -154, MType({0xe454dec82bde52e5, 0xa6c4e29e2e48d4cc})}, + {Sign::POS, 
-154, MType({0xfe1522b0470d7d7f, 0xc2905dbe9fd7e82f})}, + {Sign::POS, -154, MType({0xa6e2721f2afc3cce, 0xde5bd8e0ce1eae6a})}, + {Sign::POS, -154, MType({0x75b3458eec3c106c, 0xfa275404b91d27b4})}, + {Sign::POS, -153, MType({0x80bf0ff2f6cd9f93, 0x8af967953069aa22})}, + {Sign::POS, -153, MType({0xf09cc73b7013b906, 0x98df2528e2a09a29})}, + {Sign::POS, -153, MType({0x55ee1480619827c4, 0xa6c4e2bd7333640c})}, + {Sign::POS, -153, MType({0x7c2e48d772250b3c, 0xb4aaa052e22207e5})}, + {Sign::POS, -153, MType({0x2ed8ba8c6fa81c98, 0xc2905de92f6c85d1})}, + {Sign::POS, -153, MType({0x3968c5214f33fc4f, 0xd0761b805b12ddeb})}, + {Sign::POS, -153, MType({0x6759c94e2d017fad, 0xde5bd9186515104f})}, + {Sign::POS, -153, MType({0x84272d014c70fe58, 0xec4196b14d731d19})}, + {Sign::POS, -153, MType({0x5b4c5b5f180b9fe1, 0xfa27544b142d0465})}, + {Sign::POS, -152, MType({0x5c22626110c254a4, 0x840688f2dca16327})}, + {Sign::POS, -152, MType({0xb345ef5d90dd6545, 0x8af967c09e5a3178})}, + {Sign::POS, -152, MType({0x98ce92087c5cb614, 0x91ec468ecf40ed34})}, + {Sign::POS, -152, MType({0xf27a0a6056dcfe57, 0x98df255d6f559668})}, + {Sign::POS, -152, MType({0xa6061afeb7929f24, 0x9fd2042c7e982d23})}, + {Sign::POS, -152, MType({0x99308918494a4a20, 0xa6c4e2fbfd08b172})}, + {Sign::POS, -152, MType({0xb1b71c7cca69a844, 0xadb7c1cbeaa72363})}, + {Sign::POS, -152, MType({0xd5579f970cf000a9, 0xb4aaa09c47738304})}, + {Sign::POS, -152, MType({0xe9cfdf6cf676df42, 0xbb9d7f6d136dd063})}, + {Sign::POS, -152, MType({0xd4ddab9f8032bbab, 0xc2905e3e4e960b8e})}, + {Sign::POS, -152, MType({0x7c3ed66ab6f39fe9, 0xc9833d0ff8ec3493})}, + {Sign::POS, -152, MType({0xc5b134a5bb25cf2e, 0xd0761be212704b7f})}, + {Sign::POS, -152, MType({0x96f29dc2c0d26ca0, 0xd768fab49b225061})}, + {Sign::POS, -152, MType({0xd5c0ebcf0fa0221e, 0xde5bd98793024346})}, + {Sign::POS, -152, MType({0x67d9fb7302d3c705, 0xe54eb85afa10243d})}, + {Sign::POS, -152, MType({0x32fbabf2095106f1, 0xec41972ed04bf353})}, + {Sign::POS, -152, MType({0x1ce3df2aa59b0889, 
0xf334760315b5b096})}, + {Sign::POS, -152, MType({0xb5079966dd5143e, 0xfa2754d7ca4d5c14})}, + {Sign::POS, -151, MType({0x71ffb12505e19d89, 0x808d19d677097aed})}, + {Sign::POS, -151, MType({0x4657417a9e657eae, 0x8406894140833efc})}, + {Sign::POS, -151, MType({0x758de3f168f9f8c9, 0x877ff8ac4193fa3d})}, + {Sign::POS, -151, MType({0xf2828ffc57f43581, 0x8af968177a3bacb7})}, + {Sign::POS, -151, MType({0xb0143e5be77b1053, 0x8e72d782ea7a5672})}, + {Sign::POS, -151, MType({0xa121e91e1d8769ef, 0x91ec46ee924ff774})}, + {Sign::POS, -151, MType({0xb88a8b9e89e47b9c, 0x9565b65a71bc8fc4})}, + {Sign::POS, -151, MType({0xe92d228646302a9c, 0x98df25c688c01f69})}, + {Sign::POS, -151, MType({0x25e8abcbf5db5b8c, 0x9c589532d75aa66b})}, + {Sign::POS, -151, MType({0x619c26b3c62a45c8, 0x9fd2049f5d8c24cf})}, + {Sign::POS, -151, MType({0x8f2693cf6e34c6cc, 0xa34b740c1b549a9d})}, + {Sign::POS, -151, MType({0xa166f4fe2ee6b59a, 0xa6c4e37910b407dc})}, + {Sign::POS, -151, MType({0x8b3c4d6cd3003616, 0xaa3e52e63daa6c93})}, + {Sign::POS, -151, MType({0x3f85a195af160c71, 0xadb7c253a237c8c9})}, + {Sign::POS, -151, MType({0xb121f740a191f084, 0xb13131c13e5c1c84})}, + {Sign::POS, -151, MType({0xd2f0558312b2e136, 0xb4aaa12f121767cc})}, + {Sign::POS, -151, MType({0x97cfc4bff48d77de, 0xb824109d1d69aaa8})}, + {Sign::POS, -151, MType({0xf29f4ea7c30c3ba5, 0xbb9d800b6052e51e})}, + {Sign::POS, -151, MType({0xd63dfe3883eff4e9, 0xbf16ef79dad31736})}, + {Sign::POS, -151, MType({0x358adfbdc6d0009f, 0xc2905ee88cea40f7})}, + {Sign::POS, -151, MType({0x36500d0a51aa3b6, 0xc609ce5776986267})}, + {Sign::POS, -151, MType({0x32ab7057c2155e78, 0xc9833dc697dd7b8d})}, + {Sign::POS, -151, MType({0xb63d3e874add3ff0, 0xccfcad35f0b98c70})}, + {Sign::POS, -151, MType({0x80f97ce0f6673948, 0xd0761ca5812c9518})}, + {Sign::POS, -151, MType({0x85bf3e340580712d, 0xd3ef8c154936958b})}, + {Sign::POS, -151, MType({0xb76d969d42ce9734, 0xd768fb8548d78dd0})}, + {Sign::POS, -151, MType({0x8e39b8702d0373a, 0xdae26af5800f7def})}, + {Sign::POS, -151, 
MType({0x6d0063a923dd0cc6, 0xde5bda65eede65ed})}, }}; // > P = fpminimax(log10(1 + x)/x, 3, [|128...|], [-0x1.0002143p-29 , 0x1p-29]); @@ -706,10 +702,10 @@ const LogRR LOG10_TABLE = { // > dirtyinfnorm(log10(1 + x)/x - P, [-0x1.0002143p-29 , 0x1p-29]); // 0x1.64fb8...p-123 const Float128 BIG_COEFFS[4]{ - {Sign::NEG, -131, 0xde5bd8a9'373f89a7'6903c4ce'1582517d_u128}, - {Sign::POS, -130, 0x943d3b1b'7a1af679'b8a21791'624e2e8a_u128}, - {Sign::NEG, -130, 0xde5bd8a9'37287195'355baaaf'abc25990_u128}, - {Sign::POS, -129, 0xde5bd8a9'37287195'355baaaf'ad33dbd9_u128}, + {Sign::NEG, -131, MType({0x6903c4ce1582517d, 0xde5bd8a9373f89a7})}, + {Sign::POS, -130, MType({0xb8a21791624e2e8a, 0x943d3b1b7a1af679})}, + {Sign::NEG, -130, MType({0x355baaafabc25990, 0xde5bd8a937287195})}, + {Sign::POS, -129, MType({0x355baaafad33dbd9, 0xde5bd8a937287195})}, }; // Reuse the output of the fast pass range reduction. diff --git a/libc/src/math/generic/log1p.cpp b/libc/src/math/generic/log1p.cpp index 12710cfe0de21a..0edab70124c955 100644 --- a/libc/src/math/generic/log1p.cpp +++ b/libc/src/math/generic/log1p.cpp @@ -14,7 +14,6 @@ #include "src/__support/FPUtil/dyadic_float.h" #include "src/__support/FPUtil/multiply_add.h" #include "src/__support/common.h" -#include "src/__support/integer_literals.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include "common_constants.h" @@ -23,24 +22,19 @@ namespace LIBC_NAMESPACE { // 128-bit precision dyadic floating point numbers. using Float128 = typename fputil::DyadicFloat<128>; +using MType = typename Float128::MantissaType; using Sign = fputil::Sign; -using LIBC_NAMESPACE::operator""_u128; namespace { // Extra errors from P is from using x^2 to reduce evaluation latency. 
constexpr double P_ERR = 0x1.0p-50; -// log(2) with 128-bit precision generated by SageMath with: -// def format_hex(value): -// l = hex(value)[2:] -// n = 8 -// x = [l[i:i + n] for i in range(0, len(l), n)] -// return "0x" + "'".join(x) + "_uint128" -// (s, m, e) = RealField(128)(2).log().sign_mantissa_exponent(); -// print(format_hex(m)); +// log(2) with 128-bit precision generated by SageMath with: +// sage: (s, m, e) = RealField(128)(2).log().sign_mantissa_exponent(); +// sage: print("MType({", hex(m % 2^64), ",", hex((m >> 64) % 2^64), "})"); const Float128 LOG_2(Sign::POS, /*exponent=*/-128, /*mantissa=*/ - 0xb17217f7'd1cf79ab'c9e3b398'03f2f6af_u128); + MType({0xc9e3b39803f2f6af, 0xb17217f7d1cf79ab})); // R1[i] = 2^-8 * nearestint( 2^8 / (1 + i * 2^-7) ) constexpr double R1[129] = { @@ -251,137 +245,139 @@ constexpr double P_COEFFS[6] = {-0x1p-1, // for i in range(129): // r = 2^-8 * round( 2^8 / (1 + i*2^(-7)) ); // s, m, e = RealField(128)(r).log().sign_mantissa_exponent(); -// print("{Sign::POS,", e, ", format_hex(m), "},"); +// print("{Sign::POS,", e, ", MType({", hex(m % 2^64), ",", hex((m >> 64) % +// 2^64), +// "})},"); const Float128 LOG_R1[129] = { - {Sign::POS, 0, 0_u128}, - {Sign::POS, -134, 0x8080abac'46f38946'662d417c'ed007a46_u128}, - {Sign::POS, -133, 0x8102b2c4'9ac23a4f'91d082dc'e3ddcd38_u128}, - {Sign::POS, -133, 0xc2492946'4655f45c'da5f3cc0'b3251dbd_u128}, - {Sign::POS, -132, 0x820aec4f'3a222380'b9e3aea6'c444ef07_u128}, - {Sign::POS, -132, 0xa33576a1'6f1f4c64'521016bd'904dc968_u128}, - {Sign::POS, -132, 0xb3e4a796'a5dac208'27cca0bc'c06c2f92_u128}, - {Sign::POS, -132, 0xd5779687'd887e0d1'a9dda170'56e45ed5_u128}, - {Sign::POS, -132, 0xf7518e00'35c3dd83'606d8909'3278a939_u128}, - {Sign::POS, -131, 0x8cb9de8a'32ab368a'a7c98595'30a45153_u128}, - {Sign::POS, -131, 0x9defad3e'8f73217a'976d3b5b'45f6ca0b_u128}, - {Sign::POS, -131, 0xa6988ae9'03f562ed'3e858f08'597b3a69_u128}, - {Sign::POS, -131, 0xb8069857'560707a3'6a677b4c'8bec22e1_u128}, -
{Sign::POS, -131, 0xc99af2ea'ca4c4570'eaf51f66'692844ba_u128}, - {Sign::POS, -131, 0xd273b205'8de1bd49'46bbf837'b4d320c6_u128}, - {Sign::POS, -131, 0xe442c00d'e2591b47'196ab34c'e0bccd12_u128}, - {Sign::POS, -131, 0xed393b1c'22351280'3f4e2e66'0317d55f_u128}, - {Sign::POS, -131, 0xff4489ce'deab2ca6'c17bd40d'8d9291ec_u128}, - {Sign::POS, -130, 0x88bc7411'3f23def1'9c5a0fe3'96f40f1e_u128}, - {Sign::POS, -130, 0x8d515bf1'1fb94f1c'88713268'840cbcc0_u128}, - {Sign::POS, -130, 0x968b0864'3409ceb6'65c0da50'6a088484_u128}, - {Sign::POS, -130, 0x9b2fe580'ac80b17d'411a5b94'4aca8708_u128}, - {Sign::POS, -130, 0xa489ec19'9dab06f2'a9fb6cf0'ecb411b7_u128}, - {Sign::POS, -130, 0xa93f2f25'0dac67d1'cad2fb8d'48054ae0_u128}, - {Sign::POS, -130, 0xadfa035a'a1ed8fdc'149767e4'10316d2c_u128}, - {Sign::POS, -130, 0xb780945b'ab55dce4'34c7bc3d'32750fde_u128}, - {Sign::POS, -130, 0xbc4c6c2a'226399ef'8f6ebcfb'2016a439_u128}, - {Sign::POS, -130, 0xc5f57f59'c7f46155'aa8b6997'a402bf30_u128}, - {Sign::POS, -130, 0xcad2d6e7'b80bf914'2c507fb7'a3d0bf6a_u128}, - {Sign::POS, -130, 0xcfb62038'44b3209a'd0cb02f3'3f79c16c_u128}, - {Sign::POS, -130, 0xd98ec2ba'de71e539'58a98f2a'd65bee9b_u128}, - {Sign::POS, -130, 0xde8439c1'dec56877'4d57da94'5b5d0aaa_u128}, - {Sign::POS, -130, 0xe37fde37'807b84e3'4e9a750b'6b68781d_u128}, - {Sign::POS, -130, 0xe881bf93'2af3dac0'c524848e'3443e040_u128}, - {Sign::POS, -130, 0xf29877ff'38809091'3b020fa1'820c9492_u128}, - {Sign::POS, -130, 0xf7ad6f26'e7ff2ef7'54d2238f'75f969b1_u128}, - {Sign::POS, -130, 0xfcc8e365'9d9bcbec'ca0cdf30'1431b60f_u128}, - {Sign::POS, -129, 0x80f572b1'363487b9'f5bd0b5b'3479d5f4_u128}, - {Sign::POS, -129, 0x86216b3b'0b17188b'163ceae8'8f720f1e_u128}, - {Sign::POS, -129, 0x88bc7411'3f23def1'9c5a0fe3'96f40f1e_u128}, - {Sign::POS, -129, 0x8b5ae65d'67db9acd'f7a51681'26a58b9a_u128}, - {Sign::POS, -129, 0x8dfccb1a'd35ca6ed'5147bdb6'ddcaf59c_u128}, - {Sign::POS, -129, 0x90a22b68'75c6a1f7'ae91aeba'609c8877_u128}, - {Sign::POS, -129, 
0x934b1089'a6dc93c1'df5bb3b6'0554e152_u128}, - {Sign::POS, -129, 0x95f783e6'e49a9cfa'4a5004f3'ef063313_u128}, - {Sign::POS, -129, 0x9b5b3bb5'f088b766'd878bbe3'd392be25_u128}, - {Sign::POS, -129, 0x9e1293b9'998c1daa'5b035eae'273a855f_u128}, - {Sign::POS, -129, 0xa0cda11e'af46390d'bb243827'3918db7e_u128}, - {Sign::POS, -129, 0xa38c6e13'8e20d831'f698298a'dddd7f32_u128}, - {Sign::POS, -129, 0xa64f04f0'b961df76'e4f5275c'2d15c21f_u128}, - {Sign::POS, -129, 0xa9157039'c51ebe70'8164c759'686a2209_u128}, - {Sign::POS, -129, 0xabdfba9e'468fd6f6'f72ea077'49ce6bd3_u128}, - {Sign::POS, -129, 0xaeadeefa'caf97d35'7dd6e688'ebb13b03_u128}, - {Sign::POS, -129, 0xb1801859'd56249dc'18ce51ff'f99479cd_u128}, - {Sign::POS, -129, 0xb45641f4'e350a0d3'2756eba0'0bc33978_u128}, - {Sign::POS, -129, 0xb7307735'78cb90b2'be1116c3'466beb6d_u128}, - {Sign::POS, -129, 0xba0ec3b6'33dd8b09'49dc60b2'b059a60b_u128}, - {Sign::POS, -129, 0xbcf13343'e7d9ec7d'2efd1778'1bb3afec_u128}, - {Sign::POS, -129, 0xbfd7d1de'c0a8df6f'37eda996'244bccb0_u128}, - {Sign::POS, -129, 0xc2c2abbb'6e5fd56f'33337789'd592e296_u128}, - {Sign::POS, -129, 0xc5b1cd44'596fa51e'1a18fb8f'9f9ef280_u128}, - {Sign::POS, -129, 0xc8a5431a'dfb44ca5'688ce7c1'a75e341a_u128}, - {Sign::POS, -129, 0xcb9d1a18'9ab56e76'2d7e9307'c70c0668_u128}, - {Sign::POS, -129, 0xcb9d1a18'9ab56e76'2d7e9307'c70c0668_u128}, - {Sign::POS, -129, 0xce995f50'af69d861'ef2f3f4f'861ad6a9_u128}, - {Sign::POS, -129, 0xd19a2011'27d3c645'7f9d79f5'1dcc7301_u128}, - {Sign::POS, -129, 0xd49f69e4'56cf1b79'5f53bd2e'406e66e7_u128}, - {Sign::POS, -129, 0xd7a94a92'466e833a'ad88bba7'd0cee8e0_u128}, - {Sign::POS, -129, 0xdab7d022'31484a92'96c20cca'6efe2ac5_u128}, - {Sign::POS, -129, 0xddcb08dc'0717d85b'f40a666c'87842843_u128}, - {Sign::POS, -129, 0xe0e30349'fd1cec80'7fe8e180'2aba24d6_u128}, - {Sign::POS, -129, 0xe0e30349'fd1cec80'7fe8e180'2aba24d6_u128}, - {Sign::POS, -129, 0xe3ffce3a'2aa64922'3eadb651'b49ac53a_u128}, - {Sign::POS, -129, 0xe72178c0'323a1a0f'304e1653'e71d9973_u128}, - 
{Sign::POS, -129, 0xea481236'f7d35baf'e9a767a8'0d6d97e8_u128}, - {Sign::POS, -129, 0xed73aa42'64b0ade9'4f91cf4b'33e42998_u128}, - {Sign::POS, -129, 0xed73aa42'64b0ade9'4f91cf4b'33e42998_u128}, - {Sign::POS, -129, 0xf0a450d1'39366ca6'fc66eb64'08ff6433_u128}, - {Sign::POS, -129, 0xf3da161e'ed6b9aaf'ac8d42f7'8d3e65d3_u128}, - {Sign::POS, -129, 0xf7150ab5'a09f27f4'5a470250'd40ebe90_u128}, - {Sign::POS, -129, 0xf7150ab5'a09f27f4'5a470250'd40ebe90_u128}, - {Sign::POS, -129, 0xfa553f70'18c966f2'b780a545'a1b54dcf_u128}, - {Sign::POS, -129, 0xfd9ac57b'd244217e'8f05924d'258c14c5_u128}, - {Sign::POS, -128, 0x8072d72d'903d588b'89d1b09c'70c4010a_u128}, - {Sign::POS, -128, 0x8072d72d'903d588b'89d1b09c'70c4010a_u128}, - {Sign::POS, -128, 0x821b05f3'b01d6774'030d58c3'f7e2ea1f_u128}, - {Sign::POS, -128, 0x83c5f829'9e2b4091'20f6fafe'8fbb68b9_u128}, - {Sign::POS, -128, 0x8573b716'82a7d21a'e21f9f89'c1ab80b2_u128}, - {Sign::POS, -128, 0x8573b716'82a7d21a'e21f9f89'c1ab80b2_u128}, - {Sign::POS, -128, 0x87244c30'8e670a66'01e005d0'6dbfa8f8_u128}, - {Sign::POS, -128, 0x88d7c11e'3ad53cdc'223111a7'07b6de2c_u128}, - {Sign::POS, -128, 0x88d7c11e'3ad53cdc'223111a7'07b6de2c_u128}, - {Sign::POS, -128, 0x8a8e1fb7'94b09134'2eb628db'a173c82d_u128}, - {Sign::POS, -128, 0x8c477207'91e53313'be2ad194'15fe25a5_u128}, - {Sign::POS, -128, 0x8c477207'91e53313'be2ad194'15fe25a5_u128}, - {Sign::POS, -128, 0x8e03c24d'73003959'bddae1cc'ce247838_u128}, - {Sign::POS, -128, 0x8fc31afe'30b2c6de'9b00bf16'7e95da67_u128}, - {Sign::POS, -128, 0x8fc31afe'30b2c6de'9b00bf16'7e95da67_u128}, - {Sign::POS, -128, 0x918586c5'f5e4bf01'9b92199e'd1a4bab1_u128}, - {Sign::POS, -128, 0x934b1089'a6dc93c1'df5bb3b6'0554e152_u128}, - {Sign::POS, -128, 0x934b1089'a6dc93c1'df5bb3b6'0554e152_u128}, - {Sign::POS, -128, 0x9513c368'76083695'f3cbc416'a2418012_u128}, - {Sign::POS, -128, 0x96dfaabd'86fa1646'be1188fb'c94e2f15_u128}, - {Sign::POS, -128, 0x96dfaabd'86fa1646'be1188fb'c94e2f15_u128}, - {Sign::POS, -128, 
0x98aed221'a03458b6'1d2f8932'1647b358_u128}, - {Sign::POS, -128, 0x98aed221'a03458b6'1d2f8932'1647b358_u128}, - {Sign::POS, -128, 0x9a81456c'ec642e0f'e549f9aa'ea3cb5e1_u128}, - {Sign::POS, -128, 0x9c5710b8'cbb73a42'a2554b2d'd4619e63_u128}, - {Sign::POS, -128, 0x9c5710b8'cbb73a42'a2554b2d'd4619e63_u128}, - {Sign::POS, -128, 0x9e304061'b5fda919'30603d87'b6df81ad_u128}, - {Sign::POS, -128, 0x9e304061'b5fda919'30603d87'b6df81ad_u128}, - {Sign::POS, -128, 0xa00ce109'2e5498c3'67879c5a'30cd1242_u128}, - {Sign::POS, -128, 0xa00ce109'2e5498c3'67879c5a'30cd1242_u128}, - {Sign::POS, -128, 0xa1ecff97'c91e267b'0b7efae0'8e597e16_u128}, - {Sign::POS, -128, 0xa3d0a93f'45169a4a'83594fab'088c0d65_u128}, - {Sign::POS, -128, 0xa3d0a93f'45169a4a'83594fab'088c0d65_u128}, - {Sign::POS, -128, 0xa5b7eb7c'b860fb88'af6a62a0'dec6e073_u128}, - {Sign::POS, -128, 0xa5b7eb7c'b860fb88'af6a62a0'dec6e073_u128}, - {Sign::POS, -128, 0xa7a2d41a'd270c9d7'49362382'a768847a_u128}, - {Sign::POS, -128, 0xa7a2d41a'd270c9d7'49362382'a768847a_u128}, - {Sign::POS, -128, 0xa9917134'33c2b998'8ba4aea6'14d05701_u128}, - {Sign::POS, -128, 0xa9917134'33c2b998'8ba4aea6'14d05701_u128}, - {Sign::POS, -128, 0xab83d135'dc633301'7fe6607b'a902ef3c_u128}, - {Sign::POS, -128, 0xab83d135'dc633301'7fe6607b'a902ef3c_u128}, - {Sign::POS, -128, 0xad7a02e1'b24efd31'd60864fd'949b4bd3_u128}, - {Sign::POS, -128, 0xad7a02e1'b24efd31'd60864fd'949b4bd3_u128}, - {Sign::POS, -128, 0xaf741551'20c9011c'066d235e'e63073dd_u128}, - {Sign::POS, -128, 0xaf741551'20c9011c'066d235e'e63073dd_u128}, - {Sign::POS, 0, 0_u128}, + {Sign::POS, 0, MType(0)}, + {Sign::POS, -134, MType({0x662d417ced007a46, 0x8080abac46f38946})}, + {Sign::POS, -133, MType({0x91d082dce3ddcd38, 0x8102b2c49ac23a4f})}, + {Sign::POS, -133, MType({0xda5f3cc0b3251dbd, 0xc24929464655f45c})}, + {Sign::POS, -132, MType({0xb9e3aea6c444ef07, 0x820aec4f3a222380})}, + {Sign::POS, -132, MType({0x521016bd904dc968, 0xa33576a16f1f4c64})}, + {Sign::POS, -132, MType({0x27cca0bcc06c2f92, 
0xb3e4a796a5dac208})}, + {Sign::POS, -132, MType({0xa9dda17056e45ed5, 0xd5779687d887e0d1})}, + {Sign::POS, -132, MType({0x606d89093278a939, 0xf7518e0035c3dd83})}, + {Sign::POS, -131, MType({0xa7c9859530a45153, 0x8cb9de8a32ab368a})}, + {Sign::POS, -131, MType({0x976d3b5b45f6ca0b, 0x9defad3e8f73217a})}, + {Sign::POS, -131, MType({0x3e858f08597b3a69, 0xa6988ae903f562ed})}, + {Sign::POS, -131, MType({0x6a677b4c8bec22e1, 0xb8069857560707a3})}, + {Sign::POS, -131, MType({0xeaf51f66692844ba, 0xc99af2eaca4c4570})}, + {Sign::POS, -131, MType({0x46bbf837b4d320c6, 0xd273b2058de1bd49})}, + {Sign::POS, -131, MType({0x196ab34ce0bccd12, 0xe442c00de2591b47})}, + {Sign::POS, -131, MType({0x3f4e2e660317d55f, 0xed393b1c22351280})}, + {Sign::POS, -131, MType({0xc17bd40d8d9291ec, 0xff4489cedeab2ca6})}, + {Sign::POS, -130, MType({0x9c5a0fe396f40f1e, 0x88bc74113f23def1})}, + {Sign::POS, -130, MType({0x88713268840cbcc0, 0x8d515bf11fb94f1c})}, + {Sign::POS, -130, MType({0x65c0da506a088484, 0x968b08643409ceb6})}, + {Sign::POS, -130, MType({0x411a5b944aca8708, 0x9b2fe580ac80b17d})}, + {Sign::POS, -130, MType({0xa9fb6cf0ecb411b7, 0xa489ec199dab06f2})}, + {Sign::POS, -130, MType({0xcad2fb8d48054ae0, 0xa93f2f250dac67d1})}, + {Sign::POS, -130, MType({0x149767e410316d2c, 0xadfa035aa1ed8fdc})}, + {Sign::POS, -130, MType({0x34c7bc3d32750fde, 0xb780945bab55dce4})}, + {Sign::POS, -130, MType({0x8f6ebcfb2016a439, 0xbc4c6c2a226399ef})}, + {Sign::POS, -130, MType({0xaa8b6997a402bf30, 0xc5f57f59c7f46155})}, + {Sign::POS, -130, MType({0x2c507fb7a3d0bf6a, 0xcad2d6e7b80bf914})}, + {Sign::POS, -130, MType({0xd0cb02f33f79c16c, 0xcfb6203844b3209a})}, + {Sign::POS, -130, MType({0x58a98f2ad65bee9b, 0xd98ec2bade71e539})}, + {Sign::POS, -130, MType({0x4d57da945b5d0aaa, 0xde8439c1dec56877})}, + {Sign::POS, -130, MType({0x4e9a750b6b68781d, 0xe37fde37807b84e3})}, + {Sign::POS, -130, MType({0xc524848e3443e040, 0xe881bf932af3dac0})}, + {Sign::POS, -130, MType({0x3b020fa1820c9492, 0xf29877ff38809091})}, + {Sign::POS, 
-130, MType({0x54d2238f75f969b1, 0xf7ad6f26e7ff2ef7})}, + {Sign::POS, -130, MType({0xca0cdf301431b60f, 0xfcc8e3659d9bcbec})}, + {Sign::POS, -129, MType({0xf5bd0b5b3479d5f4, 0x80f572b1363487b9})}, + {Sign::POS, -129, MType({0x163ceae88f720f1e, 0x86216b3b0b17188b})}, + {Sign::POS, -129, MType({0x9c5a0fe396f40f1e, 0x88bc74113f23def1})}, + {Sign::POS, -129, MType({0xf7a5168126a58b9a, 0x8b5ae65d67db9acd})}, + {Sign::POS, -129, MType({0x5147bdb6ddcaf59c, 0x8dfccb1ad35ca6ed})}, + {Sign::POS, -129, MType({0xae91aeba609c8877, 0x90a22b6875c6a1f7})}, + {Sign::POS, -129, MType({0xdf5bb3b60554e152, 0x934b1089a6dc93c1})}, + {Sign::POS, -129, MType({0x4a5004f3ef063313, 0x95f783e6e49a9cfa})}, + {Sign::POS, -129, MType({0xd878bbe3d392be25, 0x9b5b3bb5f088b766})}, + {Sign::POS, -129, MType({0x5b035eae273a855f, 0x9e1293b9998c1daa})}, + {Sign::POS, -129, MType({0xbb2438273918db7e, 0xa0cda11eaf46390d})}, + {Sign::POS, -129, MType({0xf698298adddd7f32, 0xa38c6e138e20d831})}, + {Sign::POS, -129, MType({0xe4f5275c2d15c21f, 0xa64f04f0b961df76})}, + {Sign::POS, -129, MType({0x8164c759686a2209, 0xa9157039c51ebe70})}, + {Sign::POS, -129, MType({0xf72ea07749ce6bd3, 0xabdfba9e468fd6f6})}, + {Sign::POS, -129, MType({0x7dd6e688ebb13b03, 0xaeadeefacaf97d35})}, + {Sign::POS, -129, MType({0x18ce51fff99479cd, 0xb1801859d56249dc})}, + {Sign::POS, -129, MType({0x2756eba00bc33978, 0xb45641f4e350a0d3})}, + {Sign::POS, -129, MType({0xbe1116c3466beb6d, 0xb730773578cb90b2})}, + {Sign::POS, -129, MType({0x49dc60b2b059a60b, 0xba0ec3b633dd8b09})}, + {Sign::POS, -129, MType({0x2efd17781bb3afec, 0xbcf13343e7d9ec7d})}, + {Sign::POS, -129, MType({0x37eda996244bccb0, 0xbfd7d1dec0a8df6f})}, + {Sign::POS, -129, MType({0x33337789d592e296, 0xc2c2abbb6e5fd56f})}, + {Sign::POS, -129, MType({0x1a18fb8f9f9ef280, 0xc5b1cd44596fa51e})}, + {Sign::POS, -129, MType({0x688ce7c1a75e341a, 0xc8a5431adfb44ca5})}, + {Sign::POS, -129, MType({0x2d7e9307c70c0668, 0xcb9d1a189ab56e76})}, + {Sign::POS, -129, MType({0x2d7e9307c70c0668, 
0xcb9d1a189ab56e76})}, + {Sign::POS, -129, MType({0xef2f3f4f861ad6a9, 0xce995f50af69d861})}, + {Sign::POS, -129, MType({0x7f9d79f51dcc7301, 0xd19a201127d3c645})}, + {Sign::POS, -129, MType({0x5f53bd2e406e66e7, 0xd49f69e456cf1b79})}, + {Sign::POS, -129, MType({0xad88bba7d0cee8e0, 0xd7a94a92466e833a})}, + {Sign::POS, -129, MType({0x96c20cca6efe2ac5, 0xdab7d02231484a92})}, + {Sign::POS, -129, MType({0xf40a666c87842843, 0xddcb08dc0717d85b})}, + {Sign::POS, -129, MType({0x7fe8e1802aba24d6, 0xe0e30349fd1cec80})}, + {Sign::POS, -129, MType({0x7fe8e1802aba24d6, 0xe0e30349fd1cec80})}, + {Sign::POS, -129, MType({0x3eadb651b49ac53a, 0xe3ffce3a2aa64922})}, + {Sign::POS, -129, MType({0x304e1653e71d9973, 0xe72178c0323a1a0f})}, + {Sign::POS, -129, MType({0xe9a767a80d6d97e8, 0xea481236f7d35baf})}, + {Sign::POS, -129, MType({0x4f91cf4b33e42998, 0xed73aa4264b0ade9})}, + {Sign::POS, -129, MType({0x4f91cf4b33e42998, 0xed73aa4264b0ade9})}, + {Sign::POS, -129, MType({0xfc66eb6408ff6433, 0xf0a450d139366ca6})}, + {Sign::POS, -129, MType({0xac8d42f78d3e65d3, 0xf3da161eed6b9aaf})}, + {Sign::POS, -129, MType({0x5a470250d40ebe90, 0xf7150ab5a09f27f4})}, + {Sign::POS, -129, MType({0x5a470250d40ebe90, 0xf7150ab5a09f27f4})}, + {Sign::POS, -129, MType({0xb780a545a1b54dcf, 0xfa553f7018c966f2})}, + {Sign::POS, -129, MType({0x8f05924d258c14c5, 0xfd9ac57bd244217e})}, + {Sign::POS, -128, MType({0x89d1b09c70c4010a, 0x8072d72d903d588b})}, + {Sign::POS, -128, MType({0x89d1b09c70c4010a, 0x8072d72d903d588b})}, + {Sign::POS, -128, MType({0x30d58c3f7e2ea1f, 0x821b05f3b01d6774})}, + {Sign::POS, -128, MType({0x20f6fafe8fbb68b9, 0x83c5f8299e2b4091})}, + {Sign::POS, -128, MType({0xe21f9f89c1ab80b2, 0x8573b71682a7d21a})}, + {Sign::POS, -128, MType({0xe21f9f89c1ab80b2, 0x8573b71682a7d21a})}, + {Sign::POS, -128, MType({0x1e005d06dbfa8f8, 0x87244c308e670a66})}, + {Sign::POS, -128, MType({0x223111a707b6de2c, 0x88d7c11e3ad53cdc})}, + {Sign::POS, -128, MType({0x223111a707b6de2c, 0x88d7c11e3ad53cdc})}, + {Sign::POS, 
-128, MType({0x2eb628dba173c82d, 0x8a8e1fb794b09134})}, + {Sign::POS, -128, MType({0xbe2ad19415fe25a5, 0x8c47720791e53313})}, + {Sign::POS, -128, MType({0xbe2ad19415fe25a5, 0x8c47720791e53313})}, + {Sign::POS, -128, MType({0xbddae1ccce247838, 0x8e03c24d73003959})}, + {Sign::POS, -128, MType({0x9b00bf167e95da67, 0x8fc31afe30b2c6de})}, + {Sign::POS, -128, MType({0x9b00bf167e95da67, 0x8fc31afe30b2c6de})}, + {Sign::POS, -128, MType({0x9b92199ed1a4bab1, 0x918586c5f5e4bf01})}, + {Sign::POS, -128, MType({0xdf5bb3b60554e152, 0x934b1089a6dc93c1})}, + {Sign::POS, -128, MType({0xdf5bb3b60554e152, 0x934b1089a6dc93c1})}, + {Sign::POS, -128, MType({0xf3cbc416a2418012, 0x9513c36876083695})}, + {Sign::POS, -128, MType({0xbe1188fbc94e2f15, 0x96dfaabd86fa1646})}, + {Sign::POS, -128, MType({0xbe1188fbc94e2f15, 0x96dfaabd86fa1646})}, + {Sign::POS, -128, MType({0x1d2f89321647b358, 0x98aed221a03458b6})}, + {Sign::POS, -128, MType({0x1d2f89321647b358, 0x98aed221a03458b6})}, + {Sign::POS, -128, MType({0xe549f9aaea3cb5e1, 0x9a81456cec642e0f})}, + {Sign::POS, -128, MType({0xa2554b2dd4619e63, 0x9c5710b8cbb73a42})}, + {Sign::POS, -128, MType({0xa2554b2dd4619e63, 0x9c5710b8cbb73a42})}, + {Sign::POS, -128, MType({0x30603d87b6df81ad, 0x9e304061b5fda919})}, + {Sign::POS, -128, MType({0x30603d87b6df81ad, 0x9e304061b5fda919})}, + {Sign::POS, -128, MType({0x67879c5a30cd1242, 0xa00ce1092e5498c3})}, + {Sign::POS, -128, MType({0x67879c5a30cd1242, 0xa00ce1092e5498c3})}, + {Sign::POS, -128, MType({0xb7efae08e597e16, 0xa1ecff97c91e267b})}, + {Sign::POS, -128, MType({0x83594fab088c0d65, 0xa3d0a93f45169a4a})}, + {Sign::POS, -128, MType({0x83594fab088c0d65, 0xa3d0a93f45169a4a})}, + {Sign::POS, -128, MType({0xaf6a62a0dec6e073, 0xa5b7eb7cb860fb88})}, + {Sign::POS, -128, MType({0xaf6a62a0dec6e073, 0xa5b7eb7cb860fb88})}, + {Sign::POS, -128, MType({0x49362382a768847a, 0xa7a2d41ad270c9d7})}, + {Sign::POS, -128, MType({0x49362382a768847a, 0xa7a2d41ad270c9d7})}, + {Sign::POS, -128, MType({0x8ba4aea614d05701, 
0xa991713433c2b998})}, + {Sign::POS, -128, MType({0x8ba4aea614d05701, 0xa991713433c2b998})}, + {Sign::POS, -128, MType({0x7fe6607ba902ef3c, 0xab83d135dc633301})}, + {Sign::POS, -128, MType({0x7fe6607ba902ef3c, 0xab83d135dc633301})}, + {Sign::POS, -128, MType({0xd60864fd949b4bd3, 0xad7a02e1b24efd31})}, + {Sign::POS, -128, MType({0xd60864fd949b4bd3, 0xad7a02e1b24efd31})}, + {Sign::POS, -128, MType({0x66d235ee63073dd, 0xaf74155120c9011c})}, + {Sign::POS, -128, MType({0x66d235ee63073dd, 0xaf74155120c9011c})}, + {Sign::POS, 0, MType(0)}, }; // Logarithm range reduction - Step 2: @@ -435,196 +431,196 @@ constexpr double S2[198] = { // r = 2^-18 * round( 2^18 / (1 + i*2^(-14)) ); // s, m, e = RealField(128)(r).log().sign_mantissa_exponent(); // print("{Sign::POS," if (s == -1) else "{Sign::NEG,", e, ", -// format_hex(m), "},"); +// MType({", hex(m % 2^64), ",", hex((m >> 64) % 2^64), "})},"); const Float128 LOG_R2[198] = { - {Sign::NEG, -135, 0xb67dab2a'1a5742a4'a0e061c5'f7431c5e_u128}, - {Sign::NEG, -135, 0xb4807f24'af682939'5d5bfe7b'969ed6ec_u128}, - {Sign::NEG, -135, 0xb2834b35'b4d54d5f'4d08702d'dfabc23f_u128}, - {Sign::NEG, -135, 0xb0860f5c'eba9be95'd4d36650'8b9953df_u128}, - {Sign::NEG, -135, 0xae68f71a'a09e8847'ac18a289'f8f214a9_u128}, - {Sign::NEG, -135, 0xac6baaee'd676e8f1'd5b42054'abb88c45_u128}, - {Sign::NEG, -135, 0xaa6e56d8'7cd632d6'09809d58'ee484964_u128}, - {Sign::NEG, -135, 0xa870fad7'54bb8791'b9e6fc7c'72f06d73_u128}, - {Sign::NEG, -135, 0xa67396eb'1f231892'6f78d6d0'105c00e2_u128}, - {Sign::NEG, -135, 0xa4762b13'9d0626e7'028f7126'29209148_u128}, - {Sign::NEG, -135, 0xa258dfd1'0aedaa67'c98d898e'f172df02_u128}, - {Sign::NEG, -135, 0xa05b63a3'73e60a83'fcc37c3c'3062bfa1_u128}, - {Sign::NEG, -135, 0x9e5ddf89'cf42f501'3eb450db'05763c36_u128}, - {Sign::NEG, -135, 0x9c605383'ddf1b88c'7146a86f'd458b775_u128}, - {Sign::NEG, -135, 0x9a62bf91'60dcb286'c20a0c92'81474436_u128}, - {Sign::NEG, -135, 0x986523b2'18eb4ed6'cdc57316'ec4aebc3_u128}, - {Sign::NEG, -135, 
0x96677fe5'c70207b9'c060dad7'4cef4273_u128}, - {Sign::NEG, -135, 0x9449f92d'2ff44633'ed8def1a'3e433499_u128}, - {Sign::NEG, -135, 0x924c4507'3220b5e0'3ce7a1f8'5c27b4fc_u128}, - {Sign::NEG, -135, 0x904e88f3'68fea63f'f2ca8934'49f7f2cb_u128}, - {Sign::NEG, -135, 0x8e50c4f1'956699ed'8d77d9fa'bd2853cf_u128}, - {Sign::NEG, -135, 0x8c52f901'782e20ec'93e828d7'5b58ded4_u128}, - {Sign::NEG, -135, 0x8a552522'd227d87a'9f9605b0'53c5acf0_u128}, - {Sign::NEG, -135, 0x88574955'64236ae0'62a14939'3bca7241_u128}, - {Sign::NEG, -135, 0x86398719'b66bac7c'aea6b56c'e89203d4_u128}, - {Sign::NEG, -135, 0x843b9aef'044e4dcc'0242bd86'd00609b2_u128}, - {Sign::NEG, -135, 0x823da6d4'c89c6927'daabf927'74bac84e_u128}, - {Sign::NEG, -135, 0x803faaca'c419abf2'a1c6f3fc'242ef8d0_u128}, - {Sign::NEG, -136, 0xfc834da1'6f0d9f57'a225ebc0'2e6d9dd4_u128}, - {Sign::NEG, -136, 0xf88735cc'c7433381'c33f6ad3'40ae18a9_u128}, - {Sign::NEG, -136, 0xf48b0e17'1249b6bc'70b2a4d3'8a242244_u128}, - {Sign::NEG, -136, 0xf08ed67f'd190e280'1d548190'48b811b0_u128}, - {Sign::NEG, -136, 0xec52ca07'ed95f236'9c21b650'afe9ede0_u128}, - {Sign::NEG, -136, 0xe85671ad'ecd28aac'935519c9'6d30e463_u128}, - {Sign::NEG, -136, 0xe45a0970'dc912ca7'ba88f6f2'e2672cfe_u128}, - {Sign::NEG, -136, 0xe05d9150'3e298bc8'0b1a8b84'657ae069_u128}, - {Sign::NEG, -136, 0xdc61094b'92ed70ef'ea3bff8d'197b20a1_u128}, - {Sign::NEG, -136, 0xd8647162'5c28b9e5'cdbb931d'6fecc249_u128}, - {Sign::NEG, -136, 0xd467c994'1b2158f5'd971d560'd5f00820_u128}, - {Sign::NEG, -136, 0xd06b11e0'51175493'75563561'244c090b_u128}, - {Sign::NEG, -136, 0xcc6e4a46'7f44c6fa'dc393c9a'3f3b380f_u128}, - {Sign::NEG, -136, 0xc831a4c6'f6fa709d'e6abe6e9'e4ee2096_u128}, - {Sign::NEG, -136, 0xc434bc61'24a0f16e'3ce3c822'8583a66e_u128}, - {Sign::NEG, -136, 0xc037c413'c61bfd93'b96a79f5'c5a4963a_u128}, - {Sign::NEG, -136, 0xbc3abbde'5c8d9bde'aaef2733'7008679f_u128}, - {Sign::NEG, -136, 0xb83da3c0'6911e509'a49a3fca'ddc8bc5a_u128}, - {Sign::NEG, -136, 0xb4407bb9'6cbf035a'e0254feb'785362fa_u128}, - 
{Sign::NEG, -136, 0xb04343c8'e8a53245'9893a4e2'5ab9dc95_u128}, - {Sign::NEG, -136, 0xac45fbee'5dcebe0b'5d8b0f40'a3708915_u128}, - {Sign::NEG, -136, 0xa848a429'4d40035d'5f4c11c2'c7a58c69_u128}, - {Sign::NEG, -136, 0xa44b3c79'37f76efd'b348cc5d'f706ffba_u128}, - {Sign::NEG, -136, 0xa04dc4dd'9eed7d60'9159f2c5'5a18befd_u128}, - {Sign::NEG, -136, 0x9c106456'3058bef3'bdfdee41'fe6a5a02_u128}, - {Sign::NEG, -136, 0x9812cbe3'46475a24'4580ddf8'9853254d_u128}, - {Sign::NEG, -136, 0x94152383'53489ffb'ac75e10d'61fc3ee8_u128}, - {Sign::NEG, -136, 0x90176b35'd83ce8e2'cad9b30b'29736155_u128}, - {Sign::NEG, -136, 0x8c19a2fa'55fe9b14'6f881deb'98fc45f3_u128}, - {Sign::NEG, -136, 0x881bcad0'4d622a3e'70a04b63'b7248c96_u128}, - {Sign::NEG, -136, 0x841de2b7'3f361722'b4823fb4'8035eddd_u128}, - {Sign::NEG, -136, 0x801feaae'ac42ef38'3364ccb5'b13cd47f_u128}, - {Sign::NEG, -137, 0xf843c56c'2a969897'e306977b'049f0ad5_u128}, - {Sign::NEG, -137, 0xf0479599'f617a843'e3c4d9e9'619bc045_u128}, - {Sign::NEG, -137, 0xe84b45e5'bc76702c'4356d525'b5e6432d_u128}, - {Sign::NEG, -137, 0xe04ed64e'7f14697a'7839dcd7'989339ab_u128}, - {Sign::NEG, -137, 0xd85246d3'3f47230b'4e21f045'ecb76f23_u128}, - {Sign::NEG, -137, 0xd0559772'fe5840b0'902e248d'd4ba9b28_u128}, - {Sign::NEG, -137, 0xc858c82c'bd857a72'a4444906'7ef92e01_u128}, - {Sign::NEG, -137, 0xc05bd8ff'7e009bd2'17926207'cc22e4e6_u128}, - {Sign::NEG, -137, 0xb85ec9ea'40ef8309'1c349622'f3fa5d82_u128}, - {Sign::NEG, -137, 0xafe1c6ec'e1a058dd'97fa2fd0'c9dc723e_u128}, - {Sign::NEG, -137, 0xa7e47606'048b1a65'983e8089'7cf1e60f_u128}, - {Sign::NEG, -137, 0x9fe70534'1d236102'7199cd06'ae5d39b3_u128}, - {Sign::NEG, -137, 0x97e97476'2c5e8f58'43cd18a7'2a051a96_u128}, - {Sign::NEG, -137, 0x8febc3cb'332616ff'7b6d1248'c3e1fd40_u128}, - {Sign::NEG, -137, 0x87edf332'325777c5'f5572a88'14c703af_u128}, - {Sign::NEG, -138, 0xffe00554'55887de0'26828c92'649a3a39_u128}, - {Sign::NEG, -138, 0xefe3e464'3a640cf3'82c550bd'1216d82a_u128}, - {Sign::NEG, -138, 
0xdfe78392'14b4e8ae'da6959f7'f0e01bf0_u128}, - {Sign::NEG, -138, 0xcfeae2db'e5d6736d'da93e2fa'85a8f214_u128}, - {Sign::NEG, -138, 0xbfee023f'af0c2480'b47505bf'a5a03b06_u128}, - {Sign::NEG, -138, 0xaff0e1bb'718186ad'b1475a51'80a43520_u128}, - {Sign::NEG, -138, 0x9ff3814d'2e4a36b2'a8740b91'c95df537_u128}, - {Sign::NEG, -138, 0x8ff5e0f2'e661e1c6'57d895d3'5921b59c_u128}, - {Sign::NEG, -139, 0xfff00155'35588833'3c56c598'c659c2a3_u128}, - {Sign::NEG, -139, 0xdff3c0e4'97ea4eb1'2ef8ec33'ed9d782a_u128}, - {Sign::NEG, -139, 0xbff7008f'f5e0c257'379eba7e'6465ff63_u128}, - {Sign::NEG, -139, 0x9ff9c053'5073a370'3f972b78'3fcab757_u128}, - {Sign::NEG, -140, 0xfff80055'51558885'de026e27'1ee0549d_u128}, - {Sign::NEG, -140, 0xbffb8023'febc0c25'eceb47ea'01f6c632_u128}, - {Sign::NEG, -141, 0xfffc0015'54d55888'7333c578'57e1ed52_u128}, - {Sign::NEG, -142, 0xfffe0005'55455588'87dde026'fa704374_u128}, - {Sign::POS, 0, 0_u128}, - {Sign::POS, -141, 0x80010002'aab2aac4'44999abe'2fe2cc65_u128}, - {Sign::POS, -140, 0x8002000a'aaeaac44'4eef3815'81464ccb_u128}, - {Sign::POS, -140, 0xc0048024'01440c26'dfeb4850'85f6f454_u128}, - {Sign::POS, -139, 0x8004002a'acaac445'99abe3be'3a1c6e93_u128}, - {Sign::POS, -139, 0xa0064053'5a37a37a'6bc1e20e'ac8448b4_u128}, - {Sign::POS, -139, 0xc0090090'0a20c275'979eedc0'64c242fd_u128}, - {Sign::POS, -139, 0xe00c40e4'bd6e4efd'c72446cc'1bf728bd_u128}, - {Sign::POS, -138, 0x800800aa'baac446e'f381b821'bbb569e5_u128}, - {Sign::POS, -138, 0x900a20f3'19a3e273'569b26aa'a485ea5c_u128}, - {Sign::POS, -138, 0xa00c814d'7c6a37f8'2dcf56c8'3c80b028_u128}, - {Sign::POS, -138, 0xb00f21bb'e3e388ee'5f697682'84463b9b_u128}, - {Sign::POS, -138, 0xc0120240'510c284c'b48ea6c0'5e2773a1_u128}, - {Sign::POS, -138, 0xd01522dc'c4f87991'14d9d761'96d8043a_u128}, - {Sign::POS, -138, 0xe0188393'40d4f241'e016a611'a4415d72_u128}, - {Sign::POS, -138, 0xf01c2465'c5e61b6f'661e135f'49a47c40_u128}, - {Sign::POS, -137, 0x801002ab'2ac4499a'be6bf0fa'435e8383_u128}, - {Sign::POS, -137, 
0x88121333'7898871e'9a31ba0c'bc030353_u128}, - {Sign::POS, -137, 0x901443cc'cd362c9f'54b57dfe'0c4c840f_u128}, - {Sign::POS, -137, 0x98169478'296fad41'7ad1e9c3'15328f7e_u128}, - {Sign::POS, -137, 0xa0190536'8e2389b3'1f3f686c'f3d6be22_u128}, - {Sign::POS, -137, 0xa81b9608'fc3c50ec'f105b66e'c4703ede_u128}, - {Sign::POS, -137, 0xb01e46f0'74b0a0f3'610848c6'8df4d233_u128}, - {Sign::POS, -137, 0xb7a0e9ed'7613acb0'2e0efddf'33a20464_u128}, - {Sign::POS, -137, 0xbfa3d900'8e042ffb'c2cdb3c7'50f127b4_u128}, - {Sign::POS, -137, 0xc7a6e82b'a36a7073'bd953378'6d3f4c49_u128}, - {Sign::POS, -137, 0xcfaa176f'b76c8eb1'82e237c9'a4d450e3_u128}, - {Sign::POS, -137, 0xd7ad66cd'cb3cbe14'c00b46a4'd0e3dfd0_u128}, - {Sign::POS, -137, 0xdfb0d646'e0194584'ea999c0d'f8546710_u128}, - {Sign::POS, -137, 0xe7b465db'f74c8032'cec6c2a9'ad974f4f_u128}, - {Sign::POS, -137, 0xefb8158e'122cde5a'2d2045da'1570a07c_u128}, - {Sign::POS, -137, 0xf7bbe55e'321ce603'6752e9b2'381e3edc_u128}, - {Sign::POS, -137, 0xffbfd54d'588b33c5'3c1ed527'28e00e40_u128}, - {Sign::POS, -136, 0x83e1f2ae'43793dc3'493b0d87'3fb9a340_u128}, - {Sign::POS, -136, 0x87e40ac6'5f6cc4a0'29e38750'c9d26893_u128}, - {Sign::POS, -136, 0x8be632ef'80e9a0df'aab9e832'7258ac3f_u128}, - {Sign::POS, -136, 0x8fe86b2a'28bf51b3'28bc403d'8a5f3c63_u128}, - {Sign::POS, -136, 0x93eab376'd7c36377'f720c1c9'7227fcdc_u128}, - {Sign::POS, -136, 0x97ed0bd6'0ed17018'6ad9a3e3'd11b66c1_u128}, - {Sign::POS, -136, 0x9bef7448'4ecb1f6c'edb27b79'c90b4019_u128}, - {Sign::POS, -136, 0x9fb1c4cd'27012e19'a092a0d7'ab21722a_u128}, - {Sign::POS, -136, 0xa3b44c65'b71c2d85'535d52f0'939a4d02_u128}, - {Sign::POS, -136, 0xa7b6e412'cadcb3dc'90a57e11'edc1864e_u128}, - {Sign::POS, -136, 0xabb98bd4'e33c4381'68e9c901'60031159_u128}, - {Sign::POS, -136, 0xafbc43ac'813a6ea3'bf60594f'929adeb8_u128}, - {Sign::POS, -136, 0xb3bf0b9a'25dcd7a2'8a421588'86775205_u128}, - {Sign::POS, -136, 0xb7c1e39e'522f316d'1ab45417'663dee9e_u128}, - {Sign::POS, -136, 0xbbc4cbb9'87433fe4'6c51ae3c'e1aea68a_u128}, - 
{Sign::POS, -136, 0xbfc7c3ec'4630d83c'7c52ae8b'40ebabb7_u128}, - {Sign::POS, -136, 0xc3cacc37'1015e15d'a857126f'7cfaaa67_u128}, - {Sign::POS, -136, 0xc7cde49a'66165446'14d05662'cd29464a_u128}, - {Sign::POS, -136, 0xcb90da16'44d29bb7'8379db06'ef3cd6bb_u128}, - {Sign::POS, -136, 0xcf9411aa'99ddb7de'9025f4c6'7dd38bb6_u128}, - {Sign::POS, -136, 0xd3975958'f681086d'd6f8a61c'892032ee_u128}, - {Sign::POS, -136, 0xd79ab121'dbf8714c'9a2f20b4'e2332d47_u128}, - {Sign::POS, -136, 0xdb9e1905'cb85ea59'3c767d61'f51d375b_u128}, - {Sign::POS, -136, 0xdfa19105'46717fca'd4b2bd65'bb25493c_u128}, - {Sign::POS, -136, 0xe3a51920'ce095292'c96c1254'a30ef91f_u128}, - {Sign::POS, -136, 0xe7a8b158'e3a198be'73e324ce'0946b214_u128}, - {Sign::POS, -136, 0xebac59ae'08949dd8'cacd125a'12bac62c_u128}, - {Sign::POS, -136, 0xef6fd620'b2b7a503'cafdc272'27b71eaa_u128}, - {Sign::POS, -136, 0xf3739daf'959aaafc'688d4282'f6026aa3_u128}, - {Sign::POS, -136, 0xf777755d'03f4e0b6'e54e9e38'04464cdd_u128}, - {Sign::POS, -136, 0xfb7b5d29'7f388a12'cb78b383'f4b59dce_u128}, - {Sign::POS, -136, 0xff7f5515'88de024f'ee055fc5'15062c04_u128}, - {Sign::POS, -135, 0x81c1ae90'd131de38'207812b4'3382acdd_u128}, - {Sign::POS, -135, 0x83c3baa7'26a721cc'dc90c4c4'b61f3a87_u128}, - {Sign::POS, -135, 0x85c5cece'05941dbc'1a03f13f'b2c978b1_u128}, - {Sign::POS, -135, 0x87c7eb05'aec1304f'b36f282e'83a7dc36_u128}, - {Sign::POS, -135, 0x89a9eccd'56a980c0'd82a4661'6d4c393f_u128}, - {Sign::POS, -135, 0x8bac18a6'40185360'bc6ff847'13c9babd_u128}, - {Sign::POS, -135, 0x8dae4c90'b22574f4'9f7942a5'16fc2d8a_u128}, - {Sign::POS, -135, 0x8fb0888c'eda546ab'15e50cfd'9b29b427_u128}, - {Sign::POS, -135, 0x91b2cc9b'336f3718'9f465296'ae7dd49a_u128}, - {Sign::POS, -135, 0x93b518bb'c45dc268'b49c1eb9'b348e6e4_u128}, - {Sign::POS, -135, 0x95b76cee'e14e728e'daa320cd'64c9d9c7_u128}, - {Sign::POS, -135, 0x9799a333'de49b963'75a91950'ffe1e3b5_u128}, - {Sign::POS, -135, 0x999c070b'a32068cd'5c6abcbf'43f03f14_u128}, - {Sign::POS, -135, 
0x9b9e72f6'b295ad4f'5a9e7f26'5d1ed157_u128}, - {Sign::POS, -135, 0x9da0e6f5'4d9318fd'efeb98d0'2a195c17_u128}, - {Sign::POS, -135, 0x9fa36307'b5054ca8'2aa503a3'110ab5a7_u128}, - {Sign::POS, -135, 0xa1a5e72e'29dbf808'd0fe7e05'869eb825_u128}, - {Sign::POS, -135, 0xa3884a68'a750cb10'e80a28f4'e1e500d2_u128}, - {Sign::POS, -135, 0xa58ade36'aeef9f0b'53106415'1ca6e30b_u128}, - {Sign::POS, -135, 0xa78d7a19'82c4b08f'27c01ffa'8e2e3c4b_u128}, - {Sign::POS, -135, 0xa9901e11'63cbbbf5'7ba9408d'c857d568_u128}, - {Sign::POS, -135, 0xab92ca1e'93038d76'104d1e33'31d3b4fa_u128}, - {Sign::POS, -135, 0xad957e41'516e0158'9343c846'fcdf9137_u128}, - {Sign::POS, -135, 0xaf780e79'b2514889'3977e89a'ec59bfa2_u128}, - {Sign::POS, -135, 0xb17ad246'ef3713bc'913d4e3d'c55c3e6e_u128}, - {Sign::POS, -135, 0xb37d9e2a'7a56b09d'777b52a9'e70d8bcc_u128}, - {Sign::POS, -135, 0xb5807224'94be0c91'55de916f'd30591de_u128}, - {Sign::POS, -135, 0xb7834e35'7f7e2600'e79cfb37'be2861e4_u128}, - {Sign::POS, -135, 0xb986325d'7bab0c89'90983104'd3805389_u128}, - {Sign::POS, -135, 0xbb68ef9c'254aa378'59e3b2ec'71ce64f4_u128}, - {Sign::POS, -135, 0xbd6be371'8c77636f'e83183bf'3dd612ef_u128}, - {Sign::POS, -135, 0xbf6edf5e'c44d9d35'c4e3b0ac'2fd52b7f_u128}, + {Sign::NEG, -135, MType({0xa0e061c5f7431c5e, 0xb67dab2a1a5742a4})}, + {Sign::NEG, -135, MType({0x5d5bfe7b969ed6ec, 0xb4807f24af682939})}, + {Sign::NEG, -135, MType({0x4d08702ddfabc23f, 0xb2834b35b4d54d5f})}, + {Sign::NEG, -135, MType({0xd4d366508b9953df, 0xb0860f5ceba9be95})}, + {Sign::NEG, -135, MType({0xac18a289f8f214a9, 0xae68f71aa09e8847})}, + {Sign::NEG, -135, MType({0xd5b42054abb88c45, 0xac6baaeed676e8f1})}, + {Sign::NEG, -135, MType({0x9809d58ee484964, 0xaa6e56d87cd632d6})}, + {Sign::NEG, -135, MType({0xb9e6fc7c72f06d73, 0xa870fad754bb8791})}, + {Sign::NEG, -135, MType({0x6f78d6d0105c00e2, 0xa67396eb1f231892})}, + {Sign::NEG, -135, MType({0x28f712629209148, 0xa4762b139d0626e7})}, + {Sign::NEG, -135, MType({0xc98d898ef172df02, 0xa258dfd10aedaa67})}, + {Sign::NEG, 
-135, MType({0xfcc37c3c3062bfa1, 0xa05b63a373e60a83})}, + {Sign::NEG, -135, MType({0x3eb450db05763c36, 0x9e5ddf89cf42f501})}, + {Sign::NEG, -135, MType({0x7146a86fd458b775, 0x9c605383ddf1b88c})}, + {Sign::NEG, -135, MType({0xc20a0c9281474436, 0x9a62bf9160dcb286})}, + {Sign::NEG, -135, MType({0xcdc57316ec4aebc3, 0x986523b218eb4ed6})}, + {Sign::NEG, -135, MType({0xc060dad74cef4273, 0x96677fe5c70207b9})}, + {Sign::NEG, -135, MType({0xed8def1a3e433499, 0x9449f92d2ff44633})}, + {Sign::NEG, -135, MType({0x3ce7a1f85c27b4fc, 0x924c45073220b5e0})}, + {Sign::NEG, -135, MType({0xf2ca893449f7f2cb, 0x904e88f368fea63f})}, + {Sign::NEG, -135, MType({0x8d77d9fabd2853cf, 0x8e50c4f1956699ed})}, + {Sign::NEG, -135, MType({0x93e828d75b58ded4, 0x8c52f901782e20ec})}, + {Sign::NEG, -135, MType({0x9f9605b053c5acf0, 0x8a552522d227d87a})}, + {Sign::NEG, -135, MType({0x62a149393bca7241, 0x8857495564236ae0})}, + {Sign::NEG, -135, MType({0xaea6b56ce89203d4, 0x86398719b66bac7c})}, + {Sign::NEG, -135, MType({0x242bd86d00609b2, 0x843b9aef044e4dcc})}, + {Sign::NEG, -135, MType({0xdaabf92774bac84e, 0x823da6d4c89c6927})}, + {Sign::NEG, -135, MType({0xa1c6f3fc242ef8d0, 0x803faacac419abf2})}, + {Sign::NEG, -136, MType({0xa225ebc02e6d9dd4, 0xfc834da16f0d9f57})}, + {Sign::NEG, -136, MType({0xc33f6ad340ae18a9, 0xf88735ccc7433381})}, + {Sign::NEG, -136, MType({0x70b2a4d38a242244, 0xf48b0e171249b6bc})}, + {Sign::NEG, -136, MType({0x1d54819048b811b0, 0xf08ed67fd190e280})}, + {Sign::NEG, -136, MType({0x9c21b650afe9ede0, 0xec52ca07ed95f236})}, + {Sign::NEG, -136, MType({0x935519c96d30e463, 0xe85671adecd28aac})}, + {Sign::NEG, -136, MType({0xba88f6f2e2672cfe, 0xe45a0970dc912ca7})}, + {Sign::NEG, -136, MType({0xb1a8b84657ae069, 0xe05d91503e298bc8})}, + {Sign::NEG, -136, MType({0xea3bff8d197b20a1, 0xdc61094b92ed70ef})}, + {Sign::NEG, -136, MType({0xcdbb931d6fecc249, 0xd86471625c28b9e5})}, + {Sign::NEG, -136, MType({0xd971d560d5f00820, 0xd467c9941b2158f5})}, + {Sign::NEG, -136, MType({0x75563561244c090b, 
0xd06b11e051175493})}, + {Sign::NEG, -136, MType({0xdc393c9a3f3b380f, 0xcc6e4a467f44c6fa})}, + {Sign::NEG, -136, MType({0xe6abe6e9e4ee2096, 0xc831a4c6f6fa709d})}, + {Sign::NEG, -136, MType({0x3ce3c8228583a66e, 0xc434bc6124a0f16e})}, + {Sign::NEG, -136, MType({0xb96a79f5c5a4963a, 0xc037c413c61bfd93})}, + {Sign::NEG, -136, MType({0xaaef27337008679f, 0xbc3abbde5c8d9bde})}, + {Sign::NEG, -136, MType({0xa49a3fcaddc8bc5a, 0xb83da3c06911e509})}, + {Sign::NEG, -136, MType({0xe0254feb785362fa, 0xb4407bb96cbf035a})}, + {Sign::NEG, -136, MType({0x9893a4e25ab9dc95, 0xb04343c8e8a53245})}, + {Sign::NEG, -136, MType({0x5d8b0f40a3708915, 0xac45fbee5dcebe0b})}, + {Sign::NEG, -136, MType({0x5f4c11c2c7a58c69, 0xa848a4294d40035d})}, + {Sign::NEG, -136, MType({0xb348cc5df706ffba, 0xa44b3c7937f76efd})}, + {Sign::NEG, -136, MType({0x9159f2c55a18befd, 0xa04dc4dd9eed7d60})}, + {Sign::NEG, -136, MType({0xbdfdee41fe6a5a02, 0x9c1064563058bef3})}, + {Sign::NEG, -136, MType({0x4580ddf89853254d, 0x9812cbe346475a24})}, + {Sign::NEG, -136, MType({0xac75e10d61fc3ee8, 0x9415238353489ffb})}, + {Sign::NEG, -136, MType({0xcad9b30b29736155, 0x90176b35d83ce8e2})}, + {Sign::NEG, -136, MType({0x6f881deb98fc45f3, 0x8c19a2fa55fe9b14})}, + {Sign::NEG, -136, MType({0x70a04b63b7248c96, 0x881bcad04d622a3e})}, + {Sign::NEG, -136, MType({0xb4823fb48035eddd, 0x841de2b73f361722})}, + {Sign::NEG, -136, MType({0x3364ccb5b13cd47f, 0x801feaaeac42ef38})}, + {Sign::NEG, -137, MType({0xe306977b049f0ad5, 0xf843c56c2a969897})}, + {Sign::NEG, -137, MType({0xe3c4d9e9619bc045, 0xf0479599f617a843})}, + {Sign::NEG, -137, MType({0x4356d525b5e6432d, 0xe84b45e5bc76702c})}, + {Sign::NEG, -137, MType({0x7839dcd7989339ab, 0xe04ed64e7f14697a})}, + {Sign::NEG, -137, MType({0x4e21f045ecb76f23, 0xd85246d33f47230b})}, + {Sign::NEG, -137, MType({0x902e248dd4ba9b28, 0xd0559772fe5840b0})}, + {Sign::NEG, -137, MType({0xa44449067ef92e01, 0xc858c82cbd857a72})}, + {Sign::NEG, -137, MType({0x17926207cc22e4e6, 0xc05bd8ff7e009bd2})}, + {Sign::NEG, 
-137, MType({0x1c349622f3fa5d82, 0xb85ec9ea40ef8309})}, + {Sign::NEG, -137, MType({0x97fa2fd0c9dc723e, 0xafe1c6ece1a058dd})}, + {Sign::NEG, -137, MType({0x983e80897cf1e60f, 0xa7e47606048b1a65})}, + {Sign::NEG, -137, MType({0x7199cd06ae5d39b3, 0x9fe705341d236102})}, + {Sign::NEG, -137, MType({0x43cd18a72a051a96, 0x97e974762c5e8f58})}, + {Sign::NEG, -137, MType({0x7b6d1248c3e1fd40, 0x8febc3cb332616ff})}, + {Sign::NEG, -137, MType({0xf5572a8814c703af, 0x87edf332325777c5})}, + {Sign::NEG, -138, MType({0x26828c92649a3a39, 0xffe0055455887de0})}, + {Sign::NEG, -138, MType({0x82c550bd1216d82a, 0xefe3e4643a640cf3})}, + {Sign::NEG, -138, MType({0xda6959f7f0e01bf0, 0xdfe7839214b4e8ae})}, + {Sign::NEG, -138, MType({0xda93e2fa85a8f214, 0xcfeae2dbe5d6736d})}, + {Sign::NEG, -138, MType({0xb47505bfa5a03b06, 0xbfee023faf0c2480})}, + {Sign::NEG, -138, MType({0xb1475a5180a43520, 0xaff0e1bb718186ad})}, + {Sign::NEG, -138, MType({0xa8740b91c95df537, 0x9ff3814d2e4a36b2})}, + {Sign::NEG, -138, MType({0x57d895d35921b59c, 0x8ff5e0f2e661e1c6})}, + {Sign::NEG, -139, MType({0x3c56c598c659c2a3, 0xfff0015535588833})}, + {Sign::NEG, -139, MType({0x2ef8ec33ed9d782a, 0xdff3c0e497ea4eb1})}, + {Sign::NEG, -139, MType({0x379eba7e6465ff63, 0xbff7008ff5e0c257})}, + {Sign::NEG, -139, MType({0x3f972b783fcab757, 0x9ff9c0535073a370})}, + {Sign::NEG, -140, MType({0xde026e271ee0549d, 0xfff8005551558885})}, + {Sign::NEG, -140, MType({0xeceb47ea01f6c632, 0xbffb8023febc0c25})}, + {Sign::NEG, -141, MType({0x7333c57857e1ed52, 0xfffc001554d55888})}, + {Sign::NEG, -142, MType({0x87dde026fa704374, 0xfffe000555455588})}, + {Sign::POS, 0, MType({0x0, 0x0})}, + {Sign::POS, -141, MType({0x44999abe2fe2cc65, 0x80010002aab2aac4})}, + {Sign::POS, -140, MType({0x4eef381581464ccb, 0x8002000aaaeaac44})}, + {Sign::POS, -140, MType({0xdfeb485085f6f454, 0xc004802401440c26})}, + {Sign::POS, -139, MType({0x99abe3be3a1c6e93, 0x8004002aacaac445})}, + {Sign::POS, -139, MType({0x6bc1e20eac8448b4, 0xa00640535a37a37a})}, + {Sign::POS, 
-139, MType({0x979eedc064c242fd, 0xc00900900a20c275})}, + {Sign::POS, -139, MType({0xc72446cc1bf728bd, 0xe00c40e4bd6e4efd})}, + {Sign::POS, -138, MType({0xf381b821bbb569e5, 0x800800aabaac446e})}, + {Sign::POS, -138, MType({0x569b26aaa485ea5c, 0x900a20f319a3e273})}, + {Sign::POS, -138, MType({0x2dcf56c83c80b028, 0xa00c814d7c6a37f8})}, + {Sign::POS, -138, MType({0x5f69768284463b9b, 0xb00f21bbe3e388ee})}, + {Sign::POS, -138, MType({0xb48ea6c05e2773a1, 0xc0120240510c284c})}, + {Sign::POS, -138, MType({0x14d9d76196d8043a, 0xd01522dcc4f87991})}, + {Sign::POS, -138, MType({0xe016a611a4415d72, 0xe018839340d4f241})}, + {Sign::POS, -138, MType({0x661e135f49a47c40, 0xf01c2465c5e61b6f})}, + {Sign::POS, -137, MType({0xbe6bf0fa435e8383, 0x801002ab2ac4499a})}, + {Sign::POS, -137, MType({0x9a31ba0cbc030353, 0x881213337898871e})}, + {Sign::POS, -137, MType({0x54b57dfe0c4c840f, 0x901443cccd362c9f})}, + {Sign::POS, -137, MType({0x7ad1e9c315328f7e, 0x98169478296fad41})}, + {Sign::POS, -137, MType({0x1f3f686cf3d6be22, 0xa01905368e2389b3})}, + {Sign::POS, -137, MType({0xf105b66ec4703ede, 0xa81b9608fc3c50ec})}, + {Sign::POS, -137, MType({0x610848c68df4d233, 0xb01e46f074b0a0f3})}, + {Sign::POS, -137, MType({0x2e0efddf33a20464, 0xb7a0e9ed7613acb0})}, + {Sign::POS, -137, MType({0xc2cdb3c750f127b4, 0xbfa3d9008e042ffb})}, + {Sign::POS, -137, MType({0xbd9533786d3f4c49, 0xc7a6e82ba36a7073})}, + {Sign::POS, -137, MType({0x82e237c9a4d450e3, 0xcfaa176fb76c8eb1})}, + {Sign::POS, -137, MType({0xc00b46a4d0e3dfd0, 0xd7ad66cdcb3cbe14})}, + {Sign::POS, -137, MType({0xea999c0df8546710, 0xdfb0d646e0194584})}, + {Sign::POS, -137, MType({0xcec6c2a9ad974f4f, 0xe7b465dbf74c8032})}, + {Sign::POS, -137, MType({0x2d2045da1570a07c, 0xefb8158e122cde5a})}, + {Sign::POS, -137, MType({0x6752e9b2381e3edc, 0xf7bbe55e321ce603})}, + {Sign::POS, -137, MType({0x3c1ed52728e00e40, 0xffbfd54d588b33c5})}, + {Sign::POS, -136, MType({0x493b0d873fb9a340, 0x83e1f2ae43793dc3})}, + {Sign::POS, -136, MType({0x29e38750c9d26893, 
0x87e40ac65f6cc4a0})}, + {Sign::POS, -136, MType({0xaab9e8327258ac3f, 0x8be632ef80e9a0df})}, + {Sign::POS, -136, MType({0x28bc403d8a5f3c63, 0x8fe86b2a28bf51b3})}, + {Sign::POS, -136, MType({0xf720c1c97227fcdc, 0x93eab376d7c36377})}, + {Sign::POS, -136, MType({0x6ad9a3e3d11b66c1, 0x97ed0bd60ed17018})}, + {Sign::POS, -136, MType({0xedb27b79c90b4019, 0x9bef74484ecb1f6c})}, + {Sign::POS, -136, MType({0xa092a0d7ab21722a, 0x9fb1c4cd27012e19})}, + {Sign::POS, -136, MType({0x535d52f0939a4d02, 0xa3b44c65b71c2d85})}, + {Sign::POS, -136, MType({0x90a57e11edc1864e, 0xa7b6e412cadcb3dc})}, + {Sign::POS, -136, MType({0x68e9c90160031159, 0xabb98bd4e33c4381})}, + {Sign::POS, -136, MType({0xbf60594f929adeb8, 0xafbc43ac813a6ea3})}, + {Sign::POS, -136, MType({0x8a42158886775205, 0xb3bf0b9a25dcd7a2})}, + {Sign::POS, -136, MType({0x1ab45417663dee9e, 0xb7c1e39e522f316d})}, + {Sign::POS, -136, MType({0x6c51ae3ce1aea68a, 0xbbc4cbb987433fe4})}, + {Sign::POS, -136, MType({0x7c52ae8b40ebabb7, 0xbfc7c3ec4630d83c})}, + {Sign::POS, -136, MType({0xa857126f7cfaaa67, 0xc3cacc371015e15d})}, + {Sign::POS, -136, MType({0x14d05662cd29464a, 0xc7cde49a66165446})}, + {Sign::POS, -136, MType({0x8379db06ef3cd6bb, 0xcb90da1644d29bb7})}, + {Sign::POS, -136, MType({0x9025f4c67dd38bb6, 0xcf9411aa99ddb7de})}, + {Sign::POS, -136, MType({0xd6f8a61c892032ee, 0xd3975958f681086d})}, + {Sign::POS, -136, MType({0x9a2f20b4e2332d47, 0xd79ab121dbf8714c})}, + {Sign::POS, -136, MType({0x3c767d61f51d375b, 0xdb9e1905cb85ea59})}, + {Sign::POS, -136, MType({0xd4b2bd65bb25493c, 0xdfa1910546717fca})}, + {Sign::POS, -136, MType({0xc96c1254a30ef91f, 0xe3a51920ce095292})}, + {Sign::POS, -136, MType({0x73e324ce0946b214, 0xe7a8b158e3a198be})}, + {Sign::POS, -136, MType({0xcacd125a12bac62c, 0xebac59ae08949dd8})}, + {Sign::POS, -136, MType({0xcafdc27227b71eaa, 0xef6fd620b2b7a503})}, + {Sign::POS, -136, MType({0x688d4282f6026aa3, 0xf3739daf959aaafc})}, + {Sign::POS, -136, MType({0xe54e9e3804464cdd, 0xf777755d03f4e0b6})}, + {Sign::POS, 
-136, MType({0xcb78b383f4b59dce, 0xfb7b5d297f388a12})}, + {Sign::POS, -136, MType({0xee055fc515062c04, 0xff7f551588de024f})}, + {Sign::POS, -135, MType({0x207812b43382acdd, 0x81c1ae90d131de38})}, + {Sign::POS, -135, MType({0xdc90c4c4b61f3a87, 0x83c3baa726a721cc})}, + {Sign::POS, -135, MType({0x1a03f13fb2c978b1, 0x85c5cece05941dbc})}, + {Sign::POS, -135, MType({0xb36f282e83a7dc36, 0x87c7eb05aec1304f})}, + {Sign::POS, -135, MType({0xd82a46616d4c393f, 0x89a9eccd56a980c0})}, + {Sign::POS, -135, MType({0xbc6ff84713c9babd, 0x8bac18a640185360})}, + {Sign::POS, -135, MType({0x9f7942a516fc2d8a, 0x8dae4c90b22574f4})}, + {Sign::POS, -135, MType({0x15e50cfd9b29b427, 0x8fb0888ceda546ab})}, + {Sign::POS, -135, MType({0x9f465296ae7dd49a, 0x91b2cc9b336f3718})}, + {Sign::POS, -135, MType({0xb49c1eb9b348e6e4, 0x93b518bbc45dc268})}, + {Sign::POS, -135, MType({0xdaa320cd64c9d9c7, 0x95b76ceee14e728e})}, + {Sign::POS, -135, MType({0x75a91950ffe1e3b5, 0x9799a333de49b963})}, + {Sign::POS, -135, MType({0x5c6abcbf43f03f14, 0x999c070ba32068cd})}, + {Sign::POS, -135, MType({0x5a9e7f265d1ed157, 0x9b9e72f6b295ad4f})}, + {Sign::POS, -135, MType({0xefeb98d02a195c17, 0x9da0e6f54d9318fd})}, + {Sign::POS, -135, MType({0x2aa503a3110ab5a7, 0x9fa36307b5054ca8})}, + {Sign::POS, -135, MType({0xd0fe7e05869eb825, 0xa1a5e72e29dbf808})}, + {Sign::POS, -135, MType({0xe80a28f4e1e500d2, 0xa3884a68a750cb10})}, + {Sign::POS, -135, MType({0x531064151ca6e30b, 0xa58ade36aeef9f0b})}, + {Sign::POS, -135, MType({0x27c01ffa8e2e3c4b, 0xa78d7a1982c4b08f})}, + {Sign::POS, -135, MType({0x7ba9408dc857d568, 0xa9901e1163cbbbf5})}, + {Sign::POS, -135, MType({0x104d1e3331d3b4fa, 0xab92ca1e93038d76})}, + {Sign::POS, -135, MType({0x9343c846fcdf9137, 0xad957e41516e0158})}, + {Sign::POS, -135, MType({0x3977e89aec59bfa2, 0xaf780e79b2514889})}, + {Sign::POS, -135, MType({0x913d4e3dc55c3e6e, 0xb17ad246ef3713bc})}, + {Sign::POS, -135, MType({0x777b52a9e70d8bcc, 0xb37d9e2a7a56b09d})}, + {Sign::POS, -135, MType({0x55de916fd30591de, 
0xb580722494be0c91})}, + {Sign::POS, -135, MType({0xe79cfb37be2861e4, 0xb7834e357f7e2600})}, + {Sign::POS, -135, MType({0x90983104d3805389, 0xb986325d7bab0c89})}, + {Sign::POS, -135, MType({0x59e3b2ec71ce64f4, 0xbb68ef9c254aa378})}, + {Sign::POS, -135, MType({0xe83183bf3dd612ef, 0xbd6be3718c77636f})}, + {Sign::POS, -135, MType({0xc4e3b0ac2fd52b7f, 0xbf6edf5ec44d9d35})}, }; // Logarithm range reduction - Step 3: @@ -664,147 +660,147 @@ constexpr double S3[139] = { // r = 2^-21 * round( 2^21 / (1 + i*2^(-21)) ); // s, m, e = RealField(128)(r).log().sign_mantissa_exponent(); // print("{Sign::POS," if (s == -1) else "{Sign::NEG,", e, ", -// format_hex(m), "},"); +// MType({", hex(m % 2^64), ",", hex((m >> 64) % 2^64), "})},"); const Float128 LOG_R3[139] = { - {Sign::NEG, -142, 0x89ff6b38'd5de2622'e39d3faf'42340ed7_u128}, - {Sign::NEG, -142, 0x87ff6f80'ccb40f16'7ff33266'82c02485_u128}, - {Sign::NEG, -142, 0x85ff73b8'c3cdf731'5caf4fbe'343cf928_u128}, - {Sign::NEG, -142, 0x83ff77e0'bb2ade79'cdb6e554'348f7fe8_u128}, - {Sign::NEG, -142, 0x81ff7bf8'b2c9c4f6'0ef009c2'457de25d_u128}, - {Sign::NEG, -143, 0xffff0001'55535558'8883333c'57b57c74_u128}, - {Sign::NEG, -143, 0xfbff07f1'45931f44'f32668f3'9c70d183_u128}, - {Sign::NEG, -143, 0xf7ff0fc1'3650e7bd'459a73c6'a6486fe3_u128}, - {Sign::NEG, -143, 0xf3ff1771'278aaecd'37b18cca'7dd3a29f_u128}, - {Sign::NEG, -143, 0xefff1f01'193e7480'513f610d'21bcfc78_u128}, - {Sign::NEG, -143, 0xebff2671'0b6a38e1'ea190b95'c0690b7b_u128}, - {Sign::NEG, -143, 0xe7ff2dc0'fe0bfbfd'2a150f64'f0ad1743_u128}, - {Sign::NEG, -143, 0xe3ff34f0'f121bddd'090b5174'e995e9d1_u128}, - {Sign::NEG, -143, 0xdfff3c00'e4a97e8c'4ed512b9'b93ea2bf_u128}, - {Sign::NEG, -143, 0xdbff42f0'd8a13e15'934cea21'7ab794a2_u128}, - {Sign::NEG, -143, 0xd7ff49c0'cd06fc83'3e4ebe94'8afd2c76_u128}, - {Sign::NEG, -143, 0xd3ff5070'c1d8b9df'87b7c0f5'bcfee2e1_u128}, - {Sign::NEG, -143, 0xcfff5700'b7147634'77666622'8cb6371b_u128}, - {Sign::NEG, -143, 0xcbff5d70'acb8318b'e53a60f3'514db358_u128}, 
- {Sign::NEG, -143, 0xc7ff63c0'a2c1ebef'79149c3b'6e57fa86_u128}, - {Sign::NEG, -143, 0xc3ff69f0'992fa568'aad734c9'8416df2a_u128}, - {Sign::NEG, -143, 0xbfff7000'8fff5e00'c2657367'9ed28334_u128}, - {Sign::NEG, -143, 0xbbff75f0'872f15c0'd7a3c6db'6540809f_u128}, - {Sign::NEG, -143, 0xb7ff7bc0'7ebcccb1'd277bde6'45fb1aad_u128}, - {Sign::NEG, -143, 0xb3ff8170'76a682dc'6ac80145'a4087793_u128}, - {Sign::NEG, -143, 0xafff8700'6eea3849'287c4db3'0271e265_u128}, - {Sign::NEG, -143, 0xabff8c70'6785ed00'637d6de4'2eeb151e_u128}, - {Sign::NEG, -143, 0xa7ff91c0'6077a10a'43b5348b'6b898a8c_u128}, - {Sign::NEG, -143, 0xa3ff96f0'59bd546e'c10e7657'978bd7f6_u128}, - {Sign::NEG, -143, 0x9fff9c00'53550735'a37503f4'57310e59_u128}, - {Sign::NEG, -143, 0x9bffa0f0'4d3cb966'82d5a40a'3aa022ff_u128}, - {Sign::NEG, -143, 0x97ffa5c0'47726b08'c71e0d3e'e3df5f4d_u128}, - {Sign::NEG, -143, 0x93ffaa70'41f41c23'a83ce035'2bdbd79b_u128}, - {Sign::NEG, -143, 0x8fffaf00'3cbfccbe'2e21a18d'4680e8e4_u128}, - {Sign::NEG, -143, 0x8bffb370'37d37cdf'30bcb3e4'e5dfbd28_u128}, - {Sign::NEG, -143, 0x87ffb7c0'332d2c8d'57ff51d7'5c66d64a_u128}, - {Sign::NEG, -143, 0x83ffbbf0'2ecadbcf'1bdb87fd'be299f43_u128}, - {Sign::NEG, -144, 0xffff8000'55551555'88885dde'02700703_u128}, - {Sign::NEG, -144, 0xf7ff87e0'4d94724c'd259ca80'3a0c1870_u128}, - {Sign::NEG, -144, 0xefff8f80'464fce8f'e5141308'51c7070a_u128}, - {Sign::NEG, -144, 0xe7ff96e0'3f832a2a'30a16898'f3073a64_u128}, - {Sign::NEG, -144, 0xdfff9e00'392a8526'c4ed6451'7b2949ce_u128}, - {Sign::NEG, -144, 0xd7ffa4e0'3341df90'51e4fb4e'32cf6350_u128}, - {Sign::NEG, -144, 0xcfffab80'2dc53971'277672a8'8350bcce_u128}, - {Sign::NEG, -144, 0xc7ffb1e0'28b092d3'35915377'2a490f06_u128}, - {Sign::NEG, -144, 0xbfffb800'23ffebc0'0c265ece'6b481a0e_u128}, - {Sign::NEG, -144, 0xb7ffbde0'1faf4440'db2781c0'3fa132f6_u128}, - {Sign::NEG, -144, 0xafffc380'1bba9c5e'7287c95c'845ada33_u128}, - {Sign::NEG, -144, 0xa7ffc8e0'181df421'423b56b1'263e5a77_u128}, - {Sign::NEG, -144, 
0x9fffce00'14d54b91'5a3752ca'4c076fa3_u128}, - {Sign::NEG, -144, 0x97ffd2e0'11dca2b6'6a71e2b2'7eb3f573_u128}, - {Sign::NEG, -144, 0x8fffd780'0f2ff997'c2e21b72'cff39d8f_u128}, - {Sign::NEG, -144, 0x87ffdbe0'0ccb503c'537ff612'feb7ac9e_u128}, - {Sign::NEG, -145, 0xffffc000'15554d55'58888733'33c57c18_u128}, - {Sign::NEG, -145, 0xefffc7c0'1193f9d1'fa514218'42311c42_u128}, - {Sign::NEG, -145, 0xdfffcf00'0e4aa5fa'2c4ed6de'475b942c_u128}, - {Sign::NEG, -145, 0xcfffd5c0'0b7151d8'ce77678c'bb6fcb88_u128}, - {Sign::NEG, -145, 0xbfffdc00'08fffd78'00c26629'a679ed3b_u128}, - {Sign::NEG, -145, 0xafffe1c0'06eea8e1'23287cb9'd3072728_u128}, - {Sign::NEG, -145, 0x9fffe700'0535541c'd5a37540'fd057315_u128}, - {Sign::NEG, -145, 0x8fffebc0'03cbff32'f82e21c1'fce36810_u128}, - {Sign::NEG, -146, 0xffffe000'05555455'5588887d'dde02702_u128}, - {Sign::NEG, -146, 0xdfffe780'0392aa14'9ac4ed72'adf5b295_u128}, - {Sign::NEG, -146, 0xbfffee00'023fffaf'000c2664'8066b482_u128}, - {Sign::NEG, -146, 0x9ffff380'014d552e'455a3754'b292c077_u128}, - {Sign::NEG, -147, 0xfffff000'01555535'55588888'33333c58_u128}, - {Sign::NEG, -147, 0xbffff700'008ffff5'e000c266'5736679f_u128}, - {Sign::NEG, -148, 0xfffff800'00555551'55558888'85ddde02_u128}, - {Sign::NEG, -149, 0xfffffc00'00155554'd5555888'88733334_u128}, - {Sign::POS, 0, 0_u128}, - {Sign::POS, -148, 0x80000200'000aaaaa'eaaaac44'444eeeef_u128}, - {Sign::POS, -147, 0x80000400'002aaaac'aaaac444'459999ac_u128}, - {Sign::POS, -147, 0xc0000900'0090000a'2000c266'7596679f_u128}, - {Sign::POS, -146, 0x80000800'00aaaaba'aaac4444'6eeef381_u128}, - {Sign::POS, -146, 0xa0000c80'014d557c'655a3755'f81815cc_u128}, - {Sign::POS, -146, 0xc0001200'02400051'000c2668'4c66b482_u128}, - {Sign::POS, -146, 0xe0001880'0392ab40'bac4ed7c'40fb07eb_u128}, - {Sign::POS, -145, 0x80001000'02aaab2a'aac44449'999abe2c_u128}, - {Sign::POS, -145, 0x90001440'03cc00cd'082e21d7'9cbb6812_u128}, - {Sign::POS, -145, 0xa0001900'0535568d'd5a37569'adb01dc3_u128}, - {Sign::POS, -145, 
0xb0001e40'06eeac74'33287d01'e8c9d1d9_u128}, - {Sign::POS, -145, 0xc0002400'09000288'00c266a3'2679ed48_u128}, - {Sign::POS, -145, 0xd0002a40'0b7158d1'de776851'22b2764b_u128}, - {Sign::POS, -145, 0xe0003100'0e4aaf5b'2c4ed810'a8063f03_u128}, - {Sign::POS, -145, 0xf0003840'1194062e'0a5143e7'be891c8f_u128}, - {Sign::POS, -144, 0x80002000'0aaaaeaa'ac4444ee'ef3813a1_u128}, - {Sign::POS, -144, 0x88002420'0ccb5a6e'5b7ff7fe'1339025b_u128}, - {Sign::POS, -144, 0x90002880'0f300668'42e21e26'caf39e33_u128}, - {Sign::POS, -144, 0x98002d20'11dcb29e'f271e66f'a5554bc6_u128}, - {Sign::POS, -144, 0xa0003200'14d55f19'5a3757e0'615cc676_u128}, - {Sign::POS, -144, 0xa8003720'181e0bde'ca3b5d82'10ca5cab_u128}, - {Sign::POS, -144, 0xb0003c80'1bbab8f6'f287d25f'3cb032bb_u128}, - {Sign::POS, -144, 0xb8004220'1faf6669'e3278d84'0be28cdb_u128}, - {Sign::POS, -144, 0xc0004800'24001440'0c266dfe'6b482076_u128}, - {Sign::POS, -144, 0xc8004e20'28b0c282'3d9166de'380a6d3d_u128}, - {Sign::POS, -144, 0xd0005480'2dc57139'a7768b35'6ba61e4b_u128}, - {Sign::POS, -144, 0xd8005b20'3342206f'd9e51a18'49db73c1_u128}, - {Sign::POS, -144, 0xe0006200'392ad02e'c4ed8a9d'907eb521_u128}, - {Sign::POS, -144, 0xe8006920'3f838080'b8a197de'a928acd7_u128}, - {Sign::POS, -144, 0xf0007080'46503170'65144cf7'dcc72d3b_u128}, - {Sign::POS, -144, 0xf8007820'4d94e308'da5a1108'890d9f6a_u128}, - {Sign::POS, -143, 0x80004000'2aaacaaa'c4445999'abe2ce2c_u128}, - {Sign::POS, -143, 0x84004410'2ecb2431'1fdbbb4f'3bffc832_u128}, - {Sign::POS, -143, 0x88004840'332d7e1d'97ff8f39'ec91b4ee_u128}, - {Sign::POS, -143, 0x8c004c90'37d3d876'74bcfcf0'b3f0a95d_u128}, - {Sign::POS, -143, 0x90005100'3cc03342'2e21f80c'a6813aff_u128}, - {Sign::POS, -143, 0x94005590'41f48e87'6c3d4629'170ce87f_u128}, - {Sign::POS, -143, 0x98005a40'4772ea4d'071e84e3'b80a8881_u128}, - {Sign::POS, -143, 0x9c005f10'4d3d469a'06d62fdc'bdd6bec3_u128}, - {Sign::POS, -143, 0xa0006400'5355a375'a375a6b7'01dc77c0_u128}, - {Sign::POS, -143, 0xa4006910'59be00e7'450f3318'26ad6b05_u128}, - 
{Sign::POS, -143, 0xa8006e40'60785ef6'83b60ea8'bd0aa459_u128}, - {Sign::POS, -143, 0xac007390'6786bdab'277e6914'69dd13f5_u128}, - {Sign::POS, -143, 0xb0007900'6eeb1d0d'287d6e0a'0d1e25eb_u128}, - {Sign::POS, -143, 0xb4007e90'76a77d24'aec94b3b'e9b060f5_u128}, - {Sign::POS, -143, 0xb8008440'7ebdddfa'1279365f'ce280cce_u128}, - {Sign::POS, -143, 0xbc008a10'87303f95'dba5732f'3e83e04a_u128}, - {Sign::POS, -143, 0xc0009000'9000a200'c2675967'9ed5b754_u128}, - {Sign::POS, -143, 0xc4009610'99310543'aed95aca'5edb5109_u128}, - {Sign::POS, -143, 0xc8009c40'a2c36967'b917091d'2687160f_u128}, - {Sign::POS, -143, 0xcc00a290'acb9ce76'293d1c2a'0378e75d_u128}, - {Sign::POS, -143, 0xd000a900'b7163478'776977bf'9766f5a7_u128}, - {Sign::POS, -143, 0xd400af90'c1da9b78'4bbb31b1'4776a18b_u128}, - {Sign::POS, -143, 0xd800b640'cd09037f'7e5297d7'6c8564ba_u128}, - {Sign::POS, -143, 0xdc00bd10'd8a36c98'1751360f'8461c447_u128}, - {Sign::POS, -143, 0xe000c400'e4abd6cc'4ed9dc3c'63f44c41_u128}, - {Sign::POS, -143, 0xe400cb10'f1244226'8d10a446'6a5894d5_u128}, - {Sign::POS, -143, 0xe800d240'fe0eaeb1'6a1af81b'b4e6510e_u128}, - {Sign::POS, -143, 0xec00d991'0b6d1c77'ae1f97b0'542a677a_u128}, - {Sign::POS, -143, 0xf000e101'19418b84'51469efe'81d014cc_u128}, - {Sign::POS, -143, 0xf400e891'278dfbe2'7bb98c06'd77a18b4_u128}, - {Sign::POS, -143, 0xf800f041'36546d9d'85a344d0'868bed17_u128}, - {Sign::POS, -143, 0xfc00f811'4596e0c0'f7301d69'90e307cc_u128}, - {Sign::POS, -142, 0x80008000'aaabaaac'4446eef3'8140138f_u128}, - {Sign::POS, -142, 0x82008408'b2cbe5b8'10f5e432'96105497_u128}, - {Sign::POS, -142, 0x84008820'bb2d2189'edbd4f83'ef63f730_u128}, - {Sign::POS, -142, 0x86008c48'c3d05e27'feb654fd'541c638e_u128}, - {Sign::POS, -142, 0x88009080'ccb69b98'7ffadeb8'882f7674_u128}, - {Sign::POS, -142, 0x8a0094c8'd5e0d9e1'c5a59fd3'6bd44397_u128}, + {Sign::NEG, -142, MType({0xe39d3faf42340ed7, 0x89ff6b38d5de2622})}, + {Sign::NEG, -142, MType({0x7ff3326682c02485, 0x87ff6f80ccb40f16})}, + {Sign::NEG, -142, 
MType({0x5caf4fbe343cf928, 0x85ff73b8c3cdf731})}, + {Sign::NEG, -142, MType({0xcdb6e554348f7fe8, 0x83ff77e0bb2ade79})}, + {Sign::NEG, -142, MType({0xef009c2457de25d, 0x81ff7bf8b2c9c4f6})}, + {Sign::NEG, -143, MType({0x8883333c57b57c74, 0xffff000155535558})}, + {Sign::NEG, -143, MType({0xf32668f39c70d183, 0xfbff07f145931f44})}, + {Sign::NEG, -143, MType({0x459a73c6a6486fe3, 0xf7ff0fc13650e7bd})}, + {Sign::NEG, -143, MType({0x37b18cca7dd3a29f, 0xf3ff1771278aaecd})}, + {Sign::NEG, -143, MType({0x513f610d21bcfc78, 0xefff1f01193e7480})}, + {Sign::NEG, -143, MType({0xea190b95c0690b7b, 0xebff26710b6a38e1})}, + {Sign::NEG, -143, MType({0x2a150f64f0ad1743, 0xe7ff2dc0fe0bfbfd})}, + {Sign::NEG, -143, MType({0x90b5174e995e9d1, 0xe3ff34f0f121bddd})}, + {Sign::NEG, -143, MType({0x4ed512b9b93ea2bf, 0xdfff3c00e4a97e8c})}, + {Sign::NEG, -143, MType({0x934cea217ab794a2, 0xdbff42f0d8a13e15})}, + {Sign::NEG, -143, MType({0x3e4ebe948afd2c76, 0xd7ff49c0cd06fc83})}, + {Sign::NEG, -143, MType({0x87b7c0f5bcfee2e1, 0xd3ff5070c1d8b9df})}, + {Sign::NEG, -143, MType({0x776666228cb6371b, 0xcfff5700b7147634})}, + {Sign::NEG, -143, MType({0xe53a60f3514db358, 0xcbff5d70acb8318b})}, + {Sign::NEG, -143, MType({0x79149c3b6e57fa86, 0xc7ff63c0a2c1ebef})}, + {Sign::NEG, -143, MType({0xaad734c98416df2a, 0xc3ff69f0992fa568})}, + {Sign::NEG, -143, MType({0xc26573679ed28334, 0xbfff70008fff5e00})}, + {Sign::NEG, -143, MType({0xd7a3c6db6540809f, 0xbbff75f0872f15c0})}, + {Sign::NEG, -143, MType({0xd277bde645fb1aad, 0xb7ff7bc07ebcccb1})}, + {Sign::NEG, -143, MType({0x6ac80145a4087793, 0xb3ff817076a682dc})}, + {Sign::NEG, -143, MType({0x287c4db30271e265, 0xafff87006eea3849})}, + {Sign::NEG, -143, MType({0x637d6de42eeb151e, 0xabff8c706785ed00})}, + {Sign::NEG, -143, MType({0x43b5348b6b898a8c, 0xa7ff91c06077a10a})}, + {Sign::NEG, -143, MType({0xc10e7657978bd7f6, 0xa3ff96f059bd546e})}, + {Sign::NEG, -143, MType({0xa37503f457310e59, 0x9fff9c0053550735})}, + {Sign::NEG, -143, MType({0x82d5a40a3aa022ff, 
0x9bffa0f04d3cb966})}, + {Sign::NEG, -143, MType({0xc71e0d3ee3df5f4d, 0x97ffa5c047726b08})}, + {Sign::NEG, -143, MType({0xa83ce0352bdbd79b, 0x93ffaa7041f41c23})}, + {Sign::NEG, -143, MType({0x2e21a18d4680e8e4, 0x8fffaf003cbfccbe})}, + {Sign::NEG, -143, MType({0x30bcb3e4e5dfbd28, 0x8bffb37037d37cdf})}, + {Sign::NEG, -143, MType({0x57ff51d75c66d64a, 0x87ffb7c0332d2c8d})}, + {Sign::NEG, -143, MType({0x1bdb87fdbe299f43, 0x83ffbbf02ecadbcf})}, + {Sign::NEG, -144, MType({0x88885dde02700703, 0xffff800055551555})}, + {Sign::NEG, -144, MType({0xd259ca803a0c1870, 0xf7ff87e04d94724c})}, + {Sign::NEG, -144, MType({0xe514130851c7070a, 0xefff8f80464fce8f})}, + {Sign::NEG, -144, MType({0x30a16898f3073a64, 0xe7ff96e03f832a2a})}, + {Sign::NEG, -144, MType({0xc4ed64517b2949ce, 0xdfff9e00392a8526})}, + {Sign::NEG, -144, MType({0x51e4fb4e32cf6350, 0xd7ffa4e03341df90})}, + {Sign::NEG, -144, MType({0x277672a88350bcce, 0xcfffab802dc53971})}, + {Sign::NEG, -144, MType({0x359153772a490f06, 0xc7ffb1e028b092d3})}, + {Sign::NEG, -144, MType({0xc265ece6b481a0e, 0xbfffb80023ffebc0})}, + {Sign::NEG, -144, MType({0xdb2781c03fa132f6, 0xb7ffbde01faf4440})}, + {Sign::NEG, -144, MType({0x7287c95c845ada33, 0xafffc3801bba9c5e})}, + {Sign::NEG, -144, MType({0x423b56b1263e5a77, 0xa7ffc8e0181df421})}, + {Sign::NEG, -144, MType({0x5a3752ca4c076fa3, 0x9fffce0014d54b91})}, + {Sign::NEG, -144, MType({0x6a71e2b27eb3f573, 0x97ffd2e011dca2b6})}, + {Sign::NEG, -144, MType({0xc2e21b72cff39d8f, 0x8fffd7800f2ff997})}, + {Sign::NEG, -144, MType({0x537ff612feb7ac9e, 0x87ffdbe00ccb503c})}, + {Sign::NEG, -145, MType({0x5888873333c57c18, 0xffffc00015554d55})}, + {Sign::NEG, -145, MType({0xfa51421842311c42, 0xefffc7c01193f9d1})}, + {Sign::NEG, -145, MType({0x2c4ed6de475b942c, 0xdfffcf000e4aa5fa})}, + {Sign::NEG, -145, MType({0xce77678cbb6fcb88, 0xcfffd5c00b7151d8})}, + {Sign::NEG, -145, MType({0xc26629a679ed3b, 0xbfffdc0008fffd78})}, + {Sign::NEG, -145, MType({0x23287cb9d3072728, 0xafffe1c006eea8e1})}, + {Sign::NEG, -145, 
MType({0xd5a37540fd057315, 0x9fffe7000535541c})}, + {Sign::NEG, -145, MType({0xf82e21c1fce36810, 0x8fffebc003cbff32})}, + {Sign::NEG, -146, MType({0x5588887ddde02702, 0xffffe00005555455})}, + {Sign::NEG, -146, MType({0x9ac4ed72adf5b295, 0xdfffe7800392aa14})}, + {Sign::NEG, -146, MType({0xc26648066b482, 0xbfffee00023fffaf})}, + {Sign::NEG, -146, MType({0x455a3754b292c077, 0x9ffff380014d552e})}, + {Sign::NEG, -147, MType({0x5558888833333c58, 0xfffff00001555535})}, + {Sign::NEG, -147, MType({0xe000c2665736679f, 0xbffff700008ffff5})}, + {Sign::NEG, -148, MType({0x5555888885ddde02, 0xfffff80000555551})}, + {Sign::NEG, -149, MType({0xd555588888733334, 0xfffffc0000155554})}, + {Sign::POS, 0, MType({0x0, 0x0})}, + {Sign::POS, -148, MType({0xeaaaac44444eeeef, 0x80000200000aaaaa})}, + {Sign::POS, -147, MType({0xaaaac444459999ac, 0x80000400002aaaac})}, + {Sign::POS, -147, MType({0x2000c2667596679f, 0xc00009000090000a})}, + {Sign::POS, -146, MType({0xaaac44446eeef381, 0x8000080000aaaaba})}, + {Sign::POS, -146, MType({0x655a3755f81815cc, 0xa0000c80014d557c})}, + {Sign::POS, -146, MType({0xc26684c66b482, 0xc000120002400051})}, + {Sign::POS, -146, MType({0xbac4ed7c40fb07eb, 0xe00018800392ab40})}, + {Sign::POS, -145, MType({0xaac44449999abe2c, 0x8000100002aaab2a})}, + {Sign::POS, -145, MType({0x82e21d79cbb6812, 0x9000144003cc00cd})}, + {Sign::POS, -145, MType({0xd5a37569adb01dc3, 0xa00019000535568d})}, + {Sign::POS, -145, MType({0x33287d01e8c9d1d9, 0xb0001e4006eeac74})}, + {Sign::POS, -145, MType({0xc266a32679ed48, 0xc000240009000288})}, + {Sign::POS, -145, MType({0xde77685122b2764b, 0xd0002a400b7158d1})}, + {Sign::POS, -145, MType({0x2c4ed810a8063f03, 0xe00031000e4aaf5b})}, + {Sign::POS, -145, MType({0xa5143e7be891c8f, 0xf00038401194062e})}, + {Sign::POS, -144, MType({0xac4444eeef3813a1, 0x800020000aaaaeaa})}, + {Sign::POS, -144, MType({0x5b7ff7fe1339025b, 0x880024200ccb5a6e})}, + {Sign::POS, -144, MType({0x42e21e26caf39e33, 0x900028800f300668})}, + {Sign::POS, -144, 
MType({0xf271e66fa5554bc6, 0x98002d2011dcb29e})}, + {Sign::POS, -144, MType({0x5a3757e0615cc676, 0xa000320014d55f19})}, + {Sign::POS, -144, MType({0xca3b5d8210ca5cab, 0xa8003720181e0bde})}, + {Sign::POS, -144, MType({0xf287d25f3cb032bb, 0xb0003c801bbab8f6})}, + {Sign::POS, -144, MType({0xe3278d840be28cdb, 0xb80042201faf6669})}, + {Sign::POS, -144, MType({0xc266dfe6b482076, 0xc000480024001440})}, + {Sign::POS, -144, MType({0x3d9166de380a6d3d, 0xc8004e2028b0c282})}, + {Sign::POS, -144, MType({0xa7768b356ba61e4b, 0xd00054802dc57139})}, + {Sign::POS, -144, MType({0xd9e51a1849db73c1, 0xd8005b203342206f})}, + {Sign::POS, -144, MType({0xc4ed8a9d907eb521, 0xe0006200392ad02e})}, + {Sign::POS, -144, MType({0xb8a197dea928acd7, 0xe80069203f838080})}, + {Sign::POS, -144, MType({0x65144cf7dcc72d3b, 0xf000708046503170})}, + {Sign::POS, -144, MType({0xda5a1108890d9f6a, 0xf80078204d94e308})}, + {Sign::POS, -143, MType({0xc4445999abe2ce2c, 0x800040002aaacaaa})}, + {Sign::POS, -143, MType({0x1fdbbb4f3bffc832, 0x840044102ecb2431})}, + {Sign::POS, -143, MType({0x97ff8f39ec91b4ee, 0x88004840332d7e1d})}, + {Sign::POS, -143, MType({0x74bcfcf0b3f0a95d, 0x8c004c9037d3d876})}, + {Sign::POS, -143, MType({0x2e21f80ca6813aff, 0x900051003cc03342})}, + {Sign::POS, -143, MType({0x6c3d4629170ce87f, 0x9400559041f48e87})}, + {Sign::POS, -143, MType({0x71e84e3b80a8881, 0x98005a404772ea4d})}, + {Sign::POS, -143, MType({0x6d62fdcbdd6bec3, 0x9c005f104d3d469a})}, + {Sign::POS, -143, MType({0xa375a6b701dc77c0, 0xa00064005355a375})}, + {Sign::POS, -143, MType({0x450f331826ad6b05, 0xa400691059be00e7})}, + {Sign::POS, -143, MType({0x83b60ea8bd0aa459, 0xa8006e4060785ef6})}, + {Sign::POS, -143, MType({0x277e691469dd13f5, 0xac0073906786bdab})}, + {Sign::POS, -143, MType({0x287d6e0a0d1e25eb, 0xb00079006eeb1d0d})}, + {Sign::POS, -143, MType({0xaec94b3be9b060f5, 0xb4007e9076a77d24})}, + {Sign::POS, -143, MType({0x1279365fce280cce, 0xb80084407ebdddfa})}, + {Sign::POS, -143, MType({0xdba5732f3e83e04a, 
0xbc008a1087303f95})}, + {Sign::POS, -143, MType({0xc26759679ed5b754, 0xc00090009000a200})}, + {Sign::POS, -143, MType({0xaed95aca5edb5109, 0xc400961099310543})}, + {Sign::POS, -143, MType({0xb917091d2687160f, 0xc8009c40a2c36967})}, + {Sign::POS, -143, MType({0x293d1c2a0378e75d, 0xcc00a290acb9ce76})}, + {Sign::POS, -143, MType({0x776977bf9766f5a7, 0xd000a900b7163478})}, + {Sign::POS, -143, MType({0x4bbb31b14776a18b, 0xd400af90c1da9b78})}, + {Sign::POS, -143, MType({0x7e5297d76c8564ba, 0xd800b640cd09037f})}, + {Sign::POS, -143, MType({0x1751360f8461c447, 0xdc00bd10d8a36c98})}, + {Sign::POS, -143, MType({0x4ed9dc3c63f44c41, 0xe000c400e4abd6cc})}, + {Sign::POS, -143, MType({0x8d10a4466a5894d5, 0xe400cb10f1244226})}, + {Sign::POS, -143, MType({0x6a1af81bb4e6510e, 0xe800d240fe0eaeb1})}, + {Sign::POS, -143, MType({0xae1f97b0542a677a, 0xec00d9910b6d1c77})}, + {Sign::POS, -143, MType({0x51469efe81d014cc, 0xf000e10119418b84})}, + {Sign::POS, -143, MType({0x7bb98c06d77a18b4, 0xf400e891278dfbe2})}, + {Sign::POS, -143, MType({0x85a344d0868bed17, 0xf800f04136546d9d})}, + {Sign::POS, -143, MType({0xf7301d6990e307cc, 0xfc00f8114596e0c0})}, + {Sign::POS, -142, MType({0x4446eef38140138f, 0x80008000aaabaaac})}, + {Sign::POS, -142, MType({0x10f5e43296105497, 0x82008408b2cbe5b8})}, + {Sign::POS, -142, MType({0xedbd4f83ef63f730, 0x84008820bb2d2189})}, + {Sign::POS, -142, MType({0xfeb654fd541c638e, 0x86008c48c3d05e27})}, + {Sign::POS, -142, MType({0x7ffadeb8882f7674, 0x88009080ccb69b98})}, + {Sign::POS, -142, MType({0xc5a59fd36bd44397, 0x8a0094c8d5e0d9e1})}, }; // Minimax polynomial generated by Sollya with: @@ -814,10 +810,10 @@ const Float128 LOG_R3[139] = { // > dirtyinfnorm(log(1 + x)/x - 1 - x*P, [-0x1.01928p-22 , 0x1p-22]); // 0x1.ce1e...p-116 const Float128 BIG_COEFFS[4]{ - {Sign::POS, -130, 0xccccccd7'4818e397'7ed78465'd460315b_u128}, - {Sign::NEG, -129, 0x80000000'000478b0'c6388a23'871ce156_u128}, - {Sign::POS, -129, 0xaaaaaaaa'aaaaaaaa'aa807bd8'67763262_u128}, - {Sign::NEG, 
-128, 0x80000000'00000000'00000000'00000000_u128}, + {Sign::POS, -130, MType({0x7ed78465d460315b, 0xccccccd74818e397})}, + {Sign::NEG, -129, MType({0xc6388a23871ce156, 0x80000000000478b0})}, + {Sign::POS, -129, MType({0xaa807bd867763262, 0xaaaaaaaaaaaaaaaa})}, + {Sign::NEG, -128, MType({0x0, 0x8000000000000000})}, }; LIBC_INLINE double log1p_accurate(int e_x, int index, diff --git a/libc/src/math/generic/log2.cpp b/libc/src/math/generic/log2.cpp index 9657b344ffbd7c..ab392166475c70 100644 --- a/libc/src/math/generic/log2.cpp +++ b/libc/src/math/generic/log2.cpp @@ -14,7 +14,6 @@ #include "src/__support/FPUtil/dyadic_float.h" #include "src/__support/FPUtil/multiply_add.h" #include "src/__support/common.h" -#include "src/__support/integer_literals.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include "common_constants.h" @@ -24,8 +23,8 @@ namespace LIBC_NAMESPACE { // 128-bit precision dyadic floating point numbers. using Float128 = typename fputil::DyadicFloat<128>; +using MType = typename Float128::MantissaType; using Sign = fputil::Sign; -using LIBC_NAMESPACE::operator""_u128; namespace { @@ -168,144 +167,142 @@ const fputil::DoubleDouble LOG_R1[128] = { const LogRR LOG2_TABLE = { // -log2(r) with 128-bit precision generated by SageMath with: - // def format_hex(value): - // l = hex(value)[2:] - // n = 8 - // x = [l[i:i + n] for i in range(0, len(l), n)] - // return "0x" + "'".join(x) + "_uint128" + // // for i in range(1, 127): // r = 2^-8 * ceil( 2^8 * (1 - 2^(-8)) / (1 + i*2^(-7)) ); // s, m, e = RealField(128)(r).log2().sign_mantissa_exponent(); - // print("{Sign::POS,", e, ", format_hex(m), "},"); + // print("{Sign::POS,", e, ", MType({", hex(m % 2^64), ",", hex((m >> 64) + // % 2^64), + // "})},"); /* .step_1 = */ { - {Sign::POS, 0, 0_u128}, - {Sign::POS, -134, 0xb963dd10'7b993ada'e8c25163'0adb856a_u128}, - {Sign::POS, -133, 0xba1f7430'f9aab1b2'a41b08fb'e05f82d0_u128}, - {Sign::POS, -132, 0x8c25c726'2b57c149'1f06c085'bc1b865d_u128}, - 
{Sign::POS, -132, 0xbb9ca64e'cac6aaef'2e1c07f0'438ebac0_u128}, - {Sign::POS, -132, 0xeb75e8f8'ff5ff022'aacc0e21'd6541224_u128}, - {Sign::POS, -131, 0x8dd99530'02a4e866'31514aef'39ce6303_u128}, - {Sign::POS, -131, 0xa62b07f3'457c4070'50799bea'aab2940c_u128}, - {Sign::POS, -131, 0xbeb024b6'7dda6339'da288fc6'15a727dc_u128}, - {Sign::POS, -131, 0xcb0657cd'5dbe4f6f'22dbbace'd44516ce_u128}, - {Sign::POS, -131, 0xe3da945b'878e27d0'd939dcee'cdd9ce05_u128}, - {Sign::POS, -131, 0xfce4aee0'e88b2749'9596a8e2'e84c8f45_u128}, - {Sign::POS, -130, 0x84bf1c67'3032495d'243efd93'25954cfe_u128}, - {Sign::POS, -130, 0x916d6e15'59a4b696'91d79938'e7226384_u128}, - {Sign::POS, -130, 0x9e37db28'66f2850b'22563c9e'd9462091_u128}, - {Sign::POS, -130, 0xa4a7c31d'c6f9a5d5'3a53ca11'81015ada_u128}, - {Sign::POS, -130, 0xb19d45fa'1be70855'3eb8023e'ed65d601_u128}, - {Sign::POS, -130, 0xb823018e'3cfc25f0'ce5cabbd'2d753d9b_u128}, - {Sign::POS, -130, 0xc544c055'fde99333'54dbf16f'b0695ee3_u128}, - {Sign::POS, -130, 0xcbe0e589'e3f6042d'5196a85a'067c6739_u128}, - {Sign::POS, -130, 0xd930124b'ea9a2c66'f349845e'48955078_u128}, - {Sign::POS, -130, 0xdfe33d3f'ffa66037'815ef705'cfaef035_u128}, - {Sign::POS, -130, 0xed61169f'220e97f2'2ba704dc'aa76f41d_u128}, - {Sign::POS, -130, 0xf42be9e9'b09b3def'2062f36b'c14d0d93_u128}, - {Sign::POS, -129, 0x80ecdde7'd30ea2ed'13288019'4144b02b_u128}, - {Sign::POS, -129, 0x845e706c'afd1bf61'54880de6'3812fd49_u128}, - {Sign::POS, -129, 0x8b4e029b'1f8ac391'a87c02ea'f36e2c29_u128}, - {Sign::POS, -129, 0x8ecc164e'a93841ae'9804237e'c8d9431d_u128}, - {Sign::POS, -129, 0x924e6958'9e6b6268'20f81ca9'5d9e7968_u128}, - {Sign::POS, -129, 0x995ff71b'8773432d'124bc6f1'acf95dc4_u128}, - {Sign::POS, -129, 0x9cef470a'acfb7bf9'5a5e8e21'bff3336b_u128}, - {Sign::POS, -129, 0xa08300be'1f651473'4e53fa33'29f65894_u128}, - {Sign::POS, -129, 0xa7b7dd96'762cc3c7'2742d729'6a39eed6_u128}, - {Sign::POS, -129, 0xab591735'abc724e4'f359c554'4bc5e134_u128}, - {Sign::POS, -129, 
0xaefee78f'75707221'6b6c874d'd96e1d75_u128}, - {Sign::POS, -129, 0xb2a95a4c'c313bb59'21006678'c0a5c390_u128}, - {Sign::POS, -129, 0xb6587b43'2e47501b'6d40900b'25024b32_u128}, - {Sign::POS, -129, 0xbdc4f816'7955698f'89e2eb55'3b279b3d_u128}, - {Sign::POS, -129, 0xc1826c86'08fe9951'd58525aa'd392ca50_u128}, - {Sign::POS, -129, 0xc544c055'fde99333'54dbf16f'b0695ee3_u128}, - {Sign::POS, -129, 0xc90c0049'26e9dbfb'88d5eae3'326327bb_u128}, - {Sign::POS, -129, 0xccd83954'b6359379'46dfa05b'ddfded8c_u128}, - {Sign::POS, -129, 0xd47fcb8c'0852f0c0'bfe9dbeb'f2e8a45e_u128}, - {Sign::POS, -129, 0xd85b3fa7'a3407fa8'7b11f1c5'160c515c_u128}, - {Sign::POS, -129, 0xdc3be2bd'8d837f7f'1339e567'7ec44dd0_u128}, - {Sign::POS, -129, 0xe021c2cf'17ed9bdb'ea2b8c7b'b0ee9c8b_u128}, - {Sign::POS, -129, 0xe40cee16'a2ff21c4'aec56233'2791fe38_u128}, - {Sign::POS, -129, 0xe7fd7308'd6895b14'71682eba'cca79cfa_u128}, - {Sign::POS, -129, 0xebf36055'e1abc61e'a5ad5ce9'fb5a7bb6_u128}, - {Sign::POS, -129, 0xefeec4ea'c371584e'32251905'31a852c5_u128}, - {Sign::POS, -129, 0xf3efaff2'9c559a77'da8ad649'da21eab0_u128}, - {Sign::POS, -129, 0xf7f630d8'08fc2ada'4c3e2ea7'c15c3d1e_u128}, - {Sign::POS, -129, 0xfc025746'86680cc6'bcb9bfa9'852e0d35_u128}, - {Sign::POS, -128, 0x800a1995'f0019518'ce032f41'd1e774e8_u128}, - {Sign::POS, -128, 0x8215ea5c'd3e4c4c7'9b39ffee'bc29372a_u128}, - {Sign::POS, -128, 0x8424a633'5c777e0b'87f95f1b'efb6f806_u128}, - {Sign::POS, -128, 0x86365578'62acb7ce'b987b42e'3bb332a1_u128}, - {Sign::POS, -128, 0x884b00ae'f726cec5'139a7ba8'3bf2d136_u128}, - {Sign::POS, -128, 0x8a62b07f'3457c407'050799be'aaab2941_u128}, - {Sign::POS, -128, 0x8c7d6db7'169e0cda'8bd74461'7e9b7d52_u128}, - {Sign::POS, -128, 0x8e9b414b'5a92a606'046ad444'333ceb10_u128}, - {Sign::POS, -128, 0x90bc3458'61bf3d52'ef4c737f'ba4f5d66_u128}, - {Sign::POS, -128, 0x92e05023'1df57d6f'ae441c09'd761c549_u128}, - {Sign::POS, -128, 0x95079e1a'0382dc79'6e36aa9c'e90a3879_u128}, - {Sign::POS, -128, 0x973227d6'027ebd8a'0efca1a1'84e93809_u128}, - 
{Sign::POS, -128, 0x973227d6'027ebd8a'0efca1a1'84e93809_u128}, - {Sign::POS, -128, 0x995ff71b'8773432d'124bc6f1'acf95dc4_u128}, - {Sign::POS, -128, 0x9b9115db'83a3dd2d'352bea51'e58ea9e8_u128}, - {Sign::POS, -128, 0x9dc58e34'7d37696d'266d6cdc'959153bc_u128}, - {Sign::POS, -128, 0x9ffd6a73'a78eaf35'4527d82c'8214ddca_u128}, - {Sign::POS, -128, 0xa238b516'0413106e'404cabb7'6d600e3c_u128}, - {Sign::POS, -128, 0xa238b516'0413106e'404cabb7'6d600e3c_u128}, - {Sign::POS, -128, 0xa47778c9'8bcc86a1'cab7d2ec'23f0eef3_u128}, - {Sign::POS, -128, 0xa6b9c06e'6211646b'761c48dd'859de2d3_u128}, - {Sign::POS, -128, 0xa8ff9718'10a5e181'7fd3b7d7'e5d148bb_u128}, - {Sign::POS, -128, 0xab49080e'cda53208'c27c6780'd92b4d11_u128}, - {Sign::POS, -128, 0xad961ed0'cb91d406'db502402'c94092cd_u128}, - {Sign::POS, -128, 0xad961ed0'cb91d406'db502402'c94092cd_u128}, - {Sign::POS, -128, 0xafe6e713'93eeda29'3432ef6b'732b6843_u128}, - {Sign::POS, -128, 0xb23b6cc5'6cc84c99'bb324da7'e046e792_u128}, - {Sign::POS, -128, 0xb493bc0e'c9954243'b21709ce'430c8e24_u128}, - {Sign::POS, -128, 0xb493bc0e'c9954243'b21709ce'430c8e24_u128}, - {Sign::POS, -128, 0xb6efe153'c7e319f6'e91ad16e'cff10111_u128}, - {Sign::POS, -128, 0xb94fe935'b83e3eb5'ce31e481'cd797e79_u128}, - {Sign::POS, -128, 0xbbb3e094'b3d228d3'da3e961a'96c580fa_u128}, - {Sign::POS, -128, 0xbbb3e094'b3d228d3'da3e961a'96c580fa_u128}, - {Sign::POS, -128, 0xbe1bd491'3f3fda43'f396598a'ae91499a_u128}, - {Sign::POS, -128, 0xc087d28d'fb2febb8'ae4cceb0'f621941b_u128}, - {Sign::POS, -128, 0xc087d28d'fb2febb8'ae4cceb0'f621941b_u128}, - {Sign::POS, -128, 0xc2f7e831'632b6670'6c1855c4'2078f81b_u128}, - {Sign::POS, -128, 0xc56c2367'9b4d206e'169535fb'8bf577c8_u128}, - {Sign::POS, -128, 0xc56c2367'9b4d206e'169535fb'8bf577c8_u128}, - {Sign::POS, -128, 0xc7e49264'4d64237e'3b24cecc'60217942_u128}, - {Sign::POS, -128, 0xca6143a4'9626d820'3dc2687f'cf939696_u128}, - {Sign::POS, -128, 0xca6143a4'9626d820'3dc2687f'cf939696_u128}, - {Sign::POS, -128, 
0xcce245f1'031e41fa'0a62e6ad'd1a901a0_u128}, - {Sign::POS, -128, 0xcf67a85f'a1f89a04'5bb6e231'38ad51e1_u128}, - {Sign::POS, -128, 0xcf67a85f'a1f89a04'5bb6e231'38ad51e1_u128}, - {Sign::POS, -128, 0xd1f17a56'21fb01ac'7fc60a51'03092bae_u128}, - {Sign::POS, -128, 0xd47fcb8c'0852f0c0'bfe9dbeb'f2e8a45e_u128}, - {Sign::POS, -128, 0xd47fcb8c'0852f0c0'bfe9dbeb'f2e8a45e_u128}, - {Sign::POS, -128, 0xd712ac0c'f811659d'8e2d7d37'8127d823_u128}, - {Sign::POS, -128, 0xd9aa2c3b'0ea3cbc1'5c1a7f14'b168b365_u128}, - {Sign::POS, -128, 0xd9aa2c3b'0ea3cbc1'5c1a7f14'b168b365_u128}, - {Sign::POS, -128, 0xdc465cd1'55a90942'b7579f0f'8d3d514b_u128}, - {Sign::POS, -128, 0xdc465cd1'55a90942'b7579f0f'8d3d514b_u128}, - {Sign::POS, -128, 0xdee74ee6'4b0c38d3'b087205e'b55aea85_u128}, - {Sign::POS, -128, 0xe18d13ee'805a4de3'424a2623'd60dfb16_u128}, - {Sign::POS, -128, 0xe18d13ee'805a4de3'424a2623'd60dfb16_u128}, - {Sign::POS, -128, 0xe437bdbf'5254459c'4d3a591a'e6854787_u128}, - {Sign::POS, -128, 0xe437bdbf'5254459c'4d3a591a'e6854787_u128}, - {Sign::POS, -128, 0xe6e75e91'b9cca551'8dcdb6b2'4c5c5cdf_u128}, - {Sign::POS, -128, 0xe99c0905'36ece983'33ac7d9e'bba8a53c_u128}, - {Sign::POS, -128, 0xe99c0905'36ece983'33ac7d9e'bba8a53c_u128}, - {Sign::POS, -128, 0xec55d022'd80e3d27'fb2eede4'b59d8959_u128}, - {Sign::POS, -128, 0xec55d022'd80e3d27'fb2eede4'b59d8959_u128}, - {Sign::POS, -128, 0xef14c760'5d60654c'308b4546'66de8f99_u128}, - {Sign::POS, -128, 0xef14c760'5d60654c'308b4546'66de8f99_u128}, - {Sign::POS, -128, 0xf1d902a3'7aaa5085'8383cb0c'e23bebd4_u128}, - {Sign::POS, -128, 0xf1d902a3'7aaa5085'8383cb0c'e23bebd4_u128}, - {Sign::POS, -128, 0xf4a29645'38813c67'64fc87b4'a41f7b70_u128}, - {Sign::POS, -128, 0xf4a29645'38813c67'64fc87b4'a41f7b70_u128}, - {Sign::POS, -128, 0xf7719715'7665f689'3f5d7d82'b65c5686_u128}, - {Sign::POS, -128, 0xf7719715'7665f689'3f5d7d82'b65c5686_u128}, - {Sign::POS, -128, 0xfa461a5e'8f4b759d'6476077b'9fbd41ae_u128}, - {Sign::POS, -128, 0xfa461a5e'8f4b759d'6476077b'9fbd41ae_u128}, - 
{Sign::POS, -128, 0xfd2035e9'221ef5d0'0e3909ff'd0d61778_u128}, - {Sign::POS, 0, 0_u128}, + {Sign::POS, 0, MType(0)}, + {Sign::POS, -134, MType({0xe8c251630adb856a, 0xb963dd107b993ada})}, + {Sign::POS, -133, MType({0xa41b08fbe05f82d0, 0xba1f7430f9aab1b2})}, + {Sign::POS, -132, MType({0x1f06c085bc1b865d, 0x8c25c7262b57c149})}, + {Sign::POS, -132, MType({0x2e1c07f0438ebac0, 0xbb9ca64ecac6aaef})}, + {Sign::POS, -132, MType({0xaacc0e21d6541224, 0xeb75e8f8ff5ff022})}, + {Sign::POS, -131, MType({0x31514aef39ce6303, 0x8dd9953002a4e866})}, + {Sign::POS, -131, MType({0x50799beaaab2940c, 0xa62b07f3457c4070})}, + {Sign::POS, -131, MType({0xda288fc615a727dc, 0xbeb024b67dda6339})}, + {Sign::POS, -131, MType({0x22dbbaced44516ce, 0xcb0657cd5dbe4f6f})}, + {Sign::POS, -131, MType({0xd939dceecdd9ce05, 0xe3da945b878e27d0})}, + {Sign::POS, -131, MType({0x9596a8e2e84c8f45, 0xfce4aee0e88b2749})}, + {Sign::POS, -130, MType({0x243efd9325954cfe, 0x84bf1c673032495d})}, + {Sign::POS, -130, MType({0x91d79938e7226384, 0x916d6e1559a4b696})}, + {Sign::POS, -130, MType({0x22563c9ed9462091, 0x9e37db2866f2850b})}, + {Sign::POS, -130, MType({0x3a53ca1181015ada, 0xa4a7c31dc6f9a5d5})}, + {Sign::POS, -130, MType({0x3eb8023eed65d601, 0xb19d45fa1be70855})}, + {Sign::POS, -130, MType({0xce5cabbd2d753d9b, 0xb823018e3cfc25f0})}, + {Sign::POS, -130, MType({0x54dbf16fb0695ee3, 0xc544c055fde99333})}, + {Sign::POS, -130, MType({0x5196a85a067c6739, 0xcbe0e589e3f6042d})}, + {Sign::POS, -130, MType({0xf349845e48955078, 0xd930124bea9a2c66})}, + {Sign::POS, -130, MType({0x815ef705cfaef035, 0xdfe33d3fffa66037})}, + {Sign::POS, -130, MType({0x2ba704dcaa76f41d, 0xed61169f220e97f2})}, + {Sign::POS, -130, MType({0x2062f36bc14d0d93, 0xf42be9e9b09b3def})}, + {Sign::POS, -129, MType({0x132880194144b02b, 0x80ecdde7d30ea2ed})}, + {Sign::POS, -129, MType({0x54880de63812fd49, 0x845e706cafd1bf61})}, + {Sign::POS, -129, MType({0xa87c02eaf36e2c29, 0x8b4e029b1f8ac391})}, + {Sign::POS, -129, MType({0x9804237ec8d9431d, 
0x8ecc164ea93841ae})}, + {Sign::POS, -129, MType({0x20f81ca95d9e7968, 0x924e69589e6b6268})}, + {Sign::POS, -129, MType({0x124bc6f1acf95dc4, 0x995ff71b8773432d})}, + {Sign::POS, -129, MType({0x5a5e8e21bff3336b, 0x9cef470aacfb7bf9})}, + {Sign::POS, -129, MType({0x4e53fa3329f65894, 0xa08300be1f651473})}, + {Sign::POS, -129, MType({0x2742d7296a39eed6, 0xa7b7dd96762cc3c7})}, + {Sign::POS, -129, MType({0xf359c5544bc5e134, 0xab591735abc724e4})}, + {Sign::POS, -129, MType({0x6b6c874dd96e1d75, 0xaefee78f75707221})}, + {Sign::POS, -129, MType({0x21006678c0a5c390, 0xb2a95a4cc313bb59})}, + {Sign::POS, -129, MType({0x6d40900b25024b32, 0xb6587b432e47501b})}, + {Sign::POS, -129, MType({0x89e2eb553b279b3d, 0xbdc4f8167955698f})}, + {Sign::POS, -129, MType({0xd58525aad392ca50, 0xc1826c8608fe9951})}, + {Sign::POS, -129, MType({0x54dbf16fb0695ee3, 0xc544c055fde99333})}, + {Sign::POS, -129, MType({0x88d5eae3326327bb, 0xc90c004926e9dbfb})}, + {Sign::POS, -129, MType({0x46dfa05bddfded8c, 0xccd83954b6359379})}, + {Sign::POS, -129, MType({0xbfe9dbebf2e8a45e, 0xd47fcb8c0852f0c0})}, + {Sign::POS, -129, MType({0x7b11f1c5160c515c, 0xd85b3fa7a3407fa8})}, + {Sign::POS, -129, MType({0x1339e5677ec44dd0, 0xdc3be2bd8d837f7f})}, + {Sign::POS, -129, MType({0xea2b8c7bb0ee9c8b, 0xe021c2cf17ed9bdb})}, + {Sign::POS, -129, MType({0xaec562332791fe38, 0xe40cee16a2ff21c4})}, + {Sign::POS, -129, MType({0x71682ebacca79cfa, 0xe7fd7308d6895b14})}, + {Sign::POS, -129, MType({0xa5ad5ce9fb5a7bb6, 0xebf36055e1abc61e})}, + {Sign::POS, -129, MType({0x3225190531a852c5, 0xefeec4eac371584e})}, + {Sign::POS, -129, MType({0xda8ad649da21eab0, 0xf3efaff29c559a77})}, + {Sign::POS, -129, MType({0x4c3e2ea7c15c3d1e, 0xf7f630d808fc2ada})}, + {Sign::POS, -129, MType({0xbcb9bfa9852e0d35, 0xfc02574686680cc6})}, + {Sign::POS, -128, MType({0xce032f41d1e774e8, 0x800a1995f0019518})}, + {Sign::POS, -128, MType({0x9b39ffeebc29372a, 0x8215ea5cd3e4c4c7})}, + {Sign::POS, -128, MType({0x87f95f1befb6f806, 0x8424a6335c777e0b})}, + {Sign::POS, 
-128, MType({0xb987b42e3bb332a1, 0x8636557862acb7ce})}, + {Sign::POS, -128, MType({0x139a7ba83bf2d136, 0x884b00aef726cec5})}, + {Sign::POS, -128, MType({0x50799beaaab2941, 0x8a62b07f3457c407})}, + {Sign::POS, -128, MType({0x8bd744617e9b7d52, 0x8c7d6db7169e0cda})}, + {Sign::POS, -128, MType({0x46ad444333ceb10, 0x8e9b414b5a92a606})}, + {Sign::POS, -128, MType({0xef4c737fba4f5d66, 0x90bc345861bf3d52})}, + {Sign::POS, -128, MType({0xae441c09d761c549, 0x92e050231df57d6f})}, + {Sign::POS, -128, MType({0x6e36aa9ce90a3879, 0x95079e1a0382dc79})}, + {Sign::POS, -128, MType({0xefca1a184e93809, 0x973227d6027ebd8a})}, + {Sign::POS, -128, MType({0xefca1a184e93809, 0x973227d6027ebd8a})}, + {Sign::POS, -128, MType({0x124bc6f1acf95dc4, 0x995ff71b8773432d})}, + {Sign::POS, -128, MType({0x352bea51e58ea9e8, 0x9b9115db83a3dd2d})}, + {Sign::POS, -128, MType({0x266d6cdc959153bc, 0x9dc58e347d37696d})}, + {Sign::POS, -128, MType({0x4527d82c8214ddca, 0x9ffd6a73a78eaf35})}, + {Sign::POS, -128, MType({0x404cabb76d600e3c, 0xa238b5160413106e})}, + {Sign::POS, -128, MType({0x404cabb76d600e3c, 0xa238b5160413106e})}, + {Sign::POS, -128, MType({0xcab7d2ec23f0eef3, 0xa47778c98bcc86a1})}, + {Sign::POS, -128, MType({0x761c48dd859de2d3, 0xa6b9c06e6211646b})}, + {Sign::POS, -128, MType({0x7fd3b7d7e5d148bb, 0xa8ff971810a5e181})}, + {Sign::POS, -128, MType({0xc27c6780d92b4d11, 0xab49080ecda53208})}, + {Sign::POS, -128, MType({0xdb502402c94092cd, 0xad961ed0cb91d406})}, + {Sign::POS, -128, MType({0xdb502402c94092cd, 0xad961ed0cb91d406})}, + {Sign::POS, -128, MType({0x3432ef6b732b6843, 0xafe6e71393eeda29})}, + {Sign::POS, -128, MType({0xbb324da7e046e792, 0xb23b6cc56cc84c99})}, + {Sign::POS, -128, MType({0xb21709ce430c8e24, 0xb493bc0ec9954243})}, + {Sign::POS, -128, MType({0xb21709ce430c8e24, 0xb493bc0ec9954243})}, + {Sign::POS, -128, MType({0xe91ad16ecff10111, 0xb6efe153c7e319f6})}, + {Sign::POS, -128, MType({0xce31e481cd797e79, 0xb94fe935b83e3eb5})}, + {Sign::POS, -128, MType({0xda3e961a96c580fa, 
0xbbb3e094b3d228d3})}, + {Sign::POS, -128, MType({0xda3e961a96c580fa, 0xbbb3e094b3d228d3})}, + {Sign::POS, -128, MType({0xf396598aae91499a, 0xbe1bd4913f3fda43})}, + {Sign::POS, -128, MType({0xae4cceb0f621941b, 0xc087d28dfb2febb8})}, + {Sign::POS, -128, MType({0xae4cceb0f621941b, 0xc087d28dfb2febb8})}, + {Sign::POS, -128, MType({0x6c1855c42078f81b, 0xc2f7e831632b6670})}, + {Sign::POS, -128, MType({0x169535fb8bf577c8, 0xc56c23679b4d206e})}, + {Sign::POS, -128, MType({0x169535fb8bf577c8, 0xc56c23679b4d206e})}, + {Sign::POS, -128, MType({0x3b24cecc60217942, 0xc7e492644d64237e})}, + {Sign::POS, -128, MType({0x3dc2687fcf939696, 0xca6143a49626d820})}, + {Sign::POS, -128, MType({0x3dc2687fcf939696, 0xca6143a49626d820})}, + {Sign::POS, -128, MType({0xa62e6add1a901a0, 0xcce245f1031e41fa})}, + {Sign::POS, -128, MType({0x5bb6e23138ad51e1, 0xcf67a85fa1f89a04})}, + {Sign::POS, -128, MType({0x5bb6e23138ad51e1, 0xcf67a85fa1f89a04})}, + {Sign::POS, -128, MType({0x7fc60a5103092bae, 0xd1f17a5621fb01ac})}, + {Sign::POS, -128, MType({0xbfe9dbebf2e8a45e, 0xd47fcb8c0852f0c0})}, + {Sign::POS, -128, MType({0xbfe9dbebf2e8a45e, 0xd47fcb8c0852f0c0})}, + {Sign::POS, -128, MType({0x8e2d7d378127d823, 0xd712ac0cf811659d})}, + {Sign::POS, -128, MType({0x5c1a7f14b168b365, 0xd9aa2c3b0ea3cbc1})}, + {Sign::POS, -128, MType({0x5c1a7f14b168b365, 0xd9aa2c3b0ea3cbc1})}, + {Sign::POS, -128, MType({0xb7579f0f8d3d514b, 0xdc465cd155a90942})}, + {Sign::POS, -128, MType({0xb7579f0f8d3d514b, 0xdc465cd155a90942})}, + {Sign::POS, -128, MType({0xb087205eb55aea85, 0xdee74ee64b0c38d3})}, + {Sign::POS, -128, MType({0x424a2623d60dfb16, 0xe18d13ee805a4de3})}, + {Sign::POS, -128, MType({0x424a2623d60dfb16, 0xe18d13ee805a4de3})}, + {Sign::POS, -128, MType({0x4d3a591ae6854787, 0xe437bdbf5254459c})}, + {Sign::POS, -128, MType({0x4d3a591ae6854787, 0xe437bdbf5254459c})}, + {Sign::POS, -128, MType({0x8dcdb6b24c5c5cdf, 0xe6e75e91b9cca551})}, + {Sign::POS, -128, MType({0x33ac7d9ebba8a53c, 0xe99c090536ece983})}, + {Sign::POS, 
-128, MType({0x33ac7d9ebba8a53c, 0xe99c090536ece983})}, + {Sign::POS, -128, MType({0xfb2eede4b59d8959, 0xec55d022d80e3d27})}, + {Sign::POS, -128, MType({0xfb2eede4b59d8959, 0xec55d022d80e3d27})}, + {Sign::POS, -128, MType({0x308b454666de8f99, 0xef14c7605d60654c})}, + {Sign::POS, -128, MType({0x308b454666de8f99, 0xef14c7605d60654c})}, + {Sign::POS, -128, MType({0x8383cb0ce23bebd4, 0xf1d902a37aaa5085})}, + {Sign::POS, -128, MType({0x8383cb0ce23bebd4, 0xf1d902a37aaa5085})}, + {Sign::POS, -128, MType({0x64fc87b4a41f7b70, 0xf4a2964538813c67})}, + {Sign::POS, -128, MType({0x64fc87b4a41f7b70, 0xf4a2964538813c67})}, + {Sign::POS, -128, MType({0x3f5d7d82b65c5686, 0xf77197157665f689})}, + {Sign::POS, -128, MType({0x3f5d7d82b65c5686, 0xf77197157665f689})}, + {Sign::POS, -128, MType({0x6476077b9fbd41ae, 0xfa461a5e8f4b759d})}, + {Sign::POS, -128, MType({0x6476077b9fbd41ae, 0xfa461a5e8f4b759d})}, + {Sign::POS, -128, MType({0xe3909ffd0d61778, 0xfd2035e9221ef5d0})}, + {Sign::POS, 0, MType(0)}, }, // -log2(r) for the second step, generated by SageMath with: // @@ -313,202 +310,202 @@ const LogRR LOG2_TABLE = { // r = 2^-16 * round( 2^16 / (1 + i*2^(-14)) ); // s, m, e = RealField(128)(r).log2().sign_mantissa_exponent(); // print("{Sign::NEG," if s == 1 else "{Sign::POS,", e, ", - // format_hex(m), "},"); + // MType({", hex(m % 2^64), ",", hex((m >> 64) % 2^64), "})},"); /* .step_2 = */ { - {Sign::NEG, -135, 0xb9061559'18954401'b5cfed58'337e848a_u128}, - {Sign::NEG, -135, 0xb6264958'a3c7fa2b'ffaf2ac1'b1d20910_u128}, - {Sign::NEG, -135, 0xb34671e4'39aa448e'52521a39'50ea2ed8_u128}, - {Sign::NEG, -135, 0xb0668efb'7ef48ab7'f87e1abd'ee10fd95_u128}, - {Sign::NEG, -135, 0xad86a09e'185af0e8'fbd43bbc'c24c5e43_u128}, - {Sign::NEG, -135, 0xaaa6a6cb'aa8d57ce'2f4f5d48'f9796742_u128}, - {Sign::NEG, -135, 0xa7c6a183'da375c3d'3477fd67'c1cab6b3_u128}, - {Sign::NEG, -135, 0xa4e690c6'4c0056f0'7b4d33eb'381fe558_u128}, - {Sign::NEG, -135, 0xa2067492'a48b5c43'3ce25e48'cb498dea_u128}, - {Sign::NEG, -135, 
0x9f264ce8'88773bed'70b0fcc9'e4330983_u128}, - {Sign::NEG, -135, 0x9c4619c7'9c5e80bf'bc9e4267'd3189b22_u128}, - {Sign::NEG, -135, 0x9965db2f'84d7705f'5fb3d896'326615c4_u128}, - {Sign::NEG, -135, 0x9685911f'e6740b02'178b5831'1e96d323_u128}, - {Sign::NEG, -135, 0x93a53b98'65c20b2a'006bf8b6'cf73d847_u128}, - {Sign::NEG, -135, 0x90c4da98'a74ae561'7019f6e6'4a580a02_u128}, - {Sign::NEG, -135, 0x8de46e20'4f93c7f6'cb5733cf'0eb4191d_u128}, - {Sign::NEG, -135, 0x8b03f62f'031d9ab8'56148d4f'c5e415b6_u128}, - {Sign::NEG, -135, 0x882372c4'6664feaf'fe5370f4'25872623_u128}, - {Sign::NEG, -135, 0x8542e3e0'1de24ddf'21b72a14'57ee70d6_u128}, - {Sign::NEG, -135, 0x81aa211f'1e332fcf'abff4f89'968bed0b_u128}, - {Sign::NEG, -136, 0xfd92f0cf'88d75f24'86410a67'6480a5a7_u128}, - {Sign::NEG, -136, 0xf7d1886b'2a876289'44280889'021970e4_u128}, - {Sign::NEG, -136, 0xf2100910'6a42bc14'32eb139d'9812090d_u128}, - {Sign::NEG, -136, 0xec4e72be'90cd2d2d'bef9dd41'e8e42810_u128}, - {Sign::NEG, -136, 0xe68cc574'e6e1e5d7'689d08ca'6c7c3eb1_u128}, - {Sign::NEG, -136, 0xe0cb0132'b5338423'01ef259a'7f69821d_u128}, - {Sign::NEG, -136, 0xdb0925f7'446c13a9'e22cea71'b7bb8467_u128}, - {Sign::NEG, -136, 0xd54733c1'dd2d0d04'0e5bb273'03f542fe_u128}, - {Sign::NEG, -136, 0xcf852a91'c80f553f'57453c8d'5dc64ce1_u128}, - {Sign::NEG, -136, 0xc9c30a66'4da33d56'6cc7add1'fc09ef92_u128}, - {Sign::NEG, -136, 0xc400d33e'b67081a7'e678d728'0de1c07f_u128}, - {Sign::NEG, -136, 0xbe3e851a'4af6496d'419bbeb2'239bdc39_u128}, - {Sign::NEG, -136, 0xb87c1ff8'53ab2631'd4676d1d'81755809_u128}, - {Sign::NEG, -136, 0xb2b9a3d8'18fd1349'b69dfef7'ac2e2890_u128}, - {Sign::NEG, -136, 0xacf710b8'e3517548'9f72fa0a'8fccabc0_u128}, - {Sign::NEG, -136, 0xa7346699'fb051978'b8bfe6a3'addb988e_u128}, - {Sign::NEG, -136, 0xa171a57a'a86c3551'67862c8e'c9dcd60d_u128}, - {Sign::NEG, -136, 0x9baecd5a'33d265ee'09bd3370'909e28a6_u128}, - {Sign::NEG, -136, 0x95ebde37'e57aaf84'a96bc611'b991419b_u128}, - {Sign::NEG, -136, 0x9028d813'059f7cdc'a50bb80f'203f0d62_u128}, - 
{Sign::NEG, -136, 0x8a65baea'dc729ec5'4d36cd47'4f65a317_u128}, - {Sign::NEG, -136, 0x84a286be'b21d4b8c'779be241'ef4874a3_u128}, - {Sign::NEG, -137, 0xfdbe771b'9d803cea'0e76a962'fa65ace3_u128}, - {Sign::NEG, -137, 0xf237b2ae'f4e62e5a'd3d35627'464a5267_u128}, - {Sign::NEG, -137, 0xe6b0c035'fa8b328c'162ef4b0'e838c363_u128}, - {Sign::NEG, -137, 0xdb299faf'3e7cd74f'77bb10b9'76b3b9ca_u128}, - {Sign::NEG, -137, 0xcfa25119'50b77014'209853ce'e70bc58b_u128}, - {Sign::NEG, -137, 0xc41ad472'c12614d3'63f9b57c'baf2e58d_u128}, - {Sign::NEG, -137, 0xb89329ba'1fa2a0fd'4fca1c93'1bd6e6d6_u128}, - {Sign::NEG, -137, 0xad0b50ed'fbf5b265'26d26e43'4a53490a_u128}, - {Sign::NEG, -137, 0xa1834a0c'e5d6a82d'c55e0790'78dc86a0_u128}, - {Sign::NEG, -137, 0x95fb1515'6ceba1b5'f05b9d5b'd28f540b_u128}, - {Sign::NEG, -137, 0x8a72b206'20c97d84'8ef87f1a'11cdb727_u128}, - {Sign::NEG, -138, 0xfdd441bb'21e7b069'9d687011'4c1183cf_u128}, - {Sign::NEG, -138, 0xe6c2c334'99ba16c4'63d514ff'f97e86f3_u128}, - {Sign::NEG, -138, 0xcfb0e875'c7cc5929'11a38190'1eadd883_u128}, - {Sign::NEG, -138, 0xb89eb17b'cabe1857'a9d69d37'bc0a5bac_u128}, - {Sign::NEG, -138, 0xa18c1e43'c10c6898'2dc97c9f'fefd2497_u128}, - {Sign::NEG, -138, 0x8a792eca'c911cf92'0dcdc8af'cb2ac09a_u128}, - {Sign::NEG, -139, 0xe6cbc61c'020c8446'dd454eb3'a1489470_u128}, - {Sign::NEG, -139, 0xb8a47615'0dfe4470'87803586'4d84b319_u128}, - {Sign::NEG, -139, 0x8a7c6d7a'f1de7942'7ce595cc'53b8342c_u128}, - {Sign::NEG, -140, 0xb8a7588f'd29b1baa'4710b590'49899141_u128}, - {Sign::NEG, -141, 0xb8a8c9d8'be9ae994'5957f633'309d74e3_u128}, - {Sign::POS, 0, 0_u128}, - {Sign::POS, -141, 0xb8abac81'ab576f3b'8268aba0'30b1adf6_u128}, - {Sign::POS, -140, 0xb8ad1de1'ac9ea6a5'1511cba2'fb213a10_u128}, - {Sign::POS, -139, 0x8a82eb77'08262500'6379fb9f'd9bc6235_u128}, - {Sign::POS, -139, 0xb8b000b8'c65957cc'b6fe1bf6'01ee27d5_u128}, - {Sign::POS, -139, 0xe6ddcebb'd72d3f7f'8c6e6069'3a14e6d0_u128}, - {Sign::POS, -138, 0x8a862ac3'0095c084'e9bcfd0c'62eaa2ca_u128}, - {Sign::POS, -138, 
0xa19dca8e'85918b6d'73b21420'9a5234a7_u128}, - {Sign::POS, -138, 0xb8b5c6c3'5e142a9b'347d4ca3'109fe4db_u128}, - {Sign::POS, -138, 0xcfce1f64'6dca7745'37a62c48'783bb066_u128}, - {Sign::POS, -138, 0xe6e6d474'9883fbe3'0794b643'7fb56344_u128}, - {Sign::POS, -138, 0xfdffe5f6'c232f658'1cb9a45e'd90318e6_u128}, - {Sign::POS, -137, 0x8a8ca9f6'e7762d0f'bc118e5d'bbef7dbc_u128}, - {Sign::POS, -137, 0x96198f2e'5173e93b'b4c0fb95'35907cf8_u128}, - {Sign::POS, -137, 0xa1a6a2a3'113fe246'c051d2c5'f00a9bb9_u128}, - {Sign::POS, -137, 0xad33e456'9918a8d5'55326987'8c1e5110_u128}, - {Sign::POS, -137, 0xb8c1544a'5b4e2caf'bc906750'b0ce372c_u128}, - {Sign::POS, -137, 0xc44ef27f'ca41bdd8'4c50eaa6'3be294b6_u128}, - {Sign::POS, -137, 0xcfdcbef8'58660da1'b6cb28db'8c065b44_u128}, - {Sign::POS, -137, 0xdb6ab9b5'783f2fc5'70479336'830ceb05_u128}, - {Sign::POS, -137, 0xe6f8e2b8'9c629b7a'2a458c83'1f6aeb49_u128}, - {Sign::POS, -137, 0xf2873a03'37772c8a'6489ba5b'd391e206_u128}, - {Sign::POS, -137, 0xfe15bf96'bc35246b'13f6fda5'10aeec3b_u128}, - {Sign::POS, -136, 0x84d239ba'4eb315a9'2f9a0ef9'e8250836_u128}, - {Sign::POS, -136, 0x8a99aacf'26f2a8a7'389019e8'22b70f1e_u128}, - {Sign::POS, -136, 0x9061330a'a04f87ae'308beeff'a12cf669_u128}, - {Sign::POS, -136, 0x9628d26d'7448a43f'9886a71b'25a2085d_u128}, - {Sign::POS, -136, 0x9bf088f8'5c65a56b'70ba9ceb'e0b969c3_u128}, - {Sign::POS, -136, 0xa1b856ac'1236e85b'cd855dc7'05ea2bea_u128}, - {Sign::POS, -136, 0xa7803b89'4f5580e0'7736196b'11afb331_u128}, - {Sign::POS, -136, 0xad483790'cd6339fa'94c99761'b8eab3d8_u128}, - {Sign::POS, -136, 0xb3104ac3'460a9668'6194b8c0'40814736_u128}, - {Sign::POS, -136, 0xb8d87521'72fed130'edde8d24'c7a999cc_u128}, - {Sign::POS, -136, 0xbea0b6ac'0dfbde2f'ea6b01eb'de42f1d0_u128}, - {Sign::POS, -136, 0xc4690f63'd0c66aa1'7ef732b6'9334cf50_u128}, - {Sign::POS, -136, 0xca317f49'752bddae'2ba86275'fcfc2d72_u128}, - {Sign::POS, -136, 0xcffa065d'b50258f6'b56ea44e'185bf99f_u128}, - {Sign::POS, -136, 0xd5c2a4a1'4a28b920'1d5c3bbe'b6902bfe_u128}, - 
{Sign::POS, -136, 0xdb8b5a14'ee86965f'a2f2bb9e'156b0f37_u128}, - {Sign::POS, -136, 0xe15426b9'5c0c4506'd166eb8d'a06ab5ef_u128}, - {Sign::POS, -136, 0xe71d0a8f'4cb2d60f'97dc7bae'4219de0f_u128}, - {Sign::POS, -136, 0xece60597'7a7c17a8'6c9a8e76'98f416c4_u128}, - {Sign::POS, -136, 0xf2af17d2'9f7295c0'7b3a20aa'5289695e_u128}, - {Sign::POS, -136, 0xf8784141'75a99a93'ddcf578e'e2c2897b_u128}, - {Sign::POS, -136, 0xfe4181e4'b73d2f37'e10ebd96'c3ec30ec_u128}, - {Sign::POS, -135, 0x82056cde'8f290e13'a9b7baec'b34ba577_u128}, - {Sign::POS, -135, 0x8430f56d'5e1edfd1'2da910dc'61c182da_u128}, - {Sign::POS, -135, 0x8715b5a8'f27bed90'faca09dc'7e0ba8b5_u128}, - {Sign::POS, -135, 0x89fa8180'19a2cace'0d723876'173c0947_u128}, - {Sign::POS, -135, 0x8cdf58f3'30b64515'4e6651df'154e8f8c_u128}, - {Sign::POS, -135, 0x8fc43c02'94dd8af3'ee54b77d'3bc34b6d_u128}, - {Sign::POS, -135, 0x92a92aae'a3442c3d'ad07dde9'b5f92cce_u128}, - {Sign::POS, -135, 0x958e24f7'b91a1a53'261aacf9'44b638f0_u128}, - {Sign::POS, -135, 0x98732ade'3393a868'232f5d64'a85b219d_u128}, - {Sign::POS, -135, 0x9b583c62'6fe98bc9'f3a958bb'706093fc_u128}, - {Sign::POS, -135, 0x9e3d5984'cb58dc25'c9eaa059'e7b0333a_u128}, - {Sign::POS, -135, 0xa1228245'a32313cf'1e154029'663243c0_u128}, - {Sign::POS, -135, 0xa407b6a5'548e1006'16515200'e283d006_u128}, - {Sign::POS, -135, 0xa6ecf6a4'3ce4113d'f498168a'3337ca4f_u128}, - {Sign::POS, -135, 0xa9d24242'b973bb63'8a04a89f'0548a10f_u128}, - {Sign::POS, -135, 0xacb79981'27901623'afaad01f'25772805_u128}, - {Sign::POS, -135, 0xaf9cfc5f'e4908d31'c4f47950'543fe0b8_u128}, - {Sign::POS, -135, 0xb2826adf'4dd0f08e'338655e6'77d0d3ec_u128}, - {Sign::POS, -135, 0xb567e4ff'c0b174cc'f8ac2ce1'9d009541_u128}, - {Sign::POS, -135, 0xb84d6ac1'9a96b35c'344d5e7d'd7b2f465_u128}, - {Sign::POS, -135, 0xbb32fc25'38e9aaca'bd6a217f'b4598ec7_u128}, - {Sign::POS, -135, 0xbe18992a'f917bf0e'bc21ff36'8f562b75_u128}, - {Sign::POS, -135, 0xc0fe41d3'3892b9cc'4944139c'cbf2cb9a_u128}, - {Sign::POS, -135, 
0xc3e3f61e'54d0ca9c'1369970c'8b67e6b5_u128}, - {Sign::POS, -135, 0xc6c9b60c'ab4c8752'099b370e'2d04a530_u128}, - {Sign::POS, -135, 0xc9af819e'9984ec44'0b81c3d4'8aff589f_u128}, - {Sign::POS, -135, 0xcc9558d4'7cfd5c90'9f22b809'93be311b_u128}, - {Sign::POS, -135, 0xcf7b3bae'b33da265'ac29209c'8d8985ae_u128}, - {Sign::POS, -135, 0xd2612a2d'99d1ef47'3cbb6a52'0292351d_u128}, - {Sign::POS, -135, 0xd5472451'8e4adc56'43de9ae4'0507ef24_u128}, - {Sign::POS, -135, 0xd82d2a1a'ee3d6a97'69677b90'2ea4df3a_u128}, - {Sign::POS, -135, 0xdb133b8a'17430339'db7a3aff'74967bd5_u128}, - {Sign::POS, -135, 0xddf9589f'66f977de'25990c82'a0066ac6_u128}, - {Sign::POS, -135, 0xe0df815b'3b0302dd'0d424aac'f4babf55_u128}, - {Sign::POS, -135, 0xe30c278d'9936c595'f8e3e7eb'5a7bdebb_u128}, - {Sign::POS, -135, 0xe5f264ad'b62d5810'5ef8bf5a'df5deebe_u128}, - {Sign::POS, -135, 0xe8d8ad75'590bdf92'331d1996'5368fc82_u128}, - {Sign::POS, -135, 0xebbf01e4'df85219e'901c30c4'27e358b8_u128}, - {Sign::POS, -135, 0xeea561fc'a7504dc1'aeac7e98'57253b06_u128}, - {Sign::POS, -135, 0xf18bcdbd'0e28fdd7'e2113e58'93ab5b40_u128}, - {Sign::POS, -135, 0xf4724526'71cf3654'9a4efc80'ae977826_u128}, - {Sign::POS, -135, 0xf758c839'30076689'6bf3ba83'19332c9f_u128}, - {Sign::POS, -135, 0xfa3f56f5'a69a68ed'1d732d30'2e75018b_u128}, - {Sign::POS, -135, 0xfd25f15c'33558362'ba179c5d'bcceec01_u128}, - {Sign::POS, -134, 0x80064bb6'9a0533c0'5543f53b'8ad85039_u128}, - {Sign::POS, -134, 0x8179a494'8347996b'e971a556'5b93cb67_u128}, - {Sign::POS, -134, 0x82ed0348'045f379d'5b399644'ba714691_u128}, - {Sign::POS, -134, 0x846067d1'4c3b8982'5079f1e0'ec4b8496_u128}, - {Sign::POS, -134, 0x85d3d230'89ce40b0'6aba4990'a32e8873_u128}, - {Sign::POS, -134, 0x87474265'ec0b4548'e16770c3'a404291c_u128}, - {Sign::POS, -134, 0x88bab871'a1e8b61c'1edb7ffb'1d6b3eab_u128}, - {Sign::POS, -134, 0x8a2e3453'da5ee8cd'603243e1'ba7c7865_u128}, - {Sign::POS, -134, 0x8ba1b60c'c46869f6'57ea5c03'ea4621dd_u128}, - {Sign::POS, -134, 0x8d153d9c'8f01fd4a'd3534cbf'43bd7fd8_u128}, - 
{Sign::POS, -134, 0x8e88cb03'692a9dbc'62c8c807'5dc91cd5_u128}, - {Sign::POS, -134, 0x8ffc5e41'81e37d9e'04bb70a5'e3db7b85_u128}, - {Sign::POS, -134, 0x916ff757'083006c7'd3875ba3'2159547a_u128}, - {Sign::POS, -134, 0x9286adfc'a91ba28d'5c94c80e'7a8f66b1_u128}, - {Sign::POS, -134, 0x93fa514b'a0517623'52d313c4'7b4f91db_u128}, - {Sign::POS, -134, 0x956dfa72'866fc57d'80829e9f'3957a4c3_u128}, - {Sign::POS, -134, 0x96e1a971'8a824be5'1cd49179'72015ae7_u128}, - {Sign::POS, -134, 0x98555e48'db96fcd2'1af23c29'ef3032da_u128}, - {Sign::POS, -134, 0x99c918f8'a8be040e'e7f7bf24'0be67b80_u128}, - {Sign::POS, -134, 0x9b3cd981'2109c5dc'2bbe3cd4'f7d868fa_u128}, - {Sign::POS, -134, 0x9cb09fe2'738edf14'8c75d6a4'c5ae460d_u128}, - {Sign::POS, -134, 0x9e246c1c'cf642550'750fb989'c9a06186_u128}, - {Sign::POS, -134, 0x9f983e30'63a2a709'de787e24'4901bdf9_u128}, - {Sign::POS, -134, 0xa10c161d'5f65abc0'1ba3205f'f729efa4_u128}, - {Sign::POS, -134, 0xa27ff3e3'f1cab41b'a864d2a0'38fb19cd_u128}, - {Sign::POS, -134, 0xa3f3d784'49f17a11'fb21f083'a5fec56d_u128}, - {Sign::POS, -134, 0xa567c0fe'96fbf109'594c5552'bcc377f5_u128}, - {Sign::POS, -134, 0xa6dbb053'080e45fc'aeb35a35'3fc5a503_u128}, - {Sign::POS, -134, 0xa84fa581'cc4edf9f'67a5c051'30c0f330_u128}, - {Sign::POS, -134, 0xa9c3a08b'12e65e81'4de5cafd'e1caf46f_u128}, - {Sign::POS, -134, 0xab37a16f'0aff9d32'686fce3d'160e88fd_u128}, - {Sign::POS, -134, 0xacaba82d'e3c7b066'de1375b3'af6749a6_u128}, - {Sign::POS, -134, 0xadc2b114'c632da56'24356904'8ac4affe_u128}, - {Sign::POS, -134, 0xaf36c213'19b80ea2'd6796227'dcd39551_u128}, - {Sign::POS, -134, 0xb0aad8ec'cfb38d51'abc92653'86172074_u128}, - {Sign::POS, -134, 0xb21ef5a2'175ac65e'0caac9f1'7896f2ce_u128}, - {Sign::POS, -134, 0xb3931833'1fe56492'1c65a3c7'f828972b_u128}, - {Sign::POS, -134, 0xb50740a0'188d4daa'abdc6644'6a4286d9_u128}, - {Sign::POS, -134, 0xb67b6ee9'308ea27b'2f3bbe8e'8d72abec_u128}, - {Sign::POS, -134, 0xb7efa30e'9727bf11'b67dbdd7'f03d168c_u128}, + {Sign::NEG, -135, MType({0xb5cfed58337e848a, 
0xb906155918954401})}, + {Sign::NEG, -135, MType({0xffaf2ac1b1d20910, 0xb6264958a3c7fa2b})}, + {Sign::NEG, -135, MType({0x52521a3950ea2ed8, 0xb34671e439aa448e})}, + {Sign::NEG, -135, MType({0xf87e1abdee10fd95, 0xb0668efb7ef48ab7})}, + {Sign::NEG, -135, MType({0xfbd43bbcc24c5e43, 0xad86a09e185af0e8})}, + {Sign::NEG, -135, MType({0x2f4f5d48f9796742, 0xaaa6a6cbaa8d57ce})}, + {Sign::NEG, -135, MType({0x3477fd67c1cab6b3, 0xa7c6a183da375c3d})}, + {Sign::NEG, -135, MType({0x7b4d33eb381fe558, 0xa4e690c64c0056f0})}, + {Sign::NEG, -135, MType({0x3ce25e48cb498dea, 0xa2067492a48b5c43})}, + {Sign::NEG, -135, MType({0x70b0fcc9e4330983, 0x9f264ce888773bed})}, + {Sign::NEG, -135, MType({0xbc9e4267d3189b22, 0x9c4619c79c5e80bf})}, + {Sign::NEG, -135, MType({0x5fb3d896326615c4, 0x9965db2f84d7705f})}, + {Sign::NEG, -135, MType({0x178b58311e96d323, 0x9685911fe6740b02})}, + {Sign::NEG, -135, MType({0x6bf8b6cf73d847, 0x93a53b9865c20b2a})}, + {Sign::NEG, -135, MType({0x7019f6e64a580a02, 0x90c4da98a74ae561})}, + {Sign::NEG, -135, MType({0xcb5733cf0eb4191d, 0x8de46e204f93c7f6})}, + {Sign::NEG, -135, MType({0x56148d4fc5e415b6, 0x8b03f62f031d9ab8})}, + {Sign::NEG, -135, MType({0xfe5370f425872623, 0x882372c46664feaf})}, + {Sign::NEG, -135, MType({0x21b72a1457ee70d6, 0x8542e3e01de24ddf})}, + {Sign::NEG, -135, MType({0xabff4f89968bed0b, 0x81aa211f1e332fcf})}, + {Sign::NEG, -136, MType({0x86410a676480a5a7, 0xfd92f0cf88d75f24})}, + {Sign::NEG, -136, MType({0x44280889021970e4, 0xf7d1886b2a876289})}, + {Sign::NEG, -136, MType({0x32eb139d9812090d, 0xf21009106a42bc14})}, + {Sign::NEG, -136, MType({0xbef9dd41e8e42810, 0xec4e72be90cd2d2d})}, + {Sign::NEG, -136, MType({0x689d08ca6c7c3eb1, 0xe68cc574e6e1e5d7})}, + {Sign::NEG, -136, MType({0x1ef259a7f69821d, 0xe0cb0132b5338423})}, + {Sign::NEG, -136, MType({0xe22cea71b7bb8467, 0xdb0925f7446c13a9})}, + {Sign::NEG, -136, MType({0xe5bb27303f542fe, 0xd54733c1dd2d0d04})}, + {Sign::NEG, -136, MType({0x57453c8d5dc64ce1, 0xcf852a91c80f553f})}, + {Sign::NEG, -136, 
MType({0x6cc7add1fc09ef92, 0xc9c30a664da33d56})}, + {Sign::NEG, -136, MType({0xe678d7280de1c07f, 0xc400d33eb67081a7})}, + {Sign::NEG, -136, MType({0x419bbeb2239bdc39, 0xbe3e851a4af6496d})}, + {Sign::NEG, -136, MType({0xd4676d1d81755809, 0xb87c1ff853ab2631})}, + {Sign::NEG, -136, MType({0xb69dfef7ac2e2890, 0xb2b9a3d818fd1349})}, + {Sign::NEG, -136, MType({0x9f72fa0a8fccabc0, 0xacf710b8e3517548})}, + {Sign::NEG, -136, MType({0xb8bfe6a3addb988e, 0xa7346699fb051978})}, + {Sign::NEG, -136, MType({0x67862c8ec9dcd60d, 0xa171a57aa86c3551})}, + {Sign::NEG, -136, MType({0x9bd3370909e28a6, 0x9baecd5a33d265ee})}, + {Sign::NEG, -136, MType({0xa96bc611b991419b, 0x95ebde37e57aaf84})}, + {Sign::NEG, -136, MType({0xa50bb80f203f0d62, 0x9028d813059f7cdc})}, + {Sign::NEG, -136, MType({0x4d36cd474f65a317, 0x8a65baeadc729ec5})}, + {Sign::NEG, -136, MType({0x779be241ef4874a3, 0x84a286beb21d4b8c})}, + {Sign::NEG, -137, MType({0xe76a962fa65ace3, 0xfdbe771b9d803cea})}, + {Sign::NEG, -137, MType({0xd3d35627464a5267, 0xf237b2aef4e62e5a})}, + {Sign::NEG, -137, MType({0x162ef4b0e838c363, 0xe6b0c035fa8b328c})}, + {Sign::NEG, -137, MType({0x77bb10b976b3b9ca, 0xdb299faf3e7cd74f})}, + {Sign::NEG, -137, MType({0x209853cee70bc58b, 0xcfa2511950b77014})}, + {Sign::NEG, -137, MType({0x63f9b57cbaf2e58d, 0xc41ad472c12614d3})}, + {Sign::NEG, -137, MType({0x4fca1c931bd6e6d6, 0xb89329ba1fa2a0fd})}, + {Sign::NEG, -137, MType({0x26d26e434a53490a, 0xad0b50edfbf5b265})}, + {Sign::NEG, -137, MType({0xc55e079078dc86a0, 0xa1834a0ce5d6a82d})}, + {Sign::NEG, -137, MType({0xf05b9d5bd28f540b, 0x95fb15156ceba1b5})}, + {Sign::NEG, -137, MType({0x8ef87f1a11cdb727, 0x8a72b20620c97d84})}, + {Sign::NEG, -138, MType({0x9d6870114c1183cf, 0xfdd441bb21e7b069})}, + {Sign::NEG, -138, MType({0x63d514fff97e86f3, 0xe6c2c33499ba16c4})}, + {Sign::NEG, -138, MType({0x11a381901eadd883, 0xcfb0e875c7cc5929})}, + {Sign::NEG, -138, MType({0xa9d69d37bc0a5bac, 0xb89eb17bcabe1857})}, + {Sign::NEG, -138, MType({0x2dc97c9ffefd2497, 
0xa18c1e43c10c6898})}, + {Sign::NEG, -138, MType({0xdcdc8afcb2ac09a, 0x8a792ecac911cf92})}, + {Sign::NEG, -139, MType({0xdd454eb3a1489470, 0xe6cbc61c020c8446})}, + {Sign::NEG, -139, MType({0x878035864d84b319, 0xb8a476150dfe4470})}, + {Sign::NEG, -139, MType({0x7ce595cc53b8342c, 0x8a7c6d7af1de7942})}, + {Sign::NEG, -140, MType({0x4710b59049899141, 0xb8a7588fd29b1baa})}, + {Sign::NEG, -141, MType({0x5957f633309d74e3, 0xb8a8c9d8be9ae994})}, + {Sign::POS, 0, MType({0x0, 0x0})}, + {Sign::POS, -141, MType({0x8268aba030b1adf6, 0xb8abac81ab576f3b})}, + {Sign::POS, -140, MType({0x1511cba2fb213a10, 0xb8ad1de1ac9ea6a5})}, + {Sign::POS, -139, MType({0x6379fb9fd9bc6235, 0x8a82eb7708262500})}, + {Sign::POS, -139, MType({0xb6fe1bf601ee27d5, 0xb8b000b8c65957cc})}, + {Sign::POS, -139, MType({0x8c6e60693a14e6d0, 0xe6ddcebbd72d3f7f})}, + {Sign::POS, -138, MType({0xe9bcfd0c62eaa2ca, 0x8a862ac30095c084})}, + {Sign::POS, -138, MType({0x73b214209a5234a7, 0xa19dca8e85918b6d})}, + {Sign::POS, -138, MType({0x347d4ca3109fe4db, 0xb8b5c6c35e142a9b})}, + {Sign::POS, -138, MType({0x37a62c48783bb066, 0xcfce1f646dca7745})}, + {Sign::POS, -138, MType({0x794b6437fb56344, 0xe6e6d4749883fbe3})}, + {Sign::POS, -138, MType({0x1cb9a45ed90318e6, 0xfdffe5f6c232f658})}, + {Sign::POS, -137, MType({0xbc118e5dbbef7dbc, 0x8a8ca9f6e7762d0f})}, + {Sign::POS, -137, MType({0xb4c0fb9535907cf8, 0x96198f2e5173e93b})}, + {Sign::POS, -137, MType({0xc051d2c5f00a9bb9, 0xa1a6a2a3113fe246})}, + {Sign::POS, -137, MType({0x553269878c1e5110, 0xad33e4569918a8d5})}, + {Sign::POS, -137, MType({0xbc906750b0ce372c, 0xb8c1544a5b4e2caf})}, + {Sign::POS, -137, MType({0x4c50eaa63be294b6, 0xc44ef27fca41bdd8})}, + {Sign::POS, -137, MType({0xb6cb28db8c065b44, 0xcfdcbef858660da1})}, + {Sign::POS, -137, MType({0x70479336830ceb05, 0xdb6ab9b5783f2fc5})}, + {Sign::POS, -137, MType({0x2a458c831f6aeb49, 0xe6f8e2b89c629b7a})}, + {Sign::POS, -137, MType({0x6489ba5bd391e206, 0xf2873a0337772c8a})}, + {Sign::POS, -137, MType({0x13f6fda510aeec3b, 
0xfe15bf96bc35246b})}, + {Sign::POS, -136, MType({0x2f9a0ef9e8250836, 0x84d239ba4eb315a9})}, + {Sign::POS, -136, MType({0x389019e822b70f1e, 0x8a99aacf26f2a8a7})}, + {Sign::POS, -136, MType({0x308beeffa12cf669, 0x9061330aa04f87ae})}, + {Sign::POS, -136, MType({0x9886a71b25a2085d, 0x9628d26d7448a43f})}, + {Sign::POS, -136, MType({0x70ba9cebe0b969c3, 0x9bf088f85c65a56b})}, + {Sign::POS, -136, MType({0xcd855dc705ea2bea, 0xa1b856ac1236e85b})}, + {Sign::POS, -136, MType({0x7736196b11afb331, 0xa7803b894f5580e0})}, + {Sign::POS, -136, MType({0x94c99761b8eab3d8, 0xad483790cd6339fa})}, + {Sign::POS, -136, MType({0x6194b8c040814736, 0xb3104ac3460a9668})}, + {Sign::POS, -136, MType({0xedde8d24c7a999cc, 0xb8d8752172fed130})}, + {Sign::POS, -136, MType({0xea6b01ebde42f1d0, 0xbea0b6ac0dfbde2f})}, + {Sign::POS, -136, MType({0x7ef732b69334cf50, 0xc4690f63d0c66aa1})}, + {Sign::POS, -136, MType({0x2ba86275fcfc2d72, 0xca317f49752bddae})}, + {Sign::POS, -136, MType({0xb56ea44e185bf99f, 0xcffa065db50258f6})}, + {Sign::POS, -136, MType({0x1d5c3bbeb6902bfe, 0xd5c2a4a14a28b920})}, + {Sign::POS, -136, MType({0xa2f2bb9e156b0f37, 0xdb8b5a14ee86965f})}, + {Sign::POS, -136, MType({0xd166eb8da06ab5ef, 0xe15426b95c0c4506})}, + {Sign::POS, -136, MType({0x97dc7bae4219de0f, 0xe71d0a8f4cb2d60f})}, + {Sign::POS, -136, MType({0x6c9a8e7698f416c4, 0xece605977a7c17a8})}, + {Sign::POS, -136, MType({0x7b3a20aa5289695e, 0xf2af17d29f7295c0})}, + {Sign::POS, -136, MType({0xddcf578ee2c2897b, 0xf878414175a99a93})}, + {Sign::POS, -136, MType({0xe10ebd96c3ec30ec, 0xfe4181e4b73d2f37})}, + {Sign::POS, -135, MType({0xa9b7baecb34ba577, 0x82056cde8f290e13})}, + {Sign::POS, -135, MType({0x2da910dc61c182da, 0x8430f56d5e1edfd1})}, + {Sign::POS, -135, MType({0xfaca09dc7e0ba8b5, 0x8715b5a8f27bed90})}, + {Sign::POS, -135, MType({0xd723876173c0947, 0x89fa818019a2cace})}, + {Sign::POS, -135, MType({0x4e6651df154e8f8c, 0x8cdf58f330b64515})}, + {Sign::POS, -135, MType({0xee54b77d3bc34b6d, 0x8fc43c0294dd8af3})}, + {Sign::POS, 
-135, MType({0xad07dde9b5f92cce, 0x92a92aaea3442c3d})}, + {Sign::POS, -135, MType({0x261aacf944b638f0, 0x958e24f7b91a1a53})}, + {Sign::POS, -135, MType({0x232f5d64a85b219d, 0x98732ade3393a868})}, + {Sign::POS, -135, MType({0xf3a958bb706093fc, 0x9b583c626fe98bc9})}, + {Sign::POS, -135, MType({0xc9eaa059e7b0333a, 0x9e3d5984cb58dc25})}, + {Sign::POS, -135, MType({0x1e154029663243c0, 0xa1228245a32313cf})}, + {Sign::POS, -135, MType({0x16515200e283d006, 0xa407b6a5548e1006})}, + {Sign::POS, -135, MType({0xf498168a3337ca4f, 0xa6ecf6a43ce4113d})}, + {Sign::POS, -135, MType({0x8a04a89f0548a10f, 0xa9d24242b973bb63})}, + {Sign::POS, -135, MType({0xafaad01f25772805, 0xacb7998127901623})}, + {Sign::POS, -135, MType({0xc4f47950543fe0b8, 0xaf9cfc5fe4908d31})}, + {Sign::POS, -135, MType({0x338655e677d0d3ec, 0xb2826adf4dd0f08e})}, + {Sign::POS, -135, MType({0xf8ac2ce19d009541, 0xb567e4ffc0b174cc})}, + {Sign::POS, -135, MType({0x344d5e7dd7b2f465, 0xb84d6ac19a96b35c})}, + {Sign::POS, -135, MType({0xbd6a217fb4598ec7, 0xbb32fc2538e9aaca})}, + {Sign::POS, -135, MType({0xbc21ff368f562b75, 0xbe18992af917bf0e})}, + {Sign::POS, -135, MType({0x4944139ccbf2cb9a, 0xc0fe41d33892b9cc})}, + {Sign::POS, -135, MType({0x1369970c8b67e6b5, 0xc3e3f61e54d0ca9c})}, + {Sign::POS, -135, MType({0x99b370e2d04a530, 0xc6c9b60cab4c8752})}, + {Sign::POS, -135, MType({0xb81c3d48aff589f, 0xc9af819e9984ec44})}, + {Sign::POS, -135, MType({0x9f22b80993be311b, 0xcc9558d47cfd5c90})}, + {Sign::POS, -135, MType({0xac29209c8d8985ae, 0xcf7b3baeb33da265})}, + {Sign::POS, -135, MType({0x3cbb6a520292351d, 0xd2612a2d99d1ef47})}, + {Sign::POS, -135, MType({0x43de9ae40507ef24, 0xd54724518e4adc56})}, + {Sign::POS, -135, MType({0x69677b902ea4df3a, 0xd82d2a1aee3d6a97})}, + {Sign::POS, -135, MType({0xdb7a3aff74967bd5, 0xdb133b8a17430339})}, + {Sign::POS, -135, MType({0x25990c82a0066ac6, 0xddf9589f66f977de})}, + {Sign::POS, -135, MType({0xd424aacf4babf55, 0xe0df815b3b0302dd})}, + {Sign::POS, -135, MType({0xf8e3e7eb5a7bdebb, 
0xe30c278d9936c595})}, + {Sign::POS, -135, MType({0x5ef8bf5adf5deebe, 0xe5f264adb62d5810})}, + {Sign::POS, -135, MType({0x331d19965368fc82, 0xe8d8ad75590bdf92})}, + {Sign::POS, -135, MType({0x901c30c427e358b8, 0xebbf01e4df85219e})}, + {Sign::POS, -135, MType({0xaeac7e9857253b06, 0xeea561fca7504dc1})}, + {Sign::POS, -135, MType({0xe2113e5893ab5b40, 0xf18bcdbd0e28fdd7})}, + {Sign::POS, -135, MType({0x9a4efc80ae977826, 0xf472452671cf3654})}, + {Sign::POS, -135, MType({0x6bf3ba8319332c9f, 0xf758c83930076689})}, + {Sign::POS, -135, MType({0x1d732d302e75018b, 0xfa3f56f5a69a68ed})}, + {Sign::POS, -135, MType({0xba179c5dbcceec01, 0xfd25f15c33558362})}, + {Sign::POS, -134, MType({0x5543f53b8ad85039, 0x80064bb69a0533c0})}, + {Sign::POS, -134, MType({0xe971a5565b93cb67, 0x8179a4948347996b})}, + {Sign::POS, -134, MType({0x5b399644ba714691, 0x82ed0348045f379d})}, + {Sign::POS, -134, MType({0x5079f1e0ec4b8496, 0x846067d14c3b8982})}, + {Sign::POS, -134, MType({0x6aba4990a32e8873, 0x85d3d23089ce40b0})}, + {Sign::POS, -134, MType({0xe16770c3a404291c, 0x87474265ec0b4548})}, + {Sign::POS, -134, MType({0x1edb7ffb1d6b3eab, 0x88bab871a1e8b61c})}, + {Sign::POS, -134, MType({0x603243e1ba7c7865, 0x8a2e3453da5ee8cd})}, + {Sign::POS, -134, MType({0x57ea5c03ea4621dd, 0x8ba1b60cc46869f6})}, + {Sign::POS, -134, MType({0xd3534cbf43bd7fd8, 0x8d153d9c8f01fd4a})}, + {Sign::POS, -134, MType({0x62c8c8075dc91cd5, 0x8e88cb03692a9dbc})}, + {Sign::POS, -134, MType({0x4bb70a5e3db7b85, 0x8ffc5e4181e37d9e})}, + {Sign::POS, -134, MType({0xd3875ba32159547a, 0x916ff757083006c7})}, + {Sign::POS, -134, MType({0x5c94c80e7a8f66b1, 0x9286adfca91ba28d})}, + {Sign::POS, -134, MType({0x52d313c47b4f91db, 0x93fa514ba0517623})}, + {Sign::POS, -134, MType({0x80829e9f3957a4c3, 0x956dfa72866fc57d})}, + {Sign::POS, -134, MType({0x1cd4917972015ae7, 0x96e1a9718a824be5})}, + {Sign::POS, -134, MType({0x1af23c29ef3032da, 0x98555e48db96fcd2})}, + {Sign::POS, -134, MType({0xe7f7bf240be67b80, 0x99c918f8a8be040e})}, + {Sign::POS, 
-134, MType({0x2bbe3cd4f7d868fa, 0x9b3cd9812109c5dc})}, + {Sign::POS, -134, MType({0x8c75d6a4c5ae460d, 0x9cb09fe2738edf14})}, + {Sign::POS, -134, MType({0x750fb989c9a06186, 0x9e246c1ccf642550})}, + {Sign::POS, -134, MType({0xde787e244901bdf9, 0x9f983e3063a2a709})}, + {Sign::POS, -134, MType({0x1ba3205ff729efa4, 0xa10c161d5f65abc0})}, + {Sign::POS, -134, MType({0xa864d2a038fb19cd, 0xa27ff3e3f1cab41b})}, + {Sign::POS, -134, MType({0xfb21f083a5fec56d, 0xa3f3d78449f17a11})}, + {Sign::POS, -134, MType({0x594c5552bcc377f5, 0xa567c0fe96fbf109})}, + {Sign::POS, -134, MType({0xaeb35a353fc5a503, 0xa6dbb053080e45fc})}, + {Sign::POS, -134, MType({0x67a5c05130c0f330, 0xa84fa581cc4edf9f})}, + {Sign::POS, -134, MType({0x4de5cafde1caf46f, 0xa9c3a08b12e65e81})}, + {Sign::POS, -134, MType({0x686fce3d160e88fd, 0xab37a16f0aff9d32})}, + {Sign::POS, -134, MType({0xde1375b3af6749a6, 0xacaba82de3c7b066})}, + {Sign::POS, -134, MType({0x243569048ac4affe, 0xadc2b114c632da56})}, + {Sign::POS, -134, MType({0xd6796227dcd39551, 0xaf36c21319b80ea2})}, + {Sign::POS, -134, MType({0xabc9265386172074, 0xb0aad8eccfb38d51})}, + {Sign::POS, -134, MType({0xcaac9f17896f2ce, 0xb21ef5a2175ac65e})}, + {Sign::POS, -134, MType({0x1c65a3c7f828972b, 0xb39318331fe56492})}, + {Sign::POS, -134, MType({0xabdc66446a4286d9, 0xb50740a0188d4daa})}, + {Sign::POS, -134, MType({0x2f3bbe8e8d72abec, 0xb67b6ee9308ea27b})}, + {Sign::POS, -134, MType({0xb67dbdd7f03d168c, 0xb7efa30e9727bf11})}, }, // -log2(r) for the third step, generated by SageMath with: // @@ -516,170 +513,170 @@ const LogRR LOG2_TABLE = { // r = 2^-21 * round( 2^21 / (1 + i*2^(-21)) ); // s, m, e = RealField(128)(r).log2().sign_mantissa_exponent(); // print("{Sign::NEG," if (s == 1) else "{Sign::POS,", e, ", - // format_hex(m), "},"); + // MType({", hex(m % 2^64), ",", hex((m >> 64) % 2^64), "})},"); /* .step_3 = */ { - {Sign::NEG, -142, 0xe6d3a96b'978fc16e'26f2c63c'0827ccbb_u128}, - {Sign::NEG, -142, 0xe3f107a9'fbfc50ca'4b56fe66'7c8ec091_u128}, - 
{Sign::NEG, -142, 0xe10e65d1'4b937265'647d7618'1aec10fc_u128}, - {Sign::NEG, -142, 0xde2bc3e1'8653b4f5'99e8f4d5'379eca79_u128}, - {Sign::NEG, -142, 0xdb4921da'ac3ba730'f07da899'90c20623_u128}, - {Sign::NEG, -142, 0xd8667fbc'bd49d7cd'4a812184'8531851a_u128}, - {Sign::NEG, -142, 0xd583dd87'b97cd580'679a4d85'4ae13619_u128}, - {Sign::NEG, -142, 0xd2a13b3b'a0d32eff'e4d17407'2487a514_u128}, - {Sign::NEG, -142, 0xcfbe98d8'734b7301'3c90319d'969b54be_u128}, - {Sign::NEG, -142, 0xccdbf65e'30e43039'c6a173b0'9ba301e6_u128}, - {Sign::NEG, -142, 0xc9f953cc'd99bf55e'b8317428'd7d8d06b_u128}, - {Sign::NEG, -142, 0xc716b124'6d715125'23cdb51b'cc2061cd_u128}, - {Sign::NEG, -142, 0xc4340e64'ec62d241'f964fc78'084fd515_u128}, - {Sign::NEG, -142, 0xc1516b8e'566f076a'06474fb1'5ccbb015_u128}, - {Sign::NEG, -142, 0xbe6ec8a0'ab947f51'f525ef6d'0b75b1c3_u128}, - {Sign::NEG, -142, 0xbb8c259b'ebd1c8ae'4e13532d'f7ee8da7_u128}, - {Sign::NEG, -142, 0xb8a98280'17257233'76832500'd72a9027_u128}, - {Sign::NEG, -142, 0xb5c6df4d'2d8e0a95'b14a3d28'5e592ba0_u128}, - {Sign::NEG, -142, 0xb2e43c03'2f0a2089'1e9e9dc9'711f6e20_u128}, - {Sign::NEG, -142, 0xb00198a2'1b9842c1'bc176e97'4f255fac_u128}, - {Sign::NEG, -142, 0xad1ef529'f336fff3'64acf87f'c0f648e6_u128}, - {Sign::NEG, -142, 0xaa3c519a'b5e4e6d1'd0b8a157'4433e1f8_u128}, - {Sign::NEG, -142, 0xa759adf4'63a08610'95f4e785'371c69a9_u128}, - {Sign::NEG, -142, 0xa4770a36'fc686c63'277d5db0'0363a46f_u128}, - {Sign::NEG, -142, 0xa1946662'803b287c'd5cea669'485ec36c_u128}, - {Sign::NEG, -142, 0x9eb1c276'ef174910'cec66fda'04833322_u128}, - {Sign::NEG, -142, 0x9bcf1e74'48fb5cd2'1da36f6e'be3851db_u128}, - {Sign::NEG, -142, 0x98ec7a5a'8de5f273'ab055d83'abfc0d82_u128}, - {Sign::NEG, -142, 0x9609d629'bdd598a8'3cecf110'dbda68e9_u128}, - {Sign::NEG, -142, 0x932731e1'd8c8de22'76bbdb56'5a37e84b_u128}, - {Sign::NEG, -142, 0x90448d82'debe5194'd934c388'57eee4f3_u128}, - {Sign::NEG, -142, 0x8d61e90c'cfb481b1'c27b427b'4fbfc7db_u128}, - {Sign::NEG, -142, 
0x8a7f447f'aba9fd2b'6e13de50'2b142b39_u128}, - {Sign::NEG, -142, 0x879c9fdb'729d52b3'f4e40620'6614e2ba_u128}, - {Sign::NEG, -142, 0x84b9fb20'248d10fd'4d320daa'3312ea6c_u128}, - {Sign::NEG, -142, 0x81d7564d'c177c6b9'4aa528fc'9d433c1a_u128}, - {Sign::NEG, -143, 0xfde962c8'92b80533'3c8ad047'559b1622_u128}, - {Sign::NEG, -143, 0xf82418c7'7870a69f'acf765a8'fc5bcc31_u128}, - {Sign::NEG, -143, 0xf25ece98'34168f1a'be238832'edd27f20_u128}, - {Sign::NEG, -143, 0xec99843a'c5a6dc07'02644bfc'a329b708_u128}, - {Sign::NEG, -143, 0xe6d439af'2d1eaac6'c6d05a78'8e614744_u128}, - {Sign::NEG, -143, 0xe10eeef5'6a7b18bc'133fe9cc'57a8c1d0_u128}, - {Sign::NEG, -143, 0xdb49a40d'7db94348'aa4cb429'195fb5dd_u128}, - {Sign::NEG, -143, 0xd58458f7'66d647ce'0951ef23'9abbb959_u128}, - {Sign::NEG, -143, 0xcfbf0db3'25cf43ad'686c430c'89143d35_u128}, - {Sign::NEG, -143, 0xc9f9c240'baa15447'ba79c248'afd42c12_u128}, - {Sign::NEG, -143, 0xc43476a0'254996fd'ad19e0a9'2f115327_u128}, - {Sign::NEG, -143, 0xbe6f2ad1'65c5292f'a8ad6ac3'b0c99520_u128}, - {Sign::NEG, -143, 0xb8a9ded4'7c11283d'd0567d4a'9cc5e6a1_u128}, - {Sign::NEG, -143, 0xb2e492a9'682ab188'01f87c65'4b231443_u128}, - {Sign::NEG, -143, 0xad1f4650'2a0ee26d'd6380b08'358051bc_u128}, - {Sign::NEG, -143, 0xa759f9c8'c1bad84e'a07b024d'26d391f6_u128}, - {Sign::NEG, -143, 0xa194ad13'2f2bb089'6ee868cb'69e3a7d8_u128}, - {Sign::NEG, -143, 0x9bcf602f'725e887d'0a6869ef'f6682f73_u128}, - {Sign::NEG, -143, 0x960a131d'8b507d87'f6a44d55'9ccf3f61_u128}, - {Sign::NEG, -143, 0x9044c5dd'79fead08'72066e1d'30a8e210_u128}, - {Sign::NEG, -143, 0x8a7f786f'3e66345c'75ba3245'b1b856af_u128}, - {Sign::NEG, -143, 0x84ba2ad2'd88430e1'b5ac0204'73ab198f_u128}, - {Sign::NEG, -144, 0xfde9ba10'90ab7feb'41127e3a'88eb6741_u128}, - {Sign::NEG, -144, 0xf25f1e1f'1baffdea'bf807875'22aca1c4_u128}, - {Sign::NEG, -144, 0xe6d481d1'5210167b'af00688b'14fa3adc_u128}, - {Sign::NEG, -144, 0xdb49e527'33c60457'4d72837c'8ab4d1e5_u128}, - {Sign::NEG, -144, 0xcfbf4820'c0cc0236'4e38ac27'bb252090_u128}, - 
{Sign::NEG, -144, 0xc434aabd'f91c4ad0'da3661f9'292f59e8_u128}, - {Sign::NEG, -144, 0xb8aa0cfe'dcb118de'8fd0af9b'dfd21488_u128}, - {Sign::NEG, -144, 0xad1f6ee3'6b84a716'82ee19a9'abf0bfa5_u128}, - {Sign::NEG, -144, 0xa194d06b'a591302f'3cf68d5b'5369a251_u128}, - {Sign::NEG, -144, 0x960a3197'8ad0eede'bcd34f38'c977647e_u128}, - {Sign::NEG, -144, 0x8a7f9267'1b3e1dda'76eee9c9'605e2143_u128}, - {Sign::NEG, -145, 0xfde9e5b4'ada5efae'aa6a3887'f0c803ab_u128}, - {Sign::NEG, -145, 0xe6d4a5e2'7b136f13'6e25927e'582ac191_u128}, - {Sign::NEG, -145, 0xcfbf6557'9eb92f4a'e2ebcac2'f3a8e9eb_u128}, - {Sign::NEG, -145, 0xb8aa2414'188ba5bb'9d9acc22'd5690751_u128}, - {Sign::NEG, -145, 0xa194e217'e87f47cb'1e12604b'6d4132ef_u128}, - {Sign::NEG, -145, 0x8a7f9f63'0e888add'cf340d2a'cb9b92a9_u128}, - {Sign::NEG, -146, 0xe6d4b7eb'1537c8ae'0dc5e49f'bde3c520_u128}, - {Sign::NEG, -146, 0xb8aa2f9e'b95b9332'0c074c95'57c01188_u128}, - {Sign::NEG, -146, 0x8a7fa5e1'09656009'f0f82818'ff9b654f_u128}, - {Sign::NEG, -147, 0xb8aa3564'0a7c33eb'd4cd6120'78bbe9b0_u128}, - {Sign::NEG, -148, 0xb8aa3846'b33aaecf'f08cf68f'42e09fa0_u128}, - {Sign::POS, 0, 0_u128}, - {Sign::POS, -148, 0xb8aa3e0c'0513f9b1'68bd0fac'df0ddaaf_u128}, - {Sign::POS, -147, 0xb8aa40ee'ae2ec9b3'192af653'dd41575b_u128}, - {Sign::POS, -146, 0x8a7fb2dd'018e4892'3b5c8984'2e540a51_u128}, - {Sign::POS, -146, 0xb8aa46b4'00c0bee3'34ad8ebd'd8b2750c_u128}, - {Sign::POS, -146, 0xe6d4dbfc'54c5dd1b'70b12bd6'98e5be74_u128}, - {Sign::POS, -145, 0x8a7fb95a'feda5c46'08c7e424'efbd90e1_u128}, - {Sign::POS, -145, 0xa1950570'7dd23344'31b8eba7'74a1de77_u128}, - {Sign::POS, -145, 0xb8aa523e'a755fe32'ee400e8c'68838733_u128}, - {Sign::POS, -145, 0xcfbf9fc5'7b7147be'0e71fa0b'5603bc2f_u128}, - {Sign::POS, -145, 0xe6d4ee04'fa2f9a92'7763c919'd8ac65f1_u128}, - {Sign::POS, -145, 0xfdea3cfd'239c815e'232b270b'b6046ec1_u128}, - {Sign::POS, -144, 0x8a7fc656'fbe1c368'106f3919'7e068972_u128}, - {Sign::POS, -144, 0x960a6e8b'bb581acc'4a4a6f40'12941bd9_u128}, - {Sign::POS, -144, 
0xa195171c'd0370c34'5bb34c11'20b3e54b_u128}, - {Sign::POS, -144, 0xad1fc00a'3a845cf9'6bb67313'92a3147a_u128}, - {Sign::POS, -144, 0xb8aa6953'fa45d275'2be1268d'cee3c8fc_u128}, - {Sign::POS, -144, 0xc43512fa'0f813201'd84158d5'd50251a9_u128}, - {Sign::POS, -144, 0xcfbfbcfc'7a3c40fa'3765bda1'5d0ef0fa_u128}, - {Sign::POS, -144, 0xdb4a675b'3a7cc4b9'9a5ddb55'f9cc27d9_u128}, - {Sign::POS, -144, 0xe6d51216'5048829b'dcba1c59'3d918775_u128}, - {Sign::POS, -144, 0xf25fbd2d'bba53ffd'648be060'e1e30a95_u128}, - {Sign::POS, -144, 0xfdea68a1'7c98c23b'22658dc2'f1bcf6e8_u128}, - {Sign::POS, -143, 0x84ba8a38'c9946759'48ad5162'fb4a236e_u128}, - {Sign::POS, -143, 0x8a7fe04e'ffad9560'db7fe378'9405ce3a_u128}, - {Sign::POS, -143, 0x90453693'609acde3'91b56e2e'4f2e5ed8_u128}, - {Sign::POS, -143, 0x960a8d05'ec5ef390'f8998880'c3bb4d76_u128}, - {Sign::POS, -143, 0x9bcfe3a6'a2fce918'e2b87805'2f67efee_u128}, - {Sign::POS, -143, 0xa1953a75'8477912b'67df3991'93f707c0_u128}, - {Sign::POS, -143, 0xa75a9172'90d1ce78'e51b89e4'd5d095e1_u128}, - {Sign::POS, -143, 0xad1fe89d'c80e83b1'fcbbee4e'dbf9f47d_u128}, - {Sign::POS, -143, 0xb2e53ff7'2a309387'964fbd58'b168371b_u128}, - {Sign::POS, -143, 0xb8aa977e'b73ae0aa'dea7276c'a7acd135_u128}, - {Sign::POS, -143, 0xbe6fef34'6f304dcd'47d33f7e'7afc83a6_u128}, - {Sign::POS, -143, 0xc4354718'5213bda0'892603b3'77909123_u128}, - {Sign::POS, -143, 0xc9fa9f2a'5fe812d6'9f32660a'a06239fb_u128}, - {Sign::POS, -143, 0xcfbff76a'98b03021'cbcc5504'd7407f6c_u128}, - {Sign::POS, -143, 0xd5854fd8'fc6ef834'9608c44d'06402ebe_u128}, - {Sign::POS, -143, 0xdb4aa875'8b274dc1'ca3db560'4a863477_u128}, - {Sign::POS, -143, 0xe1100140'44dc137c'7a024036'206c37d6_u128}, - {Sign::POS, -143, 0xe6d55a39'29902c17'fc2e9be8'90ff7ee3_u128}, - {Sign::POS, -143, 0xec9ab360'39467a47'ecdc275c'60da1b53_u128}, - {Sign::POS, -143, 0xf2600cb5'7401e0c0'2d6571e9'4056607f_u128}, - {Sign::POS, -143, 0xf8256638'd9c54234'e4664401'fd1ca2a7_u128}, - {Sign::POS, -143, 0xfdeabfea'6a93815a'7dbba7dc'b50b3fd7_u128}, - 
{Sign::POS, -142, 0x81d80ce5'1337c072'd541f90d'853c794b_u128}, - {Sign::POS, -142, 0x84bab9ec'06ae11c5'b08f6539'2ce8b75b_u128}, - {Sign::POS, -142, 0x879d670a'0fae2600'6e969a29'f8462436_u128}, - {Sign::POS, -142, 0x8a80143f'2e396e7d'cfc8cbca'a2bf130c_u128}, - {Sign::POS, -142, 0x8d62c18b'62515c98'b737e48c'19421e68_u128}, - {Sign::POS, -142, 0x90456eee'abf761ac'2a9689b9'97c50c0b_u128}, - {Sign::POS, -142, 0x93281c69'0b2cef13'52381fcc'c774d66b_u128}, - {Sign::POS, -142, 0x960ac9fa'7ff37629'7910cec1'dd92dc10_u128}, - {Sign::POS, -142, 0x98ed77a3'0a4c684a'0cb5866b'baff34cb_u128}, - {Sign::POS, -142, 0x9bd02562'aa3936d0'9d5c02c8'0c702d11_u128}, - {Sign::POS, -142, 0x9eb2d339'5fbb5318'dddad053'6b56e775_u128}, - {Sign::POS, -142, 0xa1958127'2ad42e7e'a3a9505d'7f71247a_u128}, - {Sign::POS, -142, 0xa4782f2c'0b853a5d'e6dfbd5d'210830d7_u128}, - {Sign::POS, -142, 0xa75add48'01cfe812'c2372f44'7bdcfa45_u128}, - {Sign::POS, -142, 0xaa3d8b7b'0db5a8f9'73099fd5'32c14b05_u128}, - {Sign::POS, -142, 0xad2039c5'2f37ee6e'5951eef4'83de2c37_u128}, - {Sign::POS, -142, 0xb002e826'665829cd'f7abe6ff'6da76f1e_u128}, - {Sign::POS, -142, 0xb2e5969e'b317cc74'f354411e'd47c5d7b_u128}, - {Sign::POS, -142, 0xb5c8452e'157847c0'1428a99b'a8f5911f_u128}, - {Sign::POS, -142, 0xb8aaf3d4'8d7b0d0c'44a7c433'0edff2c8_u128}, - {Sign::POS, -142, 0xbb8da292'1b218db6'91f1306a'84e4e07b_u128}, - {Sign::POS, -142, 0xbe705166'be6d3b1c'2bc58de4'0cdf7b6a_u128}, - {Sign::POS, -142, 0xc1530052'775f869a'648680b2'54df1d99_u128}, - {Sign::POS, -142, 0xc435af55'45f9e18e'b136b5ac'e0d6f74d_u128}, - {Sign::POS, -142, 0xc7185e6f'2a3dbd56'a979e6c4'34fad480_u128}, - {Sign::POS, -142, 0xc9fb0da0'242c8b50'0794df56'00c90a5a_u128}, - {Sign::POS, -142, 0xccddbce8'33c7bcd8'a86d8081'4ac18cf1_u128}, - {Sign::POS, -142, 0xcfc06c47'5910c34e'8b8ac57a'9cca2d56_u128}, - {Sign::POS, -142, 0xd2a31bbd'9409100f'd314c7e0'3140001f_u128}, - {Sign::POS, -142, 0xd585cb4a'e4b2147a'c3d4c40e'20b5ec89_u128}, - {Sign::POS, -142, 
0xd8687aef'4b0d41ed'c5351d72'9060644e_u128}, - {Sign::POS, -142, 0xdb4b2aaa'c71c09c7'614162e1'e12e445d_u128}, - {Sign::POS, -142, 0xde2dda7d'58dfdd66'44a652ea'df8ede85_u128}, - {Sign::POS, -142, 0xe1108a67'005a2e29'3eb1e02a'f3e52c3c_u128}, - {Sign::POS, -142, 0xe3f33a67'bd8c6d6f'415335a2'53a82aa2_u128}, - {Sign::POS, -142, 0xe6d5ea7f'90780c97'611abb08'33305fe1_u128}, + {Sign::NEG, -142, MType({0x26f2c63c0827ccbb, 0xe6d3a96b978fc16e})}, + {Sign::NEG, -142, MType({0x4b56fe667c8ec091, 0xe3f107a9fbfc50ca})}, + {Sign::NEG, -142, MType({0x647d76181aec10fc, 0xe10e65d14b937265})}, + {Sign::NEG, -142, MType({0x99e8f4d5379eca79, 0xde2bc3e18653b4f5})}, + {Sign::NEG, -142, MType({0xf07da89990c20623, 0xdb4921daac3ba730})}, + {Sign::NEG, -142, MType({0x4a8121848531851a, 0xd8667fbcbd49d7cd})}, + {Sign::NEG, -142, MType({0x679a4d854ae13619, 0xd583dd87b97cd580})}, + {Sign::NEG, -142, MType({0xe4d174072487a514, 0xd2a13b3ba0d32eff})}, + {Sign::NEG, -142, MType({0x3c90319d969b54be, 0xcfbe98d8734b7301})}, + {Sign::NEG, -142, MType({0xc6a173b09ba301e6, 0xccdbf65e30e43039})}, + {Sign::NEG, -142, MType({0xb8317428d7d8d06b, 0xc9f953ccd99bf55e})}, + {Sign::NEG, -142, MType({0x23cdb51bcc2061cd, 0xc716b1246d715125})}, + {Sign::NEG, -142, MType({0xf964fc78084fd515, 0xc4340e64ec62d241})}, + {Sign::NEG, -142, MType({0x6474fb15ccbb015, 0xc1516b8e566f076a})}, + {Sign::NEG, -142, MType({0xf525ef6d0b75b1c3, 0xbe6ec8a0ab947f51})}, + {Sign::NEG, -142, MType({0x4e13532df7ee8da7, 0xbb8c259bebd1c8ae})}, + {Sign::NEG, -142, MType({0x76832500d72a9027, 0xb8a9828017257233})}, + {Sign::NEG, -142, MType({0xb14a3d285e592ba0, 0xb5c6df4d2d8e0a95})}, + {Sign::NEG, -142, MType({0x1e9e9dc9711f6e20, 0xb2e43c032f0a2089})}, + {Sign::NEG, -142, MType({0xbc176e974f255fac, 0xb00198a21b9842c1})}, + {Sign::NEG, -142, MType({0x64acf87fc0f648e6, 0xad1ef529f336fff3})}, + {Sign::NEG, -142, MType({0xd0b8a1574433e1f8, 0xaa3c519ab5e4e6d1})}, + {Sign::NEG, -142, MType({0x95f4e785371c69a9, 0xa759adf463a08610})}, + {Sign::NEG, -142, 
MType({0x277d5db00363a46f, 0xa4770a36fc686c63})}, + {Sign::NEG, -142, MType({0xd5cea669485ec36c, 0xa1946662803b287c})}, + {Sign::NEG, -142, MType({0xcec66fda04833322, 0x9eb1c276ef174910})}, + {Sign::NEG, -142, MType({0x1da36f6ebe3851db, 0x9bcf1e7448fb5cd2})}, + {Sign::NEG, -142, MType({0xab055d83abfc0d82, 0x98ec7a5a8de5f273})}, + {Sign::NEG, -142, MType({0x3cecf110dbda68e9, 0x9609d629bdd598a8})}, + {Sign::NEG, -142, MType({0x76bbdb565a37e84b, 0x932731e1d8c8de22})}, + {Sign::NEG, -142, MType({0xd934c38857eee4f3, 0x90448d82debe5194})}, + {Sign::NEG, -142, MType({0xc27b427b4fbfc7db, 0x8d61e90ccfb481b1})}, + {Sign::NEG, -142, MType({0x6e13de502b142b39, 0x8a7f447faba9fd2b})}, + {Sign::NEG, -142, MType({0xf4e406206614e2ba, 0x879c9fdb729d52b3})}, + {Sign::NEG, -142, MType({0x4d320daa3312ea6c, 0x84b9fb20248d10fd})}, + {Sign::NEG, -142, MType({0x4aa528fc9d433c1a, 0x81d7564dc177c6b9})}, + {Sign::NEG, -143, MType({0x3c8ad047559b1622, 0xfde962c892b80533})}, + {Sign::NEG, -143, MType({0xacf765a8fc5bcc31, 0xf82418c77870a69f})}, + {Sign::NEG, -143, MType({0xbe238832edd27f20, 0xf25ece9834168f1a})}, + {Sign::NEG, -143, MType({0x2644bfca329b708, 0xec99843ac5a6dc07})}, + {Sign::NEG, -143, MType({0xc6d05a788e614744, 0xe6d439af2d1eaac6})}, + {Sign::NEG, -143, MType({0x133fe9cc57a8c1d0, 0xe10eeef56a7b18bc})}, + {Sign::NEG, -143, MType({0xaa4cb429195fb5dd, 0xdb49a40d7db94348})}, + {Sign::NEG, -143, MType({0x951ef239abbb959, 0xd58458f766d647ce})}, + {Sign::NEG, -143, MType({0x686c430c89143d35, 0xcfbf0db325cf43ad})}, + {Sign::NEG, -143, MType({0xba79c248afd42c12, 0xc9f9c240baa15447})}, + {Sign::NEG, -143, MType({0xad19e0a92f115327, 0xc43476a0254996fd})}, + {Sign::NEG, -143, MType({0xa8ad6ac3b0c99520, 0xbe6f2ad165c5292f})}, + {Sign::NEG, -143, MType({0xd0567d4a9cc5e6a1, 0xb8a9ded47c11283d})}, + {Sign::NEG, -143, MType({0x1f87c654b231443, 0xb2e492a9682ab188})}, + {Sign::NEG, -143, MType({0xd6380b08358051bc, 0xad1f46502a0ee26d})}, + {Sign::NEG, -143, MType({0xa07b024d26d391f6, 
0xa759f9c8c1bad84e})}, + {Sign::NEG, -143, MType({0x6ee868cb69e3a7d8, 0xa194ad132f2bb089})}, + {Sign::NEG, -143, MType({0xa6869eff6682f73, 0x9bcf602f725e887d})}, + {Sign::NEG, -143, MType({0xf6a44d559ccf3f61, 0x960a131d8b507d87})}, + {Sign::NEG, -143, MType({0x72066e1d30a8e210, 0x9044c5dd79fead08})}, + {Sign::NEG, -143, MType({0x75ba3245b1b856af, 0x8a7f786f3e66345c})}, + {Sign::NEG, -143, MType({0xb5ac020473ab198f, 0x84ba2ad2d88430e1})}, + {Sign::NEG, -144, MType({0x41127e3a88eb6741, 0xfde9ba1090ab7feb})}, + {Sign::NEG, -144, MType({0xbf80787522aca1c4, 0xf25f1e1f1baffdea})}, + {Sign::NEG, -144, MType({0xaf00688b14fa3adc, 0xe6d481d15210167b})}, + {Sign::NEG, -144, MType({0x4d72837c8ab4d1e5, 0xdb49e52733c60457})}, + {Sign::NEG, -144, MType({0x4e38ac27bb252090, 0xcfbf4820c0cc0236})}, + {Sign::NEG, -144, MType({0xda3661f9292f59e8, 0xc434aabdf91c4ad0})}, + {Sign::NEG, -144, MType({0x8fd0af9bdfd21488, 0xb8aa0cfedcb118de})}, + {Sign::NEG, -144, MType({0x82ee19a9abf0bfa5, 0xad1f6ee36b84a716})}, + {Sign::NEG, -144, MType({0x3cf68d5b5369a251, 0xa194d06ba591302f})}, + {Sign::NEG, -144, MType({0xbcd34f38c977647e, 0x960a31978ad0eede})}, + {Sign::NEG, -144, MType({0x76eee9c9605e2143, 0x8a7f92671b3e1dda})}, + {Sign::NEG, -145, MType({0xaa6a3887f0c803ab, 0xfde9e5b4ada5efae})}, + {Sign::NEG, -145, MType({0x6e25927e582ac191, 0xe6d4a5e27b136f13})}, + {Sign::NEG, -145, MType({0xe2ebcac2f3a8e9eb, 0xcfbf65579eb92f4a})}, + {Sign::NEG, -145, MType({0x9d9acc22d5690751, 0xb8aa2414188ba5bb})}, + {Sign::NEG, -145, MType({0x1e12604b6d4132ef, 0xa194e217e87f47cb})}, + {Sign::NEG, -145, MType({0xcf340d2acb9b92a9, 0x8a7f9f630e888add})}, + {Sign::NEG, -146, MType({0xdc5e49fbde3c520, 0xe6d4b7eb1537c8ae})}, + {Sign::NEG, -146, MType({0xc074c9557c01188, 0xb8aa2f9eb95b9332})}, + {Sign::NEG, -146, MType({0xf0f82818ff9b654f, 0x8a7fa5e109656009})}, + {Sign::NEG, -147, MType({0xd4cd612078bbe9b0, 0xb8aa35640a7c33eb})}, + {Sign::NEG, -148, MType({0xf08cf68f42e09fa0, 0xb8aa3846b33aaecf})}, + {Sign::POS, 0, 
MType({0x0, 0x0})}, + {Sign::POS, -148, MType({0x68bd0facdf0ddaaf, 0xb8aa3e0c0513f9b1})}, + {Sign::POS, -147, MType({0x192af653dd41575b, 0xb8aa40eeae2ec9b3})}, + {Sign::POS, -146, MType({0x3b5c89842e540a51, 0x8a7fb2dd018e4892})}, + {Sign::POS, -146, MType({0x34ad8ebdd8b2750c, 0xb8aa46b400c0bee3})}, + {Sign::POS, -146, MType({0x70b12bd698e5be74, 0xe6d4dbfc54c5dd1b})}, + {Sign::POS, -145, MType({0x8c7e424efbd90e1, 0x8a7fb95afeda5c46})}, + {Sign::POS, -145, MType({0x31b8eba774a1de77, 0xa19505707dd23344})}, + {Sign::POS, -145, MType({0xee400e8c68838733, 0xb8aa523ea755fe32})}, + {Sign::POS, -145, MType({0xe71fa0b5603bc2f, 0xcfbf9fc57b7147be})}, + {Sign::POS, -145, MType({0x7763c919d8ac65f1, 0xe6d4ee04fa2f9a92})}, + {Sign::POS, -145, MType({0x232b270bb6046ec1, 0xfdea3cfd239c815e})}, + {Sign::POS, -144, MType({0x106f39197e068972, 0x8a7fc656fbe1c368})}, + {Sign::POS, -144, MType({0x4a4a6f4012941bd9, 0x960a6e8bbb581acc})}, + {Sign::POS, -144, MType({0x5bb34c1120b3e54b, 0xa195171cd0370c34})}, + {Sign::POS, -144, MType({0x6bb6731392a3147a, 0xad1fc00a3a845cf9})}, + {Sign::POS, -144, MType({0x2be1268dcee3c8fc, 0xb8aa6953fa45d275})}, + {Sign::POS, -144, MType({0xd84158d5d50251a9, 0xc43512fa0f813201})}, + {Sign::POS, -144, MType({0x3765bda15d0ef0fa, 0xcfbfbcfc7a3c40fa})}, + {Sign::POS, -144, MType({0x9a5ddb55f9cc27d9, 0xdb4a675b3a7cc4b9})}, + {Sign::POS, -144, MType({0xdcba1c593d918775, 0xe6d512165048829b})}, + {Sign::POS, -144, MType({0x648be060e1e30a95, 0xf25fbd2dbba53ffd})}, + {Sign::POS, -144, MType({0x22658dc2f1bcf6e8, 0xfdea68a17c98c23b})}, + {Sign::POS, -143, MType({0x48ad5162fb4a236e, 0x84ba8a38c9946759})}, + {Sign::POS, -143, MType({0xdb7fe3789405ce3a, 0x8a7fe04effad9560})}, + {Sign::POS, -143, MType({0x91b56e2e4f2e5ed8, 0x90453693609acde3})}, + {Sign::POS, -143, MType({0xf8998880c3bb4d76, 0x960a8d05ec5ef390})}, + {Sign::POS, -143, MType({0xe2b878052f67efee, 0x9bcfe3a6a2fce918})}, + {Sign::POS, -143, MType({0x67df399193f707c0, 0xa1953a758477912b})}, + {Sign::POS, -143, 
MType({0xe51b89e4d5d095e1, 0xa75a917290d1ce78})}, + {Sign::POS, -143, MType({0xfcbbee4edbf9f47d, 0xad1fe89dc80e83b1})}, + {Sign::POS, -143, MType({0x964fbd58b168371b, 0xb2e53ff72a309387})}, + {Sign::POS, -143, MType({0xdea7276ca7acd135, 0xb8aa977eb73ae0aa})}, + {Sign::POS, -143, MType({0x47d33f7e7afc83a6, 0xbe6fef346f304dcd})}, + {Sign::POS, -143, MType({0x892603b377909123, 0xc43547185213bda0})}, + {Sign::POS, -143, MType({0x9f32660aa06239fb, 0xc9fa9f2a5fe812d6})}, + {Sign::POS, -143, MType({0xcbcc5504d7407f6c, 0xcfbff76a98b03021})}, + {Sign::POS, -143, MType({0x9608c44d06402ebe, 0xd5854fd8fc6ef834})}, + {Sign::POS, -143, MType({0xca3db5604a863477, 0xdb4aa8758b274dc1})}, + {Sign::POS, -143, MType({0x7a024036206c37d6, 0xe110014044dc137c})}, + {Sign::POS, -143, MType({0xfc2e9be890ff7ee3, 0xe6d55a3929902c17})}, + {Sign::POS, -143, MType({0xecdc275c60da1b53, 0xec9ab36039467a47})}, + {Sign::POS, -143, MType({0x2d6571e94056607f, 0xf2600cb57401e0c0})}, + {Sign::POS, -143, MType({0xe4664401fd1ca2a7, 0xf8256638d9c54234})}, + {Sign::POS, -143, MType({0x7dbba7dcb50b3fd7, 0xfdeabfea6a93815a})}, + {Sign::POS, -142, MType({0xd541f90d853c794b, 0x81d80ce51337c072})}, + {Sign::POS, -142, MType({0xb08f65392ce8b75b, 0x84bab9ec06ae11c5})}, + {Sign::POS, -142, MType({0x6e969a29f8462436, 0x879d670a0fae2600})}, + {Sign::POS, -142, MType({0xcfc8cbcaa2bf130c, 0x8a80143f2e396e7d})}, + {Sign::POS, -142, MType({0xb737e48c19421e68, 0x8d62c18b62515c98})}, + {Sign::POS, -142, MType({0x2a9689b997c50c0b, 0x90456eeeabf761ac})}, + {Sign::POS, -142, MType({0x52381fccc774d66b, 0x93281c690b2cef13})}, + {Sign::POS, -142, MType({0x7910cec1dd92dc10, 0x960ac9fa7ff37629})}, + {Sign::POS, -142, MType({0xcb5866bbaff34cb, 0x98ed77a30a4c684a})}, + {Sign::POS, -142, MType({0x9d5c02c80c702d11, 0x9bd02562aa3936d0})}, + {Sign::POS, -142, MType({0xdddad0536b56e775, 0x9eb2d3395fbb5318})}, + {Sign::POS, -142, MType({0xa3a9505d7f71247a, 0xa19581272ad42e7e})}, + {Sign::POS, -142, MType({0xe6dfbd5d210830d7, 
0xa4782f2c0b853a5d})}, + {Sign::POS, -142, MType({0xc2372f447bdcfa45, 0xa75add4801cfe812})}, + {Sign::POS, -142, MType({0x73099fd532c14b05, 0xaa3d8b7b0db5a8f9})}, + {Sign::POS, -142, MType({0x5951eef483de2c37, 0xad2039c52f37ee6e})}, + {Sign::POS, -142, MType({0xf7abe6ff6da76f1e, 0xb002e826665829cd})}, + {Sign::POS, -142, MType({0xf354411ed47c5d7b, 0xb2e5969eb317cc74})}, + {Sign::POS, -142, MType({0x1428a99ba8f5911f, 0xb5c8452e157847c0})}, + {Sign::POS, -142, MType({0x44a7c4330edff2c8, 0xb8aaf3d48d7b0d0c})}, + {Sign::POS, -142, MType({0x91f1306a84e4e07b, 0xbb8da2921b218db6})}, + {Sign::POS, -142, MType({0x2bc58de40cdf7b6a, 0xbe705166be6d3b1c})}, + {Sign::POS, -142, MType({0x648680b254df1d99, 0xc1530052775f869a})}, + {Sign::POS, -142, MType({0xb136b5ace0d6f74d, 0xc435af5545f9e18e})}, + {Sign::POS, -142, MType({0xa979e6c434fad480, 0xc7185e6f2a3dbd56})}, + {Sign::POS, -142, MType({0x794df5600c90a5a, 0xc9fb0da0242c8b50})}, + {Sign::POS, -142, MType({0xa86d80814ac18cf1, 0xccddbce833c7bcd8})}, + {Sign::POS, -142, MType({0x8b8ac57a9cca2d56, 0xcfc06c475910c34e})}, + {Sign::POS, -142, MType({0xd314c7e03140001f, 0xd2a31bbd9409100f})}, + {Sign::POS, -142, MType({0xc3d4c40e20b5ec89, 0xd585cb4ae4b2147a})}, + {Sign::POS, -142, MType({0xc5351d729060644e, 0xd8687aef4b0d41ed})}, + {Sign::POS, -142, MType({0x614162e1e12e445d, 0xdb4b2aaac71c09c7})}, + {Sign::POS, -142, MType({0x44a652eadf8ede85, 0xde2dda7d58dfdd66})}, + {Sign::POS, -142, MType({0x3eb1e02af3e52c3c, 0xe1108a67005a2e29})}, + {Sign::POS, -142, MType({0x415335a253a82aa2, 0xe3f33a67bd8c6d6f})}, + {Sign::POS, -142, MType({0x611abb0833305fe1, 0xe6d5ea7f90780c97})}, }, // -log2(r) for the fourth step, generated by SageMath with: // @@ -687,139 +684,139 @@ const LogRR LOG2_TABLE = { // r = 2^-28 * round( 2^28 / (1 + i*2^(-28)) ); // s, m, e = RealField(128)(r).log2().sign_mantissa_exponent(); // print("{Sign::NEG," if (s == 1) else "{Sign::POS,", e, ", - // format_hex(m), "},"); + // MType({", hex(m % 2^64), ",", hex((m >> 64) 
% 2^64), "})},"); /* .step_4 = */ { - {Sign::NEG, -149, 0xbb8ce299'0b5d0b90'ef1bffe5'65ce0a46_u128}, - {Sign::NEG, -149, 0xb8aa39b8'07a576e4'bea32445'60ca3d99_u128}, - {Sign::NEG, -149, 0xb5c790d6'd5c354df'8b91f71c'eefa31a2_u128}, - {Sign::NEG, -149, 0xb2e4e7f5'75b6a57b'9096e3d6'84001c0e_u128}, - {Sign::NEG, -149, 0xb0023f13'e77f68b3'086054c7'94367f36_u128}, - {Sign::NEG, -149, 0xad1f9632'2b1d9e80'2d9cb330'94afe4de_u128}, - {Sign::NEG, -149, 0xaa3ced50'409146dd'3afa673c'fb3698f3_u128}, - {Sign::NEG, -149, 0xa75a446e'27da61c4'6b27d803'3e4c6450_u128}, - {Sign::NEG, -149, 0xa4779b8b'e0f8ef2f'f8d36b84'd52a477b_u128}, - {Sign::NEG, -149, 0xa194f2a9'6becef1a'1eab86ae'37c03565_u128}, - {Sign::NEG, -149, 0x9eb249c6'c8b6617d'175e8d56'deb4ce2c_u128}, - {Sign::NEG, -149, 0x9bcfa0e3'f7554653'1d9ae241'436519da_u128}, - {Sign::NEG, -149, 0x98ecf800'f7c99d96'6c0ee71a'dfe44325_u128}, - {Sign::NEG, -149, 0x960a4f1d'ca136741'3d68fc7c'2efb522f_u128}, - {Sign::NEG, -149, 0x9327a63a'6e32a34d'cc5781e8'ac28e749_u128}, - {Sign::NEG, -149, 0x9044fd56'e42751b6'5388d5ce'd3a0f5af_u128}, - {Sign::NEG, -149, 0x8d625473'2bf17275'0dab5588'224c7e4a_u128}, - {Sign::NEG, -149, 0x8a7fab8f'45910584'356d5d59'15c94a70_u128}, - {Sign::NEG, -149, 0x879d02ab'31060ade'057d4871'2c69a6a7_u128}, - {Sign::NEG, -149, 0x84ba59c6'ee50827c'b88970ea'e5341d60_u128}, - {Sign::NEG, -149, 0x81d7b0e2'7d706c5a'89402fcb'bfe331bb_u128}, - {Sign::NEG, -150, 0xfdea0ffb'bccb90e3'649fba08'79ca348b_u128}, - {Sign::NEG, -150, 0xf824be32'22612d78'dccd9edf'bab6f777_u128}, - {Sign::NEG, -150, 0xf25f6c68'2ba1ae69'f066b9aa'4636478e_u128}, - {Sign::NEG, -150, 0xec9a1a9d'd88d13ab'14c7b3cb'21578781_u128}, - {Sign::NEG, -150, 0xe6d4c8d3'29235d30'bf4d347b'528f56e1_u128}, - {Sign::NEG, -150, 0xe10f7708'1d648aef'6553e0c9'e1b70799_u128}, - {Sign::NEG, -150, 0xdb4a253c'b5509cdb'7c385b9b'd80c1375_u128}, - {Sign::NEG, -150, 0xd584d370'f0e792e9'795745ac'402f919d_u128}, - {Sign::NEG, -150, 0xcfbf81a4'd0296d0d'd20d3d8c'2625ac1b_u128}, - {Sign::NEG, 
-150, 0xc9fa2fd8'53162b3c'fbb6dfa2'97551554_u128}, - {Sign::NEG, -150, 0xc434de0b'79adcd6b'6bb0c62c'a2867d91_u128}, - {Sign::NEG, -150, 0xbe6f8c3e'43f0538d'9757893d'57e40877_u128}, - {Sign::NEG, -150, 0xb8aa3a70'b1ddbd97'f407bebd'c8f8c28e_u128}, - {Sign::NEG, -150, 0xb2e4e8a2'c3760b7e'f71dfa6d'08b016be_u128}, - {Sign::NEG, -150, 0xad1f96d4'78b93d37'15f6cde0'2b5543ce_u128}, - {Sign::NEG, -150, 0xa75a4505'd1a752b4'c5eec882'4692d1e9_u128}, - {Sign::NEG, -150, 0xa194f336'ce404bec'7c627794'7172081a_u128}, - {Sign::NEG, -150, 0x9bcfa167'6e8428d2'aeae662d'c45a61ce_u128}, - {Sign::NEG, -150, 0x960a4f97'b272e95b'd22f1d3b'59110455_u128}, - {Sign::NEG, -150, 0x9044fdc7'9a0c8d7c'5c412380'4ab83462_u128}, - {Sign::NEG, -150, 0x8a7fabf7'25511528'c240fd95'b5cecb89_u128}, - {Sign::NEG, -150, 0x84ba5a26'54408055'798b2dea'b82fadc4_u128}, - {Sign::NEG, -151, 0xfdea10aa'4db59ded'eef86988'e2227ddb_u128}, - {Sign::NEG, -151, 0xf25f6d07'3a400203'62e1207c'0209b090_u128}, - {Sign::NEG, -151, 0xe6d4c963'6e202cd4'39897891'13ec7bee_u128}, - {Sign::NEG, -151, 0xdb4a25be'e9561e49'5daa6556'5e562909_u128}, - {Sign::NEG, -151, 0xcfbf8219'abe1d64b'b9fcd606'2a84acbd_u128}, - {Sign::NEG, -151, 0xc434de73'b5c354c4'3939b586'c46792b3_u128}, - {Sign::NEG, -151, 0xb8aa3acd'06fa999b'c619ea6a'7a9ee85e_u128}, - {Sign::NEG, -151, 0xad1f9725'9f87a4bb'4b5656ef'9e7a27fd_u128}, - {Sign::NEG, -151, 0xa194f37d'7f6a760b'b3a7d900'83f7239c_u128}, - {Sign::NEG, -151, 0x960a4fd4'a6a30d75'e9c74a33'81c0f016_u128}, - {Sign::NEG, -151, 0x8a7fac2b'15316ae2'd86d7fca'f12ed012_u128}, - {Sign::NEG, -152, 0xfdea1101'962b1c76'd4a6956a'5c863e0f_u128}, - {Sign::NEG, -152, 0xe6d4c9ab'909eeed1'1462ef19'2f547877_u128}, - {Sign::NEG, -152, 0xcfbf8254'19be4ca6'45819d2f'1d72eb8b_u128}, - {Sign::NEG, -152, 0xb8aa3afb'318935c8'3d742790'eedbe719_u128}, - {Sign::NEG, -152, 0xa194f3a0'd7ffaa08'd1ac0d7b'70d74492_u128}, - {Sign::NEG, -152, 0x8a7fac45'0d21a939'd79ac583'75f83d0c_u128}, - {Sign::NEG, -153, 
0xe6d4c9cf'a1de665a'49637b2b'ac367e87_u128}, - {Sign::NEG, -153, 0xb8aa3b12'46d08f69'1cc4b5ee'dcc78b35_u128}, - {Sign::NEG, -153, 0x8a7fac52'0919cd43'd43bf48a'42745836_u128}, - {Sign::NEG, -154, 0xb8aa3b1d'd1743f1c'3557bdcf'592619eb_u128}, - {Sign::NEG, -155, 0xb8aa3b23'96c617ae'6bdc2e83'd3ebb0c4_u128}, - {Sign::POS, 0, 0_u128}, - {Sign::POS, -155, 0xb8aa3b2f'2169ca44'2d5b4005'0e44e8ab_u128}, - {Sign::POS, -154, 0xb8aa3b34'e6bba447'b8560371'b8f04afe_u128}, - {Sign::POS, -153, 0x8a7fac6c'010a1f14'c79a43cc'c70459cc_u128}, - {Sign::POS, -153, 0xb8aa3b40'715f59c0'22c25632'f519f77f_u128}, - {Sign::POS, -153, 0xe6d4ca17'c45d8282'42c10a31'4e35fb9e_u128}, - {Sign::POS, -152, 0x8a7fac78'fd024cdb'be5a212e'd7b949e4_u128}, - {Sign::POS, -152, 0xa194f3e7'892a4fde'12dcf94e'f5c5b918_u128}, - {Sign::POS, -152, 0xb8aa3b57'86a6ca76'49781013'e57110ce_u128}, - {Sign::POS, -152, 0xcfbf82c8'f577bcd2'8cba70c0'85c12cb3_u128}, - {Sign::POS, -152, 0xe6d4ca3b'd59d2721'07332f3f'b09328b8_u128}, - {Sign::POS, -152, 0xfdea11b0'2717098f'e3716824'3a9d8b14_u128}, - {Sign::POS, -151, 0x8a7fac92'f4f2b226'a6022054'79b93722_u128}, - {Sign::POS, -151, 0x960a504e'8f041bc3'b5bd7358'52c0d583_u128}, - {Sign::POS, -151, 0xa194f40a'e1bfc1b6'36324863'0b0d812d_u128}, - {Sign::POS, -151, 0xad1f97c7'ed25a415'3ca83f0e'02b823c0_u128}, - {Sign::POS, -151, 0xb8aa3b85'b135c2f7'de66fb46'974bc4fd_u128}, - {Sign::POS, -151, 0xc434df44'2df01e75'30b6254e'23c69fc2_u128}, - {Sign::POS, -151, 0xcfbf8303'6354b6a4'48dd69ba'009b370c_u128}, - {Sign::POS, -151, 0xdb4a26c3'51638b9c'3c247973'83b16af5_u128}, - {Sign::POS, -151, 0xe6d4ca83'f81c9d74'1fd309b8'00678db7_u128}, - {Sign::POS, -151, 0xf25f6e45'577fec43'0930d418'c79378a3_u128}, - {Sign::POS, -151, 0xfdea1207'6f8d7820'0d85967b'2783a12c_u128}, - {Sign::POS, -150, 0x84ba5ae5'2022a091'210c898c'360016ed_u128}, - {Sign::POS, -150, 0x8a7facc6'e4d3a3b0'5e19883e'ef2605ab_u128}, - {Sign::POS, -150, 0x9044fea9'05d9c579'488dacc6'629300ae_u128}, - {Sign::POS, -150, 
0x960a508b'833505f7'6b0cdebd'3264e3e3_u128}, - {Sign::POS, -150, 0x9bcfa26e'5ce56536'503b07e7'ff788dc2_u128}, - {Sign::POS, -150, 0xa194f451'92eae341'82bc1435'696a69d1_u128}, - {Sign::POS, -150, 0xa75a4635'25458024'8d33f1be'0e96fb1f_u128}, - {Sign::POS, -150, 0xad1f9819'13f53bea'fa4690c4'8c1b66c9_u128}, - {Sign::POS, -150, 0xb2e4e9fd'5efa16a0'5497e3b5'7dd5fe75_u128}, - {Sign::POS, -150, 0xb8aa3be2'06541050'26cbdf27'7e66cad5_u128}, - {Sign::POS, -150, 0xbe6f8dc7'0a032905'fb8679db'27301625_u128}, - {Sign::POS, -150, 0xc434dfac'6a0760cd'5d6bacbb'1056f6aa_u128}, - {Sign::POS, -150, 0xc9fa3192'2660b7b1'd71f72db'd0c3d936_u128}, - {Sign::POS, -150, 0xcfbf8378'3f0f2dbe'f345c97b'fe230ba2_u128}, - {Sign::POS, -150, 0xd584d55e'b412c300'3c82b004'2ce54751_u128}, - {Sign::POS, -150, 0xdb4a2745'856b7781'3d7a2806'f0403bae_u128}, - {Sign::POS, -150, 0xe10f792c'b3194b4d'80d03540'da2f18ae_u128}, - {Sign::POS, -150, 0xe6d4cb14'3d1c3e70'9128dd98'7b73194f_u128}, - {Sign::POS, -150, 0xec9a1cfc'237450f5'f928291e'63940e14_u128}, - {Sign::POS, -150, 0xf25f6ee4'662182e9'4372220d'20e0e78a_u128}, - {Sign::POS, -150, 0xf824c0cd'0523d455'faaad4c9'407040c7_u128}, - {Sign::POS, -150, 0xfdea12b6'007b4547'a9764fe1'4e20e9e4_u128}, - {Sign::POS, -149, 0x81d7b24f'ac13eae4'ed3c5206'ea4d3942_u128}, - {Sign::POS, -149, 0x84ba5b44'8614c2f4'0c2af218'aea6da27_u128}, - {Sign::POS, -149, 0x879d0439'8e402ad6'f6d912ac'383aaeba_u128}, - {Sign::POS, -149, 0x8a7fad2e'c4962293'7298bf5c'ca8b3d95_u128}, - {Sign::POS, -149, 0x8d625624'2916aa2f'44bc04da'a8808214_u128}, - {Sign::POS, -149, 0x9044ff19'bbc1c1b0'3294f0eb'14683198_u128}, - {Sign::POS, -149, 0x9327a80f'7c97691c'01759268'4ff600c3_u128}, - {Sign::POS, -149, 0x960a5105'6b97a078'76aff941'9c43e8b9_u128}, - {Sign::POS, -149, 0x98ecf9fb'88c267cb'5796367b'39d26c63_u128}, - {Sign::POS, -149, 0x9bcfa2f1'd417bf1a'697a5c2e'6888ddaa_u128}, - {Sign::POS, -149, 0x9eb24be8'4d97a66b'71ae7d89'67b5a2b7_u128}, - {Sign::POS, -149, 0xa194f4de'f5421dc4'3584aecf'760e7b39_u128}, - 
{Sign::POS, -149, 0xa4779dd5'cb17252a'7a4f0558'd1b0c59e_u128}, - {Sign::POS, -149, 0xa75a46cc'cf16bca4'055f9792'b821c455_u128}, - {Sign::POS, -149, 0xaa3cefc4'0140e436'9c087cff'664ee311_u128}, - {Sign::POS, -149, 0xad1f98bb'61959be8'039bce36'188dfc04_u128}, - {Sign::POS, -149, 0xb00241b2'f014e3be'016ba4e3'0a9d9d21_u128}, - {Sign::POS, -149, 0xb2e4eaaa'acbebbbe'5aca1bc7'77a54d5e_u128}, - {Sign::POS, -149, 0xb5c793a2'979323ee'd5094eb9'9a35d1f0_u128}, - {Sign::POS, -149, 0xb8aa3c9a'b0921c55'357b5aa4'ac49738d_u128}, + {Sign::NEG, -149, MType({0xef1bffe565ce0a46, 0xbb8ce2990b5d0b90})}, + {Sign::NEG, -149, MType({0xbea3244560ca3d99, 0xb8aa39b807a576e4})}, + {Sign::NEG, -149, MType({0x8b91f71ceefa31a2, 0xb5c790d6d5c354df})}, + {Sign::NEG, -149, MType({0x9096e3d684001c0e, 0xb2e4e7f575b6a57b})}, + {Sign::NEG, -149, MType({0x86054c794367f36, 0xb0023f13e77f68b3})}, + {Sign::NEG, -149, MType({0x2d9cb33094afe4de, 0xad1f96322b1d9e80})}, + {Sign::NEG, -149, MType({0x3afa673cfb3698f3, 0xaa3ced50409146dd})}, + {Sign::NEG, -149, MType({0x6b27d8033e4c6450, 0xa75a446e27da61c4})}, + {Sign::NEG, -149, MType({0xf8d36b84d52a477b, 0xa4779b8be0f8ef2f})}, + {Sign::NEG, -149, MType({0x1eab86ae37c03565, 0xa194f2a96becef1a})}, + {Sign::NEG, -149, MType({0x175e8d56deb4ce2c, 0x9eb249c6c8b6617d})}, + {Sign::NEG, -149, MType({0x1d9ae241436519da, 0x9bcfa0e3f7554653})}, + {Sign::NEG, -149, MType({0x6c0ee71adfe44325, 0x98ecf800f7c99d96})}, + {Sign::NEG, -149, MType({0x3d68fc7c2efb522f, 0x960a4f1dca136741})}, + {Sign::NEG, -149, MType({0xcc5781e8ac28e749, 0x9327a63a6e32a34d})}, + {Sign::NEG, -149, MType({0x5388d5ced3a0f5af, 0x9044fd56e42751b6})}, + {Sign::NEG, -149, MType({0xdab5588224c7e4a, 0x8d6254732bf17275})}, + {Sign::NEG, -149, MType({0x356d5d5915c94a70, 0x8a7fab8f45910584})}, + {Sign::NEG, -149, MType({0x57d48712c69a6a7, 0x879d02ab31060ade})}, + {Sign::NEG, -149, MType({0xb88970eae5341d60, 0x84ba59c6ee50827c})}, + {Sign::NEG, -149, MType({0x89402fcbbfe331bb, 0x81d7b0e27d706c5a})}, + {Sign::NEG, 
-150, MType({0x649fba0879ca348b, 0xfdea0ffbbccb90e3})}, + {Sign::NEG, -150, MType({0xdccd9edfbab6f777, 0xf824be3222612d78})}, + {Sign::NEG, -150, MType({0xf066b9aa4636478e, 0xf25f6c682ba1ae69})}, + {Sign::NEG, -150, MType({0x14c7b3cb21578781, 0xec9a1a9dd88d13ab})}, + {Sign::NEG, -150, MType({0xbf4d347b528f56e1, 0xe6d4c8d329235d30})}, + {Sign::NEG, -150, MType({0x6553e0c9e1b70799, 0xe10f77081d648aef})}, + {Sign::NEG, -150, MType({0x7c385b9bd80c1375, 0xdb4a253cb5509cdb})}, + {Sign::NEG, -150, MType({0x795745ac402f919d, 0xd584d370f0e792e9})}, + {Sign::NEG, -150, MType({0xd20d3d8c2625ac1b, 0xcfbf81a4d0296d0d})}, + {Sign::NEG, -150, MType({0xfbb6dfa297551554, 0xc9fa2fd853162b3c})}, + {Sign::NEG, -150, MType({0x6bb0c62ca2867d91, 0xc434de0b79adcd6b})}, + {Sign::NEG, -150, MType({0x9757893d57e40877, 0xbe6f8c3e43f0538d})}, + {Sign::NEG, -150, MType({0xf407bebdc8f8c28e, 0xb8aa3a70b1ddbd97})}, + {Sign::NEG, -150, MType({0xf71dfa6d08b016be, 0xb2e4e8a2c3760b7e})}, + {Sign::NEG, -150, MType({0x15f6cde02b5543ce, 0xad1f96d478b93d37})}, + {Sign::NEG, -150, MType({0xc5eec8824692d1e9, 0xa75a4505d1a752b4})}, + {Sign::NEG, -150, MType({0x7c6277947172081a, 0xa194f336ce404bec})}, + {Sign::NEG, -150, MType({0xaeae662dc45a61ce, 0x9bcfa1676e8428d2})}, + {Sign::NEG, -150, MType({0xd22f1d3b59110455, 0x960a4f97b272e95b})}, + {Sign::NEG, -150, MType({0x5c4123804ab83462, 0x9044fdc79a0c8d7c})}, + {Sign::NEG, -150, MType({0xc240fd95b5cecb89, 0x8a7fabf725511528})}, + {Sign::NEG, -150, MType({0x798b2deab82fadc4, 0x84ba5a2654408055})}, + {Sign::NEG, -151, MType({0xeef86988e2227ddb, 0xfdea10aa4db59ded})}, + {Sign::NEG, -151, MType({0x62e1207c0209b090, 0xf25f6d073a400203})}, + {Sign::NEG, -151, MType({0x3989789113ec7bee, 0xe6d4c9636e202cd4})}, + {Sign::NEG, -151, MType({0x5daa65565e562909, 0xdb4a25bee9561e49})}, + {Sign::NEG, -151, MType({0xb9fcd6062a84acbd, 0xcfbf8219abe1d64b})}, + {Sign::NEG, -151, MType({0x3939b586c46792b3, 0xc434de73b5c354c4})}, + {Sign::NEG, -151, MType({0xc619ea6a7a9ee85e, 
0xb8aa3acd06fa999b})}, + {Sign::NEG, -151, MType({0x4b5656ef9e7a27fd, 0xad1f97259f87a4bb})}, + {Sign::NEG, -151, MType({0xb3a7d90083f7239c, 0xa194f37d7f6a760b})}, + {Sign::NEG, -151, MType({0xe9c74a3381c0f016, 0x960a4fd4a6a30d75})}, + {Sign::NEG, -151, MType({0xd86d7fcaf12ed012, 0x8a7fac2b15316ae2})}, + {Sign::NEG, -152, MType({0xd4a6956a5c863e0f, 0xfdea1101962b1c76})}, + {Sign::NEG, -152, MType({0x1462ef192f547877, 0xe6d4c9ab909eeed1})}, + {Sign::NEG, -152, MType({0x45819d2f1d72eb8b, 0xcfbf825419be4ca6})}, + {Sign::NEG, -152, MType({0x3d742790eedbe719, 0xb8aa3afb318935c8})}, + {Sign::NEG, -152, MType({0xd1ac0d7b70d74492, 0xa194f3a0d7ffaa08})}, + {Sign::NEG, -152, MType({0xd79ac58375f83d0c, 0x8a7fac450d21a939})}, + {Sign::NEG, -153, MType({0x49637b2bac367e87, 0xe6d4c9cfa1de665a})}, + {Sign::NEG, -153, MType({0x1cc4b5eedcc78b35, 0xb8aa3b1246d08f69})}, + {Sign::NEG, -153, MType({0xd43bf48a42745836, 0x8a7fac520919cd43})}, + {Sign::NEG, -154, MType({0x3557bdcf592619eb, 0xb8aa3b1dd1743f1c})}, + {Sign::NEG, -155, MType({0x6bdc2e83d3ebb0c4, 0xb8aa3b2396c617ae})}, + {Sign::POS, 0, MType({0x0, 0x0})}, + {Sign::POS, -155, MType({0x2d5b40050e44e8ab, 0xb8aa3b2f2169ca44})}, + {Sign::POS, -154, MType({0xb8560371b8f04afe, 0xb8aa3b34e6bba447})}, + {Sign::POS, -153, MType({0xc79a43ccc70459cc, 0x8a7fac6c010a1f14})}, + {Sign::POS, -153, MType({0x22c25632f519f77f, 0xb8aa3b40715f59c0})}, + {Sign::POS, -153, MType({0x42c10a314e35fb9e, 0xe6d4ca17c45d8282})}, + {Sign::POS, -152, MType({0xbe5a212ed7b949e4, 0x8a7fac78fd024cdb})}, + {Sign::POS, -152, MType({0x12dcf94ef5c5b918, 0xa194f3e7892a4fde})}, + {Sign::POS, -152, MType({0x49781013e57110ce, 0xb8aa3b5786a6ca76})}, + {Sign::POS, -152, MType({0x8cba70c085c12cb3, 0xcfbf82c8f577bcd2})}, + {Sign::POS, -152, MType({0x7332f3fb09328b8, 0xe6d4ca3bd59d2721})}, + {Sign::POS, -152, MType({0xe37168243a9d8b14, 0xfdea11b02717098f})}, + {Sign::POS, -151, MType({0xa602205479b93722, 0x8a7fac92f4f2b226})}, + {Sign::POS, -151, MType({0xb5bd735852c0d583, 
0x960a504e8f041bc3})}, + {Sign::POS, -151, MType({0x363248630b0d812d, 0xa194f40ae1bfc1b6})}, + {Sign::POS, -151, MType({0x3ca83f0e02b823c0, 0xad1f97c7ed25a415})}, + {Sign::POS, -151, MType({0xde66fb46974bc4fd, 0xb8aa3b85b135c2f7})}, + {Sign::POS, -151, MType({0x30b6254e23c69fc2, 0xc434df442df01e75})}, + {Sign::POS, -151, MType({0x48dd69ba009b370c, 0xcfbf83036354b6a4})}, + {Sign::POS, -151, MType({0x3c24797383b16af5, 0xdb4a26c351638b9c})}, + {Sign::POS, -151, MType({0x1fd309b800678db7, 0xe6d4ca83f81c9d74})}, + {Sign::POS, -151, MType({0x930d418c79378a3, 0xf25f6e45577fec43})}, + {Sign::POS, -151, MType({0xd85967b2783a12c, 0xfdea12076f8d7820})}, + {Sign::POS, -150, MType({0x210c898c360016ed, 0x84ba5ae52022a091})}, + {Sign::POS, -150, MType({0x5e19883eef2605ab, 0x8a7facc6e4d3a3b0})}, + {Sign::POS, -150, MType({0x488dacc6629300ae, 0x9044fea905d9c579})}, + {Sign::POS, -150, MType({0x6b0cdebd3264e3e3, 0x960a508b833505f7})}, + {Sign::POS, -150, MType({0x503b07e7ff788dc2, 0x9bcfa26e5ce56536})}, + {Sign::POS, -150, MType({0x82bc1435696a69d1, 0xa194f45192eae341})}, + {Sign::POS, -150, MType({0x8d33f1be0e96fb1f, 0xa75a463525458024})}, + {Sign::POS, -150, MType({0xfa4690c48c1b66c9, 0xad1f981913f53bea})}, + {Sign::POS, -150, MType({0x5497e3b57dd5fe75, 0xb2e4e9fd5efa16a0})}, + {Sign::POS, -150, MType({0x26cbdf277e66cad5, 0xb8aa3be206541050})}, + {Sign::POS, -150, MType({0xfb8679db27301625, 0xbe6f8dc70a032905})}, + {Sign::POS, -150, MType({0x5d6bacbb1056f6aa, 0xc434dfac6a0760cd})}, + {Sign::POS, -150, MType({0xd71f72dbd0c3d936, 0xc9fa31922660b7b1})}, + {Sign::POS, -150, MType({0xf345c97bfe230ba2, 0xcfbf83783f0f2dbe})}, + {Sign::POS, -150, MType({0x3c82b0042ce54751, 0xd584d55eb412c300})}, + {Sign::POS, -150, MType({0x3d7a2806f0403bae, 0xdb4a2745856b7781})}, + {Sign::POS, -150, MType({0x80d03540da2f18ae, 0xe10f792cb3194b4d})}, + {Sign::POS, -150, MType({0x9128dd987b73194f, 0xe6d4cb143d1c3e70})}, + {Sign::POS, -150, MType({0xf928291e63940e14, 0xec9a1cfc237450f5})}, + {Sign::POS, 
-150, MType({0x4372220d20e0e78a, 0xf25f6ee4662182e9})}, + {Sign::POS, -150, MType({0xfaaad4c9407040c7, 0xf824c0cd0523d455})}, + {Sign::POS, -150, MType({0xa9764fe14e20e9e4, 0xfdea12b6007b4547})}, + {Sign::POS, -149, MType({0xed3c5206ea4d3942, 0x81d7b24fac13eae4})}, + {Sign::POS, -149, MType({0xc2af218aea6da27, 0x84ba5b448614c2f4})}, + {Sign::POS, -149, MType({0xf6d912ac383aaeba, 0x879d04398e402ad6})}, + {Sign::POS, -149, MType({0x7298bf5cca8b3d95, 0x8a7fad2ec4962293})}, + {Sign::POS, -149, MType({0x44bc04daa8808214, 0x8d6256242916aa2f})}, + {Sign::POS, -149, MType({0x3294f0eb14683198, 0x9044ff19bbc1c1b0})}, + {Sign::POS, -149, MType({0x17592684ff600c3, 0x9327a80f7c97691c})}, + {Sign::POS, -149, MType({0x76aff9419c43e8b9, 0x960a51056b97a078})}, + {Sign::POS, -149, MType({0x5796367b39d26c63, 0x98ecf9fb88c267cb})}, + {Sign::POS, -149, MType({0x697a5c2e6888ddaa, 0x9bcfa2f1d417bf1a})}, + {Sign::POS, -149, MType({0x71ae7d8967b5a2b7, 0x9eb24be84d97a66b})}, + {Sign::POS, -149, MType({0x3584aecf760e7b39, 0xa194f4def5421dc4})}, + {Sign::POS, -149, MType({0x7a4f0558d1b0c59e, 0xa4779dd5cb17252a})}, + {Sign::POS, -149, MType({0x55f9792b821c455, 0xa75a46cccf16bca4})}, + {Sign::POS, -149, MType({0x9c087cff664ee311, 0xaa3cefc40140e436})}, + {Sign::POS, -149, MType({0x39bce36188dfc04, 0xad1f98bb61959be8})}, + {Sign::POS, -149, MType({0x16ba4e30a9d9d21, 0xb00241b2f014e3be})}, + {Sign::POS, -149, MType({0x5aca1bc777a54d5e, 0xb2e4eaaaacbebbbe})}, + {Sign::POS, -149, MType({0xd5094eb99a35d1f0, 0xb5c793a2979323ee})}, + {Sign::POS, -149, MType({0x357b5aa4ac49738d, 0xb8aa3c9ab0921c55})}, }}; // > P = fpminimax(log2(1 + x)/x, 3, [|128...|], [-0x1.0002143p-29 , 0x1p-29]); @@ -827,10 +824,10 @@ const LogRR LOG2_TABLE = { // > dirtyinfnorm(log2(1 + x)/x - P, [-0x1.0002143p-29 , 0x1p-29]); // 0x1.27ad5...p-121 const Float128 BIG_COEFFS[4]{ - {Sign::NEG, -129, 0xb8aa3b29'5c2b21e3'3eccf694'0d66bbcc_u128}, - {Sign::POS, -129, 0xf6384ee1'd01febc9'ee39a6d6'49394bb1_u128}, - {Sign::NEG, -128, 
0xb8aa3b29'5c17f0bb'be87fed0'67ea2ad5_u128}, - {Sign::POS, -127, 0xb8aa3b29'5c17f0bb'be87fed0'691d3e3f_u128}, + {Sign::NEG, -129, MType({0x3eccf6940d66bbcc, 0xb8aa3b295c2b21e3})}, + {Sign::POS, -129, MType({0xee39a6d649394bb1, 0xf6384ee1d01febc9})}, + {Sign::NEG, -128, MType({0xbe87fed067ea2ad5, 0xb8aa3b295c17f0bb})}, + {Sign::POS, -127, MType({0xbe87fed0691d3e3f, 0xb8aa3b295c17f0bb})}, }; // Reuse the output of the fast pass range reduction. diff --git a/libc/test/src/__support/CMakeLists.txt b/libc/test/src/__support/CMakeLists.txt index 6dfd0f196b1088..231b01e0ee50a7 100644 --- a/libc/test/src/__support/CMakeLists.txt +++ b/libc/test/src/__support/CMakeLists.txt @@ -55,7 +55,6 @@ add_libc_test( str_to_double_test.cpp str_to_long_double_test.cpp DEPENDS - libc.src.__support.integer_literals libc.src.__support.str_to_float libc.src.__support.uint128 libc.src.errno.errno @@ -68,9 +67,8 @@ add_libc_test( SRCS integer_to_string_test.cpp DEPENDS - libc.src.__support.CPP.string_view - libc.src.__support.integer_literals libc.src.__support.integer_to_string + libc.src.__support.CPP.string_view libc.src.__support.uint libc.src.__support.uint128 ) diff --git a/libc/test/src/__support/FPUtil/fpbits_test.cpp b/libc/test/src/__support/FPUtil/fpbits_test.cpp index 46f7d250596873..1c8a1c5b9d4cee 100644 --- a/libc/test/src/__support/FPUtil/fpbits_test.cpp +++ b/libc/test/src/__support/FPUtil/fpbits_test.cpp @@ -452,7 +452,7 @@ TEST(LlvmLibcFPBitsTest, X86LongDoubleType) { EXPECT_TRUE(negzero.is_neg()); EXPECT_EQ(negzero.get_biased_exponent(), 0_u16); EXPECT_EQ(negzero.get_mantissa(), 0_u128); - EXPECT_EQ(negzero.uintval(), 0x8000'00000000'00000000_u128); + EXPECT_EQ(negzero.uintval(), 0x80000000000000000000_u128); EXPECT_STREQ( LIBC_NAMESPACE::str(negzero).c_str(), "0x00000000000080000000000000000000 = " @@ -462,7 +462,7 @@ TEST(LlvmLibcFPBitsTest, X86LongDoubleType) { EXPECT_TRUE(one.is_pos()); EXPECT_EQ(one.get_biased_exponent(), 0x3FFF_u16); EXPECT_EQ(one.get_mantissa(), 
0_u128); - EXPECT_EQ(one.uintval(), 0x3FFF'80000000'00000000_u128); + EXPECT_EQ(one.uintval(), 0x3FFF8000000000000000_u128); EXPECT_STREQ( LIBC_NAMESPACE::str(one).c_str(), "0x0000000000003FFF8000000000000000 = " @@ -472,7 +472,7 @@ TEST(LlvmLibcFPBitsTest, X86LongDoubleType) { EXPECT_TRUE(negone.is_neg()); EXPECT_EQ(negone.get_biased_exponent(), 0x3FFF_u16); EXPECT_EQ(negone.get_mantissa(), 0_u128); - EXPECT_EQ(negone.uintval(), 0xBFFF'80000000'00000000_u128); + EXPECT_EQ(negone.uintval(), 0xBFFF8000000000000000_u128); EXPECT_STREQ( LIBC_NAMESPACE::str(negone).c_str(), "0x000000000000BFFF8000000000000000 = " @@ -481,8 +481,8 @@ TEST(LlvmLibcFPBitsTest, X86LongDoubleType) { LongDoubleBits num(1.125l); EXPECT_TRUE(num.is_pos()); EXPECT_EQ(num.get_biased_exponent(), 0x3FFF_u16); - EXPECT_EQ(num.get_mantissa(), 0x10000000'00000000_u128); - EXPECT_EQ(num.uintval(), 0x3FFF'90000000'00000000_u128); + EXPECT_EQ(num.get_mantissa(), 0x1000000000000000_u128); + EXPECT_EQ(num.uintval(), 0x3FFF9000000000000000_u128); EXPECT_STREQ( LIBC_NAMESPACE::str(num).c_str(), "0x0000000000003FFF9000000000000000 = " @@ -491,8 +491,8 @@ TEST(LlvmLibcFPBitsTest, X86LongDoubleType) { LongDoubleBits negnum(-1.125l); EXPECT_TRUE(negnum.is_neg()); EXPECT_EQ(negnum.get_biased_exponent(), 0x3FFF_u16); - EXPECT_EQ(negnum.get_mantissa(), 0x10000000'00000000_u128); - EXPECT_EQ(negnum.uintval(), 0xBFFF'90000000'00000000_u128); + EXPECT_EQ(negnum.get_mantissa(), 0x1000000000000000_u128); + EXPECT_EQ(negnum.uintval(), 0xBFFF9000000000000000_u128); EXPECT_STREQ( LIBC_NAMESPACE::str(negnum).c_str(), "0x000000000000BFFF9000000000000000 = " @@ -528,7 +528,7 @@ TEST(LlvmLibcFPBitsTest, LongDoubleType) { EXPECT_TRUE(negzero.is_neg()); EXPECT_EQ(negzero.get_biased_exponent(), 0_u16); EXPECT_EQ(negzero.get_mantissa(), 0_u128); - EXPECT_EQ(negzero.uintval(), 0x80000000'00000000'00000000'00000000_u128); + EXPECT_EQ(negzero.uintval(), 0x80000000000000000000000000000000_u128); 
EXPECT_STREQ(LIBC_NAMESPACE::str(negzero).c_str(), "0x80000000000000000000000000000000 = " "(S: 1, E: 0x0000, M: 0x00000000000000000000000000000000)"); @@ -537,7 +537,7 @@ TEST(LlvmLibcFPBitsTest, LongDoubleType) { EXPECT_TRUE(one.is_pos()); EXPECT_EQ(one.get_biased_exponent(), 0x3FFF_u16); EXPECT_EQ(one.get_mantissa(), 0_u128); - EXPECT_EQ(one.uintval(), 0x3FFF0000'00000000'00000000'00000000_u128); + EXPECT_EQ(one.uintval(), 0x3FFF0000000000000000000000000000_u128); EXPECT_STREQ(LIBC_NAMESPACE::str(one).c_str(), "0x3FFF0000000000000000000000000000 = " "(S: 0, E: 0x3FFF, M: 0x00000000000000000000000000000000)"); @@ -546,7 +546,7 @@ TEST(LlvmLibcFPBitsTest, LongDoubleType) { EXPECT_TRUE(negone.is_neg()); EXPECT_EQ(negone.get_biased_exponent(), 0x3FFF_u16); EXPECT_EQ(negone.get_mantissa(), 0_u128); - EXPECT_EQ(negone.uintval(), 0xBFFF0000'00000000'00000000'00000000_u128); + EXPECT_EQ(negone.uintval(), 0xBFFF0000000000000000000000000000_u128); EXPECT_STREQ(LIBC_NAMESPACE::str(negone).c_str(), "0xBFFF0000000000000000000000000000 = " "(S: 1, E: 0x3FFF, M: 0x00000000000000000000000000000000)"); @@ -554,8 +554,8 @@ TEST(LlvmLibcFPBitsTest, LongDoubleType) { LongDoubleBits num(1.125l); EXPECT_TRUE(num.is_pos()); EXPECT_EQ(num.get_biased_exponent(), 0x3FFF_u16); - EXPECT_EQ(num.get_mantissa(), 0x2000'00000000'00000000'00000000_u128); - EXPECT_EQ(num.uintval(), 0x3FFF2000'00000000'00000000'00000000_u128); + EXPECT_EQ(num.get_mantissa(), 0x2000000000000000000000000000_u128); + EXPECT_EQ(num.uintval(), 0x3FFF2000000000000000000000000000_u128); EXPECT_STREQ(LIBC_NAMESPACE::str(num).c_str(), "0x3FFF2000000000000000000000000000 = " "(S: 0, E: 0x3FFF, M: 0x00002000000000000000000000000000)"); @@ -563,8 +563,8 @@ TEST(LlvmLibcFPBitsTest, LongDoubleType) { LongDoubleBits negnum(-1.125l); EXPECT_TRUE(negnum.is_neg()); EXPECT_EQ(negnum.get_biased_exponent(), 0x3FFF_u16); - EXPECT_EQ(negnum.get_mantissa(), 0x2000'00000000'00000000'00000000_u128); - EXPECT_EQ(negnum.uintval(), 
0xBFFF2000'00000000'00000000'00000000_u128); + EXPECT_EQ(negnum.get_mantissa(), 0x2000000000000000000000000000_u128); + EXPECT_EQ(negnum.uintval(), 0xBFFF2000000000000000000000000000_u128); EXPECT_STREQ(LIBC_NAMESPACE::str(negnum).c_str(), "0xBFFF2000000000000000000000000000 = " "(S: 1, E: 0x3FFF, M: 0x00002000000000000000000000000000)"); @@ -599,7 +599,7 @@ TEST(LlvmLibcFPBitsTest, Float128Type) { EXPECT_TRUE(negzero.is_neg()); EXPECT_EQ(negzero.get_biased_exponent(), 0_u16); EXPECT_EQ(negzero.get_mantissa(), 0_u128); - EXPECT_EQ(negzero.uintval(), 0x80000000'00000000'00000000'00000000_u128); + EXPECT_EQ(negzero.uintval(), 0x80000000000000000000000000000000_u128); EXPECT_STREQ(LIBC_NAMESPACE::str(negzero).c_str(), "0x80000000000000000000000000000000 = " "(S: 1, E: 0x0000, M: 0x00000000000000000000000000000000)"); @@ -608,7 +608,7 @@ TEST(LlvmLibcFPBitsTest, Float128Type) { EXPECT_TRUE(one.is_pos()); EXPECT_EQ(one.get_biased_exponent(), 0x3FFF_u16); EXPECT_EQ(one.get_mantissa(), 0_u128); - EXPECT_EQ(one.uintval(), 0x3FFF0000'00000000'00000000'00000000_u128); + EXPECT_EQ(one.uintval(), 0x3FFF0000000000000000000000000000_u128); EXPECT_STREQ(LIBC_NAMESPACE::str(one).c_str(), "0x3FFF0000000000000000000000000000 = " "(S: 0, E: 0x3FFF, M: 0x00000000000000000000000000000000)"); @@ -617,7 +617,7 @@ TEST(LlvmLibcFPBitsTest, Float128Type) { EXPECT_TRUE(negone.is_neg()); EXPECT_EQ(negone.get_biased_exponent(), 0x3FFF_u16); EXPECT_EQ(negone.get_mantissa(), 0_u128); - EXPECT_EQ(negone.uintval(), 0xBFFF0000'00000000'00000000'00000000_u128); + EXPECT_EQ(negone.uintval(), 0xBFFF0000000000000000000000000000_u128); EXPECT_STREQ(LIBC_NAMESPACE::str(negone).c_str(), "0xBFFF0000000000000000000000000000 = " "(S: 1, E: 0x3FFF, M: 0x00000000000000000000000000000000)"); @@ -625,8 +625,8 @@ TEST(LlvmLibcFPBitsTest, Float128Type) { Float128Bits num(float128(1.125)); EXPECT_TRUE(num.is_pos()); EXPECT_EQ(num.get_biased_exponent(), 0x3FFF_u16); - EXPECT_EQ(num.get_mantissa(), 
0x2000'00000000'00000000'00000000_u128); - EXPECT_EQ(num.uintval(), 0x3FFF2000'00000000'00000000'00000000_u128); + EXPECT_EQ(num.get_mantissa(), 0x2000000000000000000000000000_u128); + EXPECT_EQ(num.uintval(), 0x3FFF2000000000000000000000000000_u128); EXPECT_STREQ(LIBC_NAMESPACE::str(num).c_str(), "0x3FFF2000000000000000000000000000 = " "(S: 0, E: 0x3FFF, M: 0x00002000000000000000000000000000)"); @@ -634,8 +634,8 @@ TEST(LlvmLibcFPBitsTest, Float128Type) { Float128Bits negnum(float128(-1.125)); EXPECT_TRUE(negnum.is_neg()); EXPECT_EQ(negnum.get_biased_exponent(), 0x3FFF_u16); - EXPECT_EQ(negnum.get_mantissa(), 0x2000'00000000'00000000'00000000_u128); - EXPECT_EQ(negnum.uintval(), 0xBFFF2000'00000000'00000000'00000000_u128); + EXPECT_EQ(negnum.get_mantissa(), 0x2000000000000000000000000000_u128); + EXPECT_EQ(negnum.uintval(), 0xBFFF2000000000000000000000000000_u128); EXPECT_STREQ(LIBC_NAMESPACE::str(negnum).c_str(), "0xBFFF2000000000000000000000000000 = " "(S: 1, E: 0x3FFF, M: 0x00002000000000000000000000000000)"); diff --git a/libc/test/src/__support/integer_to_string_test.cpp b/libc/test/src/__support/integer_to_string_test.cpp index 2a19c5bf7549c6..c8913bf461bb36 100644 --- a/libc/test/src/__support/integer_to_string_test.cpp +++ b/libc/test/src/__support/integer_to_string_test.cpp @@ -10,7 +10,6 @@ #include "src/__support/CPP/string_view.h" #include "src/__support/UInt.h" #include "src/__support/UInt128.h" -#include "src/__support/integer_literals.h" #include "src/__support/integer_to_string.h" #include "test/UnitTest/Test.h" @@ -25,8 +24,6 @@ using LIBC_NAMESPACE::radix::Custom; using LIBC_NAMESPACE::radix::Dec; using LIBC_NAMESPACE::radix::Hex; using LIBC_NAMESPACE::radix::Oct; -using LIBC_NAMESPACE::operator""_u128; -using LIBC_NAMESPACE::operator""_u256; #define EXPECT(type, value, string_value) \ { \ @@ -207,11 +204,11 @@ TEST(LlvmLibcIntegerToStringTest, UINT128_Base_16) { using type = IntegerToString>; EXPECT(type, 0, "00000000000000000000000000000000"); 
EXPECT(type, 0x12345, "00000000000000000000000000012345"); - EXPECT(type, 0x12340000'00000000'00000000'00000000_u128, + EXPECT(type, static_cast(0x1234) << 112, "12340000000000000000000000000000"); - EXPECT(type, 0x00000000'00000000'12340000'00000000_u128, + EXPECT(type, static_cast(0x1234) << 48, "00000000000000001234000000000000"); - EXPECT(type, 0x00000000'00000001'23400000'00000000_u128, + EXPECT(type, static_cast(0x1234) << 52, "00000000000000012340000000000000"); } @@ -230,26 +227,16 @@ TEST(LlvmLibcIntegerToStringTest, UINT64_Base_36) { TEST(LlvmLibcIntegerToStringTest, UINT256_Base_16) { using UInt256 = LIBC_NAMESPACE::cpp::UInt<256>; using type = IntegerToString>; - EXPECT( - type, - 0x0000000000000000000000000000000000000000000000000000000000000000_u256, - "0000000000000000000000000000000000000000000000000000000000000000"); - EXPECT( - type, - 0x0000000000000000000000000000000000000000000000000000000000012345_u256, - "0000000000000000000000000000000000000000000000000000000000012345"); - EXPECT( - type, - 0x0000000000000000000000000000000012340000000000000000000000000000_u256, - "0000000000000000000000000000000012340000000000000000000000000000"); - EXPECT( - type, - 0x0000000000000000000000000000000123400000000000000000000000000000_u256, - "0000000000000000000000000000000123400000000000000000000000000000"); - EXPECT( - type, - 0x1234000000000000000000000000000000000000000000000000000000000000_u256, - "1234000000000000000000000000000000000000000000000000000000000000"); + EXPECT(type, static_cast(0), + "0000000000000000000000000000000000000000000000000000000000000000"); + EXPECT(type, static_cast(0x12345), + "0000000000000000000000000000000000000000000000000000000000012345"); + EXPECT(type, static_cast(0x1234) << 112, + "0000000000000000000000000000000012340000000000000000000000000000"); + EXPECT(type, static_cast(0x1234) << 116, + "0000000000000000000000000000000123400000000000000000000000000000"); + EXPECT(type, static_cast(0x1234) << 240, + 
"1234000000000000000000000000000000000000000000000000000000000000"); } TEST(LlvmLibcIntegerToStringTest, NegativeInterpretedAsPositive) { diff --git a/libc/test/src/__support/str_to_long_double_test.cpp b/libc/test/src/__support/str_to_long_double_test.cpp index c4686cfba3317e..6fefc89ac3a945 100644 --- a/libc/test/src/__support/str_to_long_double_test.cpp +++ b/libc/test/src/__support/str_to_long_double_test.cpp @@ -1,11 +1,8 @@ #include "str_to_fp_test.h" -#include "src/__support/integer_literals.h" - namespace LIBC_NAMESPACE { using LlvmLibcStrToLongDblTest = LlvmLibcStrToFloatTest; -using LIBC_NAMESPACE::operator""_u128; #if defined(LIBC_LONG_DOUBLE_IS_FLOAT64) @@ -21,12 +18,15 @@ TEST_F(LlvmLibcStrToLongDblTest, EiselLemireFloat80Simple) { } TEST_F(LlvmLibcStrToLongDblTest, EiselLemireFloat80LongerMantissa) { - eisel_lemire_test(0x12345678'12345678'12345678'12345678_u128, 0, - 0x91a2b3c091a2b3c1, 16507); - eisel_lemire_test(0x12345678'12345678'12345678'12345678_u128, 300, - 0xd97757de56adb65c, 17503); - eisel_lemire_test(0x12345678'12345678'12345678'12345678_u128, -300, - 0xc30feb9a7618457d, 15510); + eisel_lemire_test((UInt128(0x1234567812345678) << 64) + + UInt128(0x1234567812345678), + 0, 0x91a2b3c091a2b3c1, 16507); + eisel_lemire_test((UInt128(0x1234567812345678) << 64) + + UInt128(0x1234567812345678), + 300, 0xd97757de56adb65c, 17503); + eisel_lemire_test((UInt128(0x1234567812345678) << 64) + + UInt128(0x1234567812345678), + -300, 0xc30feb9a7618457d, 15510); } // These tests check numbers at the edge of the DETAILED_POWERS_OF_TEN table. 
@@ -57,24 +57,30 @@ TEST_F(LlvmLibcStrToLongDblTest, EiselLemireFloat80Fallback) { #else // Quad precision long double TEST_F(LlvmLibcStrToLongDblTest, EiselLemireFloat128Simple) { - eisel_lemire_test(123, 0, 0x1ec00'00000000'00000000'00000000_u128, 16389); - eisel_lemire_test(12345678901234568192u, 0, - 0x156a9'5319d63e'18000000'00000000_u128, 16446); + eisel_lemire_test(123, 0, (UInt128(0x1ec0000000000) << 64), 16389); + eisel_lemire_test( + 12345678901234568192u, 0, + (UInt128(0x156a95319d63e) << 64) + UInt128(0x1800000000000000), 16446); } TEST_F(LlvmLibcStrToLongDblTest, EiselLemireFloat128LongerMantissa) { - eisel_lemire_test(0x12345678'12345678'12345678'12345678_u128, 0, - 0x12345'67812345'67812345'67812345_u128, 16507); - eisel_lemire_test(0x12345678'12345678'12345678'12345678_u128, 300, - 0x1b2ee'afbcad5b'6cb8b445'1dfcde19_u128, 17503); - eisel_lemire_test(0x12345678'12345678'12345678'12345678_u128, -300, - 0x1861f'd734ec30'8afa7189'f0f7595f_u128, 15510); + eisel_lemire_test( + (UInt128(0x1234567812345678) << 64) + UInt128(0x1234567812345678), 0, + (UInt128(0x1234567812345) << 64) + UInt128(0x6781234567812345), 16507); + eisel_lemire_test( + (UInt128(0x1234567812345678) << 64) + UInt128(0x1234567812345678), 300, + (UInt128(0x1b2eeafbcad5b) << 64) + UInt128(0x6cb8b4451dfcde19), 17503); + eisel_lemire_test( + (UInt128(0x1234567812345678) << 64) + UInt128(0x1234567812345678), -300, + (UInt128(0x1861fd734ec30) << 64) + UInt128(0x8afa7189f0f7595f), 15510); } TEST_F(LlvmLibcStrToLongDblTest, EiselLemireFloat128Fallback) { - ASSERT_FALSE(internal::eisel_lemire( - {0x5ce0e9a5'6015fec5'aadfa328'ae39b333_u128, 1}) - .has_value()); + ASSERT_FALSE( + internal::eisel_lemire( + {(UInt128(0x5ce0e9a56015fec5) << 64) + UInt128(0xaadfa328ae39b333), + 1}) + .has_value()); } #endif diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index c6f01f435e12c5..fde2bac746f4f8 100644 --- 
a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -1283,7 +1283,6 @@ libc_math_function( ":__support_fputil_polyeval", ":__support_fputil_rounding_mode", ":__support_fputil_triple_double", - ":__support_integer_literals", ":__support_macros_optimization", ":common_constants", ":explogxf", @@ -1314,7 +1313,6 @@ libc_math_function( ":__support_fputil_polyeval", ":__support_fputil_rounding_mode", ":__support_fputil_triple_double", - ":__support_integer_literals", ":__support_macros_optimization", ":common_constants", ":explogxf", @@ -1344,7 +1342,6 @@ libc_math_function( ":__support_fputil_polyeval", ":__support_fputil_rounding_mode", ":__support_fputil_triple_double", - ":__support_integer_literals", ":__support_macros_optimization", ":common_constants", ":explogxf", @@ -1368,7 +1365,6 @@ libc_math_function( ":__support_fputil_polyeval", ":__support_fputil_rounding_mode", ":__support_fputil_triple_double", - ":__support_integer_literals", ":__support_macros_optimization", ":common_constants", ":explogxf", @@ -1432,12 +1428,11 @@ libc_math_function( libc_math_function( name = "log", additional_deps = [ - ":__support_fputil_double_double", - ":__support_fputil_dyadic_float", ":__support_fputil_fma", ":__support_fputil_multiply_add", ":__support_fputil_polyeval", - ":__support_integer_literals", + ":__support_fputil_double_double", + ":__support_fputil_dyadic_float", ":__support_macros_optimization", ":__support_macros_properties_cpu_features", ":common_constants", @@ -1448,12 +1443,11 @@ libc_math_function( libc_math_function( name = "log2", additional_deps = [ - ":__support_fputil_double_double", - ":__support_fputil_dyadic_float", ":__support_fputil_fma", ":__support_fputil_multiply_add", ":__support_fputil_polyeval", - ":__support_integer_literals", + ":__support_fputil_double_double", + ":__support_fputil_dyadic_float", ":__support_macros_optimization", ":__support_macros_properties_cpu_features", 
":common_constants", @@ -1464,12 +1458,11 @@ libc_math_function( libc_math_function( name = "log10", additional_deps = [ - ":__support_fputil_double_double", - ":__support_fputil_dyadic_float", ":__support_fputil_fma", ":__support_fputil_multiply_add", ":__support_fputil_polyeval", - ":__support_integer_literals", + ":__support_fputil_double_double", + ":__support_fputil_dyadic_float", ":__support_macros_optimization", ":__support_macros_properties_cpu_features", ":common_constants", @@ -1480,12 +1473,11 @@ libc_math_function( libc_math_function( name = "log1p", additional_deps = [ - ":__support_fputil_double_double", - ":__support_fputil_dyadic_float", ":__support_fputil_fma", ":__support_fputil_multiply_add", ":__support_fputil_polyeval", - ":__support_integer_literals", + ":__support_fputil_double_double", + ":__support_fputil_dyadic_float", ":__support_macros_optimization", ":__support_macros_properties_cpu_features", ":common_constants", diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel index 8e94a84f586f4c..e691d3c3d2ebdd 100644 --- a/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel @@ -50,7 +50,6 @@ libc_test( ], deps = [ "//libc:__support_fputil_fp_bits", - "//libc:__support_integer_literals", "//libc:__support_str_to_float", "//libc:__support_uint128", ], @@ -62,7 +61,6 @@ libc_test( deps = [ "//libc:__support_cpp_span", "//libc:__support_cpp_string_view", - "//libc:__support_integer_literals", "//libc:__support_integer_to_string", "//libc:__support_uint", "//libc:__support_uint128", From 630f82ec0c61b772711355fad08ed0d0adce922d Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Wed, 14 Feb 2024 12:59:59 -0500 Subject: [PATCH 160/240] [Clang][CodeGen] Loose the cast check when emitting builtins (#81669) This patch looses the cast check (`canLosslesslyBitCastTo`) 
and leaves it to the one inside `CreateBitCast`. It seems too conservative for the use case here. --- clang/lib/CodeGen/CGBuiltin.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index ee0b7504769622..9bc60466d09be6 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -5912,8 +5912,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } } - assert(ArgValue->getType()->canLosslesslyBitCastTo(PTy) && - "Must be able to losslessly bit cast to param"); // Cast vector type (e.g., v256i32) to x86_amx, this only happen // in amx intrinsics. if (PTy->isX86_AMXTy()) @@ -5943,8 +5941,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } } - assert(V->getType()->canLosslesslyBitCastTo(RetTy) && - "Must be able to losslessly bit cast result type"); // Cast x86_amx to vector type (e.g., v256i32), this only happen // in amx intrinsics. if (V->getType()->isX86_AMXTy()) From dbc40b34617b5ee5d2f82272c8863c602265063c Mon Sep 17 00:00:00 2001 From: Jason Molenda Date: Wed, 14 Feb 2024 10:05:22 -0800 Subject: [PATCH 161/240] [lldb] Fix the flakey Concurrent tests on macOS (#81710) The concurrent tests all do a pthread_join at the end, and concurrent_base.py stops after that pthread_join and sanity checks that only 1 thread is running. On macOS, after pthread_join() has completed, there can be an extra thread still running which is completing the details of that task asynchronously; this causes testsuite failures. 
When this happens, we see the second thread is in ``` frame #0: 0x0000000180ce7700 libsystem_kernel.dylib`__ulock_wake + 8 frame #1: 0x0000000180d25ad4 libsystem_pthread.dylib`_pthread_joiner_wake + 52 frame #2: 0x0000000180d23c18 libsystem_pthread.dylib`_pthread_terminate + 384 frame #3: 0x0000000180d23a98 libsystem_pthread.dylib`_pthread_terminate_invoke + 92 frame #4: 0x0000000180d26740 libsystem_pthread.dylib`_pthread_exit + 112 frame #5: 0x0000000180d26040 libsystem_pthread.dylib`_pthread_start + 148 ``` there are none of the functions from the test file present on this thread. In this patch, instead of counting the number of threads, I iterate over the threads looking for functions from our test file (by name) and only count threads that have at least one of them. It's a lower frequency failure than the darwin kernel bug causing an extra step instruction mach exception when hardware breakpoint/watchpoints are used, but once I fixed that, this came up as the next most common failure for these tests. rdar://110555062 --- .../Python/lldbsuite/test/concurrent_base.py | 34 +++++++++++++++++-- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/concurrent_base.py b/lldb/packages/Python/lldbsuite/test/concurrent_base.py index 39eb27fd997471..46d71666d06977 100644 --- a/lldb/packages/Python/lldbsuite/test/concurrent_base.py +++ b/lldb/packages/Python/lldbsuite/test/concurrent_base.py @@ -264,12 +264,40 @@ def do_thread_actions( "Expected main thread (finish) breakpoint to be hit once", ) - num_threads = self.inferior_process.GetNumThreads() + # There should be a single active thread (the main one) which hit + # the breakpoint after joining. Depending on the pthread + # implementation we may have a worker thread finishing the pthread_join() + # after it has returned. 
Filter the threads to only count those + # with user functions on them from our test case file, + # lldb/test/API/functionalities/thread/concurrent_events/main.cpp + user_code_funcnames = [ + "breakpoint_func", + "crash_func", + "do_action_args", + "dotest", + "main", + "register_signal_handler", + "signal_func", + "sigusr1_handler", + "start_threads", + "watchpoint_func", + ] + num_threads_with_usercode = 0 + for t in self.inferior_process.threads: + thread_has_user_code = False + for f in t.frames: + for funcname in user_code_funcnames: + if funcname in f.GetDisplayFunctionName(): + thread_has_user_code = True + break + if thread_has_user_code: + num_threads_with_usercode += 1 + self.assertEqual( 1, - num_threads, + num_threads_with_usercode, "Expecting 1 thread but seeing %d. Details:%s" - % (num_threads, "\n\t".join(self.describe_threads())), + % (num_threads_with_usercode, "\n\t".join(self.describe_threads())), ) self.runCmd("continue") From 1ddc5413b12c80170477240cd54513358e6f8fe4 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Fri, 20 Oct 2023 08:34:44 -0700 Subject: [PATCH 162/240] Apply clang-tidy fixes for readability-simplify-boolean-expr in TransformOps.cpp (NFC) --- mlir/lib/Dialect/Transform/IR/TransformOps.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp index efb724006674f4..180d11c30e65de 100644 --- a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp +++ b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp @@ -2223,7 +2223,7 @@ transform::SplitHandleOp::apply(transform::TransformRewriter &rewriter, // - "fail_on_payload_too_small" is set to "false", or // - "pass_through_empty_handle" is set to "true" and there are 0 payload ops. 
if (numPayloadOps < getNumResults() && getFailOnPayloadTooSmall() && - !(numPayloadOps == 0 && getPassThroughEmptyHandle())) + (numPayloadOps != 0 || !getPassThroughEmptyHandle())) return produceNumOpsError(); // Distribute payload ops. From 8383bf23074e4a7910441aa7ab54707242eac405 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Fri, 20 Oct 2023 08:41:42 -0700 Subject: [PATCH 163/240] Apply clang-tidy fixes for llvm-else-after-return in IndexingUtils.cpp (NFC) --- mlir/lib/Dialect/Utils/IndexingUtils.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mlir/lib/Dialect/Utils/IndexingUtils.cpp b/mlir/lib/Dialect/Utils/IndexingUtils.cpp index 2765d1eb1000da..baaa581ab6f225 100644 --- a/mlir/lib/Dialect/Utils/IndexingUtils.cpp +++ b/mlir/lib/Dialect/Utils/IndexingUtils.cpp @@ -271,9 +271,8 @@ static MLIRContext *getContext(OpFoldResult val) { assert(val && "Invalid value"); if (auto attr = dyn_cast(val)) { return attr.getContext(); - } else { - return cast(val).getContext(); } + return cast(val).getContext(); } std::pair> From 89dc313af9fbabbf7b064df8ab09d4e49f36c0cd Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Fri, 20 Oct 2023 09:02:08 -0700 Subject: [PATCH 164/240] Apply clang-tidy fixes for llvm-qualified-auto in VectorUnroll.cpp (NFC) --- mlir/lib/Dialect/Vector/Transforms/VectorUnroll.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorUnroll.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorUnroll.cpp index 78b041255443c3..c83776422f224c 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorUnroll.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/VectorUnroll.cpp @@ -311,7 +311,7 @@ struct UnrollContractionPattern applyPermutationMap(accPermutationMap, ArrayRef(offsets)); // If a version of the accumulator has already been computed, use it // otherwise extract the first version from the original operand. 
- auto accIt = accCache.find(accOffets); + auto *accIt = accCache.find(accOffets); if (accIt != accCache.end()) slicesOperands[2] = accIt->second; else @@ -387,7 +387,7 @@ struct UnrollMultiReductionPattern SmallVector accStrides(destOffset.size(), 1); // If a version of the accumulator has already been computed, use it // otherwise extract the first version from the original operand. - auto accIt = accCache.find(destOffset); + auto *accIt = accCache.find(destOffset); if (accIt != accCache.end()) acc = accIt->second; else From bf4480d923c7ead1193006d20caffdbf8468aac0 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Fri, 20 Oct 2023 09:15:07 -0700 Subject: [PATCH 165/240] Apply clang-tidy fixes for readability-identifier-naming in SparseTensorRuntime.cpp (NFC) --- mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp index e7ac8f161875db..a5e75a77b4e47f 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp @@ -508,9 +508,9 @@ MLIR_SPARSETENSOR_FOREVERY_V(IMPL_DELCOO) #undef IMPL_DELCOO char *getTensorFilename(index_type id) { - constexpr size_t BUF_SIZE = 80; - char var[BUF_SIZE]; - snprintf(var, BUF_SIZE, "TENSOR%" PRIu64, id); + constexpr size_t bufSize = 80; + char var[bufSize]; + snprintf(var, bufSize, "TENSOR%" PRIu64, id); char *env = getenv(var); if (!env) MLIR_SPARSETENSOR_FATAL("Environment variable %s is not set\n", var); From d99d258e3e41b8afd1d33676bb3d2e7d83ce8ffc Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Fri, 20 Oct 2023 09:21:40 -0700 Subject: [PATCH 166/240] Apply clang-tidy fixes for llvm-include-order in InferIntRangeInterface.cpp (NFC) --- mlir/lib/Interfaces/InferIntRangeInterface.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Interfaces/InferIntRangeInterface.cpp 
b/mlir/lib/Interfaces/InferIntRangeInterface.cpp index cc31104ce33352..b3f6c0ee3cc32d 100644 --- a/mlir/lib/Interfaces/InferIntRangeInterface.cpp +++ b/mlir/lib/Interfaces/InferIntRangeInterface.cpp @@ -8,8 +8,8 @@ #include "mlir/Interfaces/InferIntRangeInterface.h" #include "mlir/IR/BuiltinTypes.h" -#include #include "mlir/Interfaces/InferIntRangeInterface.cpp.inc" +#include using namespace mlir; From 275eeda32f4f32d2385043f8d1d8af3d4f65bb2c Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 14 Feb 2024 10:15:24 -0800 Subject: [PATCH 167/240] [RISCV] Split long build_vector sequences to reduce critical path (#81312) If we have a long chain of vslide1down instructions to build e.g. a <16 x i8> from scalar, we end up with a critical path going through the entire chain. We can instead build two halves, and then combine them with a vselect. This costs one additional temporary register, but reduces the critical path by roughly half. To avoid needing to change VL, we fill each half with undefs for the elements which will come from the other half. The vselect will at worst become a vmerge, but is often folded back into the final instruction of the sequence building the lower half. A couple notes on the heuristic here: * This is restricted to LMUL1 to avoid quadratic costing reasoning. * This only splits once. In future work, we can explore recursive splitting here, but I'm a bit worried about register pressure and thus decided to be conservative. It also happens to be "enough" at the default zvl of 128. * "8" is picked somewhat arbitrarily as being "long". In practice, our build_vector codegen for 2 defined elements in a VL=4 vector appears to need some work. 4 defined elements in a VL=8 vector seems to generally produce reasonable results. * Halves may not be an optimal split point. I went down the rabit hole of trying to find the optimal one, and decided it wasn't worth the effort to start with. 
--------- Co-authored-by: Luke Lau --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 41 +++ .../RISCV/rvv/fixed-vectors-fp-buildvec.ll | 16 +- .../RISCV/rvv/fixed-vectors-fp2i-sat.ll | 112 +++---- .../RISCV/rvv/fixed-vectors-int-buildvec.ll | 290 +++++++----------- .../RISCV/rvv/fixed-vectors-mask-buildvec.ll | 74 +++-- 5 files changed, 258 insertions(+), 275 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 4caadef694d725..8235b536c4e00a 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -3877,6 +3877,47 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, return convertFromScalableVector(VT, Vec, DAG, Subtarget); } + // For m1 vectors, if we have non-undef values in both halves of our vector, + // split the vector into low and high halves, build them separately, then + // use a vselect to combine them. For long vectors, this cuts the critical + // path of the vslide1down sequence in half, and gives us an opportunity + // to special case each half independently. Note that we don't change the + // length of the sub-vectors here, so if both fallback to the generic + // vslide1down path, we should be able to fold the vselect into the final + // vslidedown (for the undef tail) for the first half w/ masking. 
+ unsigned NumElts = VT.getVectorNumElements(); + unsigned NumUndefElts = + count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); }); + unsigned NumDefElts = NumElts - NumUndefElts; + if (NumDefElts >= 8 && NumDefElts > NumElts / 2 && + ContainerVT.bitsLE(getLMUL1VT(ContainerVT))) { + SmallVector SubVecAOps, SubVecBOps; + SmallVector MaskVals; + SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0)); + SubVecAOps.reserve(NumElts); + SubVecBOps.reserve(NumElts); + for (unsigned i = 0; i < NumElts; i++) { + SDValue Elem = Op->getOperand(i); + if (i < NumElts / 2) { + SubVecAOps.push_back(Elem); + SubVecBOps.push_back(UndefElem); + } else { + SubVecAOps.push_back(UndefElem); + SubVecBOps.push_back(Elem); + } + bool SelectMaskVal = (i < NumElts / 2); + MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT)); + } + assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts && + MaskVals.size() == NumElts); + + SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps); + SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps); + MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts); + SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals); + return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB); + } + // Cap the cost at a value linear to the number of elements in the vector. // The default lowering is to use the stack. The vector store + scalar loads // is linear in VL. 
However, at high lmuls vslide1down and vslidedown end up diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll index a2bd862e2ce14f..8e214e40547832 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll @@ -1399,15 +1399,17 @@ define <2 x double> @vid_step2_v2f64() { define <8 x float> @buildvec_v8f32_zvl256(float %e0, float %e1, float %e2, float %e3, float %e4, float %e5, float %e6, float %e7) vscale_range(4, 128) { ; CHECK-LABEL: buildvec_v8f32_zvl256: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v8, fa0 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa1 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa2 -; CHECK-NEXT: vfslide1down.vf v8, v8, fa3 -; CHECK-NEXT: vfslide1down.vf v8, v8, fa4 +; CHECK-NEXT: vfslide1down.vf v9, v8, fa3 +; CHECK-NEXT: vfmv.v.f v8, fa4 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa5 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa6 +; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa7 +; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t ; CHECK-NEXT: ret %v0 = insertelement <8 x float> poison, float %e0, i64 0 %v1 = insertelement <8 x float> %v0, float %e1, i64 1 @@ -1448,15 +1450,17 @@ define <8 x double> @buildvec_v8f64_zvl256(double %e0, double %e1, double %e2, d define <8 x double> @buildvec_v8f64_zvl512(double %e0, double %e1, double %e2, double %e3, double %e4, double %e5, double %e6, double %e7) vscale_range(8, 128) { ; CHECK-LABEL: buildvec_v8f64_zvl512: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v8, fa0 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa1 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa2 -; CHECK-NEXT: vfslide1down.vf v8, v8, fa3 -; CHECK-NEXT: vfslide1down.vf v8, v8, fa4 +; CHECK-NEXT: vfslide1down.vf v9, v8, fa3 
+; CHECK-NEXT: vfmv.v.f v8, fa4 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa5 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa6 +; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa7 +; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t ; CHECK-NEXT: ret %v0 = insertelement <8 x double> poison, double %e0, i64 0 %v1 = insertelement <8 x double> %v0, double %e1, i64 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll index ed0b15c6add5cd..85b849045e8cee 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll @@ -359,28 +359,28 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) { ; RV32-NEXT: feq.d a0, fa3, fa3 ; RV32-NEXT: fmax.d fa3, fa3, fa5 ; RV32-NEXT: fmin.d fa3, fa3, fa4 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; RV32-NEXT: fld fa2, 40(sp) ; RV32-NEXT: fcvt.w.d a2, fa3, rtz -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: fld fa3, 32(sp) ; RV32-NEXT: neg a0, a0 ; RV32-NEXT: and a0, a0, a2 -; RV32-NEXT: vslide1down.vx v8, v10, a0 -; RV32-NEXT: feq.d a0, fa3, fa3 -; RV32-NEXT: fmax.d fa3, fa3, fa5 +; RV32-NEXT: feq.d a2, fa2, fa2 +; RV32-NEXT: fmax.d fa3, fa2, fa5 ; RV32-NEXT: fmin.d fa3, fa3, fa4 -; RV32-NEXT: fcvt.w.d a2, fa3, rtz -; RV32-NEXT: fld fa3, 40(sp) -; RV32-NEXT: neg a0, a0 -; RV32-NEXT: and a0, a0, a2 -; RV32-NEXT: vslide1down.vx v8, v8, a0 -; RV32-NEXT: feq.d a0, fa3, fa3 +; RV32-NEXT: fcvt.w.d a3, fa3, rtz +; RV32-NEXT: fld fa3, 32(sp) +; RV32-NEXT: vslide1down.vx v8, v10, a0 +; RV32-NEXT: neg a0, a2 +; RV32-NEXT: and a0, a0, a3 +; RV32-NEXT: feq.d a2, fa3, fa3 +; RV32-NEXT: neg a2, a2 ; RV32-NEXT: fmax.d fa3, fa3, fa5 ; RV32-NEXT: fmin.d fa3, fa3, fa4 -; RV32-NEXT: fcvt.w.d a2, fa3, rtz +; RV32-NEXT: fcvt.w.d a3, fa3, rtz ; RV32-NEXT: fld fa3, 48(sp) -; RV32-NEXT: neg a0, a0 -; RV32-NEXT: and a0, a0, a2 -; RV32-NEXT: vslide1down.vx v8, v8, a0 +; RV32-NEXT: and a2, a2, a3 +; RV32-NEXT: 
vmv.v.x v9, a2 +; RV32-NEXT: vslide1down.vx v9, v9, a0 ; RV32-NEXT: feq.d a0, fa3, fa3 ; RV32-NEXT: fmax.d fa3, fa3, fa5 ; RV32-NEXT: fmin.d fa3, fa3, fa4 @@ -388,15 +388,17 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) { ; RV32-NEXT: fld fa3, 56(sp) ; RV32-NEXT: neg a0, a0 ; RV32-NEXT: and a0, a0, a2 -; RV32-NEXT: vslide1down.vx v8, v8, a0 +; RV32-NEXT: vslide1down.vx v9, v9, a0 ; RV32-NEXT: feq.d a0, fa3, fa3 ; RV32-NEXT: neg a0, a0 ; RV32-NEXT: fmax.d fa5, fa3, fa5 ; RV32-NEXT: fmin.d fa5, fa5, fa4 ; RV32-NEXT: fcvt.w.d a2, fa5, rtz ; RV32-NEXT: and a0, a0, a2 -; RV32-NEXT: vslide1down.vx v8, v8, a0 -; RV32-NEXT: vse8.v v8, (a1) +; RV32-NEXT: vmv.v.i v0, 15 +; RV32-NEXT: vslide1down.vx v9, v9, a0 +; RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t +; RV32-NEXT: vse8.v v9, (a1) ; RV32-NEXT: addi sp, s0, -128 ; RV32-NEXT: lw ra, 124(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 120(sp) # 4-byte Folded Reload @@ -458,28 +460,28 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) { ; RV64-NEXT: feq.d a0, fa3, fa3 ; RV64-NEXT: fmax.d fa3, fa3, fa5 ; RV64-NEXT: fmin.d fa3, fa3, fa4 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; RV64-NEXT: fld fa2, 40(sp) ; RV64-NEXT: fcvt.l.d a2, fa3, rtz -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: fld fa3, 32(sp) ; RV64-NEXT: neg a0, a0 ; RV64-NEXT: and a0, a0, a2 -; RV64-NEXT: vslide1down.vx v8, v10, a0 -; RV64-NEXT: feq.d a0, fa3, fa3 -; RV64-NEXT: fmax.d fa3, fa3, fa5 +; RV64-NEXT: feq.d a2, fa2, fa2 +; RV64-NEXT: fmax.d fa3, fa2, fa5 ; RV64-NEXT: fmin.d fa3, fa3, fa4 -; RV64-NEXT: fcvt.l.d a2, fa3, rtz -; RV64-NEXT: fld fa3, 40(sp) -; RV64-NEXT: neg a0, a0 -; RV64-NEXT: and a0, a0, a2 -; RV64-NEXT: vslide1down.vx v8, v8, a0 -; RV64-NEXT: feq.d a0, fa3, fa3 +; RV64-NEXT: fcvt.l.d a3, fa3, rtz +; RV64-NEXT: fld fa3, 32(sp) +; RV64-NEXT: vslide1down.vx v8, v10, a0 +; RV64-NEXT: neg a0, a2 +; RV64-NEXT: and a0, a0, a3 +; RV64-NEXT: feq.d a2, fa3, fa3 +; RV64-NEXT: negw a2, a2 ; RV64-NEXT: fmax.d fa3, fa3, fa5 ; 
RV64-NEXT: fmin.d fa3, fa3, fa4 -; RV64-NEXT: fcvt.l.d a2, fa3, rtz +; RV64-NEXT: fcvt.l.d a3, fa3, rtz ; RV64-NEXT: fld fa3, 48(sp) -; RV64-NEXT: neg a0, a0 -; RV64-NEXT: and a0, a0, a2 -; RV64-NEXT: vslide1down.vx v8, v8, a0 +; RV64-NEXT: and a2, a2, a3 +; RV64-NEXT: vmv.v.x v9, a2 +; RV64-NEXT: vslide1down.vx v9, v9, a0 ; RV64-NEXT: feq.d a0, fa3, fa3 ; RV64-NEXT: fmax.d fa3, fa3, fa5 ; RV64-NEXT: fmin.d fa3, fa3, fa4 @@ -487,15 +489,17 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) { ; RV64-NEXT: fld fa3, 56(sp) ; RV64-NEXT: neg a0, a0 ; RV64-NEXT: and a0, a0, a2 -; RV64-NEXT: vslide1down.vx v8, v8, a0 +; RV64-NEXT: vslide1down.vx v9, v9, a0 ; RV64-NEXT: feq.d a0, fa3, fa3 ; RV64-NEXT: neg a0, a0 ; RV64-NEXT: fmax.d fa5, fa3, fa5 ; RV64-NEXT: fmin.d fa5, fa5, fa4 ; RV64-NEXT: fcvt.l.d a2, fa5, rtz ; RV64-NEXT: and a0, a0, a2 -; RV64-NEXT: vslide1down.vx v8, v8, a0 -; RV64-NEXT: vse8.v v8, (a1) +; RV64-NEXT: vmv.v.i v0, 15 +; RV64-NEXT: vslide1down.vx v9, v9, a0 +; RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t +; RV64-NEXT: vse8.v v9, (a1) ; RV64-NEXT: addi sp, s0, -128 ; RV64-NEXT: ld ra, 120(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s0, 112(sp) # 8-byte Folded Reload @@ -553,11 +557,11 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) { ; RV32-NEXT: vslidedown.vi v8, v8, 3 ; RV32-NEXT: vfmv.f.s fa4, v8 ; RV32-NEXT: fmax.d fa4, fa4, fa3 -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: fld fa2, 32(sp) +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; RV32-NEXT: fld fa2, 40(sp) ; RV32-NEXT: fmin.d fa4, fa4, fa5 ; RV32-NEXT: fcvt.wu.d a0, fa4, rtz -; RV32-NEXT: fld fa4, 40(sp) +; RV32-NEXT: fld fa4, 32(sp) ; RV32-NEXT: fmax.d fa2, fa2, fa3 ; RV32-NEXT: fmin.d fa2, fa2, fa5 ; RV32-NEXT: fcvt.wu.d a2, fa2, rtz @@ -570,14 +574,16 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) { ; RV32-NEXT: fmin.d fa4, fa4, fa5 ; RV32-NEXT: fcvt.wu.d a0, fa4, rtz ; RV32-NEXT: fld fa4, 56(sp) -; RV32-NEXT: vslide1down.vx v8, v8, a2 -; RV32-NEXT: vslide1down.vx v8, v8, a3 
-; RV32-NEXT: vslide1down.vx v8, v8, a0 +; RV32-NEXT: vmv.v.x v9, a3 +; RV32-NEXT: vslide1down.vx v9, v9, a2 +; RV32-NEXT: vslide1down.vx v9, v9, a0 ; RV32-NEXT: fmax.d fa4, fa4, fa3 ; RV32-NEXT: fmin.d fa5, fa4, fa5 ; RV32-NEXT: fcvt.wu.d a0, fa5, rtz -; RV32-NEXT: vslide1down.vx v8, v8, a0 -; RV32-NEXT: vse8.v v8, (a1) +; RV32-NEXT: vmv.v.i v0, 15 +; RV32-NEXT: vslide1down.vx v9, v9, a0 +; RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t +; RV32-NEXT: vse8.v v9, (a1) ; RV32-NEXT: addi sp, s0, -128 ; RV32-NEXT: lw ra, 124(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 120(sp) # 4-byte Folded Reload @@ -627,11 +633,11 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) { ; RV64-NEXT: vslidedown.vi v8, v8, 3 ; RV64-NEXT: vfmv.f.s fa4, v8 ; RV64-NEXT: fmax.d fa4, fa4, fa3 -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: fld fa2, 32(sp) +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; RV64-NEXT: fld fa2, 40(sp) ; RV64-NEXT: fmin.d fa4, fa4, fa5 ; RV64-NEXT: fcvt.lu.d a0, fa4, rtz -; RV64-NEXT: fld fa4, 40(sp) +; RV64-NEXT: fld fa4, 32(sp) ; RV64-NEXT: fmax.d fa2, fa2, fa3 ; RV64-NEXT: fmin.d fa2, fa2, fa5 ; RV64-NEXT: fcvt.lu.d a2, fa2, rtz @@ -644,14 +650,16 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) { ; RV64-NEXT: fmin.d fa4, fa4, fa5 ; RV64-NEXT: fcvt.lu.d a0, fa4, rtz ; RV64-NEXT: fld fa4, 56(sp) -; RV64-NEXT: vslide1down.vx v8, v8, a2 -; RV64-NEXT: vslide1down.vx v8, v8, a3 -; RV64-NEXT: vslide1down.vx v8, v8, a0 +; RV64-NEXT: vmv.v.x v9, a3 +; RV64-NEXT: vslide1down.vx v9, v9, a2 +; RV64-NEXT: vslide1down.vx v9, v9, a0 ; RV64-NEXT: fmax.d fa4, fa4, fa3 ; RV64-NEXT: fmin.d fa5, fa4, fa5 ; RV64-NEXT: fcvt.lu.d a0, fa5, rtz -; RV64-NEXT: vslide1down.vx v8, v8, a0 -; RV64-NEXT: vse8.v v8, (a1) +; RV64-NEXT: vmv.v.i v0, 15 +; RV64-NEXT: vslide1down.vx v9, v9, a0 +; RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t +; RV64-NEXT: vse8.v v9, (a1) ; RV64-NEXT: addi sp, s0, -128 ; RV64-NEXT: ld ra, 120(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s0, 112(sp) # 8-byte 
Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll index e691e635811544..ed6c01aaf7fe1c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll @@ -1181,89 +1181,46 @@ define <8 x i64> @v8xi64_exact_undef_prefix(i64 %a, i64 %b, i64 %c, i64 %d) vsca define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) { -; RV32-LABEL: buildvec_v16i8_loads_contigous: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset s0, -4 -; RV32-NEXT: lbu a1, 1(a0) -; RV32-NEXT: lbu a2, 2(a0) -; RV32-NEXT: lbu a3, 3(a0) -; RV32-NEXT: lbu a4, 4(a0) -; RV32-NEXT: lbu a5, 5(a0) -; RV32-NEXT: lbu a6, 6(a0) -; RV32-NEXT: lbu a7, 7(a0) -; RV32-NEXT: lbu t0, 8(a0) -; RV32-NEXT: lbu t1, 9(a0) -; RV32-NEXT: lbu t2, 10(a0) -; RV32-NEXT: lbu t3, 11(a0) -; RV32-NEXT: lbu t4, 12(a0) -; RV32-NEXT: lbu t5, 13(a0) -; RV32-NEXT: lbu t6, 14(a0) -; RV32-NEXT: lbu s0, 15(a0) -; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vlse8.v v8, (a0), zero -; RV32-NEXT: vslide1down.vx v8, v8, a1 -; RV32-NEXT: vslide1down.vx v8, v8, a2 -; RV32-NEXT: vslide1down.vx v8, v8, a3 -; RV32-NEXT: vslide1down.vx v8, v8, a4 -; RV32-NEXT: vslide1down.vx v8, v8, a5 -; RV32-NEXT: vslide1down.vx v8, v8, a6 -; RV32-NEXT: vslide1down.vx v8, v8, a7 -; RV32-NEXT: vslide1down.vx v8, v8, t0 -; RV32-NEXT: vslide1down.vx v8, v8, t1 -; RV32-NEXT: vslide1down.vx v8, v8, t2 -; RV32-NEXT: vslide1down.vx v8, v8, t3 -; RV32-NEXT: vslide1down.vx v8, v8, t4 -; RV32-NEXT: vslide1down.vx v8, v8, t5 -; RV32-NEXT: vslide1down.vx v8, v8, t6 -; RV32-NEXT: vslide1down.vx v8, v8, s0 -; RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret -; -; RV64-LABEL: buildvec_v16i8_loads_contigous: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, 
sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: sd s0, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset s0, -8 -; RV64-NEXT: lbu a1, 1(a0) -; RV64-NEXT: lbu a2, 2(a0) -; RV64-NEXT: lbu a3, 3(a0) -; RV64-NEXT: lbu a4, 4(a0) -; RV64-NEXT: lbu a5, 5(a0) -; RV64-NEXT: lbu a6, 6(a0) -; RV64-NEXT: lbu a7, 7(a0) -; RV64-NEXT: lbu t0, 8(a0) -; RV64-NEXT: lbu t1, 9(a0) -; RV64-NEXT: lbu t2, 10(a0) -; RV64-NEXT: lbu t3, 11(a0) -; RV64-NEXT: lbu t4, 12(a0) -; RV64-NEXT: lbu t5, 13(a0) -; RV64-NEXT: lbu t6, 14(a0) -; RV64-NEXT: lbu s0, 15(a0) -; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vlse8.v v8, (a0), zero -; RV64-NEXT: vslide1down.vx v8, v8, a1 -; RV64-NEXT: vslide1down.vx v8, v8, a2 -; RV64-NEXT: vslide1down.vx v8, v8, a3 -; RV64-NEXT: vslide1down.vx v8, v8, a4 -; RV64-NEXT: vslide1down.vx v8, v8, a5 -; RV64-NEXT: vslide1down.vx v8, v8, a6 -; RV64-NEXT: vslide1down.vx v8, v8, a7 -; RV64-NEXT: vslide1down.vx v8, v8, t0 -; RV64-NEXT: vslide1down.vx v8, v8, t1 -; RV64-NEXT: vslide1down.vx v8, v8, t2 -; RV64-NEXT: vslide1down.vx v8, v8, t3 -; RV64-NEXT: vslide1down.vx v8, v8, t4 -; RV64-NEXT: vslide1down.vx v8, v8, t5 -; RV64-NEXT: vslide1down.vx v8, v8, t6 -; RV64-NEXT: vslide1down.vx v8, v8, s0 -; RV64-NEXT: ld s0, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 16 -; RV64-NEXT: ret +; CHECK-LABEL: buildvec_v16i8_loads_contigous: +; CHECK: # %bb.0: +; CHECK-NEXT: lbu a1, 1(a0) +; CHECK-NEXT: lbu a2, 2(a0) +; CHECK-NEXT: lbu a3, 3(a0) +; CHECK-NEXT: lbu a4, 4(a0) +; CHECK-NEXT: lbu a5, 5(a0) +; CHECK-NEXT: lbu a6, 6(a0) +; CHECK-NEXT: lbu a7, 7(a0) +; CHECK-NEXT: lbu t0, 9(a0) +; CHECK-NEXT: lbu t1, 10(a0) +; CHECK-NEXT: lbu t2, 11(a0) +; CHECK-NEXT: lbu t3, 12(a0) +; CHECK-NEXT: lbu t4, 13(a0) +; CHECK-NEXT: lbu t5, 14(a0) +; CHECK-NEXT: lbu t6, 15(a0) +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vlse8.v v8, (a0), zero +; CHECK-NEXT: addi a0, a0, 8 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: 
vslide1down.vx v8, v8, a2 +; CHECK-NEXT: vslide1down.vx v8, v8, a3 +; CHECK-NEXT: vslide1down.vx v8, v8, a4 +; CHECK-NEXT: vlse8.v v9, (a0), zero +; CHECK-NEXT: vslide1down.vx v8, v8, a5 +; CHECK-NEXT: vslide1down.vx v8, v8, a6 +; CHECK-NEXT: vslide1down.vx v10, v8, a7 +; CHECK-NEXT: vslide1down.vx v8, v9, t0 +; CHECK-NEXT: vslide1down.vx v8, v8, t1 +; CHECK-NEXT: vslide1down.vx v8, v8, t2 +; CHECK-NEXT: vslide1down.vx v8, v8, t3 +; CHECK-NEXT: vslide1down.vx v8, v8, t4 +; CHECK-NEXT: vslide1down.vx v8, v8, t5 +; CHECK-NEXT: vslide1down.vx v8, v8, t6 +; CHECK-NEXT: li a0, 255 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v10, 8, v0.t +; CHECK-NEXT: ret %p2 = getelementptr i8, ptr %p, i32 1 %p3 = getelementptr i8, ptr %p, i32 2 %p4 = getelementptr i8, ptr %p, i32 3 @@ -1318,89 +1275,46 @@ define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) { define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) { -; RV32-LABEL: buildvec_v16i8_loads_gather: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset s0, -4 -; RV32-NEXT: lbu a1, 1(a0) -; RV32-NEXT: lbu a2, 22(a0) -; RV32-NEXT: lbu a3, 31(a0) -; RV32-NEXT: lbu a4, 44(a0) -; RV32-NEXT: lbu a5, 55(a0) -; RV32-NEXT: lbu a6, 623(a0) -; RV32-NEXT: lbu a7, 75(a0) -; RV32-NEXT: lbu t0, 82(a0) -; RV32-NEXT: lbu t1, 93(a0) -; RV32-NEXT: lbu t2, 105(a0) -; RV32-NEXT: lbu t3, 161(a0) -; RV32-NEXT: lbu t4, 124(a0) -; RV32-NEXT: lbu t5, 163(a0) -; RV32-NEXT: lbu t6, 144(a0) -; RV32-NEXT: lbu s0, 154(a0) -; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vlse8.v v8, (a0), zero -; RV32-NEXT: vslide1down.vx v8, v8, a1 -; RV32-NEXT: vslide1down.vx v8, v8, a2 -; RV32-NEXT: vslide1down.vx v8, v8, a3 -; RV32-NEXT: vslide1down.vx v8, v8, a4 -; RV32-NEXT: vslide1down.vx v8, v8, a5 -; RV32-NEXT: vslide1down.vx 
v8, v8, a6 -; RV32-NEXT: vslide1down.vx v8, v8, a7 -; RV32-NEXT: vslide1down.vx v8, v8, t0 -; RV32-NEXT: vslide1down.vx v8, v8, t1 -; RV32-NEXT: vslide1down.vx v8, v8, t2 -; RV32-NEXT: vslide1down.vx v8, v8, t3 -; RV32-NEXT: vslide1down.vx v8, v8, t4 -; RV32-NEXT: vslide1down.vx v8, v8, t5 -; RV32-NEXT: vslide1down.vx v8, v8, t6 -; RV32-NEXT: vslide1down.vx v8, v8, s0 -; RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret -; -; RV64-LABEL: buildvec_v16i8_loads_gather: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: sd s0, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset s0, -8 -; RV64-NEXT: lbu a1, 1(a0) -; RV64-NEXT: lbu a2, 22(a0) -; RV64-NEXT: lbu a3, 31(a0) -; RV64-NEXT: lbu a4, 44(a0) -; RV64-NEXT: lbu a5, 55(a0) -; RV64-NEXT: lbu a6, 623(a0) -; RV64-NEXT: lbu a7, 75(a0) -; RV64-NEXT: lbu t0, 82(a0) -; RV64-NEXT: lbu t1, 93(a0) -; RV64-NEXT: lbu t2, 105(a0) -; RV64-NEXT: lbu t3, 161(a0) -; RV64-NEXT: lbu t4, 124(a0) -; RV64-NEXT: lbu t5, 163(a0) -; RV64-NEXT: lbu t6, 144(a0) -; RV64-NEXT: lbu s0, 154(a0) -; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vlse8.v v8, (a0), zero -; RV64-NEXT: vslide1down.vx v8, v8, a1 -; RV64-NEXT: vslide1down.vx v8, v8, a2 -; RV64-NEXT: vslide1down.vx v8, v8, a3 -; RV64-NEXT: vslide1down.vx v8, v8, a4 -; RV64-NEXT: vslide1down.vx v8, v8, a5 -; RV64-NEXT: vslide1down.vx v8, v8, a6 -; RV64-NEXT: vslide1down.vx v8, v8, a7 -; RV64-NEXT: vslide1down.vx v8, v8, t0 -; RV64-NEXT: vslide1down.vx v8, v8, t1 -; RV64-NEXT: vslide1down.vx v8, v8, t2 -; RV64-NEXT: vslide1down.vx v8, v8, t3 -; RV64-NEXT: vslide1down.vx v8, v8, t4 -; RV64-NEXT: vslide1down.vx v8, v8, t5 -; RV64-NEXT: vslide1down.vx v8, v8, t6 -; RV64-NEXT: vslide1down.vx v8, v8, s0 -; RV64-NEXT: ld s0, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 16 -; RV64-NEXT: ret +; CHECK-LABEL: buildvec_v16i8_loads_gather: +; CHECK: # %bb.0: +; CHECK-NEXT: lbu a1, 
1(a0) +; CHECK-NEXT: lbu a2, 22(a0) +; CHECK-NEXT: lbu a3, 31(a0) +; CHECK-NEXT: lbu a4, 44(a0) +; CHECK-NEXT: lbu a5, 55(a0) +; CHECK-NEXT: lbu a6, 623(a0) +; CHECK-NEXT: lbu a7, 75(a0) +; CHECK-NEXT: lbu t0, 93(a0) +; CHECK-NEXT: lbu t1, 105(a0) +; CHECK-NEXT: lbu t2, 161(a0) +; CHECK-NEXT: lbu t3, 124(a0) +; CHECK-NEXT: lbu t4, 163(a0) +; CHECK-NEXT: lbu t5, 144(a0) +; CHECK-NEXT: lbu t6, 154(a0) +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vlse8.v v8, (a0), zero +; CHECK-NEXT: addi a0, a0, 82 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: vslide1down.vx v8, v8, a2 +; CHECK-NEXT: vslide1down.vx v8, v8, a3 +; CHECK-NEXT: vslide1down.vx v8, v8, a4 +; CHECK-NEXT: vlse8.v v9, (a0), zero +; CHECK-NEXT: vslide1down.vx v8, v8, a5 +; CHECK-NEXT: vslide1down.vx v8, v8, a6 +; CHECK-NEXT: vslide1down.vx v10, v8, a7 +; CHECK-NEXT: vslide1down.vx v8, v9, t0 +; CHECK-NEXT: vslide1down.vx v8, v8, t1 +; CHECK-NEXT: vslide1down.vx v8, v8, t2 +; CHECK-NEXT: vslide1down.vx v8, v8, t3 +; CHECK-NEXT: vslide1down.vx v8, v8, t4 +; CHECK-NEXT: vslide1down.vx v8, v8, t5 +; CHECK-NEXT: vslide1down.vx v8, v8, t6 +; CHECK-NEXT: li a0, 255 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v10, 8, v0.t +; CHECK-NEXT: ret %p2 = getelementptr i8, ptr %p, i32 1 %p3 = getelementptr i8, ptr %p, i32 22 %p4 = getelementptr i8, ptr %p, i32 31 @@ -1560,21 +1474,26 @@ define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) { ; CHECK-NEXT: lbu a3, 55(a0) ; CHECK-NEXT: lbu a4, 623(a0) ; CHECK-NEXT: lbu a5, 75(a0) -; CHECK-NEXT: lbu a6, 82(a0) -; CHECK-NEXT: lbu a7, 93(a0) -; CHECK-NEXT: lbu t0, 105(a0) -; CHECK-NEXT: lbu a0, 161(a0) +; CHECK-NEXT: lbu a6, 93(a0) +; CHECK-NEXT: lbu a7, 105(a0) +; CHECK-NEXT: lbu t0, 161(a0) ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vlse8.v v8, (a1), zero +; CHECK-NEXT: addi a0, a0, 82 ; CHECK-NEXT: vslide1down.vx 
v8, v8, a2 +; CHECK-NEXT: vlse8.v v9, (a0), zero ; CHECK-NEXT: vslide1down.vx v8, v8, a3 ; CHECK-NEXT: vslide1down.vx v8, v8, a4 -; CHECK-NEXT: vslide1down.vx v8, v8, a5 -; CHECK-NEXT: vslide1down.vx v8, v8, a6 +; CHECK-NEXT: vslide1down.vx v10, v8, a5 +; CHECK-NEXT: vslide1down.vx v8, v9, a6 ; CHECK-NEXT: vslide1down.vx v8, v8, a7 ; CHECK-NEXT: vslide1down.vx v8, v8, t0 -; CHECK-NEXT: vslide1down.vx v8, v8, a0 ; CHECK-NEXT: vslidedown.vi v8, v8, 4 +; CHECK-NEXT: li a0, 255 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v10, 8, v0.t ; CHECK-NEXT: ret %p4 = getelementptr i8, ptr %p, i32 31 %p5 = getelementptr i8, ptr %p, i32 44 @@ -1615,26 +1534,31 @@ define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) { ; CHECK-NEXT: lbu a2, 44(a0) ; CHECK-NEXT: lbu a3, 55(a0) ; CHECK-NEXT: lbu a4, 75(a0) -; CHECK-NEXT: lbu a5, 82(a0) -; CHECK-NEXT: lbu a6, 93(a0) -; CHECK-NEXT: lbu a7, 124(a0) -; CHECK-NEXT: lbu t0, 144(a0) -; CHECK-NEXT: lbu t1, 154(a0) +; CHECK-NEXT: lbu a5, 93(a0) +; CHECK-NEXT: lbu a6, 124(a0) +; CHECK-NEXT: lbu a7, 144(a0) +; CHECK-NEXT: lbu t0, 154(a0) ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vlse8.v v8, (a0), zero +; CHECK-NEXT: addi a0, a0, 82 ; CHECK-NEXT: vslide1down.vx v8, v8, a1 ; CHECK-NEXT: vslidedown.vi v8, v8, 2 ; CHECK-NEXT: vslide1down.vx v8, v8, a2 +; CHECK-NEXT: vlse8.v v9, (a0), zero ; CHECK-NEXT: vslide1down.vx v8, v8, a3 ; CHECK-NEXT: vslidedown.vi v8, v8, 1 -; CHECK-NEXT: vslide1down.vx v8, v8, a4 -; CHECK-NEXT: vslide1down.vx v8, v8, a5 -; CHECK-NEXT: vslide1down.vx v8, v8, a6 +; CHECK-NEXT: vslide1down.vx v10, v8, a4 +; CHECK-NEXT: vslide1down.vx v8, v9, a5 ; CHECK-NEXT: vslidedown.vi v8, v8, 2 -; CHECK-NEXT: vslide1down.vx v8, v8, a7 +; CHECK-NEXT: vslide1down.vx v8, v8, a6 ; CHECK-NEXT: vslidedown.vi v8, v8, 1 +; CHECK-NEXT: vslide1down.vx v8, v8, a7 ; CHECK-NEXT: vslide1down.vx v8, v8, t0 
-; CHECK-NEXT: vslide1down.vx v8, v8, t1 +; CHECK-NEXT: li a0, 255 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v10, 8, v0.t ; CHECK-NEXT: ret %p2 = getelementptr i8, ptr %p, i32 1 %p3 = getelementptr i8, ptr %p, i32 22 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll index dd0fc5a11a0ed6..c295fed2c28c10 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll @@ -278,32 +278,34 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1(i1 %x, i1 %y) { define <8 x i1> @buildvec_mask_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %w) { ; CHECK-LABEL: buildvec_mask_nonconst_v8i1_2: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: vslide1down.vx v8, v8, a0 -; CHECK-NEXT: li a4, 1 -; CHECK-NEXT: vslide1down.vx v8, v8, a4 -; CHECK-NEXT: vslide1down.vx v8, v8, a1 -; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: vslide1down.vx v9, v8, a0 +; CHECK-NEXT: li a0, 1 +; CHECK-NEXT: vslide1down.vx v9, v9, a0 +; CHECK-NEXT: vslide1down.vx v9, v9, a1 ; CHECK-NEXT: vslide1down.vx v8, v8, a3 ; CHECK-NEXT: vslide1down.vx v8, v8, zero +; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vslide1down.vx v8, v8, a2 +; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret ; ; ZVE32F-LABEL: buildvec_mask_nonconst_v8i1_2: ; ZVE32F: # %bb.0: -; ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; ZVE32F-NEXT: vmv.v.x v8, a0 -; ZVE32F-NEXT: vslide1down.vx v8, v8, a0 -; ZVE32F-NEXT: li a4, 1 -; ZVE32F-NEXT: vslide1down.vx v8, v8, a4 -; ZVE32F-NEXT: vslide1down.vx v8, v8, a1 -; ZVE32F-NEXT: vslide1down.vx v8, v8, 
a0 +; ZVE32F-NEXT: vslide1down.vx v9, v8, a0 +; ZVE32F-NEXT: li a0, 1 +; ZVE32F-NEXT: vslide1down.vx v9, v9, a0 +; ZVE32F-NEXT: vslide1down.vx v9, v9, a1 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a3 ; ZVE32F-NEXT: vslide1down.vx v8, v8, zero +; ZVE32F-NEXT: vmv.v.i v0, 15 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t ; ZVE32F-NEXT: vand.vi v8, v8, 1 ; ZVE32F-NEXT: vmsne.vi v0, v8, 0 ; ZVE32F-NEXT: ret @@ -321,32 +323,34 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %w) { define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %w) optsize { ; CHECK-LABEL: buildvec_mask_optsize_nonconst_v8i1_2: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: vslide1down.vx v8, v8, a0 -; CHECK-NEXT: li a4, 1 -; CHECK-NEXT: vslide1down.vx v8, v8, a4 -; CHECK-NEXT: vslide1down.vx v8, v8, a1 -; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: vslide1down.vx v9, v8, a0 +; CHECK-NEXT: li a0, 1 +; CHECK-NEXT: vslide1down.vx v9, v9, a0 +; CHECK-NEXT: vslide1down.vx v9, v9, a1 ; CHECK-NEXT: vslide1down.vx v8, v8, a3 ; CHECK-NEXT: vslide1down.vx v8, v8, zero +; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vslide1down.vx v8, v8, a2 +; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret ; ; ZVE32F-LABEL: buildvec_mask_optsize_nonconst_v8i1_2: ; ZVE32F: # %bb.0: -; ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; ZVE32F-NEXT: vmv.v.x v8, a0 -; ZVE32F-NEXT: vslide1down.vx v8, v8, a0 -; ZVE32F-NEXT: li a4, 1 -; ZVE32F-NEXT: vslide1down.vx v8, v8, a4 -; ZVE32F-NEXT: vslide1down.vx v8, v8, a1 -; ZVE32F-NEXT: vslide1down.vx v8, v8, a0 +; ZVE32F-NEXT: vslide1down.vx v9, v8, a0 +; ZVE32F-NEXT: li a0, 1 +; ZVE32F-NEXT: vslide1down.vx v9, v9, a0 +; ZVE32F-NEXT: vslide1down.vx v9, v9, a1 ; 
ZVE32F-NEXT: vslide1down.vx v8, v8, a3 ; ZVE32F-NEXT: vslide1down.vx v8, v8, zero +; ZVE32F-NEXT: vmv.v.i v0, 15 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t ; ZVE32F-NEXT: vand.vi v8, v8, 1 ; ZVE32F-NEXT: vmsne.vi v0, v8, 0 ; ZVE32F-NEXT: ret @@ -364,30 +368,32 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 % define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1(i1 %x, i1 %y) optsize { ; CHECK-LABEL: buildvec_mask_optsize_nonconst_v8i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: vslide1down.vx v8, v8, a0 -; CHECK-NEXT: vslide1down.vx v8, v8, a1 -; CHECK-NEXT: vslide1down.vx v8, v8, a1 -; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: vslide1down.vx v9, v8, a0 +; CHECK-NEXT: vslide1down.vx v9, v9, a1 +; CHECK-NEXT: vslide1down.vx v9, v9, a1 ; CHECK-NEXT: vslide1down.vx v8, v8, a1 ; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret ; ; ZVE32F-LABEL: buildvec_mask_optsize_nonconst_v8i1: ; ZVE32F: # %bb.0: -; ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; ZVE32F-NEXT: vmv.v.x v8, a0 -; ZVE32F-NEXT: vslide1down.vx v8, v8, a0 -; ZVE32F-NEXT: vslide1down.vx v8, v8, a1 -; ZVE32F-NEXT: vslide1down.vx v8, v8, a1 -; ZVE32F-NEXT: vslide1down.vx v8, v8, a0 +; ZVE32F-NEXT: vslide1down.vx v9, v8, a0 +; ZVE32F-NEXT: vslide1down.vx v9, v9, a1 +; ZVE32F-NEXT: vslide1down.vx v9, v9, a1 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a1 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; ZVE32F-NEXT: vmv.v.i v0, 15 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t ; ZVE32F-NEXT: vand.vi v8, v8, 1 ; ZVE32F-NEXT: vmsne.vi v0, v8, 0 ; 
ZVE32F-NEXT: ret From 307cd883546348cd658d74699915fd48ae01e9a0 Mon Sep 17 00:00:00 2001 From: Alex Langford Date: Wed, 14 Feb 2024 10:20:58 -0800 Subject: [PATCH 168/240] [lldb][NFCI] Remove CommandObjectProcessHandle::VerifyCommandOptionValue (#79901) I was refactoring something else but ran into this function. It was somewhat confusing to read through and understand, but it boils down to two steps: - First we try `OptionArgParser::ToBoolean`. If that works, then we're good to go. - Second, we try `llvm::to_integer` to see if it's an integer. If it parses to 0 or 1, we're good. - Failing either of the steps above means we cannot parse it into a bool. Instead of having an integer out param and a bool return value, the interface is better served with an optional -- Either it parses into true or false, or you get back nothing (nullopt). --- lldb/source/Commands/CommandObjectProcess.cpp | 126 ++++++++---------- 1 file changed, 59 insertions(+), 67 deletions(-) diff --git a/lldb/source/Commands/CommandObjectProcess.cpp b/lldb/source/Commands/CommandObjectProcess.cpp index c7b874d1979377..722b0e0c376be8 100644 --- a/lldb/source/Commands/CommandObjectProcess.cpp +++ b/lldb/source/Commands/CommandObjectProcess.cpp @@ -1591,26 +1591,6 @@ class CommandObjectProcessHandle : public CommandObjectParsed { Options *GetOptions() override { return &m_options; } - bool VerifyCommandOptionValue(const std::string &option, int &real_value) { - bool okay = true; - bool success = false; - bool tmp_value = OptionArgParser::ToBoolean(option, false, &success); - - if (success && tmp_value) - real_value = 1; - else if (success && !tmp_value) - real_value = 0; - else { - // If the value isn't 'true' or 'false', it had better be 0 or 1. 
- if (!llvm::to_integer(option, real_value)) - real_value = 3; - if (real_value != 0 && real_value != 1) - okay = false; - } - - return okay; - } - void PrintSignalHeader(Stream &str) { str.Printf("NAME PASS STOP NOTIFY\n"); str.Printf("=========== ===== ===== ======\n"); @@ -1666,33 +1646,52 @@ class CommandObjectProcessHandle : public CommandObjectParsed { // the user's options. ProcessSP process_sp = target.GetProcessSP(); - int stop_action = -1; // -1 means leave the current setting alone - int pass_action = -1; // -1 means leave the current setting alone - int notify_action = -1; // -1 means leave the current setting alone + std::optional stop_action = {}; + std::optional pass_action = {}; + std::optional notify_action = {}; - if (!m_options.stop.empty() && - !VerifyCommandOptionValue(m_options.stop, stop_action)) { - result.AppendError("Invalid argument for command option --stop; must be " - "true or false.\n"); - return; + if (!m_options.stop.empty()) { + bool success = false; + bool value = OptionArgParser::ToBoolean(m_options.stop, false, &success); + if (!success) { + result.AppendError( + "Invalid argument for command option --stop; must be " + "true or false.\n"); + return; + } + + stop_action = value; } - if (!m_options.notify.empty() && - !VerifyCommandOptionValue(m_options.notify, notify_action)) { - result.AppendError("Invalid argument for command option --notify; must " - "be true or false.\n"); - return; + if (!m_options.pass.empty()) { + bool success = false; + bool value = OptionArgParser::ToBoolean(m_options.pass, false, &success); + if (!success) { + result.AppendError( + "Invalid argument for command option --pass; must be " + "true or false.\n"); + return; + } + pass_action = value; } - if (!m_options.pass.empty() && - !VerifyCommandOptionValue(m_options.pass, pass_action)) { - result.AppendError("Invalid argument for command option --pass; must be " - "true or false.\n"); - return; + if (!m_options.notify.empty()) { + bool success = false; 
+ bool value = + OptionArgParser::ToBoolean(m_options.notify, false, &success); + if (!success) { + result.AppendError("Invalid argument for command option --notify; must " + "be true or false.\n"); + return; + } + notify_action = value; + } + + if (!m_options.notify.empty() && !notify_action.has_value()) { } - bool no_actions = (stop_action == -1 && pass_action == -1 - && notify_action == -1); + bool no_actions = (!stop_action.has_value() && !pass_action.has_value() && + !notify_action.has_value()); if (m_options.only_target_values && !no_actions) { result.AppendError("-t is for reporting, not setting, target values."); return; @@ -1730,16 +1729,14 @@ class CommandObjectProcessHandle : public CommandObjectParsed { if (signals_sp) { int32_t signo = signals_sp->GetSignalNumberFromName(arg.c_str()); if (signo != LLDB_INVALID_SIGNAL_NUMBER) { - // Casting the actions as bools here should be okay, because - // VerifyCommandOptionValue guarantees the value is either 0 or 1. - if (stop_action != -1) - signals_sp->SetShouldStop(signo, stop_action); - if (pass_action != -1) { - bool suppress = !pass_action; + if (stop_action.has_value()) + signals_sp->SetShouldStop(signo, *stop_action); + if (pass_action.has_value()) { + bool suppress = !*pass_action; signals_sp->SetShouldSuppress(signo, suppress); } - if (notify_action != -1) - signals_sp->SetShouldNotify(signo, notify_action); + if (notify_action.has_value()) + signals_sp->SetShouldNotify(signo, *notify_action); ++num_signals_set; } else { result.AppendErrorWithFormat("Invalid signal name '%s'\n", @@ -1759,21 +1756,15 @@ class CommandObjectProcessHandle : public CommandObjectParsed { } num_signals_set = num_args; } - auto set_lazy_bool = [] (int action) -> LazyBool { - LazyBool lazy; - if (action == -1) - lazy = eLazyBoolCalculate; - else if (action) - lazy = eLazyBoolYes; - else - lazy = eLazyBoolNo; - return lazy; + auto set_lazy_bool = [](std::optional action) -> LazyBool { + if (!action.has_value()) + return 
eLazyBoolCalculate; + return (*action) ? eLazyBoolYes : eLazyBoolNo; }; // If there were no actions, we're just listing, don't add the dummy: if (!no_actions) - target.AddDummySignal(arg.ref(), - set_lazy_bool(pass_action), + target.AddDummySignal(arg.ref(), set_lazy_bool(pass_action), set_lazy_bool(notify_action), set_lazy_bool(stop_action)); } @@ -1781,18 +1772,19 @@ class CommandObjectProcessHandle : public CommandObjectParsed { // No signal specified, if any command options were specified, update ALL // signals. But we can't do this without a process since we don't know // all the possible signals that might be valid for this target. - if (((notify_action != -1) || (stop_action != -1) || (pass_action != -1)) - && process_sp) { + if ((notify_action.has_value() || stop_action.has_value() || + pass_action.has_value()) && + process_sp) { if (m_interpreter.Confirm( "Do you really want to update all the signals?", false)) { int32_t signo = signals_sp->GetFirstSignalNumber(); while (signo != LLDB_INVALID_SIGNAL_NUMBER) { - if (notify_action != -1) - signals_sp->SetShouldNotify(signo, notify_action); - if (stop_action != -1) - signals_sp->SetShouldStop(signo, stop_action); - if (pass_action != -1) { - bool suppress = !pass_action; + if (notify_action.has_value()) + signals_sp->SetShouldNotify(signo, *notify_action); + if (stop_action.has_value()) + signals_sp->SetShouldStop(signo, *stop_action); + if (pass_action.has_value()) { + bool suppress = !*pass_action; signals_sp->SetShouldSuppress(signo, suppress); } signo = signals_sp->GetNextSignalNumber(signo); From 16e7d6842ef6a5c904422c6dba034e0888ea8b7b Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Wed, 14 Feb 2024 10:34:28 -0800 Subject: [PATCH 169/240] [libc][stdbit] implement stdc_first_trailing_zero (C23) (#81526) --- libc/config/linux/x86_64/entrypoints.txt | 5 +++++ libc/docs/stdbit.rst | 12 +++++----- libc/include/llvm-libc-macros/stdbit-macros.h | 22 +++++++++++++++++++ libc/spec/stdc.td | 10 +++++++-- 
libc/src/__support/CPP/bit.h | 7 ++++++ libc/src/stdbit/CMakeLists.txt | 1 + .../stdbit/stdc_first_trailing_zero_uc.cpp | 21 ++++++++++++++++++ libc/src/stdbit/stdc_first_trailing_zero_uc.h | 18 +++++++++++++++ .../stdbit/stdc_first_trailing_zero_ui.cpp | 20 +++++++++++++++++ libc/src/stdbit/stdc_first_trailing_zero_ui.h | 18 +++++++++++++++ .../stdbit/stdc_first_trailing_zero_ul.cpp | 21 ++++++++++++++++++ libc/src/stdbit/stdc_first_trailing_zero_ul.h | 18 +++++++++++++++ .../stdbit/stdc_first_trailing_zero_ull.cpp | 21 ++++++++++++++++++ .../src/stdbit/stdc_first_trailing_zero_ull.h | 18 +++++++++++++++ .../stdbit/stdc_first_trailing_zero_us.cpp | 21 ++++++++++++++++++ libc/src/stdbit/stdc_first_trailing_zero_us.h | 18 +++++++++++++++ libc/test/include/stdbit_test.cpp | 15 +++++++++++++ libc/test/src/__support/CPP/bit_test.cpp | 6 +++++ libc/test/src/stdbit/CMakeLists.txt | 1 + .../stdc_first_trailing_zero_uc_test.cpp | 20 +++++++++++++++++ .../stdc_first_trailing_zero_ui_test.cpp | 20 +++++++++++++++++ .../stdc_first_trailing_zero_ul_test.cpp | 20 +++++++++++++++++ .../stdc_first_trailing_zero_ull_test.cpp | 21 ++++++++++++++++++ .../stdc_first_trailing_zero_us_test.cpp | 20 +++++++++++++++++ 24 files changed, 366 insertions(+), 8 deletions(-) create mode 100644 libc/src/stdbit/stdc_first_trailing_zero_uc.cpp create mode 100644 libc/src/stdbit/stdc_first_trailing_zero_uc.h create mode 100644 libc/src/stdbit/stdc_first_trailing_zero_ui.cpp create mode 100644 libc/src/stdbit/stdc_first_trailing_zero_ui.h create mode 100644 libc/src/stdbit/stdc_first_trailing_zero_ul.cpp create mode 100644 libc/src/stdbit/stdc_first_trailing_zero_ul.h create mode 100644 libc/src/stdbit/stdc_first_trailing_zero_ull.cpp create mode 100644 libc/src/stdbit/stdc_first_trailing_zero_ull.h create mode 100644 libc/src/stdbit/stdc_first_trailing_zero_us.cpp create mode 100644 libc/src/stdbit/stdc_first_trailing_zero_us.h create mode 100644 
libc/test/src/stdbit/stdc_first_trailing_zero_uc_test.cpp create mode 100644 libc/test/src/stdbit/stdc_first_trailing_zero_ui_test.cpp create mode 100644 libc/test/src/stdbit/stdc_first_trailing_zero_ul_test.cpp create mode 100644 libc/test/src/stdbit/stdc_first_trailing_zero_ull_test.cpp create mode 100644 libc/test/src/stdbit/stdc_first_trailing_zero_us_test.cpp diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index fc30bcf56665c7..d53b225e4d87e2 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -122,6 +122,11 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.stdbit.stdc_first_leading_one_ui libc.src.stdbit.stdc_first_leading_one_ul libc.src.stdbit.stdc_first_leading_one_ull + libc.src.stdbit.stdc_first_trailing_zero_uc + libc.src.stdbit.stdc_first_trailing_zero_us + libc.src.stdbit.stdc_first_trailing_zero_ui + libc.src.stdbit.stdc_first_trailing_zero_ul + libc.src.stdbit.stdc_first_trailing_zero_ull # stdlib.h entrypoints libc.src.stdlib.abs diff --git a/libc/docs/stdbit.rst b/libc/docs/stdbit.rst index 5ff36dcb0f5929..790a747baaa2da 100644 --- a/libc/docs/stdbit.rst +++ b/libc/docs/stdbit.rst @@ -61,11 +61,11 @@ stdc_first_leading_one_us |check| stdc_first_leading_one_ui |check| stdc_first_leading_one_ul |check| stdc_first_leading_one_ull |check| -stdc_first_trailing_zero_uc -stdc_first_trailing_zero_us -stdc_first_trailing_zero_ui -stdc_first_trailing_zero_ul -stdc_first_trailing_zero_ull +stdc_first_trailing_zero_uc |check| +stdc_first_trailing_zero_us |check| +stdc_first_trailing_zero_ui |check| +stdc_first_trailing_zero_ul |check| +stdc_first_trailing_zero_ull |check| stdc_first_trailing_one_uc stdc_first_trailing_one_us stdc_first_trailing_one_ui @@ -120,7 +120,7 @@ stdc_trailing_zeros |check| stdc_trailing_ones |check| stdc_first_leading_zero |check| stdc_first_leading_one |check| -stdc_first_trailing_zero +stdc_first_trailing_zero |check| stdc_first_trailing_one 
stdc_count_zeros stdc_count_ones diff --git a/libc/include/llvm-libc-macros/stdbit-macros.h b/libc/include/llvm-libc-macros/stdbit-macros.h index 87c590e61e3999..0ec8aef10d3e0a 100644 --- a/libc/include/llvm-libc-macros/stdbit-macros.h +++ b/libc/include/llvm-libc-macros/stdbit-macros.h @@ -101,6 +101,21 @@ inline unsigned stdc_first_leading_one(unsigned long x) { inline unsigned stdc_first_leading_one(unsigned long long x) { return stdc_first_leading_one_ull(x); } +inline unsigned stdc_first_trailing_zero(unsigned char x) { + return stdc_first_trailing_zero_uc(x); +} +inline unsigned stdc_first_trailing_zero(unsigned short x) { + return stdc_first_trailing_zero_us(x); +} +inline unsigned stdc_first_trailing_zero(unsigned x) { + return stdc_first_trailing_zero_ui(x); +} +inline unsigned stdc_first_trailing_zero(unsigned long x) { + return stdc_first_trailing_zero_ul(x); +} +inline unsigned stdc_first_trailing_zero(unsigned long long x) { + return stdc_first_trailing_zero_ull(x); +} #else #define stdc_leading_zeros(x) \ _Generic((x), \ @@ -144,6 +159,13 @@ inline unsigned stdc_first_leading_one(unsigned long long x) { unsigned: stdc_first_leading_one_ui, \ unsigned long: stdc_first_leading_one_ul, \ unsigned long long: stdc_first_leading_one_ull)(x) +#define stdc_first_trailing_zero(x) \ + _Generic((x), \ + unsigned char: stdc_first_trailing_zero_uc, \ + unsigned short: stdc_first_trailing_zero_us, \ + unsigned: stdc_first_trailing_zero_ui, \ + unsigned long: stdc_first_trailing_zero_ul, \ + unsigned long long: stdc_first_trailing_zero_ull)(x) #endif // __cplusplus #endif // __LLVM_LIBC_MACROS_STDBIT_MACROS_H diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index 5e87831b907fb5..b97e90d1a8ae34 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -782,7 +782,8 @@ def StdC : StandardSpec<"stdc"> { Macro<"stdc_trailing_zeros">, Macro<"stdc_trailing_ones">, Macro<"stdc_first_leading_zero">, - Macro<"stdc_first_leading_one"> + Macro<"stdc_first_leading_one">, + 
Macro<"stdc_first_trailing_zero"> ], // Macros [], // Types [], // Enumerations @@ -816,7 +817,12 @@ def StdC : StandardSpec<"stdc"> { FunctionSpec<"stdc_first_leading_one_us", RetValSpec, [ArgSpec]>, FunctionSpec<"stdc_first_leading_one_ui", RetValSpec, [ArgSpec]>, FunctionSpec<"stdc_first_leading_one_ul", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_first_leading_one_ull", RetValSpec, [ArgSpec]> + FunctionSpec<"stdc_first_leading_one_ull", RetValSpec, [ArgSpec]>, + FunctionSpec<"stdc_first_trailing_zero_uc", RetValSpec, [ArgSpec]>, + FunctionSpec<"stdc_first_trailing_zero_us", RetValSpec, [ArgSpec]>, + FunctionSpec<"stdc_first_trailing_zero_ui", RetValSpec, [ArgSpec]>, + FunctionSpec<"stdc_first_trailing_zero_ul", RetValSpec, [ArgSpec]>, + FunctionSpec<"stdc_first_trailing_zero_ull", RetValSpec, [ArgSpec]>, ] // Functions >; diff --git a/libc/src/__support/CPP/bit.h b/libc/src/__support/CPP/bit.h index 23e305ab86219e..5afcc875752cff 100644 --- a/libc/src/__support/CPP/bit.h +++ b/libc/src/__support/CPP/bit.h @@ -236,6 +236,13 @@ template >> return first_leading_zero(static_cast(~value)); } +template >> +[[nodiscard]] LIBC_INLINE constexpr int first_trailing_zero(T value) { + return value == cpp::numeric_limits::max() + ? 
0 + : countr_zero(static_cast(~value)) + 1; +} + } // namespace LIBC_NAMESPACE::cpp #endif // LLVM_LIBC_SRC___SUPPORT_CPP_BIT_H diff --git a/libc/src/stdbit/CMakeLists.txt b/libc/src/stdbit/CMakeLists.txt index 6ee93861b8db40..fa68d4a90dbb7e 100644 --- a/libc/src/stdbit/CMakeLists.txt +++ b/libc/src/stdbit/CMakeLists.txt @@ -5,6 +5,7 @@ set(prefixes trailing_ones first_leading_zero first_leading_one + first_trailing_zero ) set(suffixes c s i l ll) foreach(prefix IN LISTS prefixes) diff --git a/libc/src/stdbit/stdc_first_trailing_zero_uc.cpp b/libc/src/stdbit/stdc_first_trailing_zero_uc.cpp new file mode 100644 index 00000000000000..5825d5d441c591 --- /dev/null +++ b/libc/src/stdbit/stdc_first_trailing_zero_uc.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of stdc_first_trailing_zero_uc ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdbit/stdc_first_trailing_zero_uc.h" + +#include "src/__support/CPP/bit.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(unsigned, stdc_first_trailing_zero_uc, + (unsigned char value)) { + return static_cast(cpp::first_trailing_zero(value)); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/stdbit/stdc_first_trailing_zero_uc.h b/libc/src/stdbit/stdc_first_trailing_zero_uc.h new file mode 100644 index 00000000000000..242472ae34f2e6 --- /dev/null +++ b/libc/src/stdbit/stdc_first_trailing_zero_uc.h @@ -0,0 +1,18 @@ +//===-- Implementation header for stdc_first_trailing_zero_uc ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDBIT_STDC_FIRST_TRAILING_ZERO_UC_H +#define LLVM_LIBC_SRC_STDBIT_STDC_FIRST_TRAILING_ZERO_UC_H + +namespace LIBC_NAMESPACE { + +unsigned stdc_first_trailing_zero_uc(unsigned char value); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STDBIT_STDC_FIRST_TRAILING_ZERO_UC_H diff --git a/libc/src/stdbit/stdc_first_trailing_zero_ui.cpp b/libc/src/stdbit/stdc_first_trailing_zero_ui.cpp new file mode 100644 index 00000000000000..3b51b5fa22c324 --- /dev/null +++ b/libc/src/stdbit/stdc_first_trailing_zero_ui.cpp @@ -0,0 +1,20 @@ +//===-- Implementation of stdc_first_trailing_zero_ui ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdbit/stdc_first_trailing_zero_ui.h" + +#include "src/__support/CPP/bit.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(unsigned, stdc_first_trailing_zero_ui, (unsigned value)) { + return static_cast(cpp::first_trailing_zero(value)); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/stdbit/stdc_first_trailing_zero_ui.h b/libc/src/stdbit/stdc_first_trailing_zero_ui.h new file mode 100644 index 00000000000000..cc308f762b2b64 --- /dev/null +++ b/libc/src/stdbit/stdc_first_trailing_zero_ui.h @@ -0,0 +1,18 @@ +//===-- Implementation header for stdc_first_trailing_zero_ui ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDBIT_STDC_FIRST_TRAILING_ZERO_UI_H +#define LLVM_LIBC_SRC_STDBIT_STDC_FIRST_TRAILING_ZERO_UI_H + +namespace LIBC_NAMESPACE { + +unsigned stdc_first_trailing_zero_ui(unsigned value); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STDBIT_STDC_FIRST_TRAILING_ZERO_UI_H diff --git a/libc/src/stdbit/stdc_first_trailing_zero_ul.cpp b/libc/src/stdbit/stdc_first_trailing_zero_ul.cpp new file mode 100644 index 00000000000000..abf122944a76a4 --- /dev/null +++ b/libc/src/stdbit/stdc_first_trailing_zero_ul.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of stdc_first_trailing_zero_ul ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdbit/stdc_first_trailing_zero_ul.h" + +#include "src/__support/CPP/bit.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(unsigned, stdc_first_trailing_zero_ul, + (unsigned long value)) { + return static_cast(cpp::first_trailing_zero(value)); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/stdbit/stdc_first_trailing_zero_ul.h b/libc/src/stdbit/stdc_first_trailing_zero_ul.h new file mode 100644 index 00000000000000..82413373341748 --- /dev/null +++ b/libc/src/stdbit/stdc_first_trailing_zero_ul.h @@ -0,0 +1,18 @@ +//===-- Implementation header for stdc_first_trailing_zero_ul ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDBIT_STDC_FIRST_TRAILING_ZERO_UL_H +#define LLVM_LIBC_SRC_STDBIT_STDC_FIRST_TRAILING_ZERO_UL_H + +namespace LIBC_NAMESPACE { + +unsigned stdc_first_trailing_zero_ul(unsigned long value); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STDBIT_STDC_FIRST_TRAILING_ZERO_UL_H diff --git a/libc/src/stdbit/stdc_first_trailing_zero_ull.cpp b/libc/src/stdbit/stdc_first_trailing_zero_ull.cpp new file mode 100644 index 00000000000000..336e7d6e075ff2 --- /dev/null +++ b/libc/src/stdbit/stdc_first_trailing_zero_ull.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of stdc_first_trailing_zero_ull --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdbit/stdc_first_trailing_zero_ull.h" + +#include "src/__support/CPP/bit.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(unsigned, stdc_first_trailing_zero_ull, + (unsigned long long value)) { + return static_cast(cpp::first_trailing_zero(value)); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/stdbit/stdc_first_trailing_zero_ull.h b/libc/src/stdbit/stdc_first_trailing_zero_ull.h new file mode 100644 index 00000000000000..3737fc1be2d435 --- /dev/null +++ b/libc/src/stdbit/stdc_first_trailing_zero_ull.h @@ -0,0 +1,18 @@ +//===-- Implementation header for stdc_first_trailing_zero_ull --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDBIT_STDC_FIRST_TRAILING_ZERO_ULL_H +#define LLVM_LIBC_SRC_STDBIT_STDC_FIRST_TRAILING_ZERO_ULL_H + +namespace LIBC_NAMESPACE { + +unsigned stdc_first_trailing_zero_ull(unsigned long long value); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STDBIT_STDC_FIRST_TRAILING_ZERO_ULL_H diff --git a/libc/src/stdbit/stdc_first_trailing_zero_us.cpp b/libc/src/stdbit/stdc_first_trailing_zero_us.cpp new file mode 100644 index 00000000000000..b7d05047b2721a --- /dev/null +++ b/libc/src/stdbit/stdc_first_trailing_zero_us.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of stdc_first_trailing_zero_us ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdbit/stdc_first_trailing_zero_us.h" + +#include "src/__support/CPP/bit.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(unsigned, stdc_first_trailing_zero_us, + (unsigned short value)) { + return static_cast(cpp::first_trailing_zero(value)); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/stdbit/stdc_first_trailing_zero_us.h b/libc/src/stdbit/stdc_first_trailing_zero_us.h new file mode 100644 index 00000000000000..608b0522969642 --- /dev/null +++ b/libc/src/stdbit/stdc_first_trailing_zero_us.h @@ -0,0 +1,18 @@ +//===-- Implementation header for stdc_first_trailing_zero_us ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDBIT_STDC_FIRST_TRAILING_ZERO_US_H +#define LLVM_LIBC_SRC_STDBIT_STDC_FIRST_TRAILING_ZERO_US_H + +namespace LIBC_NAMESPACE { + +unsigned stdc_first_trailing_zero_us(unsigned short value); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STDBIT_STDC_FIRST_TRAILING_ZERO_US_H diff --git a/libc/test/include/stdbit_test.cpp b/libc/test/include/stdbit_test.cpp index c2fbcb8ce2d321..238c2d44fac45a 100644 --- a/libc/test/include/stdbit_test.cpp +++ b/libc/test/include/stdbit_test.cpp @@ -57,6 +57,13 @@ unsigned stdc_first_leading_one_ul(unsigned long) noexcept { return 0xFDU; } unsigned stdc_first_leading_one_ull(unsigned long long) noexcept { return 0xFFU; } +unsigned stdc_first_trailing_zero_uc(unsigned char) noexcept { return 0x0AU; } +unsigned stdc_first_trailing_zero_us(unsigned short) noexcept { return 0x0BU; } +unsigned stdc_first_trailing_zero_ui(unsigned) noexcept { return 0x0CU; } +unsigned stdc_first_trailing_zero_ul(unsigned long) noexcept { return 0x0DU; } +unsigned stdc_first_trailing_zero_ull(unsigned long long) noexcept { + return 0x0FU; +} } #include "include/llvm-libc-macros/stdbit-macros.h" @@ -108,3 +115,11 @@ TEST(LlvmLibcStdbitTest, TypeGenericMacroFirstLeadingOne) { EXPECT_EQ(stdc_first_leading_one(0UL), 0xFDU); EXPECT_EQ(stdc_first_leading_one(0ULL), 0xFFU); } + +TEST(LlvmLibcStdbitTest, TypeGenericMacroFirstTrailingZero) { + EXPECT_EQ(stdc_first_trailing_zero(static_cast(0U)), 0x0AU); + EXPECT_EQ(stdc_first_trailing_zero(static_cast(0U)), 0x0BU); + EXPECT_EQ(stdc_first_trailing_zero(0U), 0x0CU); + EXPECT_EQ(stdc_first_trailing_zero(0UL), 0x0DU); + EXPECT_EQ(stdc_first_trailing_zero(0ULL), 0x0FU); +} diff --git a/libc/test/src/__support/CPP/bit_test.cpp b/libc/test/src/__support/CPP/bit_test.cpp index a70726d4feb241..914c6ca823c8ef 100644 --- 
a/libc/test/src/__support/CPP/bit_test.cpp +++ b/libc/test/src/__support/CPP/bit_test.cpp @@ -220,4 +220,10 @@ TYPED_TEST(LlvmLibcBitTest, FirstLeadingOne, UnsignedTypes) { cpp::numeric_limits::digits - i); } +TYPED_TEST(LlvmLibcBitTest, FirstTrailingZero, UnsignedTypes) { + EXPECT_EQ(first_trailing_zero(cpp::numeric_limits::max()), 0); + for (int i = 0U; i != cpp::numeric_limits::digits; ++i) + EXPECT_EQ(first_trailing_zero(~(T(1) << i)), i + 1); +} + } // namespace LIBC_NAMESPACE::cpp diff --git a/libc/test/src/stdbit/CMakeLists.txt b/libc/test/src/stdbit/CMakeLists.txt index e32663f88e8620..d5896ad83dbfab 100644 --- a/libc/test/src/stdbit/CMakeLists.txt +++ b/libc/test/src/stdbit/CMakeLists.txt @@ -7,6 +7,7 @@ set(prefixes trailing_ones first_leading_zero first_leading_one + first_trailing_zero ) set(suffixes c s i l ll) foreach(prefix IN LISTS prefixes) diff --git a/libc/test/src/stdbit/stdc_first_trailing_zero_uc_test.cpp b/libc/test/src/stdbit/stdc_first_trailing_zero_uc_test.cpp new file mode 100644 index 00000000000000..2b17aa6536e669 --- /dev/null +++ b/libc/test/src/stdbit/stdc_first_trailing_zero_uc_test.cpp @@ -0,0 +1,20 @@ +//===-- Unittests for stdc_first_trailing_zero_uc -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/CPP/limits.h" +#include "src/stdbit/stdc_first_trailing_zero_uc.h" +#include "test/UnitTest/Test.h" + +TEST(LlvmLibcStdcFirstTrailingZeroUcTest, ALL) { + EXPECT_EQ(LIBC_NAMESPACE::stdc_first_trailing_zero_uc(UCHAR_MAX), 0U); +} + +TEST(LlvmLibcStdcFirstTrailingZeroUcTest, ZeroHot) { + for (unsigned i = 0U; i != UCHAR_WIDTH; ++i) + EXPECT_EQ(LIBC_NAMESPACE::stdc_first_trailing_zero_uc(~(1U << i)), i + 1); +} diff --git a/libc/test/src/stdbit/stdc_first_trailing_zero_ui_test.cpp b/libc/test/src/stdbit/stdc_first_trailing_zero_ui_test.cpp new file mode 100644 index 00000000000000..08366142e2a737 --- /dev/null +++ b/libc/test/src/stdbit/stdc_first_trailing_zero_ui_test.cpp @@ -0,0 +1,20 @@ +//===-- Unittests for stdc_first_trailing_zero_ui -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/CPP/limits.h" +#include "src/stdbit/stdc_first_trailing_zero_ui.h" +#include "test/UnitTest/Test.h" + +TEST(LlvmLibcStdcFirstTrailingZeroUiTest, ALL) { + EXPECT_EQ(LIBC_NAMESPACE::stdc_first_trailing_zero_ui(UINT_MAX), 0U); +} + +TEST(LlvmLibcStdcFirstTrailingZeroUiTest, ZeroHot) { + for (unsigned i = 0U; i != UINT_WIDTH; ++i) + EXPECT_EQ(LIBC_NAMESPACE::stdc_first_trailing_zero_ui(~(1U << i)), i + 1); +} diff --git a/libc/test/src/stdbit/stdc_first_trailing_zero_ul_test.cpp b/libc/test/src/stdbit/stdc_first_trailing_zero_ul_test.cpp new file mode 100644 index 00000000000000..0c18cc73ffcc4c --- /dev/null +++ b/libc/test/src/stdbit/stdc_first_trailing_zero_ul_test.cpp @@ -0,0 +1,20 @@ +//===-- Unittests for stdc_first_trailing_zero_ul -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/CPP/limits.h" +#include "src/stdbit/stdc_first_trailing_zero_ul.h" +#include "test/UnitTest/Test.h" + +TEST(LlvmLibcStdcFirstTrailingZeroUlTest, ALL) { + EXPECT_EQ(LIBC_NAMESPACE::stdc_first_trailing_zero_ul(ULONG_MAX), 0U); +} + +TEST(LlvmLibcStdcFirstTrailingZeroUlTest, ZeroHot) { + for (unsigned i = 0U; i != ULONG_WIDTH; ++i) + EXPECT_EQ(LIBC_NAMESPACE::stdc_first_trailing_zero_ul(~(1UL << i)), i + 1); +} diff --git a/libc/test/src/stdbit/stdc_first_trailing_zero_ull_test.cpp b/libc/test/src/stdbit/stdc_first_trailing_zero_ull_test.cpp new file mode 100644 index 00000000000000..5dce42987e52fa --- /dev/null +++ b/libc/test/src/stdbit/stdc_first_trailing_zero_ull_test.cpp @@ -0,0 +1,21 @@ +//===-- Unittests for stdc_first_trailing_zero_ull ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/CPP/limits.h" +#include "src/stdbit/stdc_first_trailing_zero_ull.h" +#include "test/UnitTest/Test.h" + +TEST(LlvmLibcStdcFirstTrailingZeroUllTest, ALL) { + EXPECT_EQ(LIBC_NAMESPACE::stdc_first_trailing_zero_ull(ULLONG_MAX), 0U); +} + +TEST(LlvmLibcStdcFirstTrailingZeroUllTest, ZeroHot) { + for (unsigned i = 0U; i != ULLONG_WIDTH; ++i) + EXPECT_EQ(LIBC_NAMESPACE::stdc_first_trailing_zero_ull(~(1ULL << i)), + i + 1); +} diff --git a/libc/test/src/stdbit/stdc_first_trailing_zero_us_test.cpp b/libc/test/src/stdbit/stdc_first_trailing_zero_us_test.cpp new file mode 100644 index 00000000000000..e370379300e4a4 --- /dev/null +++ b/libc/test/src/stdbit/stdc_first_trailing_zero_us_test.cpp @@ -0,0 +1,20 @@ +//===-- Unittests for stdc_first_trailing_zero_us -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/CPP/limits.h" +#include "src/stdbit/stdc_first_trailing_zero_us.h" +#include "test/UnitTest/Test.h" + +TEST(LlvmLibcStdcFirstTrailingZeroUsTest, ALL) { + EXPECT_EQ(LIBC_NAMESPACE::stdc_first_trailing_zero_us(USHRT_MAX), 0U); +} + +TEST(LlvmLibcStdcFirstTrailingZeroUsTest, ZeroHot) { + for (unsigned i = 0U; i != USHRT_WIDTH; ++i) + EXPECT_EQ(LIBC_NAMESPACE::stdc_first_trailing_zero_us(~(1U << i)), i + 1); +} From 6d4ffbdfa8ff90e4ee6081ad8dbb8ec24e982a02 Mon Sep 17 00:00:00 2001 From: Jacob Lambert Date: Wed, 14 Feb 2024 10:39:21 -0800 Subject: [PATCH 170/240] [clang][CodeGen] Shift relink option implementation away from module cloning (#81693) We recently implemented a new option allowing relinking of bitcode modules via the "-mllvm -relink-builtin-bitcode-postop" option. This implementation relied on llvm::CloneModule() in order to pass copies to modules and preserve the original modules for later relinking. However, cloning modules has been found to be prohibitively expensive, significantly increasing compilation time for large bitcode libraries. In this patch, we shift the relink option implementation to instead link the original modules initially, and reload modules from the file system if relinking is requested. This approach results in significantly reduced overhead. We accomplish this by creating a new ReloadModules() routine that can be called from a BackendConsumer class, to mimic the behavior of ASTConsumer's loadLinkModules(), but without access to the CompilerInstance. Because loading the bitcodes from the filesystem requires access to the FileManager class, we also forward a reference to the CompilerInstance class to the BackendConsumer. This mirrors what is already done for several CompilerInstance members, such as TargetOptions and CodeGenOptions. 
Finally, we needed to add a const specifier to the FileManager::getBufferForFile() routine to allow it to be called using the const reference returned from CompilerInstance::getFileManager() --- clang/include/clang/Basic/FileManager.h | 4 +- clang/lib/Basic/FileManager.cpp | 2 +- clang/lib/CodeGen/BackendConsumer.h | 16 ++- clang/lib/CodeGen/CodeGenAction.cpp | 161 ++++++++++++------------ clang/lib/CodeGen/LinkInModulesPass.cpp | 14 ++- 5 files changed, 109 insertions(+), 88 deletions(-) diff --git a/clang/include/clang/Basic/FileManager.h b/clang/include/clang/Basic/FileManager.h index 997c17a0ffcfcc..2245fd78bfc9f0 100644 --- a/clang/include/clang/Basic/FileManager.h +++ b/clang/include/clang/Basic/FileManager.h @@ -283,7 +283,7 @@ class FileManager : public RefCountedBase { bool RequiresNullTerminator = true); llvm::ErrorOr> getBufferForFile(StringRef Filename, bool isVolatile = false, - bool RequiresNullTerminator = true) { + bool RequiresNullTerminator = true) const { return getBufferForFileImpl(Filename, /*FileSize=*/-1, isVolatile, RequiresNullTerminator); } @@ -291,7 +291,7 @@ class FileManager : public RefCountedBase { private: llvm::ErrorOr> getBufferForFileImpl(StringRef Filename, int64_t FileSize, bool isVolatile, - bool RequiresNullTerminator); + bool RequiresNullTerminator) const; public: /// Get the 'stat' information for the given \p Path. 
diff --git a/clang/lib/Basic/FileManager.cpp b/clang/lib/Basic/FileManager.cpp index 6097a27e429d66..cd520a6375e07e 100644 --- a/clang/lib/Basic/FileManager.cpp +++ b/clang/lib/Basic/FileManager.cpp @@ -547,7 +547,7 @@ FileManager::getBufferForFile(FileEntryRef FE, bool isVolatile, llvm::ErrorOr> FileManager::getBufferForFileImpl(StringRef Filename, int64_t FileSize, bool isVolatile, - bool RequiresNullTerminator) { + bool RequiresNullTerminator) const { if (FileSystemOpts.WorkingDir.empty()) return FS->getBufferForFile(Filename, FileSize, RequiresNullTerminator, isVolatile); diff --git a/clang/lib/CodeGen/BackendConsumer.h b/clang/lib/CodeGen/BackendConsumer.h index 72a814cd43d738..fd0f1984d6c0f7 100644 --- a/clang/lib/CodeGen/BackendConsumer.h +++ b/clang/lib/CodeGen/BackendConsumer.h @@ -34,6 +34,7 @@ class BackendConsumer : public ASTConsumer { const CodeGenOptions &CodeGenOpts; const TargetOptions &TargetOpts; const LangOptions &LangOpts; + const FileManager &FileMgr; std::unique_ptr AsmOutStream; ASTContext *Context; IntrusiveRefCntPtr FS; @@ -74,8 +75,8 @@ class BackendConsumer : public ASTConsumer { const HeaderSearchOptions &HeaderSearchOpts, const PreprocessorOptions &PPOpts, const CodeGenOptions &CodeGenOpts, - const TargetOptions &TargetOpts, - const LangOptions &LangOpts, const std::string &InFile, + const TargetOptions &TargetOpts, const LangOptions &LangOpts, + const FileManager &FileMgr, const std::string &InFile, SmallVector LinkModules, std::unique_ptr OS, llvm::LLVMContext &C, CoverageSourceInfo *CoverageInfo = nullptr); @@ -88,8 +89,8 @@ class BackendConsumer : public ASTConsumer { const HeaderSearchOptions &HeaderSearchOpts, const PreprocessorOptions &PPOpts, const CodeGenOptions &CodeGenOpts, - const TargetOptions &TargetOpts, - const LangOptions &LangOpts, llvm::Module *Module, + const TargetOptions &TargetOpts, const LangOptions &LangOpts, + const FileManager &FileMgr, llvm::Module *Module, SmallVector LinkModules, llvm::LLVMContext &C, 
CoverageSourceInfo *CoverageInfo = nullptr); @@ -111,10 +112,13 @@ class BackendConsumer : public ASTConsumer { void AssignInheritanceModel(CXXRecordDecl *RD) override; void HandleVTable(CXXRecordDecl *RD) override; - - // Links each entry in LinkModules into our module. Returns true on error. + // Links each entry in LinkModules into our module. Returns true on error. bool LinkInModules(llvm::Module *M, bool ShouldLinkFiles = true); + // Load a bitcode module from -mlink-builtin-bitcode option using + // methods from a BackendConsumer instead of CompilerInstance + bool ReloadModules(llvm::Module *M); + /// Get the best possible source location to represent a diagnostic that /// may have associated debug info. const FullSourceLoc getBestLocationFromDebugLoc( diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp index f8038497d90a7b..ab08a875e7e9c1 100644 --- a/clang/lib/CodeGen/CodeGenAction.cpp +++ b/clang/lib/CodeGen/CodeGenAction.cpp @@ -109,56 +109,52 @@ static void reportOptRecordError(Error E, DiagnosticsEngine &Diags, }); } -BackendConsumer::BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags, - IntrusiveRefCntPtr VFS, - const HeaderSearchOptions &HeaderSearchOpts, - const PreprocessorOptions &PPOpts, - const CodeGenOptions &CodeGenOpts, - const TargetOptions &TargetOpts, - const LangOptions &LangOpts, - const std::string &InFile, - SmallVector LinkModules, - std::unique_ptr OS, - LLVMContext &C, - CoverageSourceInfo *CoverageInfo) - : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts), - CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts), - AsmOutStream(std::move(OS)), Context(nullptr), FS(VFS), - LLVMIRGeneration("irgen", "LLVM IR Generation Time"), - LLVMIRGenerationRefCount(0), - Gen(CreateLLVMCodeGen(Diags, InFile, std::move(VFS), HeaderSearchOpts, - PPOpts, CodeGenOpts, C, CoverageInfo)), - LinkModules(std::move(LinkModules)) { - TimerIsEnabled = CodeGenOpts.TimePasses; - 
llvm::TimePassesIsEnabled = CodeGenOpts.TimePasses; - llvm::TimePassesPerRun = CodeGenOpts.TimePassesPerRun; +BackendConsumer::BackendConsumer( + BackendAction Action, DiagnosticsEngine &Diags, + IntrusiveRefCntPtr VFS, + const HeaderSearchOptions &HeaderSearchOpts, + const PreprocessorOptions &PPOpts, const CodeGenOptions &CodeGenOpts, + const TargetOptions &TargetOpts, const LangOptions &LangOpts, + const FileManager &FileMgr, const std::string &InFile, + SmallVector LinkModules, + std::unique_ptr OS, LLVMContext &C, + CoverageSourceInfo *CoverageInfo) + : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts), + CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts), + FileMgr(FileMgr), AsmOutStream(std::move(OS)), Context(nullptr), FS(VFS), + LLVMIRGeneration("irgen", "LLVM IR Generation Time"), + LLVMIRGenerationRefCount(0), + Gen(CreateLLVMCodeGen(Diags, InFile, std::move(VFS), HeaderSearchOpts, + PPOpts, CodeGenOpts, C, CoverageInfo)), + LinkModules(std::move(LinkModules)) { + TimerIsEnabled = CodeGenOpts.TimePasses; + llvm::TimePassesIsEnabled = CodeGenOpts.TimePasses; + llvm::TimePassesPerRun = CodeGenOpts.TimePassesPerRun; } // This constructor is used in installing an empty BackendConsumer // to use the clang diagnostic handler for IR input files. It avoids // initializing the OS field. 
-BackendConsumer::BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags, - IntrusiveRefCntPtr VFS, - const HeaderSearchOptions &HeaderSearchOpts, - const PreprocessorOptions &PPOpts, - const CodeGenOptions &CodeGenOpts, - const TargetOptions &TargetOpts, - const LangOptions &LangOpts, - llvm::Module *Module, - SmallVector LinkModules, - LLVMContext &C, - CoverageSourceInfo *CoverageInfo) - : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts), - CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts), - Context(nullptr), FS(VFS), - LLVMIRGeneration("irgen", "LLVM IR Generation Time"), - LLVMIRGenerationRefCount(0), - Gen(CreateLLVMCodeGen(Diags, "", std::move(VFS), HeaderSearchOpts, - PPOpts, CodeGenOpts, C, CoverageInfo)), - LinkModules(std::move(LinkModules)), CurLinkModule(Module) { - TimerIsEnabled = CodeGenOpts.TimePasses; - llvm::TimePassesIsEnabled = CodeGenOpts.TimePasses; - llvm::TimePassesPerRun = CodeGenOpts.TimePassesPerRun; +BackendConsumer::BackendConsumer( + BackendAction Action, DiagnosticsEngine &Diags, + IntrusiveRefCntPtr VFS, + const HeaderSearchOptions &HeaderSearchOpts, + const PreprocessorOptions &PPOpts, const CodeGenOptions &CodeGenOpts, + const TargetOptions &TargetOpts, const LangOptions &LangOpts, + const FileManager &FileMgr, llvm::Module *Module, + SmallVector LinkModules, LLVMContext &C, + CoverageSourceInfo *CoverageInfo) + : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts), + CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts), + FileMgr(FileMgr), Context(nullptr), FS(VFS), + LLVMIRGeneration("irgen", "LLVM IR Generation Time"), + LLVMIRGenerationRefCount(0), + Gen(CreateLLVMCodeGen(Diags, "", std::move(VFS), HeaderSearchOpts, PPOpts, + CodeGenOpts, C, CoverageInfo)), + LinkModules(std::move(LinkModules)), CurLinkModule(Module) { + TimerIsEnabled = CodeGenOpts.TimePasses; + llvm::TimePassesIsEnabled = CodeGenOpts.TimePasses; + llvm::TimePassesPerRun = 
CodeGenOpts.TimePassesPerRun; } llvm::Module* BackendConsumer::getModule() const { @@ -233,9 +229,37 @@ void BackendConsumer::HandleInterestingDecl(DeclGroupRef D) { HandleTopLevelDecl(D); } +bool BackendConsumer::ReloadModules(llvm::Module *M) { + for (const CodeGenOptions::BitcodeFileToLink &F : + CodeGenOpts.LinkBitcodeFiles) { + auto BCBuf = FileMgr.getBufferForFile(F.Filename); + if (!BCBuf) { + Diags.Report(diag::err_cannot_open_file) + << F.Filename << BCBuf.getError().message(); + LinkModules.clear(); + return true; + } + + LLVMContext &Ctx = getModule()->getContext(); + Expected> ModuleOrErr = + getOwningLazyBitcodeModule(std::move(*BCBuf), Ctx); + + if (!ModuleOrErr) { + handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) { + Diags.Report(diag::err_cannot_open_file) << F.Filename << EIB.message(); + }); + LinkModules.clear(); + return true; + } + LinkModules.push_back({std::move(ModuleOrErr.get()), F.PropagateAttrs, + F.Internalize, F.LinkFlags}); + } + + return false; // success +} + // Links each entry in LinkModules into our module. Returns true on error. 
bool BackendConsumer::LinkInModules(llvm::Module *M, bool ShouldLinkFiles) { - for (auto &LM : LinkModules) { assert(LM.Module && "LinkModule does not actually have a module"); @@ -257,37 +281,19 @@ bool BackendConsumer::LinkInModules(llvm::Module *M, bool ShouldLinkFiles) { CurLinkModule = LM.Module.get(); bool Err; - auto DoLink = [&](auto &Mod) { - if (LM.Internalize) { - Err = Linker::linkModules( - *M, std::move(Mod), LM.LinkFlags, - [](llvm::Module &M, const llvm::StringSet<> &GVS) { - internalizeModule(M, [&GVS](const llvm::GlobalValue &GV) { - return !GV.hasName() || (GVS.count(GV.getName()) == 0); - }); + if (LM.Internalize) { + Err = Linker::linkModules( + *M, std::move(LM.Module), LM.LinkFlags, + [](llvm::Module &M, const llvm::StringSet<> &GVS) { + internalizeModule(M, [&GVS](const llvm::GlobalValue &GV) { + return !GV.hasName() || (GVS.count(GV.getName()) == 0); }); - } else - Err = Linker::linkModules(*M, std::move(Mod), LM.LinkFlags); - }; - - // Create a Clone to move to the linker, which preserves the original - // linking modules, allowing them to be linked again in the future - if (ClRelinkBuiltinBitcodePostop) { - // TODO: If CloneModule() is updated to support cloning of unmaterialized - // modules, we can remove this - if (Error E = CurLinkModule->materializeAll()) - return false; - - std::unique_ptr Clone = llvm::CloneModule(*LM.Module); - - DoLink(Clone); - } - // Otherwise we can link (and clean up) the original modules - else { - DoLink(LM.Module); - } + }); + } else + Err = Linker::linkModules(*M, std::move(LM.Module), LM.LinkFlags); } + LinkModules.clear(); return false; // success } @@ -1037,8 +1043,9 @@ CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { std::unique_ptr Result(new BackendConsumer( BA, CI.getDiagnostics(), &CI.getVirtualFileSystem(), CI.getHeaderSearchOpts(), CI.getPreprocessorOpts(), CI.getCodeGenOpts(), - CI.getTargetOpts(), CI.getLangOpts(), std::string(InFile), - std::move(LinkModules), 
std::move(OS), *VMContext, CoverageInfo)); + CI.getTargetOpts(), CI.getLangOpts(), CI.getFileManager(), + std::string(InFile), std::move(LinkModules), std::move(OS), *VMContext, + CoverageInfo)); BEConsumer = Result.get(); // Enable generating macro debug info only when debug info is not disabled and @@ -1199,7 +1206,7 @@ void CodeGenAction::ExecuteAction() { BackendConsumer Result(BA, CI.getDiagnostics(), &CI.getVirtualFileSystem(), CI.getHeaderSearchOpts(), CI.getPreprocessorOpts(), CI.getCodeGenOpts(), CI.getTargetOpts(), - CI.getLangOpts(), TheModule.get(), + CI.getLangOpts(), CI.getFileManager(), TheModule.get(), std::move(LinkModules), *VMContext, nullptr); // Link in each pending link module. diff --git a/clang/lib/CodeGen/LinkInModulesPass.cpp b/clang/lib/CodeGen/LinkInModulesPass.cpp index 6ce2b94c1db82c..929539cc8f3346 100644 --- a/clang/lib/CodeGen/LinkInModulesPass.cpp +++ b/clang/lib/CodeGen/LinkInModulesPass.cpp @@ -14,6 +14,10 @@ #include "LinkInModulesPass.h" #include "BackendConsumer.h" +#include "clang/Basic/CodeGenOptions.h" +#include "clang/Basic/FileManager.h" +#include "clang/Basic/SourceManager.h" + using namespace llvm; LinkInModulesPass::LinkInModulesPass(clang::BackendConsumer *BC, @@ -21,9 +25,15 @@ LinkInModulesPass::LinkInModulesPass(clang::BackendConsumer *BC, : BC(BC), ShouldLinkFiles(ShouldLinkFiles) {} PreservedAnalyses LinkInModulesPass::run(Module &M, ModuleAnalysisManager &AM) { + if (!BC) + return PreservedAnalyses::all(); + + // Re-load bitcode modules from files + if (BC->ReloadModules(&M)) + report_fatal_error("Bitcode module re-loading failed, aborted!"); - if (BC && BC->LinkInModules(&M, ShouldLinkFiles)) - report_fatal_error("Bitcode module linking failed, compilation aborted!"); + if (BC->LinkInModules(&M, ShouldLinkFiles)) + report_fatal_error("Bitcode module re-linking failed, aborted!"); return PreservedAnalyses::all(); } From 2d5fb27db71b57f299793160181ef28fea5573e7 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: 
Wed, 14 Feb 2024 10:50:24 -0800 Subject: [PATCH 171/240] [ubsan] Support static linking with standalone runtime (#80943) The standalone runtime (not -fsanitize-minimal-runtime/-fsanitize-trap=undefined) installs some signal handlers using `real_sigaction`. With static linking (-static/-static-pie), the called `REAL(sigaction)` is null, leading to an immediate segfault, which is confusing (#51538). Fix #51538 by bailing out. `// REQUIRES: librt_has_multf3` from https://reviews.llvm.org/D109709 actually disabled the test because `librt_has_*` features are only for `compiler-rt/test/builtins`. The test does not reproduce for me: libclang_rt.builtins.a or libgcc. Revert the REQUIRES. --- compiler-rt/lib/ubsan/ubsan_signals_standalone.cpp | 5 +++++ compiler-rt/test/ubsan/TestCases/Misc/Linux/static-link.cpp | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/compiler-rt/lib/ubsan/ubsan_signals_standalone.cpp b/compiler-rt/lib/ubsan/ubsan_signals_standalone.cpp index 354f847fab7138..68edd3a1b2062e 100644 --- a/compiler-rt/lib/ubsan/ubsan_signals_standalone.cpp +++ b/compiler-rt/lib/ubsan/ubsan_signals_standalone.cpp @@ -66,6 +66,11 @@ void InitializeDeadlySignals() { return; is_initialized = true; InitializeSignalInterceptors(); +#if SANITIZER_INTERCEPT_SIGNAL_AND_SIGACTION + // REAL(sigaction_symname) is nullptr in a static link. Bail out. 
+ if (!REAL(sigaction_symname)) + return; +#endif InstallDeadlySignalHandlers(&UBsanOnDeadlySignal); } diff --git a/compiler-rt/test/ubsan/TestCases/Misc/Linux/static-link.cpp b/compiler-rt/test/ubsan/TestCases/Misc/Linux/static-link.cpp index cd185049567f79..f26b7b868cad62 100644 --- a/compiler-rt/test/ubsan/TestCases/Misc/Linux/static-link.cpp +++ b/compiler-rt/test/ubsan/TestCases/Misc/Linux/static-link.cpp @@ -1,7 +1,7 @@ // REQUIRES: ubsan-standalone // REQUIRES: target={{x86_64.*}} -// REQUIRES: librt_has_multf3 // RUN: %clangxx -fsanitize=bool -static %s -o %t && UBSAN_OPTIONS=handle_segv=0:handle_sigbus=0:handle_sigfpe=0 %run %t 2>&1 | FileCheck %s +// RUN: %run %t 2>&1 | FileCheck %s #include #include From ea06384bf667c635f78660f0bcfaa01372735b99 Mon Sep 17 00:00:00 2001 From: Daniel Hoekwater Date: Wed, 14 Feb 2024 18:58:07 +0000 Subject: [PATCH 172/240] [CodeGen][AArch64] Only split safe blocks in BBSections (#81553) Some types of machine function and machine basic block are unsafe to split on AArch64: basic blocks that contain jump table dispatch or targets (D157124), and blocks that contain inline ASM GOTO blocks or their targets (D158647) all cause issues and have been excluded from Machine Function Splitting on AArch64. These issues are caused by any transformation pass that places same-function basic blocks in different text sections (MachineFunctionSplitter and BasicBlockSections) and must be special-cased in both passes. 
--- llvm/lib/CodeGen/BasicBlockSections.cpp | 11 +- .../AArch64/basic-block-sections-cold.ll | 51 ++++++++ .../AArch64/basic-block-sections-unsafe.ll | 121 ++++++++++++++++++ 3 files changed, 180 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/basic-block-sections-cold.ll create mode 100644 llvm/test/CodeGen/AArch64/basic-block-sections-unsafe.ll diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp index eb3f9e7078f1ac..09e45ea5794b76 100644 --- a/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -208,9 +208,14 @@ assignSections(MachineFunction &MF, if (I != FuncClusterInfo.end()) { MBB.setSectionID(I->second.ClusterID); } else { - // BB goes into the special cold section if it is not specified in the - // cluster info map. - MBB.setSectionID(MBBSectionID::ColdSectionID); + const TargetInstrInfo &TII = + *MBB.getParent()->getSubtarget().getInstrInfo(); + + if (TII.isMBBSafeToSplitToCold(MBB)) { + // BB goes into the special cold section if it is not specified in the + // cluster info map. + MBB.setSectionID(MBBSectionID::ColdSectionID); + } } } diff --git a/llvm/test/CodeGen/AArch64/basic-block-sections-cold.ll b/llvm/test/CodeGen/AArch64/basic-block-sections-cold.ll new file mode 100644 index 00000000000000..6641ef6a51c144 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/basic-block-sections-cold.ll @@ -0,0 +1,51 @@ +;; Check if basic blocks that don't get unique sections are placed in cold sections. +;; Basic block with id 1 and 2 must be in the cold section. 
+;; +;; Profile for version 0 +; RUN: echo '!_Z3bazb' > %t1 +; RUN: echo '!!0' >> %t1 +;; +;; Profile for version 1 +; RUN: echo 'v1' > %t2 +; RUN: echo 'f _Z3bazb' >> %t2 +; RUN: echo 'c 0' >> %t2 +;; +; RUN: llc < %s -mtriple=aarch64 -function-sections -basic-block-sections=%t1 -unique-basic-block-section-names | FileCheck %s -check-prefix=SECTIONS +; RUN: llc < %s -mtriple=aarch64 -function-sections -basic-block-sections=%t2 -unique-basic-block-section-names | FileCheck %s -check-prefix=SECTIONS +; RUN: llc < %s -mtriple=aarch64 -function-sections -basic-block-sections=%t1 -unique-basic-block-section-names -bbsections-cold-text-prefix=".text.unlikely." | FileCheck %s -check-prefix=SPLIT + +define void @_Z3bazb(i1 zeroext %0) nounwind { + br i1 %0, label %2, label %4 + +2: ; preds = %1 + %3 = call i32 @_Z3barv() + br label %6 + +4: ; preds = %1 + %5 = call i32 @_Z3foov() + br label %6 + +6: ; preds = %2, %4 + ret void +} + +declare i32 @_Z3barv() #1 + +declare i32 @_Z3foov() #1 + +; SECTIONS: .section .text.hot._Z3bazb,"ax",@progbits +; SECTIONS: _Z3bazb: +; Check that the basic block with id 1 doesn't get a section. +; SECTIONS-NOT: .section .text{{.*}}._Z3bazb.1,"ax",@progbits,unique +; Check that a single cold section is started here and id 1 and 2 blocks are placed here. 
+; SECTIONS: .section .text.split._Z3bazb,"ax",@progbits +; SECTIONS: _Z3bazb.cold: +; SECTIONS-NOT: .section .text.hot._Z3bazb._Z3bazb.2,"ax",@progbits,unique +; SECTIONS: .LBB0_2: +; SECTIONS: .size _Z3bazb, .Lfunc_end{{[0-9]}}-_Z3bazb + +; SPLIT: .section .text.unlikely._Z3bazb,"ax",@progbits +; SPLIT-NEXT: _Z3bazb.cold: +; SPLIT-NEXT: bl _Z3barv +; SPLIT: .LBB0_2: +; SPLIT: .LBB_END0_2: diff --git a/llvm/test/CodeGen/AArch64/basic-block-sections-unsafe.ll b/llvm/test/CodeGen/AArch64/basic-block-sections-unsafe.ll new file mode 100644 index 00000000000000..a83a47c9c129c5 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/basic-block-sections-unsafe.ll @@ -0,0 +1,121 @@ +;; Check if basic blocks without unique sections are only placed in cold sections if it is safe +;; to do so. +;; +;; Profile for version 0. +; RUN: echo 'v1' > %t1 +; RUN: echo 'f _Z3asm_goto' >> %t1 +; RUN: echo 'c 0' >> %t1 +; RUN: echo 'f _Z3jump_table' >> %t1 +; RUN: echo 'c 0' >> %t1 +; RUN: echo 'f _Z3red_zone' >> %t1 +; RUN: echo 'c 0' >> %t1 +;; +; RUN: llc < %s -mtriple=aarch64 -function-sections -basic-block-sections=%t1 -unique-basic-block-section-names -bbsections-cold-text-prefix=".text.unlikely." | FileCheck %s +; RUN: llc < %s -mtriple=aarch64 -function-sections -aarch64-min-jump-table-entries=4 -basic-block-sections=%t1 -unique-basic-block-section-names -bbsections-cold-text-prefix=".text.unlikely." | FileCheck %s -check-prefix=JUMP-TABLES +; RUN: llc < %s -mtriple=aarch64 -function-sections -basic-block-sections=%t1 -unique-basic-block-section-names -bbsections-cold-text-prefix=".text.unlikely." | FileCheck %s -check-prefix=RED-ZONE + +define void @_Z3asm_goto(i1 zeroext %0, i1 zeroext %1) nounwind { + ;; Check that blocks containing or targeted by asm goto aren't split. 
+ ; CHECK-LABEL: _Z3asm_goto + ; CHECK: .section .text.unlikely._Z3asm_goto,"ax",@progbits + ; CHECK-NEXT: _Z3asm_goto.cold: + ; CHECK-NEXT: bl bam + ; CHECK: .LBB0_4: + ; CHECK: ret + ; CHECK: .LBB_END0_4: + + br i1 %0, label %3, label %5 + +3: ; preds = %2 + %4 = call i32 @bar() + callbr void asm sideeffect "nop", "!i"() #3 + to label %asm.fallthrough [label %5] + + +asm.fallthrough: ; preds = %3 + br label %5 + +5: ; preds = %2, %asm.fallthrough + %6 = call i32 @bar() + br i1 %1, label %7, label %9 + +7: + %8 = call i32 @bam() + br label %9 + +9: ; preds = %7 + ret void +} + +define i32 @_Z3jump_table(i32 %in) nounwind { + ;; Check that a cold block that contains a jump table dispatch or + ;; that is targeted by a jump table is not split. + ; JUMP-TABLES-LABEL: _Z3jump_table + ; JUMP-TABLES: .section .text.unlikely._Z3jump_table,"ax",@progbits + ; JUMP-TABLES-NEXT: _Z3jump_table.cold: + ; JUMP-TABLES-SAME: %common.ret + ; JUMP-TABLES-NOT: b bar + ; JUMP-TABLES-NOT: b baz + ; JUMP-TABLES-NOT: b qux + ; JUMP-TABLES-NOT: b bam + + switch i32 %in, label %common.ret [ + i32 0, label %cold1 + i32 1, label %cold2 + i32 2, label %cold3 + i32 3, label %cold4 + ] + + common.ret: ; preds = %0 + ret i32 0 + + cold1: ; preds = %0 + %1 = tail call i32 @bar() + ret i32 %1 + + cold2: ; preds = %0 + %2 = tail call i32 @baz() + ret i32 %2 + + cold3: ; preds = %0 + %3 = tail call i32 @bam() + ret i32 %3 + + cold4: ; preds = %0 + %4 = tail call i32 @qux() + ret i32 %4 +} + +define i32 @_Z3red_zone(i1 zeroext %0, i32 %a, i32 %b) nounwind { +;; Check that cold blocks in functions with red zones aren't split. 
+; RED-ZONE-LABEL: _Z3red_zone +; MFS-REDZONE-AARCH64-NOT: _Z3red_zone.cold: + %a.addr = alloca i32, align 4 + %b.addr = alloca i32, align 4 + %x = alloca i32, align 4 + + br i1 %0, label %2, label %3 + +2: ; preds = %1 + store i32 %a, ptr %a.addr, align 4 + store i32 %b, ptr %b.addr, align 4 + br label %4 + +3: ; preds = %1 + store i32 %a, ptr %b.addr, align 4 + store i32 %b, ptr %a.addr, align 4 + br label %4 + +4: ; preds = %3, %2 + %tmp = load i32, ptr %a.addr, align 4 + %tmp1 = load i32, ptr %b.addr, align 4 + %add = add nsw i32 %tmp, %tmp1 + store i32 %add, ptr %x, align 4 + %tmp2 = load i32, ptr %x, align 4 + ret i32 %tmp2 +} + +declare i32 @bar() +declare i32 @baz() +declare i32 @bam() +declare i32 @qux() From de16a05af025da99009f314018ac4f361ac6faa4 Mon Sep 17 00:00:00 2001 From: sgundapa Date: Wed, 14 Feb 2024 13:10:18 -0600 Subject: [PATCH 173/240] [Hexagon] Fix zero extension of bit predicates with vtrunehb (#81772) vector extension from v4i1 to v4i8 generates an incorrect word. This patch uses a vtrunehb for truncation to fix the bug. 
--- llvm/lib/Target/Hexagon/HexagonPatterns.td | 4 +- llvm/test/CodeGen/Hexagon/vector-zext-v4i8.ll | 112 ++++++++++++++++++ 2 files changed, 114 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/Hexagon/vector-zext-v4i8.ll diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td index 9de50b405445c2..ea7c4acd0e8308 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatterns.td +++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td @@ -525,11 +525,11 @@ def Vsplatpi: OutPatFrag<(ops node:$V), (Combinew (A2_tfrsi $V), (A2_tfrsi $V))>; def: Pat<(v2i16 (azext V2I1:$Pu)), - (A2_andir (LoReg (C2_mask V2I1:$Pu)), (i32 0x00010001))>; + (A2_andir (S2_vtrunehb (C2_mask V2I1:$Pu)), (i32 0x00010001))>; def: Pat<(v2i32 (azext V2I1:$Pu)), (A2_andp (C2_mask V2I1:$Pu), (A2_combineii (i32 1), (i32 1)))>; def: Pat<(v4i8 (azext V4I1:$Pu)), - (A2_andir (LoReg (C2_mask V4I1:$Pu)), (i32 0x01010101))>; + (A2_andir (S2_vtrunehb (C2_mask V4I1:$Pu)), (i32 0x01010101))>; def: Pat<(v4i16 (azext V4I1:$Pu)), (A2_andp (C2_mask V4I1:$Pu), (Vsplatpi (i32 0x00010001)))>; def: Pat<(v8i8 (azext V8I1:$Pu)), diff --git a/llvm/test/CodeGen/Hexagon/vector-zext-v4i8.ll b/llvm/test/CodeGen/Hexagon/vector-zext-v4i8.ll new file mode 100644 index 00000000000000..4d0e6db22c3ba8 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/vector-zext-v4i8.ll @@ -0,0 +1,112 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; Check that when we extract a byte from the result of a mask from predicate +; that the results of the mask all fit in the same word. 
+; CHECK: [[PRED:p[0-9]+]] = vcmpb.gtu(r{{.*}},#0) +; CHECK: [[REG1:r[0-9]*:[0-9]*]] = mask([[PRED]]) +; CHECK: [[REG2:r[0-9]*]] = vtrunehb([[REG1]]) +; CHECK: {{r[0-9]*}} = extractu([[REG2]],#1,#8) + +target triple = "hexagon" + +%struct.pluto = type { [12 x %struct.pluto.0], [4 x %struct.pluto.0], [2 x %struct.pluto.0], [4 x %struct.pluto.0], [6 x %struct.pluto.0], [2 x [7 x %struct.pluto.0]], [4 x %struct.pluto.0], [3 x [4 x %struct.pluto.0]], [3 x %struct.pluto.0], [3 x %struct.pluto.0] } +%struct.pluto.0 = type { i8, i8 } + +@global = internal unnamed_addr constant [3 x [4 x [2 x i8]]] [[4 x [2 x i8]] [[2 x i8] c"\FAV", [2 x i8] c"\EF_", [2 x i8] c"\FA=", [2 x i8] c"\09-"], [4 x [2 x i8]] [[2 x i8] c"\06E", [2 x i8] c"\F3Z", [2 x i8] c"\004", [2 x i8] c"\08+"], [4 x [2 x i8]] [[2 x i8] c"\FA]", [2 x i8] c"\F2X", [2 x i8] c"\FA,", [2 x i8] c"\047"]], align 8 + +; Function Attrs: nofree noinline norecurse nosync nounwind memory(write) +define dso_local void @eggs(ptr nocapture %arg, ptr nocapture readnone %arg1, i32 %arg2, i32 %arg3, i32 %arg4) local_unnamed_addr #0 { +bb: + %icmp = icmp sgt i32 %arg3, 0 + %select = select i1 %icmp, i32 %arg3, i32 0 + br i1 false, label %bb33, label %bb5 + +bb5: ; preds = %bb + %insertelement = insertelement <4 x i32> poison, i32 %select, i32 0 + %shufflevector = shufflevector <4 x i32> %insertelement, <4 x i32> poison, <4 x i32> zeroinitializer + br label %bb6 + +bb6: ; preds = %bb6, %bb5 + %phi = phi i32 [ 0, %bb5 ], [ %add29, %bb6 ] + %insertelement7 = insertelement <4 x i32> poison, i32 %phi, i32 0 + %shufflevector8 = shufflevector <4 x i32> %insertelement7, <4 x i32> poison, <4 x i32> zeroinitializer + %add = add <4 x i32> %shufflevector8, + %add9 = add i32 %phi, 0 + %getelementptr = getelementptr inbounds [3 x [4 x [2 x i8]]], ptr @global, i32 0, i32 %arg2, i32 %add9, i32 0 + %getelementptr10 = getelementptr inbounds i8, ptr %getelementptr, i32 0 + %bitcast = bitcast ptr %getelementptr10 to ptr + %load = load <8 x i8>, ptr 
%bitcast, align 1 + %shufflevector11 = shufflevector <8 x i8> %load, <8 x i8> poison, <4 x i32> + %shufflevector12 = shufflevector <8 x i8> %load, <8 x i8> poison, <4 x i32> + %getelementptr13 = getelementptr [3 x [4 x [2 x i8]]], ptr @global, i32 0, i32 %arg2, i32 %add9, i32 1 + %sext = sext <4 x i8> %shufflevector11 to <4 x i32> + %mul = mul nsw <4 x i32> %shufflevector, %sext + %ashr = ashr <4 x i32> %mul, + %sext14 = sext <4 x i8> %shufflevector12 to <4 x i32> + %add15 = add nsw <4 x i32> %ashr, %sext14 + %icmp16 = icmp sgt <4 x i32> %add15, + %select17 = select <4 x i1> %icmp16, <4 x i32> %add15, <4 x i32> + %icmp18 = icmp slt <4 x i32> %select17, + %select19 = select <4 x i1> %icmp18, <4 x i32> %select17, <4 x i32> + %icmp20 = icmp sgt <4 x i32> %select19, + %trunc = trunc <4 x i32> %select19 to <4 x i8> + %add21 = add nsw <4 x i8> %trunc, + %getelementptr22 = getelementptr inbounds %struct.pluto, ptr %arg, i32 0, i32 1, i32 %add9, i32 0 + %sub = sub nsw <4 x i8> , %trunc + %select23 = select <4 x i1> %icmp20, <4 x i8> %add21, <4 x i8> %sub + %getelementptr24 = getelementptr inbounds %struct.pluto, ptr %arg, i32 0, i32 1, i32 %add9, i32 1 + %zext = zext <4 x i1> %icmp20 to <4 x i8> + %getelementptr25 = getelementptr inbounds i8, ptr %getelementptr24, i32 -1 + %bitcast26 = bitcast ptr %getelementptr25 to ptr + %shufflevector27 = shufflevector <4 x i8> %select23, <4 x i8> %zext, <8 x i32> + %shufflevector28 = shufflevector <8 x i8> %shufflevector27, <8 x i8> poison, <8 x i32> + store <8 x i8> %shufflevector28, ptr %bitcast26, align 1 + %add29 = add nuw i32 %phi, 4 + %icmp30 = icmp eq i32 %add29, 4 + br i1 %icmp30, label %bb31, label %bb6 + +bb31: ; preds = %bb6 + %icmp32 = icmp eq i32 4, 4 + br i1 %icmp32, label %bb61, label %bb33 + +bb33: ; preds = %bb31, %bb + %phi34 = phi i32 [ 4, %bb31 ], [ 0, %bb ] + br label %bb35 + +bb35: ; preds = %bb35, %bb33 + %phi36 = phi i32 [ %phi34, %bb33 ], [ %add58, %bb35 ] + %getelementptr37 = getelementptr inbounds [3 x [4 x 
[2 x i8]]], ptr @global, i32 0, i32 %arg2, i32 %phi36, i32 0 + %load38 = load i8, ptr %getelementptr37, align 2 + %getelementptr39 = getelementptr [3 x [4 x [2 x i8]]], ptr @global, i32 0, i32 %arg2, i32 %phi36, i32 1 + %load40 = load i8, ptr %getelementptr39, align 1 + %sext41 = sext i8 %load38 to i32 + %mul42 = mul nsw i32 %select, %sext41 + %ashr43 = ashr i32 %mul42, 4 + %sext44 = sext i8 %load40 to i32 + %add45 = add nsw i32 %ashr43, %sext44 + %icmp46 = icmp sgt i32 %add45, 1 + %select47 = select i1 %icmp46, i32 %add45, i32 1 + %icmp48 = icmp slt i32 %select47, 126 + %select49 = select i1 %icmp48, i32 %select47, i32 126 + %icmp50 = icmp sgt i32 %select49, 63 + %trunc51 = trunc i32 %select49 to i8 + %add52 = add nsw i8 %trunc51, -64 + %getelementptr53 = getelementptr inbounds %struct.pluto, ptr %arg, i32 0, i32 1, i32 %phi36, i32 0 + %sub54 = sub nsw i8 63, %trunc51 + %select55 = select i1 %icmp50, i8 %add52, i8 %sub54 + store i8 %select55, ptr %getelementptr53, align 1 + %getelementptr56 = getelementptr inbounds %struct.pluto, ptr %arg, i32 0, i32 1, i32 %phi36, i32 1 + %zext57 = zext i1 %icmp50 to i8 + store i8 %zext57, ptr %getelementptr56, align 1 + %add58 = add nuw nsw i32 %phi36, 1 + %icmp59 = icmp eq i32 %add58, 4 + br i1 %icmp59, label %bb60, label %bb35 + +bb60: ; preds = %bb35 + br label %bb61 + +bb61: ; preds = %bb60, %bb31 + ret void +} + +attributes #0 = { nofree noinline norecurse nosync nounwind memory(write) "target-cpu"="hexagonv73" "target-features"="+hvx-length64b,+hvxv73,+v73" } From 6297479ff0808e7c5335ec9ec837513e1cff610f Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Wed, 14 Feb 2024 11:10:48 -0800 Subject: [PATCH 174/240] [libc][stdbit] implement stdc_first_trailing_one (C23) (#81768) --- libc/config/linux/x86_64/entrypoints.txt | 5 +++++ libc/docs/stdbit.rst | 12 +++++----- libc/include/llvm-libc-macros/stdbit-macros.h | 22 +++++++++++++++++++ libc/spec/stdc.td | 13 ++++++----- libc/src/__support/CPP/bit.h | 5 +++++ 
libc/src/stdbit/CMakeLists.txt | 1 + .../src/stdbit/stdc_first_trailing_one_uc.cpp | 21 ++++++++++++++++++ libc/src/stdbit/stdc_first_trailing_one_uc.h | 18 +++++++++++++++ .../src/stdbit/stdc_first_trailing_one_ui.cpp | 20 +++++++++++++++++ libc/src/stdbit/stdc_first_trailing_one_ui.h | 18 +++++++++++++++ .../src/stdbit/stdc_first_trailing_one_ul.cpp | 21 ++++++++++++++++++ libc/src/stdbit/stdc_first_trailing_one_ul.h | 18 +++++++++++++++ .../stdbit/stdc_first_trailing_one_ull.cpp | 21 ++++++++++++++++++ libc/src/stdbit/stdc_first_trailing_one_ull.h | 18 +++++++++++++++ .../src/stdbit/stdc_first_trailing_one_us.cpp | 21 ++++++++++++++++++ libc/src/stdbit/stdc_first_trailing_one_us.h | 18 +++++++++++++++ libc/test/include/stdbit_test.cpp | 15 +++++++++++++ libc/test/src/__support/CPP/bit_test.cpp | 6 +++++ libc/test/src/stdbit/CMakeLists.txt | 1 + .../stdc_first_trailing_one_uc_test.cpp | 20 +++++++++++++++++ .../stdc_first_trailing_one_ui_test.cpp | 20 +++++++++++++++++ .../stdc_first_trailing_one_ul_test.cpp | 20 +++++++++++++++++ .../stdc_first_trailing_one_ull_test.cpp | 20 +++++++++++++++++ .../stdc_first_trailing_one_us_test.cpp | 20 +++++++++++++++++ 24 files changed, 362 insertions(+), 12 deletions(-) create mode 100644 libc/src/stdbit/stdc_first_trailing_one_uc.cpp create mode 100644 libc/src/stdbit/stdc_first_trailing_one_uc.h create mode 100644 libc/src/stdbit/stdc_first_trailing_one_ui.cpp create mode 100644 libc/src/stdbit/stdc_first_trailing_one_ui.h create mode 100644 libc/src/stdbit/stdc_first_trailing_one_ul.cpp create mode 100644 libc/src/stdbit/stdc_first_trailing_one_ul.h create mode 100644 libc/src/stdbit/stdc_first_trailing_one_ull.cpp create mode 100644 libc/src/stdbit/stdc_first_trailing_one_ull.h create mode 100644 libc/src/stdbit/stdc_first_trailing_one_us.cpp create mode 100644 libc/src/stdbit/stdc_first_trailing_one_us.h create mode 100644 libc/test/src/stdbit/stdc_first_trailing_one_uc_test.cpp create mode 100644 
libc/test/src/stdbit/stdc_first_trailing_one_ui_test.cpp create mode 100644 libc/test/src/stdbit/stdc_first_trailing_one_ul_test.cpp create mode 100644 libc/test/src/stdbit/stdc_first_trailing_one_ull_test.cpp create mode 100644 libc/test/src/stdbit/stdc_first_trailing_one_us_test.cpp diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index d53b225e4d87e2..57b4a1e0f93d4f 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -127,6 +127,11 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.stdbit.stdc_first_trailing_zero_ui libc.src.stdbit.stdc_first_trailing_zero_ul libc.src.stdbit.stdc_first_trailing_zero_ull + libc.src.stdbit.stdc_first_trailing_one_uc + libc.src.stdbit.stdc_first_trailing_one_us + libc.src.stdbit.stdc_first_trailing_one_ui + libc.src.stdbit.stdc_first_trailing_one_ul + libc.src.stdbit.stdc_first_trailing_one_ull # stdlib.h entrypoints libc.src.stdlib.abs diff --git a/libc/docs/stdbit.rst b/libc/docs/stdbit.rst index 790a747baaa2da..3bd83ff70c8924 100644 --- a/libc/docs/stdbit.rst +++ b/libc/docs/stdbit.rst @@ -66,11 +66,11 @@ stdc_first_trailing_zero_us |check| stdc_first_trailing_zero_ui |check| stdc_first_trailing_zero_ul |check| stdc_first_trailing_zero_ull |check| -stdc_first_trailing_one_uc -stdc_first_trailing_one_us -stdc_first_trailing_one_ui -stdc_first_trailing_one_ul -stdc_first_trailing_one_ull +stdc_first_trailing_one_uc |check| +stdc_first_trailing_one_us |check| +stdc_first_trailing_one_ui |check| +stdc_first_trailing_one_ul |check| +stdc_first_trailing_one_ull |check| stdc_count_zeros_uc stdc_count_zeros_us stdc_count_zeros_ui @@ -121,7 +121,7 @@ stdc_trailing_ones |check| stdc_first_leading_zero |check| stdc_first_leading_one |check| stdc_first_trailing_zero |check| -stdc_first_trailing_one +stdc_first_trailing_one |check| stdc_count_zeros stdc_count_ones stdc_has_single_bit diff --git a/libc/include/llvm-libc-macros/stdbit-macros.h 
b/libc/include/llvm-libc-macros/stdbit-macros.h index 0ec8aef10d3e0a..0c97da96ebba29 100644 --- a/libc/include/llvm-libc-macros/stdbit-macros.h +++ b/libc/include/llvm-libc-macros/stdbit-macros.h @@ -116,6 +116,21 @@ inline unsigned stdc_first_trailing_zero(unsigned long x) { inline unsigned stdc_first_trailing_zero(unsigned long long x) { return stdc_first_trailing_zero_ull(x); } +inline unsigned stdc_first_trailing_one(unsigned char x) { + return stdc_first_trailing_one_uc(x); +} +inline unsigned stdc_first_trailing_one(unsigned short x) { + return stdc_first_trailing_one_us(x); +} +inline unsigned stdc_first_trailing_one(unsigned x) { + return stdc_first_trailing_one_ui(x); +} +inline unsigned stdc_first_trailing_one(unsigned long x) { + return stdc_first_trailing_one_ul(x); +} +inline unsigned stdc_first_trailing_one(unsigned long long x) { + return stdc_first_trailing_one_ull(x); +} #else #define stdc_leading_zeros(x) \ _Generic((x), \ @@ -166,6 +181,13 @@ inline unsigned stdc_first_trailing_zero(unsigned long long x) { unsigned: stdc_first_trailing_zero_ui, \ unsigned long: stdc_first_trailing_zero_ul, \ unsigned long long: stdc_first_trailing_zero_ull)(x) +#define stdc_first_trailing_one(x) \ + _Generic((x), \ + unsigned char: stdc_first_trailing_one_uc, \ + unsigned short: stdc_first_trailing_one_us, \ + unsigned: stdc_first_trailing_one_ui, \ + unsigned long: stdc_first_trailing_one_ul, \ + unsigned long long: stdc_first_trailing_one_ull)(x) #endif // __cplusplus #endif // __LLVM_LIBC_MACROS_STDBIT_MACROS_H diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index b97e90d1a8ae34..9ed94638f522ca 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -783,7 +783,8 @@ def StdC : StandardSpec<"stdc"> { Macro<"stdc_trailing_ones">, Macro<"stdc_first_leading_zero">, Macro<"stdc_first_leading_one">, - Macro<"stdc_first_trailing_zero"> + Macro<"stdc_first_trailing_zero">, + Macro<"stdc_first_trailing_one"> ], // Macros [], // Types [], // Enumerations @@ -818,11 
+819,11 @@ def StdC : StandardSpec<"stdc"> { FunctionSpec<"stdc_first_leading_one_ui", RetValSpec, [ArgSpec]>, FunctionSpec<"stdc_first_leading_one_ul", RetValSpec, [ArgSpec]>, FunctionSpec<"stdc_first_leading_one_ull", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_first_trailing_zero_uc", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_first_trailing_zero_us", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_first_trailing_zero_ui", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_first_trailing_zero_ul", RetValSpec, [ArgSpec]>, - FunctionSpec<"stdc_first_trailing_zero_ull", RetValSpec, [ArgSpec]>, + FunctionSpec<"stdc_first_trailing_one_uc", RetValSpec, [ArgSpec]>, + FunctionSpec<"stdc_first_trailing_one_us", RetValSpec, [ArgSpec]>, + FunctionSpec<"stdc_first_trailing_one_ui", RetValSpec, [ArgSpec]>, + FunctionSpec<"stdc_first_trailing_one_ul", RetValSpec, [ArgSpec]>, + FunctionSpec<"stdc_first_trailing_one_ull", RetValSpec, [ArgSpec]> ] // Functions >; diff --git a/libc/src/__support/CPP/bit.h b/libc/src/__support/CPP/bit.h index 5afcc875752cff..f5e50262371f26 100644 --- a/libc/src/__support/CPP/bit.h +++ b/libc/src/__support/CPP/bit.h @@ -243,6 +243,11 @@ template >> : countr_zero(static_cast(~value)) + 1; } +template >> +[[nodiscard]] LIBC_INLINE constexpr int first_trailing_one(T value) { + return value == cpp::numeric_limits::max() ? 
0 : countr_zero(value) + 1; +} + } // namespace LIBC_NAMESPACE::cpp #endif // LLVM_LIBC_SRC___SUPPORT_CPP_BIT_H diff --git a/libc/src/stdbit/CMakeLists.txt b/libc/src/stdbit/CMakeLists.txt index fa68d4a90dbb7e..14cc26e206e0d3 100644 --- a/libc/src/stdbit/CMakeLists.txt +++ b/libc/src/stdbit/CMakeLists.txt @@ -6,6 +6,7 @@ set(prefixes first_leading_zero first_leading_one first_trailing_zero + first_trailing_one ) set(suffixes c s i l ll) foreach(prefix IN LISTS prefixes) diff --git a/libc/src/stdbit/stdc_first_trailing_one_uc.cpp b/libc/src/stdbit/stdc_first_trailing_one_uc.cpp new file mode 100644 index 00000000000000..6ed35966be61a0 --- /dev/null +++ b/libc/src/stdbit/stdc_first_trailing_one_uc.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of stdc_first_trailing_one_uc ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdbit/stdc_first_trailing_one_uc.h" + +#include "src/__support/CPP/bit.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(unsigned, stdc_first_trailing_one_uc, + (unsigned char value)) { + return static_cast(cpp::first_trailing_one(value)); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/stdbit/stdc_first_trailing_one_uc.h b/libc/src/stdbit/stdc_first_trailing_one_uc.h new file mode 100644 index 00000000000000..d733ce850de0fb --- /dev/null +++ b/libc/src/stdbit/stdc_first_trailing_one_uc.h @@ -0,0 +1,18 @@ +//===-- Implementation header for stdc_first_trailing_one_uc ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDBIT_STDC_FIRST_TRAILING_ONE_UC_H +#define LLVM_LIBC_SRC_STDBIT_STDC_FIRST_TRAILING_ONE_UC_H + +namespace LIBC_NAMESPACE { + +unsigned stdc_first_trailing_one_uc(unsigned char value); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STDBIT_STDC_FIRST_TRAILING_ONE_UC_H diff --git a/libc/src/stdbit/stdc_first_trailing_one_ui.cpp b/libc/src/stdbit/stdc_first_trailing_one_ui.cpp new file mode 100644 index 00000000000000..a89083bd49507a --- /dev/null +++ b/libc/src/stdbit/stdc_first_trailing_one_ui.cpp @@ -0,0 +1,20 @@ +//===-- Implementation of stdc_first_trailing_one_ui ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdbit/stdc_first_trailing_one_ui.h" + +#include "src/__support/CPP/bit.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(unsigned, stdc_first_trailing_one_ui, (unsigned value)) { + return static_cast(cpp::first_trailing_one(value)); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/stdbit/stdc_first_trailing_one_ui.h b/libc/src/stdbit/stdc_first_trailing_one_ui.h new file mode 100644 index 00000000000000..6a6a5046709aec --- /dev/null +++ b/libc/src/stdbit/stdc_first_trailing_one_ui.h @@ -0,0 +1,18 @@ +//===-- Implementation header for stdc_first_trailing_one_ui ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDBIT_STDC_FIRST_TRAILING_ONE_UI_H +#define LLVM_LIBC_SRC_STDBIT_STDC_FIRST_TRAILING_ONE_UI_H + +namespace LIBC_NAMESPACE { + +unsigned stdc_first_trailing_one_ui(unsigned value); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STDBIT_STDC_FIRST_TRAILING_ONE_UI_H diff --git a/libc/src/stdbit/stdc_first_trailing_one_ul.cpp b/libc/src/stdbit/stdc_first_trailing_one_ul.cpp new file mode 100644 index 00000000000000..f30078d0f5ffaa --- /dev/null +++ b/libc/src/stdbit/stdc_first_trailing_one_ul.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of stdc_first_trailing_one_ul ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdbit/stdc_first_trailing_one_ul.h" + +#include "src/__support/CPP/bit.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(unsigned, stdc_first_trailing_one_ul, + (unsigned long value)) { + return static_cast(cpp::first_trailing_one(value)); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/stdbit/stdc_first_trailing_one_ul.h b/libc/src/stdbit/stdc_first_trailing_one_ul.h new file mode 100644 index 00000000000000..09b6a9bbbe34a1 --- /dev/null +++ b/libc/src/stdbit/stdc_first_trailing_one_ul.h @@ -0,0 +1,18 @@ +//===-- Implementation header for stdc_first_trailing_one_ul ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDBIT_STDC_FIRST_TRAILING_ONE_UL_H +#define LLVM_LIBC_SRC_STDBIT_STDC_FIRST_TRAILING_ONE_UL_H + +namespace LIBC_NAMESPACE { + +unsigned stdc_first_trailing_one_ul(unsigned long value); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STDBIT_STDC_FIRST_TRAILING_ONE_UL_H diff --git a/libc/src/stdbit/stdc_first_trailing_one_ull.cpp b/libc/src/stdbit/stdc_first_trailing_one_ull.cpp new file mode 100644 index 00000000000000..2e526a890cda9c --- /dev/null +++ b/libc/src/stdbit/stdc_first_trailing_one_ull.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of stdc_first_trailing_one_ull ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdbit/stdc_first_trailing_one_ull.h" + +#include "src/__support/CPP/bit.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(unsigned, stdc_first_trailing_one_ull, + (unsigned long long value)) { + return static_cast(cpp::first_trailing_one(value)); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/stdbit/stdc_first_trailing_one_ull.h b/libc/src/stdbit/stdc_first_trailing_one_ull.h new file mode 100644 index 00000000000000..3e12a1d7456691 --- /dev/null +++ b/libc/src/stdbit/stdc_first_trailing_one_ull.h @@ -0,0 +1,18 @@ +//===-- Implementation header for stdc_first_trailing_one_ull --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDBIT_STDC_FIRST_TRAILING_ONE_ULL_H +#define LLVM_LIBC_SRC_STDBIT_STDC_FIRST_TRAILING_ONE_ULL_H + +namespace LIBC_NAMESPACE { + +unsigned stdc_first_trailing_one_ull(unsigned long long value); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STDBIT_STDC_FIRST_TRAILING_ONE_ULL_H diff --git a/libc/src/stdbit/stdc_first_trailing_one_us.cpp b/libc/src/stdbit/stdc_first_trailing_one_us.cpp new file mode 100644 index 00000000000000..e4c88e0d7906b9 --- /dev/null +++ b/libc/src/stdbit/stdc_first_trailing_one_us.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of stdc_first_trailing_one_us ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdbit/stdc_first_trailing_one_us.h" + +#include "src/__support/CPP/bit.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(unsigned, stdc_first_trailing_one_us, + (unsigned short value)) { + return static_cast(cpp::first_trailing_one(value)); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/stdbit/stdc_first_trailing_one_us.h b/libc/src/stdbit/stdc_first_trailing_one_us.h new file mode 100644 index 00000000000000..f380898fc68cde --- /dev/null +++ b/libc/src/stdbit/stdc_first_trailing_one_us.h @@ -0,0 +1,18 @@ +//===-- Implementation header for stdc_first_trailing_one_us ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDBIT_STDC_FIRST_TRAILING_ONE_US_H +#define LLVM_LIBC_SRC_STDBIT_STDC_FIRST_TRAILING_ONE_US_H + +namespace LIBC_NAMESPACE { + +unsigned stdc_first_trailing_one_us(unsigned short value); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STDBIT_STDC_FIRST_TRAILING_ONE_US_H diff --git a/libc/test/include/stdbit_test.cpp b/libc/test/include/stdbit_test.cpp index 238c2d44fac45a..22d5533df1e854 100644 --- a/libc/test/include/stdbit_test.cpp +++ b/libc/test/include/stdbit_test.cpp @@ -64,6 +64,13 @@ unsigned stdc_first_trailing_zero_ul(unsigned long) noexcept { return 0x0DU; } unsigned stdc_first_trailing_zero_ull(unsigned long long) noexcept { return 0x0FU; } +unsigned stdc_first_trailing_one_uc(unsigned char) noexcept { return 0x1AU; } +unsigned stdc_first_trailing_one_us(unsigned short) noexcept { return 0x1BU; } +unsigned stdc_first_trailing_one_ui(unsigned) noexcept { return 0x1CU; } +unsigned stdc_first_trailing_one_ul(unsigned long) noexcept { return 0x1DU; } +unsigned stdc_first_trailing_one_ull(unsigned long long) noexcept { + return 0x1FU; +} } #include "include/llvm-libc-macros/stdbit-macros.h" @@ -123,3 +130,11 @@ TEST(LlvmLibcStdbitTest, TypeGenericMacroFirstTrailingZero) { EXPECT_EQ(stdc_first_trailing_zero(0UL), 0x0DU); EXPECT_EQ(stdc_first_trailing_zero(0ULL), 0x0FU); } + +TEST(LlvmLibcStdbitTest, TypeGenericMacroFirstTrailingOne) { + EXPECT_EQ(stdc_first_trailing_one(static_cast(0U)), 0x1AU); + EXPECT_EQ(stdc_first_trailing_one(static_cast(0U)), 0x1BU); + EXPECT_EQ(stdc_first_trailing_one(0U), 0x1CU); + EXPECT_EQ(stdc_first_trailing_one(0UL), 0x1DU); + EXPECT_EQ(stdc_first_trailing_one(0ULL), 0x1FU); +} diff --git a/libc/test/src/__support/CPP/bit_test.cpp b/libc/test/src/__support/CPP/bit_test.cpp index 914c6ca823c8ef..5d1f451776a5fe 100644 --- 
a/libc/test/src/__support/CPP/bit_test.cpp +++ b/libc/test/src/__support/CPP/bit_test.cpp @@ -226,4 +226,10 @@ TYPED_TEST(LlvmLibcBitTest, FirstTrailingZero, UnsignedTypes) { EXPECT_EQ(first_trailing_zero(~(T(1) << i)), i + 1); } +TYPED_TEST(LlvmLibcBitTest, FirstTrailingOne, UnsignedTypes) { + EXPECT_EQ(first_trailing_one(cpp::numeric_limits::max()), 0); + for (int i = 0U; i != cpp::numeric_limits::digits; ++i) + EXPECT_EQ(first_trailing_one(T(1) << i), i + 1); +} + } // namespace LIBC_NAMESPACE::cpp diff --git a/libc/test/src/stdbit/CMakeLists.txt b/libc/test/src/stdbit/CMakeLists.txt index d5896ad83dbfab..203f48bda99a44 100644 --- a/libc/test/src/stdbit/CMakeLists.txt +++ b/libc/test/src/stdbit/CMakeLists.txt @@ -8,6 +8,7 @@ set(prefixes first_leading_zero first_leading_one first_trailing_zero + first_trailing_one ) set(suffixes c s i l ll) foreach(prefix IN LISTS prefixes) diff --git a/libc/test/src/stdbit/stdc_first_trailing_one_uc_test.cpp b/libc/test/src/stdbit/stdc_first_trailing_one_uc_test.cpp new file mode 100644 index 00000000000000..ed2b4921cdada4 --- /dev/null +++ b/libc/test/src/stdbit/stdc_first_trailing_one_uc_test.cpp @@ -0,0 +1,20 @@ +//===-- Unittests for stdc_first_trailing_one_uc -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/CPP/limits.h" +#include "src/stdbit/stdc_first_trailing_one_uc.h" +#include "test/UnitTest/Test.h" + +TEST(LlvmLibcStdcFirstTrailingOneUcTest, ALL) { + EXPECT_EQ(LIBC_NAMESPACE::stdc_first_trailing_one_uc(UCHAR_MAX), 0U); +} + +TEST(LlvmLibcStdcFirstTrailingOneUcTest, OneHot) { + for (unsigned i = 0U; i != UCHAR_WIDTH; ++i) + EXPECT_EQ(LIBC_NAMESPACE::stdc_first_trailing_one_uc(1U << i), i + 1); +} diff --git a/libc/test/src/stdbit/stdc_first_trailing_one_ui_test.cpp b/libc/test/src/stdbit/stdc_first_trailing_one_ui_test.cpp new file mode 100644 index 00000000000000..137c8a42e407df --- /dev/null +++ b/libc/test/src/stdbit/stdc_first_trailing_one_ui_test.cpp @@ -0,0 +1,20 @@ +//===-- Unittests for stdc_first_trailing_one_ui -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/CPP/limits.h" +#include "src/stdbit/stdc_first_trailing_one_ui.h" +#include "test/UnitTest/Test.h" + +TEST(LlvmLibcStdcFirstTrailingOneUiTest, ALL) { + EXPECT_EQ(LIBC_NAMESPACE::stdc_first_trailing_one_ui(UINT_MAX), 0U); +} + +TEST(LlvmLibcStdcFirstTrailingOneUiTest, OneHot) { + for (unsigned i = 0U; i != UINT_WIDTH; ++i) + EXPECT_EQ(LIBC_NAMESPACE::stdc_first_trailing_one_ui(1U << i), i + 1); +} diff --git a/libc/test/src/stdbit/stdc_first_trailing_one_ul_test.cpp b/libc/test/src/stdbit/stdc_first_trailing_one_ul_test.cpp new file mode 100644 index 00000000000000..3fc1f3f16c60de --- /dev/null +++ b/libc/test/src/stdbit/stdc_first_trailing_one_ul_test.cpp @@ -0,0 +1,20 @@ +//===-- Unittests for stdc_first_trailing_one_ul -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/CPP/limits.h" +#include "src/stdbit/stdc_first_trailing_one_ul.h" +#include "test/UnitTest/Test.h" + +TEST(LlvmLibcStdcFirstTrailingOneUlTest, ALL) { + EXPECT_EQ(LIBC_NAMESPACE::stdc_first_trailing_one_ul(ULONG_MAX), 0U); +} + +TEST(LlvmLibcStdcFirstTrailingOneUlTest, OneHot) { + for (unsigned i = 0U; i != ULONG_WIDTH; ++i) + EXPECT_EQ(LIBC_NAMESPACE::stdc_first_trailing_one_ul(1UL << i), i + 1); +} diff --git a/libc/test/src/stdbit/stdc_first_trailing_one_ull_test.cpp b/libc/test/src/stdbit/stdc_first_trailing_one_ull_test.cpp new file mode 100644 index 00000000000000..5719e09a5120a0 --- /dev/null +++ b/libc/test/src/stdbit/stdc_first_trailing_one_ull_test.cpp @@ -0,0 +1,20 @@ +//===-- Unittests for stdc_first_trailing_one_ull ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/CPP/limits.h" +#include "src/stdbit/stdc_first_trailing_one_ull.h" +#include "test/UnitTest/Test.h" + +TEST(LlvmLibcStdcFirstTrailingOneUllTest, ALL) { + EXPECT_EQ(LIBC_NAMESPACE::stdc_first_trailing_one_ull(ULLONG_MAX), 0U); +} + +TEST(LlvmLibcStdcFirstTrailingOneUllTest, OneHot) { + for (unsigned i = 0U; i != ULLONG_WIDTH; ++i) + EXPECT_EQ(LIBC_NAMESPACE::stdc_first_trailing_one_ull(1ULL << i), i + 1); +} diff --git a/libc/test/src/stdbit/stdc_first_trailing_one_us_test.cpp b/libc/test/src/stdbit/stdc_first_trailing_one_us_test.cpp new file mode 100644 index 00000000000000..60021552310bee --- /dev/null +++ b/libc/test/src/stdbit/stdc_first_trailing_one_us_test.cpp @@ -0,0 +1,20 @@ +//===-- Unittests for stdc_first_trailing_one_us -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/CPP/limits.h" +#include "src/stdbit/stdc_first_trailing_one_us.h" +#include "test/UnitTest/Test.h" + +TEST(LlvmLibcStdcFirstTrailingOneUsTest, ALL) { + EXPECT_EQ(LIBC_NAMESPACE::stdc_first_trailing_one_us(USHRT_MAX), 0U); +} + +TEST(LlvmLibcStdcFirstTrailingOneUsTest, OneHot) { + for (unsigned i = 0U; i != USHRT_WIDTH; ++i) + EXPECT_EQ(LIBC_NAMESPACE::stdc_first_trailing_one_us(1U << i), i + 1); +} From c9e8e91acae73c84a30311c6c745361251cf5146 Mon Sep 17 00:00:00 2001 From: Alexander Yermolovich <43973793+ayermolo@users.noreply.github.com> Date: Wed, 14 Feb 2024 11:23:57 -0800 Subject: [PATCH 175/240] [BOLT][DWARF] Fix out of order rangelists/loclists (#81645) GCC can generate rangelists/loclists that are out of order. 
Fixed so that we don't assert, and instead generate partially optimized list. Through most code paths we do sort rnglists/loclists, but not for loclist for a path where BOLT does not modify a function. Although it's nice to have lists sorted, this implementation shouldn't rely on it. This also fixes an issue if we partially capture a list we would write out *end_of_list in helper function. So tools won't see the rest of the addresses being written out. --- bolt/lib/Core/DebugData.cpp | 45 +- bolt/test/X86/dwarf5-loclist-out-of-order.s | 485 ++++++++++++++++++++ 2 files changed, 504 insertions(+), 26 deletions(-) create mode 100644 bolt/test/X86/dwarf5-loclist-out-of-order.s diff --git a/bolt/lib/Core/DebugData.cpp b/bolt/lib/Core/DebugData.cpp index 8c3f6bd2052f9e..2942f0b9190fa2 100644 --- a/bolt/lib/Core/DebugData.cpp +++ b/bolt/lib/Core/DebugData.cpp @@ -230,7 +230,7 @@ template static bool emitWithBase(raw_ostream &OS, const DebugVector &Entries, DebugAddrWriter &AddrWriter, DWARFUnit &CU, uint32_t &Index, const ListEntry BaseAddressx, - const ListEntry OffsetPair, const ListEntry EndOfList, + const ListEntry OffsetPair, const std::function &Func) { if (Entries.size() < 2) return false; @@ -241,7 +241,9 @@ static bool emitWithBase(raw_ostream &OS, const DebugVector &Entries, const DebugAddressEntry &Entry = Entries[Index]; if (Entry.LowPC == 0) break; - assert(Base <= Entry.LowPC && "Entry base is higher than low PC"); + // In case rnglists or loclists are not sorted. 
+ if (Base > Entry.LowPC) + break; uint32_t StartOffset = Entry.LowPC - Base; uint32_t EndOffset = Entry.HighPC - Base; if (encodeULEB128(EndOffset, TempBuffer) > 2) @@ -266,8 +268,6 @@ static bool emitWithBase(raw_ostream &OS, const DebugVector &Entries, encodeULEB128(OffsetEntry.EndOffset, OS); Func(OffsetEntry.Index); } - support::endian::write(OS, static_cast(EndOfList), - llvm::endianness::little); return true; } @@ -276,19 +276,17 @@ DebugRangeListsSectionWriter::addRanges(DebugAddressRangesVector &Ranges) { std::lock_guard Lock(WriterMutex); RangeEntries.push_back(CurrentOffset); - bool WrittenStartxLength = false; std::sort( Ranges.begin(), Ranges.end(), [](const DebugAddressRange &R1, const DebugAddressRange &R2) -> bool { return R1.LowPC < R2.LowPC; }); for (unsigned I = 0; I < Ranges.size();) { - WrittenStartxLength = false; if (emitWithBase( - *CUBodyStream, Ranges, *AddrWriter, *CU, I, - dwarf::DW_RLE_base_addressx, dwarf::DW_RLE_offset_pair, - dwarf::DW_RLE_end_of_list, [](uint32_t Index) -> void {})) + DebugAddressRange>(*CUBodyStream, Ranges, *AddrWriter, *CU, + I, dwarf::DW_RLE_base_addressx, + dwarf::DW_RLE_offset_pair, + [](uint32_t Index) -> void {})) continue; const DebugAddressRange &Range = Ranges[I]; @@ -299,12 +297,11 @@ DebugRangeListsSectionWriter::addRanges(DebugAddressRangesVector &Ranges) { encodeULEB128(Index, *CUBodyStream); encodeULEB128(Range.HighPC - Range.LowPC, *CUBodyStream); ++I; - WrittenStartxLength = true; } - if (WrittenStartxLength) - support::endian::write(*CUBodyStream, - static_cast(dwarf::DW_RLE_end_of_list), - llvm::endianness::little); + + support::endian::write(*CUBodyStream, + static_cast(dwarf::DW_RLE_end_of_list), + llvm::endianness::little); CurrentOffset = CUBodyBuffer->size(); return RangeEntries.size() - 1; } @@ -688,7 +685,6 @@ static void writeDWARF5LocList(uint32_t &NumberOfEntries, DIEValue &AttrInfo, } std::vector OffsetsArray; - bool WrittenStartxLength = false; auto writeExpression = [&](uint32_t 
Index) -> void { const DebugLocationEntry &Entry = LocList[Index]; encodeULEB128(Entry.Expr.size(), LocBodyStream); @@ -696,12 +692,11 @@ static void writeDWARF5LocList(uint32_t &NumberOfEntries, DIEValue &AttrInfo, reinterpret_cast(Entry.Expr.data()), Entry.Expr.size()); }; for (unsigned I = 0; I < LocList.size();) { - WrittenStartxLength = false; if (emitWithBase( - LocBodyStream, LocList, AddrWriter, CU, I, - dwarf::DW_LLE_base_addressx, dwarf::DW_LLE_offset_pair, - dwarf::DW_LLE_end_of_list, writeExpression)) + DebugLocationEntry>(LocBodyStream, LocList, AddrWriter, CU, + I, dwarf::DW_LLE_base_addressx, + dwarf::DW_LLE_offset_pair, + writeExpression)) continue; const DebugLocationEntry &Entry = LocList[I]; @@ -713,13 +708,11 @@ static void writeDWARF5LocList(uint32_t &NumberOfEntries, DIEValue &AttrInfo, encodeULEB128(Entry.HighPC - Entry.LowPC, LocBodyStream); writeExpression(I); ++I; - WrittenStartxLength = true; } - if (WrittenStartxLength) - support::endian::write(LocBodyStream, - static_cast(dwarf::DW_LLE_end_of_list), - llvm::endianness::little); + support::endian::write(LocBodyStream, + static_cast(dwarf::DW_LLE_end_of_list), + llvm::endianness::little); } void DebugLoclistWriter::addList(DIEBuilder &DIEBldr, DIE &Die, diff --git a/bolt/test/X86/dwarf5-loclist-out-of-order.s b/bolt/test/X86/dwarf5-loclist-out-of-order.s new file mode 100644 index 00000000000000..acd0bfa5bbef2b --- /dev/null +++ b/bolt/test/X86/dwarf5-loclist-out-of-order.s @@ -0,0 +1,485 @@ +# REQUIRES: system-linux + +# RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %s -o %t1.o +# RUN: %clang %cflags -dwarf-5 %t1.o -o %t.exe -Wl,-q +# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections --skip-funcs=main +# RUN: llvm-dwarfdump --show-form --verbose --debug-loclists --debug-addr %t.bolt > %t.txt +# RUN: llvm-dwarfdump --show-form --verbose --debug-info %t.bolt >> %t.txt +# RUN: cat %t.txt | FileCheck --check-prefix=POSTCHECK %s + +## Tests to make sure BOLT 
handles correctly locations that are out of order, and the function is not being processed. + +# POSTCHECK: DW_LLE_base_addressx +# POSTCHECK-NEXT: DW_LLE_offset_pair +# POSTCHECK-NEXT: DW_LLE_offset_pair +# POSTCHECK-NEXT: DW_LLE_startx_length +# POSTCHECK-NEXT: DW_LLE_end_of_list +# POSTCHECK: DW_LLE_base_addressx +# POSTCHECK-NEXT: DW_LLE_offset_pair +# POSTCHECK-NEXT: DW_LLE_offset_pair +# POSTCHECK-NEXT: DW_LLE_end_of_list + +# POSTCHECK: Addrs: [ +# POSTCHECK-NEXT: 0x[[#%.16x,ADDR:]] +# POSTCHECK-NEXT: 0x +# POSTCHECK-NEXT: 0x +# POSTCHECK-NEXT: 0x[[#%.16x,ADDR1:]] +# POSTCHECK-NEXT: 0x +# POSTCHECK-NEXT: 0x + +# POSTCHECK: DW_TAG_formal_parameter +# POSTCHECK: DW_TAG_formal_parameter +# POSTCHECK-NEXT: DW_AT_location +# POSTCHECK-NEXT: [0x[[#ADDR1]], 0x[[#ADDR1 + 0x1a]]): DW_OP_reg3 RBX +# POSTCHECK-NEXT: [0x[[#ADDR1 + 0x1a]], 0x[[#ADDR1 + 0x1d]]): DW_OP_entry_value(DW_OP_reg5 RDI), DW_OP_stack_value +# POSTCHECK-NEXT: [0x[[#ADDR]], 0x[[#ADDR + 0x12]]): DW_OP_reg5 RDI) + +## clang++ main.cpp -fno-inline-functions -g2 -O2 -S +## void use(int * x) { +## *x += 4; +## } +## int main(int argc, char *argv[]) { +## int x = argc; +## use(&x); +## x = x + argc; +## use(&x); +## return x; +## } + +## Test was manually modified to re-order locations. 
+ .text + .file "main.cpp" + .globl _Z3usePi # -- Begin function _Z3usePi + .p2align 4, 0x90 + .type _Z3usePi,@function +_Z3usePi: # @_Z3usePi +.Lfunc_begin0: + .file 0 "/repro" "main.cpp" md5 0xe24a1d6afb5e23ce0028f1f33bc08cd7 + .cfi_startproc +# %bb.0: # %entry + #DEBUG_VALUE: use:x <- $rdi + .loc 0 2 8 prologue_end # main.cpp:2:8 + addl $4, (%rdi) + .loc 0 3 1 # main.cpp:3:1 + retq +.Ltmp0: +.Lfunc_end0: + .size _Z3usePi, .Lfunc_end0-_Z3usePi + .cfi_endproc + # -- End function + .globl main # -- Begin function main + .p2align 4, 0x90 + .type main,@function +main: # @main +.Lfunc_begin1: + .loc 0 4 0 # main.cpp:4:0 + .cfi_startproc +# %bb.0: # %entry + #DEBUG_VALUE: main:argc <- $edi + #DEBUG_VALUE: main:argv <- $rsi + pushq %r14 + .cfi_def_cfa_offset 16 + pushq %rbx + .cfi_def_cfa_offset 24 + pushq %rax + .cfi_def_cfa_offset 32 + .cfi_offset %rbx, -24 + .cfi_offset %r14, -16 + movl %edi, %ebx +.Ltmp1: + .loc 0 5 7 prologue_end # main.cpp:5:7 + movl %edi, 4(%rsp) + leaq 4(%rsp), %r14 + .loc 0 6 3 # main.cpp:6:3 + movq %r14, %rdi +.Ltmp2: + #DEBUG_VALUE: main:argc <- $ebx + callq _Z3usePi +.Ltmp3: + #DEBUG_VALUE: main:argv <- [DW_OP_LLVM_entry_value 1] $rsi + .loc 0 7 5 # main.cpp:7:5 + addl %ebx, 4(%rsp) + .loc 0 8 3 # main.cpp:8:3 + movq %r14, %rdi + callq _Z3usePi +.Ltmp4: + .loc 0 9 10 # main.cpp:9:10 + movl 4(%rsp), %eax + .loc 0 9 3 epilogue_begin is_stmt 0 # main.cpp:9:3 + addq $8, %rsp + .cfi_def_cfa_offset 24 + popq %rbx +.Ltmp5: + #DEBUG_VALUE: main:argc <- [DW_OP_LLVM_entry_value 1] $edi + .cfi_def_cfa_offset 16 + popq %r14 + .cfi_def_cfa_offset 8 + retq +.Ltmp6: +.Lfunc_end1: + .size main, .Lfunc_end1-main + .cfi_endproc + # -- End function + .section .debug_loclists,"",@progbits + .long .Ldebug_list_header_end0-.Ldebug_list_header_start0 # Length +.Ldebug_list_header_start0: + .short 5 # Version + .byte 8 # Address size + .byte 0 # Segment selector size + .long 2 # Offset entry count +.Lloclists_table_base0: + .long .Ldebug_loc0-.Lloclists_table_base0 
+ .long .Ldebug_loc1-.Lloclists_table_base0 +.Ldebug_loc0: + .byte 4 # DW_LLE_offset_pair + .uleb128 .Ltmp2-.Lfunc_begin0 # starting offset + .uleb128 .Ltmp5-.Lfunc_begin0 # ending offset + .byte 1 # Loc expr size + .byte 83 # super-register DW_OP_reg3 + .byte 4 # DW_LLE_offset_pair + .uleb128 .Ltmp5-.Lfunc_begin0 # starting offset + .uleb128 .Lfunc_end1-.Lfunc_begin0 # ending offset + .byte 4 # Loc expr size + .byte 163 # DW_OP_entry_value + .byte 1 # 1 + .byte 85 # super-register DW_OP_reg5 + .byte 159 # DW_OP_stack_value + .byte 4 # DW_LLE_offset_pair #manually moved out of order + .uleb128 .Lfunc_begin1-.Lfunc_begin0 # starting offset + .uleb128 .Ltmp2-.Lfunc_begin0 # ending offset + .byte 1 # Loc expr size + .byte 85 # super-register DW_OP_reg5 + .byte 0 # DW_LLE_end_of_list +.Ldebug_loc1: + .byte 4 # DW_LLE_offset_pair + .uleb128 .Lfunc_begin1-.Lfunc_begin0 # starting offset + .uleb128 .Ltmp3-.Lfunc_begin0 # ending offset + .byte 1 # Loc expr size + .byte 84 # DW_OP_reg4 + .byte 4 # DW_LLE_offset_pair + .uleb128 .Ltmp3-.Lfunc_begin0 # starting offset + .uleb128 .Lfunc_end1-.Lfunc_begin0 # ending offset + .byte 4 # Loc expr size + .byte 163 # DW_OP_entry_value + .byte 1 # 1 + .byte 84 # DW_OP_reg4 + .byte 159 # DW_OP_stack_value + .byte 0 # DW_LLE_end_of_list +.Ldebug_list_header_end0: + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 37 # DW_FORM_strx1 + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 114 # DW_AT_str_offsets_base + .byte 23 # DW_FORM_sec_offset + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 37 # DW_FORM_strx1 + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 115 # DW_AT_addr_base + .byte 23 # DW_FORM_sec_offset + .ascii "\214\001" # DW_AT_loclists_base + 
.byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 122 # DW_AT_call_all_calls + .byte 25 # DW_FORM_flag_present + .byte 110 # DW_AT_linkage_name + .byte 37 # DW_FORM_strx1 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 122 # DW_AT_call_all_calls + .byte 25 # DW_FORM_flag_present + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 5 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 34 # DW_FORM_loclistx + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # 
DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 6 # Abbreviation Code + .byte 52 # DW_TAG_variable + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 7 # Abbreviation Code + .byte 72 # DW_TAG_call_site + .byte 1 # DW_CHILDREN_yes + .byte 127 # DW_AT_call_origin + .byte 19 # DW_FORM_ref4 + .byte 125 # DW_AT_call_return_pc + .byte 27 # DW_FORM_addrx + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 8 # Abbreviation Code + .byte 73 # DW_TAG_call_site_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 126 # DW_AT_call_value + .byte 24 # DW_FORM_exprloc + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 9 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 10 # Abbreviation Code + .byte 15 # DW_TAG_pointer_type + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev # Offset Into Abbrev. 
Section + .byte 1 # Abbrev [1] 0xc:0x91 DW_TAG_compile_unit + .byte 0 # DW_AT_producer + .short 33 # DW_AT_language + .byte 1 # DW_AT_name + .long .Lstr_offsets_base0 # DW_AT_str_offsets_base + .long .Lline_table_start0 # DW_AT_stmt_list + .byte 2 # DW_AT_comp_dir + .byte 0 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin0 # DW_AT_high_pc + .long .Laddr_table_base0 # DW_AT_addr_base + .long .Lloclists_table_base0 # DW_AT_loclists_base + .byte 2 # Abbrev [2] 0x27:0x17 DW_TAG_subprogram + .byte 0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 87 + # DW_AT_call_all_calls + .byte 3 # DW_AT_linkage_name + .byte 4 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + # DW_AT_external + .byte 3 # Abbrev [3] 0x33:0xa DW_TAG_formal_parameter + .byte 1 # DW_AT_location + .byte 85 + .byte 7 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 137 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 4 # Abbrev [4] 0x3e:0x47 DW_TAG_subprogram + .byte 1 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 87 + # DW_AT_call_all_calls + .byte 5 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 4 # DW_AT_decl_line + .long 133 # DW_AT_type + # DW_AT_external + .byte 5 # Abbrev [5] 0x4d:0x9 DW_TAG_formal_parameter + .byte 0 # DW_AT_location + .byte 8 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 4 # DW_AT_decl_line + .long 133 # DW_AT_type + .byte 5 # Abbrev [5] 0x56:0x9 DW_TAG_formal_parameter + .byte 1 # DW_AT_location + .byte 9 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 4 # DW_AT_decl_line + .long 142 # DW_AT_type + .byte 6 # Abbrev [6] 0x5f:0xb DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 4 + .byte 7 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .long 133 # DW_AT_type + .byte 7 # Abbrev [7] 0x6a:0xd DW_TAG_call_site + .long 39 # DW_AT_call_origin + .byte 2 # DW_AT_call_return_pc + .byte 
8 # Abbrev [8] 0x70:0x6 DW_TAG_call_site_parameter + .byte 1 # DW_AT_location + .byte 85 + .byte 2 # DW_AT_call_value + .byte 126 + .byte 0 + .byte 0 # End Of Children Mark + .byte 7 # Abbrev [7] 0x77:0xd DW_TAG_call_site + .long 39 # DW_AT_call_origin + .byte 3 # DW_AT_call_return_pc + .byte 8 # Abbrev [8] 0x7d:0x6 DW_TAG_call_site_parameter + .byte 1 # DW_AT_location + .byte 85 + .byte 2 # DW_AT_call_value + .byte 126 + .byte 0 + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 9 # Abbrev [9] 0x85:0x4 DW_TAG_base_type + .byte 6 # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 10 # Abbrev [10] 0x89:0x5 DW_TAG_pointer_type + .long 133 # DW_AT_type + .byte 10 # Abbrev [10] 0x8e:0x5 DW_TAG_pointer_type + .long 147 # DW_AT_type + .byte 10 # Abbrev [10] 0x93:0x5 DW_TAG_pointer_type + .long 152 # DW_AT_type + .byte 9 # Abbrev [9] 0x98:0x4 DW_TAG_base_type + .byte 10 # DW_AT_name + .byte 6 # DW_AT_encoding + .byte 1 # DW_AT_byte_size + .byte 0 # End Of Children Mark +.Ldebug_info_end0: + .section .debug_str_offsets,"",@progbits + .long 48 # Length of String Offsets Set + .short 5 + .short 0 +.Lstr_offsets_base0: + .section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "clang version 19.0.0git (git@github.com:ayermolo/llvm-project.git a1e412af2bf4bf613021f72205f249ab2469f08b)" # string offset=0 +.Linfo_string1: + .asciz "main.cpp" # string offset=108 +.Linfo_string2: + .asciz "/repro" # string offset=117 +.Linfo_string3: + .asciz "_Z3usePi" # string offset=161 +.Linfo_string4: + .asciz "use" # string offset=170 +.Linfo_string5: + .asciz "main" # string offset=174 +.Linfo_string6: + .asciz "int" # string offset=179 +.Linfo_string7: + .asciz "x" # string offset=183 +.Linfo_string8: + .asciz "argc" # string offset=185 +.Linfo_string9: + .asciz "argv" # string offset=190 +.Linfo_string10: + .asciz "char" # string offset=195 + .section .debug_str_offsets,"",@progbits + .long .Linfo_string0 + .long .Linfo_string1 + .long 
.Linfo_string2 + .long .Linfo_string3 + .long .Linfo_string4 + .long .Linfo_string5 + .long .Linfo_string6 + .long .Linfo_string7 + .long .Linfo_string8 + .long .Linfo_string9 + .long .Linfo_string10 + .section .debug_addr,"",@progbits + .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution +.Ldebug_addr_start0: + .short 5 # DWARF version number + .byte 8 # Address size + .byte 0 # Segment selector size +.Laddr_table_base0: + .quad .Lfunc_begin0 + .quad .Lfunc_begin1 + .quad .Ltmp3 + .quad .Ltmp4 +.Ldebug_addr_end0: + .ident "clang version 19.0.0git (git@github.com:ayermolo/llvm-project.git a1e412af2bf4bf613021f72205f249ab2469f08b)" + .section ".note.GNU-stack","",@progbits + .addrsig + .section .debug_line,"",@progbits +.Lline_table_start0: From 7d28f19f68c82b65cf1180b170f33d6f82422d64 Mon Sep 17 00:00:00 2001 From: Jacob Lambert Date: Wed, 14 Feb 2024 11:25:58 -0800 Subject: [PATCH 176/240] [clang][CodeGen] Add missing error check (#81777) Add missing error check. This resolves "error: variable 'Err' set but not used" warnings --- clang/lib/CodeGen/CodeGenAction.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp index ab08a875e7e9c1..bb9aaba025fa59 100644 --- a/clang/lib/CodeGen/CodeGenAction.cpp +++ b/clang/lib/CodeGen/CodeGenAction.cpp @@ -291,6 +291,9 @@ bool BackendConsumer::LinkInModules(llvm::Module *M, bool ShouldLinkFiles) { }); } else Err = Linker::linkModules(*M, std::move(LM.Module), LM.LinkFlags); + + if (Err) + return true; } LinkModules.clear(); From 0eedc85baad495fa916d1da7b20db93a29b443e1 Mon Sep 17 00:00:00 2001 From: lntue <35648136+lntue@users.noreply.github.com> Date: Wed, 14 Feb 2024 14:26:55 -0500 Subject: [PATCH 177/240] [libc][stdfix] Add FXRep helper class for fixed point types. 
(#81272) --- libc/src/__support/CMakeLists.txt | 2 + libc/src/__support/fixed_point/CMakeLists.txt | 8 + libc/src/__support/fixed_point/fx_rep.h | 175 ++++++++++++++++++ 3 files changed, 185 insertions(+) create mode 100644 libc/src/__support/fixed_point/CMakeLists.txt create mode 100644 libc/src/__support/fixed_point/fx_rep.h diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt index 013627788940d8..1a4b3e9a2145c0 100644 --- a/libc/src/__support/CMakeLists.txt +++ b/libc/src/__support/CMakeLists.txt @@ -280,3 +280,5 @@ add_subdirectory(threads) add_subdirectory(File) add_subdirectory(HashTable) + +add_subdirectory(fixed_point) diff --git a/libc/src/__support/fixed_point/CMakeLists.txt b/libc/src/__support/fixed_point/CMakeLists.txt new file mode 100644 index 00000000000000..644cbff37aaade --- /dev/null +++ b/libc/src/__support/fixed_point/CMakeLists.txt @@ -0,0 +1,8 @@ +add_header_library( + fx_rep + HDRS + fx_rep.h + DEPENDS + libc.include.llvm-libc-macros.stdfix_macros + libc.src.__support.macros.attributes +) diff --git a/libc/src/__support/fixed_point/fx_rep.h b/libc/src/__support/fixed_point/fx_rep.h new file mode 100644 index 00000000000000..88cba3c95c6656 --- /dev/null +++ b/libc/src/__support/fixed_point/fx_rep.h @@ -0,0 +1,175 @@ +//===-- Utility class to manipulate fixed point numbers. --*- C++ -*-=========// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_FIXEDPOINT_FXREP_H +#define LLVM_LIBC_SRC___SUPPORT_FIXEDPOINT_FXREP_H + +#include "include/llvm-libc-macros/stdfix-macros.h" +#include "src/__support/macros/attributes.h" // LIBC_INLINE, LIBC_INLINE_VAR + +#ifdef LIBC_COMPILER_HAS_FIXED_POINT + +namespace LIBC_NAMESPACE::fixed_point { + +template <typename T> struct FXRep; + +template <> struct FXRep<short fract> { + using Type = short _Fract; + LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1; + LIBC_INLINE_VAR static constexpr int INTEGRAL_LEN = 0; + LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SFRACT_FBIT; + LIBC_INLINE static constexpr Type MIN() { return SFRACT_MIN; } + LIBC_INLINE static constexpr Type MAX() { return SFRACT_MAX; } + LIBC_INLINE static constexpr Type ZERO() { return 0.0HR; } + LIBC_INLINE static constexpr Type EPS() { return SFRACT_EPSILON; } +}; + +template <> struct FXRep<unsigned short fract> { + using Type = unsigned short fract; + LIBC_INLINE_VAR static constexpr int SIGN_LEN = 0; + LIBC_INLINE_VAR static constexpr int INTEGRAL_LEN = 0; + LIBC_INLINE_VAR static constexpr int FRACTION_LEN = USFRACT_FBIT; + LIBC_INLINE static constexpr Type MIN() { return USFRACT_MIN; } + LIBC_INLINE static constexpr Type MAX() { return USFRACT_MAX; } + LIBC_INLINE static constexpr Type ZERO() { return 0.0UHR; } + LIBC_INLINE static constexpr Type EPS() { return USFRACT_EPSILON; } +}; + +template <> struct FXRep<fract> { + using Type = fract; + LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1; + LIBC_INLINE_VAR static constexpr int INTEGRAL_LEN = 0; + LIBC_INLINE_VAR static constexpr int FRACTION_LEN = FRACT_FBIT; + LIBC_INLINE static constexpr Type MIN() { return FRACT_MIN; } + LIBC_INLINE static constexpr Type MAX() { return FRACT_MAX; } + LIBC_INLINE static constexpr Type ZERO() { return 0.0R; } + LIBC_INLINE static constexpr Type EPS() { return FRACT_EPSILON; } +}; + 
+template <> struct FXRep<unsigned fract> { + using Type = unsigned fract; + LIBC_INLINE_VAR static constexpr int SIGN_LEN = 0; + LIBC_INLINE_VAR static constexpr int INTEGRAL_LEN = 0; + LIBC_INLINE_VAR static constexpr int FRACTION_LEN = UFRACT_FBIT; + LIBC_INLINE static constexpr Type MIN() { return UFRACT_MIN; } + LIBC_INLINE static constexpr Type MAX() { return UFRACT_MAX; } + LIBC_INLINE static constexpr Type ZERO() { return 0.0UR; } + LIBC_INLINE static constexpr Type EPS() { return UFRACT_EPSILON; } +}; + +template <> struct FXRep<long fract> { + using Type = long fract; + LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1; + LIBC_INLINE_VAR static constexpr int INTEGRAL_LEN = 0; + LIBC_INLINE_VAR static constexpr int FRACTION_LEN = LFRACT_FBIT; + LIBC_INLINE static constexpr Type MIN() { return LFRACT_MIN; } + LIBC_INLINE static constexpr Type MAX() { return LFRACT_MAX; } + LIBC_INLINE static constexpr Type ZERO() { return 0.0LR; } + LIBC_INLINE static constexpr Type EPS() { return LFRACT_EPSILON; } +}; + +template <> struct FXRep<unsigned long fract> { + using Type = unsigned long fract; + LIBC_INLINE_VAR static constexpr int SIGN_LEN = 0; + LIBC_INLINE_VAR static constexpr int INTEGRAL_LEN = 0; + LIBC_INLINE_VAR static constexpr int FRACTION_LEN = ULFRACT_FBIT; + LIBC_INLINE static constexpr Type MIN() { return ULFRACT_MIN; } + LIBC_INLINE static constexpr Type MAX() { return ULFRACT_MAX; } + LIBC_INLINE static constexpr Type ZERO() { return 0.0ULR; } + LIBC_INLINE static constexpr Type EPS() { return ULFRACT_EPSILON; } +}; + +template <> struct FXRep<short accum> { + using Type = short accum; + LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1; + LIBC_INLINE_VAR static constexpr int INTEGRAL_LEN = SACCUM_IBIT; + LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SACCUM_FBIT; + LIBC_INLINE static constexpr Type MIN() { return SACCUM_MIN; } + LIBC_INLINE static constexpr Type MAX() { return SACCUM_MAX; } + LIBC_INLINE static constexpr Type ZERO() { return 0.0HK; } + LIBC_INLINE static constexpr Type EPS() { return 
SACCUM_EPSILON; } +}; + +template <> struct FXRep<unsigned short accum> { + using Type = unsigned short accum; + LIBC_INLINE_VAR static constexpr int SIGN_LEN = 0; + LIBC_INLINE_VAR static constexpr int INTEGRAL_LEN = USACCUM_IBIT; + LIBC_INLINE_VAR static constexpr int FRACTION_LEN = USACCUM_FBIT; + LIBC_INLINE static constexpr Type MIN() { return USACCUM_MIN; } + LIBC_INLINE static constexpr Type MAX() { return USACCUM_MAX; } + LIBC_INLINE static constexpr Type ZERO() { return 0.0UHK; } + LIBC_INLINE static constexpr Type EPS() { return USACCUM_EPSILON; } +}; + +template <> struct FXRep<accum> { + using Type = accum; + LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1; + LIBC_INLINE_VAR static constexpr int INTEGRAL_LEN = ACCUM_IBIT; + LIBC_INLINE_VAR static constexpr int FRACTION_LEN = ACCUM_FBIT; + LIBC_INLINE static constexpr Type MIN() { return ACCUM_MIN; } + LIBC_INLINE static constexpr Type MAX() { return ACCUM_MAX; } + LIBC_INLINE static constexpr Type ZERO() { return 0.0K; } + LIBC_INLINE static constexpr Type EPS() { return ACCUM_EPSILON; } +}; + +template <> struct FXRep<unsigned accum> { + using Type = unsigned accum; + LIBC_INLINE_VAR static constexpr int SIGN_LEN = 0; + LIBC_INLINE_VAR static constexpr int INTEGRAL_LEN = UACCUM_IBIT; + LIBC_INLINE_VAR static constexpr int FRACTION_LEN = UACCUM_FBIT; + LIBC_INLINE static constexpr Type MIN() { return UACCUM_MIN; } + LIBC_INLINE static constexpr Type MAX() { return UACCUM_MAX; } + LIBC_INLINE static constexpr Type ZERO() { return 0.0UK; } + LIBC_INLINE static constexpr Type EPS() { return UACCUM_EPSILON; } +}; + +template <> struct FXRep<long accum> { + using Type = long accum; + LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1; + LIBC_INLINE_VAR static constexpr int INTEGRAL_LEN = LACCUM_IBIT; + LIBC_INLINE_VAR static constexpr int FRACTION_LEN = LACCUM_FBIT; + LIBC_INLINE static constexpr Type MIN() { return LACCUM_MIN; } + LIBC_INLINE static constexpr Type MAX() { return LACCUM_MAX; } + LIBC_INLINE static constexpr Type ZERO() { return 0.0LK; } + 
LIBC_INLINE static constexpr Type EPS() { return LACCUM_EPSILON; } +}; + +template <> struct FXRep<unsigned long accum> { + using Type = unsigned long accum; + LIBC_INLINE_VAR static constexpr int SIGN_LEN = 0; + LIBC_INLINE_VAR static constexpr int INTEGRAL_LEN = ULACCUM_IBIT; + LIBC_INLINE_VAR static constexpr int FRACTION_LEN = ULACCUM_FBIT; + LIBC_INLINE static constexpr Type MIN() { return ULACCUM_MIN; } + LIBC_INLINE static constexpr Type MAX() { return ULACCUM_MAX; } + LIBC_INLINE static constexpr Type ZERO() { return 0.0ULK; } + LIBC_INLINE static constexpr Type EPS() { return ULACCUM_EPSILON; } +}; + +template <> struct FXRep<short sat fract> : FXRep<short fract> {}; +template <> struct FXRep<sat fract> : FXRep<fract> {}; +template <> struct FXRep<long sat fract> : FXRep<long fract> {}; +template <> +struct FXRep<unsigned short sat fract> : FXRep<unsigned short fract> {}; +template <> struct FXRep<unsigned sat fract> : FXRep<unsigned fract> {}; +template <> +struct FXRep<unsigned long sat fract> : FXRep<unsigned long fract> {}; + +template <> struct FXRep<short sat accum> : FXRep<short accum> {}; +template <> struct FXRep<sat accum> : FXRep<accum> {}; +template <> struct FXRep<long sat accum> : FXRep<long accum> {}; +template <> +struct FXRep<unsigned short sat accum> : FXRep<unsigned short accum> {}; +template <> struct FXRep<unsigned sat accum> : FXRep<unsigned accum> {}; +template <> +struct FXRep<unsigned long sat accum> : FXRep<unsigned long accum> {}; + +} // namespace LIBC_NAMESPACE::fixed_point + +#endif // LIBC_COMPILER_HAS_FIXED_POINT + +#endif // LLVM_LIBC_SRC___SUPPORT_FIXEDPOINT_FXREP_H From 9b80ab4332bbe336ab8b9f2082eadf6b8d223150 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A9lix-Antoine=20Constantin?= <60141446+felix642@users.noreply.github.com> Date: Wed, 14 Feb 2024 14:30:21 -0500 Subject: [PATCH 178/240] =?UTF-8?q?[clang-tidy]=C2=A0Removed=20redundant-i?= =?UTF-8?q?nline-specifier=20warning=20on=20static=20data=20members=20(#81?= =?UTF-8?q?423)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Updated the check to ignore point static data members with in class initializer since removing the inline specifier would generate a compilation error Fixes #80684 --- .../readability/RedundantInlineSpecifierCheck.cpp | 10 +++++++--- clang-tools-extra/docs/ReleaseNotes.rst | 4 ++++ .../readability/redundant-inline-specifier.cpp | 
14 ++++++++++++++ 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/clang-tools-extra/clang-tidy/readability/RedundantInlineSpecifierCheck.cpp b/clang-tools-extra/clang-tidy/readability/RedundantInlineSpecifierCheck.cpp index 0e8d17d4442478..1693e5c5e9cd45 100644 --- a/clang-tools-extra/clang-tidy/readability/RedundantInlineSpecifierCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/RedundantInlineSpecifierCheck.cpp @@ -88,10 +88,14 @@ void RedundantInlineSpecifierCheck::registerMatchers(MatchFinder *Finder) { this); if (getLangOpts().CPlusPlus17) { + const auto IsPartOfRecordDecl = hasAncestor(recordDecl()); Finder->addMatcher( - varDecl(isInlineSpecified(), - anyOf(isInternalLinkage(StrictMode), - allOf(isConstexpr(), hasAncestor(recordDecl())))) + varDecl( + isInlineSpecified(), + anyOf(allOf(isInternalLinkage(StrictMode), + unless(allOf(hasInitializer(expr()), IsPartOfRecordDecl, + isStaticStorageClass()))), + allOf(isConstexpr(), IsPartOfRecordDecl))) .bind("var_decl"), this); } diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index f2fba9aa1450d6..2f874d17da430d 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -164,6 +164,10 @@ Changes in existing checks `AllowStringArrays` option, enabling the exclusion of array types with deduced length initialized from string literals. +- Improved :doc:`readability-redundant-inline-specifier + ` check to properly + emit warnings for static data member with an in-class initializer. 
+ Removed checks ^^^^^^^^^^^^^^ diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/redundant-inline-specifier.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/redundant-inline-specifier.cpp index cdd98d8fdc20f5..14f9e88f7e7218 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/readability/redundant-inline-specifier.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/redundant-inline-specifier.cpp @@ -135,3 +135,17 @@ INLINE_MACRO() #define INLINE_KW inline INLINE_KW void fn10() { } + +namespace { +class A +{ +public: + static inline float test = 3.0F; + static inline double test2 = 3.0; + static inline int test3 = 3; + + static inline float test4; + // CHECK-MESSAGES-STRICT: :[[@LINE-1]]:10: warning: variable 'test4' has inline specifier but is implicitly inlined [readability-redundant-inline-specifier] + // CHECK-FIXES-STRICT: static float test4; +}; +} From d592c8ec8f7138dcbde6f0890d048e59cba95041 Mon Sep 17 00:00:00 2001 From: Diego Caballero Date: Wed, 14 Feb 2024 11:38:52 -0800 Subject: [PATCH 179/240] Reapply "[mlir][vector] Drop inner unit dims for transfer ops on dynamic shapes." (#80712) (#81778) This reverts commit b4c7152eb4f7971c111e3e2f60b55892def58d5d. Downstream regression due to another issue that this PR exposes. 
We have identified the work-items to fix the new issue here: https://github.com/openxla/iree/issues/16406 Co-authored-by: Han-Chung Wang --- .../Vector/Transforms/VectorTransforms.cpp | 14 ++++++++------ ...tor-transfer-collapse-inner-most-dims.mlir | 19 +++++++++++++++++++ 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp index 53ae138d1e43a0..74dd1dfaca0da8 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp @@ -1237,7 +1237,7 @@ class DropInnerMostUnitDimsTransferRead return failure(); auto srcType = dyn_cast(readOp.getSource().getType()); - if (!srcType || !srcType.hasStaticShape()) + if (!srcType) return failure(); if (!readOp.getPermutationMap().isMinorIdentity()) @@ -1261,19 +1261,21 @@ class DropInnerMostUnitDimsTransferRead targetType.getElementType()); auto loc = readOp.getLoc(); + SmallVector sizes = + memref::getMixedSizes(rewriter, loc, readOp.getSource()); + SmallVector offsets(srcType.getRank(), + rewriter.getIndexAttr(0)); + SmallVector strides(srcType.getRank(), + rewriter.getIndexAttr(1)); MemRefType resultMemrefType = getMemRefTypeWithDroppingInnerDims(rewriter, srcType, dimsToDrop); - SmallVector offsets(srcType.getRank(), 0); - SmallVector strides(srcType.getRank(), 1); - ArrayAttr inBoundsAttr = readOp.getInBounds() ? 
rewriter.getArrayAttr( readOp.getInBoundsAttr().getValue().drop_back(dimsToDrop)) : ArrayAttr(); Value rankedReducedView = rewriter.create( - loc, resultMemrefType, readOp.getSource(), offsets, srcType.getShape(), - strides); + loc, resultMemrefType, readOp.getSource(), offsets, sizes, strides); auto permMap = getTransferMinorIdentityMap( cast(rankedReducedView.getType()), resultTargetVecType); Value result = rewriter.create( diff --git a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir index 750879df129b14..3984f17f9e8cdb 100644 --- a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir +++ b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir @@ -16,6 +16,25 @@ func.func @contiguous_inner_most_view(%in: memref<1x1x8x1xf32, strided<[3072, 8, // ----- +func.func @contiguous_outer_dyn_inner_most_view(%in: memref>) -> vector<1x8x1xf32>{ + %c0 = arith.constant 0 : index + %cst = arith.constant 0.0 : f32 + %0 = vector.transfer_read %in[%c0, %c0, %c0, %c0], %cst {in_bounds = [true, true, true]} : memref>, vector<1x8x1xf32> + return %0 : vector<1x8x1xf32> +} +// CHECK: func @contiguous_outer_dyn_inner_most_view( +// CHECK-SAME: %[[SRC:[a-zA-Z0-9]+]] +// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index +// CHECK-DAG: %[[D0:.+]] = memref.dim %[[SRC]], %[[C0]] +// CHECK: %[[SRC_0:.+]] = memref.subview %[[SRC]][0, 0, 0, 0] [%[[D0]], 1, 8, 1] [1, 1, 1, 1] +// CHECK-SAME: memref> to memref> +// CHECK: %[[VEC:.+]] = vector.transfer_read %[[SRC_0]] +// CHECK-SAME: memref>, vector<1x8xf32> +// CHECK: %[[RESULT:.+]] = vector.shape_cast %[[VEC]] +// CHECK: return %[[RESULT]] + +// ----- + func.func @contiguous_inner_most_dim(%A: memref<16x1xf32>, %i:index, %j:index) -> (vector<8x1xf32>) { %c0 = arith.constant 0 : index %f0 = arith.constant 0.0 : f32 From 1301bc46aea14297478bd13bcacff429e2a18c04 Mon Sep 17 00:00:00 2001 From: lntue 
<35648136+lntue@users.noreply.github.com> Date: Wed, 14 Feb 2024 14:44:09 -0500 Subject: [PATCH 180/240] [libc] Add is_fixed_point type trait. (#81263) --- libc/include/llvm-libc-macros/stdfix-macros.h | 2 +- libc/src/__support/CPP/CMakeLists.txt | 2 + libc/src/__support/CPP/type_traits.h | 1 + .../CPP/type_traits/is_fixed_point.h | 46 +++++++++++++++++++ .../llvm-project-overlay/libc/BUILD.bazel | 1 + 5 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 libc/src/__support/CPP/type_traits/is_fixed_point.h diff --git a/libc/include/llvm-libc-macros/stdfix-macros.h b/libc/include/llvm-libc-macros/stdfix-macros.h index 7cb74adc3999fe..9c83dbc8ef5463 100644 --- a/libc/include/llvm-libc-macros/stdfix-macros.h +++ b/libc/include/llvm-libc-macros/stdfix-macros.h @@ -11,7 +11,7 @@ #ifdef __clang__ #if (!defined(__cplusplus) || (__clang_major__ >= 18)) -// _Fract and _Accum types are avaiable +// _Fract and _Accum types are available #define LIBC_COMPILER_HAS_FIXED_POINT #endif // __cplusplus #endif // __clang__ diff --git a/libc/src/__support/CPP/CMakeLists.txt b/libc/src/__support/CPP/CMakeLists.txt index f10bb936047eb9..d747412791bd8e 100644 --- a/libc/src/__support/CPP/CMakeLists.txt +++ b/libc/src/__support/CPP/CMakeLists.txt @@ -122,6 +122,7 @@ add_header_library( type_traits/is_convertible.h type_traits/is_destructible.h type_traits/is_enum.h + type_traits/is_fixed_point.h type_traits/is_floating_point.h type_traits/is_function.h type_traits/is_integral.h @@ -155,6 +156,7 @@ add_header_library( libc.src.__support.macros.attributes libc.src.__support.macros.config libc.src.__support.macros.properties.float + libc.include.llvm-libc-macros.stdfix_macros ) add_header_library( diff --git a/libc/src/__support/CPP/type_traits.h b/libc/src/__support/CPP/type_traits.h index 1eb2f34ebee37f..697cf79d6ccc59 100644 --- a/libc/src/__support/CPP/type_traits.h +++ b/libc/src/__support/CPP/type_traits.h @@ -28,6 +28,7 @@ #include 
"src/__support/CPP/type_traits/is_convertible.h" #include "src/__support/CPP/type_traits/is_destructible.h" #include "src/__support/CPP/type_traits/is_enum.h" +#include "src/__support/CPP/type_traits/is_fixed_point.h" #include "src/__support/CPP/type_traits/is_floating_point.h" #include "src/__support/CPP/type_traits/is_function.h" #include "src/__support/CPP/type_traits/is_integral.h" diff --git a/libc/src/__support/CPP/type_traits/is_fixed_point.h b/libc/src/__support/CPP/type_traits/is_fixed_point.h new file mode 100644 index 00000000000000..317ba39748b7de --- /dev/null +++ b/libc/src/__support/CPP/type_traits/is_fixed_point.h @@ -0,0 +1,46 @@ +//===-- is_fixed_point type_traits ------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_LIBC_SRC___SUPPORT_CPP_TYPE_TRAITS_IS_FIXED_POINT_H +#define LLVM_LIBC_SRC___SUPPORT_CPP_TYPE_TRAITS_IS_FIXED_POINT_H + +#include "src/__support/CPP/type_traits/is_same.h" +#include "src/__support/CPP/type_traits/remove_cv.h" +#include "src/__support/macros/attributes.h" + +#include "include/llvm-libc-macros/stdfix-macros.h" + +namespace LIBC_NAMESPACE::cpp { + +// is_fixed_point +#ifdef LIBC_COMPILER_HAS_FIXED_POINT +template struct is_fixed_point { +private: + template + LIBC_INLINE static constexpr bool __is_unqualified_any_of() { + return (... 
|| is_same_v, Args>); + } + +public: + LIBC_INLINE_VAR static constexpr bool value = __is_unqualified_any_of< + T, short fract, fract, long fract, unsigned short fract, unsigned fract, + unsigned long fract, short accum, accum, long accum, unsigned short accum, + unsigned accum, unsigned long accum, short sat fract, sat fract, + long sat fract, unsigned short sat fract, unsigned sat fract, + unsigned long sat fract, short sat accum, sat accum, long sat accum, + unsigned short sat accum, unsigned sat accum, unsigned long sat accum>(); +}; +#else +template struct is_fixed_point : false_type {}; +#endif // LIBC_COMPILER_HAS_FIXED_POINT + +template +LIBC_INLINE_VAR constexpr bool is_fixed_point_v = is_fixed_point::value; + +} // namespace LIBC_NAMESPACE::cpp + +#endif // LLVM_LIBC_SRC___SUPPORT_CPP_TYPE_TRAITS_IS_FIXED_POINT_H diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index fde2bac746f4f8..64e788eae34ebd 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -296,6 +296,7 @@ libc_support_library( "src/__support/CPP/type_traits/is_convertible.h", "src/__support/CPP/type_traits/is_destructible.h", "src/__support/CPP/type_traits/is_enum.h", + "src/__support/CPP/type_traits/is_fixed_point.h", "src/__support/CPP/type_traits/is_floating_point.h", "src/__support/CPP/type_traits/is_function.h", "src/__support/CPP/type_traits/is_integral.h", From 5992b3272b29e071f6f5a4807a4e0c23e88c310d Mon Sep 17 00:00:00 2001 From: Kevin Joseph Date: Wed, 14 Feb 2024 11:59:21 -0800 Subject: [PATCH 181/240] [clangd] Clean formatting modernize-use-override (#81435) When applying the recommended fix for the "modernize-use-override" clang-tidy diagnostic there was a stray whitespace. This PR fixes that. 
Resolves https://github.com/clangd/clangd/issues/1704 --- .../clang-tidy/modernize/UseOverrideCheck.cpp | 12 ++++-- .../clangd/unittests/DiagnosticsTests.cpp | 40 +++++++++++++++++++ clang-tools-extra/docs/ReleaseNotes.rst | 4 ++ .../checkers/modernize/use-override.cpp | 5 +++ 4 files changed, 58 insertions(+), 3 deletions(-) diff --git a/clang-tools-extra/clang-tidy/modernize/UseOverrideCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseOverrideCheck.cpp index e348968b325a5a..fd5bd9f0b181b1 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseOverrideCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseOverrideCheck.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "UseOverrideCheck.h" +#include "../utils/LexerUtils.h" #include "clang/AST/ASTContext.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/Lex/Lexer.h" @@ -228,9 +229,14 @@ void UseOverrideCheck::check(const MatchFinder::MatchResult &Result) { if (HasVirtual) { for (Token Tok : Tokens) { if (Tok.is(tok::kw_virtual)) { - Diag << FixItHint::CreateRemoval(CharSourceRange::getTokenRange( - Tok.getLocation(), Tok.getLocation())); - break; + std::optional NextToken = + utils::lexer::findNextTokenIncludingComments( + Tok.getEndLoc(), Sources, getLangOpts()); + if (NextToken.has_value()) { + Diag << FixItHint::CreateRemoval(CharSourceRange::getCharRange( + Tok.getLocation(), NextToken->getLocation())); + break; + } } } } diff --git a/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp b/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp index f302dcf5f09db0..4839879e1b78c8 100644 --- a/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp +++ b/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp @@ -898,6 +898,46 @@ TEST(DiagnosticTest, ClangTidySelfContainedDiags) { withFix(equalToFix(ExpectedDFix)))))); } +TEST(DiagnosticTest, ClangTidySelfContainedDiagsFormatting) { + Annotations Main(R"cpp( + class Interface { + 
public: + virtual void Reset1() = 0; + virtual void Reset2() = 0; + }; + class A : public Interface { + // This will be marked by clangd to use override instead of virtual + $virtual1[[virtual ]]void $Reset1[[Reset1]]()$override1[[]]; + $virtual2[[virtual ]]/**/void $Reset2[[Reset2]]()$override2[[]]; + }; + )cpp"); + TestTU TU = TestTU::withCode(Main.code()); + TU.ClangTidyProvider = + addTidyChecks("cppcoreguidelines-explicit-virtual-functions,"); + clangd::Fix const ExpectedFix1{ + "prefer using 'override' or (rarely) 'final' " + "instead of 'virtual'", + {TextEdit{Main.range("override1"), " override"}, + TextEdit{Main.range("virtual1"), ""}}}; + clangd::Fix const ExpectedFix2{ + "prefer using 'override' or (rarely) 'final' " + "instead of 'virtual'", + {TextEdit{Main.range("override2"), " override"}, + TextEdit{Main.range("virtual2"), ""}}}; + // Note that in the Fix we expect the "virtual" keyword and the following + // whitespace to be deleted + EXPECT_THAT(TU.build().getDiagnostics(), + ifTidyChecks(UnorderedElementsAre( + AllOf(Diag(Main.range("Reset1"), + "prefer using 'override' or (rarely) 'final' " + "instead of 'virtual'"), + withFix(equalToFix(ExpectedFix1))), + AllOf(Diag(Main.range("Reset2"), + "prefer using 'override' or (rarely) 'final' " + "instead of 'virtual'"), + withFix(equalToFix(ExpectedFix2)))))); +} + TEST(DiagnosticsTest, Preprocessor) { // This looks like a preamble, but there's an #else in the middle! // Check that: diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 2f874d17da430d..a1b95d2a2020fe 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -164,6 +164,10 @@ Changes in existing checks `AllowStringArrays` option, enabling the exclusion of array types with deduced length initialized from string literals. +- Improved :doc:`modernize-use-override + ` check to also remove any trailing + whitespace when deleting the ``virtual`` keyword. 
+ - Improved :doc:`readability-redundant-inline-specifier ` check to properly emit warnings for static data member with an in-class initializer. diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-override.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-override.cpp index 55f226be70869e..89d1aa48c46a3c 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-override.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-override.cpp @@ -27,6 +27,7 @@ struct Base { virtual void f() = 0; virtual void f2() const = 0; virtual void g() = 0; + virtual void g2() = 0; virtual void j() const; virtual MustUseResultObject k(); @@ -126,6 +127,10 @@ struct SimpleCases : public Base { virtual void t() throw(); // CHECK-MESSAGES: :[[@LINE-1]]:16: warning: prefer using // CHECK-FIXES: {{^}} void t() throw() override; + + virtual /* */ void g2(); + // CHECK-MESSAGES: :[[@LINE-1]]:33: warning: prefer using 'override' or (rarely) 'final' instead of 'virtual' + // CHECK-FIXES: {{^}} /* */ void g2() override; }; // CHECK-MESSAGES-NOT: warning: From a78d13d0786bc81058ee9aaa7d1c854ee19cee48 Mon Sep 17 00:00:00 2001 From: Alexander Yermolovich <43973793+ayermolo@users.noreply.github.com> Date: Wed, 14 Feb 2024 12:22:53 -0800 Subject: [PATCH 182/240] [LLVM][DWARF] Change .debug_names abbrev to be an index (#81200) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Based on the discussion in https://github.com/llvm/llvm-project/pull/80229 changed implementation to align with how .debug_abbrev is handled. So that .debug_names abbrev tag is a monotonically increasing index. This allows for tools like LLDB to access it in constant time using array like data structure. clang-19 debug build before change
 [41] .debug_names PROGBITS 0000000000000000 8f9e0350 137fdbe0 00 0 0 4 after change [41] .debug_names PROGBITS 0000000000000000 8f9e0350 125bfdec 00 0 0 4 Reduction ~19.1MB --- llvm/include/llvm/CodeGen/AccelTable.h | 49 +++++-- llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp | 130 +++++++----------- .../test/DebugInfo/X86/debug-names-dwarf64.ll | 10 +- llvm/test/DebugInfo/X86/debug-names-types.ll | 42 +++--- .../ARM/dwarf5-dwarf4-combination-macho.test | 8 +- 5 files changed, 120 insertions(+), 119 deletions(-) diff --git a/llvm/include/llvm/CodeGen/AccelTable.h b/llvm/include/llvm/CodeGen/AccelTable.h index e6a661696354b7..6ee817a7124dc3 100644 --- a/llvm/include/llvm/CodeGen/AccelTable.h +++ b/llvm/include/llvm/CodeGen/AccelTable.h @@ -275,11 +275,6 @@ struct DenseMapInfo : DenseMapInfo {}; /// emitDWARF5AccelTable function. class DWARF5AccelTableData : public AccelTableData { public: - struct AttributeEncoding { - dwarf::Index Index; - dwarf::Form Form; - }; - static uint32_t hash(StringRef Name) { return caseFoldingDjbHash(Name); } DWARF5AccelTableData(const DIE &Die, const uint32_t UnitID, @@ -289,7 +284,7 @@ class DWARF5AccelTableData : public AccelTableData { const unsigned DieTag, const unsigned UnitID, const bool IsTU = false) : OffsetVal(DieOffset), ParentOffset(DefiningParentOffset), - DieTag(DieTag), UnitID(UnitID), IsTU(IsTU) {} + DieTag(DieTag), AbbrevNumber(0), IsTU(IsTU), UnitID(UnitID) {} #ifndef NDEBUG void print(raw_ostream &OS) const override; @@ -330,6 +325,12 @@ class DWARF5AccelTableData : public AccelTableData { return OffsetAndUnitID(*ParentOffset, getUnitID()); } + /// Sets AbbrevIndex for an Entry. + void setAbbrevNumber(uint16_t AbbrevNum) { AbbrevNumber = AbbrevNum; } + + /// Returns AbbrevIndex for an Entry. + uint16_t getAbbrevNumber() const { return AbbrevNumber; } + /// If `Die` has a non-null parent and the parent is not a declaration, /// return its offset. 
static std::optional getDefiningParentDieOffset(const DIE &Die); @@ -338,12 +339,42 @@ class DWARF5AccelTableData : public AccelTableData { std::variant OffsetVal; std::optional ParentOffset; uint32_t DieTag : 16; - uint32_t UnitID : 15; + uint32_t AbbrevNumber : 15; uint32_t IsTU : 1; - + uint32_t UnitID; uint64_t order() const override { return getDieOffset(); } }; +class DebugNamesAbbrev : public FoldingSetNode { +public: + uint32_t DieTag; + uint32_t Number; + struct AttributeEncoding { + dwarf::Index Index; + dwarf::Form Form; + }; + DebugNamesAbbrev(uint32_t DieTag) : DieTag(DieTag) {} + /// Add attribute encoding to an abbreviation. + void addAttribute(const DebugNamesAbbrev::AttributeEncoding &Attr) { + AttrVect.push_back(Attr); + } + /// Set abbreviation tag index. + void setNumber(uint32_t AbbrevNumber) { Number = AbbrevNumber; } + /// Get abbreviation tag index. + uint32_t getNumber() const { return Number; } + /// Get DIE Tag. + uint32_t getDieTag() const { return DieTag; } + /// Used to gather unique data for the abbreviation folding set. + void Profile(FoldingSetNodeID &ID) const; + /// Returns attributes for an abbreviation. + const SmallVector &getAttributes() const { + return AttrVect; + } + +private: + SmallVector AttrVect; +}; + struct TypeUnitMetaInfo { // Symbol for start of the TU section or signature if this is SplitDwarf. std::variant LabelOrSignature; @@ -358,7 +389,7 @@ class DWARF5AccelTable : public AccelTable { public: struct UnitIndexAndEncoding { unsigned Index; - DWARF5AccelTableData::AttributeEncoding Encoding; + DebugNamesAbbrev::AttributeEncoding Encoding; }; /// Returns type units that were constructed. 
const TUVectorTy &getTypeUnitsSymbols() { return TUSymbolsOrHashes; } diff --git a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp index 1024aabf2ab0f6..230d7add6d37cf 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp @@ -208,8 +208,13 @@ class Dwarf5AccelTableWriter : public AccelTableWriter { }; Header Header; - DenseMap> - Abbreviations; + /// FoldingSet that uniques the abbreviations. + FoldingSet AbbreviationsSet; + /// Vector containing DebugNames abbreviations for iteration in order. + SmallVector AbbreviationsVector; + /// The bump allocator to use when creating DIEAbbrev objects in the uniqued + /// storage container. + BumpPtrAllocator Alloc; ArrayRef> CompUnits; ArrayRef> TypeUnits; llvm::function_ref( @@ -234,7 +239,7 @@ class Dwarf5AccelTableWriter : public AccelTableWriter { void emitEntry( const DWARF5AccelTableData &Entry, const DenseMap &DIEOffsetToAccelEntryLabel, - DenseSet &EmittedAccelEntrySymbols) const; + DenseSet &EmittedAccelEntrySymbols); void emitData(); public: @@ -370,7 +375,7 @@ void AppleAccelTableWriter::emit() const { DWARF5AccelTableData::DWARF5AccelTableData(const DIE &Die, const uint32_t UnitID, const bool IsTU) - : OffsetVal(&Die), DieTag(Die.getTag()), UnitID(UnitID), IsTU(IsTU) {} + : OffsetVal(&Die), DieTag(Die.getTag()), IsTU(IsTU), UnitID(UnitID) {} void Dwarf5AccelTableWriter::Header::emit(Dwarf5AccelTableWriter &Ctx) { assert(CompUnitCount > 0 && "Index must have at least one CU."); @@ -409,51 +414,6 @@ DWARF5AccelTableData::getDefiningParentDieOffset(const DIE &Die) { return {}; } -enum IdxParentEncoding : uint8_t { - NoIndexedParent = 0, /// Parent information present but parent isn't indexed. - Ref4 = 1, /// Parent information present and parent is indexed. - NoParent = 2, /// Parent information missing. 
-}; - -static uint32_t constexpr NumBitsIdxParent = 2; - -uint8_t encodeIdxParent(const std::optional MaybeParentForm) { - if (!MaybeParentForm) - return NoParent; - switch (*MaybeParentForm) { - case dwarf::Form::DW_FORM_flag_present: - return NoIndexedParent; - case dwarf::Form::DW_FORM_ref4: - return Ref4; - default: - // This is not crashing on bad input: we should only reach this if the - // internal compiler logic is faulty; see getFormForIdxParent. - llvm_unreachable("Bad form for IDX_parent"); - } -} - -static uint32_t constexpr ParentBitOffset = dwarf::DW_IDX_type_hash; -static uint32_t constexpr TagBitOffset = ParentBitOffset + NumBitsIdxParent; -static uint32_t getTagFromAbbreviationTag(const uint32_t AbbrvTag) { - return AbbrvTag >> TagBitOffset; -} - -/// Constructs a unique AbbrevTag that captures what a DIE accesses. -/// Using this tag we can emit a unique abbreviation for each DIE. -static uint32_t constructAbbreviationTag( - const unsigned Tag, - const std::optional &EntryRet, - std::optional MaybeParentForm) { - uint32_t AbbrvTag = 0; - if (EntryRet) - AbbrvTag |= 1 << EntryRet->Encoding.Index; - AbbrvTag |= 1 << dwarf::DW_IDX_die_offset; - AbbrvTag |= 1 << dwarf::DW_IDX_parent; - AbbrvTag |= encodeIdxParent(MaybeParentForm) << ParentBitOffset; - AbbrvTag |= Tag << TagBitOffset; - return AbbrvTag; -} - static std::optional getFormForIdxParent(const DenseSet &IndexedOffsets, std::optional ParentOffset) { @@ -467,26 +427,42 @@ getFormForIdxParent(const DenseSet &IndexedOffsets, return dwarf::Form::DW_FORM_flag_present; } +void DebugNamesAbbrev::Profile(FoldingSetNodeID &ID) const { + ID.AddInteger(DieTag); + for (const DebugNamesAbbrev::AttributeEncoding &Enc : AttrVect) { + ID.AddInteger(Enc.Index); + ID.AddInteger(Enc.Form); + } +} + void Dwarf5AccelTableWriter::populateAbbrevsMap() { for (auto &Bucket : Contents.getBuckets()) { for (auto *Hash : Bucket) { for (auto *Value : Hash->getValues()) { std::optional EntryRet = getIndexForEntry(*Value); 
- unsigned Tag = Value->getDieTag(); std::optional MaybeParentForm = getFormForIdxParent( IndexedOffsets, Value->getParentDieOffsetAndUnitID()); - uint32_t AbbrvTag = - constructAbbreviationTag(Tag, EntryRet, MaybeParentForm); - if (Abbreviations.count(AbbrvTag) == 0) { - SmallVector UA; - if (EntryRet) - UA.push_back(EntryRet->Encoding); - UA.push_back({dwarf::DW_IDX_die_offset, dwarf::DW_FORM_ref4}); - if (MaybeParentForm) - UA.push_back({dwarf::DW_IDX_parent, *MaybeParentForm}); - Abbreviations.try_emplace(AbbrvTag, UA); + DebugNamesAbbrev Abbrev(Value->getDieTag()); + if (EntryRet) + Abbrev.addAttribute(EntryRet->Encoding); + Abbrev.addAttribute({dwarf::DW_IDX_die_offset, dwarf::DW_FORM_ref4}); + if (MaybeParentForm) + Abbrev.addAttribute({dwarf::DW_IDX_parent, *MaybeParentForm}); + FoldingSetNodeID ID; + Abbrev.Profile(ID); + void *InsertPos; + if (DebugNamesAbbrev *Existing = + AbbreviationsSet.FindNodeOrInsertPos(ID, InsertPos)) { + Value->setAbbrevNumber(Existing->getNumber()); + continue; } + DebugNamesAbbrev *NewAbbrev = + new (Alloc) DebugNamesAbbrev(std::move(Abbrev)); + AbbreviationsVector.push_back(NewAbbrev); + NewAbbrev->setNumber(AbbreviationsVector.size()); + AbbreviationsSet.InsertNode(NewAbbrev, InsertPos); + Value->setAbbrevNumber(NewAbbrev->getNumber()); } } } @@ -536,14 +512,13 @@ void Dwarf5AccelTableWriter::emitStringOffsets() const { void Dwarf5AccelTableWriter::emitAbbrevs() const { Asm->OutStreamer->emitLabel(AbbrevStart); - for (const auto &Abbrev : Abbreviations) { + for (const DebugNamesAbbrev *Abbrev : AbbreviationsVector) { Asm->OutStreamer->AddComment("Abbrev code"); - uint32_t Tag = getTagFromAbbreviationTag(Abbrev.first); - assert(Tag != 0); - Asm->emitULEB128(Abbrev.first); - Asm->OutStreamer->AddComment(dwarf::TagString(Tag)); - Asm->emitULEB128(Tag); - for (const auto &AttrEnc : Abbrev.second) { + Asm->emitULEB128(Abbrev->getNumber()); + Asm->OutStreamer->AddComment(dwarf::TagString(Abbrev->getDieTag())); + 
Asm->emitULEB128(Abbrev->getDieTag()); + for (const DebugNamesAbbrev::AttributeEncoding &AttrEnc : + Abbrev->getAttributes()) { Asm->emitULEB128(AttrEnc.Index, dwarf::IndexString(AttrEnc.Index).data()); Asm->emitULEB128(AttrEnc.Form, dwarf::FormEncodingString(AttrEnc.Form).data()); @@ -558,21 +533,15 @@ void Dwarf5AccelTableWriter::emitAbbrevs() const { void Dwarf5AccelTableWriter::emitEntry( const DWARF5AccelTableData &Entry, const DenseMap &DIEOffsetToAccelEntryLabel, - DenseSet &EmittedAccelEntrySymbols) const { + DenseSet &EmittedAccelEntrySymbols) { + unsigned AbbrevIndex = Entry.getAbbrevNumber() - 1; + assert(AbbrevIndex < AbbreviationsVector.size() && + "Entry abbrev index is outside of abbreviations vector range."); + DebugNamesAbbrev *Abbrev = AbbreviationsVector[AbbrevIndex]; std::optional EntryRet = getIndexForEntry(Entry); std::optional MaybeParentOffset = Entry.getParentDieOffsetAndUnitID(); - std::optional MaybeParentForm = - getFormForIdxParent(IndexedOffsets, MaybeParentOffset); - uint32_t AbbrvTag = - constructAbbreviationTag(Entry.getDieTag(), EntryRet, MaybeParentForm); - auto AbbrevIt = Abbreviations.find(AbbrvTag); - assert(AbbrevIt != Abbreviations.end() && - "Why wasn't this abbrev generated?"); - assert(getTagFromAbbreviationTag(AbbrevIt->first) == Entry.getDieTag() && - "Invalid Tag"); - auto EntrySymbolIt = DIEOffsetToAccelEntryLabel.find(Entry.getDieOffsetAndUnitID()); assert(EntrySymbolIt != DIEOffsetToAccelEntryLabel.end()); @@ -584,9 +553,10 @@ void Dwarf5AccelTableWriter::emitEntry( if (EmittedAccelEntrySymbols.insert(EntrySymbol).second) Asm->OutStreamer->emitLabel(EntrySymbol); - Asm->emitULEB128(AbbrevIt->first, "Abbreviation code"); + Asm->emitULEB128(Entry.getAbbrevNumber(), "Abbreviation code"); - for (const auto &AttrEnc : AbbrevIt->second) { + for (const DebugNamesAbbrev::AttributeEncoding &AttrEnc : + Abbrev->getAttributes()) { Asm->OutStreamer->AddComment(dwarf::IndexString(AttrEnc.Index)); switch (AttrEnc.Index) { case 
dwarf::DW_IDX_compile_unit: diff --git a/llvm/test/DebugInfo/X86/debug-names-dwarf64.ll b/llvm/test/DebugInfo/X86/debug-names-dwarf64.ll index c15e2ad1d56b0c..9a5fd073358733 100644 --- a/llvm/test/DebugInfo/X86/debug-names-dwarf64.ll +++ b/llvm/test/DebugInfo/X86/debug-names-dwarf64.ll @@ -30,11 +30,6 @@ ; CHECK-NEXT: CU[0]: 0x00000000 ; CHECK-NEXT: ] ; CHECK-NEXT: Abbreviations [ -; CHECK-NEXT: Abbreviation [[ABBREV_LABEL:0x[0-9a-f]*]] { -; CHECK-NEXT: Tag: DW_TAG_label -; CHECK-NEXT: DW_IDX_die_offset: DW_FORM_ref4 -; CHECK-NEXT: DW_IDX_parent: DW_FORM_ref4 -; CHECK-NEXT: } ; CHECK-NEXT: Abbreviation [[ABBREV:0x[0-9a-f]*]] { ; CHECK-NEXT: Tag: DW_TAG_base_type ; CHECK-NEXT: DW_IDX_die_offset: DW_FORM_ref4 @@ -50,6 +45,11 @@ ; CHECK-NEXT: DW_IDX_die_offset: DW_FORM_ref4 ; CHECK-NEXT: DW_IDX_parent: DW_FORM_flag_present ; CHECK-NEXT: } +; CHECK-NEXT: Abbreviation [[ABBREV_LABEL:0x[0-9a-f]*]] { +; CHECK-NEXT: Tag: DW_TAG_label +; CHECK-NEXT: DW_IDX_die_offset: DW_FORM_ref4 +; CHECK-NEXT: DW_IDX_parent: DW_FORM_ref4 +; CHECK-NEXT: } ; CHECK-NEXT: ] ; CHECK-NEXT: Bucket 0 [ ; CHECK-NEXT: Name 1 { diff --git a/llvm/test/DebugInfo/X86/debug-names-types.ll b/llvm/test/DebugInfo/X86/debug-names-types.ll index f41bb5524b9c33..ff0d4d52c1f071 100644 --- a/llvm/test/DebugInfo/X86/debug-names-types.ll +++ b/llvm/test/DebugInfo/X86/debug-names-types.ll @@ -37,20 +37,14 @@ ; CHECK-NEXT: LocalTU[0]: 0x00000000 ; CHECK-NEXT: ] ; CHECK: Abbreviations [ -; CHECK-NEXT: Abbreviation [[ABBREV3:0x[0-9a-f]*]] { -; CHECK-NEXT: Tag: DW_TAG_structure_type -; CHECK-NEXT: DW_IDX_type_unit: DW_FORM_data1 -; CHECK-NEXT: DW_IDX_die_offset: DW_FORM_ref4 -; CHECK-NEXT: DW_IDX_parent: DW_FORM_flag_present -; CHECK-NEXT: } -; CHECK-NEXT: Abbreviation [[ABBREV4:0x[0-9a-f]*]] { +; CHECK-NEXT: Abbreviation [[ABBREV:0x[0-9a-f]*]] { ; CHECK-NEXT: Tag: DW_TAG_base_type -; CHECK-NEXT: DW_IDX_type_unit: DW_FORM_data1 ; CHECK-NEXT: DW_IDX_die_offset: DW_FORM_ref4 ; CHECK-NEXT: DW_IDX_parent: 
DW_FORM_flag_present ; CHECK-NEXT: } -; CHECK-NEXT: Abbreviation [[ABBREV:0x[0-9a-f]*]] { -; CHECK-NEXT: Tag: DW_TAG_base_type +; CHECK-NEXT: Abbreviation [[ABBREV3:0x[0-9a-f]*]] { +; CHECK-NEXT: Tag: DW_TAG_structure_type +; CHECK-NEXT: DW_IDX_type_unit: DW_FORM_data1 ; CHECK-NEXT: DW_IDX_die_offset: DW_FORM_ref4 ; CHECK-NEXT: DW_IDX_parent: DW_FORM_flag_present ; CHECK-NEXT: } @@ -64,6 +58,12 @@ ; CHECK-NEXT: DW_IDX_die_offset: DW_FORM_ref4 ; CHECK-NEXT: DW_IDX_parent: DW_FORM_flag_present ; CHECK-NEXT: } +; CHECK-NEXT: Abbreviation [[ABBREV4:0x[0-9a-f]*]] { +; CHECK-NEXT: Tag: DW_TAG_base_type +; CHECK-NEXT: DW_IDX_type_unit: DW_FORM_data1 +; CHECK-NEXT: DW_IDX_die_offset: DW_FORM_ref4 +; CHECK-NEXT: DW_IDX_parent: DW_FORM_flag_present +; CHECK-NEXT: } ; CHECK-NEXT: ] ; CHECK-NEXT: Bucket 0 [ ; CHECK-NEXT: Name 1 { @@ -130,7 +130,7 @@ ; CHECK-SPLIT: Foreign TU count: 1 ; CHECK-SPLIT-NEXT: Bucket count: 4 ; CHECK-SPLIT-NEXT: Name count: 4 -; CHECK-SPLIT-NEXT: Abbreviations table size: 0x32 +; CHECK-SPLIT-NEXT: Abbreviations table size: 0x2D ; CHECK-SPLIT-NEXT: Augmentation: 'LLVM0700' ; CHECK-SPLIT-NEXT: } ; CHECK-SPLIT-NEXT: Compilation Unit offsets [ @@ -140,20 +140,14 @@ ; CHECK-SPLIT-NEXT: ForeignTU[0]: 0x675d23e4f33235f2 ; CHECK-SPLIT-NEXT: ] ; CHECK-SPLIT-NEXT: Abbreviations [ -; CHECK-SPLIT-NEXT: Abbreviation [[ABBREV1:0x[0-9a-f]*]] { -; CHECK-SPLIT-NEXT: Tag: DW_TAG_structure_type -; CHECK-SPLIT-NEXT: DW_IDX_type_unit: DW_FORM_data1 -; CHECK-SPLIT-NEXT: DW_IDX_die_offset: DW_FORM_ref4 -; CHECK-SPLIT-NEXT: DW_IDX_parent: DW_FORM_flag_present -; CHECK-SPLIT-NEXT: } -; CHECK-SPLIT-NEXT: Abbreviation [[ABBREV4:0x[0-9a-f]*]] { +; CHECK-SPLIT-NEXT: Abbreviation [[ABBREV2:0x[0-9a-f]*]] { ; CHECK-SPLIT-NEXT: Tag: DW_TAG_base_type -; CHECK-SPLIT-NEXT: DW_IDX_type_unit: DW_FORM_data1 ; CHECK-SPLIT-NEXT: DW_IDX_die_offset: DW_FORM_ref4 ; CHECK-SPLIT-NEXT: DW_IDX_parent: DW_FORM_flag_present ; CHECK-SPLIT-NEXT: } -; CHECK-SPLIT-NEXT: Abbreviation 
[[ABBREV2:0x[0-9a-f]*]] { -; CHECK-SPLIT-NEXT: Tag: DW_TAG_base_type +; CHECK-SPLIT-NEXT: Abbreviation [[ABBREV1:0x[0-9a-f]*]] { +; CHECK-SPLIT-NEXT: Tag: DW_TAG_structure_type +; CHECK-SPLIT-NEXT: DW_IDX_type_unit: DW_FORM_data1 ; CHECK-SPLIT-NEXT: DW_IDX_die_offset: DW_FORM_ref4 ; CHECK-SPLIT-NEXT: DW_IDX_parent: DW_FORM_flag_present ; CHECK-SPLIT-NEXT: } @@ -167,6 +161,12 @@ ; CHECK-SPLIT-NEXT: DW_IDX_die_offset: DW_FORM_ref4 ; CHECK-SPLIT-NEXT: DW_IDX_parent: DW_FORM_flag_present ; CHECK-SPLIT-NEXT: } +; CHECK-SPLIT-NEXT: Abbreviation [[ABBREV4:0x[0-9a-f]*]] { +; CHECK-SPLIT-NEXT: Tag: DW_TAG_base_type +; CHECK-SPLIT-NEXT: DW_IDX_type_unit: DW_FORM_data1 +; CHECK-SPLIT-NEXT: DW_IDX_die_offset: DW_FORM_ref4 +; CHECK-SPLIT-NEXT: DW_IDX_parent: DW_FORM_flag_present +; CHECK-SPLIT-NEXT: } ; CHECK-SPLIT-NEXT: ] ; CHECK-SPLIT-NEXT: Bucket 0 [ ; CHECK-SPLIT-NEXT: Name 1 { diff --git a/llvm/test/tools/dsymutil/ARM/dwarf5-dwarf4-combination-macho.test b/llvm/test/tools/dsymutil/ARM/dwarf5-dwarf4-combination-macho.test index 5a37b4247b5bf3..fb15f46ce0b245 100644 --- a/llvm/test/tools/dsymutil/ARM/dwarf5-dwarf4-combination-macho.test +++ b/llvm/test/tools/dsymutil/ARM/dwarf5-dwarf4-combination-macho.test @@ -221,8 +221,8 @@ CHECK-NEXT: Name Index @ 0x0 { CHECK-NEXT: Header { ; FIXME: when the parallel dwarf linker is able to generate DW_IDX_parent, ; these headers should be the same. 
-WITH-PARENTS-NEXT: Length: 0xC8 -NO-PARENTS-NEXT: Length: 0xC4 +WITH-PARENTS-NEXT: Length: 0xC0 +NO-PARENTS-NEXT: Length: 0xBC CHECK-NEXT: Format: DWARF32 CHECK-NEXT: Version: 5 CHECK-NEXT: CU count: 2 @@ -230,7 +230,7 @@ CHECK-NEXT: Local TU count: 0 CHECK-NEXT: Foreign TU count: 0 CHECK-NEXT: Bucket count: 5 CHECK-NEXT: Name count: 5 -WITH-PARENTS-NEXT: Abbreviations table size: 0x17 -NO-PARENTS-NEXT: Abbreviations table size: 0x13 +WITH-PARENTS-NEXT: Abbreviations table size: 0x15 +NO-PARENTS-NEXT: Abbreviations table size: 0x11 CHECK-NEXT: Augmentation: 'LLVM0700' CHECK-NEXT: } From c007fbb19879f9b597b47ae772c53e53cdc65f29 Mon Sep 17 00:00:00 2001 From: YunQiang Su Date: Thu, 15 Feb 2024 04:48:55 +0800 Subject: [PATCH 183/240] MipsAsmParser/O32: Don't add redundant $ to $-prefixed symbol in the la macro (#80644) When parsing the `la` macro, we add a duplicate `$` prefix in `getOrCreateSymbol`, leading to `error: Undefined temporary symbol $$yy` for code like: ``` xx: la $2,$yy $yy: nop ``` Remove the duplicate prefix. In addition, recognize `.L`-prefixed symbols as local for O32. See: #65020. --------- Co-authored-by: Fangrui Song --- .../Target/Mips/AsmParser/MipsAsmParser.cpp | 7 +++++- llvm/test/CodeGen/Mips/hf1_body.ll | 4 ++-- llvm/test/MC/Mips/macro-la-pic.s | 22 +++++++++++++++++++ 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 3c673ae938fdec..36aab383da68d2 100644 --- a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -2920,6 +2920,11 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr, (Res.getSymA()->getSymbol().isELF() && cast(Res.getSymA()->getSymbol()).getBinding() == ELF::STB_LOCAL); + // For O32, "$"-prefixed symbols are recognized as temporary while + // .L-prefixed symbols are not (PrivateGlobalPrefix is "$"). Recognize ".L" + // manually. 
+ if (ABI.IsO32() && Res.getSymA()->getSymbol().getName().starts_with(".L")) + IsLocalSym = true; bool UseXGOT = STI->hasFeature(Mips::FeatureXGOT) && !IsLocalSym; // The case where the result register is $25 is somewhat special. If the @@ -6359,7 +6364,7 @@ bool MipsAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { return true; SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); - MCSymbol *Sym = getContext().getOrCreateSymbol("$" + Identifier); + MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier); // Otherwise create a symbol reference. const MCExpr *SymRef = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); diff --git a/llvm/test/CodeGen/Mips/hf1_body.ll b/llvm/test/CodeGen/Mips/hf1_body.ll index 184ea31bddc9d2..c3dea67896210a 100644 --- a/llvm/test/CodeGen/Mips/hf1_body.ll +++ b/llvm/test/CodeGen/Mips/hf1_body.ll @@ -23,8 +23,8 @@ entry: ; ALL: .set reorder ; ALL: .reloc 0, R_MIPS_NONE, v_sf ; GAS: la $25, $__fn_local_v_sf -; IAS: lw $25, %got($$__fn_local_v_sf)($gp) -; IAS: addiu $25, $25, %lo($$__fn_local_v_sf) +; IAS: lw $25, %got($__fn_local_v_sf)($gp) +; IAS: addiu $25, $25, %lo($__fn_local_v_sf) ; ALL: mfc1 $4, $f12 ; ALL: jr $25 ; ALL: .end __fn_stub_v_sf diff --git a/llvm/test/MC/Mips/macro-la-pic.s b/llvm/test/MC/Mips/macro-la-pic.s index 2303f34c35bcfe..1875952d80c4e7 100644 --- a/llvm/test/MC/Mips/macro-la-pic.s +++ b/llvm/test/MC/Mips/macro-la-pic.s @@ -255,3 +255,25 @@ la $25, 2f # XN32: lw $25, %got_disp(.Ltmp1)($gp) # encoding: [0x8f,0x99,A,A] # XN32: # fixup A - offset: 0, value: %got_disp(.Ltmp1), kind: fixup_Mips_GOT_DISP 2: + +la $2,.Lstr +# O32: lw $2, %got(.Lstr)($gp) # encoding: [0x8f,0x82,A,A] +# O32-NEXT: # fixup A - offset: 0, value: %got(.Lstr), kind: fixup_Mips_GOT +# O32-NEXT: addiu $2, $2, %lo(.Lstr) # encoding: [0x24,0x42,A,A] +# O32-NEXT: # fixup A - offset: 0, value: %lo(.Lstr), kind: fixup_Mips_LO16 + +# N32: lw $2, %got_disp(.Lstr)($gp) # encoding: 
[0x8f,0x82,A,A] +# N32-NEXT: # fixup A - offset: 0, value: %got_disp(.Lstr), kind: fixup_Mips_GOT_DISP + +la $2,$str2 +# O32: lw $2, %got($str2)($gp) # encoding: [0x8f,0x82,A,A] +# O32-NEXT: # fixup A - offset: 0, value: %got($str2), kind: fixup_Mips_GOT +# O32-NEXT: addiu $2, $2, %lo($str2) # encoding: [0x24,0x42,A,A] +# O32-NEXT: # fixup A - offset: 0, value: %lo($str2), kind: fixup_Mips_LO16 + +# N32: lw $2, %got_disp($str2)($gp) # encoding: [0x8f,0x82,A,A] +# N32-NEXT: # fixup A - offset: 0, value: %got_disp($str2), kind: fixup_Mips_GOT_DISP + +.rodata +.Lstr: .4byte 0 +$str2: .4byte 0 From f3b92fae138f47fb78a55254d73913f1e7935852 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Wed, 14 Feb 2024 21:51:16 +0100 Subject: [PATCH 184/240] [bazel] Port for 1301bc46aea14297478bd13bcacff429e2a18c04 --- utils/bazel/llvm-project-overlay/libc/BUILD.bazel | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 64e788eae34ebd..198c110b7e304e 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -333,6 +333,7 @@ libc_support_library( ":__support_macros_attributes", ":__support_macros_config", ":__support_macros_properties_float", + ":llvm_libc_macros_stdfix_macros", ], ) @@ -979,6 +980,12 @@ libc_support_library( hdrs = ["include/llvm-libc-macros/float-macros.h"], ) +libc_support_library( + name = "llvm_libc_macros_stdfix_macros", + hdrs = ["include/llvm-libc-macros/stdfix-macros.h"], + deps = [":llvm_libc_macros_float_macros"], +) + libc_support_library( name = "llvm_libc_types_float128", hdrs = ["include/llvm-libc-types/float128.h"], From aab48c99c2234e348aa37657accfb6110c84c9b7 Mon Sep 17 00:00:00 2001 From: Jason Molenda Date: Wed, 14 Feb 2024 13:06:20 -0800 Subject: [PATCH 185/240] [lldb] Detect a Darwin kernel issue and work around it (#81573) On arm64 machines, when there is a hardware 
breakpoint or watchpoint set, and lldb has instruction-stepped a thread, and then done a Process::Resume, we will sometimes receive an extra "instruction step completed" mach exception and the pc has not advanced. From a user's perspective, they hit Continue and lldb stops again at the same spot. From the testsuite's perspective, this has been a constant source of testsuite failures for any test using hardware watchpoints and breakpoints; the arm64 CI bots seem especially good at hitting this issue. Jim and I have been slowly looking at this for a few months now, and finally I decided to try to detect this situation in lldb and silently resume the process again when it happens. We were already detecting this "got an insn-step finished mach exception but this thread was not instruction stepping" combination in StopInfoMachException where we take the mach exception and create a StopInfo object for it. We had a lot of logging we used to understand the failure as it was hit on the bots in assert builds. This patch adds a new case to `Thread::GetPrivateStopInfo()` to call the StopInfo's (new) `WasContinueInterrupted()` method. In StopInfoMachException, where we previously had logging for assert builds, I now note it in an ivar, and when `Thread::GetPrivateStopInfo()` asks if this has happened, we check the full combination of events under which this comes up: We have a hardware breakpoint or watchpoint, we were not instruction stepping this thread but got an insn-step mach exception, the pc is the same as the previous stop's pc. And in that case, `Thread::GetPrivateStopInfo()` returns no StopInfo -- indicating that this thread would like to resume execution. The `Thread` object has two StackFrameLists, `m_curr_frames_sp` and `m_prev_frames_sp`. When a thread resumes execution, we move `m_curr_frames_sp` into `m_prev_frames_sp` and when it stops executing, we use `m_prev_frames_sp` to seed the new `m_curr_frames_sp` if most of the stack is the same as before.
In this same location, I now save the value of the Thread's RegisterContext::GetPC into an ivar, `m_prev_framezero_pc`. StopInfoMachException needs this information to check all of the conditions I outlined above for `WasContinueInterrupted`. This has passed exhaustive testing and we do not have any testsuite failures for hardware watchpoints and breakpoints due to this kernel bug with the patch in place. In focusing on these tests for thousands of runs, I have found two other uncommon race conditions for the TestConcurrent* tests on arm64. TestConcurrentManyBreakpoints.py (which uses no hardware watchpoint/breakpoints) will sometimes only have 99 breakpoints when it expects 100, and any of the concurrent tests using the shared harness (I've seen it in TestConcurrentWatchBreakDelay.py, TestConcurrentTwoBreakpointsOneSignal.py, TestConcurrentSignalDelayWatch.py) can fail when the test harness checks that there is only one thread still running at the end, and it finds two -- one of them under pthread_exit / pthread_terminate. Both of these failures happen on GitHub main without my changes, and with my changes - they are unrelated race conditions in these tests, and I'm sure I'll be looking into them at some point if they hit the CI bots with frequency. On my computer, these failures occur roughly 0.3-0.5% of the time. But the CI bots do have different timing.
--- lldb/include/lldb/Target/StopInfo.h | 5 ++ lldb/include/lldb/Target/Thread.h | 14 ++++ .../Process/Utility/StopInfoMachException.cpp | 83 +++++++++++++------ .../Process/Utility/StopInfoMachException.h | 11 ++- lldb/source/Target/Thread.cpp | 19 ++++- 5 files changed, 101 insertions(+), 31 deletions(-) diff --git a/lldb/include/lldb/Target/StopInfo.h b/lldb/include/lldb/Target/StopInfo.h index 305fc5d0e0fbe5..d1848fcbbbdb19 100644 --- a/lldb/include/lldb/Target/StopInfo.h +++ b/lldb/include/lldb/Target/StopInfo.h @@ -79,6 +79,11 @@ class StopInfo : public std::enable_shared_from_this { virtual bool IsValidForOperatingSystemThread(Thread &thread) { return true; } + /// A Continue operation can result in a false stop event + /// before any execution has happened. We need to detect this + /// and silently continue again one more time. + virtual bool WasContinueInterrupted(Thread &thread) { return false; } + // Sometimes the thread plan logic will know that it wants a given stop to // stop or not, regardless of what the ordinary logic for that StopInfo would // dictate. The main example of this is the ThreadPlanCallFunction, which diff --git a/lldb/include/lldb/Target/Thread.h b/lldb/include/lldb/Target/Thread.h index 30863ad4c90299..96ca95ad233ff7 100644 --- a/lldb/include/lldb/Target/Thread.h +++ b/lldb/include/lldb/Target/Thread.h @@ -11,6 +11,7 @@ #include #include +#include #include #include @@ -1226,6 +1227,16 @@ class Thread : public std::enable_shared_from_this, lldb::ValueObjectSP GetSiginfoValue(); + /// Request the pc value the thread had when previously stopped. + /// + /// When the thread performs execution, it copies the current RegisterContext + /// GetPC() value. This method returns that value, if it is available. + /// + /// \return + /// The PC value before execution was resumed. May not be available; + /// an empty std::optional is returned in that case. 
+ std::optional GetPreviousFrameZeroPC(); + protected: friend class ThreadPlan; friend class ThreadList; @@ -1306,6 +1317,9 @@ class Thread : public std::enable_shared_from_this, ///populated after a thread stops. lldb::StackFrameListSP m_prev_frames_sp; ///< The previous stack frames from ///the last time this thread stopped. + std::optional + m_prev_framezero_pc; ///< Frame 0's PC the last + /// time this thread was stopped. int m_resume_signal; ///< The signal that should be used when continuing this ///thread. lldb::StateType m_resume_state; ///< This state is used to force a thread to diff --git a/lldb/source/Plugins/Process/Utility/StopInfoMachException.cpp b/lldb/source/Plugins/Process/Utility/StopInfoMachException.cpp index d756354f9bd278..75504323b4fdf9 100644 --- a/lldb/source/Plugins/Process/Utility/StopInfoMachException.cpp +++ b/lldb/source/Plugins/Process/Utility/StopInfoMachException.cpp @@ -26,6 +26,8 @@ #include "lldb/Target/Thread.h" #include "lldb/Target/ThreadPlan.h" #include "lldb/Target/UnixSignals.h" +#include "lldb/Utility/LLDBLog.h" +#include "lldb/Utility/Log.h" #include "lldb/Utility/StreamString.h" #include @@ -596,6 +598,7 @@ StopInfoSP StopInfoMachException::CreateStopReasonWithMachException( if (exc_type == 0) return StopInfoSP(); + bool not_stepping_but_got_singlestep_exception = false; uint32_t pc_decrement = 0; ExecutionContext exe_ctx(thread.shared_from_this()); Target *target = exe_ctx.GetTargetPtr(); @@ -720,30 +723,8 @@ StopInfoSP StopInfoMachException::CreateStopReasonWithMachException( // is set is_actual_breakpoint = true; is_trace_if_actual_breakpoint_missing = true; -#ifndef NDEBUG - if (thread.GetTemporaryResumeState() != eStateStepping) { - StreamString s; - s.Printf("CreateStopReasonWithMachException got EXC_BREAKPOINT [1,0] " - "indicating trace event, but thread is not tracing, it has " - "ResumeState %d", - thread.GetTemporaryResumeState()); - if (RegisterContextSP regctx = thread.GetRegisterContext()) { - if (const 
RegisterInfo *ri = regctx->GetRegisterInfoByName("esr")) { - uint32_t esr = - (uint32_t)regctx->ReadRegisterAsUnsigned(ri, UINT32_MAX); - if (esr != UINT32_MAX) { - s.Printf(" esr value: 0x%" PRIx32, esr); - } - } - } - thread.GetProcess()->DumpPluginHistory(s); - llvm::report_fatal_error(s.GetData()); - lldbassert( - false && - "CreateStopReasonWithMachException got EXC_BREAKPOINT [1,0] " - "indicating trace event, but thread was not doing a step."); - } -#endif + if (thread.GetTemporaryResumeState() != eStateStepping) + not_stepping_but_got_singlestep_exception = true; } if (exc_code == 0x102) // EXC_ARM_DA_DEBUG { @@ -825,6 +806,56 @@ StopInfoSP StopInfoMachException::CreateStopReasonWithMachException( break; } - return StopInfoSP(new StopInfoMachException(thread, exc_type, exc_data_count, - exc_code, exc_sub_code)); + return std::make_shared( + thread, exc_type, exc_data_count, exc_code, exc_sub_code, + not_stepping_but_got_singlestep_exception); +} + +// Detect an unusual situation on Darwin where: +// +// 0. We did an instruction-step before this. +// 1. We have a hardware breakpoint or watchpoint set. +// 2. We resumed the process, but not with an instruction-step. +// 3. The thread gets an "instruction-step completed" mach exception. +// 4. The pc has not advanced - it is the same as before. +// +// This method returns true for that combination of events. +bool StopInfoMachException::WasContinueInterrupted(Thread &thread) { + Log *log = GetLog(LLDBLog::Step); + + // We got an instruction-step completed mach exception but we were not + // doing an instruction step on this thread. + if (!m_not_stepping_but_got_singlestep_exception) + return false; + + RegisterContextSP reg_ctx_sp(thread.GetRegisterContext()); + std::optional prev_pc = thread.GetPreviousFrameZeroPC(); + if (!reg_ctx_sp || !prev_pc) + return false; + + // The previous pc value and current pc value are the same. 
+ if (*prev_pc != reg_ctx_sp->GetPC()) + return false; + + // We have a watchpoint -- this is the kernel bug. + ProcessSP process_sp = thread.GetProcess(); + if (process_sp->GetWatchpointResourceList().GetSize()) { + LLDB_LOGF(log, + "Thread stopped with insn-step completed mach exception but " + "thread was not stepping; there is a hardware watchpoint set."); + return true; + } + + // We have a hardware breakpoint -- this is the kernel bug. + auto &bp_site_list = process_sp->GetBreakpointSiteList(); + for (auto &site : bp_site_list.Sites()) { + if (site->IsHardware() && site->IsEnabled()) { + LLDB_LOGF(log, + "Thread stopped with insn-step completed mach exception but " + "thread was not stepping; there is a hardware breakpoint set."); + return true; + } + } + + return false; } diff --git a/lldb/source/Plugins/Process/Utility/StopInfoMachException.h b/lldb/source/Plugins/Process/Utility/StopInfoMachException.h index 541ef5e69565de..c612ac400b4c4c 100644 --- a/lldb/source/Plugins/Process/Utility/StopInfoMachException.h +++ b/lldb/source/Plugins/Process/Utility/StopInfoMachException.h @@ -31,9 +31,12 @@ class StopInfoMachException : public StopInfo { // Constructors and Destructors StopInfoMachException(Thread &thread, uint32_t exc_type, uint32_t exc_data_count, uint64_t exc_code, - uint64_t exc_subcode) + uint64_t exc_subcode, + bool not_stepping_but_got_singlestep_exception) : StopInfo(thread, exc_type), m_exc_data_count(exc_data_count), - m_exc_code(exc_code), m_exc_subcode(exc_subcode) {} + m_exc_code(exc_code), m_exc_subcode(exc_subcode), + m_not_stepping_but_got_singlestep_exception( + not_stepping_but_got_singlestep_exception) {} ~StopInfoMachException() override = default; @@ -58,10 +61,14 @@ class StopInfoMachException : public StopInfo { uint64_t exc_code, uint64_t exc_sub_code, uint64_t exc_sub_sub_code, bool pc_already_adjusted = true, bool adjust_pc_if_needed = false); + bool WasContinueInterrupted(Thread &thread) override; + protected: uint32_t 
m_exc_data_count; uint64_t m_exc_code; uint64_t m_exc_subcode; + + bool m_not_stepping_but_got_singlestep_exception; }; } // namespace lldb_private diff --git a/lldb/source/Target/Thread.cpp b/lldb/source/Target/Thread.cpp index 8ae2179c1281d0..4dfad23b56e2cb 100644 --- a/lldb/source/Target/Thread.cpp +++ b/lldb/source/Target/Thread.cpp @@ -221,7 +221,7 @@ Thread::Thread(Process &process, lldb::tid_t tid, bool use_invalid_index_id) : process.GetNextThreadIndexID(tid)), m_reg_context_sp(), m_state(eStateUnloaded), m_state_mutex(), m_frame_mutex(), m_curr_frames_sp(), m_prev_frames_sp(), - m_resume_signal(LLDB_INVALID_SIGNAL_NUMBER), + m_prev_framezero_pc(), m_resume_signal(LLDB_INVALID_SIGNAL_NUMBER), m_resume_state(eStateRunning), m_temporary_resume_state(eStateRunning), m_unwinder_up(), m_destroy_called(false), m_override_should_notify(eLazyBoolCalculate), @@ -250,6 +250,7 @@ void Thread::DestroyThread() { std::lock_guard guard(m_frame_mutex); m_curr_frames_sp.reset(); m_prev_frames_sp.reset(); + m_prev_framezero_pc.reset(); } void Thread::BroadcastSelectedFrameChange(StackID &new_frame_id) { @@ -422,6 +423,12 @@ lldb::StopInfoSP Thread::GetPrivateStopInfo(bool calculate) { } } } + + // If we were resuming the process and it was interrupted, + // return no stop reason. This thread would like to resume. 
+ if (m_stop_info_sp && m_stop_info_sp->WasContinueInterrupted(*this)) + return {}; + return m_stop_info_sp; } @@ -1408,16 +1415,22 @@ StackFrameListSP Thread::GetStackFrameList() { return m_curr_frames_sp; } +std::optional Thread::GetPreviousFrameZeroPC() { + return m_prev_framezero_pc; +} + void Thread::ClearStackFrames() { std::lock_guard guard(m_frame_mutex); GetUnwinder().Clear(); + m_prev_framezero_pc.reset(); + if (RegisterContextSP reg_ctx_sp = GetRegisterContext()) + m_prev_framezero_pc = reg_ctx_sp->GetPC(); // Only store away the old "reference" StackFrameList if we got all its // frames: // FIXME: At some point we can try to splice in the frames we have fetched - // into - // the new frame as we make it, but let's not try that now. + // into the new frame as we make it, but let's not try that now. if (m_curr_frames_sp && m_curr_frames_sp->GetAllFramesFetched()) m_prev_frames_sp.swap(m_curr_frames_sp); m_curr_frames_sp.reset(); From 1da4494184566d68f32206e3ac5a8b90bc05889d Mon Sep 17 00:00:00 2001 From: Dave Lee Date: Wed, 14 Feb 2024 13:13:23 -0800 Subject: [PATCH 186/240] [lldb] Add comment on cross printing of summary/value (#81681) Adds a comment to indicate intention of a piece of value printing code. I was initially surprised to see this code (distilled for emphasis): ```cpp if (str.empty()) { if (style == eValueObjectRepresentationStyleValue) str = GetSummaryAsCString(); else if (style == eValueObjectRepresentationStyleSummary) str = GetValueAsCString(); } ``` My first thought was "is this a bug?", but I realized it was likely intentional. This change adds a comment to indicate yes, this is intentional. 
--- lldb/source/Core/ValueObject.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lldb/source/Core/ValueObject.cpp b/lldb/source/Core/ValueObject.cpp index e80042826f7d64..840b100c70ddaa 100644 --- a/lldb/source/Core/ValueObject.cpp +++ b/lldb/source/Core/ValueObject.cpp @@ -1312,6 +1312,8 @@ bool ValueObject::DumpPrintableRepresentation( break; } + // If the requested display style produced no output, try falling back to + // alternative presentations. if (str.empty()) { if (val_obj_display == eValueObjectRepresentationStyleValue) str = GetSummaryAsCString(); From 8d326542926d4fba89cfb0ec01a0c1a1bd0789d6 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 14 Feb 2024 13:21:56 -0800 Subject: [PATCH 187/240] [RISCV] Add coverage for an upcoming set of vector narrowing changes --- .../CodeGen/RISCV/rvv/fixed-vectors-abs.ll | 51 ++++++++++++++++++ .../CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll | 54 +++++++++++++++++++ .../CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll | 54 +++++++++++++++++++ .../CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll | 54 +++++++++++++++++++ 4 files changed, 213 insertions(+) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll index 37d05f08d0ff3d..d2e0113e69b900 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll @@ -148,3 +148,54 @@ define void @abs_v4i64(ptr %x) { ret void } declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1) + +define void @abs_v4i64_of_sext_v4i8(ptr %x) { +; CHECK-LABEL: abs_v4i64_of_sext_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsext.vf8 v10, v8 +; CHECK-NEXT: vrsub.vi v8, v10, 0 +; CHECK-NEXT: vmax.vv v8, v10, v8 +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: ret + %a = load <4 x i8>, ptr %x + %a.ext = sext <4 x i8> %a to <4 x i64> + %b = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %a.ext, i1 false) + store <4 x i64> %b, ptr 
%x + ret void +} + +define void @abs_v4i64_of_sext_v4i16(ptr %x) { +; CHECK-LABEL: abs_v4i64_of_sext_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsext.vf4 v10, v8 +; CHECK-NEXT: vrsub.vi v8, v10, 0 +; CHECK-NEXT: vmax.vv v8, v10, v8 +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: ret + %a = load <4 x i16>, ptr %x + %a.ext = sext <4 x i16> %a to <4 x i64> + %b = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %a.ext, i1 false) + store <4 x i64> %b, ptr %x + ret void +} + +define void @abs_v4i64_of_sext_v4i32(ptr %x) { +; CHECK-LABEL: abs_v4i64_of_sext_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsext.vf2 v10, v8 +; CHECK-NEXT: vrsub.vi v8, v10, 0 +; CHECK-NEXT: vmax.vv v8, v10, v8 +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: ret + %a = load <4 x i32>, ptr %x + %a.ext = sext <4 x i32> %a to <4 x i64> + %b = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %a.ext, i1 false) + store <4 x i64> %b, ptr %x + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll index c8de041a26f430..7bffbaa1c21ea6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll @@ -880,3 +880,57 @@ define <2 x i64> @vwadd_vx_v2i64_i64(ptr %x, ptr %y) nounwind { %g = add <2 x i64> %e, %f ret <2 x i64> %g } + +define <2 x i32> @vwadd_v2i32_of_v2i8(ptr %x, ptr %y) { +; CHECK-LABEL: vwadd_v2i32_of_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vle8.v v8, (a1) +; CHECK-NEXT: vle8.v v9, (a0) +; CHECK-NEXT: vsext.vf2 v10, v8 +; CHECK-NEXT: vsext.vf2 v11, v9 +; CHECK-NEXT: vwadd.vv v8, v11, v10 +; CHECK-NEXT: ret + %a = load <2 x i8>, ptr %x + %b = load <2 x i8>, ptr %y + %c = sext <2 x i8> %a to <2 x i32> + %d = sext <2 x i8> %b to <2 x i32> + %e = add <2 x i32> %c, %d + ret <2 x i32> 
%e +} + +define <2 x i64> @vwadd_v2i64_of_v2i8(ptr %x, ptr %y) { +; CHECK-LABEL: vwadd_v2i64_of_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vle8.v v8, (a1) +; CHECK-NEXT: vle8.v v9, (a0) +; CHECK-NEXT: vsext.vf4 v10, v8 +; CHECK-NEXT: vsext.vf4 v11, v9 +; CHECK-NEXT: vwadd.vv v8, v11, v10 +; CHECK-NEXT: ret + %a = load <2 x i8>, ptr %x + %b = load <2 x i8>, ptr %y + %c = sext <2 x i8> %a to <2 x i64> + %d = sext <2 x i8> %b to <2 x i64> + %e = add <2 x i64> %c, %d + ret <2 x i64> %e +} + +define <2 x i64> @vwadd_v2i64_of_v2i16(ptr %x, ptr %y) { +; CHECK-LABEL: vwadd_v2i64_of_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vle16.v v8, (a1) +; CHECK-NEXT: vle16.v v9, (a0) +; CHECK-NEXT: vsext.vf2 v10, v8 +; CHECK-NEXT: vsext.vf2 v11, v9 +; CHECK-NEXT: vwadd.vv v8, v11, v10 +; CHECK-NEXT: ret + %a = load <2 x i16>, ptr %x + %b = load <2 x i16>, ptr %y + %c = sext <2 x i16> %a to <2 x i64> + %d = sext <2 x i16> %b to <2 x i64> + %e = add <2 x i64> %c, %d + ret <2 x i64> %e +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll index e51ca9f153dcb1..8779c6dd9fc38a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll @@ -908,3 +908,57 @@ define <4 x i64> @crash(<4 x i16> %x, <4 x i16> %y) { %c = add <4 x i64> %a, %b ret <4 x i64> %c } + +define <2 x i32> @vwaddu_v2i32_of_v2i8(ptr %x, ptr %y) { +; CHECK-LABEL: vwaddu_v2i32_of_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vle8.v v8, (a1) +; CHECK-NEXT: vle8.v v9, (a0) +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vzext.vf2 v11, v9 +; CHECK-NEXT: vwaddu.vv v8, v11, v10 +; CHECK-NEXT: ret + %a = load <2 x i8>, ptr %x + %b = load <2 x i8>, ptr %y + %c = zext <2 x i8> %a to <2 x i32> + %d = zext <2 x i8> %b to <2 x i32> + %e = add <2 x i32> %c, %d + ret <2 x 
i32> %e +} + +define <2 x i64> @vwaddu_v2i64_of_v2i8(ptr %x, ptr %y) { +; CHECK-LABEL: vwaddu_v2i64_of_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vle8.v v8, (a1) +; CHECK-NEXT: vle8.v v9, (a0) +; CHECK-NEXT: vzext.vf4 v10, v8 +; CHECK-NEXT: vzext.vf4 v11, v9 +; CHECK-NEXT: vwaddu.vv v8, v11, v10 +; CHECK-NEXT: ret + %a = load <2 x i8>, ptr %x + %b = load <2 x i8>, ptr %y + %c = zext <2 x i8> %a to <2 x i64> + %d = zext <2 x i8> %b to <2 x i64> + %e = add <2 x i64> %c, %d + ret <2 x i64> %e +} + +define <2 x i64> @vwaddu_v2i64_of_v2i16(ptr %x, ptr %y) { +; CHECK-LABEL: vwaddu_v2i64_of_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vle16.v v8, (a1) +; CHECK-NEXT: vle16.v v9, (a0) +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vzext.vf2 v11, v9 +; CHECK-NEXT: vwaddu.vv v8, v11, v10 +; CHECK-NEXT: ret + %a = load <2 x i16>, ptr %x + %b = load <2 x i16>, ptr %y + %c = zext <2 x i16> %a to <2 x i64> + %d = zext <2 x i16> %b to <2 x i64> + %e = add <2 x i64> %c, %d + ret <2 x i64> %e +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll index cf00fe14858d91..d2d54796069bb1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll @@ -895,3 +895,57 @@ define <2 x i64> @vwsubu_vx_v2i64_i64(ptr %x, ptr %y) nounwind { %g = sub <2 x i64> %e, %f ret <2 x i64> %g } + +define <2 x i32> @vwsubu_v2i32_of_v2i8(ptr %x, ptr %y) { +; CHECK-LABEL: vwsubu_v2i32_of_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vle8.v v8, (a1) +; CHECK-NEXT: vle8.v v9, (a0) +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vzext.vf2 v11, v9 +; CHECK-NEXT: vwsubu.vv v8, v11, v10 +; CHECK-NEXT: ret + %a = load <2 x i8>, ptr %x + %b = load <2 x i8>, ptr %y + %c = zext <2 x i8> %a to <2 x i32> + %d = zext <2 x i8> %b to <2 x i32> + %e = sub <2 x 
i32> %c, %d + ret <2 x i32> %e +} + +define <2 x i64> @vwsubu_v2i64_of_v2i8(ptr %x, ptr %y) { +; CHECK-LABEL: vwsubu_v2i64_of_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vle8.v v8, (a1) +; CHECK-NEXT: vle8.v v9, (a0) +; CHECK-NEXT: vzext.vf4 v10, v8 +; CHECK-NEXT: vzext.vf4 v11, v9 +; CHECK-NEXT: vwsubu.vv v8, v11, v10 +; CHECK-NEXT: ret + %a = load <2 x i8>, ptr %x + %b = load <2 x i8>, ptr %y + %c = zext <2 x i8> %a to <2 x i64> + %d = zext <2 x i8> %b to <2 x i64> + %e = sub <2 x i64> %c, %d + ret <2 x i64> %e +} + +define <2 x i64> @vwsubu_v2i64_of_v2i16(ptr %x, ptr %y) { +; CHECK-LABEL: vwsubu_v2i64_of_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vle16.v v8, (a1) +; CHECK-NEXT: vle16.v v9, (a0) +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vzext.vf2 v11, v9 +; CHECK-NEXT: vwsubu.vv v8, v11, v10 +; CHECK-NEXT: ret + %a = load <2 x i16>, ptr %x + %b = load <2 x i16>, ptr %y + %c = zext <2 x i16> %a to <2 x i64> + %d = zext <2 x i16> %b to <2 x i64> + %e = sub <2 x i64> %c, %d + ret <2 x i64> %e +} From 0fc578664809d9f808d24a91e50a68d6bfa22118 Mon Sep 17 00:00:00 2001 From: Jordan Rupprecht Date: Wed, 14 Feb 2024 13:53:19 -0800 Subject: [PATCH 188/240] [lldb][test] Remove expectedFailureIfFn (#81703) Switching to modern `unittest` in 5b386158aacac4b41126983a5379d36ed413d0ea needs xfail annotations to be known prior to test running. In contrast, skipping can happen at any time, even during test execution. Thus, `expectedFailureIfFn` inherently doesn't work. Either we eagerly evaluate the function and use `expectedFailureIf` instead, or we use a skip annotation to lazily evaluate the function and potentially skip the test right before it starts. - For `expectedFailureAndroid`, the intent seems to be that certain tests _should_ work on android, but don't. Thus, xfail is appropriate, to ensure the test is re-enabled once those bugs are ever fixed. 
- For the other uses in individual tests, those generally seem to be cases where the test environment doesn't support the setup required by the test, and so it isn't meaningful to run the test at all. For those, a drop-in replacement to `skipTestIfFn` works. --- .../Python/lldbsuite/test/decorators.py | 39 +++---------------- .../commands/platform/sdk/TestPlatformSDK.py | 4 +- .../TestRequireHWBreakpoints.py | 8 ++-- .../launch_stop_at_entry/TestStopAtEntry.py | 8 ++-- 4 files changed, 16 insertions(+), 43 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/decorators.py b/lldb/packages/Python/lldbsuite/test/decorators.py index a5e1fa51cf6e63..86594c2b409e79 100644 --- a/lldb/packages/Python/lldbsuite/test/decorators.py +++ b/lldb/packages/Python/lldbsuite/test/decorators.py @@ -128,33 +128,6 @@ def expectedFailure_impl(func): return expectedFailure_impl -def expectedFailureIfFn(expected_fn, bugnumber=None): - def expectedFailure_impl(func): - if isinstance(func, type) and issubclass(func, unittest.TestCase): - raise Exception("Decorator can only be used to decorate a test method") - - @wraps(func) - def wrapper(*args, **kwargs): - xfail_reason = expected_fn(*args, **kwargs) - if xfail_reason is not None: - xfail_func = unittest.expectedFailure(func) - xfail_func(*args, **kwargs) - else: - func(*args, **kwargs) - - return wrapper - - # Some decorators can be called both with no arguments (e.g. @expectedFailureWindows) - # or with arguments (e.g. @expectedFailureWindows(compilers=['gcc'])). When called - # the first way, the first argument will be the actual function because decorators are - # weird like that. So this is basically a check that says "which syntax was the original - # function decorated with?" 
- if callable(bugnumber): - return expectedFailure_impl(bugnumber) - else: - return expectedFailure_impl - - def skipTestIfFn(expected_fn, bugnumber=None): def skipTestIfFn_impl(func): if isinstance(func, type) and issubclass(func, unittest.TestCase): @@ -417,8 +390,8 @@ def skipIf( ) -def _skip_for_android(reason, api_levels, archs): - def impl(obj): +def _skip_fn_for_android(reason, api_levels, archs): + def impl(): result = lldbplatformutil.match_android_device( lldbplatformutil.getArchitecture(), valid_archs=archs, @@ -549,8 +522,8 @@ def expectedFailureAndroid(bugnumber=None, api_levels=None, archs=None): arch - A sequence of architecture names specifying the architectures for which a test is expected to fail. None means all architectures. """ - return expectedFailureIfFn( - _skip_for_android("xfailing on android", api_levels, archs), bugnumber + return expectedFailureIf( + _skip_fn_for_android("xfailing on android", api_levels, archs)(), bugnumber ) @@ -612,7 +585,7 @@ def expectedFlakeyNetBSD(bugnumber=None, compilers=None): def expectedFlakeyAndroid(bugnumber=None, api_levels=None, archs=None): return expectedFlakey( - _skip_for_android("flakey on android", api_levels, archs), bugnumber + _skip_fn_for_android("flakey on android", api_levels, archs), bugnumber ) @@ -846,7 +819,7 @@ def skipIfTargetAndroid(bugnumber=None, api_levels=None, archs=None): for which a test is skipped. None means all architectures. 
""" return skipTestIfFn( - _skip_for_android("skipping for android", api_levels, archs), bugnumber + _skip_fn_for_android("skipping for android", api_levels, archs), bugnumber ) diff --git a/lldb/test/API/commands/platform/sdk/TestPlatformSDK.py b/lldb/test/API/commands/platform/sdk/TestPlatformSDK.py index bf79a5bd5537d9..6af5767e26d3e8 100644 --- a/lldb/test/API/commands/platform/sdk/TestPlatformSDK.py +++ b/lldb/test/API/commands/platform/sdk/TestPlatformSDK.py @@ -39,8 +39,8 @@ def port_not_available(self): @no_debug_info_test @skipUnlessDarwin - @expectedFailureIfFn(no_debugserver) - @expectedFailureIfFn(port_not_available) + @skipTestIfFn(no_debugserver) + @skipTestIfFn(port_not_available) @skipIfRemote def test_macos_sdk(self): self.build() diff --git a/lldb/test/API/functionalities/breakpoint/hardware_breakpoints/require_hw_breakpoints/TestRequireHWBreakpoints.py b/lldb/test/API/functionalities/breakpoint/hardware_breakpoints/require_hw_breakpoints/TestRequireHWBreakpoints.py index ae4f7ea071ed08..5325f0f00affb8 100644 --- a/lldb/test/API/functionalities/breakpoint/hardware_breakpoints/require_hw_breakpoints/TestRequireHWBreakpoints.py +++ b/lldb/test/API/functionalities/breakpoint/hardware_breakpoints/require_hw_breakpoints/TestRequireHWBreakpoints.py @@ -26,7 +26,7 @@ def test_breakpoint(self): breakpoint = target.BreakpointCreateByLocation("main.c", 1) self.assertTrue(breakpoint.IsHardware()) - @expectedFailureIfFn(HardwareBreakpointTestBase.supports_hw_breakpoints) + @skipTestIfFn(HardwareBreakpointTestBase.supports_hw_breakpoints) def test_step_range(self): """Test stepping when hardware breakpoints are required.""" self.build() @@ -49,7 +49,7 @@ def test_step_range(self): "Could not create hardware breakpoint for thread plan" in error.GetCString() ) - @expectedFailureIfFn(HardwareBreakpointTestBase.supports_hw_breakpoints) + @skipTestIfFn(HardwareBreakpointTestBase.supports_hw_breakpoints) def test_step_out(self): """Test stepping out when hardware 
breakpoints are required.""" self.build() @@ -71,7 +71,7 @@ def test_step_out(self): "Could not create hardware breakpoint for thread plan" in error.GetCString() ) - @expectedFailureIfFn(HardwareBreakpointTestBase.supports_hw_breakpoints) + @skipTestIfFn(HardwareBreakpointTestBase.supports_hw_breakpoints) def test_step_over(self): """Test stepping over when hardware breakpoints are required.""" self.build() @@ -91,7 +91,7 @@ def test_step_over(self): # Was reported to sometimes pass on certain hardware. @skipIf(oslist=["linux"], archs=["arm"]) - @expectedFailureIfFn(HardwareBreakpointTestBase.supports_hw_breakpoints) + @skipTestIfFn(HardwareBreakpointTestBase.supports_hw_breakpoints) def test_step_until(self): """Test stepping until when hardware breakpoints are required.""" self.build() diff --git a/lldb/test/API/functionalities/launch_stop_at_entry/TestStopAtEntry.py b/lldb/test/API/functionalities/launch_stop_at_entry/TestStopAtEntry.py index 496f9c20c2ce7b..b4e2b39e0d5dbd 100644 --- a/lldb/test/API/functionalities/launch_stop_at_entry/TestStopAtEntry.py +++ b/lldb/test/API/functionalities/launch_stop_at_entry/TestStopAtEntry.py @@ -49,15 +49,15 @@ def test_stop_default_platform_async(self): @skipUnlessDarwin @skipIfRemote - @expectedFailureIfFn(no_debugserver) - @expectedFailureIfFn(port_not_available) + @skipTestIfFn(no_debugserver) + @skipTestIfFn(port_not_available) def test_stop_remote_platform_sync(self): self.do_test_stop_at_entry(True, True) @skipUnlessDarwin @skipIfRemote - @expectedFailureIfFn(no_debugserver) - @expectedFailureIfFn(port_not_available) + @skipTestIfFn(no_debugserver) + @skipTestIfFn(port_not_available) def test_stop_remote_platform_async(self): self.do_test_stop_at_entry(False, True) From ad49657a424db5e5979236ef5a474e93d827ab2c Mon Sep 17 00:00:00 2001 From: PiJoules <6019989+PiJoules@users.noreply.github.com> Date: Wed, 14 Feb 2024 14:11:56 -0800 Subject: [PATCH 189/240] [clang] Add fixed point precision macros (#81207) This defines 
the builtin macros specified in `7.18a.3 Precision macros` of ISO/IEC TR 18037:2008. These are the `__*__` versions of them and the formal definitions in stdfix.h can use them. --- clang/docs/ReleaseNotes.rst | 6 ++ clang/lib/Frontend/InitPreprocessor.cpp | 95 ++++++++++++++++++++++++ clang/test/Preprocessor/fixed-point.c | 67 +++++++++++++++++ clang/test/Preprocessor/no-fixed-point.c | 7 ++ llvm/include/llvm/ADT/APFixedPoint.h | 1 + llvm/lib/Support/APFixedPoint.cpp | 6 ++ 6 files changed, 182 insertions(+) create mode 100644 clang/test/Preprocessor/fixed-point.c create mode 100644 clang/test/Preprocessor/no-fixed-point.c diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 6cf48d63dd512e..a745f20199ceba 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -305,6 +305,12 @@ DWARF Support in Clang Floating Point Support in Clang ------------------------------- +Fixed Point Support in Clang +---------------------------- + +- Support fixed point precision macros according to ``7.18a.3`` of + `ISO/IEC TR 18037:2008 `_. + AST Matchers ------------ diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index 877e205e2e9bfa..1b250cda42a4dd 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -768,6 +768,60 @@ void InitializeOpenCLFeatureTestMacros(const TargetInfo &TI, Builder.defineMacro("__opencl_c_int64"); } +llvm::SmallString<32> ConstructFixedPointLiteral(llvm::APFixedPoint Val, + llvm::StringRef Suffix) { + if (Val.isSigned() && Val == llvm::APFixedPoint::getMin(Val.getSemantics())) { + // When representing the min value of a signed fixed point type in source + // code, we cannot simply write `-`. For example, the min + // value of a `short _Fract` cannot be written as `-1.0hr`. 
This is because + // the parser will read this (and really any negative numerical literal) as + // a UnaryOperator that owns a FixedPointLiteral with a positive value + // rather than just a FixedPointLiteral with a negative value. Compiling + // `-1.0hr` results in an overflow to the maximal value of that fixed point + // type. The correct way to represent a signed min value is to instead split + // it into two halves, like `(-0.5hr-0.5hr)` which is what the standard + // defines SFRACT_MIN as. + llvm::SmallString<32> Literal; + Literal.push_back('('); + llvm::SmallString<32> HalfStr = + ConstructFixedPointLiteral(Val.shr(1), Suffix); + Literal += HalfStr; + Literal += HalfStr; + Literal.push_back(')'); + return Literal; + } + + llvm::SmallString<32> Str(Val.toString()); + Str += Suffix; + return Str; +} + +void DefineFixedPointMacros(const TargetInfo &TI, MacroBuilder &Builder, + llvm::StringRef TypeName, llvm::StringRef Suffix, + unsigned Width, unsigned Scale, bool Signed) { + // Saturation doesn't affect the size or scale of a fixed point type, so we + // don't need it here. + llvm::FixedPointSemantics FXSema( + Width, Scale, Signed, /*IsSaturated=*/false, + !Signed && TI.doUnsignedFixedPointTypesHavePadding()); + llvm::SmallString<32> MacroPrefix("__"); + MacroPrefix += TypeName; + Builder.defineMacro(MacroPrefix + "_EPSILON__", + ConstructFixedPointLiteral( + llvm::APFixedPoint::getEpsilon(FXSema), Suffix)); + Builder.defineMacro(MacroPrefix + "_FBIT__", Twine(Scale)); + Builder.defineMacro( + MacroPrefix + "_MAX__", + ConstructFixedPointLiteral(llvm::APFixedPoint::getMax(FXSema), Suffix)); + + // ISO/IEC TR 18037:2008 doesn't specify MIN macros for unsigned types since + // they're all just zero. 
+ if (Signed) + Builder.defineMacro( + MacroPrefix + "_MIN__", + ConstructFixedPointLiteral(llvm::APFixedPoint::getMin(FXSema), Suffix)); +} + static void InitializePredefinedMacros(const TargetInfo &TI, const LangOptions &LangOpts, const FrontendOptions &FEOpts, @@ -1097,6 +1151,47 @@ static void InitializePredefinedMacros(const TargetInfo &TI, TI.getTypeWidth(TI.getIntMaxType()) && "uintmax_t and intmax_t have different widths?"); + if (LangOpts.FixedPoint) { + // Each unsigned type has the same width as their signed type. + DefineFixedPointMacros(TI, Builder, "SFRACT", "HR", TI.getShortFractWidth(), + TI.getShortFractScale(), /*Signed=*/true); + DefineFixedPointMacros(TI, Builder, "USFRACT", "UHR", + TI.getShortFractWidth(), + TI.getUnsignedShortFractScale(), /*Signed=*/false); + DefineFixedPointMacros(TI, Builder, "FRACT", "R", TI.getFractWidth(), + TI.getFractScale(), /*Signed=*/true); + DefineFixedPointMacros(TI, Builder, "UFRACT", "UR", TI.getFractWidth(), + TI.getUnsignedFractScale(), /*Signed=*/false); + DefineFixedPointMacros(TI, Builder, "LFRACT", "LR", TI.getLongFractWidth(), + TI.getLongFractScale(), /*Signed=*/true); + DefineFixedPointMacros(TI, Builder, "ULFRACT", "ULR", + TI.getLongFractWidth(), + TI.getUnsignedLongFractScale(), /*Signed=*/false); + DefineFixedPointMacros(TI, Builder, "SACCUM", "HK", TI.getShortAccumWidth(), + TI.getShortAccumScale(), /*Signed=*/true); + DefineFixedPointMacros(TI, Builder, "USACCUM", "UHK", + TI.getShortAccumWidth(), + TI.getUnsignedShortAccumScale(), /*Signed=*/false); + DefineFixedPointMacros(TI, Builder, "ACCUM", "K", TI.getAccumWidth(), + TI.getAccumScale(), /*Signed=*/true); + DefineFixedPointMacros(TI, Builder, "UACCUM", "UK", TI.getAccumWidth(), + TI.getUnsignedAccumScale(), /*Signed=*/false); + DefineFixedPointMacros(TI, Builder, "LACCUM", "LK", TI.getLongAccumWidth(), + TI.getLongAccumScale(), /*Signed=*/true); + DefineFixedPointMacros(TI, Builder, "ULACCUM", "ULK", + TI.getLongAccumWidth(), + 
TI.getUnsignedLongAccumScale(), /*Signed=*/false); + + Builder.defineMacro("__SACCUM_IBIT__", Twine(TI.getShortAccumIBits())); + Builder.defineMacro("__USACCUM_IBIT__", + Twine(TI.getUnsignedShortAccumIBits())); + Builder.defineMacro("__ACCUM_IBIT__", Twine(TI.getAccumIBits())); + Builder.defineMacro("__UACCUM_IBIT__", Twine(TI.getUnsignedAccumIBits())); + Builder.defineMacro("__LACCUM_IBIT__", Twine(TI.getLongAccumIBits())); + Builder.defineMacro("__ULACCUM_IBIT__", + Twine(TI.getUnsignedLongAccumIBits())); + } + if (TI.hasFloat16Type()) DefineFloatMacros(Builder, "FLT16", &TI.getHalfFormat(), "F16"); DefineFloatMacros(Builder, "FLT", &TI.getFloatFormat(), "F"); diff --git a/clang/test/Preprocessor/fixed-point.c b/clang/test/Preprocessor/fixed-point.c new file mode 100644 index 00000000000000..3adf36d3338649 --- /dev/null +++ b/clang/test/Preprocessor/fixed-point.c @@ -0,0 +1,67 @@ +/// Assert the fixed point precision macros according to ISO/IEC TR 18037:2008 7.18a.3 are +/// defined when -ffixed-point is provided. + +// RUN: %clang_cc1 -triple=x86_64 -E -dM -ffixed-point -x c < /dev/null | FileCheck -match-full-lines %s +// RUN: %clang_cc1 -triple=x86_64 -E -dM -ffixed-point -x c++ < /dev/null | FileCheck -match-full-lines %s + +/// These are the implementation-defined values for x86_64. 
+// CHECK-DAG:#define __SFRACT_EPSILON__ 0.0078125HR +// CHECK-DAG:#define __SFRACT_FBIT__ 7 +// CHECK-DAG:#define __SFRACT_MAX__ 0.9921875HR +// CHECK-DAG:#define __SFRACT_MIN__ (-0.5HR-0.5HR) + +// CHECK-DAG:#define __USFRACT_EPSILON__ 0.00390625UHR +// CHECK-DAG:#define __USFRACT_FBIT__ 8 +// CHECK-DAG:#define __USFRACT_MAX__ 0.99609375UHR + +// CHECK-DAG:#define __FRACT_EPSILON__ 0.000030517578125R +// CHECK-DAG:#define __FRACT_FBIT__ 15 +// CHECK-DAG:#define __FRACT_MAX__ 0.999969482421875R +// CHECK-DAG:#define __FRACT_MIN__ (-0.5R-0.5R) + +// CHECK-DAG:#define __UFRACT_EPSILON__ 0.0000152587890625UR +// CHECK-DAG:#define __UFRACT_FBIT__ 16 +// CHECK-DAG:#define __UFRACT_MAX__ 0.9999847412109375UR + +// CHECK-DAG:#define __LFRACT_EPSILON__ 0.0000000004656612873077392578125LR +// CHECK-DAG:#define __LFRACT_FBIT__ 31 +// CHECK-DAG:#define __LFRACT_MAX__ 0.9999999995343387126922607421875LR +// CHECK-DAG:#define __LFRACT_MIN__ (-0.5LR-0.5LR) + +// CHECK-DAG:#define __ULFRACT_EPSILON__ 0.00000000023283064365386962890625ULR +// CHECK-DAG:#define __ULFRACT_FBIT__ 32 +// CHECK-DAG:#define __ULFRACT_MAX__ 0.99999999976716935634613037109375ULR + +// CHECK-DAG:#define __SACCUM_EPSILON__ 0.0078125HK +// CHECK-DAG:#define __SACCUM_FBIT__ 7 +// CHECK-DAG:#define __SACCUM_MAX__ 255.9921875HK +// CHECK-DAG:#define __SACCUM_MIN__ (-128.0HK-128.0HK) + +// CHECK-DAG:#define __USACCUM_EPSILON__ 0.00390625UHK +// CHECK-DAG:#define __USACCUM_FBIT__ 8 +// CHECK-DAG:#define __USACCUM_MAX__ 255.99609375UHK + +// CHECK-DAG:#define __ACCUM_EPSILON__ 0.000030517578125K +// CHECK-DAG:#define __ACCUM_FBIT__ 15 +// CHECK-DAG:#define __ACCUM_MAX__ 65535.999969482421875K +// CHECK-DAG:#define __ACCUM_MIN__ (-32768.0K-32768.0K) + +// CHECK-DAG:#define __UACCUM_EPSILON__ 0.0000152587890625UK +// CHECK-DAG:#define __UACCUM_FBIT__ 16 +// CHECK-DAG:#define __UACCUM_MAX__ 65535.9999847412109375UK + +// CHECK-DAG:#define __LACCUM_EPSILON__ 0.0000000004656612873077392578125LK +// CHECK-DAG:#define 
__LACCUM_FBIT__ 31 +// CHECK-DAG:#define __LACCUM_MAX__ 4294967295.9999999995343387126922607421875LK +// CHECK-DAG:#define __LACCUM_MIN__ (-2147483648.0LK-2147483648.0LK) + +// CHECK-DAG:#define __ULACCUM_EPSILON__ 0.00000000023283064365386962890625ULK +// CHECK-DAG:#define __ULACCUM_FBIT__ 32 +// CHECK-DAG:#define __ULACCUM_MAX__ 4294967295.99999999976716935634613037109375ULK + +// CHECK-DAG:#define __SACCUM_IBIT__ 8 +// CHECK-DAG:#define __USACCUM_IBIT__ 8 +// CHECK-DAG:#define __ACCUM_IBIT__ 16 +// CHECK-DAG:#define __UACCUM_IBIT__ 16 +// CHECK-DAG:#define __LACCUM_IBIT__ 32 +// CHECK-DAG:#define __ULACCUM_IBIT__ 32 diff --git a/clang/test/Preprocessor/no-fixed-point.c b/clang/test/Preprocessor/no-fixed-point.c new file mode 100644 index 00000000000000..fe88ca22c9faa2 --- /dev/null +++ b/clang/test/Preprocessor/no-fixed-point.c @@ -0,0 +1,7 @@ +/// Assert the fixed point precision macros according to ISO/IEC TR 18037:2008 7.18a.3 are not +/// defined when -ffixed-point is not provided. + +// RUN: %clang_cc1 -triple=x86_64 -E -dM -x c < /dev/null | FileCheck -match-full-lines %s +// RUN: %clang_cc1 -triple=x86_64 -E -dM -x c++ < /dev/null | FileCheck -match-full-lines %s + +// CHECK-NOT:#define __SFRACT_FBIT__ 7 diff --git a/llvm/include/llvm/ADT/APFixedPoint.h b/llvm/include/llvm/ADT/APFixedPoint.h index b0c510865f444e..0c014e76aa7126 100644 --- a/llvm/include/llvm/ADT/APFixedPoint.h +++ b/llvm/include/llvm/ADT/APFixedPoint.h @@ -260,6 +260,7 @@ class APFixedPoint { static APFixedPoint getMax(const FixedPointSemantics &Sema); static APFixedPoint getMin(const FixedPointSemantics &Sema); + static APFixedPoint getEpsilon(const FixedPointSemantics &Sema); /// Given a floating point semantic, return the next floating point semantic /// with a larger exponent and larger or equal mantissa. 
diff --git a/llvm/lib/Support/APFixedPoint.cpp b/llvm/lib/Support/APFixedPoint.cpp index 3eea01bc98093a..249c4f1e2153da 100644 --- a/llvm/lib/Support/APFixedPoint.cpp +++ b/llvm/lib/Support/APFixedPoint.cpp @@ -129,6 +129,12 @@ APFixedPoint APFixedPoint::getMin(const FixedPointSemantics &Sema) { return APFixedPoint(Val, Sema); } +APFixedPoint APFixedPoint::getEpsilon(const FixedPointSemantics &Sema) { + APSInt Val(Sema.getWidth(), !Sema.isSigned()); + Val.setBit(/*BitPosition=*/0); + return APFixedPoint(Val, Sema); +} + bool FixedPointSemantics::fitsInFloatSemantics( const fltSemantics &FloatSema) const { // A fixed point semantic fits in a floating point semantic if the maximum From 271e07321bd0673f5472055012b4cfd1671be9ec Mon Sep 17 00:00:00 2001 From: lntue <35648136+lntue@users.noreply.github.com> Date: Wed, 14 Feb 2024 17:12:51 -0500 Subject: [PATCH 190/240] [libc] Fix fixed point detection and add compile option. (#81788) --- libc/cmake/modules/LLVMLibCObjectRules.cmake | 4 ++++ libc/cmake/modules/compiler_features/check_fixed_point.cpp | 2 +- libc/test/UnitTest/CMakeLists.txt | 3 +++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/libc/cmake/modules/LLVMLibCObjectRules.cmake b/libc/cmake/modules/LLVMLibCObjectRules.cmake index ef1f24863f61ab..54c7e1eaed0bfd 100644 --- a/libc/cmake/modules/LLVMLibCObjectRules.cmake +++ b/libc/cmake/modules/LLVMLibCObjectRules.cmake @@ -49,6 +49,10 @@ function(_get_common_compile_options output_var flags) list(APPEND compile_options "-ffreestanding") endif() + if(LIBC_COMPILER_HAS_FIXED_POINT) + list(APPEND compile_options "-ffixed-point") + endif() + list(APPEND compile_options "-fno-builtin") list(APPEND compile_options "-fno-exceptions") list(APPEND compile_options "-fno-lax-vector-conversions") diff --git a/libc/cmake/modules/compiler_features/check_fixed_point.cpp b/libc/cmake/modules/compiler_features/check_fixed_point.cpp index 02932dbf4d722d..a5192697d43f77 100644 --- 
a/libc/cmake/modules/compiler_features/check_fixed_point.cpp +++ b/libc/cmake/modules/compiler_features/check_fixed_point.cpp @@ -1,4 +1,4 @@ -#include "include/llvm-libc-macross/stdfix_macros.h" +#include "include/llvm-libc-macros/stdfix-macros.h" #ifndef LIBC_COMPILER_HAS_FIXED_POINT #error unsupported diff --git a/libc/test/UnitTest/CMakeLists.txt b/libc/test/UnitTest/CMakeLists.txt index 9baa41874a83db..e3099e45154765 100644 --- a/libc/test/UnitTest/CMakeLists.txt +++ b/libc/test/UnitTest/CMakeLists.txt @@ -31,6 +31,9 @@ function(add_unittest_framework_library name) if(TARGET libc.src.time.clock) target_compile_definitions(${lib} PRIVATE TARGET_SUPPORTS_CLOCK) endif() + if(LIBC_COMPILER_HAS_FIXED_POINT) + target_compile_options(${lib} PUBLIC -ffixed-point) + endif() endforeach() target_include_directories(${name}.hermetic PRIVATE ${LIBC_BUILD_DIR}/include) target_compile_options(${name}.hermetic From 3a49dfb28fed8f784484ce2ce6d687550f27ad59 Mon Sep 17 00:00:00 2001 From: Ryosuke Niwa Date: Wed, 14 Feb 2024 14:44:51 -0800 Subject: [PATCH 191/240] [analyzer] Check the safety of the object argument in a member function call. (#81400) This PR makes alpha.webkit.UncountedCallArgsChecker eplicitly check the safety of the object argument in a member function call. It also removes the exemption of local variables from this checker so that each local variable's safety is checked if it's used in a function call instead of relying on the local variable checker to find those since local variable checker currently has exemption for "for" and "if" statements. 
--- .../WebKit/UncountedCallArgsChecker.cpp | 64 +++++++++++++------ .../Checkers/WebKit/uncounted-obj-arg.cpp | 18 ++++++ 2 files changed, 63 insertions(+), 19 deletions(-) create mode 100644 clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.cpp diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp index f4e6191cf05a3c..c84e1f9c244a88 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp @@ -70,6 +70,15 @@ class UncountedCallArgsChecker // or std::function call operator). unsigned ArgIdx = isa(CE) && isa_and_nonnull(F); + if (auto *MemberCallExpr = dyn_cast(CE)) { + auto *E = MemberCallExpr->getImplicitObjectArgument(); + QualType ArgType = MemberCallExpr->getObjectType(); + std::optional IsUncounted = + isUncounted(ArgType->getAsCXXRecordDecl()); + if (IsUncounted && *IsUncounted && !isPtrOriginSafe(E)) + reportBugOnThis(E); + } + for (auto P = F->param_begin(); // FIXME: Also check variadic function parameters. // FIXME: Also check default function arguments. Probably a different @@ -94,25 +103,7 @@ class UncountedCallArgsChecker if (auto *defaultArg = dyn_cast(Arg)) Arg = defaultArg->getExpr(); - std::pair ArgOrigin = - tryToFindPtrOrigin(Arg, true); - - // Temporary ref-counted object created as part of the call argument - // would outlive the call. - if (ArgOrigin.second) - continue; - - if (isa(ArgOrigin.first)) { - // foo(nullptr) - continue; - } - if (isa(ArgOrigin.first)) { - // FIXME: Check the value. 
- // foo(NULL) - continue; - } - - if (isASafeCallArg(ArgOrigin.first)) + if (isPtrOriginSafe(Arg)) continue; reportBug(Arg, *P); @@ -120,6 +111,28 @@ class UncountedCallArgsChecker } } + bool isPtrOriginSafe(const Expr *Arg) const { + std::pair ArgOrigin = + tryToFindPtrOrigin(Arg, true); + + // Temporary ref-counted object created as part of the call argument + // would outlive the call. + if (ArgOrigin.second) + return true; + + if (isa(ArgOrigin.first)) { + // foo(nullptr) + return true; + } + if (isa(ArgOrigin.first)) { + // FIXME: Check the value. + // foo(NULL) + return true; + } + + return isASafeCallArg(ArgOrigin.first); + } + bool shouldSkipCall(const CallExpr *CE) const { if (CE->getNumArgs() == 0) return false; @@ -196,6 +209,19 @@ class UncountedCallArgsChecker Report->addRange(CallArg->getSourceRange()); BR->emitReport(std::move(Report)); } + + void reportBugOnThis(const Expr *CallArg) const { + assert(CallArg); + + const SourceLocation SrcLocToReport = CallArg->getSourceRange().getBegin(); + + PathDiagnosticLocation BSLoc(SrcLocToReport, BR->getSourceManager()); + auto Report = std::make_unique( + Bug, "Call argument for 'this' parameter is uncounted and unsafe.", + BSLoc); + Report->addRange(CallArg->getSourceRange()); + BR->emitReport(std::move(Report)); + } }; } // namespace diff --git a/clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.cpp b/clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.cpp new file mode 100644 index 00000000000000..e5e39e3faac714 --- /dev/null +++ b/clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.cpp @@ -0,0 +1,18 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=alpha.webkit.UncountedCallArgsChecker -verify %s + +#include "mock-types.h" + +class RefCounted { +public: + void ref() const; + void deref() const; + void someFunction(); +}; + +RefCounted* refCountedObj(); + +void test() +{ + refCountedObj()->someFunction(); + // expected-warning@-1{{Call argument for 'this' parameter is uncounted and unsafe}} +} From 
cbdc7605edca26ff75a28f080089a835ed9dba92 Mon Sep 17 00:00:00 2001 From: Ryosuke Niwa Date: Wed, 14 Feb 2024 14:45:29 -0800 Subject: [PATCH 192/240] [analyzer] Add a few more safe functions to call. (#81532) Added checkedDowncast, uncheckedDowncast, & toString as safe functions to call in alpha.webkit.UncountedCallArgsChecker. --- .../Checkers/WebKit/PtrTypesSemantics.cpp | 5 +++-- .../WebKit/UncountedCallArgsChecker.cpp | 11 +++++----- ...ncast.cpp => call-args-safe-functions.cpp} | 21 +++++++++++++++++++ 3 files changed, 30 insertions(+), 7 deletions(-) rename clang/test/Analysis/Checkers/WebKit/{call-args-dynamic-downcast.cpp => call-args-safe-functions.cpp} (55%) diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp index 96784d42d09fa4..08ba553d16ed14 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp @@ -194,8 +194,9 @@ bool isPtrConversion(const FunctionDecl *F) { // FIXME: check # of params == 1 const auto FunctionName = safeGetName(F); if (FunctionName == "getPtr" || FunctionName == "WeakPtr" || - FunctionName == "dynamicDowncast" - || FunctionName == "downcast" || FunctionName == "bitwise_cast") + FunctionName == "dynamicDowncast" || FunctionName == "downcast" || + FunctionName == "checkedDowncast" || + FunctionName == "uncheckedDowncast" || FunctionName == "bitwise_cast") return true; return false; diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp index c84e1f9c244a88..e2e1add31c9b17 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp @@ -174,13 +174,14 @@ class UncountedCallArgsChecker auto name = safeGetName(Callee); if (name == "adoptRef" || name == "getPtr" || name 
== "WeakPtr" || - name == "dynamicDowncast" || name == "downcast" || name == "bitwise_cast" || - name == "is" || name == "equal" || name == "hash" || - name == "isType" + name == "dynamicDowncast" || name == "downcast" || + name == "checkedDowncast" || name == "uncheckedDowncast" || + name == "bitwise_cast" || name == "is" || name == "equal" || + name == "hash" || name == "isType" || // FIXME: Most/all of these should be implemented via attributes. - || name == "equalIgnoringASCIICase" || + name == "equalIgnoringASCIICase" || name == "equalIgnoringASCIICaseCommon" || - name == "equalIgnoringNullity") + name == "equalIgnoringNullity" || name == "toString") return true; return false; diff --git a/clang/test/Analysis/Checkers/WebKit/call-args-dynamic-downcast.cpp b/clang/test/Analysis/Checkers/WebKit/call-args-safe-functions.cpp similarity index 55% rename from clang/test/Analysis/Checkers/WebKit/call-args-dynamic-downcast.cpp rename to clang/test/Analysis/Checkers/WebKit/call-args-safe-functions.cpp index 28156623d9a0fd..a87446564870cd 100644 --- a/clang/test/Analysis/Checkers/WebKit/call-args-dynamic-downcast.cpp +++ b/clang/test/Analysis/Checkers/WebKit/call-args-safe-functions.cpp @@ -23,13 +23,34 @@ class OtherObject { Derived* obj(); }; +class String { +}; + template inline Target* dynamicDowncast(Source* source) { return static_cast(source); } +template +inline Target* checkedDowncast(Source* source) +{ + return static_cast(source); +} + +template +inline Target* uncheckedDowncast(Source* source) +{ + return static_cast(source); +} + +template +String toString(const Types&... 
values); + void foo(OtherObject* other) { dynamicDowncast(other->obj()); + checkedDowncast(other->obj()); + uncheckedDowncast(other->obj()); + toString(other->obj()); } From 7249692bd24afc81fbbaa24240e3c9bba046f854 Mon Sep 17 00:00:00 2001 From: Ryosuke Niwa Date: Wed, 14 Feb 2024 14:47:40 -0800 Subject: [PATCH 193/240] [analyzer] Detect a return value of Ref & RefPtr (#81580) This PR makes the checker not emit warning when a function is called with a return value of another function when the return value is of type Ref or RefPtr. --- .../Checkers/WebKit/ASTUtils.cpp | 6 +++++ .../Checkers/WebKit/PtrTypesSemantics.cpp | 20 ++++++++++++++++ .../Checkers/WebKit/PtrTypesSemantics.h | 3 +++ .../call-args-protected-return-value.cpp | 23 +++++++++++++++++++ 4 files changed, 52 insertions(+) create mode 100644 clang/test/Analysis/Checkers/WebKit/call-args-protected-return-value.cpp diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp index 4526fac64735bf..b76c0551c77bb0 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp @@ -19,6 +19,10 @@ namespace clang { std::pair tryToFindPtrOrigin(const Expr *E, bool StopAtFirstRefCountedObj) { while (E) { + if (auto *tempExpr = dyn_cast(E)) { + E = tempExpr->getSubExpr(); + continue; + } if (auto *cast = dyn_cast(E)) { if (StopAtFirstRefCountedObj) { if (auto *ConversionFunc = @@ -62,6 +66,8 @@ tryToFindPtrOrigin(const Expr *E, bool StopAtFirstRefCountedObj) { E = call->getArg(0); continue; } + if (isReturnValueRefCounted(callee)) + return {E, true}; if (isPtrConversion(callee)) { E = call->getArg(0); diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp index 08ba553d16ed14..907244013d0871 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp +++ 
b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp @@ -119,6 +119,26 @@ bool isCtorOfRefCounted(const clang::FunctionDecl *F) { || FunctionName == "Identifier"; } +bool isReturnValueRefCounted(const clang::FunctionDecl *F) { + assert(F); + QualType type = F->getReturnType(); + while (!type.isNull()) { + if (auto *elaboratedT = type->getAs()) { + type = elaboratedT->desugar(); + continue; + } + if (auto *specialT = type->getAs()) { + if (auto *decl = specialT->getTemplateName().getAsTemplateDecl()) { + auto name = decl->getNameAsString(); + return name == "Ref" || name == "RefPtr"; + } + return false; + } + return false; + } + return false; +} + std::optional isUncounted(const CXXRecordDecl* Class) { // Keep isRefCounted first as it's cheaper. diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h index 45b21cc0918443..c2c5b74442ba43 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h @@ -50,6 +50,9 @@ std::optional isUncountedPtr(const clang::Type* T); /// false if not. bool isCtorOfRefCounted(const clang::FunctionDecl *F); +/// \returns true if \p F returns a ref-counted object, false if not. +bool isReturnValueRefCounted(const clang::FunctionDecl *F); + /// \returns true if \p M is getter of a ref-counted class, false if not. 
std::optional isGetterOfRefCounted(const clang::CXXMethodDecl* Method); diff --git a/clang/test/Analysis/Checkers/WebKit/call-args-protected-return-value.cpp b/clang/test/Analysis/Checkers/WebKit/call-args-protected-return-value.cpp new file mode 100644 index 00000000000000..1c4b3df211b1e3 --- /dev/null +++ b/clang/test/Analysis/Checkers/WebKit/call-args-protected-return-value.cpp @@ -0,0 +1,23 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=alpha.webkit.UncountedCallArgsChecker -verify %s +// expected-no-diagnostics + +#include "mock-types.h" + +class RefCounted { +public: + void ref(); + void deref(); +}; + +class Object { +public: + void someFunction(RefCounted&); +}; + +RefPtr object(); +RefPtr protectedTargetObject(); + +void testFunction() { + if (RefPtr obj = object()) + obj->someFunction(*protectedTargetObject()); +} From 82ca75239340c6e2b92125fe39bf872faa044f11 Mon Sep 17 00:00:00 2001 From: Alexander Yermolovich <43973793+ayermolo@users.noreply.github.com> Date: Wed, 14 Feb 2024 15:43:39 -0800 Subject: [PATCH 194/240] [BOLT][DWARF] Add test for DW_AT_ranges input without function output (#81794) Added a test that relies on -fbasic-block-sections=all and --gc-sections that exercises a code path that previously printed a warning. 
--- bolt/lib/Rewrite/DWARFRewriter.cpp | 9 +- .../dwarf4-subprogram-single-gc-ranges.test | 6 +- bolt/test/X86/dwarf5-empty-function-ranges.s | 538 ++++++++++++++++++ .../dwarf5-subprogram-single-gc-ranges.test | 6 +- 4 files changed, 544 insertions(+), 15 deletions(-) create mode 100644 bolt/test/X86/dwarf5-empty-function-ranges.s diff --git a/bolt/lib/Rewrite/DWARFRewriter.cpp b/bolt/lib/Rewrite/DWARFRewriter.cpp index 27fa937c7508c3..592b2352fc9a13 100644 --- a/bolt/lib/Rewrite/DWARFRewriter.cpp +++ b/bolt/lib/Rewrite/DWARFRewriter.cpp @@ -919,15 +919,10 @@ void DWARFRewriter::updateUnitDebugInfo( DIEValue LowPCVal = Die->findAttribute(dwarf::DW_AT_low_pc); DIEValue HighPCVal = Die->findAttribute(dwarf::DW_AT_high_pc); if (FunctionRanges.empty()) { - if (LowPCVal && HighPCVal) { + if (LowPCVal && HighPCVal) FunctionRanges.push_back({0, HighPCVal.getDIEInteger().getValue()}); - } else { - // I haven't seen this case, but who knows what other compilers - // generate. + else FunctionRanges.push_back({0, 1}); - errs() << "BOLT-WARNING: [internal-dwarf-error]: subprogram got GCed " - "by the linker, DW_AT_ranges is used\n"; - } } if (FunctionRanges.size() == 1 && !opts::AlwaysConvertToRanges) { diff --git a/bolt/test/X86/dwarf4-subprogram-single-gc-ranges.test b/bolt/test/X86/dwarf4-subprogram-single-gc-ranges.test index 9080052a299124..3e7e765f98b197 100644 --- a/bolt/test/X86/dwarf4-subprogram-single-gc-ranges.test +++ b/bolt/test/X86/dwarf4-subprogram-single-gc-ranges.test @@ -2,14 +2,12 @@ # RUN: llvm-mc -dwarf-version=4 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf4-subprogram-single-gc-ranges-main.s -o %t1.o # RUN: %clang %cflags %t1.o -o %t.exe -Wl,-q -# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections &> %t1.txt -# RUN: llvm-dwarfdump --show-form --verbose --debug-info %t.bolt >> %t1.txt +# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections +# RUN: llvm-dwarfdump --show-form --verbose --debug-info %t.bolt > %t1.txt # RUN: cat 
%t1.txt | FileCheck --check-prefix=POSTCHECK %s # This test checks BOLT correctly handles DW_TAG_subprogram with Ranges with single entry, when function was GCed. -# POSTCHECK: BOLT-WARNING: [internal-dwarf-error]: subprogram got GCed by the linker, DW_AT_ranges is used - # POSTCHECK: DW_TAG_subprogram # POSTCHECK-NEXT: DW_AT_frame_base # POSTCHECK-NEXT: DW_AT_linkage_name diff --git a/bolt/test/X86/dwarf5-empty-function-ranges.s b/bolt/test/X86/dwarf5-empty-function-ranges.s new file mode 100644 index 00000000000000..bfa317808163ed --- /dev/null +++ b/bolt/test/X86/dwarf5-empty-function-ranges.s @@ -0,0 +1,538 @@ +# REQUIRES: system-linux + +# RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %s -o %t1.o +# RUN: %clang %cflags -dwarf-5 %t1.o -o %t.exe -Wl,-q -Wl,-gc-sections -fuse-ld=lld -Wl,--entry=main +# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections +# RUN: llvm-dwarfdump --debug-info %t.exe | FileCheck --check-prefix=PRECHECK %s +# RUN: llvm-dwarfdump --debug-info %t.bolt | FileCheck --check-prefix=POSTCHECK %s + +# PRECHECK: DW_TAG_subprogram +# PRECHECK-NEXT: DW_AT_ranges +# PRECHECK-NEXT: [0x0000000000000000 +# PRECHECK-NEXT: [0x0000000000000000 +# PRECHECK-NEXT: [0x0000000000000000 +# PRECHECK-NEXT: [0x0000000000000000 +# PRECHECK-NEXT: DW_AT_frame_base +# PRECHECK-NEXT: DW_AT_linkage_name ("_Z6helperi") +# PRECHECK-NEXT: DW_AT_name ("helper") + +# POSTCHECK: DW_TAG_subprogram +# POSTCHECK-NEXT: DW_AT_frame_base +# POSTCHECK-NEXT: DW_AT_linkage_name ("_Z6helperi") +# POSTCHECK-NEXT: DW_AT_name ("helper") +# POSTCHECK-NEXT: DW_AT_decl_file +# POSTCHECK-NEXT: DW_AT_decl_line +# POSTCHECK-NEXT: DW_AT_type +# POSTCHECK-NEXT: DW_AT_external +# POSTCHECK-NEXT: DW_AT_low_pc (0x0000000000000000) +# POSTCHECK-NEXT: DW_AT_high_pc (0x0000000000000001) + +## Tests BOLT path that handles DW_AT_ranges with no output function ranges. 
+ +## clang++ main.cpp -O0 -fno-inline-functions -fbasic-block-sections=all -g2 -S +## int helper(int argc) { +## int x = argc; +## if (x == 3) +## x++; +## else +## x--; +## return x; +## } +## int main(int argc, char *argv[]) { +## int x = argc; +## if (x == 3) +## x++; +## else +## x--; +## return x; +## } + + .text + .file "main.cpp" + .section .text._Z6helperi,"ax",@progbits + .globl _Z6helperi # -- Begin function _Z6helperi + .p2align 4, 0x90 + .type _Z6helperi,@function +_Z6helperi: # @_Z6helperi +.Lfunc_begin0: + .file 0 "/repro2" "main.cpp" md5 0x888a2704226ec400f256aa9c2207456c + .loc 0 1 0 # main.cpp:1:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movl %edi, -4(%rbp) +.Ltmp0: + .loc 0 2 11 prologue_end # main.cpp:2:11 + movl -4(%rbp), %eax + .loc 0 2 7 is_stmt 0 # main.cpp:2:7 + movl %eax, -8(%rbp) +.Ltmp1: + .loc 0 3 9 is_stmt 1 # main.cpp:3:9 + cmpl $3, -8(%rbp) +.Ltmp2: + .loc 0 3 7 is_stmt 0 # main.cpp:3:7 + jne _Z6helperi.__part.2 + jmp _Z6helperi.__part.1 +.LBB_END0_0: + .cfi_endproc + .section .text._Z6helperi,"ax",@progbits,unique,1 +_Z6helperi.__part.1: # %if.then + .cfi_startproc + .cfi_def_cfa %rbp, 16 + .cfi_offset %rbp, -16 + .loc 0 4 6 is_stmt 1 # main.cpp:4:6 + movl -8(%rbp), %eax + addl $1, %eax + movl %eax, -8(%rbp) + .loc 0 4 5 is_stmt 0 # main.cpp:4:5 + jmp _Z6helperi.__part.3 +.LBB_END0_1: + .size _Z6helperi.__part.1, .LBB_END0_1-_Z6helperi.__part.1 + .cfi_endproc + .section .text._Z6helperi,"ax",@progbits,unique,2 +_Z6helperi.__part.2: # %if.else + .cfi_startproc + .cfi_def_cfa %rbp, 16 + .cfi_offset %rbp, -16 + .loc 0 6 6 is_stmt 1 # main.cpp:6:6 + movl -8(%rbp), %eax + addl $-1, %eax + movl %eax, -8(%rbp) + jmp _Z6helperi.__part.3 +.LBB_END0_2: + .size _Z6helperi.__part.2, .LBB_END0_2-_Z6helperi.__part.2 + .cfi_endproc + .section .text._Z6helperi,"ax",@progbits,unique,3 +_Z6helperi.__part.3: # %if.end + .cfi_startproc + 
.cfi_def_cfa %rbp, 16 + .cfi_offset %rbp, -16 + .loc 0 7 10 # main.cpp:7:10 + movl -8(%rbp), %eax + .loc 0 7 3 epilogue_begin is_stmt 0 # main.cpp:7:3 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.LBB_END0_3: + .size _Z6helperi.__part.3, .LBB_END0_3-_Z6helperi.__part.3 + .cfi_endproc + .section .text._Z6helperi,"ax",@progbits +.Lfunc_end0: + .size _Z6helperi, .Lfunc_end0-_Z6helperi + # -- End function + .section .text.main,"ax",@progbits + .globl main # -- Begin function main + .p2align 4, 0x90 + .type main,@function +main: # @main +.Lfunc_begin1: + .loc 0 9 0 is_stmt 1 # main.cpp:9:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movl $0, -4(%rbp) + movl %edi, -8(%rbp) + movq %rsi, -16(%rbp) +.Ltmp3: + .loc 0 10 11 prologue_end # main.cpp:10:11 + movl -8(%rbp), %eax + .loc 0 10 7 is_stmt 0 # main.cpp:10:7 + movl %eax, -20(%rbp) +.Ltmp4: + .loc 0 11 9 is_stmt 1 # main.cpp:11:9 + cmpl $3, -20(%rbp) +.Ltmp5: + .loc 0 11 7 is_stmt 0 # main.cpp:11:7 + jne main.__part.2 + jmp main.__part.1 +.LBB_END1_0: + .cfi_endproc + .section .text.main,"ax",@progbits,unique,4 +main.__part.1: # %if.then + .cfi_startproc + .cfi_def_cfa %rbp, 16 + .cfi_offset %rbp, -16 + .loc 0 12 6 is_stmt 1 # main.cpp:12:6 + movl -20(%rbp), %eax + addl $1, %eax + movl %eax, -20(%rbp) + .loc 0 12 5 is_stmt 0 # main.cpp:12:5 + jmp main.__part.3 +.LBB_END1_1: + .size main.__part.1, .LBB_END1_1-main.__part.1 + .cfi_endproc + .section .text.main,"ax",@progbits,unique,5 +main.__part.2: # %if.else + .cfi_startproc + .cfi_def_cfa %rbp, 16 + .cfi_offset %rbp, -16 + .loc 0 14 6 is_stmt 1 # main.cpp:14:6 + movl -20(%rbp), %eax + addl $-1, %eax + movl %eax, -20(%rbp) + jmp main.__part.3 +.LBB_END1_2: + .size main.__part.2, .LBB_END1_2-main.__part.2 + .cfi_endproc + .section .text.main,"ax",@progbits,unique,6 +main.__part.3: # %if.end + .cfi_startproc + .cfi_def_cfa %rbp, 16 + .cfi_offset %rbp, -16 + .loc 0 15 10 # 
main.cpp:15:10 + movl -20(%rbp), %eax + .loc 0 15 3 epilogue_begin is_stmt 0 # main.cpp:15:3 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.LBB_END1_3: + .size main.__part.3, .LBB_END1_3-main.__part.3 + .cfi_endproc + .section .text.main,"ax",@progbits +.Lfunc_end1: + .size main, .Lfunc_end1-main + # -- End function + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 37 # DW_FORM_strx1 + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 114 # DW_AT_str_offsets_base + .byte 23 # DW_FORM_sec_offset + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 37 # DW_FORM_strx1 + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 85 # DW_AT_ranges + .byte 35 # DW_FORM_rnglistx + .byte 115 # DW_AT_addr_base + .byte 23 # DW_FORM_sec_offset + .byte 116 # DW_AT_rnglists_base + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 85 # DW_AT_ranges + .byte 35 # DW_FORM_rnglistx + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 110 # DW_AT_linkage_name + .byte 37 # DW_FORM_strx1 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + 
.byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + .byte 52 # DW_TAG_variable + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 5 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 85 # DW_AT_ranges + .byte 35 # DW_FORM_rnglistx + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 6 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 7 # Abbreviation Code + .byte 15 # DW_TAG_pointer_type + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev # Offset Into Abbrev. 
Section + .byte 1 # Abbrev [1] 0xc:0x82 DW_TAG_compile_unit + .byte 0 # DW_AT_producer + .short 33 # DW_AT_language + .byte 1 # DW_AT_name + .long .Lstr_offsets_base0 # DW_AT_str_offsets_base + .long .Lline_table_start0 # DW_AT_stmt_list + .byte 2 # DW_AT_comp_dir + .quad 0 # DW_AT_low_pc + .byte 2 # DW_AT_ranges + .long .Laddr_table_base0 # DW_AT_addr_base + .long .Lrnglists_table_base0 # DW_AT_rnglists_base + .byte 2 # Abbrev [2] 0x2b:0x23 DW_TAG_subprogram + .byte 0 # DW_AT_ranges + .byte 1 # DW_AT_frame_base + .byte 86 + .byte 3 # DW_AT_linkage_name + .byte 4 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 123 # DW_AT_type + # DW_AT_external + .byte 3 # Abbrev [3] 0x37:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 124 + .byte 7 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 123 # DW_AT_type + .byte 4 # Abbrev [4] 0x42:0xb DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .byte 8 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 2 # DW_AT_decl_line + .long 123 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 5 # Abbrev [5] 0x4e:0x2d DW_TAG_subprogram + .byte 1 # DW_AT_ranges + .byte 1 # DW_AT_frame_base + .byte 86 + .byte 6 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 9 # DW_AT_decl_line + .long 123 # DW_AT_type + # DW_AT_external + .byte 3 # Abbrev [3] 0x59:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .byte 7 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 9 # DW_AT_decl_line + .long 123 # DW_AT_type + .byte 3 # Abbrev [3] 0x64:0xb DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .byte 9 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 9 # DW_AT_decl_line + .long 127 # DW_AT_type + .byte 4 # Abbrev [4] 0x6f:0xb DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 108 + .byte 8 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 10 # DW_AT_decl_line + .long 123 # DW_AT_type + 
.byte 0 # End Of Children Mark + .byte 6 # Abbrev [6] 0x7b:0x4 DW_TAG_base_type + .byte 5 # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 7 # Abbrev [7] 0x7f:0x5 DW_TAG_pointer_type + .long 132 # DW_AT_type + .byte 7 # Abbrev [7] 0x84:0x5 DW_TAG_pointer_type + .long 137 # DW_AT_type + .byte 6 # Abbrev [6] 0x89:0x4 DW_TAG_base_type + .byte 10 # DW_AT_name + .byte 6 # DW_AT_encoding + .byte 1 # DW_AT_byte_size + .byte 0 # End Of Children Mark +.Ldebug_info_end0: + .section .debug_rnglists,"",@progbits + .long .Ldebug_list_header_end0-.Ldebug_list_header_start0 # Length +.Ldebug_list_header_start0: + .short 5 # Version + .byte 8 # Address size + .byte 0 # Segment selector size + .long 3 # Offset entry count +.Lrnglists_table_base0: + .long .Ldebug_ranges0-.Lrnglists_table_base0 + .long .Ldebug_ranges1-.Lrnglists_table_base0 + .long .Ldebug_ranges2-.Lrnglists_table_base0 +.Ldebug_ranges0: + .byte 3 # DW_RLE_startx_length + .byte 0 # start index + .uleb128 .LBB_END0_1-_Z6helperi.__part.1 # length + .byte 3 # DW_RLE_startx_length + .byte 1 # start index + .uleb128 .LBB_END0_2-_Z6helperi.__part.2 # length + .byte 3 # DW_RLE_startx_length + .byte 2 # start index + .uleb128 .LBB_END0_3-_Z6helperi.__part.3 # length + .byte 3 # DW_RLE_startx_length + .byte 3 # start index + .uleb128 .Lfunc_end0-.Lfunc_begin0 # length + .byte 0 # DW_RLE_end_of_list +.Ldebug_ranges1: + .byte 3 # DW_RLE_startx_length + .byte 4 # start index + .uleb128 .LBB_END1_1-main.__part.1 # length + .byte 3 # DW_RLE_startx_length + .byte 5 # start index + .uleb128 .LBB_END1_2-main.__part.2 # length + .byte 3 # DW_RLE_startx_length + .byte 6 # start index + .uleb128 .LBB_END1_3-main.__part.3 # length + .byte 3 # DW_RLE_startx_length + .byte 7 # start index + .uleb128 .Lfunc_end1-.Lfunc_begin1 # length + .byte 0 # DW_RLE_end_of_list +.Ldebug_ranges2: + .byte 3 # DW_RLE_startx_length + .byte 0 # start index + .uleb128 .LBB_END0_1-_Z6helperi.__part.1 # length + .byte 3 # 
DW_RLE_startx_length + .byte 1 # start index + .uleb128 .LBB_END0_2-_Z6helperi.__part.2 # length + .byte 3 # DW_RLE_startx_length + .byte 2 # start index + .uleb128 .LBB_END0_3-_Z6helperi.__part.3 # length + .byte 3 # DW_RLE_startx_length + .byte 3 # start index + .uleb128 .Lfunc_end0-.Lfunc_begin0 # length + .byte 3 # DW_RLE_startx_length + .byte 4 # start index + .uleb128 .LBB_END1_1-main.__part.1 # length + .byte 3 # DW_RLE_startx_length + .byte 5 # start index + .uleb128 .LBB_END1_2-main.__part.2 # length + .byte 3 # DW_RLE_startx_length + .byte 6 # start index + .uleb128 .LBB_END1_3-main.__part.3 # length + .byte 3 # DW_RLE_startx_length + .byte 7 # start index + .uleb128 .Lfunc_end1-.Lfunc_begin1 # length + .byte 0 # DW_RLE_end_of_list +.Ldebug_list_header_end0: + .section .debug_str_offsets,"",@progbits + .long 48 # Length of String Offsets Set + .short 5 + .short 0 +.Lstr_offsets_base0: + .section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "clang version 19.0.0git (git@github.com:ayermolo/llvm-project.git a1d8664d409cac2a923176a8e9a731385bde279e)" # string offset=0 +.Linfo_string1: + .asciz "main.cpp" # string offset=108 +.Linfo_string2: + .asciz "/repro2" # string offset=117 +.Linfo_string3: + .asciz "_Z6helperi" # string offset=162 +.Linfo_string4: + .asciz "helper" # string offset=173 +.Linfo_string5: + .asciz "int" # string offset=180 +.Linfo_string6: + .asciz "main" # string offset=184 +.Linfo_string7: + .asciz "argc" # string offset=189 +.Linfo_string8: + .asciz "x" # string offset=194 +.Linfo_string9: + .asciz "argv" # string offset=196 +.Linfo_string10: + .asciz "char" # string offset=201 + .section .debug_str_offsets,"",@progbits + .long .Linfo_string0 + .long .Linfo_string1 + .long .Linfo_string2 + .long .Linfo_string3 + .long .Linfo_string4 + .long .Linfo_string5 + .long .Linfo_string6 + .long .Linfo_string7 + .long .Linfo_string8 + .long .Linfo_string9 + .long .Linfo_string10 + .section .debug_addr,"",@progbits + .long 
.Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution +.Ldebug_addr_start0: + .short 5 # DWARF version number + .byte 8 # Address size + .byte 0 # Segment selector size +.Laddr_table_base0: + .quad _Z6helperi.__part.1 + .quad _Z6helperi.__part.2 + .quad _Z6helperi.__part.3 + .quad .Lfunc_begin0 + .quad main.__part.1 + .quad main.__part.2 + .quad main.__part.3 + .quad .Lfunc_begin1 +.Ldebug_addr_end0: + .ident "clang version 19.0.0git (git@github.com:ayermolo/llvm-project.git a1d8664d409cac2a923176a8e9a731385bde279e)" + .section ".note.GNU-stack","",@progbits + .addrsig + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/bolt/test/X86/dwarf5-subprogram-single-gc-ranges.test b/bolt/test/X86/dwarf5-subprogram-single-gc-ranges.test index 04b7203a5bea61..9f8f895ed5f16d 100644 --- a/bolt/test/X86/dwarf5-subprogram-single-gc-ranges.test +++ b/bolt/test/X86/dwarf5-subprogram-single-gc-ranges.test @@ -2,14 +2,12 @@ # RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf5-subprogram-single-gc-ranges-main.s -o %t1.o # RUN: %clang %cflags %t1.o -o %t.exe -Wl,-q -# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections &> %t1.txt -# RUN: llvm-dwarfdump --show-form --verbose --debug-info %t.bolt >> %t1.txt +# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections +# RUN: llvm-dwarfdump --show-form --verbose --debug-info %t.bolt > %t1.txt # RUN: cat %t1.txt | FileCheck --check-prefix=POSTCHECK %s # This test checks BOLT correctly handles DW_TAG_subprogram with Ranges with single entry, when function was GCed. 
-# POSTCHECK: BOLT-WARNING: [internal-dwarf-error]: subprogram got GCed by the linker, DW_AT_ranges is used - # POSTCHECK: DW_TAG_subprogram # POSTCHECK-NEXT: DW_AT_frame_base # POSTCHECK-NEXT: DW_AT_linkage_name From 4eb092d9f8999338fd4c7ef65268649636b7f86a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Rodr=C3=ADguez=20Troiti=C3=B1o?= Date: Thu, 15 Feb 2024 00:49:09 +0100 Subject: [PATCH 195/240] [llvm][compiler-rt] Connect lit dependencies to test-depends targets. (#81783) compiler-rt was creating the test-depends targets and trying to fill its dependencies with a variable, but the variable was empty because it was supposed to take its value from a property. The changes in this commit grab the value of the property and add them as dependencies. The changes in llvm are to remove the usage of `DEPENDS` arguments from `add_custom_target`, which according to the documentation is reserved for files/outputs created by `add_custom_command`. Use `add_dependencies` instead. This is similar to the changes introduced in 4eb84582344f97167b6a2b4cb1fb1d75ae07897e for runtimes. --- compiler-rt/test/CMakeLists.txt | 6 +++++- llvm/CMakeLists.txt | 6 ++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/compiler-rt/test/CMakeLists.txt b/compiler-rt/test/CMakeLists.txt index ee2ef907bcae45..c186be1e44fd9a 100644 --- a/compiler-rt/test/CMakeLists.txt +++ b/compiler-rt/test/CMakeLists.txt @@ -116,7 +116,11 @@ endif() # Now that we've traversed all the directories and know all the lit testsuites, # introduce a rule to run to run all of them. 
-add_custom_target(compiler-rt-test-depends DEPENDS ${LLVM_COMPILER_RT_LIT_DEPENDS}) +get_property(LLVM_COMPILER_RT_LIT_DEPENDS GLOBAL PROPERTY LLVM_COMPILER_RT_LIT_DEPENDS) +add_custom_target(compiler-rt-test-depends) +if(LLVM_COMPILER_RT_LIT_DEPENDS) + add_dependencies(compiler-rt-test-depends ${LLVM_COMPILER_RT_LIT_DEPENDS}) +endif() umbrella_lit_testsuite_end(check-compiler-rt) if(COMPILER_RT_STANDALONE_BUILD) diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 81f2753a4edd85..a760a19efcb6b1 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -1256,8 +1256,10 @@ if( LLVM_INCLUDE_TESTS ) get_property(LLVM_ALL_LIT_DEPENDS GLOBAL PROPERTY LLVM_ALL_LIT_DEPENDS) get_property(LLVM_ALL_ADDITIONAL_TEST_DEPENDS GLOBAL PROPERTY LLVM_ALL_ADDITIONAL_TEST_DEPENDS) - add_custom_target(test-depends - DEPENDS ${LLVM_ALL_LIT_DEPENDS} ${LLVM_ALL_ADDITIONAL_TEST_DEPENDS}) + add_custom_target(test-depends) + if(LLVM_ALL_LIT_DEPENDS OR LLVM_ALL_ADDITIONAL_TEST_DEPENDS) + add_dependencies(test-depends ${LLVM_ALL_LIT_DEPENDS} ${LLVM_ALL_ADDITIONAL_TEST_DEPENDS}) + endif() set_target_properties(test-depends PROPERTIES FOLDER "Tests") add_dependencies(check-all test-depends) endif() From acdb4cdc04ed4d9a130f0fa706ed1b0f42cc1aa0 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 14 Feb 2024 15:51:26 -0800 Subject: [PATCH 196/240] [ubsan,test] Disable static-link.cpp for i386 and internal_symbolizer i386 has a `__tls_get_addr` link error. For internal_symbolizer, the x86_64 test would fail as well. 
--- compiler-rt/test/ubsan/TestCases/Misc/Linux/static-link.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/compiler-rt/test/ubsan/TestCases/Misc/Linux/static-link.cpp b/compiler-rt/test/ubsan/TestCases/Misc/Linux/static-link.cpp index f26b7b868cad62..c20231cc917d0c 100644 --- a/compiler-rt/test/ubsan/TestCases/Misc/Linux/static-link.cpp +++ b/compiler-rt/test/ubsan/TestCases/Misc/Linux/static-link.cpp @@ -1,5 +1,6 @@ // REQUIRES: ubsan-standalone // REQUIRES: target={{x86_64.*}} +// UNSUPPORTED: i386-target-arch, internal_symbolizer // RUN: %clangxx -fsanitize=bool -static %s -o %t && UBSAN_OPTIONS=handle_segv=0:handle_sigbus=0:handle_sigfpe=0 %run %t 2>&1 | FileCheck %s // RUN: %run %t 2>&1 | FileCheck %s #include From fe20a759fcd20e1755ea1b34c5e6447a787925dc Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 14 Feb 2024 16:05:52 -0800 Subject: [PATCH 197/240] Use container on Linux to run llvm-project-tests workflow (#81349) --- .github/workflows/llvm-project-tests.yml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/llvm-project-tests.yml b/.github/workflows/llvm-project-tests.yml index 68b4a68d1af984..43b90193406fc9 100644 --- a/.github/workflows/llvm-project-tests.yml +++ b/.github/workflows/llvm-project-tests.yml @@ -58,6 +58,10 @@ jobs: lit-tests: name: Lit Tests runs-on: ${{ matrix.os }} + container: + image: ${{(startsWith(matrix.os, 'ubuntu') && 'ghcr.io/llvm/ci-ubuntu-22.04:latest') || null}} + volumes: + - /mnt/:/mnt/ strategy: fail-fast: false matrix: @@ -77,6 +81,7 @@ jobs: with: python-version: ${{ inputs.python_version }} - name: Install Ninja + if: runner.os != 'Linux' uses: llvm/actions/install-ninja@main # actions/checkout deletes any existing files in the new git directory, # so this needs to either run before ccache-action or it has to use @@ -108,8 +113,8 @@ jobs: run: | if [ "${{ runner.os }}" == "Linux" ]; then builddir="/mnt/build/" - sudo mkdir -p $builddir - sudo chown 
`whoami`:`whoami` $builddir + mkdir -p $builddir + extra_cmake_args="-DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang" else builddir="$(pwd)"/build fi @@ -123,6 +128,7 @@ jobs: -DLLDB_INCLUDE_TESTS=OFF \ -DCMAKE_C_COMPILER_LAUNCHER=sccache \ -DCMAKE_CXX_COMPILER_LAUNCHER=sccache \ + $extra_cmake_args \ ${{ inputs.extra_cmake_args }} ninja -C "$builddir" '${{ inputs.build_target }}' From 3369e341288b3d9bb59827f9a2911ebf3d36408d Mon Sep 17 00:00:00 2001 From: Philipp Tomsich Date: Wed, 14 Feb 2024 16:54:08 -0800 Subject: [PATCH 198/240] [NFC][AArch64] fix whitespace in AArch64SchedNeoverseV1 (#81744) One of the whitespace fixes didn't get added to the commit introducing the Ampere1B model. Clean it up. --- llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td index 7e041dbd2abaea..613db353cb0aaa 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td +++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td @@ -29,7 +29,7 @@ def NeoverseV1Model : SchedMachineModel { list UnsupportedFeatures = !listconcat(SVE2Unsupported.F, SMEUnsupported.F, [HasMTE, HasCPA, - HasCSSC]); + HasCSSC]); } //===----------------------------------------------------------------------===// From 4cd7616f6b13513bb13f2b6dd14d140a4c62c937 Mon Sep 17 00:00:00 2001 From: jkorous-apple <32549412+jkorous-apple@users.noreply.github.com> Date: Wed, 14 Feb 2024 17:19:39 -0800 Subject: [PATCH 199/240] [-Wunsafe-buffer-usage] Fixits for array args of func-ptr calls (#80358) Currently we ignore calls on function pointers (unlike direct calls of functions and class methods). This patch adds support for function pointers as well. The change is to simply replace use of forEachArgumentWithParam matcher in UPC gadget with forEachArgumentWithParamType. 
From the documentation of forEachArgumentWithParamType: /// Matches all arguments and their respective types for a \c CallExpr or /// \c CXXConstructExpr. It is very similar to \c forEachArgumentWithParam but /// it works on calls through function pointers as well. Currently the matcher also uses hasPointerType(), which checks that the canonical type of an argument is a pointer and won't match on arrays decayed to pointer. Replacing hasPointerType() with isAnyPointer(), which allows implicit casts, lets arrays be matched as well, so we get fixits for array arguments to function pointer calls too. --- clang/lib/Analysis/UnsafeBufferUsage.cpp | 12 +++-- ...ge-fixits-pointer-arg-to-func-ptr-call.cpp | 48 +++++++++++++++++++ 2 files changed, 56 insertions(+), 4 deletions(-) create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-arg-to-func-ptr-call.cpp diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index d00c598c4b9de3..ca346444e047e5 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -281,10 +281,13 @@ isInUnspecifiedPointerContext(internal::Matcher InnerMatcher) { // 4. the operand of a pointer subtraction operation // (i.e., computing the distance between two pointers); or ...
- auto CallArgMatcher = - callExpr(forEachArgumentWithParam(InnerMatcher, - hasPointerType() /* array also decays to pointer type*/), - unless(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage))))); + // clang-format off + auto CallArgMatcher = callExpr( + forEachArgumentWithParamType( + InnerMatcher, + isAnyPointer() /* array also decays to pointer type*/), + unless(callee( + functionDecl(hasAttr(attr::UnsafeBufferUsage))))); auto CastOperandMatcher = castExpr(anyOf(hasCastKind(CastKind::CK_PointerToIntegral), @@ -306,6 +309,7 @@ isInUnspecifiedPointerContext(internal::Matcher InnerMatcher) { hasRHS(hasPointerType())), eachOf(hasLHS(InnerMatcher), hasRHS(InnerMatcher))); + // clang-format on return stmt(anyOf(CallArgMatcher, CastOperandMatcher, CompOperandMatcher, PtrSubtractionMatcher)); diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-arg-to-func-ptr-call.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-arg-to-func-ptr-call.cpp new file mode 100644 index 00000000000000..0459d6549fd86f --- /dev/null +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-arg-to-func-ptr-call.cpp @@ -0,0 +1,48 @@ +// RUN: %clang_cc1 -std=c++20 -Wunsafe-buffer-usage \ +// RUN: -fsafe-buffer-usage-suggestions \ +// RUN: -fdiagnostics-parseable-fixits %s 2>&1 | FileCheck %s + +void unsafe_array_func_ptr_call(void (*fn_ptr)(int *param)) { + int p[32]; + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:12}:"std::array p" + + p[5] = 10; + fn_ptr(p); + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:11-[[@LINE-1]]:11}:".data()" +} + +void unsafe_ptr_func_ptr_call(void (*fn_ptr)(int *param)) { + int *p; + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:9}:"std::span p" + + p[5] = 10; + fn_ptr(p); + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:11-[[@LINE-1]]:11}:".data()" +} + +void addr_of_unsafe_ptr_func_ptr_call(void (*fn_ptr)(int *param)) { + int *p; + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:9}:"std::span p" + 
+ p[5] = 10; + fn_ptr(&p[0]); + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:10-[[@LINE-1]]:15}:"p.data()" +} + +void addr_of_unsafe_ptr_w_offset_func_ptr_call(void (*fn_ptr)(int *param)) { + int *p; + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:9}:"std::span p" + + p[5] = 10; + fn_ptr(&p[3]); + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:10-[[@LINE-1]]:15}:"&p.data()[3]" +} + +void preincrement_unsafe_ptr_func_ptr_call(void (*fn_ptr)(int *param)) { + int *p; + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:9}:"std::span p" + + p[5] = 10; + fn_ptr(++p); + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:10-[[@LINE-1]]:13}:"(p = p.subspan(1)).data()" +} From 9a1e6373ab3edc38486af504154db2d804e72d3d Mon Sep 17 00:00:00 2001 From: jkorous-apple <32549412+jkorous-apple@users.noreply.github.com> Date: Wed, 14 Feb 2024 18:16:44 -0800 Subject: [PATCH 200/240] [-Wunsafe-buffer-usage] Ignore constant safe indices in array subscripts (#80504) [-Wunsafe-buffer-usage] Ignore safe array subscripts Don't emit warnings for array subscripts on constant size arrays where the index is constant and within bounds. Example: int arr[10]; arr[5] = 0; //safe, no warning This patch recognizes only array indices that are integer literals - it doesn't understand more complex expressions (arithmetic on constants, etc.). -Warray-bounds implemented in Sema::CheckArrayAccess() already solves a similar (opposite) problem, handles complex expressions and is battle-tested. Adding -Wunsafe-buffer-usage diagnostics to Sema is a non-starter as we need to emit both the warnings and fixits and the performance impact of the fixit machine is unacceptable for Sema. CheckArrayAccess() as is doesn't distinguish between "safe" and "unknown" array accesses. It also mixes the analysis that decides if an index is out of bounds with crafting the diagnostics. 
A refactor of CheckArrayAccess() might serve both the original purpose and help us avoid false positives with -Wunsafe-buffer-usage on constant size arrays. --- clang/lib/Analysis/UnsafeBufferUsage.cpp | 47 +++++++++++++--- .../warn-unsafe-buffer-usage-array.cpp | 18 +++++- ...afe-buffer-usage-fixits-pointer-access.cpp | 8 +-- ...ge-fixits-pointer-arg-to-func-ptr-call.cpp | 3 +- .../test/SemaCXX/warn-unsafe-buffer-usage.cpp | 56 +++++++++---------- 5 files changed, 91 insertions(+), 41 deletions(-) diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index ca346444e047e5..a74c113e29f1cf 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -406,6 +406,39 @@ AST_MATCHER(CXXConstructExpr, isSafeSpanTwoParamConstruct) { } return false; } + +AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) { + // FIXME: Proper solution: + // - refactor Sema::CheckArrayAccess + // - split safe/OOB/unknown decision logic from diagnostics emitting code + // - e. g. "Try harder to find a NamedDecl to point at in the note."
+ // already duplicated + // - call both from Sema and from here + + const auto *BaseDRE = + dyn_cast(Node.getBase()->IgnoreParenImpCasts()); + if (!BaseDRE) + return false; + if (!BaseDRE->getDecl()) + return false; + const auto *CATy = Finder->getASTContext().getAsConstantArrayType( + BaseDRE->getDecl()->getType()); + if (!CATy) + return false; + const APInt ArrSize = CATy->getSize(); + + if (const auto *IdxLit = dyn_cast(Node.getIdx())) { + const APInt ArrIdx = IdxLit->getValue(); + // FIXME: ArrIdx.isNegative() we could immediately emit an error as that's a + // bug + if (ArrIdx.isNonNegative() && + ArrIdx.getLimitedValue() < ArrSize.getLimitedValue()) + return true; + } + + return false; +} + } // namespace clang::ast_matchers namespace { @@ -598,16 +631,16 @@ class ArraySubscriptGadget : public WarningGadget { } static Matcher matcher() { - // FIXME: What if the index is integer literal 0? Should this be - // a safe gadget in this case? - // clang-format off + // clang-format off return stmt(arraySubscriptExpr( hasBase(ignoringParenImpCasts( anyOf(hasPointerType(), hasArrayType()))), - unless(hasIndex( - anyOf(integerLiteral(equals(0)), arrayInitIndexExpr()) - ))) - .bind(ArraySubscrTag)); + unless(anyOf( + isSafeArraySubscript(), + hasIndex( + anyOf(integerLiteral(equals(0)), arrayInitIndexExpr()) + ) + ))).bind(ArraySubscrTag)); // clang-format on } diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-array.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-array.cpp index 90c11b1be95c25..8b2f103ec66708 100644 --- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-array.cpp +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-array.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage \ +// RUN: %clang_cc1 -std=c++20 -Wno-everything -Wunsafe-buffer-usage \ // RUN: -fsafe-buffer-usage-suggestions \ // RUN: -verify %s @@ -22,3 +22,19 @@ struct Foo { void foo2(Foo& f, unsigned idx) { f.member_buffer[idx] = 0; // expected-warning{{unsafe 
buffer access}} } + +void constant_idx_safe(unsigned idx) { + int buffer[10]; + buffer[9] = 0; +} + +void constant_idx_safe0(unsigned idx) { + int buffer[10]; + buffer[0] = 0; +} + +void constant_idx_unsafe(unsigned idx) { + int buffer[10]; // expected-warning{{'buffer' is an unsafe buffer that does not perform bounds checks}} + // expected-note@-1{{change type of 'buffer' to 'std::array' to label it for hardening}} + buffer[10] = 0; // expected-note{{used in buffer access here}} +} diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-access.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-access.cpp index f94072015ff87d..b3c64f1b0d085e 100644 --- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-access.cpp +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-access.cpp @@ -83,11 +83,11 @@ void unsafe_method_invocation_single_param() { } -void unsafe_method_invocation_single_param_array() { +void unsafe_method_invocation_single_param_array(int idx) { int p[32]; // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:12}:"std::array p" - int tmp = p[5]; + int tmp = p[idx]; foo(p); // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:8-[[@LINE-1]]:8}:".data()" } @@ -126,14 +126,14 @@ void unsafe_method_invocation_double_param() { m1(q, q, 8); } -void unsafe_method_invocation_double_param_array() { +void unsafe_method_invocation_double_param_array(int idx) { int p[14]; // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:12}:"std::array p" int q[40]; // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:12}:"std::array q" - q[5] = p[5]; + q[idx] = p[idx]; m1(p, p, 10); // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:7-[[@LINE-1]]:7}:".data()" diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-arg-to-func-ptr-call.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-arg-to-func-ptr-call.cpp index 0459d6549fd86f..216813ce45bfd5 100644 --- 
a/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-arg-to-func-ptr-call.cpp +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-arg-to-func-ptr-call.cpp @@ -6,7 +6,8 @@ void unsafe_array_func_ptr_call(void (*fn_ptr)(int *param)) { int p[32]; // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:12}:"std::array p" - p[5] = 10; + int idx; + p[idx] = 10; fn_ptr(p); // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:11-[[@LINE-1]]:11}:".data()" } diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage.cpp index 67cdf252d6a8b6..642db0e9d3c632 100644 --- a/clang/test/SemaCXX/warn-unsafe-buffer-usage.cpp +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage.cpp @@ -36,7 +36,7 @@ void testIncrement(char *p) { // expected-warning{{'p' is an unsafe pointer used void * voidPtrCall(void); char * charPtrCall(void); -void testArraySubscripts(int *p, int **pp) { +void testArraySubscripts(int idx, int *p, int **pp) { // expected-warning@-1{{'p' is an unsafe pointer used for buffer access}} // expected-warning@-2{{'pp' is an unsafe pointer used for buffer access}} foo(p[1], // expected-note{{used in buffer access here}} @@ -64,13 +64,14 @@ void testArraySubscripts(int *p, int **pp) { // expected-note@-1{{change type of 'a' to 'std::array' to label it for hardening}} int b[10][10]; // expected-warning{{'b' is an unsafe buffer that does not perform bounds checks}} - foo(a[1], 1[a], // expected-note2{{used in buffer access here}} - b[3][4], // expected-warning{{unsafe buffer access}} - // expected-note@-1{{used in buffer access here}} - 4[b][3], // expected-warning{{unsafe buffer access}} - // expected-note@-1{{used in buffer access here}} - 4[3[b]]); // expected-warning{{unsafe buffer access}} - // expected-note@-1{{used in buffer access here}} + foo(a[idx], idx[a], // expected-note2{{used in buffer access here}} + b[idx][idx + 1], // expected-warning{{unsafe buffer access}} + // expected-note@-1{{used in buffer access 
here}} + (idx + 1)[b][idx],// expected-warning{{unsafe buffer access}} + // expected-note@-1{{used in buffer access here}} + (idx + 1)[idx[b]]); + // expected-warning@-1{{unsafe buffer access}} + // expected-note@-2{{used in buffer access here}} // Not to warn when index is zero foo(p[0], pp[0][0], 0[0[pp]], 0[pp][0], @@ -158,9 +159,9 @@ void testLambdaCaptureAndGlobal(int * p) { // expected-warning@-1{{'p' is an unsafe pointer used for buffer access}} int a[10]; // expected-warning{{'a' is an unsafe buffer that does not perform bounds checks}} - auto Lam = [p, a]() { + auto Lam = [p, a](int idx) { return p[1] // expected-note{{used in buffer access here}} - + a[1] + garray[1] // expected-note2{{used in buffer access here}} + + a[idx] + garray[idx]// expected-note2{{used in buffer access here}} + gp[1]; // expected-note{{used in buffer access here}} }; @@ -178,31 +179,31 @@ void testLambdaCapture() { // expected-note@-1{{change type of 'b' to 'std::array' to label it for hardening}} int c[10]; - auto Lam1 = [a]() { - return a[1]; // expected-note{{used in buffer access here}} + auto Lam1 = [a](unsigned idx) { + return a[idx]; // expected-note{{used in buffer access here}} }; - auto Lam2 = [x = b[3]]() { // expected-note{{used in buffer access here}} + auto Lam2 = [x = b[c[5]]]() { // expected-note{{used in buffer access here}} return x; }; - auto Lam = [x = c]() { // expected-warning{{'x' is an unsafe pointer used for buffer access}} - return x[3]; // expected-note{{used in buffer access here}} + auto Lam = [x = c](unsigned idx) { // expected-warning{{'x' is an unsafe pointer used for buffer access}} + return x[idx]; // expected-note{{used in buffer access here}} }; } -void testLambdaImplicitCapture() { +void testLambdaImplicitCapture(long idx) { int a[10]; // expected-warning{{'a' is an unsafe buffer that does not perform bounds checks}} // expected-note@-1{{change type of 'a' to 'std::array' to label it for hardening}} int b[10]; // expected-warning{{'b' is an 
unsafe buffer that does not perform bounds checks}} // expected-note@-1{{change type of 'b' to 'std::array' to label it for hardening}} auto Lam1 = [=]() { - return a[1]; // expected-note{{used in buffer access here}} + return a[idx]; // expected-note{{used in buffer access here}} }; auto Lam2 = [&]() { - return b[1]; // expected-note{{used in buffer access here}} + return b[idx]; // expected-note{{used in buffer access here}} }; } @@ -344,38 +345,37 @@ int testVariableDecls(int * p) { return p[1]; // expected-note{{used in buffer access here}} } -template void fArr(T t[]) { +template void fArr(T t[], long long idx) { // expected-warning@-1{{'t' is an unsafe pointer used for buffer access}} foo(t[1]); // expected-note{{used in buffer access here}} T ar[8]; // expected-warning{{'ar' is an unsafe buffer that does not perform bounds checks}} // expected-note@-1{{change type of 'ar' to 'std::array' to label it for hardening}} - foo(ar[5]); // expected-note{{used in buffer access here}} + foo(ar[idx]); // expected-note{{used in buffer access here}} } -template void fArr(int t[]); // FIXME: expected note {{in instantiation of}} +template void fArr(int t[], long long); // FIXME: expected note {{in instantiation of}} int testReturn(int t[]) {// expected-note{{change type of 't' to 'std::span' to preserve bounds information}} // expected-warning@-1{{'t' is an unsafe pointer used for buffer access}} return t[1]; // expected-note{{used in buffer access here}} } -int testArrayAccesses(int n) { +int testArrayAccesses(int n, int idx) { // auto deduced array type int cArr[2][3] = {{1, 2, 3}, {4, 5, 6}}; // expected-warning@-1{{'cArr' is an unsafe buffer that does not perform bounds checks}} int d = cArr[0][0]; foo(cArr[0][0]); - foo(cArr[1][2]); // expected-note{{used in buffer access here}} - // expected-warning@-1{{unsafe buffer access}} - auto cPtr = cArr[1][2]; // expected-note{{used in buffer access here}} - // expected-warning@-1{{unsafe buffer access}} + foo(cArr[idx][idx 
+ 1]); // expected-note{{used in buffer access here}} + // expected-warning@-1{{unsafe buffer access}} + auto cPtr = cArr[idx][idx * 2]; // expected-note{{used in buffer access here}} + // expected-warning@-1{{unsafe buffer access}} foo(cPtr); // Typdefs typedef int A[3]; const A tArr = {4, 5, 6}; - // expected-warning@-1{{'tArr' is an unsafe buffer that does not perform bounds checks}} - foo(tArr[0], tArr[1]); // expected-note{{used in buffer access here}} + foo(tArr[0], tArr[1]); return cArr[0][1]; // expected-warning{{unsafe buffer access}} } From 8ce144800a7ed7c1e42343b3a9ac5e0ffdbfddbf Mon Sep 17 00:00:00 2001 From: Jie Fu Date: Thu, 15 Feb 2024 10:16:18 +0800 Subject: [PATCH 201/240] [clangd][test] Fix -Wmissing-field-initializers in DiagnosticsTests.cpp (NFC) llvm-project/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp:921:45: error: missing field 'Annotations' initializer [-Werror,-Wmissing-field-initializers] TextEdit{Main.range("virtual1"), ""}}}; ^ llvm-project/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp:926:45: error: missing field 'Annotations' initializer [-Werror,-Wmissing-field-initializers] TextEdit{Main.range("virtual2"), ""}}}; ^ 2 errors generated. 
--- clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp b/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp index 4839879e1b78c8..2f6dd0611b6621 100644 --- a/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp +++ b/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp @@ -918,12 +918,14 @@ TEST(DiagnosticTest, ClangTidySelfContainedDiagsFormatting) { "prefer using 'override' or (rarely) 'final' " "instead of 'virtual'", {TextEdit{Main.range("override1"), " override"}, - TextEdit{Main.range("virtual1"), ""}}}; + TextEdit{Main.range("virtual1"), ""}}, + {}}; clangd::Fix const ExpectedFix2{ "prefer using 'override' or (rarely) 'final' " "instead of 'virtual'", {TextEdit{Main.range("override2"), " override"}, - TextEdit{Main.range("virtual2"), ""}}}; + TextEdit{Main.range("virtual2"), ""}}, + {}}; // Note that in the Fix we expect the "virtual" keyword and the following // whitespace to be deleted EXPECT_THAT(TU.build().getDiagnostics(), From ff409d39ce4673c70f474c3fdb7120bab8f94eef Mon Sep 17 00:00:00 2001 From: lntue <35648136+lntue@users.noreply.github.com> Date: Wed, 14 Feb 2024 21:35:00 -0500 Subject: [PATCH 202/240] [libc][math] Add C23 ldexpf128 math function and fix DyadicFloat conversions for subnormal ranges and 80-bit floating points. 
(#81780) --- libc/config/linux/aarch64/entrypoints.txt | 1 + libc/config/linux/riscv/entrypoints.txt | 1 + libc/config/linux/x86_64/entrypoints.txt | 1 + libc/docs/math/index.rst | 2 + libc/spec/stdc.td | 1 + libc/src/__support/FPUtil/CMakeLists.txt | 37 ++++++------- libc/src/__support/FPUtil/FPBits.h | 9 ++-- .../__support/FPUtil/ManipulationFunctions.h | 42 +++++++++++---- libc/src/__support/FPUtil/dyadic_float.h | 53 ++++++++++++------- libc/src/math/CMakeLists.txt | 1 + libc/src/math/generic/CMakeLists.txt | 23 ++++++-- libc/src/math/generic/ldexpf128.cpp | 19 +++++++ libc/src/math/ldexpf128.h | 20 +++++++ .../__support/FPUtil/dyadic_float_test.cpp | 34 ++++++++++++ libc/test/src/math/smoke/CMakeLists.txt | 18 +++++-- libc/test/src/math/smoke/LdExpTest.h | 4 +- libc/test/src/math/smoke/ldexp_test.cpp | 2 +- libc/test/src/math/smoke/ldexpf128_test.cpp | 13 +++++ libc/test/src/math/smoke/ldexpf_test.cpp | 2 +- libc/test/src/math/smoke/ldexpl_test.cpp | 2 +- 20 files changed, 222 insertions(+), 63 deletions(-) create mode 100644 libc/src/math/generic/ldexpf128.cpp create mode 100644 libc/src/math/ldexpf128.h create mode 100644 libc/test/src/math/smoke/ldexpf128_test.cpp diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index bc09f488122865..6e194682df4bfc 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -387,6 +387,7 @@ if(LIBC_COMPILER_HAS_FLOAT128) libc.src.math.fmaxf128 libc.src.math.fminf128 libc.src.math.frexpf128 + libc.src.math.ldexpf128 libc.src.math.roundf128 libc.src.math.sqrtf128 libc.src.math.truncf128 diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt index 02412e7549a3d5..71ff4bcfc35195 100644 --- a/libc/config/linux/riscv/entrypoints.txt +++ b/libc/config/linux/riscv/entrypoints.txt @@ -396,6 +396,7 @@ if(LIBC_COMPILER_HAS_FLOAT128) libc.src.math.fmaxf128 libc.src.math.fminf128 libc.src.math.frexpf128 
+ libc.src.math.ldexpf128 libc.src.math.roundf128 libc.src.math.sqrtf128 libc.src.math.truncf128 diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 57b4a1e0f93d4f..33f6e97af0e183 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -435,6 +435,7 @@ if(LIBC_COMPILER_HAS_FLOAT128) libc.src.math.fmaxf128 libc.src.math.fminf128 libc.src.math.frexpf128 + libc.src.math.ldexpf128 libc.src.math.roundf128 libc.src.math.sqrtf128 libc.src.math.truncf128 diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst index bd2af656d9eecd..c586fe6664e27f 100644 --- a/libc/docs/math/index.rst +++ b/libc/docs/math/index.rst @@ -191,6 +191,8 @@ Basic Operations +--------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ | ldexpl | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | +--------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ +| ldexpf128 | |check| | |check| | | |check| | | | | | | | | | ++--------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ | llrint | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | +--------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ | llrintf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index 9ed94638f522ca..79487cb697f320 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -413,6 +413,7 @@ def StdC : StandardSpec<"stdc"> { FunctionSpec<"ldexp", RetValSpec, [ArgSpec, ArgSpec]>, FunctionSpec<"ldexpf", RetValSpec, [ArgSpec, ArgSpec]>, FunctionSpec<"ldexpl", 
RetValSpec, [ArgSpec, ArgSpec]>, + GuardedFunctionSpec<"ldexpf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_COMPILER_HAS_FLOAT128">, FunctionSpec<"log10", RetValSpec, [ArgSpec]>, FunctionSpec<"log10f", RetValSpec, [ArgSpec]>, diff --git a/libc/src/__support/FPUtil/CMakeLists.txt b/libc/src/__support/FPUtil/CMakeLists.txt index 3307d33434f0b3..0c932e8ffcd550 100644 --- a/libc/src/__support/FPUtil/CMakeLists.txt +++ b/libc/src/__support/FPUtil/CMakeLists.txt @@ -75,24 +75,6 @@ add_header_library( libc.src.__support.common ) -add_header_library( - manipulation_functions - HDRS - ManipulationFunctions.h - DEPENDS - .fenv_impl - .fp_bits - .nearest_integer_operations - .normal_float - libc.src.__support.CPP.bit - libc.src.__support.CPP.limits - libc.src.__support.CPP.type_traits - libc.src.__support.common - libc.src.__support.macros.optimization - libc.include.math - libc.src.errno.errno -) - add_header_library( basic_operations HDRS @@ -221,4 +203,23 @@ add_header_library( libc.src.__support.macros.optimization ) +add_header_library( + manipulation_functions + HDRS + ManipulationFunctions.h + DEPENDS + .fenv_impl + .fp_bits + .dyadic_float + .nearest_integer_operations + .normal_float + libc.src.__support.CPP.bit + libc.src.__support.CPP.limits + libc.src.__support.CPP.type_traits + libc.src.__support.common + libc.src.__support.macros.optimization + libc.include.math + libc.src.errno.errno +) + add_subdirectory(generic) diff --git a/libc/src/__support/FPUtil/FPBits.h b/libc/src/__support/FPUtil/FPBits.h index 6665c90845683b..b3179a24c74749 100644 --- a/libc/src/__support/FPUtil/FPBits.h +++ b/libc/src/__support/FPUtil/FPBits.h @@ -633,13 +633,13 @@ struct FPRepImpl : public FPRepSem { using typename UP::Significand; using UP::FP_MASK; - using UP::SIG_LEN; public: // Constants. 
using UP::EXP_BIAS; using UP::EXP_MASK; using UP::FRACTION_MASK; + using UP::SIG_LEN; using UP::SIGN_MASK; LIBC_INLINE_VAR static constexpr int MAX_BIASED_EXPONENT = (1 << UP::EXP_LEN) - 1; @@ -732,11 +732,14 @@ struct FPRepImpl : public FPRepSem { // Unsafe function to create a floating point representation. // It simply packs the sign, biased exponent and mantissa values without // checking bound nor normalization. + // + // WARNING: For X86 Extended Precision, implicit bit needs to be set correctly + // in the 'mantissa' by the caller. This function will not check for its + // validity. + // // FIXME: Use an uint32_t for 'biased_exp'. LIBC_INLINE static constexpr RetT create_value(Sign sign, StorageType biased_exp, StorageType mantissa) { - static_assert(fp_type != FPType::X86_Binary80, - "This function is not tested for X86 Extended Precision"); return RetT(encode(sign, BiasedExponent(static_cast(biased_exp)), Significand(mantissa))); } diff --git a/libc/src/__support/FPUtil/ManipulationFunctions.h b/libc/src/__support/FPUtil/ManipulationFunctions.h index 9becbaa45eadeb..9e760a28f42d75 100644 --- a/libc/src/__support/FPUtil/ManipulationFunctions.h +++ b/libc/src/__support/FPUtil/ManipulationFunctions.h @@ -12,6 +12,8 @@ #include "FPBits.h" #include "NearestIntegerOperations.h" #include "NormalFloat.h" +#include "dyadic_float.h" +#include "rounding_mode.h" #include "src/__support/CPP/bit.h" #include "src/__support/CPP/limits.h" // INT_MAX, INT_MIN @@ -117,10 +119,8 @@ LIBC_INLINE T logb(T x) { template , int> = 0> LIBC_INLINE T ldexp(T x, int exp) { - if (LIBC_UNLIKELY(exp == 0)) - return x; FPBits bits(x); - if (LIBC_UNLIKELY(bits.is_zero() || bits.is_inf_or_nan())) + if (LIBC_UNLIKELY((exp == 0) || bits.is_zero() || bits.is_inf_or_nan())) return x; // NormalFloat uses int32_t to store the true exponent value. We should ensure @@ -129,18 +129,40 @@ LIBC_INLINE T ldexp(T x, int exp) { // early. 
Because the result of the ldexp operation can be a subnormal number, // we need to accommodate the (mantissaWidth + 1) worth of shift in // calculating the limit. - int exp_limit = FPBits::MAX_BIASED_EXPONENT + FPBits::FRACTION_LEN + 1; - if (exp > exp_limit) - return FPBits::inf(bits.sign()).get_val(); + constexpr int EXP_LIMIT = + FPBits::MAX_BIASED_EXPONENT + FPBits::FRACTION_LEN + 1; + if (LIBC_UNLIKELY(exp > EXP_LIMIT)) { + int rounding_mode = quick_get_round(); + Sign sign = bits.sign(); + + if ((sign == Sign::POS && rounding_mode == FE_DOWNWARD) || + (sign == Sign::NEG && rounding_mode == FE_UPWARD) || + (rounding_mode == FE_TOWARDZERO)) + return FPBits::max_normal(sign).get_val(); + + set_errno_if_required(ERANGE); + raise_except_if_required(FE_OVERFLOW); + return FPBits::inf(sign).get_val(); + } // Similarly on the negative side we return zero early if |exp| is too small. - if (exp < -exp_limit) - return FPBits::zero(bits.sign()).get_val(); + if (LIBC_UNLIKELY(exp < -EXP_LIMIT)) { + int rounding_mode = quick_get_round(); + Sign sign = bits.sign(); + + if ((sign == Sign::POS && rounding_mode == FE_UPWARD) || + (sign == Sign::NEG && rounding_mode == FE_DOWNWARD)) + return FPBits::min_subnormal(sign).get_val(); + + set_errno_if_required(ERANGE); + raise_except_if_required(FE_UNDERFLOW); + return FPBits::zero(sign).get_val(); + } // For all other values, NormalFloat to T conversion handles it the right way. 
- NormalFloat normal(bits); + DyadicFloat::STORAGE_LEN> normal(bits.get_val()); normal.exponent += exp; - return normal; + return static_cast(normal); } template struct DyadicFloat { static_assert(FPBits::FRACTION_LEN < Bits); FPBits x_bits(x); sign = x_bits.sign(); - exponent = x_bits.get_exponent() - FPBits::FRACTION_LEN; + exponent = x_bits.get_explicit_exponent() - FPBits::FRACTION_LEN; mantissa = MantissaType(x_bits.get_explicit_mantissa()); normalize(); } @@ -79,25 +79,32 @@ template struct DyadicFloat { return *this; } - // Assume that it is already normalized and output is not underflow. + // Assume that it is already normalized. // Output is rounded correctly with respect to the current rounding mode. - // TODO(lntue): Add support for underflow. - // TODO(lntue): Test or add specialization for x86 long double. template && (FPBits::FRACTION_LEN < Bits), void>> explicit operator T() const { - // TODO(lntue): Do we need to treat signed zeros properly? - if (mantissa.is_zero()) - return 0.0; + if (LIBC_UNLIKELY(mantissa.is_zero())) + return FPBits::zero(sign).get_val(); // Assume that it is normalized, and output is also normal. constexpr uint32_t PRECISION = FPBits::FRACTION_LEN + 1; using output_bits_t = typename FPBits::StorageType; + constexpr output_bits_t IMPLICIT_MASK = + FPBits::SIG_MASK - FPBits::FRACTION_MASK; int exp_hi = exponent + static_cast((Bits - 1) + FPBits::EXP_BIAS); + if (LIBC_UNLIKELY(exp_hi > 2 * FPBits::EXP_BIAS)) { + // Results overflow. 
+ T d_hi = + FPBits::create_value(sign, 2 * FPBits::EXP_BIAS, IMPLICIT_MASK) + .get_val(); + return T(2) * d_hi; + } + bool denorm = false; uint32_t shift = Bits - PRECISION; if (LIBC_UNLIKELY(exp_hi <= 0)) { @@ -112,49 +119,57 @@ template struct DyadicFloat { MantissaType m_hi(mantissa >> shift); - T d_hi = FPBits::create_value(sign, exp_hi, - static_cast(m_hi) & - FPBits::FRACTION_MASK) + T d_hi = FPBits::create_value( + sign, exp_hi, + (static_cast(m_hi) & FPBits::SIG_MASK) | + IMPLICIT_MASK) .get_val(); - const MantissaType round_mask = MantissaType(1) << (shift - 1); - const MantissaType sticky_mask = round_mask - MantissaType(1); + MantissaType round_mask = MantissaType(1) << (shift - 1); + MantissaType sticky_mask = round_mask - MantissaType(1); bool round_bit = !(mantissa & round_mask).is_zero(); bool sticky_bit = !(mantissa & sticky_mask).is_zero(); int round_and_sticky = int(round_bit) * 2 + int(sticky_bit); T d_lo; + if (LIBC_UNLIKELY(exp_lo <= 0)) { // d_lo is denormal, but the output is normal. int scale_up_exponent = 2 * PRECISION; T scale_up_factor = FPBits::create_value(sign, FPBits::EXP_BIAS + scale_up_exponent, - output_bits_t(0)) + IMPLICIT_MASK) .get_val(); T scale_down_factor = FPBits::create_value(sign, FPBits::EXP_BIAS - scale_up_exponent, - output_bits_t(0)) + IMPLICIT_MASK) .get_val(); d_lo = FPBits::create_value(sign, exp_lo + scale_up_exponent, - output_bits_t(0)) + IMPLICIT_MASK) .get_val(); return multiply_add(d_lo, T(round_and_sticky), d_hi * scale_up_factor) * scale_down_factor; } - d_lo = FPBits::create_value(sign, exp_lo, output_bits_t(0)).get_val(); + d_lo = FPBits::create_value(sign, exp_lo, IMPLICIT_MASK).get_val(); // Still correct without FMA instructions if `d_lo` is not underflow. T r = multiply_add(d_lo, T(round_and_sticky), d_hi); if (LIBC_UNLIKELY(denorm)) { - // Output is denormal, simply clear the exponent field. 
- output_bits_t clear_exp = output_bits_t(exp_hi) - << FPBits::FRACTION_LEN; + // Exponent before rounding is in denormal range, simply clear the + // exponent field. + output_bits_t clear_exp = (output_bits_t(exp_hi) << FPBits::SIG_LEN); output_bits_t r_bits = FPBits(r).uintval() - clear_exp; + if (!(r_bits & FPBits::EXP_MASK)) { + // Output is denormal after rounding, clear the implicit bit for 80-bit + // long double. + r_bits -= IMPLICIT_MASK; + } + return FPBits(r_bits).get_val(); } diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt index 985585cbfb8902..05ce51e8fc6503 100644 --- a/libc/src/math/CMakeLists.txt +++ b/libc/src/math/CMakeLists.txt @@ -149,6 +149,7 @@ add_math_entrypoint_object(ilogbl) add_math_entrypoint_object(ldexp) add_math_entrypoint_object(ldexpf) add_math_entrypoint_object(ldexpl) +add_math_entrypoint_object(ldexpf128) add_math_entrypoint_object(log10) add_math_entrypoint_object(log10f) diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index fdf383f070697e..259ae1c2793439 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -1001,10 +1001,10 @@ add_entrypoint_object( ldexp.cpp HDRS ../ldexp.h + COMPILE_OPTIONS + -O3 DEPENDS libc.src.__support.FPUtil.manipulation_functions - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -1013,10 +1013,10 @@ add_entrypoint_object( ldexpf.cpp HDRS ../ldexpf.h + COMPILE_OPTIONS + -O3 DEPENDS libc.src.__support.FPUtil.manipulation_functions - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -1025,10 +1025,23 @@ add_entrypoint_object( ldexpl.cpp HDRS ../ldexpl.h + COMPILE_OPTIONS + -O3 DEPENDS libc.src.__support.FPUtil.manipulation_functions +) + +add_entrypoint_object( + ldexpf128 + SRCS + ldexpf128.cpp + HDRS + ../ldexpf128.h COMPILE_OPTIONS - -O2 + -O3 + DEPENDS + libc.src.__support.macros.properties.float + libc.src.__support.FPUtil.manipulation_functions ) add_object_library( diff --git 
a/libc/src/math/generic/ldexpf128.cpp b/libc/src/math/generic/ldexpf128.cpp new file mode 100644 index 00000000000000..ed2ebd38dfae75 --- /dev/null +++ b/libc/src/math/generic/ldexpf128.cpp @@ -0,0 +1,19 @@ +//===-- Implementation of ldexpf128 function ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/ldexpf128.h" +#include "src/__support/FPUtil/ManipulationFunctions.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(float128, ldexpf128, (float128 x, int exp)) { + return fputil::ldexp(x, exp); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/math/ldexpf128.h b/libc/src/math/ldexpf128.h new file mode 100644 index 00000000000000..adf9d8f56b3566 --- /dev/null +++ b/libc/src/math/ldexpf128.h @@ -0,0 +1,20 @@ +//===-- Implementation header for ldexpf128 ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_LDEXPF128_H +#define LLVM_LIBC_SRC_MATH_LDEXPF128_H + +#include "src/__support/macros/properties/float.h" + +namespace LIBC_NAMESPACE { + +float128 ldexpf128(float128 x, int exp); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_MATH_LDEXPF128_H diff --git a/libc/test/src/__support/FPUtil/dyadic_float_test.cpp b/libc/test/src/__support/FPUtil/dyadic_float_test.cpp index a9f9842c503057..625aa70973b9f1 100644 --- a/libc/test/src/__support/FPUtil/dyadic_float_test.cpp +++ b/libc/test/src/__support/FPUtil/dyadic_float_test.cpp @@ -56,3 +56,37 @@ TEST(LlvmLibcDyadicFloatTest, QuickMul) { Float256 z = quick_mul(x, y); EXPECT_FP_EQ_ALL_ROUNDING(double(x) * double(y), double(z)); } + +#define TEST_EDGE_RANGES(Name, Type) \ + TEST(LlvmLibcDyadicFloatTest, EdgeRanges##Name) { \ + using Bits = LIBC_NAMESPACE::fputil::FPBits; \ + using DFType = LIBC_NAMESPACE::fputil::DyadicFloat; \ + Type max_normal = Bits::max_normal().get_val(); \ + Type min_normal = Bits::min_normal().get_val(); \ + Type min_subnormal = Bits::min_subnormal().get_val(); \ + Type two(2); \ + \ + DFType x(min_normal); \ + EXPECT_FP_EQ_ALL_ROUNDING(min_normal, static_cast(x)); \ + --x.exponent; \ + EXPECT_FP_EQ(min_normal / two, static_cast(x)); \ + \ + DFType y(two *min_normal - min_subnormal); \ + --y.exponent; \ + EXPECT_FP_EQ(min_normal, static_cast(y)); \ + \ + DFType z(min_subnormal); \ + EXPECT_FP_EQ_ALL_ROUNDING(min_subnormal, static_cast(z)); \ + --z.exponent; \ + EXPECT_FP_EQ(Bits::zero().get_val(), static_cast(z)); \ + \ + DFType t(max_normal); \ + EXPECT_FP_EQ_ALL_ROUNDING(max_normal, static_cast(t)); \ + ++t.exponent; \ + EXPECT_FP_EQ(Bits::inf().get_val(), static_cast(t)); \ + } \ + static_assert(true, "Require semicolon.") + +TEST_EDGE_RANGES(Float, float); +TEST_EDGE_RANGES(Double, double); 
+TEST_EDGE_RANGES(LongDouble, long double); diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt index 0d55be5d98bdce..1824c672cb9742 100644 --- a/libc/test/src/math/smoke/CMakeLists.txt +++ b/libc/test/src/math/smoke/CMakeLists.txt @@ -878,7 +878,6 @@ add_fp_unittest( HDRS LdExpTest.h DEPENDS - libc.include.math libc.src.math.ldexp libc.src.__support.CPP.limits libc.src.__support.FPUtil.fp_bits @@ -894,7 +893,6 @@ add_fp_unittest( HDRS LdExpTest.h DEPENDS - libc.include.math libc.src.math.ldexpf libc.src.__support.CPP.limits libc.src.__support.FPUtil.fp_bits @@ -910,13 +908,27 @@ add_fp_unittest( HDRS LdExpTest.h DEPENDS - libc.include.math libc.src.math.ldexpl libc.src.__support.CPP.limits libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.normal_float ) +add_fp_unittest( + ldexpf128_test + SUITE + libc-math-smoke-tests + SRCS + ldexpf128_test.cpp + HDRS + LdExpTest.h + DEPENDS + libc.src.math.ldexpf128 + libc.src.__support.CPP.limits + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.normal_float +) + add_fp_unittest( logb_test SUITE diff --git a/libc/test/src/math/smoke/LdExpTest.h b/libc/test/src/math/smoke/LdExpTest.h index fe84b5f4c192a4..7d17071f5b309c 100644 --- a/libc/test/src/math/smoke/LdExpTest.h +++ b/libc/test/src/math/smoke/LdExpTest.h @@ -15,7 +15,6 @@ #include "test/UnitTest/FPMatcher.h" #include "test/UnitTest/Test.h" -#include #include template @@ -163,6 +162,7 @@ class LdExpTestTemplate : public LIBC_NAMESPACE::testing::Test { TEST_F(LlvmLibcLdExpTest, UnderflowToZeroOnSubnormal) { \ testUnderflowToZeroOnSubnormal(&func); \ } \ - TEST_F(LlvmLibcLdExpTest, NormalOperation) { testNormalOperation(&func); } + TEST_F(LlvmLibcLdExpTest, NormalOperation) { testNormalOperation(&func); } \ + static_assert(true) #endif // LLVM_LIBC_TEST_SRC_MATH_LDEXPTEST_H diff --git a/libc/test/src/math/smoke/ldexp_test.cpp b/libc/test/src/math/smoke/ldexp_test.cpp index aad580f95fb99d..adbf603b1e96e4 
100644 --- a/libc/test/src/math/smoke/ldexp_test.cpp +++ b/libc/test/src/math/smoke/ldexp_test.cpp @@ -10,4 +10,4 @@ #include "src/math/ldexp.h" -LIST_LDEXP_TESTS(double, LIBC_NAMESPACE::ldexp) +LIST_LDEXP_TESTS(double, LIBC_NAMESPACE::ldexp); diff --git a/libc/test/src/math/smoke/ldexpf128_test.cpp b/libc/test/src/math/smoke/ldexpf128_test.cpp new file mode 100644 index 00000000000000..7ab34a4ce15de8 --- /dev/null +++ b/libc/test/src/math/smoke/ldexpf128_test.cpp @@ -0,0 +1,13 @@ +//===-- Unittests for ldexpf128 -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "LdExpTest.h" + +#include "src/math/ldexpf128.h" + +LIST_LDEXP_TESTS(float128, LIBC_NAMESPACE::ldexpf128); diff --git a/libc/test/src/math/smoke/ldexpf_test.cpp b/libc/test/src/math/smoke/ldexpf_test.cpp index f4cce37b9277dc..02fd8c56effcec 100644 --- a/libc/test/src/math/smoke/ldexpf_test.cpp +++ b/libc/test/src/math/smoke/ldexpf_test.cpp @@ -10,4 +10,4 @@ #include "src/math/ldexpf.h" -LIST_LDEXP_TESTS(float, LIBC_NAMESPACE::ldexpf) +LIST_LDEXP_TESTS(float, LIBC_NAMESPACE::ldexpf); diff --git a/libc/test/src/math/smoke/ldexpl_test.cpp b/libc/test/src/math/smoke/ldexpl_test.cpp index 405e53390e8c34..9bc17c5c7df70a 100644 --- a/libc/test/src/math/smoke/ldexpl_test.cpp +++ b/libc/test/src/math/smoke/ldexpl_test.cpp @@ -10,4 +10,4 @@ #include "src/math/ldexpl.h" -LIST_LDEXP_TESTS(long double, LIBC_NAMESPACE::ldexpl) +LIST_LDEXP_TESTS(long double, LIBC_NAMESPACE::ldexpl); From a7982d5e7a16f681e80891a819bdf14dde928755 Mon Sep 17 00:00:00 2001 From: Ryosuke Niwa Date: Wed, 14 Feb 2024 18:46:30 -0800 Subject: [PATCH 203/240] [analyzer] UncountedCallArgsChecker: Detect & ignore trivial function calls. 
(#81808) This PR introduces the concept of a "trivial function" which applies to a function that only calls other trivial functions and contains literals and expressions that don't result in heap mutations (specifically it does not call deref). This is implemented using ConstStmtVisitor and checking each statement and expression's trivialness. This PR also introduces the concept of a "singleton function", which is a static member function or a free standing function which ends with the suffix "singleton". Such a function's return value is understood to be safe to call any function with. --- .../Checkers/WebKit/ASTUtils.cpp | 4 + .../Checkers/WebKit/PtrTypesSemantics.cpp | 213 ++++++++++++++++ .../Checkers/WebKit/PtrTypesSemantics.h | 21 ++ .../WebKit/UncountedCallArgsChecker.cpp | 8 +- .../Analysis/Checkers/WebKit/call-args.cpp | 33 +-- .../Checkers/WebKit/uncounted-obj-arg.cpp | 233 ++++++++++++++++++ 6 files changed, 495 insertions(+), 17 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp index b76c0551c77bb0..94eaa81af51772 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp @@ -66,9 +66,13 @@ tryToFindPtrOrigin(const Expr *E, bool StopAtFirstRefCountedObj) { E = call->getArg(0); continue; } + if (isReturnValueRefCounted(callee)) return {E, true}; + if (isSingleton(callee)) + return {E, true}; + if (isPtrConversion(callee)) { E = call->getArg(0); continue; diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp index 907244013d0871..bf6f9a64877c64 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp @@ -12,6 +12,7 @@ #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/ExprCXX.h" +#include 
"clang/AST/StmtVisitor.h" #include using namespace clang; @@ -222,4 +223,216 @@ bool isPtrConversion(const FunctionDecl *F) { return false; } +bool isSingleton(const FunctionDecl *F) { + assert(F); + // FIXME: check # of params == 1 + if (auto *MethodDecl = dyn_cast(F)) { + if (!MethodDecl->isStatic()) + return false; + } + const auto &Name = safeGetName(F); + std::string SingletonStr = "singleton"; + auto index = Name.find(SingletonStr); + return index != std::string::npos && + index == Name.size() - SingletonStr.size(); +} + +// We only care about statements so let's use the simple +// (non-recursive) visitor. +class TrivialFunctionAnalysisVisitor + : public ConstStmtVisitor { + + // Returns false if at least one child is non-trivial. + bool VisitChildren(const Stmt *S) { + for (const Stmt *Child : S->children()) { + if (Child && !Visit(Child)) + return false; + } + + return true; + } + +public: + using CacheTy = TrivialFunctionAnalysis::CacheTy; + + TrivialFunctionAnalysisVisitor(CacheTy &Cache) : Cache(Cache) {} + + bool VisitStmt(const Stmt *S) { + // All statements are non-trivial unless overriden later. + // Don't even recurse into children by default. + return false; + } + + bool VisitCompoundStmt(const CompoundStmt *CS) { + // A compound statement is allowed as long each individual sub-statement + // is trivial. + return VisitChildren(CS); + } + + bool VisitReturnStmt(const ReturnStmt *RS) { + // A return statement is allowed as long as the return value is trivial. 
+ if (auto *RV = RS->getRetValue()) + return Visit(RV); + return true; + } + + bool VisitDeclStmt(const DeclStmt *DS) { return VisitChildren(DS); } + bool VisitDoStmt(const DoStmt *DS) { return VisitChildren(DS); } + bool VisitIfStmt(const IfStmt *IS) { return VisitChildren(IS); } + bool VisitSwitchStmt(const SwitchStmt *SS) { return VisitChildren(SS); } + bool VisitCaseStmt(const CaseStmt *CS) { return VisitChildren(CS); } + bool VisitDefaultStmt(const DefaultStmt *DS) { return VisitChildren(DS); } + + bool VisitUnaryOperator(const UnaryOperator *UO) { + // Operator '*' and '!' are allowed as long as the operand is trivial. + if (UO->getOpcode() == UO_Deref || UO->getOpcode() == UO_LNot) + return Visit(UO->getSubExpr()); + + // Other operators are non-trivial. + return false; + } + + bool VisitBinaryOperator(const BinaryOperator *BO) { + // Binary operators are trivial if their operands are trivial. + return Visit(BO->getLHS()) && Visit(BO->getRHS()); + } + + bool VisitConditionalOperator(const ConditionalOperator *CO) { + // Ternary operators are trivial if their conditions & values are trivial. + return VisitChildren(CO); + } + + bool VisitDeclRefExpr(const DeclRefExpr *DRE) { + if (auto *decl = DRE->getDecl()) { + if (isa(decl)) + return true; + } + return false; + } + + bool VisitStaticAssertDecl(const StaticAssertDecl *SAD) { + // Any static_assert is considered trivial. 
+ return true; + } + + bool VisitCallExpr(const CallExpr *CE) { + if (!checkArguments(CE)) + return false; + + auto *Callee = CE->getDirectCallee(); + if (!Callee) + return false; + const auto &Name = safeGetName(Callee); + + if (Name == "WTFCrashWithInfo" || Name == "WTFBreakpointTrap" || + Name == "compilerFenceForCrash" || Name == "__builtin_unreachable") + return true; + + return TrivialFunctionAnalysis::isTrivialImpl(Callee, Cache); + } + + bool VisitCXXMemberCallExpr(const CXXMemberCallExpr *MCE) { + if (!checkArguments(MCE)) + return false; + + bool TrivialThis = Visit(MCE->getImplicitObjectArgument()); + if (!TrivialThis) + return false; + + auto *Callee = MCE->getMethodDecl(); + if (!Callee) + return false; + + std::optional IsGetterOfRefCounted = isGetterOfRefCounted(Callee); + if (IsGetterOfRefCounted && *IsGetterOfRefCounted) + return true; + + // Recursively descend into the callee to confirm that it's trivial as well. + return TrivialFunctionAnalysis::isTrivialImpl(Callee, Cache); + } + + bool checkArguments(const CallExpr *CE) { + for (const Expr *Arg : CE->arguments()) { + if (Arg && !Visit(Arg)) + return false; + } + return true; + } + + bool VisitCXXConstructExpr(const CXXConstructExpr *CE) { + for (const Expr *Arg : CE->arguments()) { + if (Arg && !Visit(Arg)) + return false; + } + + // Recursively descend into the callee to confirm that it's trivial. 
+ return TrivialFunctionAnalysis::isTrivialImpl(CE->getConstructor(), Cache); + } + + bool VisitImplicitCastExpr(const ImplicitCastExpr *ICE) { + return Visit(ICE->getSubExpr()); + } + + bool VisitExplicitCastExpr(const ExplicitCastExpr *ECE) { + return Visit(ECE->getSubExpr()); + } + + bool VisitParenExpr(const ParenExpr *PE) { return Visit(PE->getSubExpr()); } + + bool VisitInitListExpr(const InitListExpr *ILE) { + for (const Expr *Child : ILE->inits()) { + if (Child && !Visit(Child)) + return false; + } + return true; + } + + bool VisitMemberExpr(const MemberExpr *ME) { + // Field access is allowed but the base pointer may itself be non-trivial. + return Visit(ME->getBase()); + } + + bool VisitCXXThisExpr(const CXXThisExpr *CTE) { + // The expression 'this' is always trivial, be it explicit or implicit. + return true; + } + + // Constant literal expressions are always trivial + bool VisitIntegerLiteral(const IntegerLiteral *E) { return true; } + bool VisitFloatingLiteral(const FloatingLiteral *E) { return true; } + bool VisitFixedPointLiteral(const FixedPointLiteral *E) { return true; } + bool VisitCharacterLiteral(const CharacterLiteral *E) { return true; } + bool VisitStringLiteral(const StringLiteral *E) { return true; } + + bool VisitConstantExpr(const ConstantExpr *CE) { + // Constant expressions are trivial. + return true; + } + +private: + CacheTy Cache; +}; + +bool TrivialFunctionAnalysis::isTrivialImpl( + const Decl *D, TrivialFunctionAnalysis::CacheTy &Cache) { + // If the function isn't in the cache, conservatively assume that + // it's not trivial until analysis completes. This makes every recursive + // function non-trivial. This also guarantees that each function + // will be scanned at most once. 
+ auto [It, IsNew] = Cache.insert(std::make_pair(D, false)); + if (!IsNew) + return It->second; + + const Stmt *Body = D->getBody(); + if (!Body) + return false; + + TrivialFunctionAnalysisVisitor V(Cache); + bool Result = V.Visit(Body); + if (Result) + Cache[D] = true; + + return Result; +} + } // namespace clang diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h index c2c5b74442ba43..e07cd31395747d 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h @@ -10,12 +10,14 @@ #define LLVM_CLANG_ANALYZER_WEBKIT_PTRTYPESEMANTICS_H #include "llvm/ADT/APInt.h" +#include "llvm/ADT/DenseMap.h" #include namespace clang { class CXXBaseSpecifier; class CXXMethodDecl; class CXXRecordDecl; +class Decl; class FunctionDecl; class Type; @@ -60,6 +62,25 @@ std::optional isGetterOfRefCounted(const clang::CXXMethodDecl* Method); /// pointer types. bool isPtrConversion(const FunctionDecl *F); +/// \returns true if \p F is a static singleton function. +bool isSingleton(const FunctionDecl *F); + +/// An inter-procedural analysis facility that detects functions with "trivial" +/// behavior with respect to reference counting, such as simple field getters. +class TrivialFunctionAnalysis { +public: + /// \returns true if \p D is a "trivial" function. 
+ bool isTrivial(const Decl *D) const { return isTrivialImpl(D, TheCache); } + +private: + friend class TrivialFunctionAnalysisVisitor; + + using CacheTy = llvm::DenseMap; + mutable CacheTy TheCache{}; + + static bool isTrivialImpl(const Decl *D, CacheTy &Cache); +}; + } // namespace clang #endif diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp index e2e1add31c9b17..17a64e1b1b8e04 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp @@ -32,6 +32,8 @@ class UncountedCallArgsChecker "WebKit coding guidelines"}; mutable BugReporter *BR; + TrivialFunctionAnalysis TFA; + public: void checkASTDecl(const TranslationUnitDecl *TUD, AnalysisManager &MGR, @@ -134,6 +136,11 @@ class UncountedCallArgsChecker } bool shouldSkipCall(const CallExpr *CE) const { + const auto *Callee = CE->getDirectCallee(); + + if (Callee && TFA.isTrivial(Callee)) + return true; + if (CE->getNumArgs() == 0) return false; @@ -155,7 +162,6 @@ class UncountedCallArgsChecker return false; } - const auto *Callee = CE->getDirectCallee(); if (!Callee) return false; diff --git a/clang/test/Analysis/Checkers/WebKit/call-args.cpp b/clang/test/Analysis/Checkers/WebKit/call-args.cpp index e5c49881070420..f2e1f9bc5a2464 100644 --- a/clang/test/Analysis/Checkers/WebKit/call-args.cpp +++ b/clang/test/Analysis/Checkers/WebKit/call-args.cpp @@ -2,8 +2,9 @@ #include "mock-types.h" -RefCountable* provide() { return nullptr; } -void consume_refcntbl(RefCountable*) {} +RefCountable* provide(); +void consume_refcntbl(RefCountable*); +void some_function(); namespace simple { void foo() { @@ -19,7 +20,7 @@ namespace simple { } namespace multi_arg { - void consume_refcntbl(int, RefCountable* foo, bool) {} + void consume_refcntbl(int, RefCountable* foo, bool); void foo() { consume_refcntbl(42, provide(), true); // 
expected-warning@-1{{Call argument for parameter 'foo' is uncounted and unsafe}} @@ -38,8 +39,8 @@ namespace ref_counted { namespace methods { struct Consumer { - void consume_ptr(RefCountable* ptr) {} - void consume_ref(const RefCountable& ref) {} + void consume_ptr(RefCountable* ptr); + void consume_ref(const RefCountable& ref); }; void foo() { @@ -53,7 +54,7 @@ namespace methods { void foo2() { struct Consumer { - void consume(RefCountable*) { } + void consume(RefCountable*) { some_function(); } void whatever() { consume(provide()); // expected-warning@-1{{Call argument is uncounted and unsafe}} @@ -63,7 +64,7 @@ namespace methods { void foo3() { struct Consumer { - void consume(RefCountable*) { } + void consume(RefCountable*) { some_function(); } void whatever() { this->consume(provide()); // expected-warning@-1{{Call argument is uncounted and unsafe}} @@ -73,7 +74,7 @@ namespace methods { } namespace casts { - RefCountable* downcast(RefCountable*) { return nullptr; } + RefCountable* downcast(RefCountable*); void foo() { consume_refcntbl(provide()); @@ -145,8 +146,8 @@ namespace Ref_to_reference_conversion_operator { } namespace param_formarding_function { - void consume_ref_countable_ref(RefCountable&) {} - void consume_ref_countable_ptr(RefCountable*) {} + void consume_ref_countable_ref(RefCountable&); + void consume_ref_countable_ptr(RefCountable*); namespace ptr { void foo(RefCountable* param) { @@ -185,8 +186,8 @@ namespace param_formarding_function { } namespace param_formarding_lambda { - auto consume_ref_countable_ref = [](RefCountable&) {}; - auto consume_ref_countable_ptr = [](RefCountable*) {}; + auto consume_ref_countable_ref = [](RefCountable&) { some_function(); }; + auto consume_ref_countable_ptr = [](RefCountable*) { some_function(); }; namespace ptr { void foo(RefCountable* param) { @@ -304,7 +305,7 @@ namespace string_impl { namespace default_arg { RefCountable* global; - void function_with_default_arg(RefCountable* param = global) {} + void 
function_with_default_arg(RefCountable* param = global); // expected-warning@-1{{Call argument for parameter 'param' is uncounted and unsafe}} void foo() { @@ -315,9 +316,9 @@ namespace default_arg { namespace cxx_member_operator_call { // The hidden this-pointer argument without a corresponding parameter caused couple bugs in parameter <-> argument attribution. struct Foo { - Foo& operator+(RefCountable* bad) { return *this; } - friend Foo& operator-(Foo& lhs, RefCountable* bad) { return lhs; } - void operator()(RefCountable* bad) { } + Foo& operator+(RefCountable* bad); + friend Foo& operator-(Foo& lhs, RefCountable* bad); + void operator()(RefCountable* bad); }; RefCountable* global; diff --git a/clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.cpp b/clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.cpp index e5e39e3faac714..156a2480901bf0 100644 --- a/clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.cpp +++ b/clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.cpp @@ -1,12 +1,175 @@ // RUN: %clang_analyze_cc1 -analyzer-checker=alpha.webkit.UncountedCallArgsChecker -verify %s #include "mock-types.h" +//#include + +void WTFBreakpointTrap(); +void WTFCrashWithInfo(int, const char*, const char*, int); + +inline void compilerFenceForCrash() +{ + asm volatile("" ::: "memory"); +} + +inline void isIntegralOrPointerType() { } + +template +void isIntegralOrPointerType(T, Types... types) +{ + static_assert(sizeof(char) < sizeof(short), "All types need to be bitwise_cast-able to integral type for logging"); + isIntegralOrPointerType(types...); +} + +#define CRASH_WITH_INFO(...) do { \ + isIntegralOrPointerType(__VA_ARGS__); \ + compilerFenceForCrash(); \ + WTFBreakpointTrap(); \ + __builtin_unreachable(); \ +} while (0) + +#define RELEASE_ASSERT(assertion, ...) 
do { \ + if (!(assertion)) \ + CRASH_WITH_INFO(__VA_ARGS__); \ +} while (0) + +#if !defined(NOT_TAIL_CALLED) +#if __has_attribute(not_tail_called) +#define NOT_TAIL_CALLED __attribute__((not_tail_called)) +#else +#define NOT_TAIL_CALLED +#endif +#endif +#define NO_RETURN_DUE_TO_CRASH + +#if !defined(ALWAYS_INLINE) +#define ALWAYS_INLINE inline +#endif + +NO_RETURN_DUE_TO_CRASH NOT_TAIL_CALLED void WTFCrashWithInfoImpl(int line, const char* file, const char* function, int counter, unsigned long reason); +NO_RETURN_DUE_TO_CRASH NOT_TAIL_CALLED void WTFCrashWithInfo(int line, const char* file, const char* function, int counter); + +template +ALWAYS_INLINE unsigned long wtfCrashArg(T* arg) { return reinterpret_cast(arg); } + +template +ALWAYS_INLINE unsigned long wtfCrashArg(T arg) { return arg; } + +template +NO_RETURN_DUE_TO_CRASH ALWAYS_INLINE void WTFCrashWithInfo(int line, const char* file, const char* function, int counter, T reason) +{ + WTFCrashWithInfoImpl(line, file, function, counter, wtfCrashArg(reason)); +} + +class Number { +public: + Number(int v) : v(v) { } + Number(double); + Number operator+(const Number&); +private: + int v; +}; class RefCounted { public: void ref() const; void deref() const; + void someFunction(); + int otherFunction(); + + int trivial1() { return 123; } + float trivial2() { return 0.3; } + float trivial3() { return (float)0.4; } + float trivial4() { return 0.5f; } + char trivial5() { return 'a'; } + const char *trivial6() { return "abc"; } + int trivial7() { return (1); } + Number trivial8() { return Number { 5 }; } + int trivial9() { return 3 + 4; } + int trivial10() { return 0x1010 | 0x1; } + int trivial11(int v) { return v + 1; } + const char *trivial12(char *p) { return p ? 
"str" : "null"; } + int trivial13(int v) { + if (v) + return 123; + else + return 0; + } + int trivial14(int v) { + switch (v) { + case 1: + return 100; + case 2: + return 200; + default: + return 300; + } + return 0; + } + void *trivial15() { return static_cast(this); } + unsigned long trivial16() { return reinterpret_cast(this); } + RefCounted& trivial17() const { return const_cast(*this); } + RefCounted& trivial18() const { RELEASE_ASSERT(this, "this must be not null"); return const_cast(*this); } + void trivial19() const { return; } + + static RefCounted& singleton() { + static RefCounted s_RefCounted; + s_RefCounted.ref(); + return s_RefCounted; + } + + Number nonTrivial1() { return Number(3) + Number(4); } + Number nonTrivial2() { return Number { 0.3 }; } + int nonTrivial3() { return v ? otherFunction() : 0; } + int nonTrivial4() { + if (v) + return 8; + else + return otherFunction(); + } + + int nonTrivial5() { + if (v) + return otherFunction(); + else + return 9; + } + + int nonTrivial6() { + if (otherFunction()) + return 1; + else + return 0; + } + + int nonTrivial7() { + switch (v) { + case 1: + return otherFunction(); + default: + return 7; + } + } + + int nonTrivial8() { + switch (v) { + case 1: + return 9; + default: + return otherFunction(); + } + } + + int nonTrivial9() { + switch (otherFunction()) { + case 0: + return -1; + default: + return 12; + } + } + + unsigned v { 0 }; }; RefCounted* refCountedObj(); @@ -16,3 +179,73 @@ void test() refCountedObj()->someFunction(); // expected-warning@-1{{Call argument for 'this' parameter is uncounted and unsafe}} } + +class UnrelatedClass { + RefPtr Field; + bool value; + +public: + RefCounted &getFieldTrivial() { return *Field.get(); } + RefCounted *getFieldTernary() { return value ? 
Field.get() : nullptr; } + + void test() { + getFieldTrivial().trivial1(); // no-warning + getFieldTrivial().trivial2(); // no-warning + getFieldTrivial().trivial3(); // no-warning + getFieldTrivial().trivial4(); // no-warning + getFieldTrivial().trivial5(); // no-warning + getFieldTrivial().trivial6(); // no-warning + getFieldTrivial().trivial7(); // no-warning + getFieldTrivial().trivial8(); // no-warning + getFieldTrivial().trivial9(); // no-warning + getFieldTrivial().trivial10(); // no-warning + getFieldTrivial().trivial11(1); // no-warning + getFieldTrivial().trivial12(nullptr); // no-warning + getFieldTrivial().trivial13(0); // no-warning + getFieldTrivial().trivial14(3); // no-warning + getFieldTrivial().trivial15(); // no-warning + getFieldTrivial().trivial16(); // no-warning + getFieldTrivial().trivial17(); // no-warning + getFieldTrivial().trivial18(); // no-warning + getFieldTrivial().trivial19(); // no-warning + RefCounted::singleton().trivial18(); // no-warning + RefCounted::singleton().someFunction(); // no-warning + + getFieldTrivial().someFunction(); + // expected-warning@-1{{Call argument for 'this' parameter is uncounted and unsafe}} + getFieldTrivial().nonTrivial1(); + // expected-warning@-1{{Call argument for 'this' parameter is uncounted and unsafe}} + getFieldTrivial().nonTrivial2(); + // expected-warning@-1{{Call argument for 'this' parameter is uncounted and unsafe}} + getFieldTrivial().nonTrivial3(); + // expected-warning@-1{{Call argument for 'this' parameter is uncounted and unsafe}} + getFieldTrivial().nonTrivial4(); + // expected-warning@-1{{Call argument for 'this' parameter is uncounted and unsafe}} + getFieldTrivial().nonTrivial5(); + // expected-warning@-1{{Call argument for 'this' parameter is uncounted and unsafe}} + getFieldTrivial().nonTrivial6(); + // expected-warning@-1{{Call argument for 'this' parameter is uncounted and unsafe}} + getFieldTrivial().nonTrivial7(); + // expected-warning@-1{{Call argument for 'this' parameter 
is uncounted and unsafe}} + getFieldTrivial().nonTrivial8(); + // expected-warning@-1{{Call argument for 'this' parameter is uncounted and unsafe}} + getFieldTrivial().nonTrivial9(); + // expected-warning@-1{{Call argument for 'this' parameter is uncounted and unsafe}} + } +}; + +class UnrelatedClass2 { + RefPtr Field; + +public: + UnrelatedClass &getFieldTrivial() { return *Field.get(); } + RefCounted &getFieldTrivialRecursively() { return getFieldTrivial().getFieldTrivial(); } + RefCounted *getFieldTrivialTernary() { return Field ? Field->getFieldTernary() : nullptr; } + + void test() { + getFieldTrivialRecursively().trivial1(); // no-warning + getFieldTrivialTernary()->trivial2(); // no-warning + getFieldTrivialRecursively().someFunction(); + // expected-warning@-1{{Call argument for 'this' parameter is uncounted and unsafe}} + } +}; From 6fce42f89a2c3f12b019bd3d7fef3e8db2d4671f Mon Sep 17 00:00:00 2001 From: jkorous-apple <32549412+jkorous-apple@users.noreply.github.com> Date: Wed, 14 Feb 2024 19:03:37 -0800 Subject: [PATCH 204/240] [-Wunsafe-buffer-usage] Add fixits for array to pointer assignment (#81343) Introducing CArrayToPtrAssignment gadget and implementing fixits for some cases of array being assigned to pointer. Key observations: - const size array can be assigned to std::span and bounds are propagated - const size array can't be on LHS of assignment This means array to pointer assignment has no strategy implications. Fixits are implemented for cases where one of the variables in the assignment is safe. For assignment of a safe array to unsafe pointer we know that the RHS will never be transformed since it's safe and can immediately emit the optimal fixit. Similarly for assignment of unsafe array to safe pointer. (Obviously this is not and can't be future-proof in regards to what variables we consider unsafe and that is fine.) 
Fixits for assignment from unsafe array to unsafe pointer (from Array to Span strategy) are not implemented in this patch as that needs to be properly designed first - we might possibly implement optimal fixits for partially transformed cases, put both variables in a single fixit group or do something else. --- .../Analyses/UnsafeBufferUsageGadgets.def | 3 +- clang/lib/Analysis/UnsafeBufferUsage.cpp | 130 ++++++++++++++++-- .../warn-unsafe-buffer-usage-debug.cpp | 2 +- ...uffer-usage-fixits-array-assign-to-ptr.cpp | 45 ++++++ 4 files changed, 163 insertions(+), 17 deletions(-) create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-array-assign-to-ptr.cpp diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def index 07f805ebb11013..3273c642eed517 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def @@ -45,7 +45,8 @@ FIXABLE_GADGET(UPCAddressofArraySubscript) // '&DRE[any]' in an Unspecified Poin FIXABLE_GADGET(UPCStandalonePointer) FIXABLE_GADGET(UPCPreIncrement) // '++Ptr' in an Unspecified Pointer Context FIXABLE_GADGET(UUCAddAssign) // 'Ptr += n' in an Unspecified Untyped Context -FIXABLE_GADGET(PointerAssignment) +FIXABLE_GADGET(PtrToPtrAssignment) +FIXABLE_GADGET(CArrayToPtrAssignment) FIXABLE_GADGET(PointerInit) #undef FIXABLE_GADGET diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index a74c113e29f1cf..769c6d9ebefaa5 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -7,11 +7,14 @@ //===----------------------------------------------------------------------===// #include "clang/Analysis/Analyses/UnsafeBufferUsage.h" +#include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" #include "clang/AST/Expr.h" #include "clang/AST/RecursiveASTVisitor.h" +#include
"clang/AST/Stmt.h" #include "clang/AST/StmtVisitor.h" #include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" #include "clang/Basic/CharInfo.h" #include "clang/Basic/SourceLocation.h" #include "clang/Lex/Lexer.h" @@ -799,7 +802,8 @@ class PointerInitGadget : public FixableGadget { /// \code /// p = q; /// \endcode -class PointerAssignmentGadget : public FixableGadget { +/// where both `p` and `q` are pointers. +class PtrToPtrAssignmentGadget : public FixableGadget { private: static constexpr const char *const PointerAssignLHSTag = "ptrLHS"; static constexpr const char *const PointerAssignRHSTag = "ptrRHS"; @@ -807,13 +811,13 @@ class PointerAssignmentGadget : public FixableGadget { const DeclRefExpr * PtrRHS; // the RHS pointer expression in `PA` public: - PointerAssignmentGadget(const MatchFinder::MatchResult &Result) - : FixableGadget(Kind::PointerAssignment), - PtrLHS(Result.Nodes.getNodeAs(PointerAssignLHSTag)), - PtrRHS(Result.Nodes.getNodeAs(PointerAssignRHSTag)) {} + PtrToPtrAssignmentGadget(const MatchFinder::MatchResult &Result) + : FixableGadget(Kind::PtrToPtrAssignment), + PtrLHS(Result.Nodes.getNodeAs(PointerAssignLHSTag)), + PtrRHS(Result.Nodes.getNodeAs(PointerAssignRHSTag)) {} static bool classof(const Gadget *G) { - return G->getKind() == Kind::PointerAssignment; + return G->getKind() == Kind::PtrToPtrAssignment; } static Matcher matcher() { @@ -848,6 +852,60 @@ class PointerAssignmentGadget : public FixableGadget { } }; +/// An assignment expression of the form: +/// \code +/// ptr = array; +/// \endcode +/// where `ptr` is a pointer and `array` is a constant size array. 
+class CArrayToPtrAssignmentGadget : public FixableGadget { +private: + static constexpr const char *const PointerAssignLHSTag = "ptrLHS"; + static constexpr const char *const PointerAssignRHSTag = "ptrRHS"; + const DeclRefExpr *PtrLHS; // the LHS pointer expression in `PA` + const DeclRefExpr *PtrRHS; // the RHS pointer expression in `PA` + +public: + CArrayToPtrAssignmentGadget(const MatchFinder::MatchResult &Result) + : FixableGadget(Kind::CArrayToPtrAssignment), + PtrLHS(Result.Nodes.getNodeAs(PointerAssignLHSTag)), + PtrRHS(Result.Nodes.getNodeAs(PointerAssignRHSTag)) {} + + static bool classof(const Gadget *G) { + return G->getKind() == Kind::CArrayToPtrAssignment; + } + + static Matcher matcher() { + auto PtrAssignExpr = binaryOperator( + allOf(hasOperatorName("="), + hasRHS(ignoringParenImpCasts( + declRefExpr(hasType(hasCanonicalType(constantArrayType())), + toSupportedVariable()) + .bind(PointerAssignRHSTag))), + hasLHS(declRefExpr(hasPointerType(), toSupportedVariable()) + .bind(PointerAssignLHSTag)))); + + return stmt(isInUnspecifiedUntypedContext(PtrAssignExpr)); + } + + virtual std::optional + getFixits(const FixitStrategy &S) const override; + + virtual const Stmt *getBaseStmt() const override { + // FIXME: This should be the binary operator, assuming that this method + // makes sense at all on a FixableGadget. + return PtrLHS; + } + + virtual DeclUseList getClaimedVarUseSites() const override { + return DeclUseList{PtrLHS, PtrRHS}; + } + + virtual std::optional> + getStrategyImplications() const override { + return {}; + } +}; + /// A call of a function or method that performs unchecked buffer operations /// over one of its pointer parameters. 
class UnsafeBufferUsageAttrGadget : public WarningGadget { @@ -1471,7 +1529,7 @@ bool clang::internal::anyConflict(const SmallVectorImpl &FixIts, } std::optional -PointerAssignmentGadget::getFixits(const FixitStrategy &S) const { +PtrToPtrAssignmentGadget::getFixits(const FixitStrategy &S) const { const auto *LeftVD = cast(PtrLHS->getDecl()); const auto *RightVD = cast(PtrRHS->getDecl()); switch (S.lookup(LeftVD)) { @@ -1490,6 +1548,42 @@ PointerAssignmentGadget::getFixits(const FixitStrategy &S) const { return std::nullopt; } +/// \returns fixit that adds .data() call after \p DRE. +static inline std::optional createDataFixit(const ASTContext &Ctx, + const DeclRefExpr *DRE); + +std::optional +CArrayToPtrAssignmentGadget::getFixits(const FixitStrategy &S) const { + const auto *LeftVD = cast(PtrLHS->getDecl()); + const auto *RightVD = cast(PtrRHS->getDecl()); + // TLDR: Implementing fixits for non-Wontfix strategy on both LHS and RHS is + // non-trivial. + // + // CArrayToPtrAssignmentGadget doesn't have strategy implications because + // constant size array propagates its bounds. Because of that LHS and RHS are + // addressed by two different fixits. + // + // At the same time FixitStrategy S doesn't reflect what group a fixit belongs + // to and can't be generally relied on in multi-variable Fixables! + // + // E.g., if an instance of this gadget is fixing the variable on LHS then the + // variable on RHS is fixed by a different fixit and its strategy for the LHS + // fixit is as if Wontfix. + // + // The only exception is Wontfix strategy for a given variable as that is + // valid for any fixit produced for the given input source code. 
+ if (S.lookup(LeftVD) == FixitStrategy::Kind::Span) { + if (S.lookup(RightVD) == FixitStrategy::Kind::Wontfix) { + return FixItList{}; + } + } else if (S.lookup(LeftVD) == FixitStrategy::Kind::Wontfix) { + if (S.lookup(RightVD) == FixitStrategy::Kind::Array) { + return createDataFixit(RightVD->getASTContext(), PtrRHS); + } + } + return std::nullopt; +} + std::optional PointerInitGadget::getFixits(const FixitStrategy &S) const { const auto *LeftVD = PtrInitLHS; @@ -1907,6 +2001,19 @@ PointerDereferenceGadget::getFixits(const FixitStrategy &S) const { return std::nullopt; } +static inline std::optional createDataFixit(const ASTContext &Ctx, + const DeclRefExpr *DRE) { + const SourceManager &SM = Ctx.getSourceManager(); + // Inserts the .data() after the DRE + std::optional EndOfOperand = + getPastLoc(DRE, SM, Ctx.getLangOpts()); + + if (EndOfOperand) + return FixItList{{FixItHint::CreateInsertion(*EndOfOperand, ".data()")}}; + + return std::nullopt; +} + // Generates fix-its replacing an expression of the form UPC(DRE) with // `DRE.data()` std::optional @@ -1915,14 +2022,7 @@ UPCStandalonePointerGadget::getFixits(const FixitStrategy &S) const { switch (S.lookup(VD)) { case FixitStrategy::Kind::Array: case FixitStrategy::Kind::Span: { - ASTContext &Ctx = VD->getASTContext(); - SourceManager &SM = Ctx.getSourceManager(); - // Inserts the .data() after the DRE - std::optional EndOfOperand = - getPastLoc(Node, SM, Ctx.getLangOpts()); - - if (EndOfOperand) - return FixItList{{FixItHint::CreateInsertion(*EndOfOperand, ".data()")}}; + return createDataFixit(VD->getASTContext(), Node); // FIXME: Points inside a macro expansion. 
break; } diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-debug.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-debug.cpp index a5b578b98d4e5b..4cc1948f28a773 100644 --- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-debug.cpp +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-debug.cpp @@ -53,7 +53,7 @@ void unclaimed_use() { void implied_unclaimed_var(int *b) { // expected-warning{{'b' is an unsafe pointer used for buffer access}} int *a = new int[3]; // expected-warning{{'a' is an unsafe pointer used for buffer access}} a[4] = 7; // expected-note{{used in buffer access here}} - a = b; // debug-note{{safe buffers debug: gadget 'PointerAssignment' refused to produce a fix}} + a = b; // debug-note{{safe buffers debug: gadget 'PtrToPtrAssignment' refused to produce a fix}} b++; // expected-note{{used in pointer arithmetic here}} \ // debug-note{{safe buffers debug: failed to produce fixit for 'b' : has an unclaimed use}} } diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-array-assign-to-ptr.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-array-assign-to-ptr.cpp new file mode 100644 index 00000000000000..ff91e32bdc1ccf --- /dev/null +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-array-assign-to-ptr.cpp @@ -0,0 +1,45 @@ +// RUN: %clang_cc1 -std=c++20 -Wunsafe-buffer-usage \ +// RUN: -fsafe-buffer-usage-suggestions \ +// RUN: -fdiagnostics-parseable-fixits %s 2>&1 | FileCheck %s + +void safe_array_assigned_to_safe_ptr(unsigned idx) { + int buffer[10]; + // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]: + int* ptr; + // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]: + ptr = buffer; + // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]: +} + +void safe_array_assigned_to_unsafe_ptr(unsigned idx) { + int buffer[10]; + // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]: + int* ptr; + // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:11}:"std::span ptr" + ptr = buffer; + // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]: + ptr[idx] = 0; +} + +void 
unsafe_array_assigned_to_safe_ptr(unsigned idx) { + int buffer[10]; + // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:17}:"std::array buffer" + int* ptr; + // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]: + ptr = buffer; + // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:15-[[@LINE-1]]:15}:".data()" + buffer[idx] = 0; +} + +// FIXME: Implement fixit/s for this case. +// See comment in CArrayToPtrAssignmentGadget::getFixits to learn why this hasn't been implemented. +void unsafe_array_assigned_to_unsafe_ptr(unsigned idx) { + int buffer[10]; + // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]:{{.*}} + int* ptr; + // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]:{{.*}} + ptr = buffer; + // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]:{{.*}} + buffer[idx] = 0; + ptr[idx] = 0; +} From dcbb574cfc3445251ff1c751f27b52ed6503bead Mon Sep 17 00:00:00 2001 From: Brianna Fan Date: Wed, 14 Feb 2024 18:53:07 -0800 Subject: [PATCH 205/240] [analyzer] Teach scan-build to filter reports by file. That's a new GUI bell-and-whistle in the index.html page. --- .../test/Analysis/scan-build/html_output.test | 8 ++++++-- clang/tools/scan-build/bin/scan-build | 11 +++++++++- .../scan-build/share/scan-build/sorttable.js | 20 +++++++++++++++++++ 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/clang/test/Analysis/scan-build/html_output.test b/clang/test/Analysis/scan-build/html_output.test index eed2051d4df627..add35d83b95887 100644 --- a/clang/test/Analysis/scan-build/html_output.test +++ b/clang/test/Analysis/scan-build/html_output.test @@ -19,13 +19,17 @@ CHECK-FILENAMES: report-{{.*}}.html CHECK-FILENAMES: scanview.css CHECK-FILENAMES: sorttable.js - -// The index should have a link to the report for the single issue. +// Tests for the front page. RUN: cat %t.output_dir/*/index.html \ RUN: | FileCheck %s -check-prefix CHECK-INDEX-HTML +// Let's confirm that the new filtering facility is present. 
+CHECK-INDEX-HTML: Filter Results by File + +// The index should have a link to the report for the single issue. CHECK-INDEX-HTML: + // The report should describe the issue. RUN: cat %t.output_dir/*/report-*.html \ RUN: | FileCheck %s -check-prefix CHECK-REPORT-HTML diff --git a/clang/tools/scan-build/bin/scan-build b/clang/tools/scan-build/bin/scan-build index 04734d9cfa9af6..37241c6d85c5b2 100755 --- a/clang/tools/scan-build/bin/scan-build +++ b/clang/tools/scan-build/bin/scan-build @@ -722,9 +722,18 @@ ENDTEXT print OUT < + +

Filter Results by File

+ +

Reports

- +
diff --git a/clang/tools/scan-build/share/scan-build/sorttable.js b/clang/tools/scan-build/share/scan-build/sorttable.js index 32faa078d89934..e608daa9e39bc5 100644 --- a/clang/tools/scan-build/share/scan-build/sorttable.js +++ b/clang/tools/scan-build/share/scan-build/sorttable.js @@ -490,3 +490,23 @@ var forEach = function(object, block, context) { resolve.forEach(object, block, context); } }; + +// filter results by filename +const searchFiles = () => { + const columns = [ + { name: 'Filename', index: 2, isFilter: true }, + ] + const filterColumns = columns.filter(c => c.isFilter).map(c => c.index) + const trs = document.querySelectorAll(`#reports_table tr:not(.header)`) + const filter = document.querySelector('#file_input').value + const regex = new RegExp(escape(filter), 'i') + const isFoundInTds = td => regex.test(td.innerHTML) + const isFound = childrenArr => childrenArr.some(isFoundInTds) + const setTrStyleDisplay = ({ style, children }) => { + style.display = isFound([ + ...filterColumns.map(c => children[c]) + ]) ? '' : 'none' + } + + trs.forEach(setTrStyleDisplay) +} From de6fad51462bd0784beafef6fd171ad7725205b5 Mon Sep 17 00:00:00 2001 From: Jessica Clarke Date: Thu, 15 Feb 2024 04:02:30 +0000 Subject: [PATCH 206/240] [TableGen][NFCI] Simplify TypeSetByHwMode::intersect and make extensible (#81688) The current implementation considers both iPTR+iN and everything else all in one go, which leads to more special casing when iPTR is present in only one set than is described in the comment block. Moreover this makes it very difficult to add any new iPTR-like wildcards due to the exponential combinatorial explosion that occurs. Logically, iPTR+iN handling is entirely independent from everything else, so rewrite the code to do them separately. 
This removes special cases, making the core of the implementation more succinct, whilst more clearly implementing exactly what is described in the comment block, and allows for any number of (non-overlapping) wildcards to be added to the list, as needed by CHERI LLVM downstream (due to having a new capability type which, much like a normal integer pointer in LLVM, varies in size between targets and modes). In testing, this change results in identical TableGen output for all in-tree backends (including those in LLVM_ALL_EXPERIMENTAL_TARGETS), and it is intended that this implementation is entirely equivalent to the old one. --- llvm/utils/TableGen/CodeGenDAGPatterns.cpp | 157 +++++++++++---------- 1 file changed, 81 insertions(+), 76 deletions(-) diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp index ca6aa4b251e9b2..076d0427a85971 100644 --- a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp +++ b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp @@ -41,7 +41,6 @@ static inline bool isIntegerOrPtr(MVT VT) { static inline bool isFloatingPoint(MVT VT) { return VT.isFloatingPoint(); } static inline bool isVector(MVT VT) { return VT.isVector(); } static inline bool isScalar(MVT VT) { return !VT.isVector(); } -static inline bool isScalarInteger(MVT VT) { return VT.isScalarInteger(); } template static bool berase_if(MachineValueTypeSet &S, Predicate P) { @@ -262,85 +261,91 @@ LLVM_DUMP_METHOD void TypeSetByHwMode::dump() const { dbgs() << *this << '\n'; } bool TypeSetByHwMode::intersect(SetType &Out, const SetType &In) { - bool OutP = Out.count(MVT::iPTR), InP = In.count(MVT::iPTR); - // Complement of In. - auto CompIn = [&In](MVT T) -> bool { return !In.count(T); }; - - if (OutP == InP) - return berase_if(Out, CompIn); - - // Compute the intersection of scalars separately to account for only - // one set containing iPTR. 
- // The intersection of iPTR with a set of integer scalar types that does not - // include iPTR will result in the most specific scalar type: - // - iPTR is more specific than any set with two elements or more - // - iPTR is less specific than any single integer scalar type. - // For example - // { iPTR } * { i32 } -> { i32 } - // { iPTR } * { i32 i64 } -> { iPTR } - // and - // { iPTR i32 } * { i32 } -> { i32 } - // { iPTR i32 } * { i32 i64 } -> { i32 i64 } - // { iPTR i32 } * { i32 i64 i128 } -> { iPTR i32 } - - // Let In' = elements only in In, Out' = elements only in Out, and - // IO = elements common to both. Normally IO would be returned as the result - // of the intersection, but we need to account for iPTR being a "wildcard" of - // sorts. Since elements in IO are those that match both sets exactly, they - // will all belong to the output. If any of the "leftovers" (i.e. In' or - // Out') contain iPTR, it means that the other set doesn't have it, but it - // could have (1) a more specific type, or (2) a set of types that is less - // specific. The "leftovers" from the other set is what we want to examine - // more closely. - - auto subtract = [](const SetType &A, const SetType &B) { - SetType Diff = A; - berase_if(Diff, [&B](MVT T) { return B.count(T); }); - return Diff; - }; - - if (InP) { - SetType OutOnly = subtract(Out, In); - if (OutOnly.empty()) { - // This means that Out \subset In, so no change to Out. - return false; - } - unsigned NumI = llvm::count_if(OutOnly, isScalarInteger); - if (NumI == 1 && OutOnly.size() == 1) { - // There is only one element in Out', and it happens to be a scalar - // integer that should be kept as a match for iPTR in In. - return false; + auto IntersectP = [&](std::optional WildVT, function_ref P) { + // Complement of In within this partition. 
+ auto CompIn = [&](MVT T) -> bool { return !In.count(T) && P(T); }; + + if (!WildVT) + return berase_if(Out, CompIn); + + bool OutW = Out.count(*WildVT), InW = In.count(*WildVT); + if (OutW == InW) + return berase_if(Out, CompIn); + + // Compute the intersection of scalars separately to account for only one + // set containing WildVT. + // The intersection of WildVT with a set of corresponding types that does + // not include WildVT will result in the most specific type: + // - WildVT is more specific than any set with two elements or more + // - WildVT is less specific than any single type. + // For example, for iPTR and scalar integer types + // { iPTR } * { i32 } -> { i32 } + // { iPTR } * { i32 i64 } -> { iPTR } + // and + // { iPTR i32 } * { i32 } -> { i32 } + // { iPTR i32 } * { i32 i64 } -> { i32 i64 } + // { iPTR i32 } * { i32 i64 i128 } -> { iPTR i32 } + + // Looking at just this partition, let In' = elements only in In, + // Out' = elements only in Out, and IO = elements common to both. Normally + // IO would be returned as the result of the intersection, but we need to + // account for WildVT being a "wildcard" of sorts. Since elements in IO are + // those that match both sets exactly, they will all belong to the output. + // If any of the "leftovers" (i.e. In' or Out') contain WildVT, it means + // that the other set doesn't have it, but it could have (1) a more + // specific type, or (2) a set of types that is less specific. The + // "leftovers" from the other set is what we want to examine more closely. + + auto Leftovers = [&](const SetType &A, const SetType &B) { + SetType Diff = A; + berase_if(Diff, [&](MVT T) { return B.count(T) || !P(T); }); + return Diff; + }; + + if (InW) { + SetType OutLeftovers = Leftovers(Out, In); + if (OutLeftovers.size() < 2) { + // WildVT not added to Out. Keep the possible single leftover. + return false; + } + // WildVT replaces the leftovers. 
+ berase_if(Out, CompIn); + Out.insert(*WildVT); + return true; } - berase_if(Out, CompIn); - if (NumI == 1) { - // Replace the iPTR with the leftover scalar integer. - Out.insert(*llvm::find_if(OutOnly, isScalarInteger)); - } else if (NumI > 1) { - Out.insert(MVT::iPTR); + + // OutW == true + SetType InLeftovers = Leftovers(In, Out); + unsigned SizeOut = Out.size(); + berase_if(Out, CompIn); // This will remove at least the WildVT. + if (InLeftovers.size() < 2) { + // WildVT deleted from Out. Add back the possible single leftover. + Out.insert(InLeftovers); + return true; } - return true; - } - // OutP == true - SetType InOnly = subtract(In, Out); - unsigned SizeOut = Out.size(); - berase_if(Out, CompIn); // This will remove at least the iPTR. - unsigned NumI = llvm::count_if(InOnly, isScalarInteger); - if (NumI == 0) { - // iPTR deleted from Out. - return true; - } - if (NumI == 1) { - // Replace the iPTR with the leftover scalar integer. - Out.insert(*llvm::find_if(InOnly, isScalarInteger)); - return true; - } + // Keep the WildVT in Out. + Out.insert(*WildVT); + // If WildVT was the only element initially removed from Out, then Out + // has not changed. + return SizeOut != Out.size(); + }; - // NumI > 1: Keep the iPTR in Out. - Out.insert(MVT::iPTR); - // If iPTR was the only element initially removed from Out, then Out - // has not changed. 
- return SizeOut != Out.size(); + // Note: must be non-overlapping + using WildPartT = std::pair>; + static const WildPartT WildParts[] = { + {MVT::iPTR, [](MVT T) { return T.isScalarInteger() || T == MVT::iPTR; }}, + }; + + bool Changed = false; + for (const auto &I : WildParts) + Changed |= IntersectP(I.first, I.second); + + Changed |= IntersectP(std::nullopt, [&](MVT T) { + return !any_of(WildParts, [=](const WildPartT &I) { return I.second(T); }); + }); + + return Changed; } bool TypeSetByHwMode::validate() const { From d821650e13145a1acccd337c9853354ad6531507 Mon Sep 17 00:00:00 2001 From: rmarker <37921131+rmarker@users.noreply.github.com> Date: Thu, 15 Feb 2024 14:40:56 +1030 Subject: [PATCH 207/240] [clang-format][NFC] Drop "Always" in "AlwaysBreakAfterReturnType". (#81591) Complete the switch from "AlwaysBreakAfterReturnType" to "BreakAfterReturnType". --- clang/include/clang/Format/Format.h | 6 +-- clang/lib/Format/ContinuationIndenter.cpp | 6 +-- clang/lib/Format/Format.cpp | 24 +++++------ clang/lib/Format/TokenAnnotator.cpp | 7 ++-- clang/unittests/Format/ConfigParseTest.cpp | 40 +++++++++---------- .../Format/DefinitionBlockSeparatorTest.cpp | 2 +- clang/unittests/Format/FormatTest.cpp | 16 ++++---- clang/unittests/Format/FormatTestCSharp.cpp | 2 +- 8 files changed, 49 insertions(+), 54 deletions(-) diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index 737cbfced9e9ce..e9b2160a7b9243 100644 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -1013,7 +1013,7 @@ struct FormatStyle { /// This option is renamed to ``BreakAfterReturnType``. /// \version 3.8 /// @deprecated - ReturnTypeBreakingStyle AlwaysBreakAfterReturnType; + // ReturnTypeBreakingStyle AlwaysBreakAfterReturnType; /// If ``true``, always break before multiline string literals. /// @@ -1579,7 +1579,7 @@ struct FormatStyle { /// The function declaration return type breaking style to use. 
/// \version 19 - // ReturnTypeBreakingStyle BreakAfterReturnType; + ReturnTypeBreakingStyle BreakAfterReturnType; /// If ``true``, clang-format will always break after a Json array ``[`` /// otherwise it will scan until the closing ``]`` to determine if it should @@ -4824,7 +4824,6 @@ struct FormatStyle { R.AllowShortIfStatementsOnASingleLine && AllowShortLambdasOnASingleLine == R.AllowShortLambdasOnASingleLine && AllowShortLoopsOnASingleLine == R.AllowShortLoopsOnASingleLine && - AlwaysBreakAfterReturnType == R.AlwaysBreakAfterReturnType && AlwaysBreakBeforeMultilineStrings == R.AlwaysBreakBeforeMultilineStrings && AttributeMacros == R.AttributeMacros && @@ -4835,6 +4834,7 @@ struct FormatStyle { BreakAdjacentStringLiterals == R.BreakAdjacentStringLiterals && BreakAfterAttributes == R.BreakAfterAttributes && BreakAfterJavaFieldAnnotations == R.BreakAfterJavaFieldAnnotations && + BreakAfterReturnType == R.BreakAfterReturnType && BreakArrays == R.BreakArrays && BreakBeforeBinaryOperators == R.BreakBeforeBinaryOperators && BreakBeforeBraces == R.BreakBeforeBraces && diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index 0b2ef97af44d83..159d130cb67332 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -329,12 +329,12 @@ bool ContinuationIndenter::canBreak(const LineState &State) { // Don't break after very short return types (e.g. "void") as that is often // unexpected. 
if (Current.is(TT_FunctionDeclarationName)) { - if (Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_None && + if (Style.BreakAfterReturnType == FormatStyle::RTBS_None && State.Column < 6) { return false; } - if (Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_ExceptShortType) { + if (Style.BreakAfterReturnType == FormatStyle::RTBS_ExceptShortType) { assert(State.Column >= State.FirstIndent); if (State.Column - State.FirstIndent < 6) return false; @@ -597,7 +597,7 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { !State.Line->ReturnTypeWrapped && // Don't break before a C# function when no break after return type. (!Style.isCSharp() || - Style.AlwaysBreakAfterReturnType > FormatStyle::RTBS_ExceptShortType) && + Style.BreakAfterReturnType > FormatStyle::RTBS_ExceptShortType) && // Don't always break between a JavaScript `function` and the function // name. !Style.isJavaScript() && Previous.isNot(tok::kw_template) && diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index 8efc42e0576cf9..56cd9495920c7b 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -877,8 +877,7 @@ template <> struct MappingTraits { if (!IO.outputting()) { IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlines); IO.mapOptional("AllowAllConstructorInitializersOnNextLine", OnNextLine); - IO.mapOptional("AlwaysBreakAfterReturnType", - Style.AlwaysBreakAfterReturnType); + IO.mapOptional("AlwaysBreakAfterReturnType", Style.BreakAfterReturnType); IO.mapOptional("AlwaysBreakTemplateDeclarations", Style.BreakTemplateDeclarations); IO.mapOptional("BreakBeforeInheritanceComma", @@ -957,7 +956,7 @@ template <> struct MappingTraits { IO.mapOptional("BreakAfterAttributes", Style.BreakAfterAttributes); IO.mapOptional("BreakAfterJavaFieldAnnotations", Style.BreakAfterJavaFieldAnnotations); - IO.mapOptional("BreakAfterReturnType", Style.AlwaysBreakAfterReturnType); + IO.mapOptional("BreakAfterReturnType", 
Style.BreakAfterReturnType); IO.mapOptional("BreakArrays", Style.BreakArrays); IO.mapOptional("BreakBeforeBinaryOperators", Style.BreakBeforeBinaryOperators); @@ -1127,17 +1126,16 @@ template <> struct MappingTraits { Style.WhitespaceSensitiveMacros); // If AlwaysBreakAfterDefinitionReturnType was specified but - // AlwaysBreakAfterReturnType was not, initialize the latter from the - // former for backwards compatibility. + // BreakAfterReturnType was not, initialize the latter from the former for + // backwards compatibility. if (Style.AlwaysBreakAfterDefinitionReturnType != FormatStyle::DRTBS_None && - Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_None) { + Style.BreakAfterReturnType == FormatStyle::RTBS_None) { if (Style.AlwaysBreakAfterDefinitionReturnType == FormatStyle::DRTBS_All) { - Style.AlwaysBreakAfterReturnType = FormatStyle::RTBS_AllDefinitions; + Style.BreakAfterReturnType = FormatStyle::RTBS_AllDefinitions; } else if (Style.AlwaysBreakAfterDefinitionReturnType == FormatStyle::DRTBS_TopLevel) { - Style.AlwaysBreakAfterReturnType = - FormatStyle::RTBS_TopLevelDefinitions; + Style.BreakAfterReturnType = FormatStyle::RTBS_TopLevelDefinitions; } } @@ -1439,7 +1437,6 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { LLVMStyle.AllowShortIfStatementsOnASingleLine = FormatStyle::SIS_Never; LLVMStyle.AllowShortLambdasOnASingleLine = FormatStyle::SLS_All; LLVMStyle.AllowShortLoopsOnASingleLine = false; - LLVMStyle.AlwaysBreakAfterReturnType = FormatStyle::RTBS_None; LLVMStyle.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_None; LLVMStyle.AlwaysBreakBeforeMultilineStrings = false; LLVMStyle.BreakTemplateDeclarations = FormatStyle::BTDS_MultiLine; @@ -1469,6 +1466,7 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { LLVMStyle.BreakAdjacentStringLiterals = true; LLVMStyle.BreakAfterAttributes = FormatStyle::ABS_Leave; LLVMStyle.BreakAfterJavaFieldAnnotations = false; + LLVMStyle.BreakAfterReturnType = 
FormatStyle::RTBS_None; LLVMStyle.BreakArrays = true; LLVMStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_None; LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach; @@ -1822,12 +1820,12 @@ FormatStyle getMozillaStyle() { FormatStyle MozillaStyle = getLLVMStyle(); MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false; MozillaStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; - MozillaStyle.AlwaysBreakAfterReturnType = FormatStyle::RTBS_TopLevel; MozillaStyle.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_TopLevel; MozillaStyle.BreakTemplateDeclarations = FormatStyle::BTDS_Yes; MozillaStyle.BinPackParameters = false; MozillaStyle.BinPackArguments = false; + MozillaStyle.BreakAfterReturnType = FormatStyle::RTBS_TopLevel; MozillaStyle.BreakBeforeBraces = FormatStyle::BS_Mozilla; MozillaStyle.BreakConstructorInitializers = FormatStyle::BCIS_BeforeComma; MozillaStyle.BreakInheritanceList = FormatStyle::BILS_BeforeComma; @@ -1871,7 +1869,7 @@ FormatStyle getWebKitStyle() { FormatStyle getGNUStyle() { FormatStyle Style = getLLVMStyle(); Style.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_All; - Style.AlwaysBreakAfterReturnType = FormatStyle::RTBS_AllDefinitions; + Style.BreakAfterReturnType = FormatStyle::RTBS_AllDefinitions; Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All; Style.BreakBeforeBraces = FormatStyle::BS_GNU; Style.BreakBeforeTernaryOperators = true; @@ -1908,7 +1906,7 @@ FormatStyle getMicrosoftStyle(FormatStyle::LanguageKind Language) { Style.AllowShortIfStatementsOnASingleLine = FormatStyle::SIS_Never; Style.AllowShortLoopsOnASingleLine = false; Style.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_None; - Style.AlwaysBreakAfterReturnType = FormatStyle::RTBS_None; + Style.BreakAfterReturnType = FormatStyle::RTBS_None; return Style; } diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 08a49bc17f13f3..ac876bf4442e95 100644 --- 
a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -3728,14 +3728,13 @@ static bool isFunctionDeclarationName(bool IsCpp, const FormatToken &Current, bool TokenAnnotator::mustBreakForReturnType(const AnnotatedLine &Line) const { assert(Line.MightBeFunctionDecl); - if ((Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_TopLevel || - Style.AlwaysBreakAfterReturnType == - FormatStyle::RTBS_TopLevelDefinitions) && + if ((Style.BreakAfterReturnType == FormatStyle::RTBS_TopLevel || + Style.BreakAfterReturnType == FormatStyle::RTBS_TopLevelDefinitions) && Line.Level > 0) { return false; } - switch (Style.AlwaysBreakAfterReturnType) { + switch (Style.BreakAfterReturnType) { case FormatStyle::RTBS_None: case FormatStyle::RTBS_Automatic: case FormatStyle::RTBS_ExceptShortType: diff --git a/clang/unittests/Format/ConfigParseTest.cpp b/clang/unittests/Format/ConfigParseTest.cpp index ee8a55680753f4..8c74ed2d119a3f 100644 --- a/clang/unittests/Format/ConfigParseTest.cpp +++ b/clang/unittests/Format/ConfigParseTest.cpp @@ -677,38 +677,36 @@ TEST(ConfigParseTest, ParsesConfiguration) { " AfterControlStatement: false", BraceWrapping.AfterControlStatement, FormatStyle::BWACS_Never); - Style.AlwaysBreakAfterReturnType = FormatStyle::RTBS_All; - CHECK_PARSE("BreakAfterReturnType: None", AlwaysBreakAfterReturnType, + Style.BreakAfterReturnType = FormatStyle::RTBS_All; + CHECK_PARSE("BreakAfterReturnType: None", BreakAfterReturnType, FormatStyle::RTBS_None); - CHECK_PARSE("BreakAfterReturnType: Automatic", AlwaysBreakAfterReturnType, + CHECK_PARSE("BreakAfterReturnType: Automatic", BreakAfterReturnType, FormatStyle::RTBS_Automatic); - CHECK_PARSE("BreakAfterReturnType: ExceptShortType", - AlwaysBreakAfterReturnType, FormatStyle::RTBS_ExceptShortType); - CHECK_PARSE("BreakAfterReturnType: All", AlwaysBreakAfterReturnType, + CHECK_PARSE("BreakAfterReturnType: ExceptShortType", BreakAfterReturnType, + FormatStyle::RTBS_ExceptShortType); + 
CHECK_PARSE("BreakAfterReturnType: All", BreakAfterReturnType, FormatStyle::RTBS_All); - CHECK_PARSE("BreakAfterReturnType: TopLevel", AlwaysBreakAfterReturnType, + CHECK_PARSE("BreakAfterReturnType: TopLevel", BreakAfterReturnType, FormatStyle::RTBS_TopLevel); - CHECK_PARSE("BreakAfterReturnType: AllDefinitions", - AlwaysBreakAfterReturnType, FormatStyle::RTBS_AllDefinitions); - CHECK_PARSE("BreakAfterReturnType: TopLevelDefinitions", - AlwaysBreakAfterReturnType, + CHECK_PARSE("BreakAfterReturnType: AllDefinitions", BreakAfterReturnType, + FormatStyle::RTBS_AllDefinitions); + CHECK_PARSE("BreakAfterReturnType: TopLevelDefinitions", BreakAfterReturnType, FormatStyle::RTBS_TopLevelDefinitions); // For backward compatibility: - CHECK_PARSE("AlwaysBreakAfterReturnType: None", AlwaysBreakAfterReturnType, + CHECK_PARSE("AlwaysBreakAfterReturnType: None", BreakAfterReturnType, FormatStyle::RTBS_None); - CHECK_PARSE("AlwaysBreakAfterReturnType: Automatic", - AlwaysBreakAfterReturnType, FormatStyle::RTBS_Automatic); + CHECK_PARSE("AlwaysBreakAfterReturnType: Automatic", BreakAfterReturnType, + FormatStyle::RTBS_Automatic); CHECK_PARSE("AlwaysBreakAfterReturnType: ExceptShortType", - AlwaysBreakAfterReturnType, FormatStyle::RTBS_ExceptShortType); - CHECK_PARSE("AlwaysBreakAfterReturnType: All", AlwaysBreakAfterReturnType, + BreakAfterReturnType, FormatStyle::RTBS_ExceptShortType); + CHECK_PARSE("AlwaysBreakAfterReturnType: All", BreakAfterReturnType, FormatStyle::RTBS_All); - CHECK_PARSE("AlwaysBreakAfterReturnType: TopLevel", - AlwaysBreakAfterReturnType, FormatStyle::RTBS_TopLevel); + CHECK_PARSE("AlwaysBreakAfterReturnType: TopLevel", BreakAfterReturnType, + FormatStyle::RTBS_TopLevel); CHECK_PARSE("AlwaysBreakAfterReturnType: AllDefinitions", - AlwaysBreakAfterReturnType, FormatStyle::RTBS_AllDefinitions); + BreakAfterReturnType, FormatStyle::RTBS_AllDefinitions); CHECK_PARSE("AlwaysBreakAfterReturnType: TopLevelDefinitions", - AlwaysBreakAfterReturnType, - 
FormatStyle::RTBS_TopLevelDefinitions); + BreakAfterReturnType, FormatStyle::RTBS_TopLevelDefinitions); Style.BreakTemplateDeclarations = FormatStyle::BTDS_Yes; CHECK_PARSE("BreakTemplateDeclarations: Leave", BreakTemplateDeclarations, diff --git a/clang/unittests/Format/DefinitionBlockSeparatorTest.cpp b/clang/unittests/Format/DefinitionBlockSeparatorTest.cpp index f5489498a93b9e..7a120935cfa965 100644 --- a/clang/unittests/Format/DefinitionBlockSeparatorTest.cpp +++ b/clang/unittests/Format/DefinitionBlockSeparatorTest.cpp @@ -144,7 +144,7 @@ TEST_F(DefinitionBlockSeparatorTest, Basic) { Style); FormatStyle BreakAfterReturnTypeStyle = Style; - BreakAfterReturnTypeStyle.AlwaysBreakAfterReturnType = FormatStyle::RTBS_All; + BreakAfterReturnTypeStyle.BreakAfterReturnType = FormatStyle::RTBS_All; // Test uppercased long typename verifyFormat("class Foo {\n" " void\n" diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 79cd521b6a9998..b0687eaecb10fe 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -9870,7 +9870,7 @@ TEST_F(FormatTest, ReturnTypeBreakingStyle) { Style.ColumnLimit = 60; // No declarations or definitions should be moved to own line. - Style.AlwaysBreakAfterReturnType = FormatStyle::RTBS_None; + Style.BreakAfterReturnType = FormatStyle::RTBS_None; verifyFormat("class A {\n" " int f() { return 1; }\n" " int g();\n" @@ -9884,7 +9884,7 @@ TEST_F(FormatTest, ReturnTypeBreakingStyle) { Style); // It is now allowed to break after a short return type if necessary. - Style.AlwaysBreakAfterReturnType = FormatStyle::RTBS_Automatic; + Style.BreakAfterReturnType = FormatStyle::RTBS_Automatic; verifyFormat("class A {\n" " int f() { return 1; }\n" " int g();\n" @@ -9898,7 +9898,7 @@ TEST_F(FormatTest, ReturnTypeBreakingStyle) { Style); // It now must never break after a short return type. 
- Style.AlwaysBreakAfterReturnType = FormatStyle::RTBS_ExceptShortType; + Style.BreakAfterReturnType = FormatStyle::RTBS_ExceptShortType; verifyFormat("class A {\n" " int f() { return 1; }\n" " int g();\n" @@ -9913,7 +9913,7 @@ TEST_F(FormatTest, ReturnTypeBreakingStyle) { // All declarations and definitions should have the return type moved to its // own line. - Style.AlwaysBreakAfterReturnType = FormatStyle::RTBS_All; + Style.BreakAfterReturnType = FormatStyle::RTBS_All; Style.TypenameMacros = {"LIST"}; verifyFormat("SomeType\n" "funcdecl(LIST(uint64_t));", @@ -9940,7 +9940,7 @@ TEST_F(FormatTest, ReturnTypeBreakingStyle) { // Top-level definitions, and no kinds of declarations should have the // return type moved to its own line. - Style.AlwaysBreakAfterReturnType = FormatStyle::RTBS_TopLevelDefinitions; + Style.BreakAfterReturnType = FormatStyle::RTBS_TopLevelDefinitions; verifyFormat("class B {\n" " int f() { return 1; }\n" " int g();\n" @@ -9954,7 +9954,7 @@ TEST_F(FormatTest, ReturnTypeBreakingStyle) { // Top-level definitions and declarations should have the return type moved // to its own line. - Style.AlwaysBreakAfterReturnType = FormatStyle::RTBS_TopLevel; + Style.BreakAfterReturnType = FormatStyle::RTBS_TopLevel; verifyFormat("class C {\n" " int f() { return 1; }\n" " int g();\n" @@ -9971,7 +9971,7 @@ TEST_F(FormatTest, ReturnTypeBreakingStyle) { // All definitions should have the return type moved to its own line, but no // kinds of declarations. 
- Style.AlwaysBreakAfterReturnType = FormatStyle::RTBS_AllDefinitions; + Style.BreakAfterReturnType = FormatStyle::RTBS_AllDefinitions; verifyFormat("class D {\n" " int\n" " f() {\n" @@ -11939,7 +11939,7 @@ TEST_F(FormatTest, UnderstandsAttributes) { "aaaaaaaaaaaaaaaaaaaaaaa(int i);"); verifyFormat("__attribute__((nodebug)) ::qualified_type f();"); FormatStyle AfterType = getLLVMStyle(); - AfterType.AlwaysBreakAfterReturnType = FormatStyle::RTBS_All; + AfterType.BreakAfterReturnType = FormatStyle::RTBS_All; verifyFormat("__attribute__((nodebug)) void\n" "foo() {}", AfterType); diff --git a/clang/unittests/Format/FormatTestCSharp.cpp b/clang/unittests/Format/FormatTestCSharp.cpp index 6f5e1e41ef7e0b..de261c09483086 100644 --- a/clang/unittests/Format/FormatTestCSharp.cpp +++ b/clang/unittests/Format/FormatTestCSharp.cpp @@ -505,7 +505,7 @@ TEST_F(FormatTestCSharp, CSharpNullForgiving) { TEST_F(FormatTestCSharp, AttributesIndentation) { FormatStyle Style = getMicrosoftStyle(FormatStyle::LK_CSharp); - Style.AlwaysBreakAfterReturnType = FormatStyle::RTBS_None; + Style.BreakAfterReturnType = FormatStyle::RTBS_None; verifyFormat("[STAThread]\n" "static void Main(string[] args)\n" From fa9e297b8b63dacb962d99814e698658ad71f946 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Wed, 14 Feb 2024 22:09:48 -0600 Subject: [PATCH 208/240] [ClangPackager] Fix passing in multiple instances of `file` Summary: This is necessary because CMake build tools might need to generate several files but are unable to put them in separate images. This patch simply moves the file handling out into a separate split iterator.
--- .../ClangOffloadPackager.cpp | 53 ++++++++++--------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/clang/tools/clang-offload-packager/ClangOffloadPackager.cpp b/clang/tools/clang-offload-packager/ClangOffloadPackager.cpp index 08de3f3a3771c1..c36a5aa58cee50 100644 --- a/clang/tools/clang-offload-packager/ClangOffloadPackager.cpp +++ b/clang/tools/clang-offload-packager/ClangOffloadPackager.cpp @@ -104,33 +104,36 @@ static Error bundleImages() { inconvertibleErrorCode(), "'file' and 'triple' are required image arguments"); - OffloadBinary::OffloadingImage ImageBinary{}; - std::unique_ptr DeviceImage; - for (const auto &[Key, Value] : Args) { - if (Key == "file") { - llvm::ErrorOr> ObjectOrErr = - llvm::MemoryBuffer::getFileOrSTDIN(Value); - if (std::error_code EC = ObjectOrErr.getError()) - return errorCodeToError(EC); - - // Clang uses the '.o' suffix for LTO bitcode. - if (identify_magic((*ObjectOrErr)->getBuffer()) == file_magic::bitcode) - ImageBinary.TheImageKind = object::IMG_Bitcode; - else - ImageBinary.TheImageKind = - getImageKind(sys::path::extension(Value).drop_front()); - ImageBinary.Image = std::move(*ObjectOrErr); - } else if (Key == "kind") { - ImageBinary.TheOffloadKind = getOffloadKind(Value); - } else { - ImageBinary.StringData[Key] = Value; + // Permit using multiple instances of `file` in a single string. + for (auto &File : llvm::split(Args["file"], ",")) { + OffloadBinary::OffloadingImage ImageBinary{}; + std::unique_ptr DeviceImage; + + llvm::ErrorOr> ObjectOrErr = + llvm::MemoryBuffer::getFileOrSTDIN(File); + if (std::error_code EC = ObjectOrErr.getError()) + return errorCodeToError(EC); + + // Clang uses the '.o' suffix for LTO bitcode. 
+ if (identify_magic((*ObjectOrErr)->getBuffer()) == file_magic::bitcode) + ImageBinary.TheImageKind = object::IMG_Bitcode; + else + ImageBinary.TheImageKind = + getImageKind(sys::path::extension(File).drop_front()); + ImageBinary.Image = std::move(*ObjectOrErr); + for (const auto &[Key, Value] : Args) { + if (Key == "kind") { + ImageBinary.TheOffloadKind = getOffloadKind(Value); + } else if (Key != "file") { + ImageBinary.StringData[Key] = Value; + } } + llvm::SmallString<0> Buffer = OffloadBinary::write(ImageBinary); + if (Buffer.size() % OffloadBinary::getAlignment() != 0) + return createStringError(inconvertibleErrorCode(), + "Offload binary has invalid size alignment"); + OS << Buffer; } - llvm::SmallString<0> Buffer = OffloadBinary::write(ImageBinary); - if (Buffer.size() % OffloadBinary::getAlignment() != 0) - return createStringError(inconvertibleErrorCode(), - "Offload binary has invalid size alignment"); - OS << Buffer; } if (Error E = writeFile(OutputFile, From fc0b67e1d79d1f199687f8f06d619984d9520230 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Thu, 15 Feb 2024 11:48:38 +0800 Subject: [PATCH 209/240] [RISCV] Add cost model tests for llvm.vector.{insert,extract}. NFC For llvm.vector.extract, this tests combinations of extracting at a zero and non-zero index, and extracting from a fixed or scalable vector. For llvm.vector.insert, this tests the same combinations as extracts but with an additional configuration for an undef vector. This is because we can use a subregister insert if the index is 0 and the vector is undef, which should be free.
--- .../CostModel/RISCV/rvv-vectorextract.ll | 169 +++++++++ .../CostModel/RISCV/rvv-vectorinsert.ll | 335 ++++++++++++++++++ 2 files changed, 504 insertions(+) create mode 100644 llvm/test/Analysis/CostModel/RISCV/rvv-vectorextract.ll create mode 100644 llvm/test/Analysis/CostModel/RISCV/rvv-vectorinsert.ll diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-vectorextract.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-vectorextract.ll new file mode 100644 index 00000000000000..1e2d1f4d94954e --- /dev/null +++ b/llvm/test/Analysis/CostModel/RISCV/rvv-vectorextract.ll @@ -0,0 +1,169 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v < %s | FileCheck %s +; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v -cost-kind=code-size < %s | FileCheck %s --check-prefix=SIZE + +define void @vector_extract_nxv128i8_0( %v) { +; CHECK-LABEL: 'vector_extract_nxv128i8_0' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf8 = call @llvm.vector.extract.nxv1i8.nxv128i8( %v, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf4 = call @llvm.vector.extract.nxv2i8.nxv128i8( %v, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf2 = call @llvm.vector.extract.nxv4i8.nxv128i8( %v, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m1 = call @llvm.vector.extract.nxv8i8.nxv128i8( %v, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m2 = call @llvm.vector.extract.nxv16i8.nxv128i8( %v, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m4 = call @llvm.vector.extract.nxv32i8.nxv128i8( %v, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m8 = call 
@llvm.vector.extract.nxv64i8.nxv128i8( %v, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_mf8 = call <2 x i8> @llvm.vector.extract.v2i8.nxv128i8( %v, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_mf4 = call <4 x i8> @llvm.vector.extract.v4i8.nxv128i8( %v, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_mf2 = call <8 x i8> @llvm.vector.extract.v8i8.nxv128i8( %v, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_m1 = call <16 x i8> @llvm.vector.extract.v16i8.nxv128i8( %v, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_m2 = call <32 x i8> @llvm.vector.extract.v32i8.nxv128i8( %v, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_m4 = call <64 x i8> @llvm.vector.extract.v64i8.nxv128i8( %v, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_m8 = call <128 x i8> @llvm.vector.extract.v128i8.nxv128i8( %v, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SIZE-LABEL: 'vector_extract_nxv128i8_0' +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf8 = call @llvm.vector.extract.nxv1i8.nxv128i8( %v, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf4 = call @llvm.vector.extract.nxv2i8.nxv128i8( %v, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf2 = call @llvm.vector.extract.nxv4i8.nxv128i8( %v, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m1 = call @llvm.vector.extract.nxv8i8.nxv128i8( %v, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m2 = call @llvm.vector.extract.nxv16i8.nxv128i8( %v, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %scalable_m4 = call @llvm.vector.extract.nxv32i8.nxv128i8( %v, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m8 = call @llvm.vector.extract.nxv64i8.nxv128i8( %v, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_mf8 = call <2 x i8> @llvm.vector.extract.v2i8.nxv128i8( %v, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_mf4 = call <4 x i8> @llvm.vector.extract.v4i8.nxv128i8( %v, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_mf2 = call <8 x i8> @llvm.vector.extract.v8i8.nxv128i8( %v, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_m1 = call <16 x i8> @llvm.vector.extract.v16i8.nxv128i8( %v, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_m2 = call <32 x i8> @llvm.vector.extract.v32i8.nxv128i8( %v, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_m4 = call <64 x i8> @llvm.vector.extract.v64i8.nxv128i8( %v, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_m8 = call <128 x i8> @llvm.vector.extract.v128i8.nxv128i8( %v, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %scalable_mf8 = call @llvm.vector.extract.nxv1i8.nxv128i8( %v, i64 0) + %scalable_mf4 = call @llvm.vector.extract.nxv2i8.nxv128i8( %v, i64 0) + %scalable_mf2 = call @llvm.vector.extract.nxv4i8.nxv128i8( %v, i64 0) + %scalable_m1 = call @llvm.vector.extract.nxv8i8.nxv128i8( %v, i64 0) + %scalable_m2 = call @llvm.vector.extract.nxv16i8.nxv128i8( %v, i64 0) + %scalable_m4 = call @llvm.vector.extract.nxv32i8.nxv128i8( %v, i64 0) + %scalable_m8 = call @llvm.vector.extract.nxv64i8.nxv128i8( %v, i64 0) + + %fixed_mf8 = call <2 x i8> @llvm.vector.extract.v2i8.nxv128i8( %v, i64 0) + %fixed_mf4 = call <4 x i8> @llvm.vector.extract.v4i8.nxv128i8( %v, i64 0) + 
%fixed_mf2 = call <8 x i8> @llvm.vector.extract.v8i8.nxv128i8( %v, i64 0) + %fixed_m1 = call <16 x i8> @llvm.vector.extract.v16i8.nxv128i8( %v, i64 0) + %fixed_m2 = call <32 x i8> @llvm.vector.extract.v32i8.nxv128i8( %v, i64 0) + %fixed_m4 = call <64 x i8> @llvm.vector.extract.v64i8.nxv128i8( %v, i64 0) + %fixed_m8 = call <128 x i8> @llvm.vector.extract.v128i8.nxv128i8( %v, i64 0) + ret void +} + +define void @vector_extract_nxv128i8_1( %v) { +; CHECK-LABEL: 'vector_extract_nxv128i8_1' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf8 = call @llvm.vector.extract.nxv1i8.nxv128i8( %v, i64 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf4 = call @llvm.vector.extract.nxv2i8.nxv128i8( %v, i64 2) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf2 = call @llvm.vector.extract.nxv4i8.nxv128i8( %v, i64 4) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m1 = call @llvm.vector.extract.nxv8i8.nxv128i8( %v, i64 8) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m2 = call @llvm.vector.extract.nxv16i8.nxv128i8( %v, i64 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m4 = call @llvm.vector.extract.nxv32i8.nxv128i8( %v, i64 32) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m8 = call @llvm.vector.extract.nxv64i8.nxv128i8( %v, i64 64) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_mf8 = call <2 x i8> @llvm.vector.extract.v2i8.nxv128i8( %v, i64 2) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_mf4 = call <4 x i8> @llvm.vector.extract.v4i8.nxv128i8( %v, i64 4) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_mf2 = call <8 x i8> @llvm.vector.extract.v8i8.nxv128i8( %v, i64 8) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for 
instruction: %fixed_m1 = call <16 x i8> @llvm.vector.extract.v16i8.nxv128i8( %v, i64 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_m2 = call <32 x i8> @llvm.vector.extract.v32i8.nxv128i8( %v, i64 32) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_m4 = call <64 x i8> @llvm.vector.extract.v64i8.nxv128i8( %v, i64 64) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_m8 = call <128 x i8> @llvm.vector.extract.v128i8.nxv128i8( %v, i64 128) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SIZE-LABEL: 'vector_extract_nxv128i8_1' +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf8 = call @llvm.vector.extract.nxv1i8.nxv128i8( %v, i64 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf4 = call @llvm.vector.extract.nxv2i8.nxv128i8( %v, i64 2) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf2 = call @llvm.vector.extract.nxv4i8.nxv128i8( %v, i64 4) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m1 = call @llvm.vector.extract.nxv8i8.nxv128i8( %v, i64 8) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m2 = call @llvm.vector.extract.nxv16i8.nxv128i8( %v, i64 16) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m4 = call @llvm.vector.extract.nxv32i8.nxv128i8( %v, i64 32) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m8 = call @llvm.vector.extract.nxv64i8.nxv128i8( %v, i64 64) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_mf8 = call <2 x i8> @llvm.vector.extract.v2i8.nxv128i8( %v, i64 2) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_mf4 = call <4 x i8> @llvm.vector.extract.v4i8.nxv128i8( %v, i64 4) +; SIZE-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %fixed_mf2 = call <8 x i8> @llvm.vector.extract.v8i8.nxv128i8( %v, i64 8) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_m1 = call <16 x i8> @llvm.vector.extract.v16i8.nxv128i8( %v, i64 16) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_m2 = call <32 x i8> @llvm.vector.extract.v32i8.nxv128i8( %v, i64 32) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_m4 = call <64 x i8> @llvm.vector.extract.v64i8.nxv128i8( %v, i64 64) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_m8 = call <128 x i8> @llvm.vector.extract.v128i8.nxv128i8( %v, i64 128) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %scalable_mf8 = call @llvm.vector.extract.nxv1i8.nxv128i8( %v, i64 1) + %scalable_mf4 = call @llvm.vector.extract.nxv2i8.nxv128i8( %v, i64 2) + %scalable_mf2 = call @llvm.vector.extract.nxv4i8.nxv128i8( %v, i64 4) + %scalable_m1 = call @llvm.vector.extract.nxv8i8.nxv128i8( %v, i64 8) + %scalable_m2 = call @llvm.vector.extract.nxv16i8.nxv128i8( %v, i64 16) + %scalable_m4 = call @llvm.vector.extract.nxv32i8.nxv128i8( %v, i64 32) + %scalable_m8 = call @llvm.vector.extract.nxv64i8.nxv128i8( %v, i64 64) + + %fixed_mf8 = call <2 x i8> @llvm.vector.extract.v2i8.nxv128i8( %v, i64 2) + %fixed_mf4 = call <4 x i8> @llvm.vector.extract.v4i8.nxv128i8( %v, i64 4) + %fixed_mf2 = call <8 x i8> @llvm.vector.extract.v8i8.nxv128i8( %v, i64 8) + %fixed_m1 = call <16 x i8> @llvm.vector.extract.v16i8.nxv128i8( %v, i64 16) + %fixed_m2 = call <32 x i8> @llvm.vector.extract.v32i8.nxv128i8( %v, i64 32) + %fixed_m4 = call <64 x i8> @llvm.vector.extract.v64i8.nxv128i8( %v, i64 64) + %fixed_m8 = call <128 x i8> @llvm.vector.extract.v128i8.nxv128i8( %v, i64 128) + ret void +} + +define void @vector_extract_v128i8_0(<128 x i8> %v) { +; CHECK-LABEL: 'vector_extract_v128i8_0' +; CHECK-NEXT: Cost Model: Found an estimated 
cost of 8 for instruction: %fixed_mf8 = call <2 x i8> @llvm.vector.extract.v2i8.v128i8(<128 x i8> %v, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fixed_mf4 = call <4 x i8> @llvm.vector.extract.v4i8.v128i8(<128 x i8> %v, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fixed_mf2 = call <8 x i8> @llvm.vector.extract.v8i8.v128i8(<128 x i8> %v, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fixed_m1 = call <16 x i8> @llvm.vector.extract.v16i8.v128i8(<128 x i8> %v, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fixed_m2 = call <32 x i8> @llvm.vector.extract.v32i8.v128i8(<128 x i8> %v, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fixed_m4 = call <64 x i8> @llvm.vector.extract.v64i8.v128i8(<128 x i8> %v, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fixed_m8 = call <128 x i8> @llvm.vector.extract.v128i8.v128i8(<128 x i8> %v, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SIZE-LABEL: 'vector_extract_v128i8_0' +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fixed_mf8 = call <2 x i8> @llvm.vector.extract.v2i8.v128i8(<128 x i8> %v, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fixed_mf4 = call <4 x i8> @llvm.vector.extract.v4i8.v128i8(<128 x i8> %v, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fixed_mf2 = call <8 x i8> @llvm.vector.extract.v8i8.v128i8(<128 x i8> %v, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fixed_m1 = call <16 x i8> @llvm.vector.extract.v16i8.v128i8(<128 x i8> %v, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fixed_m2 = call <32 x i8> @llvm.vector.extract.v32i8.v128i8(<128 x i8> %v, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %fixed_m4 = call <64 x i8> @llvm.vector.extract.v64i8.v128i8(<128 x i8> %v, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fixed_m8 = call <128 x i8> @llvm.vector.extract.v128i8.v128i8(<128 x i8> %v, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %fixed_mf8 = call <2 x i8> @llvm.vector.extract.v2i8.v128i8(<128 x i8> %v, i64 0) + %fixed_mf4 = call <4 x i8> @llvm.vector.extract.v4i8.v128i8(<128 x i8> %v, i64 0) + %fixed_mf2 = call <8 x i8> @llvm.vector.extract.v8i8.v128i8(<128 x i8> %v, i64 0) + %fixed_m1 = call <16 x i8> @llvm.vector.extract.v16i8.v128i8(<128 x i8> %v, i64 0) + %fixed_m2 = call <32 x i8> @llvm.vector.extract.v32i8.v128i8(<128 x i8> %v, i64 0) + %fixed_m4 = call <64 x i8> @llvm.vector.extract.v64i8.v128i8(<128 x i8> %v, i64 0) + %fixed_m8 = call <128 x i8> @llvm.vector.extract.v128i8.v128i8(<128 x i8> %v, i64 0) + ret void +} + +define void @vector_extract_v128i8_1(<128 x i8> %v) { +; CHECK-LABEL: 'vector_extract_v128i8_1' +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fixed_mf8 = call <2 x i8> @llvm.vector.extract.v2i8.v128i8(<128 x i8> %v, i64 2) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fixed_mf4 = call <4 x i8> @llvm.vector.extract.v4i8.v128i8(<128 x i8> %v, i64 4) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fixed_mf2 = call <8 x i8> @llvm.vector.extract.v8i8.v128i8(<128 x i8> %v, i64 8) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fixed_m1 = call <16 x i8> @llvm.vector.extract.v16i8.v128i8(<128 x i8> %v, i64 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fixed_m2 = call <32 x i8> @llvm.vector.extract.v32i8.v128i8(<128 x i8> %v, i64 32) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fixed_m4 = call <64 x i8> @llvm.vector.extract.v64i8.v128i8(<128 x i8> %v, i64 64) +; CHECK-NEXT: 
Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SIZE-LABEL: 'vector_extract_v128i8_1' +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fixed_mf8 = call <2 x i8> @llvm.vector.extract.v2i8.v128i8(<128 x i8> %v, i64 2) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fixed_mf4 = call <4 x i8> @llvm.vector.extract.v4i8.v128i8(<128 x i8> %v, i64 4) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fixed_mf2 = call <8 x i8> @llvm.vector.extract.v8i8.v128i8(<128 x i8> %v, i64 8) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fixed_m1 = call <16 x i8> @llvm.vector.extract.v16i8.v128i8(<128 x i8> %v, i64 16) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fixed_m2 = call <32 x i8> @llvm.vector.extract.v32i8.v128i8(<128 x i8> %v, i64 32) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fixed_m4 = call <64 x i8> @llvm.vector.extract.v64i8.v128i8(<128 x i8> %v, i64 64) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %fixed_mf8 = call <2 x i8> @llvm.vector.extract.v2i8.v128i8(<128 x i8> %v, i64 2) + %fixed_mf4 = call <4 x i8> @llvm.vector.extract.v4i8.v128i8(<128 x i8> %v, i64 4) + %fixed_mf2 = call <8 x i8> @llvm.vector.extract.v8i8.v128i8(<128 x i8> %v, i64 8) + %fixed_m1 = call <16 x i8> @llvm.vector.extract.v16i8.v128i8(<128 x i8> %v, i64 16) + %fixed_m2 = call <32 x i8> @llvm.vector.extract.v32i8.v128i8(<128 x i8> %v, i64 32) + %fixed_m4 = call <64 x i8> @llvm.vector.extract.v64i8.v128i8(<128 x i8> %v, i64 64) + ; No @llvm.vector.extract.v128i8.v128i8(<128 x i8> %v, i64 128) since it would overrun + ret void +} diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-vectorinsert.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-vectorinsert.ll new file mode 100644 index 00000000000000..7a9f45cb7af50b --- /dev/null +++ b/llvm/test/Analysis/CostModel/RISCV/rvv-vectorinsert.ll @@ 
-0,0 +1,335 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v < %s | FileCheck %s +; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v -cost-kind=code-size < %s | FileCheck %s --check-prefix=SIZE + +define void @vector_insert_nxv128i8_0( %v) { +; CHECK-LABEL: 'vector_insert_nxv128i8_0' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf8 = call @llvm.vector.insert.nxv128i8.nxv1i8( %v, undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf4 = call @llvm.vector.insert.nxv128i8.nxv2i8( %v, undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf2 = call @llvm.vector.insert.nxv128i8.nxv4i8( %v, undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m1 = call @llvm.vector.insert.nxv128i8.nxv8i8( %v, undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m2 = call @llvm.vector.insert.nxv128i8.nxv16i8( %v, undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m4 = call @llvm.vector.insert.nxv128i8.nxv32i8( %v, undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m8 = call @llvm.vector.insert.nxv128i8.nxv64i8( %v, undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_mf8 = call @llvm.vector.insert.nxv128i8.v2i8( %v, <2 x i8> undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_mf4 = call @llvm.vector.insert.nxv128i8.v4i8( %v, <4 x i8> undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_mf2 = call @llvm.vector.insert.nxv128i8.v8i8( %v, <8 x i8> undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost 
of 16 for instruction: %fixed_m1 = call @llvm.vector.insert.nxv128i8.v16i8( %v, <16 x i8> undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_m2 = call @llvm.vector.insert.nxv128i8.v32i8( %v, <32 x i8> undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_m4 = call @llvm.vector.insert.nxv128i8.v64i8( %v, <64 x i8> undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_m8 = call @llvm.vector.insert.nxv128i8.v128i8( %v, <128 x i8> undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SIZE-LABEL: 'vector_insert_nxv128i8_0' +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf8 = call @llvm.vector.insert.nxv128i8.nxv1i8( %v, undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf4 = call @llvm.vector.insert.nxv128i8.nxv2i8( %v, undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf2 = call @llvm.vector.insert.nxv128i8.nxv4i8( %v, undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m1 = call @llvm.vector.insert.nxv128i8.nxv8i8( %v, undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m2 = call @llvm.vector.insert.nxv128i8.nxv16i8( %v, undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m4 = call @llvm.vector.insert.nxv128i8.nxv32i8( %v, undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m8 = call @llvm.vector.insert.nxv128i8.nxv64i8( %v, undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_mf8 = call @llvm.vector.insert.nxv128i8.v2i8( %v, <2 x i8> undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_mf4 = call 
@llvm.vector.insert.nxv128i8.v4i8( %v, <4 x i8> undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_mf2 = call @llvm.vector.insert.nxv128i8.v8i8( %v, <8 x i8> undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_m1 = call @llvm.vector.insert.nxv128i8.v16i8( %v, <16 x i8> undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_m2 = call @llvm.vector.insert.nxv128i8.v32i8( %v, <32 x i8> undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_m4 = call @llvm.vector.insert.nxv128i8.v64i8( %v, <64 x i8> undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_m8 = call @llvm.vector.insert.nxv128i8.v128i8( %v, <128 x i8> undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %scalable_mf8 = call @llvm.vector.insert.nxv128i8.nxv1i8( %v, undef, i64 0) + %scalable_mf4 = call @llvm.vector.insert.nxv128i8.nxv2i8( %v, undef, i64 0) + %scalable_mf2 = call @llvm.vector.insert.nxv128i8.nxv4i8( %v, undef, i64 0) + %scalable_m1 = call @llvm.vector.insert.nxv128i8.nxv8i8( %v, undef, i64 0) + %scalable_m2 = call @llvm.vector.insert.nxv128i8.nxv16i8( %v, undef, i64 0) + %scalable_m4 = call @llvm.vector.insert.nxv128i8.nxv32i8( %v, undef, i64 0) + %scalable_m8 = call @llvm.vector.insert.nxv128i8.nxv64i8( %v, undef, i64 0) + + %fixed_mf8 = call @llvm.vector.insert.nxv128i8.v2i8( %v, <2 x i8> undef, i64 0) + %fixed_mf4 = call @llvm.vector.insert.nxv128i8.v4i8( %v, <4 x i8> undef, i64 0) + %fixed_mf2 = call @llvm.vector.insert.nxv128i8.v8i8( %v, <8 x i8> undef, i64 0) + %fixed_m1 = call @llvm.vector.insert.nxv128i8.v16i8( %v, <16 x i8> undef, i64 0) + %fixed_m2 = call @llvm.vector.insert.nxv128i8.v32i8( %v, <32 x i8> undef, i64 0) + %fixed_m4 = call @llvm.vector.insert.nxv128i8.v64i8( %v, <64 x i8> undef, i64 0) + %fixed_m8 = call 
@llvm.vector.insert.nxv128i8.v128i8( %v, <128 x i8> undef, i64 0) + ret void +} + +define void @vector_insert_nxv128i8_undef_0() { +; CHECK-LABEL: 'vector_insert_nxv128i8_undef_0' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf8 = call @llvm.vector.insert.nxv128i8.nxv1i8( undef, undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf4 = call @llvm.vector.insert.nxv128i8.nxv2i8( undef, undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf2 = call @llvm.vector.insert.nxv128i8.nxv4i8( undef, undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m1 = call @llvm.vector.insert.nxv128i8.nxv8i8( undef, undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m2 = call @llvm.vector.insert.nxv128i8.nxv16i8( undef, undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m4 = call @llvm.vector.insert.nxv128i8.nxv32i8( undef, undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m8 = call @llvm.vector.insert.nxv128i8.nxv64i8( undef, undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_mf8 = call @llvm.vector.insert.nxv128i8.v2i8( undef, <2 x i8> undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_mf4 = call @llvm.vector.insert.nxv128i8.v4i8( undef, <4 x i8> undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_mf2 = call @llvm.vector.insert.nxv128i8.v8i8( undef, <8 x i8> undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_m1 = call @llvm.vector.insert.nxv128i8.v16i8( undef, <16 x i8> undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_m2 = call @llvm.vector.insert.nxv128i8.v32i8( 
undef, <32 x i8> undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_m4 = call @llvm.vector.insert.nxv128i8.v64i8( undef, <64 x i8> undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_m8 = call @llvm.vector.insert.nxv128i8.v128i8( undef, <128 x i8> undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SIZE-LABEL: 'vector_insert_nxv128i8_undef_0' +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf8 = call @llvm.vector.insert.nxv128i8.nxv1i8( undef, undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf4 = call @llvm.vector.insert.nxv128i8.nxv2i8( undef, undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf2 = call @llvm.vector.insert.nxv128i8.nxv4i8( undef, undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m1 = call @llvm.vector.insert.nxv128i8.nxv8i8( undef, undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m2 = call @llvm.vector.insert.nxv128i8.nxv16i8( undef, undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m4 = call @llvm.vector.insert.nxv128i8.nxv32i8( undef, undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m8 = call @llvm.vector.insert.nxv128i8.nxv64i8( undef, undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_mf8 = call @llvm.vector.insert.nxv128i8.v2i8( undef, <2 x i8> undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_mf4 = call @llvm.vector.insert.nxv128i8.v4i8( undef, <4 x i8> undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_mf2 = call @llvm.vector.insert.nxv128i8.v8i8( undef, <8 x i8> undef, i64 
0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_m1 = call @llvm.vector.insert.nxv128i8.v16i8( undef, <16 x i8> undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_m2 = call @llvm.vector.insert.nxv128i8.v32i8( undef, <32 x i8> undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_m4 = call @llvm.vector.insert.nxv128i8.v64i8( undef, <64 x i8> undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_m8 = call @llvm.vector.insert.nxv128i8.v128i8( undef, <128 x i8> undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %scalable_mf8 = call @llvm.vector.insert.nxv128i8.nxv1i8( undef, undef, i64 0) + %scalable_mf4 = call @llvm.vector.insert.nxv128i8.nxv2i8( undef, undef, i64 0) + %scalable_mf2 = call @llvm.vector.insert.nxv128i8.nxv4i8( undef, undef, i64 0) + %scalable_m1 = call @llvm.vector.insert.nxv128i8.nxv8i8( undef, undef, i64 0) + %scalable_m2 = call @llvm.vector.insert.nxv128i8.nxv16i8( undef, undef, i64 0) + %scalable_m4 = call @llvm.vector.insert.nxv128i8.nxv32i8( undef, undef, i64 0) + %scalable_m8 = call @llvm.vector.insert.nxv128i8.nxv64i8( undef, undef, i64 0) + + %fixed_mf8 = call @llvm.vector.insert.nxv128i8.v2i8( undef, <2 x i8> undef, i64 0) + %fixed_mf4 = call @llvm.vector.insert.nxv128i8.v4i8( undef, <4 x i8> undef, i64 0) + %fixed_mf2 = call @llvm.vector.insert.nxv128i8.v8i8( undef, <8 x i8> undef, i64 0) + %fixed_m1 = call @llvm.vector.insert.nxv128i8.v16i8( undef, <16 x i8> undef, i64 0) + %fixed_m2 = call @llvm.vector.insert.nxv128i8.v32i8( undef, <32 x i8> undef, i64 0) + %fixed_m4 = call @llvm.vector.insert.nxv128i8.v64i8( undef, <64 x i8> undef, i64 0) + %fixed_m8 = call @llvm.vector.insert.nxv128i8.v128i8( undef, <128 x i8> undef, i64 0) + ret void +} + +define void @vector_insert_nxv128i8_1( %v) { +; CHECK-LABEL: 'vector_insert_nxv128i8_1' +; 
CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf8 = call @llvm.vector.insert.nxv128i8.nxv1i8( %v, undef, i64 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf4 = call @llvm.vector.insert.nxv128i8.nxv2i8( %v, undef, i64 2) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf2 = call @llvm.vector.insert.nxv128i8.nxv4i8( %v, undef, i64 4) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m1 = call @llvm.vector.insert.nxv128i8.nxv8i8( %v, undef, i64 8) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m2 = call @llvm.vector.insert.nxv128i8.nxv16i8( %v, undef, i64 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m4 = call @llvm.vector.insert.nxv128i8.nxv32i8( %v, undef, i64 32) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m8 = call @llvm.vector.insert.nxv128i8.nxv64i8( %v, undef, i64 64) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_mf8 = call @llvm.vector.insert.nxv128i8.v2i8( %v, <2 x i8> undef, i64 2) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_mf4 = call @llvm.vector.insert.nxv128i8.v4i8( %v, <4 x i8> undef, i64 4) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_mf2 = call @llvm.vector.insert.nxv128i8.v8i8( %v, <8 x i8> undef, i64 8) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_m1 = call @llvm.vector.insert.nxv128i8.v16i8( %v, <16 x i8> undef, i64 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_m2 = call @llvm.vector.insert.nxv128i8.v32i8( %v, <32 x i8> undef, i64 32) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_m4 = call @llvm.vector.insert.nxv128i8.v64i8( %v, <64 x i8> undef, i64 64) +; CHECK-NEXT: Cost Model: 
Found an estimated cost of 16 for instruction: %fixed_m8 = call @llvm.vector.insert.nxv128i8.v128i8( %v, <128 x i8> undef, i64 128) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SIZE-LABEL: 'vector_insert_nxv128i8_1' +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf8 = call @llvm.vector.insert.nxv128i8.nxv1i8( %v, undef, i64 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf4 = call @llvm.vector.insert.nxv128i8.nxv2i8( %v, undef, i64 2) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf2 = call @llvm.vector.insert.nxv128i8.nxv4i8( %v, undef, i64 4) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m1 = call @llvm.vector.insert.nxv128i8.nxv8i8( %v, undef, i64 8) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m2 = call @llvm.vector.insert.nxv128i8.nxv16i8( %v, undef, i64 16) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m4 = call @llvm.vector.insert.nxv128i8.nxv32i8( %v, undef, i64 32) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m8 = call @llvm.vector.insert.nxv128i8.nxv64i8( %v, undef, i64 64) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_mf8 = call @llvm.vector.insert.nxv128i8.v2i8( %v, <2 x i8> undef, i64 2) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_mf4 = call @llvm.vector.insert.nxv128i8.v4i8( %v, <4 x i8> undef, i64 4) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_mf2 = call @llvm.vector.insert.nxv128i8.v8i8( %v, <8 x i8> undef, i64 8) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_m1 = call @llvm.vector.insert.nxv128i8.v16i8( %v, <16 x i8> undef, i64 16) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_m2 = call 
@llvm.vector.insert.nxv128i8.v32i8( %v, <32 x i8> undef, i64 32) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_m4 = call @llvm.vector.insert.nxv128i8.v64i8( %v, <64 x i8> undef, i64 64) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_m8 = call @llvm.vector.insert.nxv128i8.v128i8( %v, <128 x i8> undef, i64 128) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %scalable_mf8 = call @llvm.vector.insert.nxv128i8.nxv1i8( %v, undef, i64 1) + %scalable_mf4 = call @llvm.vector.insert.nxv128i8.nxv2i8( %v, undef, i64 2) + %scalable_mf2 = call @llvm.vector.insert.nxv128i8.nxv4i8( %v, undef, i64 4) + %scalable_m1 = call @llvm.vector.insert.nxv128i8.nxv8i8( %v, undef, i64 8) + %scalable_m2 = call @llvm.vector.insert.nxv128i8.nxv16i8( %v, undef, i64 16) + %scalable_m4 = call @llvm.vector.insert.nxv128i8.nxv32i8( %v, undef, i64 32) + %scalable_m8 = call @llvm.vector.insert.nxv128i8.nxv64i8( %v, undef, i64 64) + + %fixed_mf8 = call @llvm.vector.insert.nxv128i8.v2i8( %v, <2 x i8> undef, i64 2) + %fixed_mf4 = call @llvm.vector.insert.nxv128i8.v4i8( %v, <4 x i8> undef, i64 4) + %fixed_mf2 = call @llvm.vector.insert.nxv128i8.v8i8( %v, <8 x i8> undef, i64 8) + %fixed_m1 = call @llvm.vector.insert.nxv128i8.v16i8( %v, <16 x i8> undef, i64 16) + %fixed_m2 = call @llvm.vector.insert.nxv128i8.v32i8( %v, <32 x i8> undef, i64 32) + %fixed_m4 = call @llvm.vector.insert.nxv128i8.v64i8( %v, <64 x i8> undef, i64 64) + %fixed_m8 = call @llvm.vector.insert.nxv128i8.v128i8( %v, <128 x i8> undef, i64 128) + ret void +} + +define void @vector_insert_nxv128i8_undef_1() { +; CHECK-LABEL: 'vector_insert_nxv128i8_undef_1' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf8 = call @llvm.vector.insert.nxv128i8.nxv1i8( undef, undef, i64 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf4 = call @llvm.vector.insert.nxv128i8.nxv2i8( undef, 
undef, i64 2) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf2 = call @llvm.vector.insert.nxv128i8.nxv4i8( undef, undef, i64 4) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m1 = call @llvm.vector.insert.nxv128i8.nxv8i8( undef, undef, i64 8) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m2 = call @llvm.vector.insert.nxv128i8.nxv16i8( undef, undef, i64 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m4 = call @llvm.vector.insert.nxv128i8.nxv32i8( undef, undef, i64 32) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m8 = call @llvm.vector.insert.nxv128i8.nxv64i8( undef, undef, i64 64) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_mf8 = call @llvm.vector.insert.nxv128i8.v2i8( undef, <2 x i8> undef, i64 2) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_mf4 = call @llvm.vector.insert.nxv128i8.v4i8( undef, <4 x i8> undef, i64 4) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_mf2 = call @llvm.vector.insert.nxv128i8.v8i8( undef, <8 x i8> undef, i64 8) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_m1 = call @llvm.vector.insert.nxv128i8.v16i8( undef, <16 x i8> undef, i64 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_m2 = call @llvm.vector.insert.nxv128i8.v32i8( undef, <32 x i8> undef, i64 32) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_m4 = call @llvm.vector.insert.nxv128i8.v64i8( undef, <64 x i8> undef, i64 64) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fixed_m8 = call @llvm.vector.insert.nxv128i8.v128i8( undef, <128 x i8> undef, i64 128) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SIZE-LABEL: 
'vector_insert_nxv128i8_undef_1' +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf8 = call @llvm.vector.insert.nxv128i8.nxv1i8( undef, undef, i64 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf4 = call @llvm.vector.insert.nxv128i8.nxv2i8( undef, undef, i64 2) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_mf2 = call @llvm.vector.insert.nxv128i8.nxv4i8( undef, undef, i64 4) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m1 = call @llvm.vector.insert.nxv128i8.nxv8i8( undef, undef, i64 8) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m2 = call @llvm.vector.insert.nxv128i8.nxv16i8( undef, undef, i64 16) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m4 = call @llvm.vector.insert.nxv128i8.nxv32i8( undef, undef, i64 32) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %scalable_m8 = call @llvm.vector.insert.nxv128i8.nxv64i8( undef, undef, i64 64) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_mf8 = call @llvm.vector.insert.nxv128i8.v2i8( undef, <2 x i8> undef, i64 2) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_mf4 = call @llvm.vector.insert.nxv128i8.v4i8( undef, <4 x i8> undef, i64 4) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_mf2 = call @llvm.vector.insert.nxv128i8.v8i8( undef, <8 x i8> undef, i64 8) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_m1 = call @llvm.vector.insert.nxv128i8.v16i8( undef, <16 x i8> undef, i64 16) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_m2 = call @llvm.vector.insert.nxv128i8.v32i8( undef, <32 x i8> undef, i64 32) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_m4 = call @llvm.vector.insert.nxv128i8.v64i8( 
undef, <64 x i8> undef, i64 64) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fixed_m8 = call @llvm.vector.insert.nxv128i8.v128i8( undef, <128 x i8> undef, i64 128) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %scalable_mf8 = call @llvm.vector.insert.nxv128i8.nxv1i8( undef, undef, i64 1) + %scalable_mf4 = call @llvm.vector.insert.nxv128i8.nxv2i8( undef, undef, i64 2) + %scalable_mf2 = call @llvm.vector.insert.nxv128i8.nxv4i8( undef, undef, i64 4) + %scalable_m1 = call @llvm.vector.insert.nxv128i8.nxv8i8( undef, undef, i64 8) + %scalable_m2 = call @llvm.vector.insert.nxv128i8.nxv16i8( undef, undef, i64 16) + %scalable_m4 = call @llvm.vector.insert.nxv128i8.nxv32i8( undef, undef, i64 32) + %scalable_m8 = call @llvm.vector.insert.nxv128i8.nxv64i8( undef, undef, i64 64) + + %fixed_mf8 = call @llvm.vector.insert.nxv128i8.v2i8( undef, <2 x i8> undef, i64 2) + %fixed_mf4 = call @llvm.vector.insert.nxv128i8.v4i8( undef, <4 x i8> undef, i64 4) + %fixed_mf2 = call @llvm.vector.insert.nxv128i8.v8i8( undef, <8 x i8> undef, i64 8) + %fixed_m1 = call @llvm.vector.insert.nxv128i8.v16i8( undef, <16 x i8> undef, i64 16) + %fixed_m2 = call @llvm.vector.insert.nxv128i8.v32i8( undef, <32 x i8> undef, i64 32) + %fixed_m4 = call @llvm.vector.insert.nxv128i8.v64i8( undef, <64 x i8> undef, i64 64) + %fixed_m8 = call @llvm.vector.insert.nxv128i8.v128i8( undef, <128 x i8> undef, i64 128) + ret void +} + +define void @vector_insert_v128i8_0(<128 x i8> %v) { +; CHECK-LABEL: 'vector_insert_v128i8_0' +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fixed_mf8 = call <128 x i8> @llvm.vector.insert.v128i8.v2i8(<128 x i8> %v, <2 x i8> undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fixed_mf4 = call <128 x i8> @llvm.vector.insert.v128i8.v4i8(<128 x i8> %v, <4 x i8> undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fixed_mf2 = call 
<128 x i8> @llvm.vector.insert.v128i8.v8i8(<128 x i8> %v, <8 x i8> undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fixed_m1 = call <128 x i8> @llvm.vector.insert.v128i8.v16i8(<128 x i8> %v, <16 x i8> undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fixed_m2 = call <128 x i8> @llvm.vector.insert.v128i8.v32i8(<128 x i8> %v, <32 x i8> undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fixed_m4 = call <128 x i8> @llvm.vector.insert.v128i8.v64i8(<128 x i8> %v, <64 x i8> undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fixed_m8 = call <128 x i8> @llvm.vector.insert.v128i8.v128i8(<128 x i8> %v, <128 x i8> undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SIZE-LABEL: 'vector_insert_v128i8_0' +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fixed_mf8 = call <128 x i8> @llvm.vector.insert.v128i8.v2i8(<128 x i8> %v, <2 x i8> undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fixed_mf4 = call <128 x i8> @llvm.vector.insert.v128i8.v4i8(<128 x i8> %v, <4 x i8> undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fixed_mf2 = call <128 x i8> @llvm.vector.insert.v128i8.v8i8(<128 x i8> %v, <8 x i8> undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fixed_m1 = call <128 x i8> @llvm.vector.insert.v128i8.v16i8(<128 x i8> %v, <16 x i8> undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fixed_m2 = call <128 x i8> @llvm.vector.insert.v128i8.v32i8(<128 x i8> %v, <32 x i8> undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fixed_m4 = call <128 x i8> @llvm.vector.insert.v128i8.v64i8(<128 x i8> %v, <64 x i8> undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %fixed_m8 = call <128 x i8> @llvm.vector.insert.v128i8.v128i8(<128 x i8> %v, <128 x i8> undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %fixed_mf8 = call <128 x i8> @llvm.vector.insert.v128i8.v2i8(<128 x i8> %v, <2 x i8> undef, i64 0) + %fixed_mf4 = call <128 x i8> @llvm.vector.insert.v128i8.v4i8(<128 x i8> %v, <4 x i8> undef, i64 0) + %fixed_mf2 = call <128 x i8> @llvm.vector.insert.v128i8.v8i8(<128 x i8> %v, <8 x i8> undef, i64 0) + %fixed_m1 = call <128 x i8> @llvm.vector.insert.v128i8.v16i8(<128 x i8> %v, <16 x i8> undef, i64 0) + %fixed_m2 = call <128 x i8> @llvm.vector.insert.v128i8.v32i8(<128 x i8> %v, <32 x i8> undef, i64 0) + %fixed_m4 = call <128 x i8> @llvm.vector.insert.v128i8.v64i8(<128 x i8> %v, <64 x i8> undef, i64 0) + %fixed_m8 = call <128 x i8> @llvm.vector.insert.v128i8.v128i8(<128 x i8> %v, <128 x i8> undef, i64 0) + ret void +} + +define void @vector_insert_v128i8_undef_0() { +; CHECK-LABEL: 'vector_insert_v128i8_undef_0' +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fixed_mf8 = call <128 x i8> @llvm.vector.insert.v128i8.v2i8(<128 x i8> undef, <2 x i8> undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fixed_mf4 = call <128 x i8> @llvm.vector.insert.v128i8.v4i8(<128 x i8> undef, <4 x i8> undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fixed_mf2 = call <128 x i8> @llvm.vector.insert.v128i8.v8i8(<128 x i8> undef, <8 x i8> undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fixed_m1 = call <128 x i8> @llvm.vector.insert.v128i8.v16i8(<128 x i8> undef, <16 x i8> undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fixed_m2 = call <128 x i8> @llvm.vector.insert.v128i8.v32i8(<128 x i8> undef, <32 x i8> undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fixed_m4 = call <128 x i8> 
@llvm.vector.insert.v128i8.v64i8(<128 x i8> undef, <64 x i8> undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fixed_m8 = call <128 x i8> @llvm.vector.insert.v128i8.v128i8(<128 x i8> undef, <128 x i8> undef, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SIZE-LABEL: 'vector_insert_v128i8_undef_0' +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fixed_mf8 = call <128 x i8> @llvm.vector.insert.v128i8.v2i8(<128 x i8> undef, <2 x i8> undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fixed_mf4 = call <128 x i8> @llvm.vector.insert.v128i8.v4i8(<128 x i8> undef, <4 x i8> undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fixed_mf2 = call <128 x i8> @llvm.vector.insert.v128i8.v8i8(<128 x i8> undef, <8 x i8> undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fixed_m1 = call <128 x i8> @llvm.vector.insert.v128i8.v16i8(<128 x i8> undef, <16 x i8> undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fixed_m2 = call <128 x i8> @llvm.vector.insert.v128i8.v32i8(<128 x i8> undef, <32 x i8> undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fixed_m4 = call <128 x i8> @llvm.vector.insert.v128i8.v64i8(<128 x i8> undef, <64 x i8> undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fixed_m8 = call <128 x i8> @llvm.vector.insert.v128i8.v128i8(<128 x i8> undef, <128 x i8> undef, i64 0) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %fixed_mf8 = call <128 x i8> @llvm.vector.insert.v128i8.v2i8(<128 x i8> undef, <2 x i8> undef, i64 0) + %fixed_mf4 = call <128 x i8> @llvm.vector.insert.v128i8.v4i8(<128 x i8> undef, <4 x i8> undef, i64 0) + %fixed_mf2 = call <128 x i8> @llvm.vector.insert.v128i8.v8i8(<128 x i8> undef, <8 x i8> undef, 
i64 0) + %fixed_m1 = call <128 x i8> @llvm.vector.insert.v128i8.v16i8(<128 x i8> undef, <16 x i8> undef, i64 0) + %fixed_m2 = call <128 x i8> @llvm.vector.insert.v128i8.v32i8(<128 x i8> undef, <32 x i8> undef, i64 0) + %fixed_m4 = call <128 x i8> @llvm.vector.insert.v128i8.v64i8(<128 x i8> undef, <64 x i8> undef, i64 0) + %fixed_m8 = call <128 x i8> @llvm.vector.insert.v128i8.v128i8(<128 x i8> undef, <128 x i8> undef, i64 0) + ret void +} + +define void @vector_insert_v128i8_1(<128 x i8> %v) { +; CHECK-LABEL: 'vector_insert_v128i8_1' +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fixed_mf8 = call <128 x i8> @llvm.vector.insert.v128i8.v2i8(<128 x i8> %v, <2 x i8> undef, i64 2) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fixed_mf4 = call <128 x i8> @llvm.vector.insert.v128i8.v4i8(<128 x i8> %v, <4 x i8> undef, i64 4) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fixed_mf2 = call <128 x i8> @llvm.vector.insert.v128i8.v8i8(<128 x i8> %v, <8 x i8> undef, i64 8) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fixed_m1 = call <128 x i8> @llvm.vector.insert.v128i8.v16i8(<128 x i8> %v, <16 x i8> undef, i64 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fixed_m2 = call <128 x i8> @llvm.vector.insert.v128i8.v32i8(<128 x i8> %v, <32 x i8> undef, i64 32) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fixed_m4 = call <128 x i8> @llvm.vector.insert.v128i8.v64i8(<128 x i8> %v, <64 x i8> undef, i64 64) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SIZE-LABEL: 'vector_insert_v128i8_1' +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fixed_mf8 = call <128 x i8> @llvm.vector.insert.v128i8.v2i8(<128 x i8> %v, <2 x i8> undef, i64 2) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fixed_mf4 = call <128 x i8> 
@llvm.vector.insert.v128i8.v4i8(<128 x i8> %v, <4 x i8> undef, i64 4) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fixed_mf2 = call <128 x i8> @llvm.vector.insert.v128i8.v8i8(<128 x i8> %v, <8 x i8> undef, i64 8) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fixed_m1 = call <128 x i8> @llvm.vector.insert.v128i8.v16i8(<128 x i8> %v, <16 x i8> undef, i64 16) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fixed_m2 = call <128 x i8> @llvm.vector.insert.v128i8.v32i8(<128 x i8> %v, <32 x i8> undef, i64 32) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fixed_m4 = call <128 x i8> @llvm.vector.insert.v128i8.v64i8(<128 x i8> %v, <64 x i8> undef, i64 64) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %fixed_mf8 = call <128 x i8> @llvm.vector.insert.v128i8.v2i8(<128 x i8> %v, <2 x i8> undef, i64 2) + %fixed_mf4 = call <128 x i8> @llvm.vector.insert.v128i8.v4i8(<128 x i8> %v, <4 x i8> undef, i64 4) + %fixed_mf2 = call <128 x i8> @llvm.vector.insert.v128i8.v8i8(<128 x i8> %v, <8 x i8> undef, i64 8) + %fixed_m1 = call <128 x i8> @llvm.vector.insert.v128i8.v16i8(<128 x i8> %v, <16 x i8> undef, i64 16) + %fixed_m2 = call <128 x i8> @llvm.vector.insert.v128i8.v32i8(<128 x i8> %v, <32 x i8> undef, i64 32) + %fixed_m4 = call <128 x i8> @llvm.vector.insert.v128i8.v64i8(<128 x i8> %v, <64 x i8> undef, i64 64) + ; No @llvm.vector.insert.v128i8.v128i8(<128 x i8> %v, <128 x i8> undef, i64 128) since it would overrun + ret void +} + +define void @vector_insert_v128i8_undef_1() { +; CHECK-LABEL: 'vector_insert_v128i8_undef_1' +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fixed_mf8 = call <128 x i8> @llvm.vector.insert.v128i8.v2i8(<128 x i8> undef, <2 x i8> undef, i64 2) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fixed_mf4 = call <128 x i8> @llvm.vector.insert.v128i8.v4i8(<128 x i8> undef, 
<4 x i8> undef, i64 4) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fixed_mf2 = call <128 x i8> @llvm.vector.insert.v128i8.v8i8(<128 x i8> undef, <8 x i8> undef, i64 8) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fixed_m1 = call <128 x i8> @llvm.vector.insert.v128i8.v16i8(<128 x i8> undef, <16 x i8> undef, i64 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fixed_m2 = call <128 x i8> @llvm.vector.insert.v128i8.v32i8(<128 x i8> undef, <32 x i8> undef, i64 32) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fixed_m4 = call <128 x i8> @llvm.vector.insert.v128i8.v64i8(<128 x i8> undef, <64 x i8> undef, i64 64) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SIZE-LABEL: 'vector_insert_v128i8_undef_1' +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fixed_mf8 = call <128 x i8> @llvm.vector.insert.v128i8.v2i8(<128 x i8> undef, <2 x i8> undef, i64 2) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fixed_mf4 = call <128 x i8> @llvm.vector.insert.v128i8.v4i8(<128 x i8> undef, <4 x i8> undef, i64 4) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fixed_mf2 = call <128 x i8> @llvm.vector.insert.v128i8.v8i8(<128 x i8> undef, <8 x i8> undef, i64 8) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fixed_m1 = call <128 x i8> @llvm.vector.insert.v128i8.v16i8(<128 x i8> undef, <16 x i8> undef, i64 16) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fixed_m2 = call <128 x i8> @llvm.vector.insert.v128i8.v32i8(<128 x i8> undef, <32 x i8> undef, i64 32) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %fixed_m4 = call <128 x i8> @llvm.vector.insert.v128i8.v64i8(<128 x i8> undef, <64 x i8> undef, i64 64) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + 
%fixed_mf8 = call <128 x i8> @llvm.vector.insert.v128i8.v2i8(<128 x i8> undef, <2 x i8> undef, i64 2) + %fixed_mf4 = call <128 x i8> @llvm.vector.insert.v128i8.v4i8(<128 x i8> undef, <4 x i8> undef, i64 4) + %fixed_mf2 = call <128 x i8> @llvm.vector.insert.v128i8.v8i8(<128 x i8> undef, <8 x i8> undef, i64 8) + %fixed_m1 = call <128 x i8> @llvm.vector.insert.v128i8.v16i8(<128 x i8> undef, <16 x i8> undef, i64 16) + %fixed_m2 = call <128 x i8> @llvm.vector.insert.v128i8.v32i8(<128 x i8> undef, <32 x i8> undef, i64 32) + %fixed_m4 = call <128 x i8> @llvm.vector.insert.v128i8.v64i8(<128 x i8> undef, <64 x i8> undef, i64 64) + ; No @llvm.vector.insert.v128i8.v128i8(<128 x i8> undef, <128 x i8> undef, i64 128) since it would overrun + ret void +} From 4b5c21aa07fc6ba477109ec8b83395071ae49e23 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Wed, 14 Feb 2024 22:38:59 -0600 Subject: [PATCH 210/240] [libc][fix] Fix new test that crashes the NVPTX backend --- libc/test/src/__support/CMakeLists.txt | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/libc/test/src/__support/CMakeLists.txt b/libc/test/src/__support/CMakeLists.txt index 231b01e0ee50a7..850f5385ed3431 100644 --- a/libc/test/src/__support/CMakeLists.txt +++ b/libc/test/src/__support/CMakeLists.txt @@ -86,16 +86,19 @@ if(NOT LIBC_TARGET_ARCHITECTURE_IS_GPU) ) endif() -add_libc_test( - uint_test - SUITE - libc-support-tests - SRCS - uint_test.cpp - DEPENDS - libc.src.__support.uint - libc.src.__support.CPP.optional -) +# FIXME: Crash in NVPTX target lowering for calls +if(NOT LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX) + add_libc_test( + uint_test + SUITE + libc-support-tests + SRCS + uint_test.cpp + DEPENDS + libc.src.__support.uint + libc.src.__support.CPP.optional + ) +endif() add_libc_test( integer_literals_test From bfe302c58321abd79a5db7e805ef4b4db24df820 Mon Sep 17 00:00:00 2001 From: Qfrost Date: Thu, 15 Feb 2024 12:53:56 +0800 Subject: [PATCH 211/240] [clang][X86] X86::LAR 
X86::LSL add_implicate eflags (#80993) [@xia0ji233](https://github.com/xia0ji233) and I found that X86::LAR and X86::LSL implicitly modify the EFLAGS register. However, this was not modeled in LLVM, which means liveness analysis gives wrong results for code that uses these two instructions. ![T~ _81W6A}J}{AP{DF%E}KY](https://github.com/llvm/llvm-project/assets/58380176/b84e758b-2978-49e7-a11c-726fd66e1976) --- llvm/lib/Target/X86/X86InstrSystem.td | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/lib/Target/X86/X86InstrSystem.td b/llvm/lib/Target/X86/X86InstrSystem.td index a7899a2492b882..24a334d38f9261 100644 --- a/llvm/lib/Target/X86/X86InstrSystem.td +++ b/llvm/lib/Target/X86/X86InstrSystem.td @@ -213,6 +213,7 @@ def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src), let SchedRW = [WriteSystem] in { def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", []>, TB; +let Defs = [EFLAGS] in { let mayLoad = 1 in def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, @@ -253,6 +254,7 @@ def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB; def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR16orGR32orGR64:$src), "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB; +} def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB; From d53515afef57a3abf84daff169fbc7626a306817 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Wed, 14 Feb 2024 15:29:47 +0100 Subject: [PATCH 212/240] [clang][Interp] Fix variadic member functions For variadic member functions, the way we calculated the instance pointer and RVO pointer offsets on the stack was incorrect, due to Func->getArgSize() not returning the full size of all the passed arguments. 
When calling variadic functions, we need to pass the size of the passed (variadic) arguments to the Call* ops, so they can use that information to properly check the instance pointer, etc. This patch adds a bit of code duplication in Interp.h, which I will get rid of in later cleanup NFC patches. --- clang/lib/AST/Interp/ByteCodeExprGen.cpp | 45 +++++++++++--- clang/lib/AST/Interp/ByteCodeStmtGen.cpp | 2 +- clang/lib/AST/Interp/Context.cpp | 3 +- clang/lib/AST/Interp/EvalEmitter.cpp | 2 +- clang/lib/AST/Interp/Function.h | 10 ++++ clang/lib/AST/Interp/Interp.cpp | 25 +++++--- clang/lib/AST/Interp/Interp.h | 76 ++++++++++++++++++++---- clang/lib/AST/Interp/InterpFrame.cpp | 11 ++-- clang/lib/AST/Interp/InterpFrame.h | 5 +- clang/lib/AST/Interp/Opcodes.td | 11 +++- clang/test/AST/Interp/functions.cpp | 75 +++++++++++++++++++++++ 11 files changed, 226 insertions(+), 39 deletions(-) diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp index 91b9985eefbd30..988765972a36e6 100644 --- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp +++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp @@ -1829,8 +1829,19 @@ bool ByteCodeExprGen::VisitCXXConstructExpr( return false; } - if (!this->emitCall(Func, E)) - return false; + if (Func->isVariadic()) { + uint32_t VarArgSize = 0; + unsigned NumParams = Func->getNumWrittenParams(); + for (unsigned I = NumParams, N = E->getNumArgs(); I != N; ++I) { + VarArgSize += + align(primSize(classify(E->getArg(I)->getType()).value_or(PT_Ptr))); + } + if (!this->emitCallVar(Func, VarArgSize, E)) + return false; + } else { + if (!this->emitCall(Func, 0, E)) + return false; + } // Immediately call the destructor if we have to. 
if (DiscardResult) { @@ -1863,7 +1874,7 @@ bool ByteCodeExprGen::VisitCXXConstructExpr( return false; } - if (!this->emitCall(Func, E)) + if (!this->emitCall(Func, 0, E)) return false; } return true; @@ -2049,7 +2060,7 @@ bool ByteCodeExprGen::VisitCXXInheritedCtorInitExpr( Offset += align(primSize(PT)); } - return this->emitCall(F, E); + return this->emitCall(F, 0, E); } template @@ -2846,20 +2857,38 @@ bool ByteCodeExprGen::VisitCallExpr(const CallExpr *E) { // and if the function has RVO, we already have the pointer on the stack to // write the result into. if (IsVirtual && !HasQualifier) { - if (!this->emitCallVirt(Func, E)) + uint32_t VarArgSize = 0; + unsigned NumParams = Func->getNumWrittenParams(); + for (unsigned I = NumParams, N = E->getNumArgs(); I != N; ++I) + VarArgSize += align(primSize(classify(E->getArg(I)).value_or(PT_Ptr))); + + if (!this->emitCallVirt(Func, VarArgSize, E)) + return false; + } else if (Func->isVariadic()) { + uint32_t VarArgSize = 0; + unsigned NumParams = Func->getNumWrittenParams(); + for (unsigned I = NumParams, N = E->getNumArgs(); I != N; ++I) + VarArgSize += align(primSize(classify(E->getArg(I)).value_or(PT_Ptr))); + if (!this->emitCallVar(Func, VarArgSize, E)) return false; } else { - if (!this->emitCall(Func, E)) + if (!this->emitCall(Func, 0, E)) return false; } } else { // Indirect call. Visit the callee, which will leave a FunctionPointer on // the stack. Cleanup of the returned value if necessary will be done after // the function call completed. + + // Sum the size of all args from the call expr. 
+ uint32_t ArgSize = 0; + for (unsigned I = 0, N = E->getNumArgs(); I != N; ++I) + ArgSize += align(primSize(classify(E->getArg(I)).value_or(PT_Ptr))); + if (!this->visit(E->getCallee())) return false; - if (!this->emitCallPtr(E)) + if (!this->emitCallPtr(ArgSize, E)) return false; } @@ -3386,7 +3415,7 @@ bool ByteCodeExprGen::emitRecordDestruction(const Descriptor *Desc) { assert(DtorFunc->getNumParams() == 1); if (!this->emitDupPtr(SourceInfo{})) return false; - if (!this->emitCall(DtorFunc, SourceInfo{})) + if (!this->emitCall(DtorFunc, 0, SourceInfo{})) return false; } } diff --git a/clang/lib/AST/Interp/ByteCodeStmtGen.cpp b/clang/lib/AST/Interp/ByteCodeStmtGen.cpp index bedcc78dc23555..7e2043f8de90b0 100644 --- a/clang/lib/AST/Interp/ByteCodeStmtGen.cpp +++ b/clang/lib/AST/Interp/ByteCodeStmtGen.cpp @@ -126,7 +126,7 @@ bool ByteCodeStmtGen::emitLambdaStaticInvokerBody( return false; } - if (!this->emitCall(Func, LambdaCallOp)) + if (!this->emitCall(Func, 0, LambdaCallOp)) return false; this->emitCleanup(); diff --git a/clang/lib/AST/Interp/Context.cpp b/clang/lib/AST/Interp/Context.cpp index 5f5a6622f10f3d..7396db22943663 100644 --- a/clang/lib/AST/Interp/Context.cpp +++ b/clang/lib/AST/Interp/Context.cpp @@ -209,7 +209,8 @@ bool Context::Run(State &Parent, const Function *Func, APValue &Result) { { InterpState State(Parent, *P, Stk, *this); - State.Current = new InterpFrame(State, Func, /*Caller=*/nullptr, {}); + State.Current = new InterpFrame(State, Func, /*Caller=*/nullptr, CodePtr(), + Func->getArgSize()); if (Interpret(State, Result)) { assert(Stk.empty()); return true; diff --git a/clang/lib/AST/Interp/EvalEmitter.cpp b/clang/lib/AST/Interp/EvalEmitter.cpp index 945b78d7a609d7..c1e4ce3ebb0729 100644 --- a/clang/lib/AST/Interp/EvalEmitter.cpp +++ b/clang/lib/AST/Interp/EvalEmitter.cpp @@ -22,7 +22,7 @@ EvalEmitter::EvalEmitter(Context &Ctx, Program &P, State &Parent, : Ctx(Ctx), P(P), S(Parent, P, Stk, Ctx, this), EvalResult(&Ctx) { // Create a dummy 
frame for the interpreter which does not have locals. S.Current = - new InterpFrame(S, /*Func=*/nullptr, /*Caller=*/nullptr, CodePtr()); + new InterpFrame(S, /*Func=*/nullptr, /*Caller=*/nullptr, CodePtr(), 0); } EvalEmitter::~EvalEmitter() { diff --git a/clang/lib/AST/Interp/Function.h b/clang/lib/AST/Interp/Function.h index 7c3e0f63024908..6500e0126c226f 100644 --- a/clang/lib/AST/Interp/Function.h +++ b/clang/lib/AST/Interp/Function.h @@ -183,6 +183,16 @@ class Function final { unsigned getNumParams() const { return ParamTypes.size(); } + /// Returns the number of parameters this function takes when it's called, + /// i.e. excluding the instance pointer and the RVO pointer. + unsigned getNumWrittenParams() const { + assert(getNumParams() >= (hasThisPointer() + hasRVO())); + return getNumParams() - hasThisPointer() - hasRVO(); + } + unsigned getWrittenArgSize() const { + return ArgSize - (align(primSize(PT_Ptr)) * (hasThisPointer() + hasRVO())); + } + unsigned getParamOffset(unsigned ParamIndex) const { return ParamOffsets[ParamIndex]; } diff --git a/clang/lib/AST/Interp/Interp.cpp b/clang/lib/AST/Interp/Interp.cpp index 683151f7caf528..2338f88569db8b 100644 --- a/clang/lib/AST/Interp/Interp.cpp +++ b/clang/lib/AST/Interp/Interp.cpp @@ -169,16 +169,27 @@ void cleanupAfterFunctionCall(InterpState &S, CodePtr OpPC) { // CallExpr we're look for is at the return PC of the current function, i.e. // in the caller. // This code path should be executed very rarely. 
- const auto *CE = - cast(S.Current->Caller->getExpr(S.Current->getRetPC())); - unsigned FixedParams = CurFunc->getNumParams(); - int32_t ArgsToPop = CE->getNumArgs() - FixedParams; - assert(ArgsToPop >= 0); - for (int32_t I = ArgsToPop - 1; I >= 0; --I) { - const Expr *A = CE->getArg(FixedParams + I); + unsigned NumVarArgs; + const Expr *const *Args = nullptr; + unsigned NumArgs = 0; + const Expr *CallSite = S.Current->Caller->getExpr(S.Current->getRetPC()); + if (const auto *CE = dyn_cast(CallSite)) { + Args = CE->getArgs(); + NumArgs = CE->getNumArgs(); + } else if (const auto *CE = dyn_cast(CallSite)) { + Args = CE->getArgs(); + NumArgs = CE->getNumArgs(); + } else + assert(false && "Can't get arguments from that expression type"); + + assert(NumArgs >= CurFunc->getNumWrittenParams()); + NumVarArgs = NumArgs - CurFunc->getNumWrittenParams(); + for (unsigned I = 0; I != NumVarArgs; ++I) { + const Expr *A = Args[NumArgs - 1 - I]; popArg(S, A); } } + // And in any case, remove the fixed parameters (the non-variadic ones) // at the end. S.Current->popArgs(); diff --git a/clang/lib/AST/Interp/Interp.h b/clang/lib/AST/Interp/Interp.h index e2fda18e3f44d4..77c724f08e8eef 100644 --- a/clang/lib/AST/Interp/Interp.h +++ b/clang/lib/AST/Interp/Interp.h @@ -1915,10 +1915,60 @@ inline bool ArrayDecay(InterpState &S, CodePtr OpPC) { return false; } -inline bool Call(InterpState &S, CodePtr OpPC, const Function *Func) { +inline bool CallVar(InterpState &S, CodePtr OpPC, const Function *Func, + uint32_t VarArgSize) { if (Func->hasThisPointer()) { - size_t ThisOffset = - Func->getArgSize() - (Func->hasRVO() ? primSize(PT_Ptr) : 0); + size_t ArgSize = Func->getArgSize() + VarArgSize; + size_t ThisOffset = ArgSize - (Func->hasRVO() ? 
primSize(PT_Ptr) : 0); + const Pointer &ThisPtr = S.Stk.peek(ThisOffset); + + // If the current function is a lambda static invoker and + // the function we're about to call is a lambda call operator, + // skip the CheckInvoke, since the ThisPtr is a null pointer + // anyway. + if (!(S.Current->getFunction() && + S.Current->getFunction()->isLambdaStaticInvoker() && + Func->isLambdaCallOperator())) { + if (!CheckInvoke(S, OpPC, ThisPtr)) + return false; + } + + if (S.checkingPotentialConstantExpression()) + return false; + } + + if (!CheckCallable(S, OpPC, Func)) + return false; + + if (!CheckCallDepth(S, OpPC)) + return false; + + auto NewFrame = std::make_unique(S, Func, OpPC, VarArgSize); + InterpFrame *FrameBefore = S.Current; + S.Current = NewFrame.get(); + + APValue CallResult; + // Note that we cannot assert(CallResult.hasValue()) here since + // Ret() above only sets the APValue if the current frame doesn't + // have a caller set. + if (Interpret(S, CallResult)) { + NewFrame.release(); // Frame was delete'd already. + assert(S.Current == FrameBefore); + return true; + } + + // Interpreting the function failed somehow. Reset to + // previous state. + S.Current = FrameBefore; + return false; + + return false; +} +inline bool Call(InterpState &S, CodePtr OpPC, const Function *Func, + uint32_t VarArgSize) { + if (Func->hasThisPointer()) { + size_t ArgSize = Func->getArgSize() + VarArgSize; + size_t ThisOffset = ArgSize - (Func->hasRVO() ? 
primSize(PT_Ptr) : 0); const Pointer &ThisPtr = S.Stk.peek(ThisOffset); @@ -1943,7 +1993,7 @@ inline bool Call(InterpState &S, CodePtr OpPC, const Function *Func) { if (!CheckCallDepth(S, OpPC)) return false; - auto NewFrame = std::make_unique(S, Func, OpPC); + auto NewFrame = std::make_unique(S, Func, OpPC, VarArgSize); InterpFrame *FrameBefore = S.Current; S.Current = NewFrame.get(); @@ -1963,11 +2013,12 @@ inline bool Call(InterpState &S, CodePtr OpPC, const Function *Func) { return false; } -inline bool CallVirt(InterpState &S, CodePtr OpPC, const Function *Func) { +inline bool CallVirt(InterpState &S, CodePtr OpPC, const Function *Func, + uint32_t VarArgSize) { assert(Func->hasThisPointer()); assert(Func->isVirtual()); - size_t ThisOffset = - Func->getArgSize() - (Func->hasRVO() ? primSize(PT_Ptr) : 0); + size_t ArgSize = Func->getArgSize() + VarArgSize; + size_t ThisOffset = ArgSize - (Func->hasRVO() ? primSize(PT_Ptr) : 0); Pointer &ThisPtr = S.Stk.peek(ThisOffset); const CXXRecordDecl *DynamicDecl = @@ -1998,7 +2049,7 @@ inline bool CallVirt(InterpState &S, CodePtr OpPC, const Function *Func) { } } - return Call(S, OpPC, Func); + return Call(S, OpPC, Func, VarArgSize); } inline bool CallBI(InterpState &S, CodePtr &PC, const Function *Func, @@ -2016,17 +2067,20 @@ inline bool CallBI(InterpState &S, CodePtr &PC, const Function *Func, return false; } -inline bool CallPtr(InterpState &S, CodePtr OpPC) { +inline bool CallPtr(InterpState &S, CodePtr OpPC, uint32_t ArgSize) { const FunctionPointer &FuncPtr = S.Stk.pop(); const Function *F = FuncPtr.getFunction(); if (!F || !F->isConstexpr()) return false; + assert(ArgSize >= F->getWrittenArgSize()); + uint32_t VarArgSize = ArgSize - F->getWrittenArgSize(); + if (F->isVirtual()) - return CallVirt(S, OpPC, F); + return CallVirt(S, OpPC, F, VarArgSize); - return Call(S, OpPC, F); + return Call(S, OpPC, F, VarArgSize); } inline bool GetFnPtr(InterpState &S, CodePtr OpPC, const Function *Func) { diff --git 
a/clang/lib/AST/Interp/InterpFrame.cpp b/clang/lib/AST/Interp/InterpFrame.cpp index bf2cca733b66bb..f69ff06b5e81b5 100644 --- a/clang/lib/AST/Interp/InterpFrame.cpp +++ b/clang/lib/AST/Interp/InterpFrame.cpp @@ -22,10 +22,10 @@ using namespace clang; using namespace clang::interp; InterpFrame::InterpFrame(InterpState &S, const Function *Func, - InterpFrame *Caller, CodePtr RetPC) + InterpFrame *Caller, CodePtr RetPC, unsigned ArgSize) : Caller(Caller), S(S), Depth(Caller ? Caller->Depth + 1 : 0), Func(Func), - RetPC(RetPC), ArgSize(Func ? Func->getArgSize() : 0), - Args(static_cast(S.Stk.top())), FrameOffset(S.Stk.size()) { + RetPC(RetPC), ArgSize(ArgSize), Args(static_cast(S.Stk.top())), + FrameOffset(S.Stk.size()) { if (!Func) return; @@ -43,8 +43,9 @@ InterpFrame::InterpFrame(InterpState &S, const Function *Func, } } -InterpFrame::InterpFrame(InterpState &S, const Function *Func, CodePtr RetPC) - : InterpFrame(S, Func, S.Current, RetPC) { +InterpFrame::InterpFrame(InterpState &S, const Function *Func, CodePtr RetPC, + unsigned VarArgSize) + : InterpFrame(S, Func, S.Current, RetPC, Func->getArgSize() + VarArgSize) { // As per our calling convention, the this pointer is // part of the ArgSize. // If the function has RVO, the RVO pointer is first. diff --git a/clang/lib/AST/Interp/InterpFrame.h b/clang/lib/AST/Interp/InterpFrame.h index cba4f9560bf56a..322d5dcfa698ae 100644 --- a/clang/lib/AST/Interp/InterpFrame.h +++ b/clang/lib/AST/Interp/InterpFrame.h @@ -32,13 +32,14 @@ class InterpFrame final : public Frame { /// Creates a new frame for a method call. InterpFrame(InterpState &S, const Function *Func, InterpFrame *Caller, - CodePtr RetPC); + CodePtr RetPC, unsigned ArgSize); /// Creates a new frame with the values that make sense. /// I.e., the caller is the current frame of S, /// the This() pointer is the current Pointer on the top of S's stack, /// and the RVO pointer is before that. 
- InterpFrame(InterpState &S, const Function *Func, CodePtr RetPC); + InterpFrame(InterpState &S, const Function *Func, CodePtr RetPC, + unsigned VarArgSize = 0); /// Destroys the frame, killing all live pointers to stack slots. ~InterpFrame(); diff --git a/clang/lib/AST/Interp/Opcodes.td b/clang/lib/AST/Interp/Opcodes.td index 7f5bd7e5b44bca..f1b08944a8812e 100644 --- a/clang/lib/AST/Interp/Opcodes.td +++ b/clang/lib/AST/Interp/Opcodes.td @@ -191,12 +191,12 @@ def NoRet : Opcode {} def Call : Opcode { - let Args = [ArgFunction]; + let Args = [ArgFunction, ArgUint32]; let Types = []; } def CallVirt : Opcode { - let Args = [ArgFunction]; + let Args = [ArgFunction, ArgUint32]; let Types = []; } @@ -206,7 +206,12 @@ def CallBI : Opcode { } def CallPtr : Opcode { - let Args = []; + let Args = [ArgUint32]; + let Types = []; +} + +def CallVar : Opcode { + let Args = [ArgFunction, ArgUint32]; let Types = []; } diff --git a/clang/test/AST/Interp/functions.cpp b/clang/test/AST/Interp/functions.cpp index 6e995ce704e394..7b8278cf13aa88 100644 --- a/clang/test/AST/Interp/functions.cpp +++ b/clang/test/AST/Interp/functions.cpp @@ -381,6 +381,81 @@ namespace Variadic { constexpr int (*VFP)(...) = variadic_function2; static_assert(VFP() == 12, ""); + + /// Member functions + struct Foo { + int a = 0; + constexpr void bla(...) {} + constexpr S bla2(...) { + return S{12, true}; + } + constexpr Foo(...) : a(1337) {} + constexpr Foo(void *c, bool b, void*p, ...) : a('a' + b) {} + constexpr Foo(int a, const S* s, ...) : a(a) {} + }; + + constexpr int foo2() { + Foo f(1, nullptr); + auto s = f.bla2(1, 2, S{1, false}); + return s.a + s.b; + } + static_assert(foo2() == 13, ""); + + constexpr Foo _f = 123; + static_assert(_f.a == 1337, ""); + + constexpr Foo __f(nullptr, false, nullptr, nullptr, 'a', Foo()); + static_assert(__f.a == 'a', ""); + + +#if __cplusplus >= 202002L +namespace VariadicVirtual { + class A { + public: + constexpr virtual void foo(int &a, ...) 
{ + a = 1; + } + }; + + class B : public A { + public: + constexpr void foo(int &a, ...) override { + a = 2; + } + }; + + constexpr int foo() { + B b; + int a; + b.foo(a, 1,2,nullptr); + return a; + } + static_assert(foo() == 2, ""); +} // VariadicVirtual + +namespace VariadicQualified { + class A { + public: + constexpr virtual int foo(...) const { + return 5; + } + }; + class B : public A {}; + class C : public B { + public: + constexpr int foo(...) const override { + return B::foo(1,2,3); // B doesn't have a foo(), so this should call A::foo(). + } + constexpr int foo2() const { + return this->A::foo(1,2,3,this); + } + }; + constexpr C c; + static_assert(c.foo() == 5); + static_assert(c.foo2() == 5); +} // VariadicQualified +#endif + } namespace Packs { From 562790f371f230d8f67a1a8fb4b54e02e8d1e31f Mon Sep 17 00:00:00 2001 From: Abhinav271828 <71174780+Abhinav271828@users.noreply.github.com> Date: Thu, 15 Feb 2024 11:03:32 +0530 Subject: [PATCH 213/240] [MLIR][Presburger] Implement vertex enumeration and chamber decomposition for polytope generating function computation. (#78987) We implement a function to compute the generating function corresponding to a full-dimensional parametric polytope whose tangent cones are all unimodular. We fix a bug in unimodGenFunc to check the absolute value of the index. We also implement Matrix::negateMatrix() and Matrix::scaleRow for convenience. 
--- .../mlir/Analysis/Presburger/Barvinok.h | 54 +++- .../Analysis/Presburger/GeneratingFunction.h | 2 +- .../Analysis/Presburger/IntegerRelation.h | 31 ++ .../include/mlir/Analysis/Presburger/Matrix.h | 26 ++ .../Analysis/Presburger/PresburgerRelation.h | 4 + .../mlir/Analysis/Presburger/Simplex.h | 6 + mlir/include/mlir/Analysis/Presburger/Utils.h | 2 + mlir/lib/Analysis/Presburger/Barvinok.cpp | 304 +++++++++++++++++- .../Analysis/Presburger/IntegerRelation.cpp | 26 ++ mlir/lib/Analysis/Presburger/Matrix.cpp | 76 +++++ .../Presburger/PresburgerRelation.cpp | 6 + mlir/lib/Analysis/Presburger/Simplex.cpp | 13 + mlir/lib/Analysis/Presburger/Utils.cpp | 6 +- .../Analysis/Presburger/BarvinokTest.cpp | 75 ++++- 14 files changed, 615 insertions(+), 16 deletions(-) diff --git a/mlir/include/mlir/Analysis/Presburger/Barvinok.h b/mlir/include/mlir/Analysis/Presburger/Barvinok.h index b70ec33b693235..f730a07393331a 100644 --- a/mlir/include/mlir/Analysis/Presburger/Barvinok.h +++ b/mlir/include/mlir/Analysis/Presburger/Barvinok.h @@ -27,7 +27,9 @@ #include "mlir/Analysis/Presburger/GeneratingFunction.h" #include "mlir/Analysis/Presburger/IntegerRelation.h" #include "mlir/Analysis/Presburger/Matrix.h" +#include "mlir/Analysis/Presburger/PresburgerRelation.h" #include "mlir/Analysis/Presburger/QuasiPolynomial.h" +#include #include namespace mlir { @@ -47,16 +49,22 @@ using PolyhedronV = IntMatrix; using ConeH = PolyhedronH; using ConeV = PolyhedronV; -inline ConeH defineHRep(int numVars) { +inline PolyhedronH defineHRep(int numVars, int numSymbols = 0) { // We don't distinguish between domain and range variables, so // we set the number of domain variables as 0 and the number of // range variables as the number of actual variables. - // There are no symbols (we don't work with parametric cones) and no local - // (existentially quantified) variables. + // + // numSymbols is the number of parameters. + // + // There are no local (existentially quantified) variables. 
+ // + // The number of symbols is the number of parameters. By default, we consider + // nonparametric polyhedra. + // // Once the cone is defined, we use `addInequality()` to set inequalities. - return ConeH(PresburgerSpace::getSetSpace(/*numDims=*/numVars, - /*numSymbols=*/0, - /*numLocals=*/0)); + return PolyhedronH(PresburgerSpace::getSetSpace(/*numDims=*/numVars, + /*numSymbols=*/numSymbols, + /*numLocals=*/0)); } /// Get the index of a cone, i.e., the volume of the parallelepiped @@ -81,8 +89,38 @@ ConeH getDual(ConeV cone); /// Compute the generating function for a unimodular cone. /// The input cone must be unimodular; it assert-fails otherwise. -GeneratingFunction unimodularConeGeneratingFunction(ParamPoint vertex, int sign, - ConeH cone); +GeneratingFunction computeUnimodularConeGeneratingFunction(ParamPoint vertex, + int sign, + ConeH cone); + +/// Find the solution of a set of equations that express affine constraints +/// between a set of variables and a set of parameters. The solution expresses +/// each variable as an affine function of the parameters. +/// +/// If there is no solution, return null. +std::optional solveParametricEquations(FracMatrix equations); + +/// Given a list of possibly intersecting regions (PresburgerSet) and the +/// generating functions active in each region, produce a pairwise disjoint +/// list of regions (chambers) and identify the generating function of the +/// polytope in each chamber. +/// +/// "Disjoint" here means that the intersection of two chambers is not +/// full-dimensional. +/// +/// The returned list partitions the universe into parts depending on which +/// subset of GFs is active there, and gives the sum of active GFs for each +/// part. +std::vector> +computeChamberDecomposition( + unsigned numSymbols, ArrayRef> + regionsAndGeneratingFunctions); + +/// Compute the generating function corresponding to a polytope. +/// +/// All tangent cones of the polytope must be unimodular. 
+std::vector> +computePolytopeGeneratingFunction(const PolyhedronH &poly); /// Find a vector that is not orthogonal to any of the given vectors, /// i.e., has nonzero dot product with those of the given vectors diff --git a/mlir/include/mlir/Analysis/Presburger/GeneratingFunction.h b/mlir/include/mlir/Analysis/Presburger/GeneratingFunction.h index c38eab6efd0fc1..db5b6b6a959186 100644 --- a/mlir/include/mlir/Analysis/Presburger/GeneratingFunction.h +++ b/mlir/include/mlir/Analysis/Presburger/GeneratingFunction.h @@ -72,7 +72,7 @@ class GeneratingFunction { return denominators; } - GeneratingFunction operator+(GeneratingFunction &gf) const { + GeneratingFunction operator+(const GeneratingFunction &gf) const { assert(numParam == gf.getNumParams() && "two generating functions with different numbers of parameters " "cannot be added!"); diff --git a/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h b/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h index c476a022a48272..27dc382c1d5dbe 100644 --- a/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h +++ b/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h @@ -711,6 +711,17 @@ class IntegerRelation { /// return `this \ set`. PresburgerRelation subtract(const PresburgerRelation &set) const; + // Remove equalities which have only zero coefficients. + void removeTrivialEqualities(); + + // Verify whether the relation is full-dimensional, i.e., + // no equality holds for the relation. + // + // If there are no variables, it always returns true. + // If there is at least one variable and the relation is empty, it returns + // false. + bool isFullDim(); + void print(raw_ostream &os) const; void dump() const; @@ -871,6 +882,26 @@ class IntegerPolyhedron : public IntegerRelation { /*numReservedEqualities=*/0, /*numReservedCols=*/space.getNumVars() + 1, space) {} + /// Constructs a relation with the specified number of dimensions and symbols + /// and adds the given inequalities. 
+ explicit IntegerPolyhedron(const PresburgerSpace &space, + IntMatrix inequalities) + : IntegerPolyhedron(space) { + for (unsigned i = 0, e = inequalities.getNumRows(); i < e; i++) + addInequality(inequalities.getRow(i)); + } + + /// Constructs a relation with the specified number of dimensions and symbols + /// and adds the given inequalities, after normalizing row-wise to integer + /// values. + explicit IntegerPolyhedron(const PresburgerSpace &space, + FracMatrix inequalities) + : IntegerPolyhedron(space) { + IntMatrix ineqsNormalized = inequalities.normalizeRows(); + for (unsigned i = 0, e = inequalities.getNumRows(); i < e; i++) + addInequality(ineqsNormalized.getRow(i)); + } + /// Construct a set from an IntegerRelation. The relation should have /// no domain vars. explicit IntegerPolyhedron(const IntegerRelation &rel) diff --git a/mlir/include/mlir/Analysis/Presburger/Matrix.h b/mlir/include/mlir/Analysis/Presburger/Matrix.h index 0d4a593a95b1c9..4484ebc747e61c 100644 --- a/mlir/include/mlir/Analysis/Presburger/Matrix.h +++ b/mlir/include/mlir/Analysis/Presburger/Matrix.h @@ -20,6 +20,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/Support/raw_ostream.h" +#include #include namespace mlir { @@ -73,6 +74,8 @@ class Matrix { T operator()(unsigned row, unsigned column) const { return at(row, column); } + bool operator==(const Matrix &m) const; + /// Swap the given columns. void swapColumns(unsigned column, unsigned otherColumn); @@ -142,6 +145,9 @@ class Matrix { /// Add `scale` multiples of the rowVec row to the specified row. void addToRow(unsigned row, ArrayRef rowVec, const T &scale); + /// Multiply the specified row by a factor of `scale`. + void scaleRow(unsigned row, const T &scale); + /// Add `scale` multiples of the source column to the target column. void addToColumn(unsigned sourceColumn, unsigned targetColumn, const T &scale); @@ -156,6 +162,9 @@ class Matrix { /// Negate the specified row. 
void negateRow(unsigned row); + /// Negate the entire matrix. + void negateMatrix(); + /// The given vector is interpreted as a row vector v. Post-multiply v with /// this matrix, say M, and return vM. SmallVector preMultiplyWithRow(ArrayRef rowVec) const; @@ -184,6 +193,19 @@ class Matrix { // Transpose the matrix without modifying it. Matrix transpose() const; + // Copy the cells in the intersection of + // the rows between `fromRow` and `toRow` and + // the columns between `fromColumn` and `toColumn`, both inclusive. + Matrix getSubMatrix(unsigned fromRow, unsigned toRow, unsigned fromColumn, + unsigned toColumn) const; + + /// Split the rows of a matrix into two matrices according to which bits are + /// 1 and which are 0 in a given bitset. + /// + /// The first matrix returned has the rows corresponding to 1 and the second + /// corresponding to 0. + std::pair, Matrix> splitByBitset(ArrayRef indicator); + /// Print the matrix. void print(raw_ostream &os) const; void dump() const; @@ -297,6 +319,10 @@ class FracMatrix : public Matrix { // paper](https://www.cs.cmu.edu/~avrim/451f11/lectures/lect1129_LLL.pdf) // calls `y`, usually 3/4. void LLL(Fraction delta); + + // Multiply each row of the matrix by the LCM of the denominators, thereby + // converting it to an integer matrix. + IntMatrix normalizeRows() const; }; } // namespace presburger diff --git a/mlir/include/mlir/Analysis/Presburger/PresburgerRelation.h b/mlir/include/mlir/Analysis/Presburger/PresburgerRelation.h index c6b00eca90733a..9634df6d58a1a1 100644 --- a/mlir/include/mlir/Analysis/Presburger/PresburgerRelation.h +++ b/mlir/include/mlir/Analysis/Presburger/PresburgerRelation.h @@ -217,6 +217,10 @@ class PresburgerRelation { /// redundencies. PresburgerRelation simplify() const; + /// Return whether the given PresburgerRelation is full-dimensional. By full- + /// dimensional we mean that it is not flat along any dimension. + bool isFullDim() const; + /// Print the set's internal state.
void print(raw_ostream &os) const; void dump() const; diff --git a/mlir/include/mlir/Analysis/Presburger/Simplex.h b/mlir/include/mlir/Analysis/Presburger/Simplex.h index 9482f69b31cd66..7ee74c150867c1 100644 --- a/mlir/include/mlir/Analysis/Presburger/Simplex.h +++ b/mlir/include/mlir/Analysis/Presburger/Simplex.h @@ -771,6 +771,12 @@ class Simplex : public SimplexBase { std::pair, MaybeOptimum> computeIntegerBounds(ArrayRef coeffs); + /// Check if the simplex takes only one rational value along the + /// direction of `coeffs`. + /// + /// `this` must be nonempty. + bool isFlatAlong(ArrayRef coeffs); + /// Returns true if the polytope is unbounded, i.e., extends to infinity in /// some direction. Otherwise, returns false. bool isUnbounded(); diff --git a/mlir/include/mlir/Analysis/Presburger/Utils.h b/mlir/include/mlir/Analysis/Presburger/Utils.h index e6d29e4ef6d062..38262a65f97542 100644 --- a/mlir/include/mlir/Analysis/Presburger/Utils.h +++ b/mlir/include/mlir/Analysis/Presburger/Utils.h @@ -286,6 +286,8 @@ Fraction dotProduct(ArrayRef a, ArrayRef b); std::vector multiplyPolynomials(ArrayRef a, ArrayRef b); +bool isRangeZero(ArrayRef arr); + } // namespace presburger } // namespace mlir diff --git a/mlir/lib/Analysis/Presburger/Barvinok.cpp b/mlir/lib/Analysis/Presburger/Barvinok.cpp index d2752ded6b43f5..b6d1f99df8ba55 100644 --- a/mlir/lib/Analysis/Presburger/Barvinok.cpp +++ b/mlir/lib/Analysis/Presburger/Barvinok.cpp @@ -10,6 +10,7 @@ #include "mlir/Analysis/Presburger/Utils.h" #include "llvm/ADT/Sequence.h" #include +#include using namespace mlir; using namespace presburger; @@ -76,7 +77,8 @@ MPInt mlir::presburger::detail::getIndex(ConeV cone) { /// num is computed by expressing the vertex as a weighted /// sum of the generators, and then taking the floor of the /// coefficients. 
-GeneratingFunction mlir::presburger::detail::unimodularConeGeneratingFunction( +GeneratingFunction +mlir::presburger::detail::computeUnimodularConeGeneratingFunction( ParamPoint vertex, int sign, ConeH cone) { // Consider a cone with H-representation [0 -1]. // [-1 -2] @@ -84,7 +86,7 @@ GeneratingFunction mlir::presburger::detail::unimodularConeGeneratingFunction( // [-1 -1/2 1] // `cone` must be unimodular. - assert(getIndex(getDual(cone)) == 1 && "input cone is not unimodular!"); + assert(abs(getIndex(getDual(cone))) == 1 && "input cone is not unimodular!"); unsigned numVar = cone.getNumVars(); unsigned numIneq = cone.getNumInequalities(); @@ -147,6 +149,304 @@ GeneratingFunction mlir::presburger::detail::unimodularConeGeneratingFunction( std::vector({denominator})); } +/// We use Gaussian elimination to find the solution to a set of d equations +/// of the form +/// a_1 x_1 + ... + a_d x_d + b_1 m_1 + ... + b_p m_p + c = 0 +/// where x_i are variables, +/// m_i are parameters and +/// a_i, b_i, c are rational coefficients. +/// +/// The solution expresses each x_i as an affine function of the m_i, and is +/// therefore represented as a matrix of size d x (p+1). +/// If there is no solution, we return null. +std::optional +mlir::presburger::detail::solveParametricEquations(FracMatrix equations) { + // equations is a d x (d + p + 1) matrix. + // Each row represents an equation. + unsigned d = equations.getNumRows(); + unsigned numCols = equations.getNumColumns(); + + // If the determinant is zero, there is no unique solution. + // Thus we return null. + if (FracMatrix(equations.getSubMatrix(/*fromRow=*/0, /*toRow=*/d - 1, + /*fromColumn=*/0, + /*toColumn=*/d - 1)) + .determinant() == 0) + return std::nullopt; + + // Perform row operations to make each column all zeros except for the + // diagonal element, which is made to be one. 
+ for (unsigned i = 0; i < d; ++i) { + // First ensure that the diagonal element is nonzero, by swapping + // it with a row that is non-zero at column i. + if (equations(i, i) != 0) + continue; + for (unsigned j = i + 1; j < d; ++j) { + if (equations(j, i) == 0) + continue; + equations.swapRows(j, i); + break; + } + + Fraction diagElement = equations(i, i); + + // Apply row operations to make all elements except the diagonal to zero. + for (unsigned j = 0; j < d; ++j) { + if (i == j) + continue; + if (equations(j, i) == 0) + continue; + // Apply row operations to make element (j, i) zero by subtracting the + // ith row, appropriately scaled. + Fraction currentElement = equations(j, i); + equations.addToRow(/*sourceRow=*/i, /*targetRow=*/j, + /*scale=*/-currentElement / diagElement); + } + } + + // Rescale diagonal elements to 1. + for (unsigned i = 0; i < d; ++i) + equations.scaleRow(i, 1 / equations(i, i)); + + // Now we have reduced the equations to the form + // x_i + b_1' m_1 + ... + b_p' m_p + c' = 0 + // i.e. each variable appears exactly once in the system, and has coefficient + // one. + // + // Thus we have + // x_i = - b_1' m_1 - ... - b_p' m_p - c + // and so we return the negation of the last p + 1 columns of the matrix. + // + // We copy these columns and return them. + ParamPoint vertex = + equations.getSubMatrix(/*fromRow=*/0, /*toRow=*/d - 1, + /*fromColumn=*/d, /*toColumn=*/numCols - 1); + vertex.negateMatrix(); + return vertex; +} + +/// This is an implementation of the Clauss-Loechner algorithm for chamber +/// decomposition. +/// +/// We maintain a list of pairwise disjoint chambers and the generating +/// functions corresponding to each one. We iterate over the list of regions, +/// each time adding the current region's generating function to the chambers +/// where it is active and separating the chambers where it is not. 
+/// +/// Given the region each generating function is active in, for each subset of +/// generating functions the region that (the sum of) precisely this subset is +/// in, is the intersection of the regions that these are active in, +/// intersected with the complements of the remaining regions. +std::vector> +mlir::presburger::detail::computeChamberDecomposition( + unsigned numSymbols, ArrayRef> + regionsAndGeneratingFunctions) { + assert(!regionsAndGeneratingFunctions.empty() && + "there must be at least one chamber!"); + // We maintain a list of regions and their associated generating function + // initialized with the universe and the empty generating function. + std::vector> chambers = { + {PresburgerSet::getUniverse(PresburgerSpace::getSetSpace(numSymbols)), + GeneratingFunction(numSymbols, {}, {}, {})}}; + + // We iterate over the region list. + // + // For each activity region R_j (corresponding to the generating function + // gf_j), we examine all the current chambers R_i. + // + // If R_j has a full-dimensional intersection with an existing chamber R_i, + // then that chamber is replaced by two new ones: + // 1. the intersection R_i \cap R_j, where the generating function is + // gf_i + gf_j. + // 2. the difference R_i - R_j, where the generating function is gf_i. + // + // At each step, we define a new chamber list after considering gf_j, + // replacing and appending chambers as discussed above. + // + // The loop has the invariant that the union over all the chambers gives the + // universe at every step. + for (const auto &[region, generatingFunction] : + regionsAndGeneratingFunctions) { + std::vector> newChambers; + + for (const auto &[currentRegion, currentGeneratingFunction] : chambers) { + PresburgerSet intersection = currentRegion.intersect(region); + + // If the intersection is not full-dimensional, we do not modify + // the chamber list. 
+ if (!intersection.isFullDim()) { + newChambers.emplace_back(currentRegion, currentGeneratingFunction); + continue; + } + + // If it is, we add the intersection and the difference as chambers. + newChambers.emplace_back(intersection, + currentGeneratingFunction + generatingFunction); + newChambers.emplace_back(currentRegion.subtract(region), + currentGeneratingFunction); + } + chambers = std::move(newChambers); + } + + return chambers; +} + +/// For a polytope expressed as a set of n inequalities, compute the generating +/// function corresponding to the lattice points included in the polytope. This +/// algorithm has three main steps: +/// 1. Enumerate the vertices, by iterating over subsets of inequalities and +/// checking for satisfiability. For each d-subset of inequalities (where d +/// is the number of variables), we solve to obtain the vertex in terms of +/// the parameters, and then check for the region in parameter space where +/// this vertex satisfies the remaining (n - d) inequalities. +/// 2. For each vertex, identify the tangent cone and compute the generating +/// function corresponding to it. The generating function depends on the +/// parametric expression of the vertex and the (non-parametric) generators +/// of the tangent cone. +/// 3. [Clauss-Loechner decomposition] Identify the regions in parameter space +/// (chambers) where each vertex is active, and accordingly compute the +/// GF of the polytope in each chamber. +/// +/// Verdoolaege, Sven, et al. "Counting integer points in parametric +/// polytopes using Barvinok's rational functions." Algorithmica 48 (2007): +/// 37-66. +std::vector> +mlir::presburger::detail::computePolytopeGeneratingFunction( + const PolyhedronH &poly) { + unsigned numVars = poly.getNumRangeVars(); + unsigned numSymbols = poly.getNumSymbolVars(); + unsigned numIneqs = poly.getNumInequalities(); + + // We store a list of the computed vertices. 
+ std::vector vertices; + // For each vertex, we store the corresponding active region and the + // generating functions of the tangent cone, in order. + std::vector> + regionsAndGeneratingFunctions; + + // We iterate over all subsets of inequalities with cardinality numVars, + // using permutations of numVars 1's and (numIneqs - numVars) 0's. + // + // For a given permutation, we consider a subset which contains + // the i'th inequality if the i'th bit in the bitset is 1. + // + // We start with the permutation that takes the last numVars inequalities. + SmallVector indicator(numIneqs); + for (unsigned i = numIneqs - numVars; i < numIneqs; ++i) + indicator[i] = 1; + + do { + // Collect the inequalities corresponding to the bits which are set + // and the remaining ones. + auto [subset, remainder] = poly.getInequalities().splitByBitset(indicator); + // All other inequalities are stored in a2 and b2c2. + // + // These are column-wise splits of the inequalities; + // a2 stores the coefficients of the variables, and + // b2c2 stores the coefficients of the parameters and the constant term. + FracMatrix a2(numIneqs - numVars, numVars); + FracMatrix b2c2(numIneqs - numVars, numSymbols + 1); + a2 = FracMatrix( + remainder.getSubMatrix(0, numIneqs - numVars - 1, 0, numVars - 1)); + b2c2 = FracMatrix(remainder.getSubMatrix(0, numIneqs - numVars - 1, numVars, + numVars + numSymbols)); + + // Find the vertex, if any, corresponding to the current subset of + // inequalities. + std::optional vertex = + solveParametricEquations(FracMatrix(subset)); // d x (p+1) + + if (!vertex) + continue; + if (std::find(vertices.begin(), vertices.end(), vertex) != vertices.end()) + continue; + // If this subset corresponds to a vertex that has not been considered, + // store it. + vertices.push_back(*vertex); + + // If a vertex is formed by the intersection of more than d facets, we + // assume that any d-subset of these facets can be solved to obtain its + // expression. 
This assumption is valid because, if the vertex has two + // distinct parametric expressions, then a nontrivial equality among the + // parameters holds, which is a contradiction as we know the parameter + // space to be full-dimensional. + + // Let the current vertex be [X | y], where + // X represents the coefficients of the parameters and + // y represents the constant term. + // + // The region (in parameter space) where this vertex is active is given + // by substituting the vertex into the *remaining* inequalities of the + // polytope (those which were not collected into `subset`), i.e., into the + // inequalities [A2 | B2 | c2]. + // + // Thus, the coefficients of the parameters after substitution become + // (A2 • X + B2) + // and the constant terms become + // (A2 • y + c2). + // + // The region is therefore given by + // (A2 • X + B2) p + (A2 • y + c2) ≥ 0 + // + // This is equivalent to A2 • [X | y] + [B2 | c2]. + // + // Thus we premultiply [X | y] with each row of A2 + // and add each row of [B2 | c2]. + FracMatrix activeRegion(numIneqs - numVars, numSymbols + 1); + for (unsigned i = 0; i < numIneqs - numVars; i++) { + activeRegion.setRow(i, vertex->preMultiplyWithRow(a2.getRow(i))); + activeRegion.addToRow(i, b2c2.getRow(i), 1); + } + + // We convert the representation of the active region to an integers-only + // form so as to store it as a PresburgerSet. + IntegerPolyhedron activeRegionRel( + PresburgerSpace::getRelationSpace(0, numSymbols, 0, 0), activeRegion); + + // Now, we compute the generating function at this vertex. + // We collect the inequalities corresponding to each vertex to compute + // the tangent cone at that vertex. + // + // We only need the coefficients of the variables (NOT the parameters) + // as the generating function only depends on these. + // We translate the cones to be pointed at the origin by making the + // constant terms zero. 
+ ConeH tangentCone = defineHRep(numVars); + for (unsigned j = 0, e = subset.getNumRows(); j < e; ++j) { + SmallVector ineq(numVars + 1); + for (unsigned k = 0; k < numVars; ++k) + ineq[k] = subset(j, k); + tangentCone.addInequality(ineq); + } + // We assume that the tangent cone is unimodular, so there is no need + // to decompose it. + // + // In the general case, the unimodular decomposition may have several + // cones. + GeneratingFunction vertexGf(numSymbols, {}, {}, {}); + SmallVector, 4> unimodCones = {{1, tangentCone}}; + for (std::pair signedCone : unimodCones) { + auto [sign, cone] = signedCone; + vertexGf = vertexGf + + computeUnimodularConeGeneratingFunction(*vertex, sign, cone); + } + // We store the vertex we computed with the generating function of its + // tangent cone. + regionsAndGeneratingFunctions.emplace_back(PresburgerSet(activeRegionRel), + vertexGf); + } while (std::next_permutation(indicator.begin(), indicator.end())); + + // Now, we use Clauss-Loechner decomposition to identify regions in parameter + // space where each vertex is active. These regions (chambers) have the + // property that no two of them have a full-dimensional intersection, i.e., + // they may share "facets" or "edges", but their intersection can only have + // up to numVars - 1 dimensions. + // + // In each chamber, we sum up the generating functions of the active vertices + // to find the generating function of the polytope. + return computeChamberDecomposition(numSymbols, regionsAndGeneratingFunctions); +} + /// We use an iterative procedure to find a vector not orthogonal /// to a given set, ignoring the null vectors. /// Let the inputs be {x_1, ..., x_k}, all vectors of length n. 
diff --git a/mlir/lib/Analysis/Presburger/IntegerRelation.cpp b/mlir/lib/Analysis/Presburger/IntegerRelation.cpp index 7d2a63d17676f5..2ac271e2e05531 100644 --- a/mlir/lib/Analysis/Presburger/IntegerRelation.cpp +++ b/mlir/lib/Analysis/Presburger/IntegerRelation.cpp @@ -26,6 +26,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Sequence.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -2498,6 +2499,31 @@ void IntegerRelation::printSpace(raw_ostream &os) const { os << getNumConstraints() << " constraints\n"; } +void IntegerRelation::removeTrivialEqualities() { + for (int i = getNumEqualities() - 1; i >= 0; --i) + if (rangeIsZero(getEquality(i))) + removeEquality(i); +} + +bool IntegerRelation::isFullDim() { + if (getNumVars() == 0) + return true; + if (isEmpty()) + return false; + + // If there is a non-trivial equality, the space cannot be full-dimensional. + removeTrivialEqualities(); + if (getNumEqualities() > 0) + return false; + + // The polytope is full-dimensional iff it is not flat along any of the + // inequality directions. + Simplex simplex(*this); + return llvm::none_of(llvm::seq(getNumInequalities()), [&](int i) { + return simplex.isFlatAlong(getInequality(i)); + }); +} + void IntegerRelation::print(raw_ostream &os) const { assert(hasConsistentState()); printSpace(os); diff --git a/mlir/lib/Analysis/Presburger/Matrix.cpp b/mlir/lib/Analysis/Presburger/Matrix.cpp index bd7f7f58a932f3..4cb6e6b16bc878 100644 --- a/mlir/lib/Analysis/Presburger/Matrix.cpp +++ b/mlir/lib/Analysis/Presburger/Matrix.cpp @@ -29,6 +29,22 @@ Matrix::Matrix(unsigned rows, unsigned columns, unsigned reservedRows, data.reserve(std::max(nRows, reservedRows) * nReservedColumns); } +/// We cannot use the default implementation of operator== as it compares +/// fields like `reservedColumns` etc., which are not part of the data. 
+template +bool Matrix::operator==(const Matrix &m) const { + if (nRows != m.getNumRows()) + return false; + if (nColumns != m.getNumColumns()) + return false; + + for (unsigned i = 0; i < nRows; i++) + if (getRow(i) != m.getRow(i)) + return false; + + return true; +} + template Matrix Matrix::identity(unsigned dimension) { Matrix matrix(dimension, dimension); @@ -295,6 +311,12 @@ void Matrix::addToRow(unsigned row, ArrayRef rowVec, const T &scale) { at(row, col) += scale * rowVec[col]; } +template +void Matrix::scaleRow(unsigned row, const T &scale) { + for (unsigned col = 0; col < nColumns; ++col) + at(row, col) *= scale; +} + template void Matrix::addToColumn(unsigned sourceColumn, unsigned targetColumn, const T &scale) { @@ -316,6 +338,12 @@ void Matrix::negateRow(unsigned row) { at(row, column) = -at(row, column); } +template +void Matrix::negateMatrix() { + for (unsigned row = 0; row < nRows; ++row) + negateRow(row); +} + template SmallVector Matrix::preMultiplyWithRow(ArrayRef rowVec) const { assert(rowVec.size() == getNumRows() && "Invalid row vector dimension!"); @@ -354,6 +382,22 @@ static void modEntryColumnOperation(Matrix &m, unsigned row, otherMatrix.addToColumn(sourceCol, targetCol, ratio); } +template +Matrix Matrix::getSubMatrix(unsigned fromRow, unsigned toRow, + unsigned fromColumn, + unsigned toColumn) const { + assert(fromRow <= toRow && "end of row range must be after beginning!"); + assert(toRow < nRows && "end of row range out of bounds!"); + assert(fromColumn <= toColumn && + "end of column range must be after beginning!"); + assert(toColumn < nColumns && "end of column range out of bounds!"); + Matrix subMatrix(toRow - fromRow + 1, toColumn - fromColumn + 1); + for (unsigned i = fromRow; i <= toRow; ++i) + for (unsigned j = fromColumn; j <= toColumn; ++j) + subMatrix(i - fromRow, j - fromColumn) = at(i, j); + return subMatrix; +} + template void Matrix::print(raw_ostream &os) const { for (unsigned row = 0; row < nRows; ++row) { @@ -363,6 
+407,21 @@ void Matrix::print(raw_ostream &os) const { } } +/// We iterate over the `indicator` bitset, checking each bit. If a bit is 1, +/// we append it to one matrix, and if it is zero, we append it to the other. +template +std::pair, Matrix> +Matrix::splitByBitset(ArrayRef indicator) { + Matrix rowsForOne(0, nColumns), rowsForZero(0, nColumns); + for (unsigned i = 0; i < nRows; i++) { + if (indicator[i] == 1) + rowsForOne.appendExtraRow(getRow(i)); + else + rowsForZero.appendExtraRow(getRow(i)); + } + return {rowsForOne, rowsForZero}; +} + template void Matrix::dump() const { print(llvm::errs()); @@ -697,3 +756,20 @@ void FracMatrix::LLL(Fraction delta) { } } } + +IntMatrix FracMatrix::normalizeRows() const { + unsigned numRows = getNumRows(); + unsigned numColumns = getNumColumns(); + IntMatrix normalized(numRows, numColumns); + + MPInt lcmDenoms = MPInt(1); + for (unsigned i = 0; i < numRows; i++) { + // For a row, first compute the LCM of the denominators. + for (unsigned j = 0; j < numColumns; j++) + lcmDenoms = lcm(lcmDenoms, at(i, j).den); + // Then, multiply by it throughout and convert to integers. 
+ for (unsigned j = 0; j < numColumns; j++) + normalized(i, j) = (at(i, j) * lcmDenoms).getAsInteger(); + } + return normalized; +} diff --git a/mlir/lib/Analysis/Presburger/PresburgerRelation.cpp b/mlir/lib/Analysis/Presburger/PresburgerRelation.cpp index 787fc1c659a12e..3af6baae0e7001 100644 --- a/mlir/lib/Analysis/Presburger/PresburgerRelation.cpp +++ b/mlir/lib/Analysis/Presburger/PresburgerRelation.cpp @@ -1041,6 +1041,12 @@ PresburgerRelation PresburgerRelation::simplify() const { return result; } +bool PresburgerRelation::isFullDim() const { + return llvm::any_of(getAllDisjuncts(), [&](IntegerRelation disjunct) { + return disjunct.isFullDim(); + }); +} + void PresburgerRelation::print(raw_ostream &os) const { os << "Number of Disjuncts: " << getNumDisjuncts() << "\n"; for (const IntegerRelation &disjunct : disjuncts) { diff --git a/mlir/lib/Analysis/Presburger/Simplex.cpp b/mlir/lib/Analysis/Presburger/Simplex.cpp index 42bbc3363d5830..1969cce93ad2e0 100644 --- a/mlir/lib/Analysis/Presburger/Simplex.cpp +++ b/mlir/lib/Analysis/Presburger/Simplex.cpp @@ -2104,6 +2104,19 @@ Simplex::computeIntegerBounds(ArrayRef coeffs) { return {minRoundedUp, maxRoundedDown}; } +bool Simplex::isFlatAlong(ArrayRef coeffs) { + assert(!isEmpty() && "cannot check for flatness of empty simplex!"); + auto upOpt = computeOptimum(Simplex::Direction::Up, coeffs); + auto downOpt = computeOptimum(Simplex::Direction::Down, coeffs); + + if (!upOpt.isBounded()) + return false; + if (!downOpt.isBounded()) + return false; + + return *upOpt == *downOpt; +} + void SimplexBase::print(raw_ostream &os) const { os << "rows = " << getNumRows() << ", columns = " << getNumColumns() << "\n"; if (empty) diff --git a/mlir/lib/Analysis/Presburger/Utils.cpp b/mlir/lib/Analysis/Presburger/Utils.cpp index a8d860885ef106..f717a4de5d7283 100644 --- a/mlir/lib/Analysis/Presburger/Utils.cpp +++ b/mlir/lib/Analysis/Presburger/Utils.cpp @@ -564,4 +564,8 @@ std::vector presburger::multiplyPolynomials(ArrayRef a, 
convolution.push_back(sum); } return convolution; -} \ No newline at end of file +} + +bool presburger::isRangeZero(ArrayRef arr) { + return llvm::all_of(arr, [&](Fraction f) { return f == 0; }); +} diff --git a/mlir/unittests/Analysis/Presburger/BarvinokTest.cpp b/mlir/unittests/Analysis/Presburger/BarvinokTest.cpp index 919aaa7a428593..5e279b542fdf95 100644 --- a/mlir/unittests/Analysis/Presburger/BarvinokTest.cpp +++ b/mlir/unittests/Analysis/Presburger/BarvinokTest.cpp @@ -1,5 +1,6 @@ #include "mlir/Analysis/Presburger/Barvinok.h" #include "./Utils.h" +#include "Parser.h" #include #include @@ -59,7 +60,8 @@ TEST(BarvinokTest, unimodularConeGeneratingFunction) { ParamPoint vertex = makeFracMatrix(2, 3, {{2, 2, 0}, {-1, -Fraction(1, 2), 1}}); - GeneratingFunction gf = unimodularConeGeneratingFunction(vertex, 1, cone); + GeneratingFunction gf = + computeUnimodularConeGeneratingFunction(vertex, 1, cone); EXPECT_EQ_REPR_GENERATINGFUNCTION( gf, GeneratingFunction( @@ -74,7 +76,7 @@ TEST(BarvinokTest, unimodularConeGeneratingFunction) { vertex = makeFracMatrix(3, 2, {{5, 2}, {6, 2}, {7, 1}}); - gf = unimodularConeGeneratingFunction(vertex, 1, cone); + gf = computeUnimodularConeGeneratingFunction(vertex, 1, cone); EXPECT_EQ_REPR_GENERATINGFUNCTION( gf, @@ -125,7 +127,7 @@ TEST(BarvinokTest, getCoefficientInRationalFunction) { EXPECT_EQ(coeff.getConstantTerm(), Fraction(55, 64)); } -TEST(BarvinokTest, computeNumTerms) { +TEST(BarvinokTest, computeNumTermsCone) { // The following test is taken from // Verdoolaege, Sven, et al. "Counting integer points in parametric // polytopes using Barvinok's rational functions." 
Algorithmica 48 (2007): @@ -233,4 +235,69 @@ TEST(BarvinokTest, computeNumTerms) { for (unsigned j = 0; j < 2; j++) for (unsigned k = 0; k < 2; k++) EXPECT_EQ(count[i][j][k], 1); -} \ No newline at end of file +} + +/// We define some simple polyhedra with unimodular tangent cones and verify +/// that the returned generating functions correspond to those calculated by +/// hand. +TEST(BarvinokTest, computeNumTermsPolytope) { + // A cube of side 1. + PolyhedronH poly = + parseRelationFromSet("(x, y, z) : (x >= 0, y >= 0, z >= 0, -x + 1 >= 0, " + "-y + 1 >= 0, -z + 1 >= 0)", + 0); + + std::vector> count = + computePolytopeGeneratingFunction(poly); + // There is only one chamber, as it is non-parametric. + EXPECT_EQ(count.size(), 9u); + + GeneratingFunction gf = count[0].second; + EXPECT_EQ_REPR_GENERATINGFUNCTION( + gf, + GeneratingFunction( + 0, {1, 1, 1, 1, 1, 1, 1, 1}, + {makeFracMatrix(1, 3, {{1, 1, 1}}), makeFracMatrix(1, 3, {{0, 1, 1}}), + makeFracMatrix(1, 3, {{0, 1, 1}}), makeFracMatrix(1, 3, {{0, 0, 1}}), + makeFracMatrix(1, 3, {{0, 1, 1}}), makeFracMatrix(1, 3, {{0, 0, 1}}), + makeFracMatrix(1, 3, {{0, 0, 1}}), + makeFracMatrix(1, 3, {{0, 0, 0}})}, + {{{-1, 0, 0}, {0, -1, 0}, {0, 0, -1}}, + {{0, 0, 1}, {-1, 0, 0}, {0, -1, 0}}, + {{0, 1, 0}, {-1, 0, 0}, {0, 0, -1}}, + {{0, 1, 0}, {0, 0, 1}, {-1, 0, 0}}, + {{1, 0, 0}, {0, -1, 0}, {0, 0, -1}}, + {{1, 0, 0}, {0, 0, 1}, {0, -1, 0}}, + {{1, 0, 0}, {0, 1, 0}, {0, 0, -1}}, + {{1, 0, 0}, {0, 1, 0}, {0, 0, 1}}})); + + // A right-angled triangle with side p. 
+ poly = + parseRelationFromSet("(x, y)[N] : (x >= 0, y >= 0, -x - y + N >= 0)", 0); + + count = computePolytopeGeneratingFunction(poly); + // There is only one chamber: p ≥ 0 + EXPECT_EQ(count.size(), 4u); + + gf = count[0].second; + EXPECT_EQ_REPR_GENERATINGFUNCTION( + gf, GeneratingFunction( + 1, {1, 1, 1}, + {makeFracMatrix(2, 2, {{0, 1}, {0, 0}}), + makeFracMatrix(2, 2, {{0, 1}, {0, 0}}), + makeFracMatrix(2, 2, {{0, 0}, {0, 0}})}, + {{{-1, 1}, {-1, 0}}, {{1, -1}, {0, -1}}, {{1, 0}, {0, 1}}})); + + // Cartesian product of a cube with side M and a right triangle with side N. + poly = parseRelationFromSet( + "(x, y, z, w, a)[M, N] : (x >= 0, y >= 0, z >= 0, -x + M >= 0, -y + M >= " + "0, -z + M >= 0, w >= 0, a >= 0, -w - a + N >= 0)", + 0); + + count = computePolytopeGeneratingFunction(poly); + + EXPECT_EQ(count.size(), 25u); + + gf = count[0].second; + EXPECT_EQ(gf.getNumerators().size(), 24u); +} From 6087d7bc0a9d7d4ad2c94a131c2bc427b767c9d7 Mon Sep 17 00:00:00 2001 From: Owen Pan Date: Wed, 14 Feb 2024 21:38:39 -0800 Subject: [PATCH 214/240] [clang-format][NFC] Sort options in Format.cpp --- clang/lib/Format/Format.cpp | 54 ++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index 56cd9495920c7b..e67b2101f5821b 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -1406,29 +1406,28 @@ static void expandPresetsSpacesInParens(FormatStyle &Expanded) { FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { FormatStyle LLVMStyle; - LLVMStyle.InheritsParentConfig = false; - LLVMStyle.Language = Language; LLVMStyle.AccessModifierOffset = -2; - LLVMStyle.AlignEscapedNewlines = FormatStyle::ENAS_Right; LLVMStyle.AlignAfterOpenBracket = FormatStyle::BAS_Align; LLVMStyle.AlignArrayOfStructures = FormatStyle::AIAS_None; - LLVMStyle.AlignOperands = FormatStyle::OAS_Align; LLVMStyle.AlignConsecutiveAssignments = {}; - 
LLVMStyle.AlignConsecutiveAssignments.Enabled = false; - LLVMStyle.AlignConsecutiveAssignments.AcrossEmptyLines = false; LLVMStyle.AlignConsecutiveAssignments.AcrossComments = false; + LLVMStyle.AlignConsecutiveAssignments.AcrossEmptyLines = false; LLVMStyle.AlignConsecutiveAssignments.AlignCompound = false; LLVMStyle.AlignConsecutiveAssignments.AlignFunctionPointers = false; + LLVMStyle.AlignConsecutiveAssignments.Enabled = false; LLVMStyle.AlignConsecutiveAssignments.PadOperators = true; LLVMStyle.AlignConsecutiveBitFields = {}; LLVMStyle.AlignConsecutiveDeclarations = {}; LLVMStyle.AlignConsecutiveMacros = {}; LLVMStyle.AlignConsecutiveShortCaseStatements = {}; + LLVMStyle.AlignEscapedNewlines = FormatStyle::ENAS_Right; + LLVMStyle.AlignOperands = FormatStyle::OAS_Align; LLVMStyle.AlignTrailingComments = {}; LLVMStyle.AlignTrailingComments.Kind = FormatStyle::TCAS_Always; LLVMStyle.AlignTrailingComments.OverEmptyLines = 0; LLVMStyle.AllowAllArgumentsOnNextLine = true; LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true; + LLVMStyle.AllowBreakBeforeNoexceptSpecifier = FormatStyle::BBNSS_Never; LLVMStyle.AllowShortBlocksOnASingleLine = FormatStyle::SBS_Never; LLVMStyle.AllowShortCaseLabelsOnASingleLine = false; LLVMStyle.AllowShortCompoundRequirementOnASingleLine = true; @@ -1439,11 +1438,10 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { LLVMStyle.AllowShortLoopsOnASingleLine = false; LLVMStyle.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_None; LLVMStyle.AlwaysBreakBeforeMultilineStrings = false; - LLVMStyle.BreakTemplateDeclarations = FormatStyle::BTDS_MultiLine; LLVMStyle.AttributeMacros.push_back("__capability"); - LLVMStyle.BitFieldColonSpacing = FormatStyle::BFCS_Both; LLVMStyle.BinPackArguments = true; LLVMStyle.BinPackParameters = true; + LLVMStyle.BitFieldColonSpacing = FormatStyle::BFCS_Both; LLVMStyle.BracedInitializerIndentWidth = std::nullopt; LLVMStyle.BraceWrapping = {/*AfterCaseLabel=*/false, 
/*AfterClass=*/false, @@ -1472,11 +1470,11 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach; LLVMStyle.BreakBeforeConceptDeclarations = FormatStyle::BBCDS_Always; LLVMStyle.BreakBeforeInlineASMColon = FormatStyle::BBIAS_OnlyMultiline; - LLVMStyle.AllowBreakBeforeNoexceptSpecifier = FormatStyle::BBNSS_Never; LLVMStyle.BreakBeforeTernaryOperators = true; LLVMStyle.BreakConstructorInitializers = FormatStyle::BCIS_BeforeColon; LLVMStyle.BreakInheritanceList = FormatStyle::BILS_BeforeColon; LLVMStyle.BreakStringLiterals = true; + LLVMStyle.BreakTemplateDeclarations = FormatStyle::BTDS_MultiLine; LLVMStyle.ColumnLimit = 80; LLVMStyle.CommentPragmas = "^ IWYU pragma:"; LLVMStyle.CompactNamespaces = false; @@ -1493,22 +1491,23 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { LLVMStyle.ForEachMacros.push_back("Q_FOREACH"); LLVMStyle.ForEachMacros.push_back("BOOST_FOREACH"); LLVMStyle.IfMacros.push_back("KJ_IF_MAYBE"); + LLVMStyle.IncludeStyle.IncludeBlocks = tooling::IncludeStyle::IBS_Preserve; LLVMStyle.IncludeStyle.IncludeCategories = { {"^\"(llvm|llvm-c|clang|clang-c)/", 2, 0, false}, {"^(<|\"(gtest|gmock|isl|json)/)", 3, 0, false}, {".*", 1, 0, false}}; LLVMStyle.IncludeStyle.IncludeIsMainRegex = "(Test)?$"; - LLVMStyle.IncludeStyle.IncludeBlocks = tooling::IncludeStyle::IBS_Preserve; LLVMStyle.IncludeStyle.MainIncludeChar = tooling::IncludeStyle::MICD_Quote; LLVMStyle.IndentAccessModifiers = false; - LLVMStyle.IndentCaseLabels = false; LLVMStyle.IndentCaseBlocks = false; + LLVMStyle.IndentCaseLabels = false; LLVMStyle.IndentExternBlock = FormatStyle::IEBS_AfterExternBlock; LLVMStyle.IndentGotoLabels = true; LLVMStyle.IndentPPDirectives = FormatStyle::PPDIS_None; LLVMStyle.IndentRequiresClause = true; LLVMStyle.IndentWidth = 2; LLVMStyle.IndentWrappedFunctionNames = false; + LLVMStyle.InheritsParentConfig = false; LLVMStyle.InsertBraces = false; LLVMStyle.InsertNewlineAtEOF = false; 
LLVMStyle.InsertTrailingCommas = FormatStyle::TCS_None; @@ -1521,6 +1520,7 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { LLVMStyle.KeepEmptyLinesAtEOF = false; LLVMStyle.KeepEmptyLinesAtTheStartOfBlocks = true; LLVMStyle.LambdaBodyIndentation = FormatStyle::LBI_Signature; + LLVMStyle.Language = Language; LLVMStyle.LineEnding = FormatStyle::LE_DeriveLF; LLVMStyle.MaxEmptyLinesToKeep = 1; LLVMStyle.NamespaceIndentation = FormatStyle::NI_None; @@ -1550,7 +1550,9 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { LLVMStyle.SpaceAfterLogicalNot = false; LLVMStyle.SpaceAfterTemplateKeyword = true; LLVMStyle.SpaceAroundPointerQualifiers = FormatStyle::SAPQ_Default; + LLVMStyle.SpaceBeforeAssignmentOperators = true; LLVMStyle.SpaceBeforeCaseColon = false; + LLVMStyle.SpaceBeforeCpp11BracedList = false; LLVMStyle.SpaceBeforeCtorInitializerColon = true; LLVMStyle.SpaceBeforeInheritanceColon = true; LLVMStyle.SpaceBeforeJsonColon = false; @@ -1560,8 +1562,6 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { LLVMStyle.SpaceBeforeParensOptions.AfterForeachMacros = true; LLVMStyle.SpaceBeforeParensOptions.AfterIfMacros = true; LLVMStyle.SpaceBeforeRangeBasedForLoopColon = true; - LLVMStyle.SpaceBeforeAssignmentOperators = true; - LLVMStyle.SpaceBeforeCpp11BracedList = false; LLVMStyle.SpaceBeforeSquareBrackets = false; LLVMStyle.SpaceInEmptyBlock = false; LLVMStyle.SpacesBeforeTrailingComments = 1; @@ -1584,16 +1584,16 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { LLVMStyle.WhitespaceSensitiveMacros.push_back("STRINGIZE"); LLVMStyle.PenaltyBreakAssignment = prec::Assignment; + LLVMStyle.PenaltyBreakBeforeFirstCallParameter = 19; LLVMStyle.PenaltyBreakComment = 300; LLVMStyle.PenaltyBreakFirstLessLess = 120; - LLVMStyle.PenaltyBreakString = 1000; - LLVMStyle.PenaltyExcessCharacter = 1000000; - LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60; - LLVMStyle.PenaltyBreakBeforeFirstCallParameter = 19; 
LLVMStyle.PenaltyBreakOpenParenthesis = 0; LLVMStyle.PenaltyBreakScopeResolution = 500; + LLVMStyle.PenaltyBreakString = 1000; LLVMStyle.PenaltyBreakTemplateDeclaration = prec::Relational; + LLVMStyle.PenaltyExcessCharacter = 1000000; LLVMStyle.PenaltyIndentedWhitespace = 0; + LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60; // Defaults that differ when not C++. switch (Language) { @@ -1632,12 +1632,12 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { GoogleStyle.AlwaysBreakBeforeMultilineStrings = true; GoogleStyle.BreakTemplateDeclarations = FormatStyle::BTDS_Yes; GoogleStyle.DerivePointerAlignment = true; + GoogleStyle.IncludeStyle.IncludeBlocks = tooling::IncludeStyle::IBS_Regroup; GoogleStyle.IncludeStyle.IncludeCategories = {{"^", 2, 0, false}, {"^<.*\\.h>", 1, 0, false}, {"^<.*", 2, 0, false}, {".*", 3, 0, false}}; GoogleStyle.IncludeStyle.IncludeIsMainRegex = "([-_](test|unittest))?$"; - GoogleStyle.IncludeStyle.IncludeBlocks = tooling::IncludeStyle::IBS_Regroup; GoogleStyle.IndentCaseLabels = true; GoogleStyle.KeepEmptyLinesAtTheStartOfBlocks = false; GoogleStyle.ObjCBinPackProtocolList = FormatStyle::BPS_Never; @@ -1692,8 +1692,8 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { GoogleStyle.SpacesBeforeTrailingComments = 2; GoogleStyle.Standard = FormatStyle::LS_Auto; - GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200; GoogleStyle.PenaltyBreakBeforeFirstCallParameter = 1; + GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200; if (Language == FormatStyle::LK_Java) { GoogleStyle.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign; @@ -1721,22 +1721,22 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { // TODO: enable once decided, in particular re disabling bin packing. 
// https://google.github.io/styleguide/jsguide.html#features-arrays-trailing-comma // GoogleStyle.InsertTrailingCommas = FormatStyle::TCS_Wrapped; + GoogleStyle.JavaScriptQuotes = FormatStyle::JSQS_Single; + GoogleStyle.JavaScriptWrapImports = false; GoogleStyle.MaxEmptyLinesToKeep = 3; GoogleStyle.NamespaceIndentation = FormatStyle::NI_All; GoogleStyle.SpacesInContainerLiterals = false; - GoogleStyle.JavaScriptQuotes = FormatStyle::JSQS_Single; - GoogleStyle.JavaScriptWrapImports = false; } else if (Language == FormatStyle::LK_Proto) { GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty; GoogleStyle.AlwaysBreakBeforeMultilineStrings = false; - GoogleStyle.SpacesInContainerLiterals = false; - GoogleStyle.Cpp11BracedListStyle = false; // This affects protocol buffer options specifications and text protos. // Text protos are currently mostly formatted inside C++ raw string literals // and often the current breaking behavior of string literals is not // beneficial there. Investigate turning this on once proper string reflow // has been implemented. 
GoogleStyle.BreakStringLiterals = false; + GoogleStyle.Cpp11BracedListStyle = false; + GoogleStyle.SpacesInContainerLiterals = false; } else if (Language == FormatStyle::LK_ObjC) { GoogleStyle.AlwaysBreakBeforeMultilineStrings = false; GoogleStyle.ColumnLimit = 100; @@ -1822,13 +1822,13 @@ FormatStyle getMozillaStyle() { MozillaStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; MozillaStyle.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_TopLevel; - MozillaStyle.BreakTemplateDeclarations = FormatStyle::BTDS_Yes; - MozillaStyle.BinPackParameters = false; MozillaStyle.BinPackArguments = false; + MozillaStyle.BinPackParameters = false; MozillaStyle.BreakAfterReturnType = FormatStyle::RTBS_TopLevel; MozillaStyle.BreakBeforeBraces = FormatStyle::BS_Mozilla; MozillaStyle.BreakConstructorInitializers = FormatStyle::BCIS_BeforeComma; MozillaStyle.BreakInheritanceList = FormatStyle::BILS_BeforeComma; + MozillaStyle.BreakTemplateDeclarations = FormatStyle::BTDS_Yes; MozillaStyle.ConstructorInitializerIndentWidth = 2; MozillaStyle.ContinuationIndentWidth = 2; MozillaStyle.Cpp11BracedListStyle = false; @@ -1853,8 +1853,8 @@ FormatStyle getWebKitStyle() { Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All; Style.BreakBeforeBraces = FormatStyle::BS_WebKit; Style.BreakConstructorInitializers = FormatStyle::BCIS_BeforeComma; - Style.Cpp11BracedListStyle = false; Style.ColumnLimit = 0; + Style.Cpp11BracedListStyle = false; Style.FixNamespaceComments = false; Style.IndentWidth = 4; Style.NamespaceIndentation = FormatStyle::NI_Inner; @@ -1873,8 +1873,8 @@ FormatStyle getGNUStyle() { Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All; Style.BreakBeforeBraces = FormatStyle::BS_GNU; Style.BreakBeforeTernaryOperators = true; - Style.Cpp11BracedListStyle = false; Style.ColumnLimit = 79; + Style.Cpp11BracedListStyle = false; Style.FixNamespaceComments = false; Style.SpaceBeforeParens = FormatStyle::SBPO_Always; Style.Standard = FormatStyle::LS_Cpp03; From 
94f51649c4a574f88fd9b8a2d0a05b334c0bd490 Mon Sep 17 00:00:00 2001 From: riChar Date: Thu, 15 Feb 2024 14:04:53 +0800 Subject: [PATCH 215/240] [LLVM][X86] Add EFLAGS Defs for VERR/VERW instructions (#81824) VERR/VERW instructions will define ZF flag. --- llvm/lib/Target/X86/X86InstrSystem.td | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/lib/Target/X86/X86InstrSystem.td b/llvm/lib/Target/X86/X86InstrSystem.td index 24a334d38f9261..d051047ae46548 100644 --- a/llvm/lib/Target/X86/X86InstrSystem.td +++ b/llvm/lib/Target/X86/X86InstrSystem.td @@ -366,12 +366,14 @@ def LGS32rm : I<0xb5, MRMSrcMem, (outs GR32:$dst), (ins opaquemem:$src), def LGS64rm : RI<0xb5, MRMSrcMem, (outs GR64:$dst), (ins opaquemem:$src), "lgs{q}\t{$src, $dst|$dst, $src}", []>, TB; +let Defs = [EFLAGS] in { def VERRr : I<0x00, MRM4r, (outs), (ins GR16:$seg), "verr\t$seg", []>, TB; def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg), "verw\t$seg", []>, TB; let mayLoad = 1 in { def VERRm : I<0x00, MRM4m, (outs), (ins i16mem:$seg), "verr\t$seg", []>, TB; def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg), "verw\t$seg", []>, TB; } +} // Defs EFLAGS } // SchedRW //===----------------------------------------------------------------------===// From 6c74a6f6f4f01500f9deabadc7673b4935643928 Mon Sep 17 00:00:00 2001 From: Prabhuk Date: Wed, 14 Feb 2024 22:07:26 -0800 Subject: [PATCH 216/240] Revert "[lldb-dap] Add support for data breakpoint. (#81541)" (#81812) This reverts commit 8c56e78ec531f0e2460213c20fff869b6b7add99. Reverting to address the LLDB test failure in ARM64. 
--- .../test/tools/lldb-dap/dap_server.py | 47 --- .../tools/lldb-dap/databreakpoint/Makefile | 3 - .../TestDAP_setDataBreakpoints.py | 123 ------- .../tools/lldb-dap/databreakpoint/main.cpp | 17 - lldb/tools/lldb-dap/CMakeLists.txt | 1 - lldb/tools/lldb-dap/DAPForward.h | 2 - lldb/tools/lldb-dap/Watchpoint.cpp | 48 --- lldb/tools/lldb-dap/Watchpoint.h | 34 -- lldb/tools/lldb-dap/lldb-dap.cpp | 307 ++---------------- .../gn/secondary/lldb/tools/lldb-dap/BUILD.gn | 1 - 10 files changed, 34 insertions(+), 549 deletions(-) delete mode 100644 lldb/test/API/tools/lldb-dap/databreakpoint/Makefile delete mode 100644 lldb/test/API/tools/lldb-dap/databreakpoint/TestDAP_setDataBreakpoints.py delete mode 100644 lldb/test/API/tools/lldb-dap/databreakpoint/main.cpp delete mode 100644 lldb/tools/lldb-dap/Watchpoint.cpp delete mode 100644 lldb/tools/lldb-dap/Watchpoint.h diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py index 27a76a652f4063..bb863bb8719176 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py @@ -501,18 +501,6 @@ def get_local_variable_value(self, name, frameIndex=0, threadId=None): return variable["value"] return None - def get_local_variable_child(self, name, child_name, frameIndex=0, threadId=None): - local = self.get_local_variable(name, frameIndex, threadId) - if local["variablesReference"] == 0: - return None - children = self.request_variables(local["variablesReference"])["body"][ - "variables" - ] - for child in children: - if child["name"] == child_name: - return child - return None - def replay_packets(self, replay_file_path): f = open(replay_file_path, "r") mode = "invalid" @@ -907,41 +895,6 @@ def request_setFunctionBreakpoints(self, names, condition=None, hitCondition=Non } return self.send_recv(command_dict) - def request_dataBreakpointInfo( - self, 
variablesReference, name, frameIndex=0, threadId=None - ): - stackFrame = self.get_stackFrame(frameIndex=frameIndex, threadId=threadId) - if stackFrame is None: - return [] - args_dict = { - "variablesReference": variablesReference, - "name": name, - "frameId": stackFrame["id"], - } - command_dict = { - "command": "dataBreakpointInfo", - "type": "request", - "arguments": args_dict, - } - return self.send_recv(command_dict) - - def request_setDataBreakpoint(self, dataBreakpoints): - """dataBreakpoints is a list of dictionary with following fields: - { - dataId: (address in hex)/(size in bytes) - accessType: read/write/readWrite - [condition]: string - [hitCondition]: string - } - """ - args_dict = {"breakpoints": dataBreakpoints} - command_dict = { - "command": "setDataBreakpoints", - "type": "request", - "arguments": args_dict, - } - return self.send_recv(command_dict) - def request_compileUnits(self, moduleId): args_dict = {"moduleId": moduleId} command_dict = { diff --git a/lldb/test/API/tools/lldb-dap/databreakpoint/Makefile b/lldb/test/API/tools/lldb-dap/databreakpoint/Makefile deleted file mode 100644 index 99998b20bcb050..00000000000000 --- a/lldb/test/API/tools/lldb-dap/databreakpoint/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -CXX_SOURCES := main.cpp - -include Makefile.rules diff --git a/lldb/test/API/tools/lldb-dap/databreakpoint/TestDAP_setDataBreakpoints.py b/lldb/test/API/tools/lldb-dap/databreakpoint/TestDAP_setDataBreakpoints.py deleted file mode 100644 index 40ca6473649ea9..00000000000000 --- a/lldb/test/API/tools/lldb-dap/databreakpoint/TestDAP_setDataBreakpoints.py +++ /dev/null @@ -1,123 +0,0 @@ -""" -Test lldb-dap dataBreakpointInfo and setDataBreakpoints requests -""" - -from lldbsuite.test.decorators import * -from lldbsuite.test.lldbtest import * -import lldbdap_testcase - - -class TestDAP_setDataBreakpoints(lldbdap_testcase.DAPTestCaseBase): - def setUp(self): - lldbdap_testcase.DAPTestCaseBase.setUp(self) - self.accessTypes = ["read", "write", 
"readWrite"] - - @skipIfWindows - @skipIfRemote - def test_expression(self): - """Tests setting data breakpoints on expression.""" - program = self.getBuildArtifact("a.out") - self.build_and_launch(program) - source = "main.cpp" - first_loop_break_line = line_number(source, "// first loop breakpoint") - self.set_source_breakpoints(source, [first_loop_break_line]) - self.continue_to_next_stop() - self.dap_server.get_stackFrame() - # Test setting write watchpoint using expressions: &x, arr+2 - response_x = self.dap_server.request_dataBreakpointInfo(0, "4@&x") - response_arr_2 = self.dap_server.request_dataBreakpointInfo(0, "1@arr+2") - # Test response from dataBreakpointInfo request. - self.assertEquals(response_x["body"]["dataId"].split("/")[1], "4") - self.assertEquals(response_x["body"]["accessTypes"], self.accessTypes) - self.assertEquals(response_arr_2["body"]["dataId"].split("/")[1], "1") - self.assertEquals(response_arr_2["body"]["accessTypes"], self.accessTypes) - dataBreakpoints = [ - {"dataId": response_x["body"]["dataId"], "accessType": "write"}, - {"dataId": response_arr_2["body"]["dataId"], "accessType": "write"}, - ] - self.dap_server.request_setDataBreakpoint(dataBreakpoints) - - self.dap_server.request_continue() - self.dap_server.wait_for_stopped() - x_val = self.dap_server.get_local_variable_value("x") - i_val = self.dap_server.get_local_variable_value("i") - self.assertEquals(x_val, "2") - self.assertEquals(i_val, "1") - - self.dap_server.request_continue() - self.dap_server.wait_for_stopped() - arr_2 = self.dap_server.get_local_variable_child("arr", "[2]") - i_val = self.dap_server.get_local_variable_value("i") - self.assertEquals(arr_2["value"], "'z'") - self.assertEquals(i_val, "2") - - @skipIfWindows - @skipIfRemote - def test_functionality(self): - """Tests setting data breakpoints on variable.""" - program = self.getBuildArtifact("a.out") - self.build_and_launch(program) - source = "main.cpp" - first_loop_break_line = line_number(source, "// 
first loop breakpoint") - self.set_source_breakpoints(source, [first_loop_break_line]) - self.continue_to_next_stop() - self.dap_server.get_local_variables() - # Test write watchpoints on x, arr[2] - response_x = self.dap_server.request_dataBreakpointInfo(1, "x") - arr = self.dap_server.get_local_variable("arr") - response_arr_2 = self.dap_server.request_dataBreakpointInfo( - arr["variablesReference"], "[2]" - ) - - # Test response from dataBreakpointInfo request. - self.assertEquals(response_x["body"]["dataId"].split("/")[1], "4") - self.assertEquals(response_x["body"]["accessTypes"], self.accessTypes) - self.assertEquals(response_arr_2["body"]["dataId"].split("/")[1], "1") - self.assertEquals(response_arr_2["body"]["accessTypes"], self.accessTypes) - dataBreakpoints = [ - {"dataId": response_x["body"]["dataId"], "accessType": "write"}, - {"dataId": response_arr_2["body"]["dataId"], "accessType": "write"}, - ] - self.dap_server.request_setDataBreakpoint(dataBreakpoints) - - self.continue_to_next_stop() - x_val = self.dap_server.get_local_variable_value("x") - i_val = self.dap_server.get_local_variable_value("i") - self.assertEquals(x_val, "2") - self.assertEquals(i_val, "1") - - self.continue_to_next_stop() - arr_2 = self.dap_server.get_local_variable_child("arr", "[2]") - i_val = self.dap_server.get_local_variable_value("i") - self.assertEquals(arr_2["value"], "'z'") - self.assertEquals(i_val, "2") - self.dap_server.request_setDataBreakpoint([]) - - # Test hit condition - second_loop_break_line = line_number(source, "// second loop breakpoint") - breakpoint_ids = self.set_source_breakpoints(source, [second_loop_break_line]) - self.continue_to_breakpoints(breakpoint_ids) - dataBreakpoints = [ - { - "dataId": response_x["body"]["dataId"], - "accessType": "write", - "hitCondition": "2", - } - ] - self.dap_server.request_setDataBreakpoint(dataBreakpoints) - self.continue_to_next_stop() - x_val = self.dap_server.get_local_variable_value("x") - self.assertEquals(x_val, 
"3") - - # Test condition - dataBreakpoints = [ - { - "dataId": response_x["body"]["dataId"], - "accessType": "write", - "condition": "x==10", - } - ] - self.dap_server.request_setDataBreakpoint(dataBreakpoints) - self.continue_to_next_stop() - x_val = self.dap_server.get_local_variable_value("x") - self.assertEquals(x_val, "10") diff --git a/lldb/test/API/tools/lldb-dap/databreakpoint/main.cpp b/lldb/test/API/tools/lldb-dap/databreakpoint/main.cpp deleted file mode 100644 index 8082fe02f3e534..00000000000000 --- a/lldb/test/API/tools/lldb-dap/databreakpoint/main.cpp +++ /dev/null @@ -1,17 +0,0 @@ -int main(int argc, char const *argv[]) { - // Test for data breakpoint - int x = 0; - char arr[4] = {'a', 'b', 'c', 'd'}; - for (int i = 0; i < 5; ++i) { // first loop breakpoint - if (i == 1) { - x = i + 1; - } else if (i == 2) { - arr[i] = 'z'; - } - } - - x = 1; - for (int i = 0; i < 10; ++i) { // second loop breakpoint - ++x; - } -} diff --git a/lldb/tools/lldb-dap/CMakeLists.txt b/lldb/tools/lldb-dap/CMakeLists.txt index f8f0d86453f585..f8c0e4ecf36c2f 100644 --- a/lldb/tools/lldb-dap/CMakeLists.txt +++ b/lldb/tools/lldb-dap/CMakeLists.txt @@ -37,7 +37,6 @@ add_lldb_tool(lldb-dap RunInTerminal.cpp SourceBreakpoint.cpp DAP.cpp - Watchpoint.cpp LINK_LIBS liblldb diff --git a/lldb/tools/lldb-dap/DAPForward.h b/lldb/tools/lldb-dap/DAPForward.h index 8c79488fae8dbf..fffff1e3f79020 100644 --- a/lldb/tools/lldb-dap/DAPForward.h +++ b/lldb/tools/lldb-dap/DAPForward.h @@ -14,7 +14,6 @@ struct BreakpointBase; struct ExceptionBreakpoint; struct FunctionBreakpoint; struct SourceBreakpoint; -struct Watchpoint; } // namespace lldb_dap namespace lldb { @@ -40,7 +39,6 @@ class SBStringList; class SBTarget; class SBThread; class SBValue; -class SBWatchpoint; } // namespace lldb #endif diff --git a/lldb/tools/lldb-dap/Watchpoint.cpp b/lldb/tools/lldb-dap/Watchpoint.cpp deleted file mode 100644 index 2f176e0da84f15..00000000000000 --- a/lldb/tools/lldb-dap/Watchpoint.cpp +++ /dev/null 
@@ -1,48 +0,0 @@ -//===-- Watchpoint.cpp ------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "Watchpoint.h" -#include "DAP.h" -#include "JSONUtils.h" -#include "llvm/ADT/StringExtras.h" - -namespace lldb_dap { -Watchpoint::Watchpoint(const llvm::json::Object &obj) : BreakpointBase(obj) { - llvm::StringRef dataId = GetString(obj, "dataId"); - std::string accessType = GetString(obj, "accessType").str(); - auto [addr_str, size_str] = dataId.split('/'); - lldb::addr_t addr; - size_t size; - llvm::to_integer(addr_str, addr, 16); - llvm::to_integer(size_str, size); - lldb::SBWatchpointOptions options; - options.SetWatchpointTypeRead(accessType != "write"); - if (accessType != "read") - options.SetWatchpointTypeWrite(lldb::eWatchpointWriteTypeOnModify); - wp = g_dap.target.WatchpointCreateByAddress(addr, size, options, error); - SetCondition(); - SetHitCondition(); -} - -void Watchpoint::SetCondition() { wp.SetCondition(condition.c_str()); } - -void Watchpoint::SetHitCondition() { - uint64_t hitCount = 0; - if (llvm::to_integer(hitCondition, hitCount)) - wp.SetIgnoreCount(hitCount - 1); -} - -void Watchpoint::CreateJsonObject(llvm::json::Object &object) { - if (error.Success()) { - object.try_emplace("verified", true); - } else { - object.try_emplace("verified", false); - EmplaceSafeString(object, "message", error.GetCString()); - } -} -} // namespace lldb_dap diff --git a/lldb/tools/lldb-dap/Watchpoint.h b/lldb/tools/lldb-dap/Watchpoint.h deleted file mode 100644 index 026b07d67241ce..00000000000000 --- a/lldb/tools/lldb-dap/Watchpoint.h +++ /dev/null @@ -1,34 +0,0 @@ -//===-- Watchpoint.h --------------------------------------------*- C++ -*-===// -// -// Part of 
the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLDB_TOOLS_LLDB_DAP_WATCHPOINT_H -#define LLDB_TOOLS_LLDB_DAP_WATCHPOINT_H - -#include "BreakpointBase.h" -#include "lldb/API/SBError.h" -#include "lldb/API/SBWatchpoint.h" -#include "lldb/API/SBWatchpointOptions.h" - -namespace lldb_dap { - -struct Watchpoint : public BreakpointBase { - // The LLDB breakpoint associated wit this watchpoint. - lldb::SBWatchpoint wp; - lldb::SBError error; - - Watchpoint() = default; - Watchpoint(const llvm::json::Object &obj); - Watchpoint(lldb::SBWatchpoint wp) : wp(wp) {} - - void SetCondition() override; - void SetHitCondition() override; - void CreateJsonObject(llvm::json::Object &object) override; -}; -} // namespace lldb_dap - -#endif diff --git a/lldb/tools/lldb-dap/lldb-dap.cpp b/lldb/tools/lldb-dap/lldb-dap.cpp index 6bf2ec28432cd3..67022347e6d624 100644 --- a/lldb/tools/lldb-dap/lldb-dap.cpp +++ b/lldb/tools/lldb-dap/lldb-dap.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "DAP.h" -#include "Watchpoint.h" #include #include @@ -561,46 +560,6 @@ void EventThreadFunction() { } } -lldb::SBValue FindVariable(uint64_t variablesReference, llvm::StringRef name) { - lldb::SBValue variable; - if (lldb::SBValueList *top_scope = GetTopLevelScope(variablesReference)) { - bool is_duplicated_variable_name = name.contains(" @"); - // variablesReference is one of our scopes, not an actual variable it is - // asking for a variable in locals or globals or registers - int64_t end_idx = top_scope->GetSize(); - // Searching backward so that we choose the variable in closest scope - // among variables of the same name. 
- for (int64_t i = end_idx - 1; i >= 0; --i) { - lldb::SBValue curr_variable = top_scope->GetValueAtIndex(i); - std::string variable_name = CreateUniqueVariableNameForDisplay( - curr_variable, is_duplicated_variable_name); - if (variable_name == name) { - variable = curr_variable; - break; - } - } - } else { - // This is not under the globals or locals scope, so there are no duplicated - // names. - - // We have a named item within an actual variable so we need to find it - // withing the container variable by name. - lldb::SBValue container = g_dap.variables.GetVariable(variablesReference); - variable = container.GetChildMemberWithName(name.data()); - if (!variable.IsValid()) { - if (name.starts_with("[")) { - llvm::StringRef index_str(name.drop_front(1)); - uint64_t index = 0; - if (!index_str.consumeInteger(0, index)) { - if (index_str == "]") - variable = container.GetChildAtIndex(index); - } - } - } - } - return variable; -} - // Both attach and launch take a either a sourcePath or sourceMap // argument (or neither), from which we need to set the target.source-map. void SetSourceMapFromArguments(const llvm::json::Object &arguments) { @@ -1688,8 +1647,6 @@ void request_initialize(const llvm::json::Object &request) { body.try_emplace("supportsProgressReporting", true); // The debug adapter supports 'logMessage' in breakpoint. body.try_emplace("supportsLogPoints", true); - // The debug adapter supports data watchpoints. 
- body.try_emplace("supportsDataBreakpoints", true); response.try_emplace("body", std::move(body)); g_dap.SendJSON(llvm::json::Value(std::move(response))); @@ -2634,231 +2591,6 @@ void request_setFunctionBreakpoints(const llvm::json::Object &request) { g_dap.SendJSON(llvm::json::Value(std::move(response))); } -// "DataBreakpointInfoRequest": { -// "allOf": [ { "$ref": "#/definitions/Request" }, { -// "type": "object", -// "description": "Obtains information on a possible data breakpoint that -// could be set on an expression or variable.\nClients should only call this -// request if the corresponding capability `supportsDataBreakpoints` is -// true.", "properties": { -// "command": { -// "type": "string", -// "enum": [ "dataBreakpointInfo" ] -// }, -// "arguments": { -// "$ref": "#/definitions/DataBreakpointInfoArguments" -// } -// }, -// "required": [ "command", "arguments" ] -// }] -// }, -// "DataBreakpointInfoArguments": { -// "type": "object", -// "description": "Arguments for `dataBreakpointInfo` request.", -// "properties": { -// "variablesReference": { -// "type": "integer", -// "description": "Reference to the variable container if the data -// breakpoint is requested for a child of the container. The -// `variablesReference` must have been obtained in the current suspended -// state. See 'Lifetime of Object References' in the Overview section for -// details." -// }, -// "name": { -// "type": "string", -// "description": "The name of the variable's child to obtain data -// breakpoint information for.\nIf `variablesReference` isn't specified, -// this can be an expression." -// }, -// "frameId": { -// "type": "integer", -// "description": "When `name` is an expression, evaluate it in the scope -// of this stack frame. If not specified, the expression is evaluated in -// the global scope. When `variablesReference` is specified, this property -// has no effect." 
-// } -// }, -// "required": [ "name" ] -// }, -// "DataBreakpointInfoResponse": { -// "allOf": [ { "$ref": "#/definitions/Response" }, { -// "type": "object", -// "description": "Response to `dataBreakpointInfo` request.", -// "properties": { -// "body": { -// "type": "object", -// "properties": { -// "dataId": { -// "type": [ "string", "null" ], -// "description": "An identifier for the data on which a data -// breakpoint can be registered with the `setDataBreakpoints` -// request or null if no data breakpoint is available. If a -// `variablesReference` or `frameId` is passed, the `dataId` is -// valid in the current suspended state, otherwise it's valid -// indefinitely. See 'Lifetime of Object References' in the Overview -// section for details. Breakpoints set using the `dataId` in the -// `setDataBreakpoints` request may outlive the lifetime of the -// associated `dataId`." -// }, -// "description": { -// "type": "string", -// "description": "UI string that describes on what data the -// breakpoint is set on or why a data breakpoint is not available." -// }, -// "accessTypes": { -// "type": "array", -// "items": { -// "$ref": "#/definitions/DataBreakpointAccessType" -// }, -// "description": "Attribute lists the available access types for a -// potential data breakpoint. A UI client could surface this -// information." -// }, -// "canPersist": { -// "type": "boolean", -// "description": "Attribute indicates that a potential data -// breakpoint could be persisted across sessions." 
-// } -// }, -// "required": [ "dataId", "description" ] -// } -// }, -// "required": [ "body" ] -// }] -// } -void request_dataBreakpointInfo(const llvm::json::Object &request) { - llvm::json::Object response; - FillResponse(request, response); - llvm::json::Object body; - lldb::SBError error; - llvm::json::Array accessTypes{"read", "write", "readWrite"}; - const auto *arguments = request.getObject("arguments"); - const auto variablesReference = - GetUnsigned(arguments, "variablesReference", 0); - llvm::StringRef name = GetString(arguments, "name"); - lldb::SBFrame frame = g_dap.GetLLDBFrame(*arguments); - lldb::SBValue variable = FindVariable(variablesReference, name); - std::string addr, size; - - if (variable.IsValid()) { - addr = llvm::utohexstr(variable.GetLoadAddress()); - size = llvm::utostr(variable.GetByteSize()); - } else if (variablesReference == 0 && frame.IsValid()) { - // Name might be an expression. In this case we assume that name is composed - // of the number of bytes to watch and expression, separated by '@': - // "${size}@${expression}" - llvm::StringRef expr; - std::tie(size, expr) = name.split('@'); - lldb::SBValue value = frame.EvaluateExpression(expr.data()); - if (value.GetError().Fail()) { - lldb::SBError error = value.GetError(); - const char *error_cstr = error.GetCString(); - body.try_emplace("dataId", nullptr); - body.try_emplace("description", error_cstr && error_cstr[0] - ? 
std::string(error_cstr) - : "evaluation failed"); - } else - addr = llvm::utohexstr(value.GetValueAsUnsigned()); - } else { - auto state = g_dap.target.GetProcess().GetState(); - body.try_emplace("dataId", nullptr); - body.try_emplace("description", - "variable not found: " + llvm::utostr(state)); - } - - if (!body.getObject("dataId")) { - body.try_emplace("dataId", addr + "/" + size); - body.try_emplace("accessTypes", std::move(accessTypes)); - body.try_emplace("description", - size + " bytes at " + addr + " " + name.str()); - } - response.try_emplace("body", std::move(body)); - g_dap.SendJSON(llvm::json::Value(std::move(response))); -} - -// "SetDataBreakpointsRequest": { -// "allOf": [ { "$ref": "#/definitions/Request" }, { -// "type": "object", -// "description": "Replaces all existing data breakpoints with new data -// breakpoints.\nTo clear all data breakpoints, specify an empty -// array.\nWhen a data breakpoint is hit, a `stopped` event (with reason -// `data breakpoint`) is generated.\nClients should only call this request -// if the corresponding capability `supportsDataBreakpoints` is true.", -// "properties": { -// "command": { -// "type": "string", -// "enum": [ "setDataBreakpoints" ] -// }, -// "arguments": { -// "$ref": "#/definitions/SetDataBreakpointsArguments" -// } -// }, -// "required": [ "command", "arguments" ] -// }] -// }, -// "SetDataBreakpointsArguments": { -// "type": "object", -// "description": "Arguments for `setDataBreakpoints` request.", -// "properties": { -// "breakpoints": { -// "type": "array", -// "items": { -// "$ref": "#/definitions/DataBreakpoint" -// }, -// "description": "The contents of this array replaces all existing data -// breakpoints. An empty array clears all data breakpoints." 
-// } -// }, -// "required": [ "breakpoints" ] -// }, -// "SetDataBreakpointsResponse": { -// "allOf": [ { "$ref": "#/definitions/Response" }, { -// "type": "object", -// "description": "Response to `setDataBreakpoints` request.\nReturned is -// information about each breakpoint created by this request.", -// "properties": { -// "body": { -// "type": "object", -// "properties": { -// "breakpoints": { -// "type": "array", -// "items": { -// "$ref": "#/definitions/Breakpoint" -// }, -// "description": "Information about the data breakpoints. The array -// elements correspond to the elements of the input argument -// `breakpoints` array." -// } -// }, -// "required": [ "breakpoints" ] -// } -// }, -// "required": [ "body" ] -// }] -// } -void request_setDataBreakpoints(const llvm::json::Object &request) { - llvm::json::Object response; - lldb::SBError error; - FillResponse(request, response); - const auto *arguments = request.getObject("arguments"); - const auto *breakpoints = arguments->getArray("breakpoints"); - llvm::json::Array response_breakpoints; - g_dap.target.DeleteAllWatchpoints(); - if (breakpoints) { - for (const auto &bp : *breakpoints) { - const auto *bp_obj = bp.getAsObject(); - if (bp_obj) { - Watchpoint wp(*bp_obj); - AppendBreakpoint(&wp, response_breakpoints); - } - } - } - llvm::json::Object body; - body.try_emplace("breakpoints", std::move(response_breakpoints)); - response.try_emplace("body", std::move(body)); - g_dap.SendJSON(llvm::json::Value(std::move(response))); -} - // "SourceRequest": { // "allOf": [ { "$ref": "#/definitions/Request" }, { // "type": "object", @@ -3342,6 +3074,7 @@ void request_setVariable(const llvm::json::Object &request) { const auto variablesReference = GetUnsigned(arguments, "variablesReference", 0); llvm::StringRef name = GetString(arguments, "name"); + bool is_duplicated_variable_name = name.contains(" @"); const auto value = GetString(arguments, "value"); // Set success to false just in case we don't find the 
variable by name @@ -3362,8 +3095,40 @@ void request_setVariable(const llvm::json::Object &request) { const auto id_value = GetUnsigned(arguments, "id", UINT64_MAX); if (id_value != UINT64_MAX) { variable = g_dap.variables.GetVariable(id_value); + } else if (lldb::SBValueList *top_scope = + GetTopLevelScope(variablesReference)) { + // variablesReference is one of our scopes, not an actual variable it is + // asking for a variable in locals or globals or registers + int64_t end_idx = top_scope->GetSize(); + // Searching backward so that we choose the variable in closest scope + // among variables of the same name. + for (int64_t i = end_idx - 1; i >= 0; --i) { + lldb::SBValue curr_variable = top_scope->GetValueAtIndex(i); + std::string variable_name = CreateUniqueVariableNameForDisplay( + curr_variable, is_duplicated_variable_name); + if (variable_name == name) { + variable = curr_variable; + break; + } + } } else { - variable = FindVariable(variablesReference, name); + // This is not under the globals or locals scope, so there are no duplicated + // names. + + // We have a named item within an actual variable so we need to find it + // withing the container variable by name. 
+ lldb::SBValue container = g_dap.variables.GetVariable(variablesReference); + variable = container.GetChildMemberWithName(name.data()); + if (!variable.IsValid()) { + if (name.starts_with("[")) { + llvm::StringRef index_str(name.drop_front(1)); + uint64_t index = 0; + if (!index_str.consumeInteger(0, index)) { + if (index_str == "]") + variable = container.GetChildAtIndex(index); + } + } + } } if (variable.IsValid()) { @@ -3846,10 +3611,6 @@ void RegisterRequestCallbacks() { request_setExceptionBreakpoints); g_dap.RegisterRequestCallback("setFunctionBreakpoints", request_setFunctionBreakpoints); - g_dap.RegisterRequestCallback("dataBreakpointInfo", - request_dataBreakpointInfo); - g_dap.RegisterRequestCallback("setDataBreakpoints", - request_setDataBreakpoints); g_dap.RegisterRequestCallback("setVariable", request_setVariable); g_dap.RegisterRequestCallback("source", request_source); g_dap.RegisterRequestCallback("stackTrace", request_stackTrace); diff --git a/llvm/utils/gn/secondary/lldb/tools/lldb-dap/BUILD.gn b/llvm/utils/gn/secondary/lldb/tools/lldb-dap/BUILD.gn index a752b61bdbaabf..98c2068f6da291 100644 --- a/llvm/utils/gn/secondary/lldb/tools/lldb-dap/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/tools/lldb-dap/BUILD.gn @@ -52,6 +52,5 @@ executable("lldb-dap") { "RunInTerminal.cpp", "SourceBreakpoint.cpp", "lldb-dap.cpp", - "Watchpoint.cpp", ] } From b200dfc15904f0f7f19443fd5a399242c80213dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Thu, 15 Feb 2024 06:47:47 +0100 Subject: [PATCH 217/240] [clang][Interp] Fix calling invalid function pointers Checking for isConstexpr() is wrong; we need to (try to) call the function and let later code diagnose the failure accordingly. 
--- clang/lib/AST/Interp/Interp.h | 3 +-- clang/test/AST/Interp/functions.cpp | 8 ++++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/clang/lib/AST/Interp/Interp.h b/clang/lib/AST/Interp/Interp.h index 77c724f08e8eef..5bbb9f169a800e 100644 --- a/clang/lib/AST/Interp/Interp.h +++ b/clang/lib/AST/Interp/Interp.h @@ -2071,8 +2071,7 @@ inline bool CallPtr(InterpState &S, CodePtr OpPC, uint32_t ArgSize) { const FunctionPointer &FuncPtr = S.Stk.pop(); const Function *F = FuncPtr.getFunction(); - if (!F || !F->isConstexpr()) - return false; + assert(F); assert(ArgSize >= F->getWrittenArgSize()); uint32_t VarArgSize = ArgSize - F->getWrittenArgSize(); diff --git a/clang/test/AST/Interp/functions.cpp b/clang/test/AST/Interp/functions.cpp index 7b8278cf13aa88..34a832c794c75d 100644 --- a/clang/test/AST/Interp/functions.cpp +++ b/clang/test/AST/Interp/functions.cpp @@ -187,6 +187,14 @@ namespace FunctionReturnType { static_assert(!!op, ""); constexpr int (*op2)(int, int) = nullptr; static_assert(!op2, ""); + + int m() { return 5;} // ref-note {{declared here}} \ + // expected-note {{declared here}} + constexpr int (*invalidFnPtr)() = m; + static_assert(invalidFnPtr() == 5, ""); // ref-error {{not an integral constant expression}} \ + // ref-note {{non-constexpr function 'm'}} \ + // expected-error {{not an integral constant expression}} \ + // expected-note {{non-constexpr function 'm'}} } namespace Comparison { From b93916c9798ea09488e30b9b0aae9e54ef0b1956 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Thu, 15 Feb 2024 07:03:58 +0100 Subject: [PATCH 218/240] [clang][Interp][NFC] Convert test to verify=expected,both style --- clang/test/AST/Interp/functions.cpp | 198 ++++++++++------------------ 1 file changed, 68 insertions(+), 130 deletions(-) diff --git a/clang/test/AST/Interp/functions.cpp b/clang/test/AST/Interp/functions.cpp index 34a832c794c75d..320691336bdd99 100644 --- a/clang/test/AST/Interp/functions.cpp +++ 
b/clang/test/AST/Interp/functions.cpp @@ -1,9 +1,9 @@ -// RUN: %clang_cc1 -fexperimental-new-constant-interpreter -verify %s -// RUN: %clang_cc1 -std=c++14 -fexperimental-new-constant-interpreter -verify %s -// RUN: %clang_cc1 -std=c++20 -fexperimental-new-constant-interpreter -verify %s -// RUN: %clang_cc1 -verify=ref %s -// RUN: %clang_cc1 -std=c++14 -verify=ref %s -// RUN: %clang_cc1 -std=c++20 -verify=ref %s +// RUN: %clang_cc1 -fexperimental-new-constant-interpreter -verify=expected,both %s +// RUN: %clang_cc1 -std=c++14 -fexperimental-new-constant-interpreter -verify=expected,both %s +// RUN: %clang_cc1 -std=c++20 -fexperimental-new-constant-interpreter -verify=expected,both %s +// RUN: %clang_cc1 -verify=ref,both %s +// RUN: %clang_cc1 -std=c++14 -verify=ref,both %s +// RUN: %clang_cc1 -std=c++20 -verify=ref,both %s constexpr void doNothing() {} constexpr int gimme5() { @@ -23,16 +23,13 @@ static_assert(!identity(false), ""); template constexpr bool sameSize() { - static_assert(sizeof(A) == sizeof(B), ""); // expected-error {{static assertion failed}} \ - // ref-error {{static assertion failed}} \ - // expected-note {{evaluates to}} \ - // ref-note {{evaluates to}} + static_assert(sizeof(A) == sizeof(B), ""); // both-error {{static assertion failed}} \ + // both-note {{evaluates to}} return true; } static_assert(sameSize(), ""); static_assert(sameSize(), ""); -static_assert(sameSize(), ""); // expected-note {{in instantiation of function template specialization}} \ - // ref-note {{in instantiation of function template specialization}} +static_assert(sameSize(), ""); // both-note {{in instantiation of function template specialization}} constexpr auto add(int a, int b) -> int { @@ -92,12 +89,9 @@ static_assert(getNum<-2>() == -2, ""); static_assert(getNum<10>() == 10, ""); static_assert(getNum() == 5, ""); -constexpr int f(); // expected-note {{declared here}} \ - // ref-note {{declared here}} -static_assert(f() == 5, ""); // expected-error {{not an integral 
constant expression}} \ - // expected-note {{undefined function 'f'}} \ - // ref-error {{not an integral constant expression}} \ - // ref-note {{undefined function 'f'}} +constexpr int f(); // both-note {{declared here}} +static_assert(f() == 5, ""); // both-error {{not an integral constant expression}} \ + // both-note {{undefined function 'f'}} constexpr int a() { return f(); } @@ -108,17 +102,14 @@ static_assert(a() == 5, ""); constexpr int invalid() { // Invalid expression in visit(). - while(huh) {} // expected-error {{use of undeclared identifier}} \ - // ref-error {{use of undeclared identifier}} - + while(huh) {} // both-error {{use of undeclared identifier}} return 0; } constexpr void invalid2() { int i = 0; // Invalid expression in discard(). - huh(); // expected-error {{use of undeclared identifier}} \ - // ref-error {{use of undeclared identifier}} + huh(); // both-error {{use of undeclared identifier}} } namespace FunctionPointers { @@ -160,8 +151,7 @@ namespace FunctionReturnType { constexpr ptr fun() { return &fun1; } - static_assert(fun() == nullptr, ""); // expected-error {{static assertion failed}} \ - // ref-error {{static assertion failed}} + static_assert(fun() == nullptr, ""); // both-error {{static assertion failed}} constexpr int foo() { int (*f)(int *) = fun(); @@ -188,32 +178,23 @@ namespace FunctionReturnType { constexpr int (*op2)(int, int) = nullptr; static_assert(!op2, ""); - int m() { return 5;} // ref-note {{declared here}} \ - // expected-note {{declared here}} + int m() { return 5;} // both-note {{declared here}} constexpr int (*invalidFnPtr)() = m; - static_assert(invalidFnPtr() == 5, ""); // ref-error {{not an integral constant expression}} \ - // ref-note {{non-constexpr function 'm'}} \ - // expected-error {{not an integral constant expression}} \ - // expected-note {{non-constexpr function 'm'}} + static_assert(invalidFnPtr() == 5, ""); // both-error {{not an integral constant expression}} \ + // both-note {{non-constexpr 
function 'm'}} } namespace Comparison { void f(), g(); constexpr void (*pf)() = &f, (*pg)() = &g; - constexpr bool u13 = pf < pg; // ref-warning {{ordered comparison of function pointers}} \ - // ref-error {{must be initialized by a constant expression}} \ - // ref-note {{comparison between '&f' and '&g' has unspecified value}} \ - // expected-warning {{ordered comparison of function pointers}} \ - // expected-error {{must be initialized by a constant expression}} \ - // expected-note {{comparison between '&f' and '&g' has unspecified value}} + constexpr bool u13 = pf < pg; // both-warning {{ordered comparison of function pointers}} \ + // both-error {{must be initialized by a constant expression}} \ + // both-note {{comparison between '&f' and '&g' has unspecified value}} - constexpr bool u14 = pf < (void(*)())nullptr; // ref-warning {{ordered comparison of function pointers}} \ - // ref-error {{must be initialized by a constant expression}} \ - // ref-note {{comparison between '&f' and 'nullptr' has unspecified value}} \ - // expected-warning {{ordered comparison of function pointers}} \ - // expected-error {{must be initialized by a constant expression}} \ - // expected-note {{comparison between '&f' and 'nullptr' has unspecified value}} + constexpr bool u14 = pf < (void(*)())nullptr; // both-warning {{ordered comparison of function pointers}} \ + // both-error {{must be initialized by a constant expression}} \ + // both-note {{comparison between '&f' and 'nullptr' has unspecified value}} @@ -249,31 +230,22 @@ static_assert(doit() == 10, ""); namespace InvalidCall { struct S { - constexpr int a() const { // expected-error {{never produces a constant expression}} \ - // ref-error {{never produces a constant expression}} - return 1 / 0; // expected-note 2{{division by zero}} \ - // expected-warning {{is undefined}} \ - // ref-note 2{{division by zero}} \ - // ref-warning {{is undefined}} + constexpr int a() const { // both-error {{never produces a constant 
expression}} + return 1 / 0; // both-note 2{{division by zero}} \ + // both-warning {{is undefined}} } }; constexpr S s; - static_assert(s.a() == 1, ""); // expected-error {{not an integral constant expression}} \ - // expected-note {{in call to}} \ - // ref-error {{not an integral constant expression}} \ - // ref-note {{in call to}} + static_assert(s.a() == 1, ""); // both-error {{not an integral constant expression}} \ + // both-note {{in call to}} /// This used to cause an assertion failure in the new constant interpreter. - constexpr void func(); // expected-note {{declared here}} \ - // ref-note {{declared here}} + constexpr void func(); // both-note {{declared here}} struct SS { - constexpr SS() { func(); } // expected-note {{undefined function }} \ - // ref-note {{undefined function}} + constexpr SS() { func(); } // both-note {{undefined function }} }; - constexpr SS ss; // expected-error {{must be initialized by a constant expression}} \ - // expected-note {{in call to 'SS()'}} \ - // ref-error {{must be initialized by a constant expression}} \ - // ref-note {{in call to 'SS()'}} + constexpr SS ss; // both-error {{must be initialized by a constant expression}} \ + // both-note {{in call to 'SS()'}} /// This should not emit a diagnostic. @@ -299,8 +271,7 @@ namespace CallWithArgs { namespace ReturnLocalPtr { constexpr int *p() { int a = 12; - return &a; // ref-warning {{address of stack memory}} \ - // expected-warning {{address of stack memory}} + return &a; // both-warning {{address of stack memory}} } /// GCC rejects the expression below, just like the new interpreter. The current interpreter @@ -313,13 +284,11 @@ namespace ReturnLocalPtr { /// new one does not. 
constexpr const int &p2() { int a = 12; // ref-note {{declared here}} - return a; // ref-warning {{reference to stack memory associated with local variable}} \ - // expected-warning {{reference to stack memory associated with local variable}} + return a; // both-warning {{reference to stack memory associated with local variable}} } - static_assert(p2() == 12, ""); // ref-error {{not an integral constant expression}} \ - // ref-note {{read of variable whose lifetime has ended}} \ - // expected-error {{not an integral constant expression}} + static_assert(p2() == 12, ""); // both-error {{not an integral constant expression}} \ + // ref-note {{read of variable whose lifetime has ended}} } namespace VoidReturn { @@ -332,22 +301,16 @@ namespace VoidReturn { } namespace InvalidReclRefs { - void param(bool b) { // ref-note {{declared here}} \ - // expected-note {{declared here}} - static_assert(b, ""); // ref-error {{not an integral constant expression}} \ - // ref-note {{function parameter 'b' with unknown value}} \ - // expected-error {{not an integral constant expression}} \ - // expected-note {{function parameter 'b' with unknown value}} + void param(bool b) { // both-note {{declared here}} + static_assert(b, ""); // both-error {{not an integral constant expression}} \ + // both-note {{function parameter 'b' with unknown value}} static_assert(true ? 
true : b, ""); } #if __cplusplus >= 202002L - consteval void param2(bool b) { // ref-note {{declared here}} \ - // expected-note {{declared here}} - static_assert(b, ""); // ref-error {{not an integral constant expression}} \ - // ref-note {{function parameter 'b' with unknown value}} \ - // expected-error {{not an integral constant expression}} \ - // expected-note {{function parameter 'b' with unknown value}} + consteval void param2(bool b) { // both-note {{declared here}} + static_assert(b, ""); // both-error {{not an integral constant expression}} \ + // both-note {{function parameter 'b' with unknown value}} } #endif } @@ -482,13 +445,10 @@ namespace AddressOf { static_assert(&pt->n == &t.n, ""); struct U { int n : 5; } u; - int *pbf = __builtin_addressof(u.n); // expected-error {{address of bit-field requested}} \ - // ref-error {{address of bit-field requested}} + int *pbf = __builtin_addressof(u.n); // both-error {{address of bit-field requested}} - S *ptmp = __builtin_addressof(S{}); // expected-error {{taking the address of a temporary}} \ - // expected-warning {{temporary whose address is used as value of local variable 'ptmp' will be destroyed at the end of the full-expression}} \ - // ref-error {{taking the address of a temporary}} \ - // ref-warning {{temporary whose address is used as value of local variable 'ptmp' will be destroyed at the end of the full-expression}} + S *ptmp = __builtin_addressof(S{}); // both-error {{taking the address of a temporary}} \ + // both-warning {{temporary whose address is used as value of local variable 'ptmp' will be destroyed at the end of the full-expression}} constexpr int foo() {return 1;} static_assert(__builtin_addressof(foo) == foo, ""); @@ -509,8 +469,7 @@ constexpr typename std::remove_reference::type&& move(T &&t) noexcept { /// The std::move declaration above gets translated to a builtin function. 
namespace Move { #if __cplusplus >= 202002L - consteval int f_eval() { // expected-note 12{{declared here}} \ - // ref-note 12{{declared here}} + consteval int f_eval() { // both-note 12{{declared here}} return 0; } @@ -530,56 +489,35 @@ namespace Move { // there is no the copy constructor call when its argument is a prvalue because of garanteed copy elision. // so we need to test with both prvalue and xvalues. { Copy c(C); } - { Copy c((Copy(&f_eval))); } // expected-error {{cannot take address of consteval}} \ - // ref-error {{cannot take address of consteval}} + { Copy c((Copy(&f_eval))); } // both-error {{cannot take address of consteval}} { Copy c(std::move(C)); } - { Copy c(std::move(Copy(&f_eval))); } // expected-error {{is not a constant expression}} \ - // expected-note {{to a consteval}} \ - // ref-error {{is not a constant expression}} \ - // ref-note {{to a consteval}} - { Copy c(to_lvalue_ref((Copy(&f_eval)))); } // expected-error {{is not a constant expression}} \ - // expected-note {{to a consteval}} \ - // ref-error {{is not a constant expression}} \ - // ref-note {{to a consteval}} + { Copy c(std::move(Copy(&f_eval))); } // both-error {{is not a constant expression}} \ + // both-note {{to a consteval}} + { Copy c(to_lvalue_ref((Copy(&f_eval)))); } // both-error {{is not a constant expression}} \ + // both-note {{to a consteval}} { Copy c(to_lvalue_ref(std::move(C))); } - { Copy c(to_lvalue_ref(std::move(Copy(&f_eval)))); } // expected-error {{is not a constant expression}} \ - // expected-note {{to a consteval}} \ - // ref-error {{is not a constant expression}} \ - // ref-note {{to a consteval}} + { Copy c(to_lvalue_ref(std::move(Copy(&f_eval)))); } // both-error {{is not a constant expression}} \ + // both-note {{to a consteval}} { Copy c = Copy(C); } - { Copy c = Copy(Copy(&f_eval)); } // expected-error {{cannot take address of consteval}} \ - // ref-error {{cannot take address of consteval}} + { Copy c = Copy(Copy(&f_eval)); } // both-error 
{{cannot take address of consteval}} { Copy c = Copy(std::move(C)); } - { Copy c = Copy(std::move(Copy(&f_eval))); } // expected-error {{is not a constant expression}} \ - // expected-note {{to a consteval}} \ - // ref-error {{is not a constant expression}} \ - // ref-note {{to a consteval}} - { Copy c = Copy(to_lvalue_ref(Copy(&f_eval))); } // expected-error {{is not a constant expression}} \ - // expected-note {{to a consteval}} \ - // ref-error {{is not a constant expression}} \ - // ref-note {{to a consteval}} + { Copy c = Copy(std::move(Copy(&f_eval))); } // both-error {{is not a constant expression}} \ + // both-note {{to a consteval}} + { Copy c = Copy(to_lvalue_ref(Copy(&f_eval))); } // both-error {{is not a constant expression}} \ + // both-note {{to a consteval}} { Copy c = Copy(to_lvalue_ref(std::move(C))); } - { Copy c = Copy(to_lvalue_ref(std::move(Copy(&f_eval)))); } // expected-error {{is not a constant expression}} \ - // expected-note {{to a consteval}} \ - // ref-error {{is not a constant expression}} \ - // ref-note {{to a consteval}} + { Copy c = Copy(to_lvalue_ref(std::move(Copy(&f_eval)))); } // both-error {{is not a constant expression}} \ + // both-note {{to a consteval}} { Copy c; c = Copy(C); } - { Copy c; c = Copy(Copy(&f_eval)); } // expected-error {{cannot take address of consteval}} \ - // ref-error {{cannot take address of consteval}} + { Copy c; c = Copy(Copy(&f_eval)); } // both-error {{cannot take address of consteval}} { Copy c; c = Copy(std::move(C)); } - { Copy c; c = Copy(std::move(Copy(&f_eval))); } // expected-error {{is not a constant expression}} \ - // expected-note {{to a consteval}} \ - // ref-error {{is not a constant expression}} \ - // ref-note {{to a consteval}} - { Copy c; c = Copy(to_lvalue_ref(Copy(&f_eval))); } // expected-error {{is not a constant expression}} \ - // expected-note {{to a consteval}} \ - // ref-error {{is not a constant expression}} \ - // ref-note {{to a consteval}} + { Copy c; c = 
Copy(std::move(Copy(&f_eval))); } // both-error {{is not a constant expression}} \ + // both-note {{to a consteval}} + { Copy c; c = Copy(to_lvalue_ref(Copy(&f_eval))); } // both-error {{is not a constant expression}} \ + // both-note {{to a consteval}} { Copy c; c = Copy(to_lvalue_ref(std::move(C))); } - { Copy c; c = Copy(to_lvalue_ref(std::move(Copy(&f_eval)))); } // expected-error {{is not a constant expression}} \ - // expected-note {{to a consteval}} \ - // ref-error {{is not a constant expression}} \ - // ref-note {{to a consteval}} + { Copy c; c = Copy(to_lvalue_ref(std::move(Copy(&f_eval)))); } // both-error {{is not a constant expression}} \ + // both-note {{to a consteval}} } #endif constexpr int A = std::move(5); From 36adfec155de366d722f2bac8ff9162289dcf06c Mon Sep 17 00:00:00 2001 From: Rohit Aggarwal <44664450+rohitaggarwal007@users.noreply.github.com> Date: Thu, 15 Feb 2024 12:13:07 +0530 Subject: [PATCH 219/240] Adding support of AMDLIBM vector library (#78560) Hi, AMD has it's own implementation of vector calls. This patch include the changes to enable the use of AMD's math library using -fveclib=AMDLIBM. 
Please refer https://github.com/amd/aocl-libm-ose --------- Co-authored-by: Rohit Aggarwal --- clang/include/clang/Driver/Options.td | 4 +- clang/test/Driver/autocomplete.c | 1 + .../include/llvm/Analysis/TargetLibraryInfo.h | 3 +- llvm/include/llvm/Analysis/VecFuncs.def | 194 ++++ .../llvm/Frontend/Driver/CodeGenOptions.h | 3 +- llvm/lib/Analysis/TargetLibraryInfo.cpp | 14 +- llvm/lib/Frontend/Driver/CodeGenOptions.cpp | 4 + .../Generic/replace-intrinsics-with-veclib.ll | 11 + .../LoopVectorize/X86/amdlibm-calls-finite.ll | 332 +++++++ .../LoopVectorize/X86/amdlibm-calls.ll | 869 ++++++++++++++++++ .../Transforms/SLPVectorizer/X86/sin-sqrt.ll | 29 +- llvm/test/Transforms/Util/add-TLI-mappings.ll | 23 + 12 files changed, 1481 insertions(+), 6 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls-finite.ll create mode 100644 llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 95b464e7d61834..b302afd65e2811 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3204,10 +3204,10 @@ def fno_experimental_isel : Flag<["-"], "fno-experimental-isel">, Group, - Values<"Accelerate,libmvec,MASSV,SVML,SLEEF,Darwin_libsystem_m,ArmPL,none">, + Values<"Accelerate,libmvec,MASSV,SVML,SLEEF,Darwin_libsystem_m,ArmPL,AMDLIBM,none">, NormalizedValuesScope<"llvm::driver::VectorLibrary">, NormalizedValues<["Accelerate", "LIBMVEC", "MASSV", "SVML", "SLEEF", - "Darwin_libsystem_m", "ArmPL", "NoLibrary"]>, + "Darwin_libsystem_m", "ArmPL", "AMDLIBM", "NoLibrary"]>, MarshallingInfoEnum, "NoLibrary">; def fno_lax_vector_conversions : Flag<["-"], "fno-lax-vector-conversions">, Group, Alias, AliasArgs<["none"]>; diff --git a/clang/test/Driver/autocomplete.c b/clang/test/Driver/autocomplete.c index d6f57708b67eb6..c8ceaaf404672f 100644 --- a/clang/test/Driver/autocomplete.c +++ b/clang/test/Driver/autocomplete.c @@ -80,6 
+80,7 @@ // FLTOALL-NEXT: thin // RUN: %clang --autocomplete=-fveclib= | FileCheck %s -check-prefix=FVECLIBALL // FVECLIBALL: Accelerate +// FVECLIBALL-NEXT: AMDLIBM // FVECLIBALL-NEXT: ArmPL // FVECLIBALL-NEXT: Darwin_libsystem_m // FVECLIBALL-NEXT: libmvec diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h index daf1d8e2079f85..46f31f918e7b61 100644 --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h @@ -129,7 +129,8 @@ class TargetLibraryInfoImpl { MASSV, // IBM MASS vector library. SVML, // Intel short vector math library. SLEEFGNUABI, // SLEEF - SIMD Library for Evaluating Elementary Functions. - ArmPL // Arm Performance Libraries. + ArmPL, // Arm Performance Libraries. + AMDLIBM // AMD Math Vector library. }; TargetLibraryInfoImpl(); diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def index 07edf68c667a27..394e4a05fbc0cf 100644 --- a/llvm/include/llvm/Analysis/VecFuncs.def +++ b/llvm/include/llvm/Analysis/VecFuncs.def @@ -1067,6 +1067,199 @@ TLI_DEFINE_VECFUNC("tgammaf", "armpl_vtgammaq_f32", FIXED(4), NOMASK, "_ZGV_LLVM TLI_DEFINE_VECFUNC("tgamma", "armpl_svtgamma_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("tgammaf", "armpl_svtgamma_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv") +#elif defined(TLI_DEFINE_AMDLIBM_VECFUNCS) +TLI_DEFINE_VECFUNC("sinf", "amd_vrs16_sinf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("sinf", "amd_vrs8_sinf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("sinf", "amd_vrs4_sinf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("sin", "amd_vrd8_sin", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("sin", "amd_vrd4_sin", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("sin", "amd_vrd2_sin", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") + +TLI_DEFINE_VECFUNC("llvm.sin.f32", "amd_vrs16_sinf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v") 
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "amd_vrs8_sinf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("llvm.sin.f32", "amd_vrs4_sinf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.sin.f64", "amd_vrd8_sin", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("llvm.sin.f64", "amd_vrd4_sin", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.sin.f64", "amd_vrd2_sin", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") + +TLI_DEFINE_VECFUNC("cosf", "amd_vrs16_cosf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("cosf", "amd_vrs8_cosf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("cosf", "amd_vrs4_cosf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("cos", "amd_vrd8_cos", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("cos", "amd_vrd4_cos", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("cos", "amd_vrd2_cos", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") + +TLI_DEFINE_VECFUNC("llvm.cos.f32", "amd_vrs16_cosf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("llvm.cos.f32", "amd_vrs8_cosf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("llvm.cos.f32", "amd_vrs4_cosf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.cos.f64", "amd_vrd8_cos", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("llvm.cos.f64", "amd_vrd4_cos", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.cos.f64", "amd_vrd2_cos", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") + +TLI_DEFINE_VECFUNC("expf", "amd_vrs16_expf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("expf", "amd_vrs8_expf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("expf", "amd_vrs4_expf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("exp", "amd_vrd2_exp", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("exp", "amd_vrd4_exp", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("exp", "amd_vrd8_exp", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") + +TLI_DEFINE_VECFUNC("__expf_finite", "amd_vrs16_expf", FIXED(16), 
NOMASK, "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("__expf_finite", "amd_vrs8_expf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__expf_finite", "amd_vrs4_expf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("__exp_finite", "amd_vrd2_exp", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("__exp_finite", "amd_vrd4_exp", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("__exp_finite", "amd_vrd8_exp", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") + +TLI_DEFINE_VECFUNC("llvm.exp.f32", "amd_vrs16_expf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("llvm.exp.f32", "amd_vrs8_expf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("llvm.exp.f32", "amd_vrs4_expf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.exp.f64", "amd_vrd2_exp", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.exp.f64", "amd_vrd4_exp", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.exp.f64", "amd_vrd8_exp", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") + +TLI_DEFINE_VECFUNC("exp2f", "amd_vrs16_exp2f", FIXED(16), NOMASK, "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("exp2f", "amd_vrs8_exp2f", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("exp2f", "amd_vrs4_exp2f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("exp2", "amd_vrd2_exp2", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("exp2", "amd_vrd4_exp2", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("exp2", "amd_vrd8_exp2", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") + +TLI_DEFINE_VECFUNC("__exp2f_finite", "amd_vrs16_exp2f", FIXED(16), NOMASK, "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("__exp2f_finite", "amd_vrs8_exp2f", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__exp2f_finite", "amd_vrs4_exp2f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("__exp2_finite", "amd_vrd2_exp2", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("__exp2_finite", "amd_vrd4_exp2", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("__exp2_finite", "amd_vrd8_exp2", 
FIXED(8), NOMASK, "_ZGV_LLVM_N8v") + +TLI_DEFINE_VECFUNC("llvm.exp2.f32", "amd_vrs16_exp2f", FIXED(16), NOMASK, "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("llvm.exp2.f32", "amd_vrs8_exp2f", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("llvm.exp2.f32", "amd_vrs4_exp2f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.exp2.f64", "amd_vrd2_exp2", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.exp2.f64", "amd_vrd4_exp2", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.exp2.f64", "amd_vrd8_exp2", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") + +TLI_DEFINE_VECFUNC("powf", "amd_vrs16_powf", FIXED(16), NOMASK, "_ZGV_LLVM_N16vv") +TLI_DEFINE_VECFUNC("powf", "amd_vrs8_powf", FIXED(8), NOMASK, "_ZGV_LLVM_N8vv") +TLI_DEFINE_VECFUNC("powf", "amd_vrs4_powf", FIXED(4), NOMASK, "_ZGV_LLVM_N4vv") +TLI_DEFINE_VECFUNC("pow", "amd_vrd2_pow", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv") +TLI_DEFINE_VECFUNC("pow", "amd_vrd4_pow", FIXED(4), NOMASK, "_ZGV_LLVM_N4vv") +TLI_DEFINE_VECFUNC("pow", "amd_vrd8_pow", FIXED(8), NOMASK, "_ZGV_LLVM_N8vv") + +TLI_DEFINE_VECFUNC("__powf_finite", "amd_vrs16_powf", FIXED(16), NOMASK, "_ZGV_LLVM_N16vv") +TLI_DEFINE_VECFUNC("__powf_finite", "amd_vrs8_powf", FIXED(8), NOMASK, "_ZGV_LLVM_N8vv") +TLI_DEFINE_VECFUNC("__powf_finite", "amd_vrs4_powf", FIXED(4), NOMASK, "_ZGV_LLVM_N4vv") +TLI_DEFINE_VECFUNC("__pow_finite", "amd_vrd2_pow", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv") +TLI_DEFINE_VECFUNC("__pow_finite", "amd_vrd4_pow", FIXED(4), NOMASK, "_ZGV_LLVM_N4vv") +TLI_DEFINE_VECFUNC("__pow_finite", "amd_vrd8_pow", FIXED(8), NOMASK, "_ZGV_LLVM_N8vv") + +TLI_DEFINE_VECFUNC("llvm.pow.f32", "amd_vrs16_powf", FIXED(16), NOMASK, "_ZGV_LLVM_N16vv") +TLI_DEFINE_VECFUNC("llvm.pow.f32", "amd_vrs8_powf", FIXED(8), NOMASK, "_ZGV_LLVM_N8vv") +TLI_DEFINE_VECFUNC("llvm.pow.f32", "amd_vrs4_powf", FIXED(4), NOMASK, "_ZGV_LLVM_N4vv") +TLI_DEFINE_VECFUNC("llvm.pow.f64", "amd_vrd2_pow", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv") +TLI_DEFINE_VECFUNC("llvm.pow.f64", 
"amd_vrd4_pow", FIXED(4), NOMASK, "_ZGV_LLVM_N4vv") +TLI_DEFINE_VECFUNC("llvm.pow.f64", "amd_vrd8_pow", FIXED(8), NOMASK, "_ZGV_LLVM_N8vv") + +TLI_DEFINE_VECFUNC("logf", "amd_vrs16_logf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("logf", "amd_vrs8_logf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("logf", "amd_vrs4_logf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("log", "amd_vrd2_log", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("log", "amd_vrd4_log", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("log", "amd_vrd8_log", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") + +TLI_DEFINE_VECFUNC("__logf_finite", "amd_vrs16_logf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("__logf_finite", "amd_vrs8_logf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__logf_finite", "amd_vrs4_logf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("__log_finite", "amd_vrd2_log", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("__log_finite", "amd_vrd4_log", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("__log_finite", "amd_vrd8_log", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") + +TLI_DEFINE_VECFUNC("llvm.log.f32", "amd_vrs16_logf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("llvm.log.f32", "amd_vrs8_logf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("llvm.log.f32", "amd_vrs4_logf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.log.f64", "amd_vrd2_log", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.log.f64", "amd_vrd4_log", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.log.f64", "amd_vrd8_log", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") + +TLI_DEFINE_VECFUNC("log2f", "amd_vrs16_log2f", FIXED(16), NOMASK, "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("log2f", "amd_vrs8_log2f", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("log2f", "amd_vrs4_log2f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("log2", "amd_vrd2_log2", FIXED(2), NOMASK, 
"_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("log2", "amd_vrd4_log2", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("log2", "amd_vrd8_log2", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") + +TLI_DEFINE_VECFUNC("__log2f_finite", "amd_vrs16_log2f", FIXED(16), NOMASK, "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("__log2f_finite", "amd_vrs8_log2f", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__log2f_finite", "amd_vrs4_log2f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("__log2_finite", "amd_vrd2_log2", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("__log2_finite", "amd_vrd4_log2", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("__log2_finite", "amd_vrd8_log2", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") + +TLI_DEFINE_VECFUNC("llvm.log2.f32", "amd_vrs16_log2f", FIXED(16), NOMASK, "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("llvm.log2.f32", "amd_vrs8_log2f", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("llvm.log2.f32", "amd_vrs4_log2f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.log2.f64", "amd_vrd2_log2", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.log2.f64", "amd_vrd4_log2", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.log2.f64", "amd_vrd8_log2", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") + +TLI_DEFINE_VECFUNC("log10f", "amd_vrs16_log10f", FIXED(16), NOMASK, "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("log10f", "amd_vrs8_log10f", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("log10f", "amd_vrs4_log10f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("__log10f_finite", "amd_vrs16_log10f", FIXED(16), NOMASK, "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("__log10f_finite", "amd_vrs8_log10f", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__log10f_finite", "amd_vrs4_log10f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("llvm.log10.f32", "amd_vrs16_log10f", FIXED(16), NOMASK, "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("llvm.log10.f32", "amd_vrs8_log10f", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") 
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "amd_vrs4_log10f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("erff", "amd_vrs4_erff", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("erff", "amd_vrs8_erff", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("erff", "amd_vrs16_erff", FIXED(16), NOMASK, "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("erf", "amd_vrd2_erf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("erf", "amd_vrd4_erf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("erf", "amd_vrd8_erf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") + +TLI_DEFINE_VECFUNC("exp10", "amd_vrd2_exp10", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("exp10f", "amd_vrs4_exp10f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("expm1", "amd_vrd2_expm1", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("expm1f", "amd_vrs4_expm1f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("log1p", "amd_vrd2_log1p", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("log1pf", "amd_vrs4_log1pf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("tan", "amd_vrd2_tan", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("tan", "amd_vrd4_tan", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("tan", "amd_vrd8_tan", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("tanf", "amd_vrs4_tanf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("tanf", "amd_vrs8_tanf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("tanf", "amd_vrs16_tanf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v") + +TLI_DEFINE_VECFUNC("asin", "amd_vrd8_asin", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("asinf", "amd_vrs4_asinf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("asinf", "amd_vrs8_asinf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("asinf", "amd_vrs16_asinf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v") + +TLI_DEFINE_VECFUNC("acosf", "amd_vrs4_acosf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("acosf", 
"amd_vrs8_acosf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") + +TLI_DEFINE_VECFUNC("atan", "amd_vrd2_atan", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("atan", "amd_vrd4_atan", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("atan", "amd_vrd8_atan", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("atanf", "amd_vrs4_atanf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("atanf", "amd_vrs8_atanf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("atanf", "amd_vrs16_atanf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v") + +TLI_DEFINE_VECFUNC("coshf", "amd_vrs4_coshf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("coshf", "amd_vrs8_coshf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") + +TLI_DEFINE_VECFUNC("tanhf", "amd_vrs4_tanhf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("tanhf", "amd_vrs8_tanhf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") + +TLI_DEFINE_VECFUNC("cbrt", "amd_vrd2_cbrt", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("cbrtf", "amd_vrs4_cbrtf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") + #else #error "Must choose which vector library functions are to be defined." #endif @@ -1087,3 +1280,4 @@ TLI_DEFINE_VECFUNC("tgammaf", "armpl_svtgamma_f32_x", SCALABLE(4), MASKED, "_ZGV #undef TLI_DEFINE_SLEEFGNUABI_SCALABLE_VECFUNCS #undef TLI_DEFINE_MASSV_VECFUNCS_NAMES #undef TLI_DEFINE_ARMPL_VECFUNCS +#undef TLI_DEFINE_AMDLIBM_VECFUNCS diff --git a/llvm/include/llvm/Frontend/Driver/CodeGenOptions.h b/llvm/include/llvm/Frontend/Driver/CodeGenOptions.h index 0b1d924a26b2de..0180670c4c6991 100644 --- a/llvm/include/llvm/Frontend/Driver/CodeGenOptions.h +++ b/llvm/include/llvm/Frontend/Driver/CodeGenOptions.h @@ -29,7 +29,8 @@ enum class VectorLibrary { SVML, // Intel short vector math library. SLEEF, // SLEEF SIMD Library for Evaluating Elementary Functions. Darwin_libsystem_m, // Use Darwin's libsystem_m vector functions. - ArmPL // Arm Performance Libraries. + ArmPL, // Arm Performance Libraries. + AMDLIBM // AMD vector math library. 
}; TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple, diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp index 25951d2a7fe63c..710762a6c0ad1d 100644 --- a/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -37,7 +37,9 @@ static cl::opt ClVectorLibrary( clEnumValN(TargetLibraryInfoImpl::SLEEFGNUABI, "sleefgnuabi", "SIMD Library for Evaluating Elementary Functions"), clEnumValN(TargetLibraryInfoImpl::ArmPL, "ArmPL", - "Arm Performance Libraries"))); + "Arm Performance Libraries"), + clEnumValN(TargetLibraryInfoImpl::AMDLIBM, "AMDLIBM", + "AMD vector math library"))); StringLiteral const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] = { @@ -1273,6 +1275,16 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib( } break; } + case AMDLIBM: { + const VecDesc VecFuncs[] = { +#define TLI_DEFINE_AMDLIBM_VECFUNCS +#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, MASK, VABI_PREFIX) \ + {SCAL, VEC, VF, MASK, VABI_PREFIX}, +#include "llvm/Analysis/VecFuncs.def" + }; + addVectorizableFunctions(VecFuncs); + break; + } case NoLibrary: break; } diff --git a/llvm/lib/Frontend/Driver/CodeGenOptions.cpp b/llvm/lib/Frontend/Driver/CodeGenOptions.cpp index 96c5b19a4a5913..2d74a91f62dc07 100644 --- a/llvm/lib/Frontend/Driver/CodeGenOptions.cpp +++ b/llvm/lib/Frontend/Driver/CodeGenOptions.cpp @@ -46,6 +46,10 @@ TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple, TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::ArmPL, TargetTriple); break; + case VectorLibrary::AMDLIBM: + TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::AMDLIBM, + TargetTriple); + break; default: break; } diff --git a/llvm/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll b/llvm/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll index df8b7c498bd002..fde6cb788b46f9 100644 --- a/llvm/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll +++ 
b/llvm/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes ; RUN: opt -vector-library=SVML -replace-with-veclib -S < %s | FileCheck %s --check-prefixes=COMMON,SVML +; RUN: opt -vector-library=AMDLIBM -replace-with-veclib -S < %s | FileCheck %s --check-prefixes=COMMON,AMDLIBM ; RUN: opt -vector-library=LIBMVEC-X86 -replace-with-veclib -S < %s | FileCheck %s --check-prefixes=COMMON,LIBMVEC-X86 ; RUN: opt -vector-library=MASSV -replace-with-veclib -S < %s | FileCheck %s --check-prefixes=COMMON,MASSV ; RUN: opt -vector-library=Accelerate -replace-with-veclib -S < %s | FileCheck %s --check-prefixes=COMMON,ACCELERATE @@ -13,6 +14,11 @@ define <4 x double> @exp_v4(<4 x double> %in) { ; SVML-NEXT: [[TMP1:%.*]] = call <4 x double> @__svml_exp4(<4 x double> [[IN]]) ; SVML-NEXT: ret <4 x double> [[TMP1]] ; +; AMDLIBM-LABEL: define {{[^@]+}}@exp_v4 +; AMDLIBM-SAME: (<4 x double> [[IN:%.*]]) { +; AMDLIBM-NEXT: [[TMP1:%.*]] = call <4 x double> @amd_vrd4_exp(<4 x double> [[IN]]) +; AMDLIBM-NEXT: ret <4 x double> [[TMP1]] +; ; LIBMVEC-X86-LABEL: define {{[^@]+}}@exp_v4 ; LIBMVEC-X86-SAME: (<4 x double> [[IN:%.*]]) { ; LIBMVEC-X86-NEXT: [[TMP1:%.*]] = call <4 x double> @_ZGVdN4v_exp(<4 x double> [[IN]]) @@ -40,6 +46,11 @@ define <4 x float> @exp_f32(<4 x float> %in) { ; SVML-NEXT: [[TMP1:%.*]] = call <4 x float> @__svml_expf4(<4 x float> [[IN]]) ; SVML-NEXT: ret <4 x float> [[TMP1]] ; +; AMDLIBM-LABEL: define {{[^@]+}}@exp_f32 +; AMDLIBM-SAME: (<4 x float> [[IN:%.*]]) { +; AMDLIBM-NEXT: [[TMP1:%.*]] = call <4 x float> @amd_vrs4_expf(<4 x float> [[IN]]) +; AMDLIBM-NEXT: ret <4 x float> [[TMP1]] +; ; LIBMVEC-X86-LABEL: define {{[^@]+}}@exp_f32 ; LIBMVEC-X86-SAME: (<4 x float> [[IN:%.*]]) { ; LIBMVEC-X86-NEXT: [[TMP1:%.*]] = call <4 x float> @_ZGVbN4v_expf(<4 x float> [[IN]]) diff --git 
a/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls-finite.ll b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls-finite.ll new file mode 100644 index 00000000000000..54bb9352f3c89c --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls-finite.ll @@ -0,0 +1,332 @@ +; RUN: opt -vector-library=AMDLIBM -passes=inject-tli-mappings,loop-vectorize -S < %s | FileCheck %s + +; Test to verify that when math headers are built with +; __FINITE_MATH_ONLY__ enabled, causing use of __<func>_finite +; function versions, vectorization can map these to vector versions. + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare float @__expf_finite(float) #0 + +; CHECK-LABEL: @exp_f32 +; CHECK: <4 x float> @amd_vrs4_expf +; CHECK: ret +define void @exp_f32(ptr nocapture %varray) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call fast float @__expf_finite(float %conv) + %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv + store float %call, ptr %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1 + +for.end: ; preds = %for.body + ret void +} + +!1 = distinct !{!1, !2, !3} +!2 = !{!"llvm.loop.vectorize.width", i32 4} +!3 = !{!"llvm.loop.vectorize.enable", i1 true} + + +declare double @__exp_finite(double) #0 + +; CHECK-LABEL: @exp_f64 +; CHECK: <4 x double> @amd_vrd4_exp +; CHECK: ret +define void @exp_f64(ptr nocapture %varray) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call fast double 
@__exp_finite(double %conv) + %arrayidx = getelementptr inbounds double, ptr %varray, i64 %indvars.iv + store double %call, ptr %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !11 + +for.end: ; preds = %for.body + ret void +} + +!11 = distinct !{!11, !12, !13} +!12 = !{!"llvm.loop.vectorize.width", i32 4} +!13 = !{!"llvm.loop.vectorize.enable", i1 true} + + + + +declare float @__logf_finite(float) #0 + +; CHECK-LABEL: @log_f32 +; CHECK: <4 x float> @amd_vrs4_logf +; CHECK: ret +define void @log_f32(ptr nocapture %varray) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call fast float @__logf_finite(float %conv) + %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv + store float %call, ptr %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !21 + +for.end: ; preds = %for.body + ret void +} + +!21 = distinct !{!21, !22, !23} +!22 = !{!"llvm.loop.vectorize.width", i32 4} +!23 = !{!"llvm.loop.vectorize.enable", i1 true} + + +declare double @__log_finite(double) #0 + +; CHECK-LABEL: @log_f64 +; CHECK: <4 x double> @amd_vrd4_log +; CHECK: ret +define void @log_f64(ptr nocapture %varray) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call fast double @__log_finite(double %conv) + %arrayidx = getelementptr inbounds double, ptr %varray, i64 %indvars.iv + store double %call, ptr %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 
%indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !31 + +for.end: ; preds = %for.body + ret void +} + +!31 = distinct !{!31, !32, !33} +!32 = !{!"llvm.loop.vectorize.width", i32 4} +!33 = !{!"llvm.loop.vectorize.enable", i1 true} + + +declare float @__powf_finite(float, float) #0 + +; CHECK-LABEL: @pow_f32 +; CHECK: <4 x float> @amd_vrs4_powf +; CHECK: ret +define void @pow_f32(ptr nocapture %varray, ptr nocapture readonly %exp) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to float + %arrayidx = getelementptr inbounds float, ptr %exp, i64 %indvars.iv + %tmp1 = load float, ptr %arrayidx, align 4 + %tmp2 = tail call fast float @__powf_finite(float %conv, float %tmp1) + %arrayidx2 = getelementptr inbounds float, ptr %varray, i64 %indvars.iv + store float %tmp2, ptr %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !41 + +for.end: ; preds = %for.body + ret void +} + +!41 = distinct !{!41, !42, !43} +!42 = !{!"llvm.loop.vectorize.width", i32 4} +!43 = !{!"llvm.loop.vectorize.enable", i1 true} + + +declare double @__pow_finite(double, double) #0 + +; CHECK-LABEL: @pow_f64 +; CHECK: <4 x double> @amd_vrd4_pow +; CHECK: ret +define void @pow_f64(ptr nocapture %varray, ptr nocapture readonly %exp) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to double + %arrayidx = getelementptr inbounds double, ptr %exp, i64 %indvars.iv + %tmp1 = load double, ptr %arrayidx, align 4 + %tmp2 = tail call fast double @__pow_finite(double %conv, double %tmp1) + 
%arrayidx2 = getelementptr inbounds double, ptr %varray, i64 %indvars.iv + store double %tmp2, ptr %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !51 + +for.end: ; preds = %for.body + ret void +} + +!51 = distinct !{!51, !52, !53} +!52 = !{!"llvm.loop.vectorize.width", i32 4} +!53 = !{!"llvm.loop.vectorize.enable", i1 true} + +declare float @__exp2f_finite(float) #0 + +define void @exp2f_finite(ptr nocapture %varray) { +; CHECK-LABEL: @exp2f_finite( +; CHECK: call <4 x float> @amd_vrs4_exp2f(<4 x float> %{{.*}}) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @__exp2f_finite(float %conv) + %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv + store float %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !61 + +for.end: + ret void +} + +!61 = distinct !{!61, !62, !63} +!62 = !{!"llvm.loop.vectorize.width", i32 4} +!63 = !{!"llvm.loop.vectorize.enable", i1 true} + +declare double @__exp2_finite(double) #0 + +define void @exp2_finite(ptr nocapture %varray) { +; CHECK-LABEL: @exp2_finite( +; CHECK: call <4 x double> @amd_vrd4_exp2(<4 x double> {{.*}}) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @__exp2_finite(double %conv) + %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv + store double %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !71 + 
+for.end: + ret void +} + +!71 = distinct !{!71, !72, !73} +!72 = !{!"llvm.loop.vectorize.width", i32 4} +!73 = !{!"llvm.loop.vectorize.enable", i1 true} + +declare float @__log2f_finite(float) #0 + +; CHECK-LABEL: @log2_f32 +; CHECK: <4 x float> @amd_vrs4_log2f +; CHECK: ret +define void @log2_f32(ptr nocapture %varray) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call fast float @__log2f_finite(float %conv) + %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv + store float %call, ptr %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !81 + +for.end: ; preds = %for.body + ret void +} + +!81 = distinct !{!81, !82, !83} +!82 = !{!"llvm.loop.vectorize.width", i32 4} +!83 = !{!"llvm.loop.vectorize.enable", i1 true} + + +declare double @__log2_finite(double) #0 + +; CHECK-LABEL: @log2_f64 +; CHECK: <4 x double> @amd_vrd4_log2 +; CHECK: ret +define void @log2_f64(ptr nocapture %varray) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call fast double @__log2_finite(double %conv) + %arrayidx = getelementptr inbounds double, ptr %varray, i64 %indvars.iv + store double %call, ptr %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !91 + +for.end: ; preds = %for.body + ret void +} + +!91 = distinct !{!91, !92, !93} +!92 = !{!"llvm.loop.vectorize.width", i32 4} +!93 = !{!"llvm.loop.vectorize.enable", i1 true} + +declare float 
@__log10f_finite(float) #0 + +; CHECK-LABEL: @log10_f32 +; CHECK: <4 x float> @amd_vrs4_log10f +; CHECK: ret +define void @log10_f32(ptr nocapture %varray) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call fast float @__log10f_finite(float %conv) + %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv + store float %call, ptr %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !101 + +for.end: ; preds = %for.body + ret void +} + +!101 = distinct !{!101, !102, !103} +!102 = !{!"llvm.loop.vectorize.width", i32 4} +!103 = !{!"llvm.loop.vectorize.enable", i1 true} + + diff --git a/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll new file mode 100644 index 00000000000000..8d2820a245d952 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll @@ -0,0 +1,869 @@ +; RUN: opt -vector-library=AMDLIBM -passes=inject-tli-mappings,loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -mattr=avx -S < %s | FileCheck %s +; RUN: opt -vector-library=AMDLIBM -passes=inject-tli-mappings,loop-vectorize -force-vector-width=8 -force-vector-interleave=1 -mattr=+avx512f -S < %s | FileCheck %s --check-prefix=CHECK-AVX512-VF8 +; RUN: opt -vector-library=AMDLIBM -passes=inject-tli-mappings,loop-vectorize -force-vector-width=16 -force-vector-interleave=1 -mattr=+avx512f -S < %s | FileCheck %s --check-prefix=CHECK-AVX512-VF16 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare double @sin(double) #0 +declare float @sinf(float) #0 +declare double @llvm.sin.f64(double) #0 +declare float @llvm.sin.f32(float) #0 + 
+declare double @cos(double) #0 +declare float @cosf(float) #0 +declare double @llvm.cos.f64(double) #0 +declare float @llvm.cos.f32(float) #0 + +declare double @pow(double, double) #0 +declare float @powf(float, float) #0 +declare double @llvm.pow.f64(double, double) #0 +declare float @llvm.pow.f32(float, float) #0 + +declare double @exp(double) #0 +declare float @expf(float) #0 +declare double @llvm.exp.f64(double) #0 +declare float @llvm.exp.f32(float) #0 + +declare double @log(double) #0 +declare float @logf(float) #0 +declare double @llvm.log.f64(double) #0 +declare float @llvm.log.f32(float) #0 + +declare double @log2(double) #0 +declare float @log2f(float) #0 +declare double @llvm.log2.f64(double) #0 +declare float @llvm.log2.f32(float) #0 + +declare double @log10(double) #0 +declare float @log10f(float) #0 +declare double @llvm.log10.f64(double) #0 +declare float @llvm.log10.f32(float) #0 + +declare double @sqrt(double) #0 +declare float @sqrtf(float) #0 + +declare double @exp2(double) #0 +declare float @exp2f(float) #0 +declare double @llvm.exp2.f64(double) #0 +declare float @llvm.exp2.f32(float) #0 + +define void @sin_f64(ptr nocapture %varray) { +; CHECK-LABEL: @sin_f64( +; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_sin(<4 x double> [[TMP4:%.*]]) +; CHECK: ret void +; +; CHECK-AVX512-VF8-LABEL: @sin_f64( +; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_sin(<8 x double> [[TMP4:%.*]]) +; CHECK-AVX512-VF8: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @sin(double %conv) + %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv + store double %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @sin_f32(ptr nocapture %varray) { +; 
CHECK-LABEL: @sin_f32( +; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_sinf(<4 x float> [[TMP4:%.*]]) +; CHECK: ret void +; +; CHECK-AVX512-VF16-LABEL: @sin_f32( +; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_sinf(<16 x float> [[TMP4:%.*]]) +; CHECK-AVX512-VF16: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @sinf(float %conv) + %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv + store float %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @sin_f64_intrinsic(ptr nocapture %varray) { +; CHECK-LABEL: @sin_f64_intrinsic( +; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_sin(<4 x double> [[TMP4:%.*]]) +; CHECK: ret void +; +; CHECK-AVX512-VF8-LABEL: @sin_f64_intrinsic( +; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_sin(<8 x double> [[TMP4:%.*]]) +; CHECK-AVX512-VF8: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.sin.f64(double %conv) + %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv + store double %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @sin_f32_intrinsic(ptr nocapture %varray) { +; CHECK-LABEL: @sin_f32_intrinsic( +; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_sinf(<4 x float> [[TMP4:%.*]]) +; CHECK: ret void +; +; CHECK-AVX512-VF16-LABEL: @sin_f32_intrinsic( +; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_sinf(<16 x float> [[TMP4:%.*]]) +; CHECK-AVX512-VF16: ret 
void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.sin.f32(float %conv) + %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv + store float %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @cos_f64(ptr nocapture %varray) { +; CHECK-LABEL: @cos_f64( +; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_cos(<4 x double> [[TMP4:%.*]]) +; CHECK: ret void +; +; CHECK-AVX512-VF8-LABEL: @cos_f64( +; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_cos(<8 x double> [[TMP4:%.*]]) +; CHECK-AVX512-VF8: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @cos(double %conv) + %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv + store double %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @cos_f32(ptr nocapture %varray) { +; CHECK-LABEL: @cos_f32( +; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_cosf(<4 x float> [[TMP4:%.*]]) +; CHECK: ret void +; +; CHECK-AVX512-VF16-LABEL: @cos_f32( +; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_cosf(<16 x float> [[TMP4:%.*]]) +; CHECK-AVX512-VF16: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @cosf(float %conv) + %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv + store float %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw 
i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @cos_f64_intrinsic(ptr nocapture %varray) { +; CHECK-LABEL: @cos_f64_intrinsic( +; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_cos(<4 x double> [[TMP4:%.*]]) +; CHECK: ret void +; +; CHECK-AVX512-VF8-LABEL: @cos_f64_intrinsic( +; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_cos(<8 x double> [[TMP4:%.*]]) +; CHECK-AVX512-VF8: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.cos.f64(double %conv) + %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv + store double %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @cos_f32_intrinsic(ptr nocapture %varray) { +; CHECK-LABEL: @cos_f32_intrinsic( +; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_cosf(<4 x float> [[TMP4:%.*]]) +; CHECK: ret void +; +; CHECK-AVX512-VF16-LABEL: @cos_f32_intrinsic( +; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_cosf(<16 x float> [[TMP4:%.*]]) +; CHECK-AVX512-VF16: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.cos.f32(float %conv) + %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv + store float %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @pow_f64(ptr nocapture %varray, ptr nocapture readonly %exp) { +; CHECK-LABEL: @pow_f64( +; CHECK: [[TMP8:%.*]] = call <4 x double> 
@amd_vrd4_pow(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]]) +; CHECK: ret void +; +; CHECK-AVX512-VF8-LABEL: @pow_f64( +; CHECK-AVX512-VF8: [[TMP8:%.*]] = call <8 x double> @amd_vrd8_pow(<8 x double> [[TMP4:%.*]], <8 x double> [[WIDE_LOAD:%.*]]) +; CHECK-AVX512-VF8: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %arrayidx = getelementptr inbounds double, ptr %exp, i64 %iv + %tmp1 = load double, ptr %arrayidx, align 4 + %tmp2 = tail call double @pow(double %conv, double %tmp1) + %arrayidx2 = getelementptr inbounds double, ptr %varray, i64 %iv + store double %tmp2, ptr %arrayidx2, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @pow_f64_intrinsic(ptr nocapture %varray, ptr nocapture readonly %exp) { +; CHECK-LABEL: @pow_f64_intrinsic( +; CHECK: [[TMP8:%.*]] = call <4 x double> @amd_vrd4_pow(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]]) +; CHECK: ret void +; +; CHECK-AVX512-VF8-LABEL: @pow_f64_intrinsic( +; CHECK-AVX512-VF8: [[TMP8:%.*]] = call <8 x double> @amd_vrd8_pow(<8 x double> [[TMP4:%.*]], <8 x double> [[WIDE_LOAD:%.*]]) +; CHECK-AVX512-VF8: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %arrayidx = getelementptr inbounds double, ptr %exp, i64 %iv + %tmp1 = load double, ptr %arrayidx, align 4 + %tmp2 = tail call double @llvm.pow.f64(double %conv, double %tmp1) + %arrayidx2 = getelementptr inbounds double, ptr %varray, i64 %iv + store double %tmp2, ptr %arrayidx2, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @pow_f32(ptr 
nocapture %varray, ptr nocapture readonly %exp) { +; CHECK-LABEL: @pow_f32( +; CHECK: [[TMP8:%.*]] = call <4 x float> @amd_vrs4_powf(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]]) +; CHECK: ret void +; +; CHECK-AVX512-VF16-LABEL: @pow_f32( +; CHECK-AVX512-VF16: [[TMP8:%.*]] = call <16 x float> @amd_vrs16_powf(<16 x float> [[TMP4:%.*]], <16 x float> [[WIDE_LOAD:%.*]]) +; CHECK-AVX512-VF16: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %arrayidx = getelementptr inbounds float, ptr %exp, i64 %iv + %tmp1 = load float, ptr %arrayidx, align 4 + %tmp2 = tail call float @powf(float %conv, float %tmp1) + %arrayidx2 = getelementptr inbounds float, ptr %varray, i64 %iv + store float %tmp2, ptr %arrayidx2, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @pow_f32_intrinsic(ptr nocapture %varray, ptr nocapture readonly %exp) { +; CHECK-LABEL: @pow_f32_intrinsic( +; CHECK: [[TMP8:%.*]] = call <4 x float> @amd_vrs4_powf(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]]) +; CHECK: ret void +; +; CHECK-AVX512-VF16-LABEL: @pow_f32_intrinsic( +; CHECK-AVX512-VF16: [[TMP8:%.*]] = call <16 x float> @amd_vrs16_powf(<16 x float> [[TMP4:%.*]], <16 x float> [[WIDE_LOAD:%.*]]) +; CHECK-AVX512-VF16: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %arrayidx = getelementptr inbounds float, ptr %exp, i64 %iv + %tmp1 = load float, ptr %arrayidx, align 4 + %tmp2 = tail call float @llvm.pow.f32(float %conv, float %tmp1) + %arrayidx2 = getelementptr inbounds float, ptr %varray, i64 %iv + store float %tmp2, ptr %arrayidx2, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 
1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @exp_f64(ptr nocapture %varray) { +; CHECK-LABEL: @exp_f64( +; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_exp(<4 x double> [[TMP4:%.*]]) +; CHECK: ret void +; +; CHECK-AVX512-VF8-LABEL: @exp_f64( +; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_exp(<8 x double> [[TMP4:%.*]]) +; CHECK-AVX512-VF8: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @exp(double %conv) + %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv + store double %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @exp_f32(ptr nocapture %varray) { +; CHECK-LABEL: @exp_f32( +; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_expf(<4 x float> [[TMP4:%.*]]) +; CHECK: ret void +; +; CHECK-AVX512-VF16-LABEL: @exp_f32( +; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_expf(<16 x float> [[TMP4:%.*]]) +; CHECK-AVX512-VF16: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @expf(float %conv) + %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv + store float %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @exp_f64_intrinsic(ptr nocapture %varray) { +; CHECK-LABEL: @exp_f64_intrinsic( +; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_exp(<4 x double> [[TMP4:%.*]]) +; CHECK: ret void +; +; CHECK-AVX512-VF8-LABEL: @exp_f64_intrinsic( +; CHECK-AVX512-VF8: 
[[TMP5:%.*]] = call <8 x double> @amd_vrd8_exp(<8 x double> [[TMP4:%.*]]) +; CHECK-AVX512-VF8: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.exp.f64(double %conv) + %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv + store double %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @exp_f32_intrinsic(ptr nocapture %varray) { +; CHECK-LABEL: @exp_f32_intrinsic( +; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_expf(<4 x float> [[TMP4:%.*]]) +; CHECK: ret void +; +; CHECK-AVX512-VF16-LABEL: @exp_f32_intrinsic( +; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_expf(<16 x float> [[TMP4:%.*]]) +; CHECK-AVX512-VF16: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.exp.f32(float %conv) + %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv + store float %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @log_f64(ptr nocapture %varray) { +; CHECK-LABEL: @log_f64( +; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_log(<4 x double> [[TMP4:%.*]]) +; CHECK: ret void +; +; CHECK-AVX512-VF8-LABEL: @log_f64( +; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_log(<8 x double> [[TMP4:%.*]]) +; CHECK-AVX512-VF8: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @log(double 
%conv) + %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv + store double %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @log_f32(ptr nocapture %varray) { +; CHECK-LABEL: @log_f32( +; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_logf(<4 x float> [[TMP4:%.*]]) +; CHECK: ret void +; +; CHECK-AVX512-VF16-LABEL: @log_f32( +; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_logf(<16 x float> [[TMP4:%.*]]) +; CHECK-AVX512-VF16: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @logf(float %conv) + %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv + store float %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @log_f64_intrinsic(ptr nocapture %varray) { +; CHECK-LABEL: @log_f64_intrinsic( +; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_log(<4 x double> [[TMP4:%.*]]) +; CHECK: ret void +; +; CHECK-AVX512-VF8-LABEL: @log_f64_intrinsic( +; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_log(<8 x double> [[TMP4:%.*]]) +; CHECK-AVX512-VF8: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.log.f64(double %conv) + %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv + store double %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @log_f32_intrinsic(ptr nocapture 
%varray) { +; CHECK-LABEL: @log_f32_intrinsic( +; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_logf(<4 x float> [[TMP4:%.*]]) +; CHECK: ret void +; +; CHECK-AVX512-VF16-LABEL: @log_f32_intrinsic( +; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_logf(<16 x float> [[TMP4:%.*]]) +; CHECK-AVX512-VF16: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.log.f32(float %conv) + %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv + store float %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @log2_f64(ptr nocapture %varray) { +; CHECK-LABEL: @log2_f64( +; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_log2(<4 x double> [[TMP4:%.*]]) +; CHECK: ret void +; +; CHECK-AVX512-VF8-LABEL: @log2_f64( +; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_log2(<8 x double> [[TMP4:%.*]]) +; CHECK-AVX512-VF8: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @log2(double %conv) + %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv + store double %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @log2_f32(ptr nocapture %varray) { +; CHECK-LABEL: @log2_f32( +; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_log2f(<4 x float> [[TMP4:%.*]]) +; CHECK: ret void +; +; CHECK-AVX512-VF16-LABEL: @log2_f32( +; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_log2f(<16 x float> [[TMP4:%.*]]) +; CHECK-AVX512-VF16: ret void +; +entry: 
+ br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @log2f(float %conv) + %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv + store float %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @log2_f64_intrinsic(ptr nocapture %varray) { +; CHECK-LABEL: @log2_f64_intrinsic( +; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_log2(<4 x double> [[TMP4:%.*]]) +; CHECK: ret void +; +; CHECK-AVX512-VF8-LABEL: @log2_f64_intrinsic( +; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_log2(<8 x double> [[TMP4:%.*]]) +; CHECK-AVX512-VF8: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.log2.f64(double %conv) + %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv + store double %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @log2_f32_intrinsic(ptr nocapture %varray) { +; CHECK-LABEL: @log2_f32_intrinsic( +; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_log2f(<4 x float> [[TMP4:%.*]]) +; CHECK: ret void +; +; CHECK-AVX512-VF16-LABEL: @log2_f32_intrinsic( +; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_log2f(<16 x float> [[TMP4:%.*]]) +; CHECK-AVX512-VF16: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.log2.f32(float %conv) + %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv + 
store float %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @log10_f32(ptr nocapture %varray) { +; CHECK-LABEL: @log10_f32( +; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_log10f(<4 x float> [[TMP4:%.*]]) +; CHECK: ret void +; +; CHECK-AVX512-VF16-LABEL: @log10_f32( +; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_log10f(<16 x float> [[TMP4:%.*]]) +; CHECK-AVX512-VF16: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @log10f(float %conv) + %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv + store float %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @log10_f32_intrinsic(ptr nocapture %varray) { +; CHECK-LABEL: @log10_f32_intrinsic( +; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_log10f(<4 x float> [[TMP4:%.*]]) +; CHECK: ret void +; +; CHECK-AVX512-VF16-LABEL: @log10_f32_intrinsic( +; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_log10f(<16 x float> [[TMP4:%.*]]) +; CHECK-AVX512-VF16: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.log10.f32(float %conv) + %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv + store float %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @exp2_f64(ptr nocapture %varray) { +; CHECK-LABEL: @exp2_f64( +; CHECK: [[TMP5:%.*]] = 
call <4 x double> @amd_vrd4_exp2(<4 x double> [[TMP4:%.*]]) +; CHECK: ret void +; +; CHECK-AVX512-VF8-LABEL: @exp2_f64( +; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_exp2(<8 x double> [[TMP4:%.*]]) +; CHECK-AVX512-VF8: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @exp2(double %conv) + %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv + store double %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @exp2_f32(ptr nocapture %varray) { +; CHECK-LABEL: @exp2_f32( +; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_exp2f(<4 x float> [[TMP4:%.*]]) +; CHECK: ret void +; +; CHECK-AVX512-VF16-LABEL: @exp2_f32( +; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_exp2f(<16 x float> [[TMP4:%.*]]) +; CHECK-AVX512-VF16: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @exp2f(float %conv) + %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv + store float %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @exp2_f64_intrinsic(ptr nocapture %varray) { +; CHECK-LABEL: @exp2_f64_intrinsic( +; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_exp2(<4 x double> [[TMP4:%.*]]) +; CHECK: ret void +; +; CHECK-AVX512-VF8-LABEL: @exp2_f64_intrinsic( +; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_exp2(<8 x double> [[TMP4:%.*]]) +; CHECK-AVX512-VF8: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, 
%entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.exp2.f64(double %conv) + %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv + store double %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @exp2_f32_intrinsic(ptr nocapture %varray) { +; CHECK-LABEL: @exp2_f32_intrinsic( +; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_exp2f(<4 x float> [[TMP4:%.*]]) +; CHECK: ret void +; +; CHECK-AVX512-VF16-LABEL: @exp2_f32_intrinsic( +; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_exp2f(<16 x float> [[TMP4:%.*]]) +; CHECK-AVX512-VF16: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.exp2.f32(float %conv) + %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv + store float %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +attributes #0 = { nounwind readnone } diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sin-sqrt.ll b/llvm/test/Transforms/SLPVectorizer/X86/sin-sqrt.ll index c02b031c39839b..9810d50beea736 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/sin-sqrt.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/sin-sqrt.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=skylake-avx512 -passes=slp-vectorizer -S | FileCheck %s ; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=skylake-avx512 -passes=inject-tli-mappings,slp-vectorizer -vector-library=SVML -S | FileCheck %s --check-prefix=VECLIB +; RUN: opt < %s 
-mtriple=x86_64-unknown-linux -mcpu=skylake-avx512 -passes=inject-tli-mappings,slp-vectorizer -vector-library=AMDLIBM -S | FileCheck %s --check-prefix=AMDLIBM @src = common global [8 x double] zeroinitializer, align 64 @dst = common global [8 x double] zeroinitializer, align 64 @@ -63,7 +64,33 @@ define void @test() { ; VECLIB-NEXT: store <2 x double> [[TMP15]], ptr @dst, align 8 ; VECLIB-NEXT: ret void ; - +; AMDLIBM-LABEL: @test( +; AMDLIBM-NEXT: [[A0:%.*]] = load double, ptr @src, align 8 +; AMDLIBM-NEXT: [[A1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 1), align 8 +; AMDLIBM-NEXT: [[A2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 2), align 8 +; AMDLIBM-NEXT: [[A3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 3), align 8 +; AMDLIBM-NEXT: [[A4:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 4), align 8 +; AMDLIBM-NEXT: [[A5:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 5), align 8 +; AMDLIBM-NEXT: [[A6:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 6), align 8 +; AMDLIBM-NEXT: [[A7:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 7), align 8 +; AMDLIBM-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A2]], i32 0 +; AMDLIBM-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[A6]], i32 1 +; AMDLIBM-NEXT: [[TMP3:%.*]] = call fast <2 x double> @amd_vrd2_sin(<2 x double> [[TMP2]]) +; AMDLIBM-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[A3]], i32 0 +; AMDLIBM-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[A7]], i32 1 +; AMDLIBM-NEXT: [[TMP6:%.*]] = call fast <2 x double> @amd_vrd2_sin(<2 x double> [[TMP5]]) +; AMDLIBM-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0 +; AMDLIBM-NEXT: [[TMP8:%.*]] = insertelement <2 
x double> [[TMP7]], double [[A4]], i32 1 +; AMDLIBM-NEXT: [[TMP9:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP8]]) +; AMDLIBM-NEXT: [[TMP10:%.*]] = insertelement <2 x double> poison, double [[A1]], i32 0 +; AMDLIBM-NEXT: [[TMP11:%.*]] = insertelement <2 x double> [[TMP10]], double [[A5]], i32 1 +; AMDLIBM-NEXT: [[TMP12:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP11]]) +; AMDLIBM-NEXT: [[TMP13:%.*]] = fadd fast <2 x double> [[TMP9]], [[TMP6]] +; AMDLIBM-NEXT: [[TMP14:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP12]] +; AMDLIBM-NEXT: [[TMP15:%.*]] = fadd fast <2 x double> [[TMP13]], [[TMP14]] +; AMDLIBM-NEXT: store <2 x double> [[TMP15]], ptr @dst, align 8 +; AMDLIBM-NEXT: ret void +; %a0 = load double, ptr @src, align 8 %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 1), align 8 %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 2), align 8 diff --git a/llvm/test/Transforms/Util/add-TLI-mappings.ll b/llvm/test/Transforms/Util/add-TLI-mappings.ll index 7b12de90319012..d86e44f199b391 100644 --- a/llvm/test/Transforms/Util/add-TLI-mappings.ll +++ b/llvm/test/Transforms/Util/add-TLI-mappings.ll @@ -1,4 +1,5 @@ ; RUN: opt -mtriple=x86_64-unknown-linux-gnu -vector-library=SVML -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,SVML +; RUN: opt -mtriple=x86_64-unknown-linux-gnu -vector-library=AMDLIBM -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,AMDLIBM ; RUN: opt -mtriple=powerpc64-unknown-linux-gnu -vector-library=MASSV -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,MASSV ; RUN: opt -mtriple=x86_64-unknown-linux-gnu -vector-library=LIBMVEC-X86 -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,LIBMVEC-X86 ; RUN: opt -mtriple=x86_64-unknown-linux-gnu -vector-library=Accelerate -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,ACCELERATE 
@@ -13,6 +14,13 @@ ; SVML-SAME: ptr @__svml_log10f4, ; SVML-SAME: ptr @__svml_log10f8, ; SVML-SAME: ptr @__svml_log10f16 +; AMDLIBM-SAME: [6 x ptr] [ +; AMDLIBM-SAME: ptr @amd_vrd2_sin, +; AMDLIBM-SAME: ptr @amd_vrd4_sin, +; AMDLIBM-SAME: ptr @amd_vrd8_sin, +; AMDLIBM-SAME: ptr @amd_vrs4_log10f, +; AMDLIBM-SAME: ptr @amd_vrs8_log10f, +; AMDLIBM-SAME: ptr @amd_vrs16_log10f ; MASSV-SAME: [2 x ptr] [ ; MASSV-SAME: ptr @__sind2, ; MASSV-SAME: ptr @__log10f4 @@ -74,6 +82,7 @@ declare float @modff(float, ptr) #0 define double @sin_f64(double %in) { ; COMMON-LABEL: @sin_f64( ; SVML: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]] +; AMDLIBM: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]] ; MASSV: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]] ; ACCELERATE: call double @sin(double %{{.*}}) ; LIBMVEC-X86: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]] @@ -130,6 +139,7 @@ declare void @sincospif(float, ptr, ptr) #0 define float @call_llvm.log10.f32(float %in) { ; COMMON-LABEL: @call_llvm.log10.f32( ; SVML: call float @llvm.log10.f32(float %{{.*}}) +; AMDLIBM: call float @llvm.log10.f32(float %{{.*}}) ; LIBMVEC-X86: call float @llvm.log10.f32(float %{{.*}}) ; MASSV: call float @llvm.log10.f32(float %{{.*}}) #[[LOG10:[0-9]+]] ; ACCELERATE: call float @llvm.log10.f32(float %{{.*}}) #[[LOG10:[0-9]+]] @@ -137,6 +147,7 @@ define float @call_llvm.log10.f32(float %in) { ; ARMPL: call float @llvm.log10.f32(float %{{.*}}) #[[LOG10:[0-9]+]] ; No mapping of "llvm.log10.f32" to a vector function for SVML. 
; SVML-NOT: _ZGV_LLVM_{{.*}}_llvm.log10.f32({{.*}}) +; AMDLIBM-NOT: _ZGV_LLVM_{{.*}}_llvm.log10.f32({{.*}}) ; LIBMVEC-X86-NOT: _ZGV_LLVM_{{.*}}_llvm.log10.f32({{.*}}) %call = tail call float @llvm.log10.f32(float %in) ret float %call @@ -151,6 +162,13 @@ declare float @llvm.log10.f32(float) #0 ; SVML: declare <8 x float> @__svml_log10f8(<8 x float>) ; SVML: declare <16 x float> @__svml_log10f16(<16 x float>) +; AMDLIBM: declare <2 x double> @amd_vrd2_sin(<2 x double>) +; AMDLIBM: declare <4 x double> @amd_vrd4_sin(<4 x double>) +; AMDLIBM: declare <8 x double> @amd_vrd8_sin(<8 x double>) +; AMDLIBM: declare <4 x float> @amd_vrs4_log10f(<4 x float>) +; AMDLIBM: declare <8 x float> @amd_vrs8_log10f(<8 x float>) +; AMDLIBM: declare <16 x float> @amd_vrs16_log10f(<16 x float>) + ; MASSV: declare <2 x double> @__sind2(<2 x double>) ; MASSV: declare <4 x float> @__log10f4(<4 x float>) @@ -194,6 +212,11 @@ attributes #0 = { nounwind readnone } ; SVML-SAME: _ZGV_LLVM_N4v_sin(__svml_sin4), ; SVML-SAME: _ZGV_LLVM_N8v_sin(__svml_sin8)" } +; AMDLIBM: attributes #[[SIN]] = { "vector-function-abi-variant"= +; AMDLIBM-SAME: "_ZGV_LLVM_N2v_sin(amd_vrd2_sin), +; AMDLIBM-SAME: _ZGV_LLVM_N4v_sin(amd_vrd4_sin), +; AMDLIBM-SAME: _ZGV_LLVM_N8v_sin(amd_vrd8_sin)" } + ; MASSV: attributes #[[SIN]] = { "vector-function-abi-variant"= ; MASSV-SAME: "_ZGV_LLVM_N2v_sin(__sind2)" } ; MASSV: attributes #[[LOG10]] = { "vector-function-abi-variant"= From ab76e48ac2c2dbfc7d6a600b9b0dd0672e6d9439 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Thu, 15 Feb 2024 16:24:37 +0900 Subject: [PATCH 220/240] [MC/DC] Refactor: Let MCDCConditionID int16_t with zero-origin (#81257) Also, Let `NumConditions` `uint16_t`. It is smarter to handle the ID as signed. Narrowing to `int16_t` will reduce costs of handling byvalue. (See also #81221 and #81227) External behavior doesn't change. They below handle values as internal values plus 1. 
* `-dump-coverage-mapping` * `CoverageMappingReader.cpp` * `CoverageMappingWriter.cpp` --- clang/lib/CodeGen/CodeGenPGO.cpp | 8 +++--- clang/lib/CodeGen/CodeGenPGO.h | 2 ++ clang/lib/CodeGen/CoverageMappingGen.cpp | 28 ++++++++++--------- .../llvm/ProfileData/Coverage/MCDCTypes.h | 4 +-- .../ProfileData/Coverage/CoverageMapping.cpp | 26 ++++++++--------- .../Coverage/CoverageMappingReader.cpp | 21 ++++++++------ .../Coverage/CoverageMappingWriter.cpp | 12 +++++--- .../ProfileData/CoverageMappingTest.cpp | 10 +++---- 8 files changed, 61 insertions(+), 50 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index b5ce1aad7ea1e5..48c5e68a3b7ba4 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -1031,7 +1031,7 @@ void CodeGenPGO::emitCounterRegionMapping(const Decl *D) { std::string CoverageMapping; llvm::raw_string_ostream OS(CoverageMapping); - RegionMCDCState->CondIDMap.clear(); + RegionCondIDMap.reset(new llvm::DenseMap); CoverageMappingGen MappingGen( *CGM.getCoverageMapping(), CGM.getContext().getSourceManager(), CGM.getLangOpts(), RegionCounterMap.get(), RegionMCDCState.get()); @@ -1195,8 +1195,8 @@ void CodeGenPGO::emitMCDCCondBitmapUpdate(CGBuilderTy &Builder, const Expr *S, return; // Extract the ID of the condition we are setting in the bitmap. - unsigned CondID = ExprMCDCConditionIDMapIterator->second; - assert(CondID > 0 && "Condition has no ID!"); + auto CondID = ExprMCDCConditionIDMapIterator->second; + assert(CondID >= 0 && "Condition has no ID!"); auto *I8PtrTy = llvm::PointerType::getUnqual(CGM.getLLVMContext()); @@ -1205,7 +1205,7 @@ void CodeGenPGO::emitMCDCCondBitmapUpdate(CGBuilderTy &Builder, const Expr *S, // the resulting value is used to update the boolean expression's bitmap. 
llvm::Value *Args[5] = {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), Builder.getInt64(FunctionHash), - Builder.getInt32(CondID - 1), + Builder.getInt32(CondID), MCDCCondBitmapAddr.getPointer(), Val}; Builder.CreateCall( CGM.getIntrinsic(llvm::Intrinsic::instrprof_mcdc_condbitmap_update), diff --git a/clang/lib/CodeGen/CodeGenPGO.h b/clang/lib/CodeGen/CodeGenPGO.h index d3c2b277238fc7..369bf05b59a0d2 100644 --- a/clang/lib/CodeGen/CodeGenPGO.h +++ b/clang/lib/CodeGen/CodeGenPGO.h @@ -36,6 +36,8 @@ class CodeGenPGO { unsigned NumRegionCounters; uint64_t FunctionHash; std::unique_ptr> RegionCounterMap; + std::unique_ptr> RegionMCDCBitmapMap; + std::unique_ptr> RegionCondIDMap; std::unique_ptr> StmtCountMap; std::unique_ptr ProfRecord; std::unique_ptr RegionMCDCState; diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp index 93c3c31e71fa83..fdf821a0eb6928 100644 --- a/clang/lib/CodeGen/CoverageMappingGen.cpp +++ b/clang/lib/CodeGen/CoverageMappingGen.cpp @@ -686,11 +686,12 @@ struct MCDCCoverageBuilder { llvm::SmallVector DecisionStack; MCDC::State &MCDCState; llvm::DenseMap &CondIDs; - mcdc::ConditionID NextID = 1; + mcdc::ConditionID NextID = 0; bool NotMapped = false; - /// Represent a sentinel value of [0,0] for the bottom of DecisionStack. - static constexpr mcdc::ConditionIDs DecisionStackSentinel{0, 0}; + /// Represent a sentinel value as a pair of final decisions for the bottom + // of DecisionStack. + static constexpr mcdc::ConditionIDs DecisionStackSentinel{-1, -1}; /// Is this a logical-AND operation? bool isLAnd(const BinaryOperator *E) const { @@ -705,12 +706,12 @@ struct MCDCCoverageBuilder { /// Return whether the build of the control flow map is at the top-level /// (root) of a logical operator nest in a boolean expression prior to the /// assignment of condition IDs. 
- bool isIdle() const { return (NextID == 1 && !NotMapped); } + bool isIdle() const { return (NextID == 0 && !NotMapped); } /// Return whether any IDs have been assigned in the build of the control /// flow map, indicating that the map is being generated for this boolean /// expression. - bool isBuilding() const { return (NextID > 1); } + bool isBuilding() const { return (NextID > 0); } /// Set the given condition's ID. void setCondID(const Expr *Cond, mcdc::ConditionID ID) { @@ -721,7 +722,7 @@ struct MCDCCoverageBuilder { mcdc::ConditionID getCondID(const Expr *Cond) const { auto I = CondIDs.find(CodeGenFunction::stripCond(Cond)); if (I == CondIDs.end()) - return 0; + return -1; else return I->second; } @@ -789,15 +790,15 @@ struct MCDCCoverageBuilder { // Reset state if not doing mapping. if (NotMapped) { NotMapped = false; - assert(NextID == 1); + assert(NextID == 0); return 0; } // Set number of conditions and reset. - unsigned TotalConds = NextID - 1; + unsigned TotalConds = NextID; // Reset ID back to beginning. 
- NextID = 1; + NextID = 0; return TotalConds; } @@ -889,7 +890,7 @@ struct CounterCoverageMappingBuilder return RegionStack.size() - 1; } - size_t pushRegion(unsigned BitmapIdx, unsigned Conditions, + size_t pushRegion(unsigned BitmapIdx, uint16_t Conditions, std::optional StartLoc = std::nullopt, std::optional EndLoc = std::nullopt) { @@ -1038,7 +1039,7 @@ struct CounterCoverageMappingBuilder if (CodeGenFunction::isInstrumentedCondition(C)) { mcdc::Parameters BranchParams; mcdc::ConditionID ID = MCDCBuilder.getCondID(C); - if (ID > 0) + if (ID >= 0) BranchParams = mcdc::BranchParameters{ID, Conds}; // If a condition can fold to true or false, the corresponding branch @@ -2125,8 +2126,9 @@ static void dump(llvm::raw_ostream &OS, StringRef FunctionName, if (const auto *BranchParams = std::get_if(&R.MCDCParams)) { - OS << " [" << BranchParams->ID << "," << BranchParams->Conds[true]; - OS << "," << BranchParams->Conds[false] << "] "; + OS << " [" << BranchParams->ID + 1 << "," + << BranchParams->Conds[true] + 1; + OS << "," << BranchParams->Conds[false] + 1 << "] "; } if (R.Kind == CounterMappingRegion::ExpansionRegion) diff --git a/llvm/include/llvm/ProfileData/Coverage/MCDCTypes.h b/llvm/include/llvm/ProfileData/Coverage/MCDCTypes.h index 61272174fef827..51f528b7e78804 100644 --- a/llvm/include/llvm/ProfileData/Coverage/MCDCTypes.h +++ b/llvm/include/llvm/ProfileData/Coverage/MCDCTypes.h @@ -19,7 +19,7 @@ namespace llvm::coverage::mcdc { /// The ID for MCDCBranch. -using ConditionID = unsigned int; +using ConditionID = int16_t; using ConditionIDs = std::array; struct DecisionParameters { @@ -27,7 +27,7 @@ struct DecisionParameters { unsigned BitmapIdx; /// Number of Conditions used for a Decision Region. 
- unsigned NumConditions; + uint16_t NumConditions; DecisionParameters() = delete; DecisionParameters(unsigned BitmapIdx, unsigned NumConditions) diff --git a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp index 9adeceb1faee2b..ddce7580729170 100644 --- a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp +++ b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp @@ -272,17 +272,17 @@ class MCDCRecordProcessor { // Walk the binary decision diagram and try assigning both false and true to // each node. When a terminal node (ID == 0) is reached, fill in the value in // the truth table. - void buildTestVector(MCDCRecord::TestVector &TV, unsigned ID, + void buildTestVector(MCDCRecord::TestVector &TV, mcdc::ConditionID ID, unsigned Index) { - assert((Index & (1 << (ID - 1))) == 0); + assert((Index & (1 << ID)) == 0); for (auto MCDCCond : {MCDCRecord::MCDC_False, MCDCRecord::MCDC_True}) { static_assert(MCDCRecord::MCDC_False == 0); static_assert(MCDCRecord::MCDC_True == 1); - Index |= MCDCCond << (ID - 1); - TV[ID - 1] = MCDCCond; + Index |= MCDCCond << ID; + TV[ID] = MCDCCond; auto NextID = CondsMap[ID][MCDCCond]; - if (NextID > 0) { + if (NextID >= 0) { buildTestVector(TV, NextID, Index); continue; } @@ -299,17 +299,17 @@ class MCDCRecordProcessor { } // Reset back to DontCare. - TV[ID - 1] = MCDCRecord::MCDC_DontCare; + TV[ID] = MCDCRecord::MCDC_DontCare; } /// Walk the bits in the bitmap. A bit set to '1' indicates that the test /// vector at the corresponding index was executed during a test run. void findExecutedTestVectors() { // Walk the binary decision diagram to enumerate all possible test vectors. - // We start at the root node (ID == 1) with all values being DontCare. + // We start at the root node (ID == 0) with all values being DontCare. // `Index` encodes the bitmask of true values and is initially 0. 
MCDCRecord::TestVector TV(NumConditions, MCDCRecord::MCDC_DontCare); - buildTestVector(TV, 1, 0); + buildTestVector(TV, 0, 0); } // Find an independence pair for each condition: @@ -371,7 +371,7 @@ class MCDCRecordProcessor { for (const auto *B : Branches) { const auto &BranchParams = B->getBranchParams(); CondsMap[BranchParams.ID] = BranchParams.Conds; - PosToID[I] = BranchParams.ID - 1; + PosToID[I] = BranchParams.ID; CondLoc[I] = B->startLoc(); Folded[I++] = (B->Count.isZero() && B->FalseCount.isZero()); } @@ -566,10 +566,10 @@ class MCDCDecisionRecorder { assert(Branch.Kind == CounterMappingRegion::MCDCBranchRegion); auto ConditionID = Branch.getBranchParams().ID; - assert(ConditionID > 0 && "ConditionID should begin with 1"); + assert(ConditionID >= 0 && "ConditionID should be positive"); if (ConditionIDs.contains(ConditionID) || - ConditionID > DecisionParams.NumConditions) + ConditionID >= DecisionParams.NumConditions) return NotProcessed; if (!this->dominates(Branch)) @@ -577,9 +577,9 @@ class MCDCDecisionRecorder { assert(MCDCBranches.size() < DecisionParams.NumConditions); - // Put `ID=1` in front of `MCDCBranches` for convenience + // Put `ID=0` in front of `MCDCBranches` for convenience // even if `MCDCBranches` is not topological. 
- if (ConditionID == 1) + if (ConditionID == 0) MCDCBranches.insert(MCDCBranches.begin(), &Branch); else MCDCBranches.push_back(&Branch); diff --git a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp index de7be523ef33ca..d328460510830a 100644 --- a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp +++ b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp @@ -244,7 +244,9 @@ Error RawCoverageMappingReader::readMappingRegionsSubArray( unsigned LineStart = 0; for (size_t I = 0; I < NumRegions; ++I) { Counter C, C2; - uint64_t BIDX, NC, ID, TID, FID; + uint64_t BIDX, NC; + // They are stored as internal values plus 1 (min is -1) + uint64_t ID1, TID1, FID1; mcdc::Parameters Params; CounterMappingRegion::RegionKind Kind = CounterMappingRegion::CodeRegion; @@ -303,28 +305,29 @@ Error RawCoverageMappingReader::readMappingRegionsSubArray( return Err; if (auto Err = readCounter(C2)) return Err; - if (auto Err = readIntMax(ID, std::numeric_limits::max())) + if (auto Err = readIntMax(ID1, std::numeric_limits::max())) return Err; - if (auto Err = readIntMax(TID, std::numeric_limits::max())) + if (auto Err = readIntMax(TID1, std::numeric_limits::max())) return Err; - if (auto Err = readIntMax(FID, std::numeric_limits::max())) + if (auto Err = readIntMax(FID1, std::numeric_limits::max())) return Err; - if (ID == 0) + if (ID1 == 0) return make_error( coveragemap_error::malformed, "MCDCConditionID shouldn't be zero"); Params = mcdc::BranchParameters{ - static_cast(ID), - {static_cast(FID), static_cast(TID)}}; + static_cast(static_cast(ID1) - 1), + {static_cast(static_cast(FID1) - 1), + static_cast(static_cast(TID1) - 1)}}; break; case CounterMappingRegion::MCDCDecisionRegion: Kind = CounterMappingRegion::MCDCDecisionRegion; if (auto Err = readIntMax(BIDX, std::numeric_limits::max())) return Err; - if (auto Err = readIntMax(NC, std::numeric_limits::max())) + if (auto Err = readIntMax(NC, 
std::numeric_limits::max())) return Err; Params = mcdc::DecisionParameters{static_cast(BIDX), - static_cast(NC)}; + static_cast(NC)}; break; default: return make_error(coveragemap_error::malformed, diff --git a/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp b/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp index 6125cce0fa4cd9..5036bde5aca723 100644 --- a/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp +++ b/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp @@ -253,12 +253,16 @@ void CoverageMappingWriter::write(raw_ostream &OS) { writeCounter(MinExpressions, Count, OS); writeCounter(MinExpressions, FalseCount, OS); { + // They are written as internal values plus 1. const auto &BranchParams = I->getBranchParams(); ParamsShouldBeNull = false; - assert(BranchParams.ID > 0); - encodeULEB128(static_cast(BranchParams.ID), OS); - encodeULEB128(static_cast(BranchParams.Conds[true]), OS); - encodeULEB128(static_cast(BranchParams.Conds[false]), OS); + assert(BranchParams.ID >= 0); + unsigned ID1 = BranchParams.ID + 1; + unsigned TID1 = BranchParams.Conds[true] + 1; + unsigned FID1 = BranchParams.Conds[false] + 1; + encodeULEB128(ID1, OS); + encodeULEB128(TID1, OS); + encodeULEB128(FID1, OS); } break; case CounterMappingRegion::MCDCDecisionRegion: diff --git a/llvm/unittests/ProfileData/CoverageMappingTest.cpp b/llvm/unittests/ProfileData/CoverageMappingTest.cpp index db6689bc58839c..425b3d10510af7 100644 --- a/llvm/unittests/ProfileData/CoverageMappingTest.cpp +++ b/llvm/unittests/ProfileData/CoverageMappingTest.cpp @@ -192,7 +192,7 @@ struct CoverageMappingTest : ::testing::TestWithParam> { addCMR(Counter::getZero(), File, LS, CS, LE, CE, true); } - void addMCDCDecisionCMR(unsigned Mask, unsigned NC, StringRef File, + void addMCDCDecisionCMR(unsigned Mask, uint16_t NC, StringRef File, unsigned LS, unsigned CS, unsigned LE, unsigned CE) { auto &Regions = InputFunctions.back().Regions; unsigned FileID = getFileIndexForFunction(File); @@ -872,9 
+872,9 @@ TEST_P(CoverageMappingTest, non_code_region_bitmask) { addCMR(Counter::getCounter(3), "file", 1, 1, 5, 5); addMCDCDecisionCMR(0, 2, "file", 7, 1, 7, 6); - addMCDCBranchCMR(Counter::getCounter(0), Counter::getCounter(1), 1, {0, 2}, + addMCDCBranchCMR(Counter::getCounter(0), Counter::getCounter(1), 0, {-1, 1}, "file", 7, 2, 7, 3); - addMCDCBranchCMR(Counter::getCounter(2), Counter::getCounter(3), 2, {0, 0}, + addMCDCBranchCMR(Counter::getCounter(2), Counter::getCounter(3), 1, {-1, -1}, "file", 7, 4, 7, 5); EXPECT_THAT_ERROR(loadCoverageMapping(), Succeeded()); @@ -900,10 +900,10 @@ TEST_P(CoverageMappingTest, decision_before_expansion) { addExpansionCMR("foo", "B", 4, 19, 4, 20); addCMR(Counter::getCounter(0), "A", 1, 14, 1, 17); addCMR(Counter::getCounter(0), "A", 1, 14, 1, 17); - addMCDCBranchCMR(Counter::getCounter(0), Counter::getCounter(1), 1, {0, 2}, + addMCDCBranchCMR(Counter::getCounter(0), Counter::getCounter(1), 0, {-1, 1}, "A", 1, 14, 1, 17); addCMR(Counter::getCounter(1), "B", 1, 14, 1, 17); - addMCDCBranchCMR(Counter::getCounter(1), Counter::getCounter(2), 2, {0, 0}, + addMCDCBranchCMR(Counter::getCounter(1), Counter::getCounter(2), 1, {-1, -1}, "B", 1, 14, 1, 17); // InputFunctionCoverageData::Regions is rewritten after the write. From 28d4385020920ec5f54ef3de4e69f3649d417d81 Mon Sep 17 00:00:00 2001 From: Christian Sigg Date: Thu, 15 Feb 2024 08:53:52 +0100 Subject: [PATCH 221/240] [bazel][libc] Fix BUILD after ff409d39ce4673c70f474c3fdb7120bab8f94eef. 
--- utils/bazel/llvm-project-overlay/libc/BUILD.bazel | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 198c110b7e304e..1518d7910299d0 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -724,6 +724,7 @@ libc_support_library( ":__support_cpp_bit", ":__support_cpp_limits", ":__support_cpp_type_traits", + ":__support_fputil_dyadic_float", ":__support_fputil_fp_bits", ":__support_fputil_nearest_integer_operations", ":__support_fputil_normal_float", From 5f6e0f35f936495563b5758a7ff9d4417a9f651b Mon Sep 17 00:00:00 2001 From: jeanPerier Date: Thu, 15 Feb 2024 09:04:42 +0100 Subject: [PATCH 222/240] [flang][runtime] Destroy nested allocatable components (#81117) The runtime was currently only deallocating the direct allocatable components, which caused leaks when there are allocatable components nested in the direct components. Update Destroy to recursively destroy components. Also call Destroy from Assign to deallocate nested allocatable components before doing the assignment as required by F2018 9.7.3.2 point 7. This lack of deallocation was visible if the nested components had user defined assignment "observing" the allocation state. 
--- flang/runtime/assign.cpp | 4 +++ flang/runtime/derived.cpp | 59 ++++++++++++++++++++++++++------------- 2 files changed, 44 insertions(+), 19 deletions(-) diff --git a/flang/runtime/assign.cpp b/flang/runtime/assign.cpp index 879b413efe1270..25d2ba4501c115 100644 --- a/flang/runtime/assign.cpp +++ b/flang/runtime/assign.cpp @@ -320,6 +320,8 @@ RT_API_ATTRS static void Assign( if ((flags & NeedFinalization) && toDerived) { Finalize(to, *toDerived, &terminator); flags &= ~NeedFinalization; + } else if (toDerived && !toDerived->noDestructionNeeded()) { + Destroy(to, /*finalize=*/false, *toDerived, &terminator); } } else { to.Destroy((flags & NeedFinalization) != 0, /*destroyPointers=*/false, @@ -389,6 +391,8 @@ RT_API_ATTRS static void Assign( // The target is first finalized if still necessary (7.5.6.3(1)) if (flags & NeedFinalization) { Finalize(to, *updatedToDerived, &terminator); + } else if (updatedToDerived && !updatedToDerived->noDestructionNeeded()) { + Destroy(to, /*finalize=*/false, *updatedToDerived, &terminator); } // Copy the data components (incl. the parent) first. const Descriptor &componentDesc{updatedToDerived->component()}; diff --git a/flang/runtime/derived.cpp b/flang/runtime/derived.cpp index 8a0d0ab2bb7836..67eb901c1a3d9a 100644 --- a/flang/runtime/derived.cpp +++ b/flang/runtime/derived.cpp @@ -17,6 +17,19 @@ namespace Fortran::runtime { RT_OFFLOAD_API_GROUP_BEGIN +// Fill "extents" array with the extents of component "comp" from derived type +// instance "derivedInstance". +static RT_API_ATTRS void GetComponentExtents(SubscriptValue (&extents)[maxRank], + const typeInfo::Component &comp, const Descriptor &derivedInstance) { + const typeInfo::Value *bounds{comp.bounds()}; + for (int dim{0}; dim < comp.rank(); ++dim) { + SubscriptValue lb{bounds[2 * dim].GetValue(&derivedInstance).value_or(0)}; + SubscriptValue ub{ + bounds[2 * dim + 1].GetValue(&derivedInstance).value_or(0)}; + extents[dim] = ub >= lb ? 
ub - lb + 1 : 0; + } +} + RT_API_ATTRS int Initialize(const Descriptor &instance, const typeInfo::DerivedType &derived, Terminator &terminator, bool hasStat, const Descriptor *errMsg) { @@ -77,22 +90,15 @@ RT_API_ATTRS int Initialize(const Descriptor &instance, comp.derivedType() && !comp.derivedType()->noInitializationNeeded()) { // Default initialization of non-pointer non-allocatable/automatic // data component. Handles parent component's elements. Recursive. - SubscriptValue extent[maxRank]; - const typeInfo::Value *bounds{comp.bounds()}; - for (int dim{0}; dim < comp.rank(); ++dim) { - typeInfo::TypeParameterValue lb{ - bounds[2 * dim].GetValue(&instance).value_or(0)}; - typeInfo::TypeParameterValue ub{ - bounds[2 * dim + 1].GetValue(&instance).value_or(0)}; - extent[dim] = ub >= lb ? ub - lb + 1 : 0; - } + SubscriptValue extents[maxRank]; + GetComponentExtents(extents, comp, instance); StaticDescriptor staticDescriptor; Descriptor &compDesc{staticDescriptor.descriptor()}; const typeInfo::DerivedType &compType{*comp.derivedType()}; for (std::size_t j{0}; j++ < elements; instance.IncrementSubscripts(at)) { compDesc.Establish(compType, instance.ElementComponent(at, comp.offset()), comp.rank(), - extent); + extents); stat = Initialize(compDesc, compType, terminator, hasStat, errMsg); if (stat != StatOk) { break; @@ -253,14 +259,8 @@ RT_API_ATTRS void Finalize(const Descriptor &descriptor, } } else if (comp.genre() == typeInfo::Component::Genre::Data && comp.derivedType() && !comp.derivedType()->noFinalizationNeeded()) { - SubscriptValue extent[maxRank]; - const typeInfo::Value *bounds{comp.bounds()}; - for (int dim{0}; dim < comp.rank(); ++dim) { - SubscriptValue lb{bounds[2 * dim].GetValue(&descriptor).value_or(0)}; - SubscriptValue ub{ - bounds[2 * dim + 1].GetValue(&descriptor).value_or(0)}; - extent[dim] = ub >= lb ? 
ub - lb + 1 : 0; - } + SubscriptValue extents[maxRank]; + GetComponentExtents(extents, comp, descriptor); StaticDescriptor staticDescriptor; Descriptor &compDesc{staticDescriptor.descriptor()}; const typeInfo::DerivedType &compType{*comp.derivedType()}; @@ -268,7 +268,7 @@ RT_API_ATTRS void Finalize(const Descriptor &descriptor, descriptor.IncrementSubscripts(at)) { compDesc.Establish(compType, descriptor.ElementComponent(at, comp.offset()), comp.rank(), - extent); + extents); Finalize(compDesc, compType, terminator); } } @@ -296,6 +296,8 @@ RT_API_ATTRS void Destroy(const Descriptor &descriptor, bool finalize, if (finalize && !derived.noFinalizationNeeded()) { Finalize(descriptor, derived, terminator); } + // Deallocate all direct and indirect allocatable and automatic components. + // Contrary to finalization, the order of deallocation does not matter. const Descriptor &componentDesc{derived.component()}; std::size_t myComponents{componentDesc.Elements()}; std::size_t elements{descriptor.Elements()}; @@ -304,14 +306,33 @@ RT_API_ATTRS void Destroy(const Descriptor &descriptor, bool finalize, for (std::size_t k{0}; k < myComponents; ++k) { const auto &comp{ *componentDesc.ZeroBasedIndexedElement(k)}; + const bool destroyComp{ + comp.derivedType() && !comp.derivedType()->noDestructionNeeded()}; if (comp.genre() == typeInfo::Component::Genre::Allocatable || comp.genre() == typeInfo::Component::Genre::Automatic) { for (std::size_t j{0}; j < elements; ++j) { Descriptor *d{ descriptor.ElementComponent(at, comp.offset())}; + if (destroyComp) { + Destroy(*d, /*finalize=*/false, *comp.derivedType(), terminator); + } d->Deallocate(); descriptor.IncrementSubscripts(at); } + } else if (destroyComp && + comp.genre() == typeInfo::Component::Genre::Data) { + SubscriptValue extents[maxRank]; + GetComponentExtents(extents, comp, descriptor); + StaticDescriptor staticDescriptor; + Descriptor &compDesc{staticDescriptor.descriptor()}; + const typeInfo::DerivedType 
&compType{*comp.derivedType()}; + for (std::size_t j{0}; j++ < elements; + descriptor.IncrementSubscripts(at)) { + compDesc.Establish(compType, + descriptor.ElementComponent(at, comp.offset()), comp.rank(), + extents); + Destroy(compDesc, /*finalize=*/false, *comp.derivedType(), terminator); + } } } } From 0d0bd3ef55a2317c4254e97a3657e702bcf22214 Mon Sep 17 00:00:00 2001 From: jeanPerier Date: Thu, 15 Feb 2024 09:05:33 +0100 Subject: [PATCH 223/240] [flang] Deep copy nested allocatable components in transformational (#81736) Spread, reshape, pack, and other transformational intrinsic runtimes are using `CopyElement` utility to copy elements. This utility was dealing with deep copies, but only when the allocatable components where "immediate" components of the type being copied. If the allocatable components were nested inside a nonpointer/nonallocatable component, they were not deep copied, leading to bugs later when manipulating the value (or double free when applying #81117). Visit data components with allocatable components (using the noDestructionNeeded flag to avoid expensive and useless type visit when there are no such components). --- flang/runtime/copy.cpp | 26 ++++++++++++++++++++++++-- flang/runtime/derived.cpp | 13 +++---------- 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/flang/runtime/copy.cpp b/flang/runtime/copy.cpp index 9e62d1e24a4731..7cf94836541415 100644 --- a/flang/runtime/copy.cpp +++ b/flang/runtime/copy.cpp @@ -20,11 +20,13 @@ RT_API_ATTRS void CopyElement(const Descriptor &to, const SubscriptValue toAt[], const Descriptor &from, const SubscriptValue fromAt[], Terminator &terminator) { char *toPtr{to.Element(toAt)}; - const char *fromPtr{from.Element(fromAt)}; + char *fromPtr{from.Element(fromAt)}; RUNTIME_CHECK(terminator, to.ElementBytes() == from.ElementBytes()); std::memcpy(toPtr, fromPtr, to.ElementBytes()); + // Deep copy allocatable and automatic components if any. 
if (const auto *addendum{to.Addendum()}) { - if (const auto *derived{addendum->derivedType()}) { + if (const auto *derived{addendum->derivedType()}; + derived && !derived->noDestructionNeeded()) { RUNTIME_CHECK(terminator, from.Addendum() && derived == from.Addendum()->derivedType()); const Descriptor &componentDesc{derived->component()}; @@ -43,6 +45,26 @@ RT_API_ATTRS void CopyElement(const Descriptor &to, const SubscriptValue toAt[], fromPtr + component->offset())}; CopyArray(toDesc, fromDesc, terminator); } + } else if (component->genre() == typeInfo::Component::Genre::Data && + component->derivedType() && + !component->derivedType()->noDestructionNeeded()) { + SubscriptValue extents[maxRank]; + const typeInfo::Value *bounds{component->bounds()}; + for (int dim{0}; dim < component->rank(); ++dim) { + SubscriptValue lb{bounds[2 * dim].GetValue(&to).value_or(0)}; + SubscriptValue ub{bounds[2 * dim + 1].GetValue(&to).value_or(0)}; + extents[dim] = ub >= lb ? ub - lb + 1 : 0; + } + const typeInfo::DerivedType &compType{*component->derivedType()}; + StaticDescriptor toStaticDescriptor; + Descriptor &toCompDesc{toStaticDescriptor.descriptor()}; + toCompDesc.Establish(compType, toPtr + component->offset(), + component->rank(), extents); + StaticDescriptor fromStaticDescriptor; + Descriptor &fromCompDesc{fromStaticDescriptor.descriptor()}; + fromCompDesc.Establish(compType, fromPtr + component->offset(), + component->rank(), extents); + CopyArray(toCompDesc, fromCompDesc, terminator); } } } diff --git a/flang/runtime/derived.cpp b/flang/runtime/derived.cpp index 67eb901c1a3d9a..0d9e033df4e27e 100644 --- a/flang/runtime/derived.cpp +++ b/flang/runtime/derived.cpp @@ -340,16 +340,9 @@ RT_API_ATTRS void Destroy(const Descriptor &descriptor, bool finalize, RT_API_ATTRS bool HasDynamicComponent(const Descriptor &descriptor) { if (const DescriptorAddendum * addendum{descriptor.Addendum()}) { if (const auto *derived = addendum->derivedType()) { - const Descriptor 
&componentDesc{derived->component()}; - std::size_t myComponents{componentDesc.Elements()}; - for (std::size_t k{0}; k < myComponents; ++k) { - const auto &comp{ - *componentDesc.ZeroBasedIndexedElement(k)}; - if (comp.genre() == typeInfo::Component::Genre::Allocatable || - comp.genre() == typeInfo::Component::Genre::Automatic) { - return true; - } - } + // Destruction is needed if and only if there are direct or indirect + // allocatable or automatic components. + return !derived->noDestructionNeeded(); } } return false; From e769fb8699e3fa8e40623764f7713bfc783b0330 Mon Sep 17 00:00:00 2001 From: jeanPerier Date: Thu, 15 Feb 2024 09:06:42 +0100 Subject: [PATCH 224/240] [flang] prevent legacy lowering from being called in pointer assignment (#81750) When doing a pointer assignment with an RHS that is an array section, the code fell in the legacy lowering code even with HLFIR enabled. Escape this old code when HLFIR is on. Should fix #80884. --- flang/lib/Lower/Bridge.cpp | 9 +++++++- flang/test/Lower/HLFIR/issue80884.f90 | 32 +++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 flang/test/Lower/HLFIR/issue80884.f90 diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 76e127207d764e..2d7f748cefa2d8 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -3419,6 +3419,13 @@ class FirConverter : public Fortran::lower::AbstractConverter { fir::factory::disassociateMutableBox(*builder, loc, lhs); return; } + if (lowerToHighLevelFIR()) { + fir::ExtendedValue rhs = genExprAddr(assign.rhs, stmtCtx); + fir::factory::associateMutableBoxWithRemap(*builder, loc, lhs, rhs, + lbounds, ubounds); + return; + } + // Legacy lowering below. // Do not generate a temp in case rhs is an array section. 
fir::ExtendedValue rhs = Fortran::lower::isArraySectionWithoutVectorSubscript(assign.rhs) @@ -3427,7 +3434,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { : genExprAddr(assign.rhs, stmtCtx); fir::factory::associateMutableBoxWithRemap(*builder, loc, lhs, rhs, lbounds, ubounds); - if (!lowerToHighLevelFIR() && explicitIterationSpace()) { + if (explicitIterationSpace()) { mlir::ValueRange inners = explicitIterSpace.getInnerArgs(); if (!inners.empty()) builder->create(loc, inners); diff --git a/flang/test/Lower/HLFIR/issue80884.f90 b/flang/test/Lower/HLFIR/issue80884.f90 new file mode 100644 index 00000000000000..2a7792b6004c4d --- /dev/null +++ b/flang/test/Lower/HLFIR/issue80884.f90 @@ -0,0 +1,32 @@ +! Test lowering of pointer remapping with component ref in the RHS. +! RUN: bbc -emit-hlfir -o - %s -I nw | FileCheck %s + +subroutine issue80884(p, targ) + type t0 + real :: array(10, 10) + end type + type, extends(t0) :: t + end type + type(t), target :: targ + real, pointer :: p(:) + p(1:100) => targ%array +end subroutine +! CHECK-LABEL: func.func @_QPissue80884( +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs, uniq_name = "_QFissue80884Ep"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs, uniq_name = "_QFissue80884Etarg"} : (!fir.ref}>}>>) -> (!fir.ref}>}>>, !fir.ref}>}>>) +! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i64 +! CHECK: %[[VAL_5:.*]] = arith.constant 100 : i64 +! CHECK: %[[VAL_6:.*]] = hlfir.designate %[[VAL_3]]#0{"t0"} : (!fir.ref}>}>>) -> !fir.ref}>> +! CHECK: %[[VAL_7:.*]] = arith.constant 10 : index +! CHECK: %[[VAL_8:.*]] = arith.constant 10 : index +! CHECK: %[[VAL_9:.*]] = fir.shape %[[VAL_7]], %[[VAL_8]] : (index, index) -> !fir.shape<2> +! CHECK: %[[VAL_10:.*]] = hlfir.designate %[[VAL_6]]{"array"} shape %[[VAL_9]] : (!fir.ref}>>, !fir.shape<2>) -> !fir.ref> +! CHECK: %[[VAL_11:.*]] = arith.constant 1 : index +! 
CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_4]] : (i64) -> index +! CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_5]] : (i64) -> index +! CHECK: %[[VAL_14:.*]] = arith.subi %[[VAL_13]], %[[VAL_12]] : index +! CHECK: %[[VAL_15:.*]] = arith.addi %[[VAL_14]], %[[VAL_11]] : index +! CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_10]] : (!fir.ref>) -> !fir.ref> +! CHECK: %[[VAL_17:.*]] = fir.shape_shift %[[VAL_4]], %[[VAL_15]] : (i64, index) -> !fir.shapeshift<1> +! CHECK: %[[VAL_18:.*]] = fir.embox %[[VAL_16]](%[[VAL_17]]) : (!fir.ref>, !fir.shapeshift<1>) -> !fir.box>> +! CHECK: fir.store %[[VAL_18]] to %[[VAL_2]]#1 : !fir.ref>>> From cd55e230e629f57db742e831c79488d04f68f4e7 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Thu, 15 Feb 2024 17:11:28 +0800 Subject: [PATCH 225/240] [RISCV] Use $noreg in vsetvli-insert.mir test. NFC This reflects what actually comes out of SelectionDAG after the noreg passthru peephole added in a63bd7e99b00c. --- llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir index a37a672e30a9eb..1850abe6363bc9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir @@ -320,8 +320,8 @@ body: | ; CHECK-NEXT: %pt:vr = IMPLICIT_DEF ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 2, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 %pt, [[COPY]], 2, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype :: (load (s128) from %ir.x) - ; CHECK-NEXT: dead [[PseudoVSETVLIX0_:%[0-9]+]]:gpr = PseudoVSETVLIX0 $x0, 152 /* e64, m1, tu, ma */, implicit-def $vl, implicit-def $vtype - ; CHECK-NEXT: [[PseudoVMV_V_I_M1_:%[0-9]+]]:vr = PseudoVMV_V_I_M1 undef $v2, 0, -1, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype + ; CHECK-NEXT: dead 
[[PseudoVSETVLIX0_:%[0-9]+]]:gpr = PseudoVSETVLIX0 $x0, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: [[PseudoVMV_V_I_M1_:%[0-9]+]]:vr = PseudoVMV_V_I_M1 $noreg, 0, -1, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 2, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: [[PseudoVREDSUM_VS_M1_E8_:%[0-9]+]]:vr = PseudoVREDSUM_VS_M1_E8 [[DEF]], killed [[PseudoVLE64_V_M1_]], killed [[PseudoVMV_V_I_M1_]], 2, 6 /* e64 */, 1 /* ta, mu */, implicit $vl, implicit $vtype @@ -331,7 +331,7 @@ body: | %0:gpr = COPY $x10 %pt:vr = IMPLICIT_DEF %1:vr = PseudoVLE64_V_M1 %pt, %0, 2, 6, 0 :: (load (s128) from %ir.x) - %2:vr = PseudoVMV_V_I_M1 undef $v2, 0, -1, 6, 0 + %2:vr = PseudoVMV_V_I_M1 $noreg, 0, -1, 6, 0 %4:vr = IMPLICIT_DEF %3:vr = PseudoVREDSUM_VS_M1_E8 %4, killed %1, killed %2, 2, 6, 1 %5:gpr = PseudoVMV_X_S killed %3, 6 @@ -442,12 +442,12 @@ body: | ; CHECK-NEXT: %pt:vrm2 = IMPLICIT_DEF ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 4, 217 /* e64, m2, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: [[PseudoVID_V_M2_:%[0-9]+]]:vrm2 = PseudoVID_V_M2 %pt, 4, 6 /* e64 */, 3 /* ta, ma */, implicit $vl, implicit $vtype - ; CHECK-NEXT: dead $x0 = PseudoVSETVLIX0 killed $x0, 134 /* e8, mf4, tu, ma */, implicit-def $vl, implicit-def $vtype, implicit $vl - ; CHECK-NEXT: [[PseudoVMV_V_I_MF4_:%[0-9]+]]:vr = PseudoVMV_V_I_MF4 undef [[PseudoVMV_V_I_MF4_]], 0, 4, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype + ; CHECK-NEXT: dead $x0 = PseudoVSETVLIX0 killed $x0, 198 /* e8, mf4, ta, ma */, implicit-def $vl, implicit-def $vtype, implicit $vl + ; CHECK-NEXT: [[PseudoVMV_V_I_MF4_:%[0-9]+]]:vr = PseudoVMV_V_I_MF4 $noreg, 0, 4, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: PseudoRET %pt:vrm2 = IMPLICIT_DEF %0:vrm2 = PseudoVID_V_M2 %pt, 4, 6, 3 - %4:vr = PseudoVMV_V_I_MF4 undef %4, 0, 4, 3, 
0 + %4:vr = PseudoVMV_V_I_MF4 $noreg, 0, 4, 3, 0 PseudoRET ... --- @@ -484,7 +484,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: $x0 = PseudoVSETIVLI 2, 215 /* e32, mf2, ta, ma */, implicit-def $vl, implicit-def $vtype - ; CHECK-NEXT: [[PseudoVMV_X_S_:%[0-9]+]]:gpr = PseudoVMV_X_S $noreg, 5 /* e32 */, implicit $vtype + ; CHECK-NEXT: [[PseudoVMV_X_S:%[0-9]+]]:gpr = PseudoVMV_X_S $noreg, 5 /* e32 */, implicit $vtype ; CHECK-NEXT: [[PseudoVMV_V_I_MF2_1:%[0-9]+]]:vr = PseudoVMV_V_I_MF2 $noreg, 1, 2, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: PseudoRET bb.0: From 4a32a414ee07bf15c47ba7fb7286d968e73a04fe Mon Sep 17 00:00:00 2001 From: Mikael Holmen Date: Thu, 15 Feb 2024 10:15:16 +0100 Subject: [PATCH 226/240] [clang] Fix two gcc warnings about unused variables [NFC] Without the fix gcc warns like ../../clang/lib/Sema/SemaDecl.cpp:2963:24: warning: unused variable 'SupA' [-Wunused-variable] 2963 | else if (const auto *SupA = dyn_cast(Attr)) | ^~~~ and ../../clang/lib/Driver/Driver.cpp:4192:17: warning: unused variable 'IAA' [-Wunused-variable] 4192 | if (auto *IAA = dyn_cast(Current)) { | ^~~ Remove the unused variables and change the "dyn_cast"s into "isa"s. 
--- clang/lib/Driver/Driver.cpp | 2 +- clang/lib/Sema/SemaDecl.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index cf84ef21dfa8ce..5a323bf4c0c5f4 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4189,7 +4189,7 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, break; } - if (auto *IAA = dyn_cast(Current)) { + if (isa(Current)) { Current = nullptr; break; } diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 09a35fddba1954..0aaaba0e5d15f9 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -2960,7 +2960,7 @@ static bool mergeDeclAttribute(Sema &S, NamedDecl *D, S.mergeHLSLNumThreadsAttr(D, *NT, NT->getX(), NT->getY(), NT->getZ()); else if (const auto *SA = dyn_cast(Attr)) NewAttr = S.mergeHLSLShaderAttr(D, *SA, SA->getType()); - else if (const auto *SupA = dyn_cast(Attr)) + else if (isa(Attr)) // Do nothing. Each redeclaration should be suppressed separately. 
NewAttr = nullptr; else if (Attr->shouldInheritEvenIfAlreadyPresent() || !DeclHasAttr(D, Attr)) From ba279934c6ab09d5394a89d8318651aefd8d565b Mon Sep 17 00:00:00 2001 From: Paul Semel Date: Thu, 15 Feb 2024 10:59:51 +0100 Subject: [PATCH 227/240] [dataflow] Fix crash when InitListExpr is not a prvalue (#80970) --- clang/lib/Analysis/FlowSensitive/Transfer.cpp | 7 ++++--- .../Analysis/FlowSensitive/TransferTest.cpp | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/clang/lib/Analysis/FlowSensitive/Transfer.cpp index f0b15f43b1f423..fc7395457f551d 100644 --- a/clang/lib/Analysis/FlowSensitive/Transfer.cpp +++ b/clang/lib/Analysis/FlowSensitive/Transfer.cpp @@ -664,9 +664,10 @@ class TransferVisitor : public ConstStmtVisitor { QualType Type = S->getType(); if (!Type->isStructureOrClassType()) { - if (auto *Val = Env.createValue(Type)) - Env.setValue(*S, *Val); - + // Until array initialization is implemented, we don't need to care about + // cases where `getNumInits() > 1`. + if (S->getNumInits() == 1) + propagateValueOrStorageLocation(*S->getInit(0), *S, Env); return; } diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp index 4b3b3511f848e8..87e6e83d2e03a9 100644 --- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp @@ -2349,6 +2349,24 @@ TEST(TransferTest, AssignmentOperatorReturnsByValue) { ASTContext &ASTCtx) {}); } +TEST(TransferTest, InitListExprAsXValue) { + // This is a crash repro. 
+ std::string Code = R"( + void target() { + bool&& Foo{false}; + // [[p]] + } + )"; + runDataflow( + Code, + [](const llvm::StringMap> &Results, + ASTContext &ASTCtx) { + const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); + const auto &FooVal = getValueForDecl(ASTCtx, Env, "Foo"); + ASSERT_TRUE(FooVal.formula().isLiteral(false)); + }); +} + TEST(TransferTest, CopyConstructor) { std::string Code = R"( struct A { From c6a7c4d70a7c25f73e46b7d2e7e4867c3b78223e Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Thu, 15 Feb 2024 02:03:03 -0800 Subject: [PATCH 228/240] [AMDGPU] Add 256-bit vdst and 96-bit src to profile switches. NFC. (#81801) I need these operands for a future patch. Also simplify conditions there. If nothing using !cond instead of nesting !if's does not need to realign code every time a new type is added. --- llvm/lib/Target/AMDGPU/SIInstrInfo.td | 62 +++++++++--------------- llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 5 ++ 2 files changed, 28 insertions(+), 39 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 22599773d562cb..4b2b79335c8a20 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1468,11 +1468,12 @@ class getVALUDstForVT { defvar op16 = !if(IsTrue16, !if (IsVOP3Encoding, VOPDstOperand_t16, VOPDstOperand_t16Lo128), VOPDstOperand); - RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand, - !if(!eq(VT.Size, 128), VOPDstOperand, - !if(!eq(VT.Size, 64), VOPDstOperand, - !if(!eq(VT.Size, 16), op16, - VOPDstS64orS32)))); // else VT == i1 + RegisterOperand ret = !cond(!eq(VT.Size, 256) : VOPDstOperand, + !eq(VT.Size, 128) : VOPDstOperand, + !eq(VT.Size, 64) : VOPDstOperand, + !eq(VT.Size, 32) : VOPDstOperand, + !eq(VT.Size, 16) : op16, + 1 : VOPDstS64orS32); // else VT == i1 } class getVALUDstForVT_fake16 { @@ -1556,40 +1557,23 @@ class getSDWASrcForVT { // given VT. 
class getVOP3SrcForVT { RegisterOperand ret = - !if(!eq(VT.Size, 128), - VRegSrc_128, - !if(!eq(VT.Size, 64), - !if(VT.isFP, - !if(!eq(VT.Value, v2f32.Value), - VSrc_v2f32, - VSrc_f64), - !if(!eq(VT.Value, v2i32.Value), - VSrc_v2b32, - VSrc_b64)), - !if(!eq(VT.Value, i1.Value), - SSrc_i1, - !if(VT.isFP, - !if(!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)), - !if(IsTrue16, VSrcT_f16, VSrc_f16), - !if(!or(!eq(VT.Value, v2f16.Value), !eq(VT.Value, v2bf16.Value)), - VSrc_v2f16, - !if(!or(!eq(VT.Value, v4f16.Value), !eq(VT.Value, v4bf16.Value)), - AVSrc_64, - VSrc_f32 - ) - ) - ), - !if(!eq(VT.Value, i16.Value), - !if(IsTrue16, VSrcT_b16, VSrc_b16), - !if(!eq(VT.Value, v2i16.Value), - VSrc_v2b16, - VSrc_b32 - ) - ) - ) - ) - ) - ); + !cond(!eq(VT, f64) : VSrc_f64, + !eq(VT, f32) : VSrc_f32, + !eq(VT, f16) : !if(IsTrue16, VSrcT_f16, VSrc_f16), + !eq(VT, bf16) : !if(IsTrue16, VSrcT_f16, VSrc_f16), + !eq(VT, i16) : !if(IsTrue16, VSrcT_b16, VSrc_b16), + !eq(VT, i1) : SSrc_i1, + !eq(VT, v2f32) : VSrc_v2f32, + !eq(VT, v2i32) : VSrc_v2b32, + !eq(VT, v2f16) : VSrc_v2f16, + !eq(VT, v2bf16) : VSrc_v2f16, + !eq(VT, v2i16) : VSrc_v2b16, + !eq(VT, v4f16) : AVSrc_64, + !eq(VT, v4bf16) : AVSrc_64, + !eq(VT.Size, 128) : VRegSrc_128, + !eq(VT.Size, 96) : VRegSrc_96, + !eq(VT.Size, 64) : VSrc_b64, + 1 : VSrc_b32); } // Src2 of VOP3 DPP instructions cannot be a literal diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index d4a1e8d185a1d5..176b3c199eafde 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -1214,6 +1214,7 @@ class SrcReg9 : RegisterOperand def VRegSrc_32 : SrcReg9; def VRegSrc_64 : SrcReg9; +def VRegSrc_96 : SrcReg9; def VRegSrc_128: SrcReg9; def VRegSrc_256: SrcReg9; def VRegOrLdsSrc_32 : SrcReg9; @@ -1230,6 +1231,10 @@ def VGPRSrc_32_Lo128 : RegisterOperand { let DecoderMethod = "DecodeVGPR_32RegisterClass"; } +def VGPRSrc_96 : RegisterOperand { + let 
DecoderMethod = "DecodeVReg_96RegisterClass"; +} + def VGPRSrc_16_Lo128 : RegisterOperand { let DecoderMethod = "DecodeVGPR_16_Lo128RegisterClass"; let EncoderMethod = "getMachineOpValueT16Lo128"; From f6f8e202f59f54429878f41bcc9aea614613a4af Mon Sep 17 00:00:00 2001 From: chuongg3 Date: Thu, 15 Feb 2024 10:09:20 +0000 Subject: [PATCH 229/240] [AArch64][GlobalISel] Refactor Combine G_CONCAT_VECTOR (#80866) The combine now works using tablegen and checks if new instruction is legal before creating it. --- .../llvm/CodeGen/GlobalISel/CombinerHelper.h | 20 +- .../include/llvm/Target/GlobalISel/Combine.td | 13 +- .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 37 +- llvm/lib/Target/AArch64/AArch64Combine.td | 2 +- .../GISel/AArch64O0PreLegalizerCombiner.cpp | 2 - .../GISel/AArch64PreLegalizerCombiner.cpp | 2 - .../AMDGPU/AMDGPUPreLegalizerCombiner.cpp | 2 - llvm/test/CodeGen/AArch64/itofp.ll | 570 +++++++++--------- .../AArch64/neon-bitwise-instructions.ll | 14 +- llvm/test/CodeGen/AArch64/vecreduce-add.ll | 352 +++++------ 10 files changed, 506 insertions(+), 508 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index 10eeafdd09a8ee..6a805ee40a7d8f 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -224,22 +224,18 @@ class CombinerHelper { /// - concat_vector(undef, undef) => undef /// - concat_vector(build_vector(A, B), build_vector(C, D)) => /// build_vector(A, B, C, D) - /// - /// \pre MI.getOpcode() == G_CONCAT_VECTORS. - bool tryCombineConcatVectors(MachineInstr &MI); + /// ========================================================== /// Check if the G_CONCAT_VECTORS \p MI is undef or if it /// can be flattened into a build_vector. - /// In the first case \p IsUndef will be true. - /// In the second case \p Ops will contain the operands needed - /// to produce the flattened build_vector. 
+ /// In the first case \p Ops will be empty + /// In the second case \p Ops will contain the operands + /// needed to produce the flattened build_vector. /// /// \pre MI.getOpcode() == G_CONCAT_VECTORS. - bool matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef, - SmallVectorImpl &Ops); - /// Replace \p MI with a flattened build_vector with \p Ops or an - /// implicit_def if IsUndef is true. - void applyCombineConcatVectors(MachineInstr &MI, bool IsUndef, - const ArrayRef Ops); + bool matchCombineConcatVectors(MachineInstr &MI, SmallVector &Ops); + /// Replace \p MI with a flattened build_vector with \p Ops + /// or an implicit_def if \p Ops is empty. + void applyCombineConcatVectors(MachineInstr &MI, SmallVector &Ops); /// Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS. /// Returns true if MI changed. diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 9b0e1b0d7c4f9e..7eadb718f16415 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -1253,6 +1253,14 @@ def match_ors : GICombineRule< [{ return Helper.matchOr(*${root}, ${matchinfo}); }]), (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>; +// Combines concat operations +def concat_matchinfo : GIDefMatchData<"SmallVector">; +def combine_concat_vector : GICombineRule< + (defs root:$root, concat_matchinfo:$matchinfo), + (match (wip_match_opcode G_CONCAT_VECTORS):$root, + [{ return Helper.matchCombineConcatVectors(*${root}, ${matchinfo}); }]), + (apply [{ Helper.applyCombineConcatVectors(*${root}, ${matchinfo}); }])>; + // FIXME: These should use the custom predicate feature once it lands. 
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero, undef_to_negative_one, @@ -1326,11 +1334,12 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines, intdiv_combines, mulh_combines, redundant_neg_operands, and_or_disjoint_mask, fma_combines, fold_binop_into_select, sub_add_reg, select_to_minmax, redundant_binop_in_equality, - fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors]>; + fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors, + combine_concat_vector]>; // A combine group used to for prelegalizer combiners at -O0. The combines in // this group have been selected based on experiments to balance code size and // compile time performance. def optnone_combines : GICombineGroup<[trivial_combines, ptr_add_immed_chain, combines_for_extload, - not_cmp_fold, opt_brcond_by_inverting_cond]>; + not_cmp_fold, opt_brcond_by_inverting_cond, combine_concat_vector]>; diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 1b199cfd41d231..b400eb34e2901b 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -222,21 +222,11 @@ void CombinerHelper::applyCombineCopy(MachineInstr &MI) { replaceRegWith(MRI, DstReg, SrcReg); } -bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) { - bool IsUndef = false; - SmallVector Ops; - if (matchCombineConcatVectors(MI, IsUndef, Ops)) { - applyCombineConcatVectors(MI, IsUndef, Ops); - return true; - } - return false; -} - -bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef, - SmallVectorImpl &Ops) { +bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, + SmallVector &Ops) { assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS && "Invalid instruction"); - IsUndef = true; + bool IsUndef = true; MachineInstr *Undef = nullptr; // Walk over all the operands of concat vectors and check if they are @@ -246,6 
+236,8 @@ bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef, Register Reg = MO.getReg(); MachineInstr *Def = MRI.getVRegDef(Reg); assert(Def && "Operand not defined"); + if (!MRI.hasOneNonDBGUse(Reg)) + return false; switch (Def->getOpcode()) { case TargetOpcode::G_BUILD_VECTOR: IsUndef = false; @@ -275,10 +267,21 @@ bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef, return false; } } + + // Check if the combine is illegal + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + if (!isLegalOrBeforeLegalizer( + {TargetOpcode::G_BUILD_VECTOR, {DstTy, MRI.getType(Ops[0])}})) { + return false; + } + + if (IsUndef) + Ops.clear(); + return true; } -void CombinerHelper::applyCombineConcatVectors( - MachineInstr &MI, bool IsUndef, const ArrayRef Ops) { +void CombinerHelper::applyCombineConcatVectors(MachineInstr &MI, + SmallVector &Ops) { // We determined that the concat_vectors can be flatten. // Generate the flattened build_vector. Register DstReg = MI.getOperand(0).getReg(); @@ -289,9 +292,9 @@ void CombinerHelper::applyCombineConcatVectors( // checking that at all Ops are undef. Alternatively, we could have // generate a build_vector of undefs and rely on another combine to // clean that up. For now, given we already gather this information - // in tryCombineConcatVectors, just save compile time and issue the + // in matchCombineConcatVectors, just save compile time and issue the // right thing. 
- if (IsUndef) + if (Ops.empty()) Builder.buildUndef(NewDstReg); else Builder.buildBuildVector(NewDstReg, Ops); diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td index fdea974d4540a0..1e1c6ece85b24c 100644 --- a/llvm/lib/Target/AArch64/AArch64Combine.td +++ b/llvm/lib/Target/AArch64/AArch64Combine.td @@ -288,6 +288,6 @@ def AArch64PostLegalizerCombiner constant_fold_binops, identity_combines, ptr_add_immed_chain, overlapping_and, split_store_zero_128, undef_combines, - select_to_minmax, or_to_bsp, + select_to_minmax, or_to_bsp, combine_concat_vector, commute_constant_to_rhs]> { } diff --git a/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp index 0b82ed1280ddd4..17dd8f2314a2b3 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp @@ -91,8 +91,6 @@ bool AArch64O0PreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const { unsigned Opc = MI.getOpcode(); switch (Opc) { - case TargetOpcode::G_CONCAT_VECTORS: - return Helper.tryCombineConcatVectors(MI); case TargetOpcode::G_SHUFFLE_VECTOR: return Helper.tryCombineShuffleVector(MI); case TargetOpcode::G_MEMCPY_INLINE: diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp index 574d065ab01bb2..a82d3cd095659b 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp @@ -720,8 +720,6 @@ bool AArch64PreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const { unsigned Opc = MI.getOpcode(); switch (Opc) { - case TargetOpcode::G_CONCAT_VECTORS: - return Helper.tryCombineConcatVectors(MI); case TargetOpcode::G_SHUFFLE_VECTOR: return Helper.tryCombineShuffleVector(MI); case TargetOpcode::G_UADDO: diff --git 
a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp index 0c7e198810da76..f14d970f1e5de7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp @@ -106,8 +106,6 @@ bool AMDGPUPreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const { return true; switch (MI.getOpcode()) { - case TargetOpcode::G_CONCAT_VECTORS: - return Helper.tryCombineConcatVectors(MI); case TargetOpcode::G_SHUFFLE_VECTOR: return Helper.tryCombineShuffleVector(MI); } diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll index 85689b6b2a6028..c40867ff73920c 100644 --- a/llvm/test/CodeGen/AArch64/itofp.ll +++ b/llvm/test/CodeGen/AArch64/itofp.ll @@ -4043,28 +4043,28 @@ define <8 x half> @stofp_v8i64_v8f16(<8 x i64> %a) { ; CHECK-GI-FP16-LABEL: stofp_v8i64_v8f16: ; CHECK-GI-FP16: // %bb.0: // %entry ; CHECK-GI-FP16-NEXT: scvtf v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: scvtf v2.2d, v2.2d ; CHECK-GI-FP16-NEXT: scvtf v1.2d, v1.2d +; CHECK-GI-FP16-NEXT: scvtf v2.2d, v2.2d ; CHECK-GI-FP16-NEXT: scvtf v3.2d, v3.2d ; CHECK-GI-FP16-NEXT: mov d4, v0.d[1] -; CHECK-GI-FP16-NEXT: mov d5, v2.d[1] ; CHECK-GI-FP16-NEXT: fcvt h0, d0 -; CHECK-GI-FP16-NEXT: fcvt h2, d2 -; CHECK-GI-FP16-NEXT: fcvt h4, d4 -; CHECK-GI-FP16-NEXT: fcvt h5, d5 -; CHECK-GI-FP16-NEXT: mov v0.h[1], v4.h[0] -; CHECK-GI-FP16-NEXT: mov d4, v1.d[1] +; CHECK-GI-FP16-NEXT: mov d5, v1.d[1] ; CHECK-GI-FP16-NEXT: fcvt h1, d1 -; CHECK-GI-FP16-NEXT: mov v2.h[1], v5.h[0] -; CHECK-GI-FP16-NEXT: mov d5, v3.d[1] -; CHECK-GI-FP16-NEXT: fcvt h3, d3 ; CHECK-GI-FP16-NEXT: fcvt h4, d4 +; CHECK-GI-FP16-NEXT: mov v0.h[1], v4.h[0] +; CHECK-GI-FP16-NEXT: fcvt h4, d5 ; CHECK-GI-FP16-NEXT: mov v0.h[2], v1.h[0] -; CHECK-GI-FP16-NEXT: fcvt h1, d5 -; CHECK-GI-FP16-NEXT: mov v2.h[2], v3.h[0] +; CHECK-GI-FP16-NEXT: mov d1, v2.d[1] +; CHECK-GI-FP16-NEXT: fcvt h2, d2 ; CHECK-GI-FP16-NEXT: mov v0.h[3], v4.h[0] -; 
CHECK-GI-FP16-NEXT: mov v2.h[3], v1.h[0] -; CHECK-GI-FP16-NEXT: mov v0.d[1], v2.d[0] +; CHECK-GI-FP16-NEXT: fcvt h1, d1 +; CHECK-GI-FP16-NEXT: mov v0.h[4], v2.h[0] +; CHECK-GI-FP16-NEXT: mov d2, v3.d[1] +; CHECK-GI-FP16-NEXT: fcvt h3, d3 +; CHECK-GI-FP16-NEXT: mov v0.h[5], v1.h[0] +; CHECK-GI-FP16-NEXT: fcvt h1, d2 +; CHECK-GI-FP16-NEXT: mov v0.h[6], v3.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[7], v1.h[0] ; CHECK-GI-FP16-NEXT: ret entry: %c = sitofp <8 x i64> %a to <8 x half> @@ -4103,28 +4103,28 @@ define <8 x half> @utofp_v8i64_v8f16(<8 x i64> %a) { ; CHECK-GI-FP16-LABEL: utofp_v8i64_v8f16: ; CHECK-GI-FP16: // %bb.0: // %entry ; CHECK-GI-FP16-NEXT: ucvtf v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: ucvtf v2.2d, v2.2d ; CHECK-GI-FP16-NEXT: ucvtf v1.2d, v1.2d +; CHECK-GI-FP16-NEXT: ucvtf v2.2d, v2.2d ; CHECK-GI-FP16-NEXT: ucvtf v3.2d, v3.2d ; CHECK-GI-FP16-NEXT: mov d4, v0.d[1] -; CHECK-GI-FP16-NEXT: mov d5, v2.d[1] ; CHECK-GI-FP16-NEXT: fcvt h0, d0 -; CHECK-GI-FP16-NEXT: fcvt h2, d2 -; CHECK-GI-FP16-NEXT: fcvt h4, d4 -; CHECK-GI-FP16-NEXT: fcvt h5, d5 -; CHECK-GI-FP16-NEXT: mov v0.h[1], v4.h[0] -; CHECK-GI-FP16-NEXT: mov d4, v1.d[1] +; CHECK-GI-FP16-NEXT: mov d5, v1.d[1] ; CHECK-GI-FP16-NEXT: fcvt h1, d1 -; CHECK-GI-FP16-NEXT: mov v2.h[1], v5.h[0] -; CHECK-GI-FP16-NEXT: mov d5, v3.d[1] -; CHECK-GI-FP16-NEXT: fcvt h3, d3 ; CHECK-GI-FP16-NEXT: fcvt h4, d4 +; CHECK-GI-FP16-NEXT: mov v0.h[1], v4.h[0] +; CHECK-GI-FP16-NEXT: fcvt h4, d5 ; CHECK-GI-FP16-NEXT: mov v0.h[2], v1.h[0] -; CHECK-GI-FP16-NEXT: fcvt h1, d5 -; CHECK-GI-FP16-NEXT: mov v2.h[2], v3.h[0] +; CHECK-GI-FP16-NEXT: mov d1, v2.d[1] +; CHECK-GI-FP16-NEXT: fcvt h2, d2 ; CHECK-GI-FP16-NEXT: mov v0.h[3], v4.h[0] -; CHECK-GI-FP16-NEXT: mov v2.h[3], v1.h[0] -; CHECK-GI-FP16-NEXT: mov v0.d[1], v2.d[0] +; CHECK-GI-FP16-NEXT: fcvt h1, d1 +; CHECK-GI-FP16-NEXT: mov v0.h[4], v2.h[0] +; CHECK-GI-FP16-NEXT: mov d2, v3.d[1] +; CHECK-GI-FP16-NEXT: fcvt h3, d3 +; CHECK-GI-FP16-NEXT: mov v0.h[5], v1.h[0] +; CHECK-GI-FP16-NEXT: fcvt h1, 
d2 +; CHECK-GI-FP16-NEXT: mov v0.h[6], v3.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[7], v1.h[0] ; CHECK-GI-FP16-NEXT: ret entry: %c = uitofp <8 x i64> %a to <8 x half> @@ -4183,51 +4183,51 @@ define <16 x half> @stofp_v16i64_v16f16(<16 x i64> %a) { ; CHECK-GI-FP16-LABEL: stofp_v16i64_v16f16: ; CHECK-GI-FP16: // %bb.0: // %entry ; CHECK-GI-FP16-NEXT: scvtf v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: scvtf v16.2d, v2.2d ; CHECK-GI-FP16-NEXT: scvtf v4.2d, v4.2d -; CHECK-GI-FP16-NEXT: scvtf v2.2d, v6.2d -; CHECK-GI-FP16-NEXT: scvtf v20.2d, v1.2d -; CHECK-GI-FP16-NEXT: scvtf v3.2d, v3.2d +; CHECK-GI-FP16-NEXT: scvtf v18.2d, v1.2d ; CHECK-GI-FP16-NEXT: scvtf v5.2d, v5.2d -; CHECK-GI-FP16-NEXT: scvtf v7.2d, v7.2d -; CHECK-GI-FP16-NEXT: mov d6, v0.d[1] -; CHECK-GI-FP16-NEXT: mov d17, v16.d[1] -; CHECK-GI-FP16-NEXT: mov d18, v4.d[1] -; CHECK-GI-FP16-NEXT: mov d19, v2.d[1] +; CHECK-GI-FP16-NEXT: scvtf v2.2d, v2.2d +; CHECK-GI-FP16-NEXT: scvtf v3.2d, v3.2d +; CHECK-GI-FP16-NEXT: mov d16, v0.d[1] +; CHECK-GI-FP16-NEXT: mov d17, v4.d[1] ; CHECK-GI-FP16-NEXT: fcvt h0, d0 -; CHECK-GI-FP16-NEXT: fcvt h16, d16 ; CHECK-GI-FP16-NEXT: fcvt h1, d4 -; CHECK-GI-FP16-NEXT: fcvt h2, d2 -; CHECK-GI-FP16-NEXT: fcvt h6, d6 -; CHECK-GI-FP16-NEXT: fcvt h17, d17 -; CHECK-GI-FP16-NEXT: fcvt h4, d18 -; CHECK-GI-FP16-NEXT: fcvt h18, d19 -; CHECK-GI-FP16-NEXT: fcvt h19, d20 -; CHECK-GI-FP16-NEXT: mov v0.h[1], v6.h[0] -; CHECK-GI-FP16-NEXT: mov d6, v20.d[1] -; CHECK-GI-FP16-NEXT: mov v16.h[1], v17.h[0] -; CHECK-GI-FP16-NEXT: mov d17, v3.d[1] -; CHECK-GI-FP16-NEXT: fcvt h3, d3 -; CHECK-GI-FP16-NEXT: mov v1.h[1], v4.h[0] -; CHECK-GI-FP16-NEXT: mov d4, v5.d[1] +; CHECK-GI-FP16-NEXT: mov d19, v5.d[1] ; CHECK-GI-FP16-NEXT: fcvt h5, d5 -; CHECK-GI-FP16-NEXT: mov v2.h[1], v18.h[0] -; CHECK-GI-FP16-NEXT: mov d18, v7.d[1] -; CHECK-GI-FP16-NEXT: fcvt h7, d7 -; CHECK-GI-FP16-NEXT: mov v0.h[2], v19.h[0] -; CHECK-GI-FP16-NEXT: mov v16.h[2], v3.h[0] -; CHECK-GI-FP16-NEXT: fcvt h3, d6 +; CHECK-GI-FP16-NEXT: fcvt h16, d16 +; 
CHECK-GI-FP16-NEXT: fcvt h4, d17 +; CHECK-GI-FP16-NEXT: mov d17, v18.d[1] +; CHECK-GI-FP16-NEXT: fcvt h18, d18 +; CHECK-GI-FP16-NEXT: mov v0.h[1], v16.h[0] +; CHECK-GI-FP16-NEXT: mov v1.h[1], v4.h[0] +; CHECK-GI-FP16-NEXT: scvtf v4.2d, v6.2d ; CHECK-GI-FP16-NEXT: fcvt h6, d17 -; CHECK-GI-FP16-NEXT: fcvt h4, d4 +; CHECK-GI-FP16-NEXT: fcvt h16, d19 +; CHECK-GI-FP16-NEXT: mov v0.h[2], v18.h[0] ; CHECK-GI-FP16-NEXT: mov v1.h[2], v5.h[0] -; CHECK-GI-FP16-NEXT: fcvt h5, d18 -; CHECK-GI-FP16-NEXT: mov v2.h[2], v7.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0] -; CHECK-GI-FP16-NEXT: mov v16.h[3], v6.h[0] -; CHECK-GI-FP16-NEXT: mov v1.h[3], v4.h[0] -; CHECK-GI-FP16-NEXT: mov v2.h[3], v5.h[0] -; CHECK-GI-FP16-NEXT: mov v0.d[1], v16.d[0] -; CHECK-GI-FP16-NEXT: mov v1.d[1], v2.d[0] +; CHECK-GI-FP16-NEXT: mov d5, v2.d[1] +; CHECK-GI-FP16-NEXT: fcvt h2, d2 +; CHECK-GI-FP16-NEXT: mov d17, v4.d[1] +; CHECK-GI-FP16-NEXT: fcvt h4, d4 +; CHECK-GI-FP16-NEXT: mov v0.h[3], v6.h[0] +; CHECK-GI-FP16-NEXT: mov v1.h[3], v16.h[0] +; CHECK-GI-FP16-NEXT: scvtf v6.2d, v7.2d +; CHECK-GI-FP16-NEXT: fcvt h5, d5 +; CHECK-GI-FP16-NEXT: fcvt h7, d17 +; CHECK-GI-FP16-NEXT: mov v0.h[4], v2.h[0] +; CHECK-GI-FP16-NEXT: mov v1.h[4], v4.h[0] +; CHECK-GI-FP16-NEXT: mov d2, v3.d[1] +; CHECK-GI-FP16-NEXT: fcvt h3, d3 +; CHECK-GI-FP16-NEXT: mov d4, v6.d[1] +; CHECK-GI-FP16-NEXT: fcvt h6, d6 +; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0] +; CHECK-GI-FP16-NEXT: mov v1.h[5], v7.h[0] +; CHECK-GI-FP16-NEXT: fcvt h2, d2 +; CHECK-GI-FP16-NEXT: fcvt h4, d4 +; CHECK-GI-FP16-NEXT: mov v0.h[6], v3.h[0] +; CHECK-GI-FP16-NEXT: mov v1.h[6], v6.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[7], v2.h[0] +; CHECK-GI-FP16-NEXT: mov v1.h[7], v4.h[0] ; CHECK-GI-FP16-NEXT: ret entry: %c = sitofp <16 x i64> %a to <16 x half> @@ -4286,51 +4286,51 @@ define <16 x half> @utofp_v16i64_v16f16(<16 x i64> %a) { ; CHECK-GI-FP16-LABEL: utofp_v16i64_v16f16: ; CHECK-GI-FP16: // %bb.0: // %entry ; CHECK-GI-FP16-NEXT: ucvtf v0.2d, v0.2d -; 
CHECK-GI-FP16-NEXT: ucvtf v16.2d, v2.2d ; CHECK-GI-FP16-NEXT: ucvtf v4.2d, v4.2d -; CHECK-GI-FP16-NEXT: ucvtf v2.2d, v6.2d -; CHECK-GI-FP16-NEXT: ucvtf v20.2d, v1.2d -; CHECK-GI-FP16-NEXT: ucvtf v3.2d, v3.2d +; CHECK-GI-FP16-NEXT: ucvtf v18.2d, v1.2d ; CHECK-GI-FP16-NEXT: ucvtf v5.2d, v5.2d -; CHECK-GI-FP16-NEXT: ucvtf v7.2d, v7.2d -; CHECK-GI-FP16-NEXT: mov d6, v0.d[1] -; CHECK-GI-FP16-NEXT: mov d17, v16.d[1] -; CHECK-GI-FP16-NEXT: mov d18, v4.d[1] -; CHECK-GI-FP16-NEXT: mov d19, v2.d[1] +; CHECK-GI-FP16-NEXT: ucvtf v2.2d, v2.2d +; CHECK-GI-FP16-NEXT: ucvtf v3.2d, v3.2d +; CHECK-GI-FP16-NEXT: mov d16, v0.d[1] +; CHECK-GI-FP16-NEXT: mov d17, v4.d[1] ; CHECK-GI-FP16-NEXT: fcvt h0, d0 -; CHECK-GI-FP16-NEXT: fcvt h16, d16 ; CHECK-GI-FP16-NEXT: fcvt h1, d4 -; CHECK-GI-FP16-NEXT: fcvt h2, d2 -; CHECK-GI-FP16-NEXT: fcvt h6, d6 -; CHECK-GI-FP16-NEXT: fcvt h17, d17 -; CHECK-GI-FP16-NEXT: fcvt h4, d18 -; CHECK-GI-FP16-NEXT: fcvt h18, d19 -; CHECK-GI-FP16-NEXT: fcvt h19, d20 -; CHECK-GI-FP16-NEXT: mov v0.h[1], v6.h[0] -; CHECK-GI-FP16-NEXT: mov d6, v20.d[1] -; CHECK-GI-FP16-NEXT: mov v16.h[1], v17.h[0] -; CHECK-GI-FP16-NEXT: mov d17, v3.d[1] -; CHECK-GI-FP16-NEXT: fcvt h3, d3 -; CHECK-GI-FP16-NEXT: mov v1.h[1], v4.h[0] -; CHECK-GI-FP16-NEXT: mov d4, v5.d[1] +; CHECK-GI-FP16-NEXT: mov d19, v5.d[1] ; CHECK-GI-FP16-NEXT: fcvt h5, d5 -; CHECK-GI-FP16-NEXT: mov v2.h[1], v18.h[0] -; CHECK-GI-FP16-NEXT: mov d18, v7.d[1] -; CHECK-GI-FP16-NEXT: fcvt h7, d7 -; CHECK-GI-FP16-NEXT: mov v0.h[2], v19.h[0] -; CHECK-GI-FP16-NEXT: mov v16.h[2], v3.h[0] -; CHECK-GI-FP16-NEXT: fcvt h3, d6 +; CHECK-GI-FP16-NEXT: fcvt h16, d16 +; CHECK-GI-FP16-NEXT: fcvt h4, d17 +; CHECK-GI-FP16-NEXT: mov d17, v18.d[1] +; CHECK-GI-FP16-NEXT: fcvt h18, d18 +; CHECK-GI-FP16-NEXT: mov v0.h[1], v16.h[0] +; CHECK-GI-FP16-NEXT: mov v1.h[1], v4.h[0] +; CHECK-GI-FP16-NEXT: ucvtf v4.2d, v6.2d ; CHECK-GI-FP16-NEXT: fcvt h6, d17 -; CHECK-GI-FP16-NEXT: fcvt h4, d4 +; CHECK-GI-FP16-NEXT: fcvt h16, d19 +; CHECK-GI-FP16-NEXT: 
mov v0.h[2], v18.h[0] ; CHECK-GI-FP16-NEXT: mov v1.h[2], v5.h[0] -; CHECK-GI-FP16-NEXT: fcvt h5, d18 -; CHECK-GI-FP16-NEXT: mov v2.h[2], v7.h[0] -; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0] -; CHECK-GI-FP16-NEXT: mov v16.h[3], v6.h[0] -; CHECK-GI-FP16-NEXT: mov v1.h[3], v4.h[0] -; CHECK-GI-FP16-NEXT: mov v2.h[3], v5.h[0] -; CHECK-GI-FP16-NEXT: mov v0.d[1], v16.d[0] -; CHECK-GI-FP16-NEXT: mov v1.d[1], v2.d[0] +; CHECK-GI-FP16-NEXT: mov d5, v2.d[1] +; CHECK-GI-FP16-NEXT: fcvt h2, d2 +; CHECK-GI-FP16-NEXT: mov d17, v4.d[1] +; CHECK-GI-FP16-NEXT: fcvt h4, d4 +; CHECK-GI-FP16-NEXT: mov v0.h[3], v6.h[0] +; CHECK-GI-FP16-NEXT: mov v1.h[3], v16.h[0] +; CHECK-GI-FP16-NEXT: ucvtf v6.2d, v7.2d +; CHECK-GI-FP16-NEXT: fcvt h5, d5 +; CHECK-GI-FP16-NEXT: fcvt h7, d17 +; CHECK-GI-FP16-NEXT: mov v0.h[4], v2.h[0] +; CHECK-GI-FP16-NEXT: mov v1.h[4], v4.h[0] +; CHECK-GI-FP16-NEXT: mov d2, v3.d[1] +; CHECK-GI-FP16-NEXT: fcvt h3, d3 +; CHECK-GI-FP16-NEXT: mov d4, v6.d[1] +; CHECK-GI-FP16-NEXT: fcvt h6, d6 +; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0] +; CHECK-GI-FP16-NEXT: mov v1.h[5], v7.h[0] +; CHECK-GI-FP16-NEXT: fcvt h2, d2 +; CHECK-GI-FP16-NEXT: fcvt h4, d4 +; CHECK-GI-FP16-NEXT: mov v0.h[6], v3.h[0] +; CHECK-GI-FP16-NEXT: mov v1.h[6], v6.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[7], v2.h[0] +; CHECK-GI-FP16-NEXT: mov v1.h[7], v4.h[0] ; CHECK-GI-FP16-NEXT: ret entry: %c = uitofp <16 x i64> %a to <16 x half> @@ -4436,103 +4436,104 @@ define <32 x half> @stofp_v32i64_v32f16(<32 x i64> %a) { ; ; CHECK-GI-FP16-LABEL: stofp_v32i64_v32f16: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: scvtf v16.2d, v2.2d +; CHECK-GI-FP16-NEXT: ldp q16, q18, [sp] ; CHECK-GI-FP16-NEXT: scvtf v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: scvtf v18.2d, v4.2d -; CHECK-GI-FP16-NEXT: scvtf v17.2d, v6.2d -; CHECK-GI-FP16-NEXT: scvtf v4.2d, v1.2d -; CHECK-GI-FP16-NEXT: scvtf v3.2d, v3.2d -; CHECK-GI-FP16-NEXT: ldp q1, q23, [sp] +; CHECK-GI-FP16-NEXT: ldp q17, q19, [sp, #64] +; CHECK-GI-FP16-NEXT: scvtf v4.2d, v4.2d 
+; CHECK-GI-FP16-NEXT: scvtf v1.2d, v1.2d ; CHECK-GI-FP16-NEXT: scvtf v5.2d, v5.2d -; CHECK-GI-FP16-NEXT: scvtf v6.2d, v7.2d -; CHECK-GI-FP16-NEXT: mov d20, v16.d[1] -; CHECK-GI-FP16-NEXT: mov d19, v0.d[1] -; CHECK-GI-FP16-NEXT: mov d21, v18.d[1] -; CHECK-GI-FP16-NEXT: mov d22, v17.d[1] -; CHECK-GI-FP16-NEXT: fcvt h16, d16 -; CHECK-GI-FP16-NEXT: scvtf v2.2d, v1.2d +; CHECK-GI-FP16-NEXT: scvtf v6.2d, v6.2d +; CHECK-GI-FP16-NEXT: scvtf v20.2d, v16.2d +; CHECK-GI-FP16-NEXT: scvtf v24.2d, v18.2d +; CHECK-GI-FP16-NEXT: scvtf v2.2d, v2.2d +; CHECK-GI-FP16-NEXT: scvtf v16.2d, v17.2d +; CHECK-GI-FP16-NEXT: mov d21, v0.d[1] +; CHECK-GI-FP16-NEXT: scvtf v25.2d, v19.2d +; CHECK-GI-FP16-NEXT: mov d22, v4.d[1] ; CHECK-GI-FP16-NEXT: fcvt h0, d0 -; CHECK-GI-FP16-NEXT: fcvt h1, d18 -; CHECK-GI-FP16-NEXT: ldr q18, [sp, #32] -; CHECK-GI-FP16-NEXT: fcvt h7, d17 -; CHECK-GI-FP16-NEXT: ldp q25, q17, [sp, #48] -; CHECK-GI-FP16-NEXT: fcvt h20, d20 -; CHECK-GI-FP16-NEXT: fcvt h24, d19 -; CHECK-GI-FP16-NEXT: fcvt h21, d21 -; CHECK-GI-FP16-NEXT: fcvt h22, d22 -; CHECK-GI-FP16-NEXT: scvtf v18.2d, v18.2d -; CHECK-GI-FP16-NEXT: fcvt h26, d4 -; CHECK-GI-FP16-NEXT: scvtf v17.2d, v17.2d -; CHECK-GI-FP16-NEXT: fcvt h27, d3 -; CHECK-GI-FP16-NEXT: fcvt h28, d6 -; CHECK-GI-FP16-NEXT: scvtf v23.2d, v23.2d -; CHECK-GI-FP16-NEXT: scvtf v25.2d, v25.2d -; CHECK-GI-FP16-NEXT: mov d4, v4.d[1] -; CHECK-GI-FP16-NEXT: mov v16.h[1], v20.h[0] -; CHECK-GI-FP16-NEXT: ldp q19, q20, [sp, #80] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v24.h[0] -; CHECK-GI-FP16-NEXT: fcvt h24, d5 -; CHECK-GI-FP16-NEXT: mov v1.h[1], v21.h[0] -; CHECK-GI-FP16-NEXT: ldr q21, [sp, #112] -; CHECK-GI-FP16-NEXT: mov v7.h[1], v22.h[0] -; CHECK-GI-FP16-NEXT: mov d22, v2.d[1] -; CHECK-GI-FP16-NEXT: scvtf v20.2d, v20.2d -; CHECK-GI-FP16-NEXT: fcvt h2, d2 -; CHECK-GI-FP16-NEXT: scvtf v19.2d, v19.2d -; CHECK-GI-FP16-NEXT: mov v16.h[2], v27.h[0] -; CHECK-GI-FP16-NEXT: scvtf v21.2d, v21.2d -; CHECK-GI-FP16-NEXT: mov d5, v5.d[1] -; CHECK-GI-FP16-NEXT: mov 
v0.h[2], v26.h[0] -; CHECK-GI-FP16-NEXT: mov d26, v18.d[1] -; CHECK-GI-FP16-NEXT: mov v1.h[2], v24.h[0] -; CHECK-GI-FP16-NEXT: mov d24, v17.d[1] -; CHECK-GI-FP16-NEXT: fcvt h22, d22 -; CHECK-GI-FP16-NEXT: mov v7.h[2], v28.h[0] -; CHECK-GI-FP16-NEXT: mov d27, v20.d[1] +; CHECK-GI-FP16-NEXT: scvtf v3.2d, v3.2d +; CHECK-GI-FP16-NEXT: mov d18, v1.d[1] +; CHECK-GI-FP16-NEXT: fcvt h17, d1 +; CHECK-GI-FP16-NEXT: mov d19, v5.d[1] +; CHECK-GI-FP16-NEXT: mov d23, v20.d[1] +; CHECK-GI-FP16-NEXT: fcvt h1, d4 +; CHECK-GI-FP16-NEXT: fcvt h4, d20 +; CHECK-GI-FP16-NEXT: mov d26, v16.d[1] +; CHECK-GI-FP16-NEXT: fcvt h20, d5 +; CHECK-GI-FP16-NEXT: fcvt h5, d16 +; CHECK-GI-FP16-NEXT: fcvt h28, d21 +; CHECK-GI-FP16-NEXT: fcvt h29, d22 +; CHECK-GI-FP16-NEXT: fcvt h22, d24 +; CHECK-GI-FP16-NEXT: fcvt h21, d25 ; CHECK-GI-FP16-NEXT: fcvt h18, d18 -; CHECK-GI-FP16-NEXT: mov d28, v3.d[1] -; CHECK-GI-FP16-NEXT: fcvt h3, d17 -; CHECK-GI-FP16-NEXT: fcvt h20, d20 -; CHECK-GI-FP16-NEXT: mov d6, v6.d[1] -; CHECK-GI-FP16-NEXT: fcvt h26, d26 -; CHECK-GI-FP16-NEXT: fcvt h4, d4 -; CHECK-GI-FP16-NEXT: fcvt h5, d5 -; CHECK-GI-FP16-NEXT: fcvt h17, d24 -; CHECK-GI-FP16-NEXT: mov v2.h[1], v22.h[0] -; CHECK-GI-FP16-NEXT: fcvt h24, d23 -; CHECK-GI-FP16-NEXT: fcvt h22, d27 -; CHECK-GI-FP16-NEXT: mov d23, v23.d[1] -; CHECK-GI-FP16-NEXT: fcvt h6, d6 -; CHECK-GI-FP16-NEXT: mov v18.h[1], v26.h[0] -; CHECK-GI-FP16-NEXT: fcvt h26, d25 -; CHECK-GI-FP16-NEXT: mov d25, v25.d[1] -; CHECK-GI-FP16-NEXT: mov v3.h[1], v17.h[0] -; CHECK-GI-FP16-NEXT: mov d17, v19.d[1] ; CHECK-GI-FP16-NEXT: fcvt h19, d19 -; CHECK-GI-FP16-NEXT: mov v20.h[1], v22.h[0] -; CHECK-GI-FP16-NEXT: mov d22, v21.d[1] -; CHECK-GI-FP16-NEXT: fcvt h21, d21 -; CHECK-GI-FP16-NEXT: mov v2.h[2], v24.h[0] -; CHECK-GI-FP16-NEXT: fcvt h24, d28 +; CHECK-GI-FP16-NEXT: fcvt h27, d23 +; CHECK-GI-FP16-NEXT: mov d23, v24.d[1] +; CHECK-GI-FP16-NEXT: mov d24, v25.d[1] +; CHECK-GI-FP16-NEXT: ldp q25, q16, [sp, #32] +; CHECK-GI-FP16-NEXT: fcvt h26, d26 +; 
CHECK-GI-FP16-NEXT: mov v0.h[1], v28.h[0] +; CHECK-GI-FP16-NEXT: mov v1.h[1], v29.h[0] +; CHECK-GI-FP16-NEXT: scvtf v7.2d, v7.2d +; CHECK-GI-FP16-NEXT: mov v4.h[1], v27.h[0] +; CHECK-GI-FP16-NEXT: scvtf v25.2d, v25.2d ; CHECK-GI-FP16-NEXT: fcvt h23, d23 -; CHECK-GI-FP16-NEXT: mov v18.h[2], v26.h[0] -; CHECK-GI-FP16-NEXT: fcvt h25, d25 -; CHECK-GI-FP16-NEXT: mov v0.h[3], v4.h[0] +; CHECK-GI-FP16-NEXT: mov v5.h[1], v26.h[0] +; CHECK-GI-FP16-NEXT: ldp q26, q27, [sp, #96] +; CHECK-GI-FP16-NEXT: fcvt h24, d24 +; CHECK-GI-FP16-NEXT: mov v0.h[2], v17.h[0] +; CHECK-GI-FP16-NEXT: mov v1.h[2], v20.h[0] +; CHECK-GI-FP16-NEXT: mov d20, v6.d[1] +; CHECK-GI-FP16-NEXT: mov d17, v2.d[1] +; CHECK-GI-FP16-NEXT: fcvt h2, d2 +; CHECK-GI-FP16-NEXT: scvtf v26.2d, v26.2d +; CHECK-GI-FP16-NEXT: mov v4.h[2], v22.h[0] +; CHECK-GI-FP16-NEXT: fcvt h22, d25 +; CHECK-GI-FP16-NEXT: mov v5.h[2], v21.h[0] +; CHECK-GI-FP16-NEXT: mov d21, v25.d[1] +; CHECK-GI-FP16-NEXT: fcvt h6, d6 +; CHECK-GI-FP16-NEXT: mov v0.h[3], v18.h[0] +; CHECK-GI-FP16-NEXT: mov v1.h[3], v19.h[0] +; CHECK-GI-FP16-NEXT: scvtf v16.2d, v16.2d +; CHECK-GI-FP16-NEXT: scvtf v18.2d, v27.2d +; CHECK-GI-FP16-NEXT: fcvt h19, d20 ; CHECK-GI-FP16-NEXT: fcvt h17, d17 -; CHECK-GI-FP16-NEXT: mov v3.h[2], v19.h[0] -; CHECK-GI-FP16-NEXT: mov v1.h[3], v5.h[0] -; CHECK-GI-FP16-NEXT: fcvt h19, d22 -; CHECK-GI-FP16-NEXT: mov v20.h[2], v21.h[0] -; CHECK-GI-FP16-NEXT: mov v7.h[3], v6.h[0] -; CHECK-GI-FP16-NEXT: mov v16.h[3], v24.h[0] -; CHECK-GI-FP16-NEXT: mov v2.h[3], v23.h[0] -; CHECK-GI-FP16-NEXT: mov v18.h[3], v25.h[0] -; CHECK-GI-FP16-NEXT: mov v3.h[3], v17.h[0] -; CHECK-GI-FP16-NEXT: mov v20.h[3], v19.h[0] -; CHECK-GI-FP16-NEXT: mov v1.d[1], v7.d[0] -; CHECK-GI-FP16-NEXT: mov v0.d[1], v16.d[0] -; CHECK-GI-FP16-NEXT: mov v2.d[1], v18.d[0] -; CHECK-GI-FP16-NEXT: mov v3.d[1], v20.d[0] +; CHECK-GI-FP16-NEXT: mov d25, v26.d[1] +; CHECK-GI-FP16-NEXT: fcvt h26, d26 +; CHECK-GI-FP16-NEXT: mov v4.h[3], v23.h[0] +; CHECK-GI-FP16-NEXT: mov v5.h[3], 
v24.h[0] +; CHECK-GI-FP16-NEXT: fcvt h20, d21 +; CHECK-GI-FP16-NEXT: mov v0.h[4], v2.h[0] +; CHECK-GI-FP16-NEXT: mov v1.h[4], v6.h[0] +; CHECK-GI-FP16-NEXT: mov d2, v3.d[1] +; CHECK-GI-FP16-NEXT: mov d23, v18.d[1] +; CHECK-GI-FP16-NEXT: fcvt h18, d18 +; CHECK-GI-FP16-NEXT: fcvt h3, d3 +; CHECK-GI-FP16-NEXT: fcvt h21, d25 +; CHECK-GI-FP16-NEXT: mov v4.h[4], v22.h[0] +; CHECK-GI-FP16-NEXT: mov d22, v16.d[1] +; CHECK-GI-FP16-NEXT: mov v5.h[4], v26.h[0] +; CHECK-GI-FP16-NEXT: fcvt h16, d16 +; CHECK-GI-FP16-NEXT: mov d6, v7.d[1] +; CHECK-GI-FP16-NEXT: fcvt h7, d7 +; CHECK-GI-FP16-NEXT: mov v0.h[5], v17.h[0] +; CHECK-GI-FP16-NEXT: mov v1.h[5], v19.h[0] +; CHECK-GI-FP16-NEXT: fcvt h19, d23 +; CHECK-GI-FP16-NEXT: fcvt h2, d2 +; CHECK-GI-FP16-NEXT: mov v4.h[5], v20.h[0] +; CHECK-GI-FP16-NEXT: fcvt h17, d22 +; CHECK-GI-FP16-NEXT: mov v5.h[5], v21.h[0] +; CHECK-GI-FP16-NEXT: fcvt h6, d6 +; CHECK-GI-FP16-NEXT: mov v0.h[6], v3.h[0] +; CHECK-GI-FP16-NEXT: mov v1.h[6], v7.h[0] +; CHECK-GI-FP16-NEXT: mov v4.h[6], v16.h[0] +; CHECK-GI-FP16-NEXT: mov v5.h[6], v18.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[7], v2.h[0] +; CHECK-GI-FP16-NEXT: mov v1.h[7], v6.h[0] +; CHECK-GI-FP16-NEXT: mov v4.h[7], v17.h[0] +; CHECK-GI-FP16-NEXT: mov v5.h[7], v19.h[0] +; CHECK-GI-FP16-NEXT: mov v2.16b, v4.16b +; CHECK-GI-FP16-NEXT: mov v3.16b, v5.16b ; CHECK-GI-FP16-NEXT: ret entry: %c = sitofp <32 x i64> %a to <32 x half> @@ -4638,103 +4639,104 @@ define <32 x half> @utofp_v32i64_v32f16(<32 x i64> %a) { ; ; CHECK-GI-FP16-LABEL: utofp_v32i64_v32f16: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: ucvtf v16.2d, v2.2d +; CHECK-GI-FP16-NEXT: ldp q16, q18, [sp] ; CHECK-GI-FP16-NEXT: ucvtf v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: ucvtf v18.2d, v4.2d -; CHECK-GI-FP16-NEXT: ucvtf v17.2d, v6.2d -; CHECK-GI-FP16-NEXT: ucvtf v4.2d, v1.2d -; CHECK-GI-FP16-NEXT: ucvtf v3.2d, v3.2d -; CHECK-GI-FP16-NEXT: ldp q1, q23, [sp] +; CHECK-GI-FP16-NEXT: ldp q17, q19, [sp, #64] +; CHECK-GI-FP16-NEXT: ucvtf v4.2d, v4.2d +; 
CHECK-GI-FP16-NEXT: ucvtf v1.2d, v1.2d ; CHECK-GI-FP16-NEXT: ucvtf v5.2d, v5.2d -; CHECK-GI-FP16-NEXT: ucvtf v6.2d, v7.2d -; CHECK-GI-FP16-NEXT: mov d20, v16.d[1] -; CHECK-GI-FP16-NEXT: mov d19, v0.d[1] -; CHECK-GI-FP16-NEXT: mov d21, v18.d[1] -; CHECK-GI-FP16-NEXT: mov d22, v17.d[1] -; CHECK-GI-FP16-NEXT: fcvt h16, d16 -; CHECK-GI-FP16-NEXT: ucvtf v2.2d, v1.2d +; CHECK-GI-FP16-NEXT: ucvtf v6.2d, v6.2d +; CHECK-GI-FP16-NEXT: ucvtf v20.2d, v16.2d +; CHECK-GI-FP16-NEXT: ucvtf v24.2d, v18.2d +; CHECK-GI-FP16-NEXT: ucvtf v2.2d, v2.2d +; CHECK-GI-FP16-NEXT: ucvtf v16.2d, v17.2d +; CHECK-GI-FP16-NEXT: mov d21, v0.d[1] +; CHECK-GI-FP16-NEXT: ucvtf v25.2d, v19.2d +; CHECK-GI-FP16-NEXT: mov d22, v4.d[1] ; CHECK-GI-FP16-NEXT: fcvt h0, d0 -; CHECK-GI-FP16-NEXT: fcvt h1, d18 -; CHECK-GI-FP16-NEXT: ldr q18, [sp, #32] -; CHECK-GI-FP16-NEXT: fcvt h7, d17 -; CHECK-GI-FP16-NEXT: ldp q25, q17, [sp, #48] -; CHECK-GI-FP16-NEXT: fcvt h20, d20 -; CHECK-GI-FP16-NEXT: fcvt h24, d19 -; CHECK-GI-FP16-NEXT: fcvt h21, d21 -; CHECK-GI-FP16-NEXT: fcvt h22, d22 -; CHECK-GI-FP16-NEXT: ucvtf v18.2d, v18.2d -; CHECK-GI-FP16-NEXT: fcvt h26, d4 -; CHECK-GI-FP16-NEXT: ucvtf v17.2d, v17.2d -; CHECK-GI-FP16-NEXT: fcvt h27, d3 -; CHECK-GI-FP16-NEXT: fcvt h28, d6 -; CHECK-GI-FP16-NEXT: ucvtf v23.2d, v23.2d -; CHECK-GI-FP16-NEXT: ucvtf v25.2d, v25.2d -; CHECK-GI-FP16-NEXT: mov d4, v4.d[1] -; CHECK-GI-FP16-NEXT: mov v16.h[1], v20.h[0] -; CHECK-GI-FP16-NEXT: ldp q19, q20, [sp, #80] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v24.h[0] -; CHECK-GI-FP16-NEXT: fcvt h24, d5 -; CHECK-GI-FP16-NEXT: mov v1.h[1], v21.h[0] -; CHECK-GI-FP16-NEXT: ldr q21, [sp, #112] -; CHECK-GI-FP16-NEXT: mov v7.h[1], v22.h[0] -; CHECK-GI-FP16-NEXT: mov d22, v2.d[1] -; CHECK-GI-FP16-NEXT: ucvtf v20.2d, v20.2d -; CHECK-GI-FP16-NEXT: fcvt h2, d2 -; CHECK-GI-FP16-NEXT: ucvtf v19.2d, v19.2d -; CHECK-GI-FP16-NEXT: mov v16.h[2], v27.h[0] -; CHECK-GI-FP16-NEXT: ucvtf v21.2d, v21.2d -; CHECK-GI-FP16-NEXT: mov d5, v5.d[1] -; CHECK-GI-FP16-NEXT: mov 
v0.h[2], v26.h[0] -; CHECK-GI-FP16-NEXT: mov d26, v18.d[1] -; CHECK-GI-FP16-NEXT: mov v1.h[2], v24.h[0] -; CHECK-GI-FP16-NEXT: mov d24, v17.d[1] -; CHECK-GI-FP16-NEXT: fcvt h22, d22 -; CHECK-GI-FP16-NEXT: mov v7.h[2], v28.h[0] -; CHECK-GI-FP16-NEXT: mov d27, v20.d[1] +; CHECK-GI-FP16-NEXT: ucvtf v3.2d, v3.2d +; CHECK-GI-FP16-NEXT: mov d18, v1.d[1] +; CHECK-GI-FP16-NEXT: fcvt h17, d1 +; CHECK-GI-FP16-NEXT: mov d19, v5.d[1] +; CHECK-GI-FP16-NEXT: mov d23, v20.d[1] +; CHECK-GI-FP16-NEXT: fcvt h1, d4 +; CHECK-GI-FP16-NEXT: fcvt h4, d20 +; CHECK-GI-FP16-NEXT: mov d26, v16.d[1] +; CHECK-GI-FP16-NEXT: fcvt h20, d5 +; CHECK-GI-FP16-NEXT: fcvt h5, d16 +; CHECK-GI-FP16-NEXT: fcvt h28, d21 +; CHECK-GI-FP16-NEXT: fcvt h29, d22 +; CHECK-GI-FP16-NEXT: fcvt h22, d24 +; CHECK-GI-FP16-NEXT: fcvt h21, d25 ; CHECK-GI-FP16-NEXT: fcvt h18, d18 -; CHECK-GI-FP16-NEXT: mov d28, v3.d[1] -; CHECK-GI-FP16-NEXT: fcvt h3, d17 -; CHECK-GI-FP16-NEXT: fcvt h20, d20 -; CHECK-GI-FP16-NEXT: mov d6, v6.d[1] -; CHECK-GI-FP16-NEXT: fcvt h26, d26 -; CHECK-GI-FP16-NEXT: fcvt h4, d4 -; CHECK-GI-FP16-NEXT: fcvt h5, d5 -; CHECK-GI-FP16-NEXT: fcvt h17, d24 -; CHECK-GI-FP16-NEXT: mov v2.h[1], v22.h[0] -; CHECK-GI-FP16-NEXT: fcvt h24, d23 -; CHECK-GI-FP16-NEXT: fcvt h22, d27 -; CHECK-GI-FP16-NEXT: mov d23, v23.d[1] -; CHECK-GI-FP16-NEXT: fcvt h6, d6 -; CHECK-GI-FP16-NEXT: mov v18.h[1], v26.h[0] -; CHECK-GI-FP16-NEXT: fcvt h26, d25 -; CHECK-GI-FP16-NEXT: mov d25, v25.d[1] -; CHECK-GI-FP16-NEXT: mov v3.h[1], v17.h[0] -; CHECK-GI-FP16-NEXT: mov d17, v19.d[1] ; CHECK-GI-FP16-NEXT: fcvt h19, d19 -; CHECK-GI-FP16-NEXT: mov v20.h[1], v22.h[0] -; CHECK-GI-FP16-NEXT: mov d22, v21.d[1] -; CHECK-GI-FP16-NEXT: fcvt h21, d21 -; CHECK-GI-FP16-NEXT: mov v2.h[2], v24.h[0] -; CHECK-GI-FP16-NEXT: fcvt h24, d28 +; CHECK-GI-FP16-NEXT: fcvt h27, d23 +; CHECK-GI-FP16-NEXT: mov d23, v24.d[1] +; CHECK-GI-FP16-NEXT: mov d24, v25.d[1] +; CHECK-GI-FP16-NEXT: ldp q25, q16, [sp, #32] +; CHECK-GI-FP16-NEXT: fcvt h26, d26 +; 
CHECK-GI-FP16-NEXT: mov v0.h[1], v28.h[0] +; CHECK-GI-FP16-NEXT: mov v1.h[1], v29.h[0] +; CHECK-GI-FP16-NEXT: ucvtf v7.2d, v7.2d +; CHECK-GI-FP16-NEXT: mov v4.h[1], v27.h[0] +; CHECK-GI-FP16-NEXT: ucvtf v25.2d, v25.2d ; CHECK-GI-FP16-NEXT: fcvt h23, d23 -; CHECK-GI-FP16-NEXT: mov v18.h[2], v26.h[0] -; CHECK-GI-FP16-NEXT: fcvt h25, d25 -; CHECK-GI-FP16-NEXT: mov v0.h[3], v4.h[0] +; CHECK-GI-FP16-NEXT: mov v5.h[1], v26.h[0] +; CHECK-GI-FP16-NEXT: ldp q26, q27, [sp, #96] +; CHECK-GI-FP16-NEXT: fcvt h24, d24 +; CHECK-GI-FP16-NEXT: mov v0.h[2], v17.h[0] +; CHECK-GI-FP16-NEXT: mov v1.h[2], v20.h[0] +; CHECK-GI-FP16-NEXT: mov d20, v6.d[1] +; CHECK-GI-FP16-NEXT: mov d17, v2.d[1] +; CHECK-GI-FP16-NEXT: fcvt h2, d2 +; CHECK-GI-FP16-NEXT: ucvtf v26.2d, v26.2d +; CHECK-GI-FP16-NEXT: mov v4.h[2], v22.h[0] +; CHECK-GI-FP16-NEXT: fcvt h22, d25 +; CHECK-GI-FP16-NEXT: mov v5.h[2], v21.h[0] +; CHECK-GI-FP16-NEXT: mov d21, v25.d[1] +; CHECK-GI-FP16-NEXT: fcvt h6, d6 +; CHECK-GI-FP16-NEXT: mov v0.h[3], v18.h[0] +; CHECK-GI-FP16-NEXT: mov v1.h[3], v19.h[0] +; CHECK-GI-FP16-NEXT: ucvtf v16.2d, v16.2d +; CHECK-GI-FP16-NEXT: ucvtf v18.2d, v27.2d +; CHECK-GI-FP16-NEXT: fcvt h19, d20 ; CHECK-GI-FP16-NEXT: fcvt h17, d17 -; CHECK-GI-FP16-NEXT: mov v3.h[2], v19.h[0] -; CHECK-GI-FP16-NEXT: mov v1.h[3], v5.h[0] -; CHECK-GI-FP16-NEXT: fcvt h19, d22 -; CHECK-GI-FP16-NEXT: mov v20.h[2], v21.h[0] -; CHECK-GI-FP16-NEXT: mov v7.h[3], v6.h[0] -; CHECK-GI-FP16-NEXT: mov v16.h[3], v24.h[0] -; CHECK-GI-FP16-NEXT: mov v2.h[3], v23.h[0] -; CHECK-GI-FP16-NEXT: mov v18.h[3], v25.h[0] -; CHECK-GI-FP16-NEXT: mov v3.h[3], v17.h[0] -; CHECK-GI-FP16-NEXT: mov v20.h[3], v19.h[0] -; CHECK-GI-FP16-NEXT: mov v1.d[1], v7.d[0] -; CHECK-GI-FP16-NEXT: mov v0.d[1], v16.d[0] -; CHECK-GI-FP16-NEXT: mov v2.d[1], v18.d[0] -; CHECK-GI-FP16-NEXT: mov v3.d[1], v20.d[0] +; CHECK-GI-FP16-NEXT: mov d25, v26.d[1] +; CHECK-GI-FP16-NEXT: fcvt h26, d26 +; CHECK-GI-FP16-NEXT: mov v4.h[3], v23.h[0] +; CHECK-GI-FP16-NEXT: mov v5.h[3], 
v24.h[0] +; CHECK-GI-FP16-NEXT: fcvt h20, d21 +; CHECK-GI-FP16-NEXT: mov v0.h[4], v2.h[0] +; CHECK-GI-FP16-NEXT: mov v1.h[4], v6.h[0] +; CHECK-GI-FP16-NEXT: mov d2, v3.d[1] +; CHECK-GI-FP16-NEXT: mov d23, v18.d[1] +; CHECK-GI-FP16-NEXT: fcvt h18, d18 +; CHECK-GI-FP16-NEXT: fcvt h3, d3 +; CHECK-GI-FP16-NEXT: fcvt h21, d25 +; CHECK-GI-FP16-NEXT: mov v4.h[4], v22.h[0] +; CHECK-GI-FP16-NEXT: mov d22, v16.d[1] +; CHECK-GI-FP16-NEXT: mov v5.h[4], v26.h[0] +; CHECK-GI-FP16-NEXT: fcvt h16, d16 +; CHECK-GI-FP16-NEXT: mov d6, v7.d[1] +; CHECK-GI-FP16-NEXT: fcvt h7, d7 +; CHECK-GI-FP16-NEXT: mov v0.h[5], v17.h[0] +; CHECK-GI-FP16-NEXT: mov v1.h[5], v19.h[0] +; CHECK-GI-FP16-NEXT: fcvt h19, d23 +; CHECK-GI-FP16-NEXT: fcvt h2, d2 +; CHECK-GI-FP16-NEXT: mov v4.h[5], v20.h[0] +; CHECK-GI-FP16-NEXT: fcvt h17, d22 +; CHECK-GI-FP16-NEXT: mov v5.h[5], v21.h[0] +; CHECK-GI-FP16-NEXT: fcvt h6, d6 +; CHECK-GI-FP16-NEXT: mov v0.h[6], v3.h[0] +; CHECK-GI-FP16-NEXT: mov v1.h[6], v7.h[0] +; CHECK-GI-FP16-NEXT: mov v4.h[6], v16.h[0] +; CHECK-GI-FP16-NEXT: mov v5.h[6], v18.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[7], v2.h[0] +; CHECK-GI-FP16-NEXT: mov v1.h[7], v6.h[0] +; CHECK-GI-FP16-NEXT: mov v4.h[7], v17.h[0] +; CHECK-GI-FP16-NEXT: mov v5.h[7], v19.h[0] +; CHECK-GI-FP16-NEXT: mov v2.16b, v4.16b +; CHECK-GI-FP16-NEXT: mov v3.16b, v5.16b ; CHECK-GI-FP16-NEXT: ret entry: %c = uitofp <32 x i64> %a to <32 x half> diff --git a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll index f17b9724aadba3..01620652301ed4 100644 --- a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll +++ b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll @@ -1137,11 +1137,8 @@ define <4 x i32> @vselect_constant_cond_zero_v4i32(<4 x i32> %a) { ; ; CHECK-GI-LABEL: vselect_constant_cond_zero_v4i32: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: adrp x8, .LCPI85_1 -; CHECK-GI-NEXT: adrp x9, .LCPI85_0 -; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI85_1] -; CHECK-GI-NEXT: 
ldr d2, [x9, :lo12:.LCPI85_0] -; CHECK-GI-NEXT: mov v1.d[1], v2.d[0] +; CHECK-GI-NEXT: adrp x8, .LCPI85_0 +; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI85_0] ; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31 ; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31 ; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b @@ -1204,11 +1201,8 @@ define <4 x i32> @vselect_constant_cond_v4i32(<4 x i32> %a, <4 x i32> %b) { ; ; CHECK-GI-LABEL: vselect_constant_cond_v4i32: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: adrp x8, .LCPI88_1 -; CHECK-GI-NEXT: adrp x9, .LCPI88_0 -; CHECK-GI-NEXT: ldr d2, [x8, :lo12:.LCPI88_1] -; CHECK-GI-NEXT: ldr d3, [x9, :lo12:.LCPI88_0] -; CHECK-GI-NEXT: mov v2.d[1], v3.d[0] +; CHECK-GI-NEXT: adrp x8, .LCPI88_0 +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI88_0] ; CHECK-GI-NEXT: shl v2.4s, v2.4s, #31 ; CHECK-GI-NEXT: sshr v2.4s, v2.4s, #31 ; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add.ll b/llvm/test/CodeGen/AArch64/vecreduce-add.ll index 1531154b8a03c2..86dd1bdd511eb3 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-add.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-add.ll @@ -2092,104 +2092,104 @@ define i32 @test_udot_v24i8(ptr %p1, ptr %p2) { ; CHECK-GI-DOT-LABEL: test_udot_v24i8: ; CHECK-GI-DOT: // %bb.0: // %entry ; CHECK-GI-DOT-NEXT: ldr b1, [x0] -; CHECK-GI-DOT-NEXT: ldr b5, [x0, #1] +; CHECK-GI-DOT-NEXT: ldr b3, [x0, #1] ; CHECK-GI-DOT-NEXT: movi v0.2d, #0000000000000000 -; CHECK-GI-DOT-NEXT: ldr b3, [x0, #8] -; CHECK-GI-DOT-NEXT: ldr b7, [x0, #9] -; CHECK-GI-DOT-NEXT: ldr b2, [x0, #16] -; CHECK-GI-DOT-NEXT: ldr b4, [x1] -; CHECK-GI-DOT-NEXT: mov v1.b[1], v5.b[0] -; CHECK-GI-DOT-NEXT: ldr b16, [x1, #1] +; CHECK-GI-DOT-NEXT: ldr b2, [x1] +; CHECK-GI-DOT-NEXT: ldr b4, [x1, #1] +; CHECK-GI-DOT-NEXT: ldr b5, [x0, #8] +; CHECK-GI-DOT-NEXT: mov v1.b[1], v3.b[0] +; CHECK-GI-DOT-NEXT: ldr b3, [x0, #2] ; CHECK-GI-DOT-NEXT: ldr b6, [x1, #8] +; CHECK-GI-DOT-NEXT: mov v2.b[1], v4.b[0] +; CHECK-GI-DOT-NEXT: ldr b4, [x1, #2] +; CHECK-GI-DOT-NEXT: ldr b7, 
[x0, #17] +; CHECK-GI-DOT-NEXT: ldr b16, [x1, #17] +; CHECK-GI-DOT-NEXT: mov v1.b[2], v3.b[0] +; CHECK-GI-DOT-NEXT: ldr b3, [x0, #3] +; CHECK-GI-DOT-NEXT: mov v2.b[2], v4.b[0] +; CHECK-GI-DOT-NEXT: ldr b4, [x1, #3] +; CHECK-GI-DOT-NEXT: mov v1.b[3], v3.b[0] +; CHECK-GI-DOT-NEXT: ldr b3, [x0, #4] +; CHECK-GI-DOT-NEXT: mov v2.b[3], v4.b[0] +; CHECK-GI-DOT-NEXT: ldr b4, [x1, #4] +; CHECK-GI-DOT-NEXT: mov v1.b[4], v3.b[0] +; CHECK-GI-DOT-NEXT: ldr b3, [x0, #5] +; CHECK-GI-DOT-NEXT: mov v2.b[4], v4.b[0] +; CHECK-GI-DOT-NEXT: ldr b4, [x1, #5] +; CHECK-GI-DOT-NEXT: mov v1.b[5], v3.b[0] +; CHECK-GI-DOT-NEXT: ldr b3, [x0, #6] +; CHECK-GI-DOT-NEXT: mov v2.b[5], v4.b[0] +; CHECK-GI-DOT-NEXT: ldr b4, [x1, #6] +; CHECK-GI-DOT-NEXT: mov v1.b[6], v3.b[0] +; CHECK-GI-DOT-NEXT: ldr b3, [x0, #7] +; CHECK-GI-DOT-NEXT: mov v2.b[6], v4.b[0] +; CHECK-GI-DOT-NEXT: ldr b4, [x1, #7] +; CHECK-GI-DOT-NEXT: mov v1.b[7], v3.b[0] +; CHECK-GI-DOT-NEXT: ldr b3, [x0, #16] +; CHECK-GI-DOT-NEXT: mov v2.b[7], v4.b[0] +; CHECK-GI-DOT-NEXT: ldr b4, [x1, #16] ; CHECK-GI-DOT-NEXT: mov v3.b[1], v7.b[0] -; CHECK-GI-DOT-NEXT: ldr b17, [x1, #9] -; CHECK-GI-DOT-NEXT: ldr b18, [x0, #17] +; CHECK-GI-DOT-NEXT: ldr b7, [x0, #18] ; CHECK-GI-DOT-NEXT: mov v4.b[1], v16.b[0] -; CHECK-GI-DOT-NEXT: ldr b5, [x1, #16] -; CHECK-GI-DOT-NEXT: ldr b19, [x1, #17] -; CHECK-GI-DOT-NEXT: mov v6.b[1], v17.b[0] -; CHECK-GI-DOT-NEXT: ldr b7, [x0, #2] -; CHECK-GI-DOT-NEXT: mov v2.b[1], v18.b[0] -; CHECK-GI-DOT-NEXT: ldr b16, [x0, #10] -; CHECK-GI-DOT-NEXT: ldr b17, [x1, #2] -; CHECK-GI-DOT-NEXT: mov v5.b[1], v19.b[0] -; CHECK-GI-DOT-NEXT: ldr b18, [x1, #10] -; CHECK-GI-DOT-NEXT: mov v1.b[2], v7.b[0] -; CHECK-GI-DOT-NEXT: ldr b19, [x0, #18] -; CHECK-GI-DOT-NEXT: ldr b7, [x1, #18] -; CHECK-GI-DOT-NEXT: mov v3.b[2], v16.b[0] -; CHECK-GI-DOT-NEXT: mov v4.b[2], v17.b[0] -; CHECK-GI-DOT-NEXT: mov v6.b[2], v18.b[0] -; CHECK-GI-DOT-NEXT: ldr b16, [x0, #3] -; CHECK-GI-DOT-NEXT: mov v2.b[2], v19.b[0] -; CHECK-GI-DOT-NEXT: ldr b17, [x1, #3] -; 
CHECK-GI-DOT-NEXT: ldr b18, [x1, #11] -; CHECK-GI-DOT-NEXT: mov v5.b[2], v7.b[0] -; CHECK-GI-DOT-NEXT: ldr b7, [x0, #11] -; CHECK-GI-DOT-NEXT: ldr b19, [x0, #19] -; CHECK-GI-DOT-NEXT: mov v1.b[3], v16.b[0] +; CHECK-GI-DOT-NEXT: ldr b16, [x1, #18] +; CHECK-GI-DOT-NEXT: mov v1.b[8], v5.b[0] +; CHECK-GI-DOT-NEXT: ldr b5, [x0, #9] +; CHECK-GI-DOT-NEXT: mov v2.b[8], v6.b[0] +; CHECK-GI-DOT-NEXT: ldr b6, [x1, #9] +; CHECK-GI-DOT-NEXT: mov v3.b[2], v7.b[0] +; CHECK-GI-DOT-NEXT: ldr b7, [x0, #19] +; CHECK-GI-DOT-NEXT: mov v4.b[2], v16.b[0] ; CHECK-GI-DOT-NEXT: ldr b16, [x1, #19] +; CHECK-GI-DOT-NEXT: mov v1.b[9], v5.b[0] +; CHECK-GI-DOT-NEXT: ldr b5, [x0, #10] +; CHECK-GI-DOT-NEXT: mov v2.b[9], v6.b[0] +; CHECK-GI-DOT-NEXT: ldr b6, [x1, #10] ; CHECK-GI-DOT-NEXT: mov v3.b[3], v7.b[0] -; CHECK-GI-DOT-NEXT: mov v4.b[3], v17.b[0] -; CHECK-GI-DOT-NEXT: mov v6.b[3], v18.b[0] -; CHECK-GI-DOT-NEXT: ldr b7, [x0, #4] -; CHECK-GI-DOT-NEXT: mov v2.b[3], v19.b[0] -; CHECK-GI-DOT-NEXT: ldr b17, [x1, #4] -; CHECK-GI-DOT-NEXT: mov v5.b[3], v16.b[0] -; CHECK-GI-DOT-NEXT: ldr b16, [x0, #12] -; CHECK-GI-DOT-NEXT: ldr b18, [x1, #12] -; CHECK-GI-DOT-NEXT: ldr b19, [x0, #20] -; CHECK-GI-DOT-NEXT: mov v1.b[4], v7.b[0] -; CHECK-GI-DOT-NEXT: ldr b7, [x1, #20] -; CHECK-GI-DOT-NEXT: mov v3.b[4], v16.b[0] -; CHECK-GI-DOT-NEXT: mov v4.b[4], v17.b[0] -; CHECK-GI-DOT-NEXT: mov v6.b[4], v18.b[0] -; CHECK-GI-DOT-NEXT: ldr b16, [x0, #5] -; CHECK-GI-DOT-NEXT: mov v2.b[4], v19.b[0] -; CHECK-GI-DOT-NEXT: ldr b17, [x1, #5] -; CHECK-GI-DOT-NEXT: mov v5.b[4], v7.b[0] -; CHECK-GI-DOT-NEXT: ldr b7, [x0, #13] -; CHECK-GI-DOT-NEXT: ldr b18, [x1, #13] -; CHECK-GI-DOT-NEXT: ldr b19, [x0, #21] -; CHECK-GI-DOT-NEXT: mov v1.b[5], v16.b[0] +; CHECK-GI-DOT-NEXT: ldr b7, [x0, #20] +; CHECK-GI-DOT-NEXT: mov v4.b[3], v16.b[0] +; CHECK-GI-DOT-NEXT: ldr b16, [x1, #20] +; CHECK-GI-DOT-NEXT: mov v1.b[10], v5.b[0] +; CHECK-GI-DOT-NEXT: ldr b5, [x0, #11] +; CHECK-GI-DOT-NEXT: mov v2.b[10], v6.b[0] +; CHECK-GI-DOT-NEXT: ldr b6, [x1, 
#11] +; CHECK-GI-DOT-NEXT: mov v3.b[4], v7.b[0] +; CHECK-GI-DOT-NEXT: ldr b7, [x0, #21] +; CHECK-GI-DOT-NEXT: mov v4.b[4], v16.b[0] ; CHECK-GI-DOT-NEXT: ldr b16, [x1, #21] +; CHECK-GI-DOT-NEXT: mov v1.b[11], v5.b[0] +; CHECK-GI-DOT-NEXT: ldr b5, [x0, #12] +; CHECK-GI-DOT-NEXT: mov v2.b[11], v6.b[0] +; CHECK-GI-DOT-NEXT: ldr b6, [x1, #12] ; CHECK-GI-DOT-NEXT: mov v3.b[5], v7.b[0] -; CHECK-GI-DOT-NEXT: mov v4.b[5], v17.b[0] -; CHECK-GI-DOT-NEXT: mov v6.b[5], v18.b[0] -; CHECK-GI-DOT-NEXT: ldr b7, [x0, #6] -; CHECK-GI-DOT-NEXT: mov v2.b[5], v19.b[0] -; CHECK-GI-DOT-NEXT: ldr b17, [x1, #6] -; CHECK-GI-DOT-NEXT: mov v5.b[5], v16.b[0] -; CHECK-GI-DOT-NEXT: ldr b16, [x0, #14] -; CHECK-GI-DOT-NEXT: ldr b18, [x1, #14] -; CHECK-GI-DOT-NEXT: ldr b19, [x0, #22] -; CHECK-GI-DOT-NEXT: mov v1.b[6], v7.b[0] -; CHECK-GI-DOT-NEXT: ldr b7, [x1, #22] -; CHECK-GI-DOT-NEXT: mov v3.b[6], v16.b[0] -; CHECK-GI-DOT-NEXT: mov v4.b[6], v17.b[0] -; CHECK-GI-DOT-NEXT: mov v6.b[6], v18.b[0] -; CHECK-GI-DOT-NEXT: ldr b16, [x0, #7] -; CHECK-GI-DOT-NEXT: mov v2.b[6], v19.b[0] -; CHECK-GI-DOT-NEXT: ldr b17, [x1, #7] -; CHECK-GI-DOT-NEXT: mov v5.b[6], v7.b[0] -; CHECK-GI-DOT-NEXT: ldr b7, [x0, #15] -; CHECK-GI-DOT-NEXT: ldr b18, [x1, #15] -; CHECK-GI-DOT-NEXT: ldr b19, [x0, #23] -; CHECK-GI-DOT-NEXT: mov v1.b[7], v16.b[0] +; CHECK-GI-DOT-NEXT: ldr b7, [x0, #22] +; CHECK-GI-DOT-NEXT: mov v4.b[5], v16.b[0] +; CHECK-GI-DOT-NEXT: ldr b16, [x1, #22] +; CHECK-GI-DOT-NEXT: mov v1.b[12], v5.b[0] +; CHECK-GI-DOT-NEXT: ldr b5, [x0, #13] +; CHECK-GI-DOT-NEXT: mov v2.b[12], v6.b[0] +; CHECK-GI-DOT-NEXT: ldr b6, [x1, #13] +; CHECK-GI-DOT-NEXT: mov v3.b[6], v7.b[0] +; CHECK-GI-DOT-NEXT: ldr b7, [x0, #23] +; CHECK-GI-DOT-NEXT: mov v4.b[6], v16.b[0] ; CHECK-GI-DOT-NEXT: ldr b16, [x1, #23] +; CHECK-GI-DOT-NEXT: mov v1.b[13], v5.b[0] +; CHECK-GI-DOT-NEXT: ldr b5, [x0, #14] +; CHECK-GI-DOT-NEXT: mov v2.b[13], v6.b[0] +; CHECK-GI-DOT-NEXT: ldr b6, [x1, #14] ; CHECK-GI-DOT-NEXT: mov v3.b[7], v7.b[0] -; CHECK-GI-DOT-NEXT: 
mov v4.b[7], v17.b[0] -; CHECK-GI-DOT-NEXT: mov v6.b[7], v18.b[0] -; CHECK-GI-DOT-NEXT: mov v2.b[7], v19.b[0] -; CHECK-GI-DOT-NEXT: mov v5.b[7], v16.b[0] -; CHECK-GI-DOT-NEXT: mov v4.d[1], v6.d[0] -; CHECK-GI-DOT-NEXT: mov v1.d[1], v3.d[0] -; CHECK-GI-DOT-NEXT: movi v3.2d, #0000000000000000 -; CHECK-GI-DOT-NEXT: mov v2.d[1], v0.d[0] -; CHECK-GI-DOT-NEXT: mov v5.d[1], v0.d[0] -; CHECK-GI-DOT-NEXT: udot v3.4s, v4.16b, v1.16b -; CHECK-GI-DOT-NEXT: udot v0.4s, v5.16b, v2.16b -; CHECK-GI-DOT-NEXT: add v0.4s, v3.4s, v0.4s +; CHECK-GI-DOT-NEXT: mov v4.b[7], v16.b[0] +; CHECK-GI-DOT-NEXT: mov v1.b[14], v5.b[0] +; CHECK-GI-DOT-NEXT: ldr b5, [x0, #15] +; CHECK-GI-DOT-NEXT: mov v2.b[14], v6.b[0] +; CHECK-GI-DOT-NEXT: ldr b6, [x1, #15] +; CHECK-GI-DOT-NEXT: mov v3.d[1], v0.d[0] +; CHECK-GI-DOT-NEXT: mov v4.d[1], v0.d[0] +; CHECK-GI-DOT-NEXT: mov v1.b[15], v5.b[0] +; CHECK-GI-DOT-NEXT: movi v5.2d, #0000000000000000 +; CHECK-GI-DOT-NEXT: mov v2.b[15], v6.b[0] +; CHECK-GI-DOT-NEXT: udot v0.4s, v4.16b, v3.16b +; CHECK-GI-DOT-NEXT: udot v5.4s, v2.16b, v1.16b +; CHECK-GI-DOT-NEXT: add v0.4s, v5.4s, v0.4s ; CHECK-GI-DOT-NEXT: addv s0, v0.4s ; CHECK-GI-DOT-NEXT: fmov w0, s0 ; CHECK-GI-DOT-NEXT: ret @@ -2670,104 +2670,104 @@ define i32 @test_sdot_v24i8(ptr %p1, ptr %p2) { ; CHECK-GI-DOT-LABEL: test_sdot_v24i8: ; CHECK-GI-DOT: // %bb.0: // %entry ; CHECK-GI-DOT-NEXT: ldr b1, [x0] -; CHECK-GI-DOT-NEXT: ldr b5, [x0, #1] +; CHECK-GI-DOT-NEXT: ldr b3, [x0, #1] ; CHECK-GI-DOT-NEXT: movi v0.2d, #0000000000000000 -; CHECK-GI-DOT-NEXT: ldr b3, [x0, #8] -; CHECK-GI-DOT-NEXT: ldr b7, [x0, #9] -; CHECK-GI-DOT-NEXT: ldr b2, [x0, #16] -; CHECK-GI-DOT-NEXT: ldr b4, [x1] -; CHECK-GI-DOT-NEXT: mov v1.b[1], v5.b[0] -; CHECK-GI-DOT-NEXT: ldr b16, [x1, #1] +; CHECK-GI-DOT-NEXT: ldr b2, [x1] +; CHECK-GI-DOT-NEXT: ldr b4, [x1, #1] +; CHECK-GI-DOT-NEXT: ldr b5, [x0, #8] +; CHECK-GI-DOT-NEXT: mov v1.b[1], v3.b[0] +; CHECK-GI-DOT-NEXT: ldr b3, [x0, #2] ; CHECK-GI-DOT-NEXT: ldr b6, [x1, #8] +; 
CHECK-GI-DOT-NEXT: mov v2.b[1], v4.b[0] +; CHECK-GI-DOT-NEXT: ldr b4, [x1, #2] +; CHECK-GI-DOT-NEXT: ldr b7, [x0, #17] +; CHECK-GI-DOT-NEXT: ldr b16, [x1, #17] +; CHECK-GI-DOT-NEXT: mov v1.b[2], v3.b[0] +; CHECK-GI-DOT-NEXT: ldr b3, [x0, #3] +; CHECK-GI-DOT-NEXT: mov v2.b[2], v4.b[0] +; CHECK-GI-DOT-NEXT: ldr b4, [x1, #3] +; CHECK-GI-DOT-NEXT: mov v1.b[3], v3.b[0] +; CHECK-GI-DOT-NEXT: ldr b3, [x0, #4] +; CHECK-GI-DOT-NEXT: mov v2.b[3], v4.b[0] +; CHECK-GI-DOT-NEXT: ldr b4, [x1, #4] +; CHECK-GI-DOT-NEXT: mov v1.b[4], v3.b[0] +; CHECK-GI-DOT-NEXT: ldr b3, [x0, #5] +; CHECK-GI-DOT-NEXT: mov v2.b[4], v4.b[0] +; CHECK-GI-DOT-NEXT: ldr b4, [x1, #5] +; CHECK-GI-DOT-NEXT: mov v1.b[5], v3.b[0] +; CHECK-GI-DOT-NEXT: ldr b3, [x0, #6] +; CHECK-GI-DOT-NEXT: mov v2.b[5], v4.b[0] +; CHECK-GI-DOT-NEXT: ldr b4, [x1, #6] +; CHECK-GI-DOT-NEXT: mov v1.b[6], v3.b[0] +; CHECK-GI-DOT-NEXT: ldr b3, [x0, #7] +; CHECK-GI-DOT-NEXT: mov v2.b[6], v4.b[0] +; CHECK-GI-DOT-NEXT: ldr b4, [x1, #7] +; CHECK-GI-DOT-NEXT: mov v1.b[7], v3.b[0] +; CHECK-GI-DOT-NEXT: ldr b3, [x0, #16] +; CHECK-GI-DOT-NEXT: mov v2.b[7], v4.b[0] +; CHECK-GI-DOT-NEXT: ldr b4, [x1, #16] ; CHECK-GI-DOT-NEXT: mov v3.b[1], v7.b[0] -; CHECK-GI-DOT-NEXT: ldr b17, [x1, #9] -; CHECK-GI-DOT-NEXT: ldr b18, [x0, #17] +; CHECK-GI-DOT-NEXT: ldr b7, [x0, #18] ; CHECK-GI-DOT-NEXT: mov v4.b[1], v16.b[0] -; CHECK-GI-DOT-NEXT: ldr b5, [x1, #16] -; CHECK-GI-DOT-NEXT: ldr b19, [x1, #17] -; CHECK-GI-DOT-NEXT: mov v6.b[1], v17.b[0] -; CHECK-GI-DOT-NEXT: ldr b7, [x0, #2] -; CHECK-GI-DOT-NEXT: mov v2.b[1], v18.b[0] -; CHECK-GI-DOT-NEXT: ldr b16, [x0, #10] -; CHECK-GI-DOT-NEXT: ldr b17, [x1, #2] -; CHECK-GI-DOT-NEXT: mov v5.b[1], v19.b[0] -; CHECK-GI-DOT-NEXT: ldr b18, [x1, #10] -; CHECK-GI-DOT-NEXT: mov v1.b[2], v7.b[0] -; CHECK-GI-DOT-NEXT: ldr b19, [x0, #18] -; CHECK-GI-DOT-NEXT: ldr b7, [x1, #18] -; CHECK-GI-DOT-NEXT: mov v3.b[2], v16.b[0] -; CHECK-GI-DOT-NEXT: mov v4.b[2], v17.b[0] -; CHECK-GI-DOT-NEXT: mov v6.b[2], v18.b[0] -; 
CHECK-GI-DOT-NEXT: ldr b16, [x0, #3] -; CHECK-GI-DOT-NEXT: mov v2.b[2], v19.b[0] -; CHECK-GI-DOT-NEXT: ldr b17, [x1, #3] -; CHECK-GI-DOT-NEXT: ldr b18, [x1, #11] -; CHECK-GI-DOT-NEXT: mov v5.b[2], v7.b[0] -; CHECK-GI-DOT-NEXT: ldr b7, [x0, #11] -; CHECK-GI-DOT-NEXT: ldr b19, [x0, #19] -; CHECK-GI-DOT-NEXT: mov v1.b[3], v16.b[0] +; CHECK-GI-DOT-NEXT: ldr b16, [x1, #18] +; CHECK-GI-DOT-NEXT: mov v1.b[8], v5.b[0] +; CHECK-GI-DOT-NEXT: ldr b5, [x0, #9] +; CHECK-GI-DOT-NEXT: mov v2.b[8], v6.b[0] +; CHECK-GI-DOT-NEXT: ldr b6, [x1, #9] +; CHECK-GI-DOT-NEXT: mov v3.b[2], v7.b[0] +; CHECK-GI-DOT-NEXT: ldr b7, [x0, #19] +; CHECK-GI-DOT-NEXT: mov v4.b[2], v16.b[0] ; CHECK-GI-DOT-NEXT: ldr b16, [x1, #19] +; CHECK-GI-DOT-NEXT: mov v1.b[9], v5.b[0] +; CHECK-GI-DOT-NEXT: ldr b5, [x0, #10] +; CHECK-GI-DOT-NEXT: mov v2.b[9], v6.b[0] +; CHECK-GI-DOT-NEXT: ldr b6, [x1, #10] ; CHECK-GI-DOT-NEXT: mov v3.b[3], v7.b[0] -; CHECK-GI-DOT-NEXT: mov v4.b[3], v17.b[0] -; CHECK-GI-DOT-NEXT: mov v6.b[3], v18.b[0] -; CHECK-GI-DOT-NEXT: ldr b7, [x0, #4] -; CHECK-GI-DOT-NEXT: mov v2.b[3], v19.b[0] -; CHECK-GI-DOT-NEXT: ldr b17, [x1, #4] -; CHECK-GI-DOT-NEXT: mov v5.b[3], v16.b[0] -; CHECK-GI-DOT-NEXT: ldr b16, [x0, #12] -; CHECK-GI-DOT-NEXT: ldr b18, [x1, #12] -; CHECK-GI-DOT-NEXT: ldr b19, [x0, #20] -; CHECK-GI-DOT-NEXT: mov v1.b[4], v7.b[0] -; CHECK-GI-DOT-NEXT: ldr b7, [x1, #20] -; CHECK-GI-DOT-NEXT: mov v3.b[4], v16.b[0] -; CHECK-GI-DOT-NEXT: mov v4.b[4], v17.b[0] -; CHECK-GI-DOT-NEXT: mov v6.b[4], v18.b[0] -; CHECK-GI-DOT-NEXT: ldr b16, [x0, #5] -; CHECK-GI-DOT-NEXT: mov v2.b[4], v19.b[0] -; CHECK-GI-DOT-NEXT: ldr b17, [x1, #5] -; CHECK-GI-DOT-NEXT: mov v5.b[4], v7.b[0] -; CHECK-GI-DOT-NEXT: ldr b7, [x0, #13] -; CHECK-GI-DOT-NEXT: ldr b18, [x1, #13] -; CHECK-GI-DOT-NEXT: ldr b19, [x0, #21] -; CHECK-GI-DOT-NEXT: mov v1.b[5], v16.b[0] +; CHECK-GI-DOT-NEXT: ldr b7, [x0, #20] +; CHECK-GI-DOT-NEXT: mov v4.b[3], v16.b[0] +; CHECK-GI-DOT-NEXT: ldr b16, [x1, #20] +; CHECK-GI-DOT-NEXT: mov v1.b[10], 
v5.b[0] +; CHECK-GI-DOT-NEXT: ldr b5, [x0, #11] +; CHECK-GI-DOT-NEXT: mov v2.b[10], v6.b[0] +; CHECK-GI-DOT-NEXT: ldr b6, [x1, #11] +; CHECK-GI-DOT-NEXT: mov v3.b[4], v7.b[0] +; CHECK-GI-DOT-NEXT: ldr b7, [x0, #21] +; CHECK-GI-DOT-NEXT: mov v4.b[4], v16.b[0] ; CHECK-GI-DOT-NEXT: ldr b16, [x1, #21] +; CHECK-GI-DOT-NEXT: mov v1.b[11], v5.b[0] +; CHECK-GI-DOT-NEXT: ldr b5, [x0, #12] +; CHECK-GI-DOT-NEXT: mov v2.b[11], v6.b[0] +; CHECK-GI-DOT-NEXT: ldr b6, [x1, #12] ; CHECK-GI-DOT-NEXT: mov v3.b[5], v7.b[0] -; CHECK-GI-DOT-NEXT: mov v4.b[5], v17.b[0] -; CHECK-GI-DOT-NEXT: mov v6.b[5], v18.b[0] -; CHECK-GI-DOT-NEXT: ldr b7, [x0, #6] -; CHECK-GI-DOT-NEXT: mov v2.b[5], v19.b[0] -; CHECK-GI-DOT-NEXT: ldr b17, [x1, #6] -; CHECK-GI-DOT-NEXT: mov v5.b[5], v16.b[0] -; CHECK-GI-DOT-NEXT: ldr b16, [x0, #14] -; CHECK-GI-DOT-NEXT: ldr b18, [x1, #14] -; CHECK-GI-DOT-NEXT: ldr b19, [x0, #22] -; CHECK-GI-DOT-NEXT: mov v1.b[6], v7.b[0] -; CHECK-GI-DOT-NEXT: ldr b7, [x1, #22] -; CHECK-GI-DOT-NEXT: mov v3.b[6], v16.b[0] -; CHECK-GI-DOT-NEXT: mov v4.b[6], v17.b[0] -; CHECK-GI-DOT-NEXT: mov v6.b[6], v18.b[0] -; CHECK-GI-DOT-NEXT: ldr b16, [x0, #7] -; CHECK-GI-DOT-NEXT: mov v2.b[6], v19.b[0] -; CHECK-GI-DOT-NEXT: ldr b17, [x1, #7] -; CHECK-GI-DOT-NEXT: mov v5.b[6], v7.b[0] -; CHECK-GI-DOT-NEXT: ldr b7, [x0, #15] -; CHECK-GI-DOT-NEXT: ldr b18, [x1, #15] -; CHECK-GI-DOT-NEXT: ldr b19, [x0, #23] -; CHECK-GI-DOT-NEXT: mov v1.b[7], v16.b[0] +; CHECK-GI-DOT-NEXT: ldr b7, [x0, #22] +; CHECK-GI-DOT-NEXT: mov v4.b[5], v16.b[0] +; CHECK-GI-DOT-NEXT: ldr b16, [x1, #22] +; CHECK-GI-DOT-NEXT: mov v1.b[12], v5.b[0] +; CHECK-GI-DOT-NEXT: ldr b5, [x0, #13] +; CHECK-GI-DOT-NEXT: mov v2.b[12], v6.b[0] +; CHECK-GI-DOT-NEXT: ldr b6, [x1, #13] +; CHECK-GI-DOT-NEXT: mov v3.b[6], v7.b[0] +; CHECK-GI-DOT-NEXT: ldr b7, [x0, #23] +; CHECK-GI-DOT-NEXT: mov v4.b[6], v16.b[0] ; CHECK-GI-DOT-NEXT: ldr b16, [x1, #23] +; CHECK-GI-DOT-NEXT: mov v1.b[13], v5.b[0] +; CHECK-GI-DOT-NEXT: ldr b5, [x0, #14] +; 
CHECK-GI-DOT-NEXT: mov v2.b[13], v6.b[0] +; CHECK-GI-DOT-NEXT: ldr b6, [x1, #14] ; CHECK-GI-DOT-NEXT: mov v3.b[7], v7.b[0] -; CHECK-GI-DOT-NEXT: mov v4.b[7], v17.b[0] -; CHECK-GI-DOT-NEXT: mov v6.b[7], v18.b[0] -; CHECK-GI-DOT-NEXT: mov v2.b[7], v19.b[0] -; CHECK-GI-DOT-NEXT: mov v5.b[7], v16.b[0] -; CHECK-GI-DOT-NEXT: mov v4.d[1], v6.d[0] -; CHECK-GI-DOT-NEXT: mov v1.d[1], v3.d[0] -; CHECK-GI-DOT-NEXT: movi v3.2d, #0000000000000000 -; CHECK-GI-DOT-NEXT: mov v2.d[1], v0.d[0] -; CHECK-GI-DOT-NEXT: mov v5.d[1], v0.d[0] -; CHECK-GI-DOT-NEXT: sdot v3.4s, v4.16b, v1.16b -; CHECK-GI-DOT-NEXT: sdot v0.4s, v5.16b, v2.16b -; CHECK-GI-DOT-NEXT: add v0.4s, v3.4s, v0.4s +; CHECK-GI-DOT-NEXT: mov v4.b[7], v16.b[0] +; CHECK-GI-DOT-NEXT: mov v1.b[14], v5.b[0] +; CHECK-GI-DOT-NEXT: ldr b5, [x0, #15] +; CHECK-GI-DOT-NEXT: mov v2.b[14], v6.b[0] +; CHECK-GI-DOT-NEXT: ldr b6, [x1, #15] +; CHECK-GI-DOT-NEXT: mov v3.d[1], v0.d[0] +; CHECK-GI-DOT-NEXT: mov v4.d[1], v0.d[0] +; CHECK-GI-DOT-NEXT: mov v1.b[15], v5.b[0] +; CHECK-GI-DOT-NEXT: movi v5.2d, #0000000000000000 +; CHECK-GI-DOT-NEXT: mov v2.b[15], v6.b[0] +; CHECK-GI-DOT-NEXT: sdot v0.4s, v4.16b, v3.16b +; CHECK-GI-DOT-NEXT: sdot v5.4s, v2.16b, v1.16b +; CHECK-GI-DOT-NEXT: add v0.4s, v5.4s, v0.4s ; CHECK-GI-DOT-NEXT: addv s0, v0.4s ; CHECK-GI-DOT-NEXT: fmov w0, s0 ; CHECK-GI-DOT-NEXT: ret From 09b80e6145a2e619fa8ff486a04cdc7d1534a81a Mon Sep 17 00:00:00 2001 From: David Spickett Date: Thu, 15 Feb 2024 10:27:43 +0000 Subject: [PATCH 230/240] [clang][flang][driver] Correct program names in option group descriptions (#81726) Currently https://flang.llvm.org/docs/FlangCommandLineReference.html refers to "Clang" in several of the group descriptions for example: ``` Compilation options Flags controlling the behavior of Clang during compilation... ``` This is pretty confusing. I'm fixing this by making use of `Program` from the existing GlobalDocumentation object to substitute in the program name to these descriptions. 
This `Program` has been changed to a proper noun given that it's easier to lower case a string than capitalise one character (syntax wise). The tablegen backend has been changed to lower it so that links in the RST/HTML remain the same as they were before. To make sure the file is valid when not generating docs, I'm checking a #define and providing a default GlobalDocumentation if it's not defined. (I looked for a way to check if a def exists, but tablegen doesn't seem to have one) This means that if the DocBrief strings are used outside of documentation, they'll say "Clang", which is the same as it always was. This change does not aim to fix option descriptions that refer to clang. Though we can use parts of this for that, there is only one driver library so it needs a different approach. This change also fixes the warning: ``` /home/buildbot/as-worker-4/publish-sphinx-docs/build/tools/flang/docs/Source/FlangCommandLineReference.rst:194: WARNING: unknown document: 'DiagnosticsReference' ``` Which is due to flang docs trying to link to clang docs. Now it will just tell the reader to go to Clang's page, which is not ideal but it is easy to find with Google at least. --- clang/include/clang/Driver/ClangOptionDocs.td | 3 +- clang/include/clang/Driver/Options.td | 43 +++++++++++++++---- .../utils/TableGen/ClangOptionDocEmitter.cpp | 9 ++-- flang/docs/FlangOptionsDocs.td | 4 +- 4 files changed, 44 insertions(+), 15 deletions(-) diff --git a/clang/include/clang/Driver/ClangOptionDocs.td b/clang/include/clang/Driver/ClangOptionDocs.td index a5ee577c5f45db..dea6a7ccb12c9a 100644 --- a/clang/include/clang/Driver/ClangOptionDocs.td +++ b/clang/include/clang/Driver/ClangOptionDocs.td @@ -27,11 +27,12 @@ GCC-compatible ``clang`` and ``clang++`` drivers. }]; - string Program = "clang"; + string Program = "Clang"; // Note: We *must* use DefaultVis and not ClangOption, since that's // the name of the actual TableGen record. The alias will not work.
list VisibilityMask = ["DefaultVis"]; list IgnoreFlags = ["HelpHidden", "Unsupported", "Ignored"]; } +#define GENERATING_DOCS include "Options.td" diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index b302afd65e2811..63ca8534c2a2a3 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -13,6 +13,25 @@ // Include the common option parsing interfaces. include "llvm/Option/OptParser.td" +// When generating documentation, we expect there to be a GlobalDocumentation +// def containing the program name that we are generating documentation for. +// This object should only be used by things that are used in documentation, +// such as the group descriptions. +#ifndef GENERATING_DOCS +// So that this file can still be parsed without such a def, define one if there +// isn't one provided. +def GlobalDocumentation { + // Sensible default in case of mistakes. + string Program = "Clang"; +} +#endif + +// Use this to generate program specific documentation, for example: +// StringForProgram<"Control how %Program behaves.">.str +class StringForProgram { + string str = !subst("%Program", GlobalDocumentation.Program, _str); +} + ///////// // Flags @@ -100,14 +119,16 @@ def Action_Group : OptionGroup<"">, DocName<"Actions">, // Meta-group for options which are only used for compilation, // and not linking etc. def CompileOnly_Group : OptionGroup<"">, - DocName<"Compilation options">, DocBrief<[{ -Flags controlling the behavior of Clang during compilation. 
These flags have -no effect during actions that do not perform compilation.}]>; + DocName<"Compilation options">, + DocBrief.str>; def Preprocessor_Group : OptionGroup<"">, Group, - DocName<"Preprocessor options">, DocBrief<[{ -Flags controlling the behavior of the Clang preprocessor.}]>; + DocName<"Preprocessor options">, + DocBrief.str>; def IncludePath_Group : OptionGroup<"">, Group, DocName<"Include path management">, @@ -128,9 +149,15 @@ def d_Group : OptionGroup<"">, Group, Flags allowing the state of the preprocessor to be dumped in various ways.}]>; def Diag_Group : OptionGroup<"">, Group, - DocName<"Diagnostic options">, DocBrief<[{ -Flags controlling which warnings, errors, and remarks Clang will generate. -See the :doc:`full list of warning and remark flags `.}]>; + DocName<"Diagnostic options">, + DocBrief.str, + // When in clang link directly to the page. + !cond(!eq(GlobalDocumentation.Program, "Clang"): +"See the :doc:`full list of warning and remark flags `.", + // When elsewhere the link will not work. + true: +"See Clang's Diagnostic Reference for a full list of warning and remark flags."))>; def R_Group : OptionGroup<"">, Group, DocFlatten; def R_value_Group : OptionGroup<"">, Group, diff --git a/clang/utils/TableGen/ClangOptionDocEmitter.cpp b/clang/utils/TableGen/ClangOptionDocEmitter.cpp index a4095950ca975f..3fe98909940749 100644 --- a/clang/utils/TableGen/ClangOptionDocEmitter.cpp +++ b/clang/utils/TableGen/ClangOptionDocEmitter.cpp @@ -342,9 +342,10 @@ void emitOption(const DocumentedOption &Option, const Record *DocInfo, })]; for (auto &S : SphinxOptionIDs) NextSuffix[S] = SphinxWorkaroundSuffix + 1; + + std::string Program = DocInfo->getValueAsString("Program").lower(); if (SphinxWorkaroundSuffix) - OS << ".. program:: " << DocInfo->getValueAsString("Program") - << SphinxWorkaroundSuffix << "\n"; + OS << ".. program:: " << Program << SphinxWorkaroundSuffix << "\n"; // Emit the names of the option. OS << ".. 
option:: "; @@ -353,7 +354,7 @@ void emitOption(const DocumentedOption &Option, const Record *DocInfo, EmittedAny = emitOptionNames(Option, OS, EmittedAny); }); if (SphinxWorkaroundSuffix) - OS << "\n.. program:: " << DocInfo->getValueAsString("Program"); + OS << "\n.. program:: " << Program; OS << "\n\n"; // Emit the description, if we have one. @@ -421,7 +422,7 @@ void clang::EmitClangOptDocs(RecordKeeper &Records, raw_ostream &OS) { return; } OS << DocInfo->getValueAsString("Intro") << "\n"; - OS << ".. program:: " << DocInfo->getValueAsString("Program") << "\n"; + OS << ".. program:: " << DocInfo->getValueAsString("Program").lower() << "\n"; emitDocumentation(0, extractDocumentation(Records, DocInfo), DocInfo, OS); } diff --git a/flang/docs/FlangOptionsDocs.td b/flang/docs/FlangOptionsDocs.td index 9189899e82c62d..14d033f8587e3b 100644 --- a/flang/docs/FlangOptionsDocs.td +++ b/flang/docs/FlangOptionsDocs.td @@ -24,10 +24,10 @@ Introduction }]; - string Program = "flang"; + string Program = "Flang"; list VisibilityMask = ["FlangOption"]; list IgnoreFlags = ["HelpHidden", "Unsupported", "Ignored"]; } - +#define GENERATING_DOCS include "Options.td" From dfb9bf35c42a04a8f152dabf0a9d1a52e451f942 Mon Sep 17 00:00:00 2001 From: Vyacheslav Levytskyy <89994100+VyacheslavLevytskyy@users.noreply.github.com> Date: Thu, 15 Feb 2024 11:28:58 +0100 Subject: [PATCH 231/240] let a user select preferred/unpreferred capabilities in a list of enabling capabilities (#81476) By SPIR-V specification: "If an instruction, enumerant, or other feature specifies multiple enabling capabilities, only one such capability needs to be declared to use the feature." However, one capability may be preferred over another. 
One important case is the Shader capability, which may not be supported by a backend but is always inserted if "OpDecorate SpecId" is found, because the Enabling Capabilities for the latter are the list of Shader and Kernel, where Shader comes first and is thus always selected as the first available option. In this PR we address the problem by keeping the current behaviour of selecting the first option among enabling capabilities as is, but giving the user a way to filter capabilities during the selection process via a newly introduced "--avoid-spirv-capabilities" command line option. This option avoids selection of certain capabilities if there are other available enabling capabilities. This PR also changes the existing pruneCapabilities() function. It no longer removes a capability from the module requirements, but only adds implicitly required capabilities recursively, so it is renamed to recursiveAddCapabilities() accordingly. This change fixes an existing bug in collecting the capabilities required by a module. Before this PR, pruneCapabilities() would remove, for example, the Kernel capability from the set required by a module: Kernel is initially required, and the second time it was needed pruneCapabilities() removed it by mistake.
--- llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp | 46 ++++++++++++++++--- llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h | 6 +-- .../CodeGen/SPIRV/transcoding/spec_const.ll | 6 +-- 3 files changed, 45 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp index a18aae1761c834..9f14ea5dbe19b3 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp @@ -35,6 +35,22 @@ static cl::opt cl::desc("Dump MIR with SPIR-V dependencies info"), cl::Optional, cl::init(false)); +static cl::list + AvoidCapabilities("avoid-spirv-capabilities", + cl::desc("SPIR-V capabilities to avoid if there are " + "other options enabling a feature"), + cl::ZeroOrMore, cl::Hidden, + cl::values(clEnumValN(SPIRV::Capability::Shader, "Shader", + "SPIR-V Shader capability"))); +// Use sets instead of cl::list to check "if contains" condition +struct AvoidCapabilitiesSet { + SmallSet S; + AvoidCapabilitiesSet() { + for (auto Cap : AvoidCapabilities) + S.insert(Cap); + } +}; + char llvm::SPIRVModuleAnalysis::ID = 0; namespace llvm { @@ -58,6 +74,8 @@ static SPIRV::Requirements getSymbolicOperandRequirements(SPIRV::OperandCategory::OperandCategory Category, unsigned i, const SPIRVSubtarget &ST, SPIRV::RequirementHandler &Reqs) { + static AvoidCapabilitiesSet + AvoidCaps; // contains capabilities to avoid if there is another option unsigned ReqMinVer = getSymbolicOperandMinVersion(Category, i); unsigned ReqMaxVer = getSymbolicOperandMaxVersion(Category, i); unsigned TargetVer = ST.getSPIRVVersion(); @@ -72,9 +90,26 @@ getSymbolicOperandRequirements(SPIRV::OperandCategory::OperandCategory Category, return {false, {}, {}, 0, 0}; } } else if (MinVerOK && MaxVerOK) { - for (auto Cap : ReqCaps) { // Only need 1 of the capabilities to work. 
+ if (ReqCaps.size() == 1) { + auto Cap = ReqCaps[0]; if (Reqs.isCapabilityAvailable(Cap)) return {true, {Cap}, {}, ReqMinVer, ReqMaxVer}; + } else { + // By SPIR-V specification: "If an instruction, enumerant, or other + // feature specifies multiple enabling capabilities, only one such + // capability needs to be declared to use the feature." However, one + // capability may be preferred over another. We use command line + // argument(s) and AvoidCapabilities to avoid selection of certain + // capabilities if there are other options. + CapabilityList UseCaps; + for (auto Cap : ReqCaps) + if (Reqs.isCapabilityAvailable(Cap)) + UseCaps.push_back(Cap); + for (size_t i = 0, Sz = UseCaps.size(); i < Sz; ++i) { + auto Cap = UseCaps[i]; + if (i == Sz - 1 || !AvoidCaps.S.contains(Cap)) + return {true, {Cap}, {}, ReqMinVer, ReqMaxVer}; + } } } // If there are no capabilities, or we can't satisfy the version or @@ -432,16 +467,13 @@ void SPIRV::RequirementHandler::getAndAddRequirements( addRequirements(getSymbolicOperandRequirements(Category, i, ST, *this)); } -void SPIRV::RequirementHandler::pruneCapabilities( +void SPIRV::RequirementHandler::recursiveAddCapabilities( const CapabilityList &ToPrune) { for (const auto &Cap : ToPrune) { AllCaps.insert(Cap); - auto FoundIndex = llvm::find(MinimalCaps, Cap); - if (FoundIndex != MinimalCaps.end()) - MinimalCaps.erase(FoundIndex); CapabilityList ImplicitDecls = getSymbolicOperandCapabilities(OperandCategory::CapabilityOperand, Cap); - pruneCapabilities(ImplicitDecls); + recursiveAddCapabilities(ImplicitDecls); } } @@ -452,7 +484,7 @@ void SPIRV::RequirementHandler::addCapabilities(const CapabilityList &ToAdd) { continue; CapabilityList ImplicitDecls = getSymbolicOperandCapabilities(OperandCategory::CapabilityOperand, Cap); - pruneCapabilities(ImplicitDecls); + recursiveAddCapabilities(ImplicitDecls); MinimalCaps.push_back(Cap); } } diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h 
b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h index b05526b06e7da7..708384fc55f525 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h @@ -71,9 +71,9 @@ struct RequirementHandler { SmallSet AllExtensions; unsigned MinVersion; // 0 if no min version is defined. unsigned MaxVersion; // 0 if no max version is defined. - // Remove a list of capabilities from dedupedCaps and add them to AllCaps, - // recursing through their implicitly declared capabilities too. - void pruneCapabilities(const CapabilityList &ToPrune); + // Add capabilities to AllCaps, recursing through their implicitly declared + // capabilities too. + void recursiveAddCapabilities(const CapabilityList &ToPrune); void initAvailableCapabilitiesForOpenCL(const SPIRVSubtarget &ST); void initAvailableCapabilitiesForVulkan(const SPIRVSubtarget &ST); diff --git a/llvm/test/CodeGen/SPIRV/transcoding/spec_const.ll b/llvm/test/CodeGen/SPIRV/transcoding/spec_const.ll index c47dccb35e14d5..8ce76534c50db5 100644 --- a/llvm/test/CodeGen/SPIRV/transcoding/spec_const.ll +++ b/llvm/test/CodeGen/SPIRV/transcoding/spec_const.ll @@ -1,9 +1,9 @@ -; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV -; XFAIL: * +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown --avoid-spirv-capabilities=Shader %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown --avoid-spirv-capabilities=Shader %s -o - -filetype=obj | spirv-val %} +; CHECK-SPIRV-DAG: OpCapability Kernel ; CHECK-SPIRV-NOT: OpCapability Matrix ; CHECK-SPIRV-NOT: OpCapability Shader -; CHECK-SPIRV: OpCapability Kernel ; CHECK-SPIRV-DAG: OpDecorate %[[#SC0:]] SpecId 0 ; CHECK-SPIRV-DAG: OpDecorate %[[#SC1:]] SpecId 1 From 9552a396ed649e499bc4a370ab72ca6972e5b982 Mon Sep 17 00:00:00 2001 From: Vyacheslav Levytskyy <89994100+VyacheslavLevytskyy@users.noreply.github.com> Date: Thu, 15 Feb 2024 11:30:17 +0100 Subject: 
[PATCH 232/240] add support for the SPV_KHR_linkonce_odr extension (#81512) This PR adds support for the SPV_KHR_linkonce_odr extension and modifies existing negative test with a positive check for the extension and proper linkage type in case when the extension is enabled. SPV_KHR_linkonce_odr adds a "LinkOnceODR" linkage type, allowing proper translation of, for example, C++ templates classes merging during linking from different modules and supporting any other cases when a global variable/function must be merged with equivalent global variable(s)/function(s) from other modules during the linking process. --- llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp | 18 +++++++++++++----- .../Target/SPIRV/SPIRVInstructionSelector.cpp | 5 ++++- llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp | 6 ++++++ llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp | 5 +++++ llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td | 1 + llvm/test/CodeGen/SPIRV/LinkOnceODR.ll | 11 ++++++++++- llvm/test/CodeGen/SPIRV/LinkOnceODRFun.ll | 17 +++++++++++++++++ 7 files changed, 56 insertions(+), 7 deletions(-) create mode 100644 llvm/test/CodeGen/SPIRV/LinkOnceODRFun.ll diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp index 8ac498e1556bec..baeed2ad895a4b 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp @@ -332,6 +332,10 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, if (F.hasName()) buildOpName(FuncVReg, F.getName(), MIRBuilder); + // Get access to information about available extensions + const auto *ST = + static_cast(&MIRBuilder.getMF().getSubtarget()); + // Handle entry points and function linkage. 
if (isEntryPoint(F)) { const auto &STI = MIRBuilder.getMF().getSubtarget(); @@ -342,15 +346,19 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, addStringImm(F.getName(), MIB); } else if (F.getLinkage() == GlobalValue::LinkageTypes::ExternalLinkage || F.getLinkage() == GlobalValue::LinkOnceODRLinkage) { - auto LnkTy = F.isDeclaration() ? SPIRV::LinkageType::Import - : SPIRV::LinkageType::Export; + SPIRV::LinkageType::LinkageType LnkTy = + F.isDeclaration() + ? SPIRV::LinkageType::Import + : (F.getLinkage() == GlobalValue::LinkOnceODRLinkage && + ST->canUseExtension( + SPIRV::Extension::SPV_KHR_linkonce_odr) + ? SPIRV::LinkageType::LinkOnceODR + : SPIRV::LinkageType::Export); buildOpDecorate(FuncVReg, MIRBuilder, SPIRV::Decoration::LinkageAttributes, {static_cast(LnkTy)}, F.getGlobalIdentifier()); } // Handle function pointers decoration - const auto *ST = - static_cast(&MIRBuilder.getMF().getSubtarget()); bool hasFunctionPointers = ST->canUseExtension(SPIRV::Extension::SPV_INTEL_function_pointers); if (hasFunctionPointers) { @@ -393,7 +401,7 @@ void SPIRVCallLowering::produceIndirectPtrTypes( // SPIR-V pointer to function type: SPIRVType *IndirectFuncPtrTy = GR->getOrCreateSPIRVPointerType( SpirvFuncTy, MIRBuilder, SPIRV::StorageClass::Function); - // Correct the Calee type + // Correct the Callee type GR->assignSPIRVTypeToVReg(IndirectFuncPtrTy, IC.Callee, MF); } } diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 52eeb8a523e6f6..ed2c9ab4d4362a 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -1601,7 +1601,10 @@ bool SPIRVInstructionSelector::selectGlobalValue( SPIRV::LinkageType::LinkageType LnkType = (GV->isDeclaration() || GV->hasAvailableExternallyLinkage()) ? 
SPIRV::LinkageType::Import - : SPIRV::LinkageType::Export; + : (GV->getLinkage() == GlobalValue::LinkOnceODRLinkage && + STI.canUseExtension(SPIRV::Extension::SPV_KHR_linkonce_odr) + ? SPIRV::LinkageType::LinkOnceODR + : SPIRV::LinkageType::Export); Register Reg = GR.buildGlobalVariable(ResVReg, ResType, GlobalIdent, GV, Storage, Init, GlobalVar->isConstant(), diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp index 9f14ea5dbe19b3..688b98ffa67477 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp @@ -679,6 +679,12 @@ static void addOpDecorateReqs(const MachineInstr &MI, unsigned DecIndex, auto BuiltIn = static_cast(BuiltInOp); Reqs.addRequirements(getSymbolicOperandRequirements( SPIRV::OperandCategory::BuiltInOperand, BuiltIn, ST, Reqs)); + } else if (Dec == SPIRV::Decoration::LinkageAttributes) { + int64_t LinkageOp = MI.getOperand(MI.getNumOperands() - 1).getImm(); + SPIRV::LinkageType::LinkageType LnkType = + static_cast(LinkageOp); + if (LnkType == SPIRV::LinkageType::LinkOnceODR) + Reqs.addExtension(SPIRV::Extension::SPV_KHR_linkonce_odr); } } diff --git a/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp b/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp index effedc2f17d351..354cd5d9b297e7 100644 --- a/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp @@ -54,6 +54,11 @@ cl::list Extensions( "SPV_KHR_bit_instructions", "This enables bit instructions to be used by SPIR-V modules " "without requiring the Shader capability"), + clEnumValN( + SPIRV::Extension::SPV_KHR_linkonce_odr, "SPV_KHR_linkonce_odr", + "Allows to use the LinkOnceODR linkage type that is to let " + "a function or global variable to be merged with other functions " + "or global variables of the same name when linkage occurs."), clEnumValN(SPIRV::Extension::SPV_INTEL_function_pointers, "SPV_INTEL_function_pointers", "Allows translation of function 
pointers"))); diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td index 5d252275ac709b..ed05013642ac21 100644 --- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td +++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td @@ -1040,6 +1040,7 @@ multiclass LinkageTypeOperand value, list reqCapabilities> defm Export : LinkageTypeOperand<0, [Linkage]>; defm Import : LinkageTypeOperand<1, [Linkage]>; +defm LinkOnceODR : LinkageTypeOperand<2, [Linkage]>; //===----------------------------------------------------------------------===// // Multiclass used to define AccessQualifier enum values and at the same time diff --git a/llvm/test/CodeGen/SPIRV/LinkOnceODR.ll b/llvm/test/CodeGen/SPIRV/LinkOnceODR.ll index 3fb49ac241c6e6..3dfdeac7adaa5e 100644 --- a/llvm/test/CodeGen/SPIRV/LinkOnceODR.ll +++ b/llvm/test/CodeGen/SPIRV/LinkOnceODR.ll @@ -1,5 +1,14 @@ -;; No extension -> no LinkOnceODR +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown --spirv-extensions=SPV_KHR_linkonce_odr %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV-EXT +; TODO: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown --spirv-extensions=SPV_KHR_linkonce_odr %s -o - -filetype=obj | spirv-val %} + +; CHECK-SPIRV-EXT: Capability Linkage +; CHECK-SPIRV-EXT: Extension "SPV_KHR_linkonce_odr" +; CHECK-SPIRV-EXT-DAG: OpDecorate %[[#]] LinkageAttributes "GV" LinkOnceODR +; CHECK-SPIRV-EXT-DAG: OpDecorate %[[#]] LinkageAttributes "square" LinkOnceODR + +; No extension -> no LinkOnceODR ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV +; TODO: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK-SPIRV-NOT: OpExtension "SPV_KHR_linkonce_odr" ; CHECK-SPIRV-NOT: OpDecorate %[[#]] LinkageAttributes "GV" LinkOnceODR diff --git a/llvm/test/CodeGen/SPIRV/LinkOnceODRFun.ll b/llvm/test/CodeGen/SPIRV/LinkOnceODRFun.ll new file mode 100644 index 
00000000000000..7505c3fc277e9e --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/LinkOnceODRFun.ll @@ -0,0 +1,17 @@ +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown --spirv-extensions=SPV_KHR_linkonce_odr %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV-EXT +; TODO: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown --spirv-extensions=SPV_KHR_linkonce_odr %s -o - -filetype=obj | spirv-val %} + +; CHECK-SPIRV-EXT: Capability Linkage +; CHECK-SPIRV-EXT: Extension "SPV_KHR_linkonce_odr" +; CHECK-SPIRV-EXT-DAG: OpDecorate %[[#]] LinkageAttributes "square" LinkOnceODR + +define spir_kernel void @k() { +entry: + %call = call spir_func i32 @square(i32 2) + ret void +} + +define linkonce_odr dso_local spir_func i32 @square(i32 %in) { +entry: + ret i32 %in +} From 97c19a46cd177b19667a65db8720e92ff91c7b2e Mon Sep 17 00:00:00 2001 From: Cullen Rhodes Date: Thu, 15 Feb 2024 10:32:17 +0000 Subject: [PATCH 233/240] [mlir][ArmSME][nfc] Add integration test for i8 to i32 matmul (#81607) Currently marked as XFAIL due to bug in QEMU. See test for details. 
--- .../ArmSME/multi-tile-matmul-mixed-types.mlir | 123 ++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/multi-tile-matmul-mixed-types.mlir diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/multi-tile-matmul-mixed-types.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/multi-tile-matmul-mixed-types.mlir new file mode 100644 index 00000000000000..9f06226a4f651c --- /dev/null +++ b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/multi-tile-matmul-mixed-types.mlir @@ -0,0 +1,123 @@ +// RUN: mlir-opt %s \ +// RUN: -transform-interpreter -test-transform-dialect-erase-schedule \ +// RUN: -one-shot-bufferize="bufferize-function-boundaries" -canonicalize \ +// RUN: -arm-sme-vector-legalization -canonicalize -cse \ +// RUN: -convert-vector-to-arm-sme -arm-sme-outer-product-fusion \ +// RUN: -allocate-arm-sme-tiles -convert-arm-sme-to-scf \ +// RUN: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za only-if-required-by-ops" \ +// RUN: -convert-vector-to-scf=full-unroll -convert-arm-sme-to-llvm \ +// RUN: -test-lower-to-llvm | \ +// RUN: %mcr_aarch64_cmd \ +// RUN: -e=main -entry-point-result=void \ +// RUN: -march=aarch64 -mattr="+sve,+sme" \ +// RUN: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%arm_sme_abi_shlib,%mlir_arm_runner_utils | \ +// RUN: FileCheck %s + +/// This is very similar to the SME multi-tile-matmul.mlir test, except that it +/// tests a mixed i8 to i32 matmul and outer product fusion which fuses 16 +/// outer products (four per tile) into four 4-way outer products. + +/// NOTE: QEMU gives incorrect result for SME SMOPA 4-way outer product +/// instruction (version <= 8.2.0, latest version at time of writing), see: +/// https://gitlab.com/qemu-project/qemu/-/issues/2083 +/// This test is expected to fail until a fixed version of QEMU can be used. 
+ +/// FIXME: Remove the 'XFAIL' below once a fixed QEMU version is available +/// (and installed on CI buildbot). +/// XFAIL: * + +func.func @matmul_i8_to_i32(%A : tensor, %B : tensor, %C : tensor) { + %res = linalg.matmul ins(%A, %B: tensor, tensor) + outs(%C: tensor) -> tensor + %xf = tensor.cast %res : tensor to tensor<*xi32> + call @printMemrefI32(%xf) : (tensor<*xi32>) -> () + return +} + +func.func @main() { + /// Set SVL to 128-bit. This ensures this small matmul will use all four + /// 32-bit SME virtual tiles. + %c128 = arith.constant 128 : i32 + func.call @setArmSVLBits(%c128) : (i32) -> () + + %c0 = arith.constant 0 : i32 + %c7 = arith.constant 7 : index + + %A = arith.constant dense<[ + [1, 8, 15, 22, 29, 36, 43, 50, 57, 64, 71, 78, 85], + [2, 9, 16, 23, 30, 37, 44, 51, 58, 65, 72, 79, 86], + [3, 10, 17, 24, 31, 38, 45, 52, 59, 66, 73, 80, 87], + [4, 11, 18, 25, 32, 39, 46, 53, 60, 67, 74, 81, 88], + [5, 12, 19, 26, 33, 40, 47, 54, 61, 68, 75, 82, 89], + [6, 13, 20, 27, 34, 41, 48, 55, 62, 69, 76, 83, 90], + [7, 14, 21, 28, 35, 42, 49, 56, 63, 70, 77, 84, 91] + ]> : tensor<7x13xi8> + + %B_init = tensor.empty() : tensor<13x7xi8> + %B = linalg.transpose ins(%A: tensor<7x13xi8>) + outs(%B_init: tensor<13x7xi8>) permutation = [1, 0] + + %A_dyn = tensor.cast %A : tensor<7x13xi8> to tensor + %B_dyn = tensor.cast %B : tensor<13x7xi8> to tensor + + %C_init = bufferization.alloc_tensor(%c7, %c7) : tensor + %C = linalg.fill ins(%c0 : i32) outs(%C_init : tensor) -> tensor + + // CHECK: Unranked Memref {{.*}} rank = 2 offset = 0 sizes = [7, 7] strides = [7, 1] data = + // CHECK: [32955, 33514, 34073, 34632, 35191, 35750, 36309] + // CHECK: [33514, 34086, 34658, 35230, 35802, 36374, 36946] + // CHECK: [34073, 34658, 35243, 35828, 36413, 36998, 37583] + // CHECK: [34632, 35230, 35828, 36426, 37024, 37622, 38220] + // CHECK: [35191, 35802, 36413, 37024, 37635, 38246, 38857] + // CHECK: [35750, 36374, 36998, 37622, 38246, 38870, 39494] + // CHECK: [36309, 36946, 
37583, 38220, 38857, 39494, 40131] + call @matmul_i8_to_i32(%A_dyn, %B_dyn, %C) : (tensor, tensor, tensor) -> () + + return +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%module : !transform.any_op {transform.consumed}) { + %matmul = transform.structured.match ops{["linalg.matmul"]} in %module + : (!transform.any_op) -> !transform.any_op + + // Step 1: Tile for size [8] x [8] (unrolled by 4), which corresponds to + // (2 x SVLs) x (2 x SVLs), where SVLs is the number of 32-bit elements in a + // vector of SVL bits. This uses all four 32-bit SME virtual tiles. + %tiled_linalg_op, %loop_i, %loop_j, %loop_k = transform.structured.tile_using_for %matmul[[8], [8], 4] + : (!transform.any_op) -> (!transform.any_op, !transform.op<"scf.for">, !transform.op<"scf.for">, !transform.op<"scf.for">) + + // Step 2: Vectorize. + transform.structured.vectorize %tiled_linalg_op vector_sizes [[8], [8], 4] + : !transform.any_op + + // Step 3: Bufferize ahead of TransferReadDropUnitDimsPattern, which + // currently only supports memrefs. + %bufferize = transform.bufferization.one_shot_bufferize %module + {bufferize_function_boundaries=true} : (!transform.any_op) -> !transform.any_op + + %func = transform.structured.match ops{["func.func"]} in %bufferize + : (!transform.any_op) -> !transform.any_op + + // Step 4: Lower vector.multi_reduction to vector.contract (+ some helpful patterns). + transform.apply_patterns to %func { + transform.apply_patterns.vector.lower_masked_transfers + transform.apply_patterns.vector.transfer_permutation_patterns + transform.apply_patterns.vector.reduction_to_contract + } : !transform.any_op + + // Step 5: Lower vector.contract to vector.outerproduct. Also drop unit + // dims, specifically to prevent vector.transfer_read of vector<[8]x1xi32>, + // which can't be lowered in generic path. 
+ transform.apply_patterns to %func { + transform.apply_patterns.vector.lower_contraction lowering_strategy = "outerproduct" + transform.apply_patterns.vector.lower_masks + transform.apply_patterns.vector.rank_reducing_subview_patterns + } : !transform.any_op + + transform.yield + } +} + +func.func private @printMemrefI32(%ptr : tensor<*xi32>) +func.func private @setArmSVLBits(%bits : i32) From 2df652a69159b76c97cfd94e32ad6bb71dde716c Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Thu, 15 Feb 2024 10:39:05 +0000 Subject: [PATCH 234/240] [CodeGen] Simplify updateLiveIn in MachineSink (#79831) When a whole register is added a basic block's liveins, use LaneBitmask::getAll for the live lanes instead of trying to calculate an accurate mask of the lanes that comprise the register. This simplifies the code and matches other places where a whole register is marked as livein. This also avoids problems when regunits that are synthesized by TableGen to represent ad hoc aliasing have a lane mask of 0. 
Fixes #78942 --- llvm/lib/CodeGen/MachineSink.cpp | 9 ++------- llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir | 2 +- .../CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll | 2 +- llvm/test/CodeGen/AMDGPU/postra-machine-sink.mir | 2 +- 4 files changed, 5 insertions(+), 10 deletions(-) diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index e7e8f602683480..c3a1d3759882d8 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -1949,13 +1949,8 @@ static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB, for (unsigned DefReg : DefedRegsInCopy) for (MCPhysReg S : TRI->subregs_inclusive(DefReg)) SuccBB->removeLiveIn(S); - for (auto U : UsedOpsInCopy) { - Register SrcReg = MI->getOperand(U).getReg(); - LaneBitmask Mask; - for (MCRegUnitMaskIterator S(SrcReg, TRI); S.isValid(); ++S) - Mask |= (*S).second; - SuccBB->addLiveIn(SrcReg, Mask); - } + for (auto U : UsedOpsInCopy) + SuccBB->addLiveIn(MI->getOperand(U).getReg()); SuccBB->sortUniqueLiveIns(); } diff --git a/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir b/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir index 65648bacd55679..c1da29ecc2c2f5 100644 --- a/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir +++ b/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir @@ -47,7 +47,7 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.2(0x80000000) - ; GCN-NEXT: liveins: $exec:0x000000000000000F, $sgpr30, $sgpr31, $vgpr0:0x0000000000000003, $vgpr1:0x0000000000000003, $vgpr2:0x0000000000000003, $vgpr3:0x0000000000000003, $vgpr4:0x0000000000000003, $vgpr5:0x0000000000000003, $vgpr6:0x0000000000000003, $vgpr7:0x0000000000000003, $vgpr8:0x0000000000000003, $vgpr9:0x0000000000000003, $vgpr40, $sgpr30_sgpr31, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr41_vgpr42:0x000000000000000F, $vgpr43_vgpr44:0x000000000000000F, $vgpr45_vgpr46:0x000000000000000F, 
$vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F + ; GCN-NEXT: liveins: $exec, $sgpr30, $sgpr31, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr40, $sgpr30_sgpr31, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr41_vgpr42:0x000000000000000F, $vgpr43_vgpr44:0x000000000000000F, $vgpr45_vgpr46:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $vgpr57 = COPY $vgpr9, implicit $exec ; GCN-NEXT: renamable $vgpr56 = COPY $vgpr8, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll index 5a128c7541d1ec..24ef8ce1beb2db 100644 --- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll +++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll @@ -808,7 +808,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.58: ; GFX90A-NEXT: successors: %bb.7(0x80000000) - ; GFX90A-NEXT: liveins: $exec:0x000000000000000F, $sgpr12, $sgpr13, $sgpr14, $sgpr15:0x0000000000000003, $sgpr23:0x0000000000000003, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr42_sgpr43, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr2_vgpr3:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $exec, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr23, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, 
$sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr42_sgpr43, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr2_vgpr3:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr15 = COPY killed renamable $sgpr23, implicit $exec ; GFX90A-NEXT: renamable $vgpr17 = COPY killed renamable $sgpr15, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/postra-machine-sink.mir b/llvm/test/CodeGen/AMDGPU/postra-machine-sink.mir index ff7a40bfde9580..86863c31753642 100644 --- a/llvm/test/CodeGen/AMDGPU/postra-machine-sink.mir +++ b/llvm/test/CodeGen/AMDGPU/postra-machine-sink.mir @@ -5,7 +5,7 @@ # CHECK-LABEL: bb.0: # CHECK: renamable $sgpr1 = COPY renamable $sgpr2 # CHECK-LABEL: bb.1: -# CHECK: liveins: $sgpr0_sgpr1:0x000000000000000F +# CHECK: liveins: $sgpr0_sgpr1 # CHECK: renamable $vgpr1_vgpr2 = COPY renamable $sgpr0_sgpr1 --- From b279ca278370dabd27b8d380d59bdb5018366053 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 14 Feb 2024 13:05:09 +0000 Subject: [PATCH 235/240] [DAG] visitCTPOP - CTPOP(SHIFT(X)) -> CTPOP(X) iff the shift doesn't affect any non-zero bits If the source is being (logically) shifted, but doesn't affect any active bits, then we can call CTPOP on the shift source directly. 
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 17 +++++++++++++++++ llvm/test/CodeGen/X86/ctpop-mask.ll | 8 +++----- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index bdd2336fa42379..18037f11ee4088 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -11144,6 +11144,23 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) { if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTPOP, DL, VT, {N0})) return C; + // If the source is being shifted, but doesn't affect any active bits, + // then we can call CTPOP on the shift source directly. + if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SHL) { + if (ConstantSDNode *AmtC = isConstOrConstSplat(N0.getOperand(1))) { + const APInt &Amt = AmtC->getAPIntValue(); + if (Amt.ult(NumBits)) { + KnownBits KnownSrc = DAG.computeKnownBits(N0.getOperand(0)); + if ((N0.getOpcode() == ISD::SRL && + Amt.ule(KnownSrc.countMinTrailingZeros())) || + (N0.getOpcode() == ISD::SHL && + Amt.ule(KnownSrc.countMinLeadingZeros()))) { + return DAG.getNode(ISD::CTPOP, DL, VT, N0.getOperand(0)); + } + } + } + } + // If the upper bits are known to be zero, then see if its profitable to // only count the lower bits. 
if (VT.isScalarInteger() && NumBits > 8 && (NumBits & 1) == 0) { diff --git a/llvm/test/CodeGen/X86/ctpop-mask.ll b/llvm/test/CodeGen/X86/ctpop-mask.ll index 97c634a2a133d7..602d9b511cdc06 100644 --- a/llvm/test/CodeGen/X86/ctpop-mask.ll +++ b/llvm/test/CodeGen/X86/ctpop-mask.ll @@ -549,7 +549,6 @@ define i64 @ctpop_shifted_mask8(i64 %x) nounwind readnone { ; X86-POPCOUNT-LABEL: ctpop_shifted_mask8: ; X86-POPCOUNT: # %bb.0: ; X86-POPCOUNT-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-POPCOUNT-NEXT: shll $8, %eax ; X86-POPCOUNT-NEXT: popcntl %eax, %eax ; X86-POPCOUNT-NEXT: xorl %edx, %edx ; X86-POPCOUNT-NEXT: retl @@ -663,12 +662,11 @@ define i64 @ctpop_shifted_mask16(i64 %x) nounwind readnone { ; X86-NO-POPCOUNT-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NO-POPCOUNT-NEXT: movl %ecx, %eax ; X86-NO-POPCOUNT-NEXT: andl $524280, %eax # imm = 0x7FFF8 -; X86-NO-POPCOUNT-NEXT: shrl $4, %ecx -; X86-NO-POPCOUNT-NEXT: andl $21845, %ecx # imm = 0x5555 -; X86-NO-POPCOUNT-NEXT: shrl $3, %eax +; X86-NO-POPCOUNT-NEXT: shrl %ecx +; X86-NO-POPCOUNT-NEXT: andl $87380, %ecx # imm = 0x15554 ; X86-NO-POPCOUNT-NEXT: subl %ecx, %eax ; X86-NO-POPCOUNT-NEXT: movl %eax, %ecx -; X86-NO-POPCOUNT-NEXT: andl $858993459, %ecx # imm = 0x33333333 +; X86-NO-POPCOUNT-NEXT: andl $858993456, %ecx # imm = 0x33333330 ; X86-NO-POPCOUNT-NEXT: shrl $2, %eax ; X86-NO-POPCOUNT-NEXT: andl $858993459, %eax # imm = 0x33333333 ; X86-NO-POPCOUNT-NEXT: addl %ecx, %eax From 0636309051f3b1a2b87047770bb3f7df1f3e27c3 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 15 Feb 2024 09:43:13 +0000 Subject: [PATCH 236/240] Fix MSVC "signed/unsigned mismatch" warning. NFC. 
--- clang/lib/AST/Interp/Function.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/AST/Interp/Function.h b/clang/lib/AST/Interp/Function.h index 6500e0126c226f..b19d64f9371e3c 100644 --- a/clang/lib/AST/Interp/Function.h +++ b/clang/lib/AST/Interp/Function.h @@ -186,7 +186,7 @@ class Function final { /// Returns the number of parameter this function takes when it's called, /// i.e excluding the instance pointer and the RVO pointer. unsigned getNumWrittenParams() const { - assert(getNumParams() >= (hasThisPointer() + hasRVO())); + assert(getNumParams() >= (unsigned)(hasThisPointer() + hasRVO())); return getNumParams() - hasThisPointer() - hasRVO(); } unsigned getWrittenArgSize() const { From 7c422dde8de7382e494615aa2553aa4e27e8b204 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 15 Feb 2024 10:53:53 +0000 Subject: [PATCH 237/240] [X86] shuffle combines - share the same SDLoc argument instead of recreating it over and over again. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 42 ++++++++++++------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 067676703141a3..be123e18016115 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -7404,7 +7404,8 @@ static int getUnderlyingExtractedFromVec(SDValue &ExtractedFromVec, return Idx; } -static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) { +static SDValue buildFromShuffleMostly(SDValue Op, const SDLoc &DL, + SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); // Skip if insert_vec_elt is not supported. 
@@ -7412,9 +7413,7 @@ static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) { if (!TLI.isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT)) return SDValue(); - SDLoc DL(Op); unsigned NumElems = Op.getNumOperands(); - SDValue VecIn1; SDValue VecIn2; SmallVector InsertIndices; @@ -9021,7 +9020,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { assert(Values.size() > 1 && "Expected non-undef and non-splat vector"); // Check for a build vector from mostly shuffle plus few inserting. - if (SDValue Sh = buildFromShuffleMostly(Op, DAG)) + if (SDValue Sh = buildFromShuffleMostly(Op, dl, DAG)) return Sh; // For SSE 4.1, use insertps to put the high elements into the low element. @@ -39519,12 +39518,12 @@ static SmallVector getPSHUFShuffleMask(SDValue N) { /// We walk up the chain and look for a combinable shuffle, skipping over /// shuffles that we could hoist this shuffle's transformation past without /// altering anything. -static SDValue -combineRedundantDWordShuffle(SDValue N, MutableArrayRef Mask, - SelectionDAG &DAG) { +static SDValue combineRedundantDWordShuffle(SDValue N, + MutableArrayRef Mask, + const SDLoc &DL, + SelectionDAG &DAG) { assert(N.getOpcode() == X86ISD::PSHUFD && "Called with something other than an x86 128-bit half shuffle!"); - SDLoc DL(N); // Walk up a single-use chain looking for a combinable shuffle. Keep a stack // of the shuffles in the chain so that we can form a fresh chain to replace @@ -39922,10 +39921,10 @@ static SDValue canonicalizeLaneShuffleWithRepeatedOps(SDValue V, } /// Try to combine x86 target specific shuffles. 
-static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, +static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL, + SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { - SDLoc DL(N); MVT VT = N.getSimpleValueType(); SmallVector Mask; unsigned Opcode = N.getOpcode(); @@ -40653,7 +40652,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, break; case X86ISD::PSHUFD: - if (SDValue NewN = combineRedundantDWordShuffle(N, Mask, DAG)) + if (SDValue NewN = combineRedundantDWordShuffle(N, Mask, DL, DAG)) return NewN; break; @@ -40762,7 +40761,7 @@ static bool isAddSubOrSubAdd(SDNode *N, const X86Subtarget &Subtarget, } /// Combine shuffle of two fma nodes into FMAddSub or FMSubAdd. -static SDValue combineShuffleToFMAddSub(SDNode *N, +static SDValue combineShuffleToFMAddSub(SDNode *N, const SDLoc &DL, const X86Subtarget &Subtarget, SelectionDAG &DAG) { // We only handle target-independent shuffles. @@ -40796,7 +40795,6 @@ static SDValue combineShuffleToFMAddSub(SDNode *N, return SDValue(); // FMAddSub takes zeroth operand from FMSub node. - SDLoc DL(N); bool IsSubAdd = Op0Even ? Op0 == FMAdd : Op1 == FMAdd; unsigned Opcode = IsSubAdd ? X86ISD::FMSUBADD : X86ISD::FMADDSUB; return DAG.getNode(Opcode, DL, VT, FMAdd.getOperand(0), FMAdd.getOperand(1), @@ -40805,10 +40803,10 @@ static SDValue combineShuffleToFMAddSub(SDNode *N, /// Try to combine a shuffle into a target-specific add-sub or /// mul-add-sub node. 
-static SDValue combineShuffleToAddSubOrFMAddSub(SDNode *N, +static SDValue combineShuffleToAddSubOrFMAddSub(SDNode *N, const SDLoc &DL, const X86Subtarget &Subtarget, SelectionDAG &DAG) { - if (SDValue V = combineShuffleToFMAddSub(N, Subtarget, DAG)) + if (SDValue V = combineShuffleToFMAddSub(N, DL, Subtarget, DAG)) return V; SDValue Opnd0, Opnd1; @@ -40817,7 +40815,6 @@ static SDValue combineShuffleToAddSubOrFMAddSub(SDNode *N, return SDValue(); MVT VT = N->getSimpleValueType(0); - SDLoc DL(N); // Try to generate X86ISD::FMADDSUB node here. SDValue Opnd2; @@ -40847,7 +40844,8 @@ static SDValue combineShuffleToAddSubOrFMAddSub(SDNode *N, // We are looking for a shuffle where both sources are concatenated with undef // and have a width that is half of the output's width. AVX2 has VPERMD/Q, so // if we can express this as a single-source shuffle, that's preferable. -static SDValue combineShuffleOfConcatUndef(SDNode *N, SelectionDAG &DAG, +static SDValue combineShuffleOfConcatUndef(SDNode *N, const SDLoc &DL, + SelectionDAG &DAG, const X86Subtarget &Subtarget) { if (!Subtarget.hasAVX2() || !isa(N)) return SDValue(); @@ -40879,11 +40877,10 @@ static SDValue combineShuffleOfConcatUndef(SDNode *N, SelectionDAG &DAG, SmallVector Mask; int NumElts = VT.getVectorNumElements(); - ShuffleVectorSDNode *SVOp = cast(N); + auto *SVOp = cast(N); for (int Elt : SVOp->getMask()) Mask.push_back(Elt < NumElts ? 
Elt : (Elt - NumElts / 2)); - SDLoc DL(N); SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, N0.getOperand(0), N1.getOperand(0)); return DAG.getVectorShuffle(VT, DL, Concat, DAG.getUNDEF(VT), Mask); @@ -40935,7 +40932,8 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, EVT VT = N->getValueType(0); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (TLI.isTypeLegal(VT) && !isSoftF16(VT, Subtarget)) - if (SDValue AddSub = combineShuffleToAddSubOrFMAddSub(N, Subtarget, DAG)) + if (SDValue AddSub = + combineShuffleToAddSubOrFMAddSub(N, dl, Subtarget, DAG)) return AddSub; // Attempt to combine into a vector load/broadcast. @@ -40949,12 +40947,12 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, // Into: // (vector_shuffle (concat_vectors t1, t2), undef) // Since the latter can be efficiently lowered with VPERMD/VPERMQ - if (SDValue ShufConcat = combineShuffleOfConcatUndef(N, DAG, Subtarget)) + if (SDValue ShufConcat = combineShuffleOfConcatUndef(N, dl, DAG, Subtarget)) return ShufConcat; if (isTargetShuffle(N->getOpcode())) { SDValue Op(N, 0); - if (SDValue Shuffle = combineTargetShuffle(Op, DAG, DCI, Subtarget)) + if (SDValue Shuffle = combineTargetShuffle(Op, dl, DAG, DCI, Subtarget)) return Shuffle; // Try recursively combining arbitrary sequences of x86 shuffle From fd191378dce6b20c100da716f94130af2593df37 Mon Sep 17 00:00:00 2001 From: Jacek Caban Date: Thu, 15 Feb 2024 12:17:45 +0100 Subject: [PATCH 238/240] [llvm-dlltool] Add ARM64EC target support. (#81624) Add new target and a new -n option allowing to specify native module definition file, similar to how -defArm64Native works in llvm-lib. This also changes archive format to use K_COFF like non-mingw targets. It's required on ARM64EC, but it should be fine for other targets too. 
--- llvm/lib/Object/COFFImportFile.cpp | 2 +- .../llvm-dlltool/DlltoolDriver.cpp | 22 +++++++-- llvm/lib/ToolDrivers/llvm-dlltool/Options.td | 3 ++ llvm/test/tools/llvm-dlltool/arm64ec.test | 46 +++++++++++++++++++ 4 files changed, 68 insertions(+), 5 deletions(-) create mode 100644 llvm/test/tools/llvm-dlltool/arm64ec.test diff --git a/llvm/lib/Object/COFFImportFile.cpp b/llvm/lib/Object/COFFImportFile.cpp index f6f6cf2a1602cf..376dd126baf61a 100644 --- a/llvm/lib/Object/COFFImportFile.cpp +++ b/llvm/lib/Object/COFFImportFile.cpp @@ -712,7 +712,7 @@ Error writeImportLibrary(StringRef ImportName, StringRef Path, return e; return writeArchive(Path, Members, SymtabWritingMode::NormalSymtab, - MinGW ? object::Archive::K_GNU : object::Archive::K_COFF, + object::Archive::K_COFF, /*Deterministic*/ true, /*Thin*/ false, /*OldArchiveBuf*/ nullptr, isArm64EC(Machine)); } diff --git a/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp b/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp index 98795c51ce1336..fa716a281a69fc 100644 --- a/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp +++ b/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp @@ -75,6 +75,7 @@ MachineTypes getEmulation(StringRef S) { .Case("i386:x86-64", IMAGE_FILE_MACHINE_AMD64) .Case("arm", IMAGE_FILE_MACHINE_ARMNT) .Case("arm64", IMAGE_FILE_MACHINE_ARM64) + .Case("arm64ec", IMAGE_FILE_MACHINE_ARM64EC) .Default(IMAGE_FILE_MACHINE_UNKNOWN); } @@ -87,7 +88,8 @@ MachineTypes getMachine(Triple T) { case Triple::arm: return COFF::IMAGE_FILE_MACHINE_ARMNT; case Triple::aarch64: - return COFF::IMAGE_FILE_MACHINE_ARM64; + return T.isWindowsArm64EC() ? 
COFF::IMAGE_FILE_MACHINE_ARM64EC + : COFF::IMAGE_FILE_MACHINE_ARM64; default: return COFF::IMAGE_FILE_MACHINE_UNKNOWN; } @@ -168,7 +170,7 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef ArgsArr) { (!Args.hasArgNoClaim(OPT_d) && !Args.hasArgNoClaim(OPT_l))) { Table.printHelp(outs(), "llvm-dlltool [options] file...", "llvm-dlltool", false); - llvm::outs() << "\nTARGETS: i386, i386:x86-64, arm, arm64\n"; + llvm::outs() << "\nTARGETS: i386, i386:x86-64, arm, arm64, arm64ec\n"; return 1; } @@ -201,7 +203,19 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef ArgsArr) { if (auto *Arg = Args.getLastArg(OPT_D)) OutputFile = Arg->getValue(); - std::vector Exports; + std::vector Exports, NativeExports; + + if (Args.hasArg(OPT_n)) { + if (!isArm64EC(Machine)) { + llvm::errs() << "native .def file is supported only on arm64ec target\n"; + return 1; + } + if (!parseModuleDefinition(Args.getLastArg(OPT_n)->getValue(), + IMAGE_FILE_MACHINE_ARM64, AddUnderscores, + NativeExports, OutputFile)) + return 1; + } + if (!parseModuleDefinition(Args.getLastArg(OPT_d)->getValue(), Machine, AddUnderscores, Exports, OutputFile)) return 1; @@ -230,7 +244,7 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef ArgsArr) { std::string Path = std::string(Args.getLastArgValue(OPT_l)); if (!Path.empty() && writeImportLibrary(OutputFile, Path, Exports, Machine, - /*MinGW=*/true)) + /*MinGW=*/true, NativeExports)) return 1; return 0; } diff --git a/llvm/lib/ToolDrivers/llvm-dlltool/Options.td b/llvm/lib/ToolDrivers/llvm-dlltool/Options.td index fee408fd0e9a80..ba94aed067e6ad 100644 --- a/llvm/lib/ToolDrivers/llvm-dlltool/Options.td +++ b/llvm/lib/ToolDrivers/llvm-dlltool/Options.td @@ -12,6 +12,9 @@ def D_long : JoinedOrSeparate<["--"], "dllname">, Alias; def d: JoinedOrSeparate<["-"], "d">, HelpText<"Input .def File">; def d_long : JoinedOrSeparate<["--"], "input-def">, Alias; +def n: JoinedOrSeparate<["-"], "n">, HelpText<"Input native .def File on ARM64EC">; +def n_long : JoinedOrSeparate<["--"], 
"input-native-def">, Alias; + def k: Flag<["-"], "k">, HelpText<"Kill @n Symbol from export">; def k_alias: Flag<["--"], "kill-at">, Alias; diff --git a/llvm/test/tools/llvm-dlltool/arm64ec.test b/llvm/test/tools/llvm-dlltool/arm64ec.test new file mode 100644 index 00000000000000..77cef16a5fb193 --- /dev/null +++ b/llvm/test/tools/llvm-dlltool/arm64ec.test @@ -0,0 +1,46 @@ +Test creating ARM64EC importlib. + +RUN: split-file %s %t.dir && cd %t.dir + +RUN: llvm-dlltool -m arm64ec -d test.def -l test.lib +RUN: llvm-nm --print-armap test.lib | FileCheck --check-prefix=ARMAP %s + +ARMAP: Archive map +ARMAP-NEXT: __IMPORT_DESCRIPTOR_test in test.dll +ARMAP-NEXT: __NULL_IMPORT_DESCRIPTOR in test.dll +ARMAP-NEXT: test_NULL_THUNK_DATA in test.dll +ARMAP-EMPTY: +ARMAP-NEXT: Archive EC map +ARMAP-NEXT: #func in test.dll +ARMAP-NEXT: __imp_aux_func in test.dll +ARMAP-NEXT: __imp_func in test.dll +ARMAP-NEXT: func in test.dll + +RUN: llvm-dlltool -m arm64ec -d test.def -n test2.def -l test2.lib +RUN: llvm-nm --print-armap test2.lib | FileCheck --check-prefix=ARMAP2 %s + +ARMAP2: Archive map +ARMAP2-NEXT: __IMPORT_DESCRIPTOR_test in test.dll +ARMAP2-NEXT: __NULL_IMPORT_DESCRIPTOR in test.dll +ARMAP2-NEXT: __imp_otherfunc in test.dll +ARMAP2-NEXT: otherfunc in test.dll +ARMAP2-NEXT: test_NULL_THUNK_DATA in test.dll +ARMAP2-EMPTY: +ARMAP2-NEXT: Archive EC map +ARMAP2-NEXT: #func in test.dll +ARMAP2-NEXT: __imp_aux_func in test.dll +ARMAP2-NEXT: __imp_func in test.dll +ARMAP2-NEXT: func in test.dll + +RUN: not llvm-dlltool -m arm64 -d test.def -n test2.def -l test2.lib 2>&1 | FileCheck --check-prefix=ERR %s +ERR: native .def file is supported only on arm64ec target + +#--- test.def +LIBRARY test.dll +EXPORTS + func + +#--- test2.def +LIBRARY test.dll +EXPORTS + otherfunc From 3fe5a0cfa5391db18fcc226f3f783642d3f44503 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Thu, 15 Feb 2024 18:14:04 +0900 Subject: [PATCH 239/240] MCDCCoverageBuilder: Use `pop_back_val()` --- 
clang/lib/CodeGen/CoverageMappingGen.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp index fdf821a0eb6928..eb3beec23808e6 100644 --- a/clang/lib/CodeGen/CoverageMappingGen.cpp +++ b/clang/lib/CodeGen/CoverageMappingGen.cpp @@ -770,12 +770,10 @@ struct MCDCCoverageBuilder { /// Pop and return the LHS Decision ([0,0] if not set). mcdc::ConditionIDs pop() { if (!CGM.getCodeGenOpts().MCDCCoverage || NotMapped) - return DecisionStack.front(); + return DecisionStackSentinel; assert(DecisionStack.size() > 1); - mcdc::ConditionIDs D = DecisionStack.back(); - DecisionStack.pop_back(); - return D; + return DecisionStack.pop_back_val(); } /// Return the total number of conditions and reset the state. The number of From 75f0d40507ea3f7c99dd3250ff0fbe6dab341910 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Thu, 15 Feb 2024 18:24:57 +0900 Subject: [PATCH 240/240] CoverageMapping: Move `getParams(MCDCParams)` into `mcdc::` Fixup for #81227 --- clang/lib/CodeGen/CoverageMappingGen.cpp | 3 +-- .../llvm/ProfileData/Coverage/CoverageMapping.h | 14 ++------------ .../include/llvm/ProfileData/Coverage/MCDCTypes.h | 15 +++++++++++++++ 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp index eb3beec23808e6..c10d85ea89ee61 100644 --- a/clang/lib/CodeGen/CoverageMappingGen.cpp +++ b/clang/lib/CodeGen/CoverageMappingGen.cpp @@ -190,8 +190,7 @@ class SourceMappingRegion { } const auto &getMCDCDecisionParams() const { - return CounterMappingRegion::getParams( - MCDCParams); + return mcdc::getParams(MCDCParams); } const mcdc::Parameters &getMCDCParams() const { return MCDCParams; } diff --git a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h index e3b394287f3352..c5c9740f25c2ce 100644 --- 
a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h +++ b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h @@ -258,22 +258,12 @@ struct CounterMappingRegion { /// Parameters used for Modified Condition/Decision Coverage mcdc::Parameters MCDCParams; - template - static auto &getParams(MaybeConstMCDCParameters &MCDCParams) { - using InnerParameters = - typename std::remove_const::type; - MaybeConstInnerParameters *Params = - std::get_if(&MCDCParams); - assert(Params && "InnerParameters unavailable"); - return *Params; - } - const auto &getDecisionParams() const { - return getParams(MCDCParams); + return mcdc::getParams(MCDCParams); } const auto &getBranchParams() const { - return getParams(MCDCParams); + return mcdc::getParams(MCDCParams); } unsigned FileID = 0; diff --git a/llvm/include/llvm/ProfileData/Coverage/MCDCTypes.h b/llvm/include/llvm/ProfileData/Coverage/MCDCTypes.h index 51f528b7e78804..8c78bed4dec52f 100644 --- a/llvm/include/llvm/ProfileData/Coverage/MCDCTypes.h +++ b/llvm/include/llvm/ProfileData/Coverage/MCDCTypes.h @@ -14,6 +14,8 @@ #define LLVM_PROFILEDATA_COVERAGE_MCDCTYPES_H #include +#include +#include #include namespace llvm::coverage::mcdc { @@ -49,6 +51,19 @@ struct BranchParameters { using Parameters = std::variant; +/// Check and get underlying params in MCDCParams. +/// \tparam MaybeConstInnerParameters Type to get. May be const. +/// \tparam MaybeConstMCDCParameters Expected inferred. May be const. +/// \param MCDCParams May be const. +template +static auto &getParams(MaybeConstMCDCParameters &MCDCParams) { + using InnerParameters = + typename std::remove_const::type; + MaybeConstInnerParameters *Params = std::get_if(&MCDCParams); + assert(Params && "InnerParameters unavailable"); + return *Params; +} + } // namespace llvm::coverage::mcdc #endif // LLVM_PROFILEDATA_COVERAGE_MCDCTYPES_H
Bug Group Bug Type ▾