Skip to content

Commit

Permalink
[AArch64][SVE] Add instcombine for PTEST_ANY(X=OP(PG,...), X) -> PTES…
Browse files Browse the repository at this point in the history
…T_ANY(PG, X)

Given this is an OR reduction, the two are equivalent, and later
optimizations (AArch64InstrInfo::optimizePTestInstr) may rewrite the
sequence to use the flag-setting variant of instruction X, removing the
PTEST altogether.

Reviewed By: paulwalker-arm, bsmith

Differential Revision: https://reviews.llvm.org/D134946
  • Loading branch information
c-rhodes committed Oct 12, 2022
1 parent 5b5756e commit 388cacb
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 6 deletions.
29 changes: 23 additions & 6 deletions llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -966,14 +966,15 @@ static Optional<Instruction *> instCombineSVEPTest(InstCombiner &IC,
IntrinsicInst *Op1 = dyn_cast<IntrinsicInst>(II.getArgOperand(0));
IntrinsicInst *Op2 = dyn_cast<IntrinsicInst>(II.getArgOperand(1));

if (Op1 && Op2 &&
Op1->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
Op2->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
Op1->getArgOperand(0)->getType() == Op2->getArgOperand(0)->getType()) {
if (!Op1 || !Op2)
return None;

IRBuilder<> Builder(II.getContext());
Builder.SetInsertPoint(&II);
IRBuilder<> Builder(II.getContext());
Builder.SetInsertPoint(&II);

if (Op1->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
Op2->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
Op1->getArgOperand(0)->getType() == Op2->getArgOperand(0)->getType()) {
Value *Ops[] = {Op1->getArgOperand(0), Op2->getArgOperand(0)};
Type *Tys[] = {Op1->getArgOperand(0)->getType()};

Expand All @@ -983,6 +984,22 @@ static Optional<Instruction *> instCombineSVEPTest(InstCombiner &IC,
return IC.replaceInstUsesWith(II, PTest);
}

// Transform PTEST_ANY(X=OP(PG,...), X) -> PTEST_ANY(PG, X).
// Later optimizations may rewrite the sequence to use the flag-setting
// variant of instruction X to remove the PTEST.
if ((Op1 == Op2) &&
(II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&
((Op1->getIntrinsicID() == Intrinsic::aarch64_sve_brkb_z) ||
(Op1->getIntrinsicID() == Intrinsic::aarch64_sve_rdffr_z))) {
Value *Ops[] = {Op1->getArgOperand(0), Op1};
Type *Tys[] = {Op1->getType()};

auto *PTest = Builder.CreateIntrinsic(II.getIntrinsicID(), Tys, Ops);
PTest->takeName(&II);

return IC.replaceInstUsesWith(II, PTest);
}

return None;
}

Expand Down
26 changes: 26 additions & 0 deletions llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,29 @@ define i1 @ptest_any2(<vscale x 4 x i1> %a) #0 {
ret i1 %out
}

; Rewrite PTEST_ANY(X=OP(PG,...), X) -> PTEST_ANY(PG, X).
; Here OP is brkb.z: its result %1 is passed as both operands of ptest.any.
; The expected output keeps the brkb.z call and replaces the first ptest.any
; operand with %pg, the first argument of the brkb.z call.
define i1 @ptest_any_brkb_z(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
; CHECK-LABEL: @ptest_any_brkb_z(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brkb.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[A:%.*]])
; CHECK-NEXT: [[OUT:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> [[PG]], <vscale x 16 x i1> [[TMP1]])
; CHECK-NEXT: ret i1 [[OUT]]
;
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brkb.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a)
%out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
ret i1 %out
}

; Same rewrite as PTEST_ANY(X=OP(PG,...), X) -> PTEST_ANY(PG, X), with OP
; being rdffr.z: the rdffr.z result %1 is passed as both operands of
; ptest.any. The expected output keeps the rdffr.z call and replaces the
; first ptest.any operand with %pg, the argument of the rdffr.z call.
define i1 @ptest_any_rdffr_z(<vscale x 16 x i1> %pg) {
; CHECK-LABEL: @ptest_any_rdffr_z(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> [[PG:%.*]])
; CHECK-NEXT: [[OUT:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> [[PG]], <vscale x 16 x i1> [[TMP1]])
; CHECK-NEXT: ret i1 [[OUT]]
;
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> %pg)
%out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
ret i1 %out
}

define i1 @ptest_first(<vscale x 4 x i1> %a) #0 {
; CHECK-LABEL: @ptest_first(
; CHECK-NEXT: [[MASK:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 0)
Expand Down Expand Up @@ -81,4 +104,7 @@ declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)

; Intrinsics called by the @ptest_any_brkb_z and @ptest_any_rdffr_z tests.
declare <vscale x 16 x i1> @llvm.aarch64.sve.brkb.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1>)

attributes #0 = { "target-features"="+sve" }

0 comments on commit 388cacb

Please sign in to comment.