diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 293292d47dd48..d421357e80927 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -29774,10 +29774,8 @@ SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE( } unsigned EltSize = VT.getScalarSizeInBits(); - for (unsigned LaneSize : {64U, 32U, 16U}) { - if (isREVMask(ShuffleMask, EltSize, VT.getVectorNumElements(), LaneSize)) { - EVT NewVT = - getPackedSVEVectorVT(EVT::getIntegerVT(*DAG.getContext(), LaneSize)); + for (unsigned BlockSize : {64U, 32U, 16U}) { + if (isREVMask(ShuffleMask, EltSize, VT.getVectorNumElements(), BlockSize)) { unsigned RevOp; if (EltSize == 8) RevOp = AArch64ISD::BSWAP_MERGE_PASSTHRU; @@ -29785,24 +29783,24 @@ SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE( RevOp = AArch64ISD::REVH_MERGE_PASSTHRU; else RevOp = AArch64ISD::REVW_MERGE_PASSTHRU; - - Op = DAG.getNode(ISD::BITCAST, DL, NewVT, Op1); - Op = LowerToPredicatedOp(Op, DAG, RevOp); - Op = DAG.getNode(ISD::BITCAST, DL, ContainerVT, Op); - return convertFromScalableVector(DAG, VT, Op); + EVT BlockedVT = + getPackedSVEVectorVT(EVT::getIntegerVT(*DAG.getContext(), BlockSize)); + SDValue Pg = getPredicateForVector(DAG, DL, BlockedVT); + SDValue BlockedOp1 = DAG.getNode(ISD::BITCAST, DL, BlockedVT, Op1); + SDValue BlockedRev = DAG.getNode(RevOp, DL, BlockedVT, Pg, BlockedOp1, + DAG.getUNDEF(BlockedVT)); + SDValue Container = + DAG.getNode(ISD::BITCAST, DL, ContainerVT, BlockedRev); + return convertFromScalableVector(DAG, VT, Container); } } if (Subtarget->hasSVE2p1() && EltSize == 64 && isREVMask(ShuffleMask, EltSize, VT.getVectorNumElements(), 128)) { - if (!VT.isFloatingPoint()) - return LowerToPredicatedOp(Op, DAG, AArch64ISD::REVD_MERGE_PASSTHRU); - - EVT NewVT = getPackedSVEVectorVT(EVT::getIntegerVT(*DAG.getContext(), 64)); - Op = DAG.getNode(ISD::BITCAST, DL, NewVT, Op1); - Op = LowerToPredicatedOp(Op, DAG, AArch64ISD::REVD_MERGE_PASSTHRU); - Op = DAG.getNode(ISD::BITCAST, DL, ContainerVT, Op); - return convertFromScalableVector(DAG, VT, Op); + SDValue Pg = getPredicateForVector(DAG, DL, VT); + SDValue Revd = DAG.getNode(AArch64ISD::REVD_MERGE_PASSTHRU, DL, ContainerVT, + Pg, Op1, DAG.getUNDEF(ContainerVT)); + return convertFromScalableVector(DAG, VT, Revd); } unsigned WhichResult; diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll index 0cda4d94444e9..42f9bec94721e 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll @@ -227,9 +227,8 @@ define void @test_revdv4f64_sve2p1(ptr %a) #2 { ; CHECK-LABEL: test_revdv4f64_sve2p1: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d, vl4 -; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] -; CHECK-NEXT: revd z0.q, p1/m, z0.q +; CHECK-NEXT: revd z0.q, p0/m, z0.q ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: ret %tmp1 = load <4 x double>, ptr %a diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll index c364abf2916e8..890bc721128ff 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll @@ -701,7 +701,7 @@ define void @test_revdv4f64_sve2p1(ptr %a) #1 { ; CHECK-LABEL: test_revdv4f64_sve2p1: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: revd z0.q, p0/m, z0.q ; CHECK-NEXT: revd z1.q, p0/m, z1.q ; CHECK-NEXT: stp q0, q1, [x0] @@ -710,7 +710,7 @@ define void @test_revdv4f64_sve2p1(ptr %a) #1 { ; NONEON-NOSVE-LABEL: test_revdv4f64_sve2p1: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] -; NONEON-NOSVE-NEXT: ptrue p0.d +; NONEON-NOSVE-NEXT: ptrue p0.d, vl2 ; NONEON-NOSVE-NEXT: revd z0.q, p0/m, z0.q ; NONEON-NOSVE-NEXT: revd z1.q, p0/m, z1.q ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]