Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit c9014d3

Browse files
authored
[PreISelIntrinsicLowering] Use index type for index in intrinsic expansion (#193807)
We'd chosen intptr type for the binary in review, but on reflection the index type is probably a conceptually better fit. On riscv, these are going to be the same, so it's purely a conceptual issue. For the unary case, this is an actual change since we were using i64 unconditionally. This improves codegen for RV32 by avoiding the need for expensive legalization of i64 expressions for the IV.
1 parent b96263c commit c9014d3

2 files changed

Lines changed: 126 additions & 6 deletions

File tree

llvm/lib/Transforms/Utils/LowerVectorIntrinsics.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ bool llvm::lowerUnaryVectorIntrinsicAsLoop(Module &M, CallInst *CI) {
2323
BasicBlock *PostLoopBB = nullptr;
2424
Function *ParentFunc = PreLoopBB->getParent();
2525
LLVMContext &Ctx = PreLoopBB->getContext();
26-
Type *Int64Ty = IntegerType::get(Ctx, 64);
26+
Type *IdxTy = M.getDataLayout().getIndexType(Ctx, 0);
2727

2828
PostLoopBB = PreLoopBB->splitBasicBlock(CI);
2929
BasicBlock *LoopBB = BasicBlock::Create(Ctx, "", ParentFunc, PostLoopBB);
@@ -32,13 +32,13 @@ bool llvm::lowerUnaryVectorIntrinsicAsLoop(Module &M, CallInst *CI) {
3232
// Loop preheader
3333
IRBuilder<> PreLoopBuilder(PreLoopBB->getTerminator());
3434
Value *LoopEnd =
35-
PreLoopBuilder.CreateElementCount(Int64Ty, VecTy->getElementCount());
35+
PreLoopBuilder.CreateElementCount(IdxTy, VecTy->getElementCount());
3636

3737
// Loop body
3838
IRBuilder<> LoopBuilder(LoopBB);
3939

40-
PHINode *LoopIndex = LoopBuilder.CreatePHI(Int64Ty, 2);
41-
LoopIndex->addIncoming(ConstantInt::get(Int64Ty, 0U), PreLoopBB);
40+
PHINode *LoopIndex = LoopBuilder.CreatePHI(IdxTy, 2);
41+
LoopIndex->addIncoming(ConstantInt::get(IdxTy, 0U), PreLoopBB);
4242
PHINode *Vec = LoopBuilder.CreatePHI(VecTy, 2);
4343
Vec->addIncoming(CI->getArgOperand(0), PreLoopBB);
4444

@@ -49,7 +49,7 @@ bool llvm::lowerUnaryVectorIntrinsicAsLoop(Module &M, CallInst *CI) {
4949
Value *NewVec = LoopBuilder.CreateInsertElement(Vec, Res, LoopIndex);
5050
Vec->addIncoming(NewVec, LoopBB);
5151

52-
Value *One = ConstantInt::get(Int64Ty, 1U);
52+
Value *One = ConstantInt::get(IdxTy, 1U);
5353
Value *NextLoopIndex = LoopBuilder.CreateAdd(LoopIndex, One);
5454
LoopIndex->addIncoming(NextLoopIndex, LoopBB);
5555

@@ -71,7 +71,7 @@ bool llvm::lowerBinaryVectorIntrinsicAsLoop(Module &M, CallInst *CI) {
7171
BasicBlock *PostLoopBB = nullptr;
7272
Function *ParentFunc = PreLoopBB->getParent();
7373
LLVMContext &Ctx = PreLoopBB->getContext();
74-
Type *IdxTy = M.getDataLayout().getIntPtrType(Ctx);
74+
Type *IdxTy = M.getDataLayout().getIndexType(Ctx, 0);
7575

7676
PostLoopBB = PreLoopBB->splitBasicBlock(CI);
7777
BasicBlock *LoopBB = BasicBlock::Create(Ctx, "", ParentFunc, PostLoopBB);
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -passes=pre-isel-intrinsic-lowering -mtriple=riscv32 -S < %s | FileCheck %s --check-prefix=RV32
3+
; RUN: opt -passes=pre-isel-intrinsic-lowering -mtriple=riscv64 -S < %s | FileCheck %s --check-prefix=RV64
4+
5+
define <vscale x 4 x float> @scalable_vec_sin(<vscale x 4 x float> %input) {
6+
; RV32-LABEL: define <vscale x 4 x float> @scalable_vec_sin(
7+
; RV32-SAME: <vscale x 4 x float> [[INPUT:%.*]]) {
8+
; RV32-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
9+
; RV32-NEXT: [[TMP2:%.*]] = mul nuw i32 [[TMP1]], 4
10+
; RV32-NEXT: br label %[[BB3:.*]]
11+
; RV32: [[BB3]]:
12+
; RV32-NEXT: [[TMP4:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP9:%.*]], %[[BB3]] ]
13+
; RV32-NEXT: [[TMP5:%.*]] = phi <vscale x 4 x float> [ [[INPUT]], [[TMP0]] ], [ [[TMP8:%.*]], %[[BB3]] ]
14+
; RV32-NEXT: [[TMP6:%.*]] = extractelement <vscale x 4 x float> [[TMP5]], i32 [[TMP4]]
15+
; RV32-NEXT: [[TMP7:%.*]] = call float @llvm.sin.f32(float [[TMP6]])
16+
; RV32-NEXT: [[TMP8]] = insertelement <vscale x 4 x float> [[TMP5]], float [[TMP7]], i32 [[TMP4]]
17+
; RV32-NEXT: [[TMP9]] = add i32 [[TMP4]], 1
18+
; RV32-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], [[TMP2]]
19+
; RV32-NEXT: br i1 [[TMP10]], label %[[BB11:.*]], label %[[BB3]]
20+
; RV32: [[BB11]]:
21+
; RV32-NEXT: ret <vscale x 4 x float> [[TMP8]]
22+
;
23+
; RV64-LABEL: define <vscale x 4 x float> @scalable_vec_sin(
24+
; RV64-SAME: <vscale x 4 x float> [[INPUT:%.*]]) {
25+
; RV64-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
26+
; RV64-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4
27+
; RV64-NEXT: br label %[[BB3:.*]]
28+
; RV64: [[BB3]]:
29+
; RV64-NEXT: [[TMP4:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP9:%.*]], %[[BB3]] ]
30+
; RV64-NEXT: [[TMP5:%.*]] = phi <vscale x 4 x float> [ [[INPUT]], [[TMP0]] ], [ [[TMP8:%.*]], %[[BB3]] ]
31+
; RV64-NEXT: [[TMP6:%.*]] = extractelement <vscale x 4 x float> [[TMP5]], i64 [[TMP4]]
32+
; RV64-NEXT: [[TMP7:%.*]] = call float @llvm.sin.f32(float [[TMP6]])
33+
; RV64-NEXT: [[TMP8]] = insertelement <vscale x 4 x float> [[TMP5]], float [[TMP7]], i64 [[TMP4]]
34+
; RV64-NEXT: [[TMP9]] = add i64 [[TMP4]], 1
35+
; RV64-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], [[TMP2]]
36+
; RV64-NEXT: br i1 [[TMP10]], label %[[BB11:.*]], label %[[BB3]]
37+
; RV64: [[BB11]]:
38+
; RV64-NEXT: ret <vscale x 4 x float> [[TMP8]]
39+
;
40+
%output = call <vscale x 4 x float> @llvm.sin.nxv4f32(<vscale x 4 x float> %input)
41+
ret <vscale x 4 x float> %output
42+
}
43+
44+
define <vscale x 4 x float> @scalable_vec_exp(<vscale x 4 x float> %input) {
45+
; RV32-LABEL: define <vscale x 4 x float> @scalable_vec_exp(
46+
; RV32-SAME: <vscale x 4 x float> [[INPUT:%.*]]) {
47+
; RV32-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
48+
; RV32-NEXT: [[TMP2:%.*]] = mul nuw i32 [[TMP1]], 4
49+
; RV32-NEXT: br label %[[BB3:.*]]
50+
; RV32: [[BB3]]:
51+
; RV32-NEXT: [[TMP4:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP9:%.*]], %[[BB3]] ]
52+
; RV32-NEXT: [[TMP5:%.*]] = phi <vscale x 4 x float> [ [[INPUT]], [[TMP0]] ], [ [[TMP8:%.*]], %[[BB3]] ]
53+
; RV32-NEXT: [[TMP6:%.*]] = extractelement <vscale x 4 x float> [[TMP5]], i32 [[TMP4]]
54+
; RV32-NEXT: [[TMP7:%.*]] = call float @llvm.exp.f32(float [[TMP6]])
55+
; RV32-NEXT: [[TMP8]] = insertelement <vscale x 4 x float> [[TMP5]], float [[TMP7]], i32 [[TMP4]]
56+
; RV32-NEXT: [[TMP9]] = add i32 [[TMP4]], 1
57+
; RV32-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], [[TMP2]]
58+
; RV32-NEXT: br i1 [[TMP10]], label %[[BB11:.*]], label %[[BB3]]
59+
; RV32: [[BB11]]:
60+
; RV32-NEXT: ret <vscale x 4 x float> [[TMP8]]
61+
;
62+
; RV64-LABEL: define <vscale x 4 x float> @scalable_vec_exp(
63+
; RV64-SAME: <vscale x 4 x float> [[INPUT:%.*]]) {
64+
; RV64-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
65+
; RV64-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4
66+
; RV64-NEXT: br label %[[BB3:.*]]
67+
; RV64: [[BB3]]:
68+
; RV64-NEXT: [[TMP4:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP9:%.*]], %[[BB3]] ]
69+
; RV64-NEXT: [[TMP5:%.*]] = phi <vscale x 4 x float> [ [[INPUT]], [[TMP0]] ], [ [[TMP8:%.*]], %[[BB3]] ]
70+
; RV64-NEXT: [[TMP6:%.*]] = extractelement <vscale x 4 x float> [[TMP5]], i64 [[TMP4]]
71+
; RV64-NEXT: [[TMP7:%.*]] = call float @llvm.exp.f32(float [[TMP6]])
72+
; RV64-NEXT: [[TMP8]] = insertelement <vscale x 4 x float> [[TMP5]], float [[TMP7]], i64 [[TMP4]]
73+
; RV64-NEXT: [[TMP9]] = add i64 [[TMP4]], 1
74+
; RV64-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], [[TMP2]]
75+
; RV64-NEXT: br i1 [[TMP10]], label %[[BB11:.*]], label %[[BB3]]
76+
; RV64: [[BB11]]:
77+
; RV64-NEXT: ret <vscale x 4 x float> [[TMP8]]
78+
;
79+
%output = call <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float> %input)
80+
ret <vscale x 4 x float> %output
81+
}
82+
83+
define <vscale x 4 x float> @scalable_vec_log(<vscale x 4 x float> %input) {
84+
; RV32-LABEL: define <vscale x 4 x float> @scalable_vec_log(
85+
; RV32-SAME: <vscale x 4 x float> [[INPUT:%.*]]) {
86+
; RV32-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
87+
; RV32-NEXT: [[TMP2:%.*]] = mul nuw i32 [[TMP1]], 4
88+
; RV32-NEXT: br label %[[BB3:.*]]
89+
; RV32: [[BB3]]:
90+
; RV32-NEXT: [[TMP4:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP9:%.*]], %[[BB3]] ]
91+
; RV32-NEXT: [[TMP5:%.*]] = phi <vscale x 4 x float> [ [[INPUT]], [[TMP0]] ], [ [[TMP8:%.*]], %[[BB3]] ]
92+
; RV32-NEXT: [[TMP6:%.*]] = extractelement <vscale x 4 x float> [[TMP5]], i32 [[TMP4]]
93+
; RV32-NEXT: [[TMP7:%.*]] = call float @llvm.log.f32(float [[TMP6]])
94+
; RV32-NEXT: [[TMP8]] = insertelement <vscale x 4 x float> [[TMP5]], float [[TMP7]], i32 [[TMP4]]
95+
; RV32-NEXT: [[TMP9]] = add i32 [[TMP4]], 1
96+
; RV32-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], [[TMP2]]
97+
; RV32-NEXT: br i1 [[TMP10]], label %[[BB11:.*]], label %[[BB3]]
98+
; RV32: [[BB11]]:
99+
; RV32-NEXT: ret <vscale x 4 x float> [[TMP8]]
100+
;
101+
; RV64-LABEL: define <vscale x 4 x float> @scalable_vec_log(
102+
; RV64-SAME: <vscale x 4 x float> [[INPUT:%.*]]) {
103+
; RV64-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
104+
; RV64-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4
105+
; RV64-NEXT: br label %[[BB3:.*]]
106+
; RV64: [[BB3]]:
107+
; RV64-NEXT: [[TMP4:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP9:%.*]], %[[BB3]] ]
108+
; RV64-NEXT: [[TMP5:%.*]] = phi <vscale x 4 x float> [ [[INPUT]], [[TMP0]] ], [ [[TMP8:%.*]], %[[BB3]] ]
109+
; RV64-NEXT: [[TMP6:%.*]] = extractelement <vscale x 4 x float> [[TMP5]], i64 [[TMP4]]
110+
; RV64-NEXT: [[TMP7:%.*]] = call float @llvm.log.f32(float [[TMP6]])
111+
; RV64-NEXT: [[TMP8]] = insertelement <vscale x 4 x float> [[TMP5]], float [[TMP7]], i64 [[TMP4]]
112+
; RV64-NEXT: [[TMP9]] = add i64 [[TMP4]], 1
113+
; RV64-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], [[TMP2]]
114+
; RV64-NEXT: br i1 [[TMP10]], label %[[BB11:.*]], label %[[BB3]]
115+
; RV64: [[BB11]]:
116+
; RV64-NEXT: ret <vscale x 4 x float> [[TMP8]]
117+
;
118+
%output = call <vscale x 4 x float> @llvm.log.nxv4f32(<vscale x 4 x float> %input)
119+
ret <vscale x 4 x float> %output
120+
}

0 commit comments

Comments (0)