Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit c9014d3

Browse files
authored
[PreISelIntrinsicLowering] Use index type for index in intrinsic expansion (#193807)
We'd chosen intptr type for the binary in review, but on reflection the index type is probably a conceptually better fit. On riscv, these are going to be the same, so it's purely a conceptual issue. For the unary case, this is an actual change since we were using i64 unconditionally. This improves codegen for RV32 by avoiding the need for expensive legalization of i64 expressions for the IV.
1 parent b96263c commit c9014d3

2 files changed

Lines changed: 126 additions & 6 deletions

File tree

llvm/lib/Transforms/Utils/LowerVectorIntrinsics.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ bool llvm::lowerUnaryVectorIntrinsicAsLoop(Module &M, CallInst *CI) {
2323
BasicBlock *PostLoopBB = nullptr;
2424
Function *ParentFunc = PreLoopBB->getParent();
2525
LLVMContext &Ctx = PreLoopBB->getContext();
26-
Type *Int64Ty = IntegerType::get(Ctx, 64);
26+
Type *IdxTy = M.getDataLayout().getIndexType(Ctx, 0);
2727

2828
PostLoopBB = PreLoopBB->splitBasicBlock(CI);
2929
BasicBlock *LoopBB = BasicBlock::Create(Ctx, "", ParentFunc, PostLoopBB);
@@ -32,13 +32,13 @@ bool llvm::lowerUnaryVectorIntrinsicAsLoop(Module &M, CallInst *CI) {
3232
// Loop preheader
3333
IRBuilder<> PreLoopBuilder(PreLoopBB->getTerminator());
3434
Value *LoopEnd =
35-
PreLoopBuilder.CreateElementCount(Int64Ty, VecTy->getElementCount());
35+
PreLoopBuilder.CreateElementCount(IdxTy, VecTy->getElementCount());
3636

3737
// Loop body
3838
IRBuilder<> LoopBuilder(LoopBB);
3939

40-
PHINode *LoopIndex = LoopBuilder.CreatePHI(Int64Ty, 2);
41-
LoopIndex->addIncoming(ConstantInt::get(Int64Ty, 0U), PreLoopBB);
40+
PHINode *LoopIndex = LoopBuilder.CreatePHI(IdxTy, 2);
41+
LoopIndex->addIncoming(ConstantInt::get(IdxTy, 0U), PreLoopBB);
4242
PHINode *Vec = LoopBuilder.CreatePHI(VecTy, 2);
4343
Vec->addIncoming(CI->getArgOperand(0), PreLoopBB);
4444

@@ -49,7 +49,7 @@ bool llvm::lowerUnaryVectorIntrinsicAsLoop(Module &M, CallInst *CI) {
4949
Value *NewVec = LoopBuilder.CreateInsertElement(Vec, Res, LoopIndex);
5050
Vec->addIncoming(NewVec, LoopBB);
5151

52-
Value *One = ConstantInt::get(Int64Ty, 1U);
52+
Value *One = ConstantInt::get(IdxTy, 1U);
5353
Value *NextLoopIndex = LoopBuilder.CreateAdd(LoopIndex, One);
5454
LoopIndex->addIncoming(NextLoopIndex, LoopBB);
5555

@@ -71,7 +71,7 @@ bool llvm::lowerBinaryVectorIntrinsicAsLoop(Module &M, CallInst *CI) {
7171
BasicBlock *PostLoopBB = nullptr;
7272
Function *ParentFunc = PreLoopBB->getParent();
7373
LLVMContext &Ctx = PreLoopBB->getContext();
74-
Type *IdxTy = M.getDataLayout().getIntPtrType(Ctx);
74+
Type *IdxTy = M.getDataLayout().getIndexType(Ctx, 0);
7575

7676
PostLoopBB = PreLoopBB->splitBasicBlock(CI);
7777
BasicBlock *LoopBB = BasicBlock::Create(Ctx, "", ParentFunc, PostLoopBB);
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -passes=pre-isel-intrinsic-lowering -mtriple=riscv32 -S < %s | FileCheck %s --check-prefix=RV32
3+
; RUN: opt -passes=pre-isel-intrinsic-lowering -mtriple=riscv64 -S < %s | FileCheck %s --check-prefix=RV64
4+
5+
define <vscale x 4 x float> @scalable_vec_sin(<vscale x 4 x float> %input) {
6+
; RV32-LABEL: define <vscale x 4 x float> @scalable_vec_sin(
7+
; RV32-SAME: <vscale x 4 x float> [[INPUT:%.*]]) {
8+
; RV32-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
9+
; RV32-NEXT: [[TMP2:%.*]] = mul nuw i32 [[TMP1]], 4
10+
; RV32-NEXT: br label %[[BB3:.*]]
11+
; RV32: [[BB3]]:
12+
; RV32-NEXT: [[TMP4:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP9:%.*]], %[[BB3]] ]
13+
; RV32-NEXT: [[TMP5:%.*]] = phi <vscale x 4 x float> [ [[INPUT]], [[TMP0]] ], [ [[TMP8:%.*]], %[[BB3]] ]
14+
; RV32-NEXT: [[TMP6:%.*]] = extractelement <vscale x 4 x float> [[TMP5]], i32 [[TMP4]]
15+
; RV32-NEXT: [[TMP7:%.*]] = call float @llvm.sin.f32(float [[TMP6]])
16+
; RV32-NEXT: [[TMP8]] = insertelement <vscale x 4 x float> [[TMP5]], float [[TMP7]], i32 [[TMP4]]
17+
; RV32-NEXT: [[TMP9]] = add i32 [[TMP4]], 1
18+
; RV32-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], [[TMP2]]
19+
; RV32-NEXT: br i1 [[TMP10]], label %[[BB11:.*]], label %[[BB3]]
20+
; RV32: [[BB11]]:
21+
; RV32-NEXT: ret <vscale x 4 x float> [[TMP8]]
22+
;
23+
; RV64-LABEL: define <vscale x 4 x float> @scalable_vec_sin(
24+
; RV64-SAME: <vscale x 4 x float> [[INPUT:%.*]]) {
25+
; RV64-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
26+
; RV64-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4
27+
; RV64-NEXT: br label %[[BB3:.*]]
28+
; RV64: [[BB3]]:
29+
; RV64-NEXT: [[TMP4:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP9:%.*]], %[[BB3]] ]
30+
; RV64-NEXT: [[TMP5:%.*]] = phi <vscale x 4 x float> [ [[INPUT]], [[TMP0]] ], [ [[TMP8:%.*]], %[[BB3]] ]
31+
; RV64-NEXT: [[TMP6:%.*]] = extractelement <vscale x 4 x float> [[TMP5]], i64 [[TMP4]]
32+
; RV64-NEXT: [[TMP7:%.*]] = call float @llvm.sin.f32(float [[TMP6]])
33+
; RV64-NEXT: [[TMP8]] = insertelement <vscale x 4 x float> [[TMP5]], float [[TMP7]], i64 [[TMP4]]
34+
; RV64-NEXT: [[TMP9]] = add i64 [[TMP4]], 1
35+
; RV64-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], [[TMP2]]
36+
; RV64-NEXT: br i1 [[TMP10]], label %[[BB11:.*]], label %[[BB3]]
37+
; RV64: [[BB11]]:
38+
; RV64-NEXT: ret <vscale x 4 x float> [[TMP8]]
39+
;
40+
%output = call <vscale x 4 x float> @llvm.sin.nxv4f32(<vscale x 4 x float> %input)
41+
ret <vscale x 4 x float> %output
42+
}
43+
44+
define <vscale x 4 x float> @scalable_vec_exp(<vscale x 4 x float> %input) {
45+
; RV32-LABEL: define <vscale x 4 x float> @scalable_vec_exp(
46+
; RV32-SAME: <vscale x 4 x float> [[INPUT:%.*]]) {
47+
; RV32-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
48+
; RV32-NEXT: [[TMP2:%.*]] = mul nuw i32 [[TMP1]], 4
49+
; RV32-NEXT: br label %[[BB3:.*]]
50+
; RV32: [[BB3]]:
51+
; RV32-NEXT: [[TMP4:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP9:%.*]], %[[BB3]] ]
52+
; RV32-NEXT: [[TMP5:%.*]] = phi <vscale x 4 x float> [ [[INPUT]], [[TMP0]] ], [ [[TMP8:%.*]], %[[BB3]] ]
53+
; RV32-NEXT: [[TMP6:%.*]] = extractelement <vscale x 4 x float> [[TMP5]], i32 [[TMP4]]
54+
; RV32-NEXT: [[TMP7:%.*]] = call float @llvm.exp.f32(float [[TMP6]])
55+
; RV32-NEXT: [[TMP8]] = insertelement <vscale x 4 x float> [[TMP5]], float [[TMP7]], i32 [[TMP4]]
56+
; RV32-NEXT: [[TMP9]] = add i32 [[TMP4]], 1
57+
; RV32-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], [[TMP2]]
58+
; RV32-NEXT: br i1 [[TMP10]], label %[[BB11:.*]], label %[[BB3]]
59+
; RV32: [[BB11]]:
60+
; RV32-NEXT: ret <vscale x 4 x float> [[TMP8]]
61+
;
62+
; RV64-LABEL: define <vscale x 4 x float> @scalable_vec_exp(
63+
; RV64-SAME: <vscale x 4 x float> [[INPUT:%.*]]) {
64+
; RV64-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
65+
; RV64-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4
66+
; RV64-NEXT: br label %[[BB3:.*]]
67+
; RV64: [[BB3]]:
68+
; RV64-NEXT: [[TMP4:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP9:%.*]], %[[BB3]] ]
69+
; RV64-NEXT: [[TMP5:%.*]] = phi <vscale x 4 x float> [ [[INPUT]], [[TMP0]] ], [ [[TMP8:%.*]], %[[BB3]] ]
70+
; RV64-NEXT: [[TMP6:%.*]] = extractelement <vscale x 4 x float> [[TMP5]], i64 [[TMP4]]
71+
; RV64-NEXT: [[TMP7:%.*]] = call float @llvm.exp.f32(float [[TMP6]])
72+
; RV64-NEXT: [[TMP8]] = insertelement <vscale x 4 x float> [[TMP5]], float [[TMP7]], i64 [[TMP4]]
73+
; RV64-NEXT: [[TMP9]] = add i64 [[TMP4]], 1
74+
; RV64-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], [[TMP2]]
75+
; RV64-NEXT: br i1 [[TMP10]], label %[[BB11:.*]], label %[[BB3]]
76+
; RV64: [[BB11]]:
77+
; RV64-NEXT: ret <vscale x 4 x float> [[TMP8]]
78+
;
79+
%output = call <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float> %input)
80+
ret <vscale x 4 x float> %output
81+
}
82+
83+
define <vscale x 4 x float> @scalable_vec_log(<vscale x 4 x float> %input) {
84+
; RV32-LABEL: define <vscale x 4 x float> @scalable_vec_log(
85+
; RV32-SAME: <vscale x 4 x float> [[INPUT:%.*]]) {
86+
; RV32-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
87+
; RV32-NEXT: [[TMP2:%.*]] = mul nuw i32 [[TMP1]], 4
88+
; RV32-NEXT: br label %[[BB3:.*]]
89+
; RV32: [[BB3]]:
90+
; RV32-NEXT: [[TMP4:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP9:%.*]], %[[BB3]] ]
91+
; RV32-NEXT: [[TMP5:%.*]] = phi <vscale x 4 x float> [ [[INPUT]], [[TMP0]] ], [ [[TMP8:%.*]], %[[BB3]] ]
92+
; RV32-NEXT: [[TMP6:%.*]] = extractelement <vscale x 4 x float> [[TMP5]], i32 [[TMP4]]
93+
; RV32-NEXT: [[TMP7:%.*]] = call float @llvm.log.f32(float [[TMP6]])
94+
; RV32-NEXT: [[TMP8]] = insertelement <vscale x 4 x float> [[TMP5]], float [[TMP7]], i32 [[TMP4]]
95+
; RV32-NEXT: [[TMP9]] = add i32 [[TMP4]], 1
96+
; RV32-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], [[TMP2]]
97+
; RV32-NEXT: br i1 [[TMP10]], label %[[BB11:.*]], label %[[BB3]]
98+
; RV32: [[BB11]]:
99+
; RV32-NEXT: ret <vscale x 4 x float> [[TMP8]]
100+
;
101+
; RV64-LABEL: define <vscale x 4 x float> @scalable_vec_log(
102+
; RV64-SAME: <vscale x 4 x float> [[INPUT:%.*]]) {
103+
; RV64-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
104+
; RV64-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4
105+
; RV64-NEXT: br label %[[BB3:.*]]
106+
; RV64: [[BB3]]:
107+
; RV64-NEXT: [[TMP4:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP9:%.*]], %[[BB3]] ]
108+
; RV64-NEXT: [[TMP5:%.*]] = phi <vscale x 4 x float> [ [[INPUT]], [[TMP0]] ], [ [[TMP8:%.*]], %[[BB3]] ]
109+
; RV64-NEXT: [[TMP6:%.*]] = extractelement <vscale x 4 x float> [[TMP5]], i64 [[TMP4]]
110+
; RV64-NEXT: [[TMP7:%.*]] = call float @llvm.log.f32(float [[TMP6]])
111+
; RV64-NEXT: [[TMP8]] = insertelement <vscale x 4 x float> [[TMP5]], float [[TMP7]], i64 [[TMP4]]
112+
; RV64-NEXT: [[TMP9]] = add i64 [[TMP4]], 1
113+
; RV64-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], [[TMP2]]
114+
; RV64-NEXT: br i1 [[TMP10]], label %[[BB11:.*]], label %[[BB3]]
115+
; RV64: [[BB11]]:
116+
; RV64-NEXT: ret <vscale x 4 x float> [[TMP8]]
117+
;
118+
%output = call <vscale x 4 x float> @llvm.log.nxv4f32(<vscale x 4 x float> %input)
119+
ret <vscale x 4 x float> %output
120+
}

0 commit comments

Comments (0)