-
Notifications
You must be signed in to change notification settings - Fork 13.4k
[SelectionDAG] Split vector types for atomic load #120640
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: users/jofrn/spr/main/45989503
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-x86 Author: None (jofrn) Changes
Stack:
Full diff: https://github.com/llvm/llvm-project/pull/120640.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 3b3dddc44e3682..e0cd7319ac034b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -946,6 +946,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
void SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo, SDValue &Hi);
void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi);
void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi);
void SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, SDValue &Lo,
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 7c4caa96244b8b..44adc3fdb4a5a7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1146,6 +1146,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
SplitVecRes_STEP_VECTOR(N, Lo, Hi);
break;
case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
+ case ISD::ATOMIC_LOAD:
+ SplitVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N), Lo, Hi);
+ break;
case ISD::LOAD:
SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
break;
@@ -2079,6 +2082,38 @@ void DAGTypeLegalizer::SplitVecRes_VP_SPLAT(SDNode *N, SDValue &Lo,
Hi = DAG.getNode(N->getOpcode(), dl, HiVT, N->getOperand(0), MaskHi, EVLHi);
}
+void DAGTypeLegalizer::SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo,
+ SDValue &Hi) {
+ EVT LoVT, HiVT;
+ SDLoc dl(LD);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0));
+
+ SDValue Ch = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ EVT MemoryVT = LD->getMemoryVT();
+
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ Lo = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, LoMemVT, LoMemVT, Ch, Ptr,
+ LD->getMemOperand());
+
+ MachinePointerInfo MPI;
+ IncrementPointer(LD, LoMemVT, MPI, Ptr);
+
+ Hi = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, HiMemVT, HiMemVT, Ch, Ptr,
+ LD->getMemOperand());
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(LD, 1), Ch);
+}
+
void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
SDValue &Hi) {
assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!");
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index ba1bc4d98537d1..302a94aa9c1f60 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -176,6 +176,62 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) nounwind {
ret <2 x float> %ret
}
+define <2 x half> @atomic_vec2_half(ptr %x) nounwind {
+; CHECK3-LABEL: atomic_vec2_half:
+; CHECK3: ## %bb.0:
+; CHECK3-NEXT: movzwl (%rdi), %eax
+; CHECK3-NEXT: movzwl 2(%rdi), %ecx
+; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK3-NEXT: pinsrw $0, %ecx, %xmm1
+; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; CHECK3-NEXT: retq
+;
+; CHECK0-LABEL: atomic_vec2_half:
+; CHECK0: ## %bb.0:
+; CHECK0-NEXT: movw (%rdi), %dx
+; CHECK0-NEXT: movw 2(%rdi), %cx
+; CHECK0-NEXT: ## implicit-def: $eax
+; CHECK0-NEXT: movw %dx, %ax
+; CHECK0-NEXT: ## implicit-def: $xmm0
+; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK0-NEXT: ## implicit-def: $eax
+; CHECK0-NEXT: movw %cx, %ax
+; CHECK0-NEXT: ## implicit-def: $xmm1
+; CHECK0-NEXT: pinsrw $0, %eax, %xmm1
+; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; CHECK0-NEXT: retq
+ %ret = load atomic <2 x half>, ptr %x acquire, align 4
+ ret <2 x half> %ret
+}
+
+define <2 x bfloat> @atomic_vec2_bfloat(ptr %x) nounwind {
+; CHECK3-LABEL: atomic_vec2_bfloat:
+; CHECK3: ## %bb.0:
+; CHECK3-NEXT: movzwl (%rdi), %eax
+; CHECK3-NEXT: movzwl 2(%rdi), %ecx
+; CHECK3-NEXT: pinsrw $0, %ecx, %xmm1
+; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; CHECK3-NEXT: retq
+;
+; CHECK0-LABEL: atomic_vec2_bfloat:
+; CHECK0: ## %bb.0:
+; CHECK0-NEXT: movw (%rdi), %cx
+; CHECK0-NEXT: movw 2(%rdi), %dx
+; CHECK0-NEXT: ## implicit-def: $eax
+; CHECK0-NEXT: movw %dx, %ax
+; CHECK0-NEXT: ## implicit-def: $xmm1
+; CHECK0-NEXT: pinsrw $0, %eax, %xmm1
+; CHECK0-NEXT: ## implicit-def: $eax
+; CHECK0-NEXT: movw %cx, %ax
+; CHECK0-NEXT: ## implicit-def: $xmm0
+; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; CHECK0-NEXT: retq
+ %ret = load atomic <2 x bfloat>, ptr %x acquire, align 4
+ ret <2 x bfloat> %ret
+}
+
define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind {
; CHECK3-LABEL: atomic_vec1_ptr:
; CHECK3: ## %bb.0:
|
8ed9199
to
c9bdb95
Compare
b2f0b33
to
6737dda
Compare
c9bdb95
to
94a71a3
Compare
6737dda
to
2949391
Compare
94a71a3
to
34df4f7
Compare
34df4f7
to
761d4d9
Compare
2949391
to
78adf01
Compare
6506acb
to
db674f8
Compare
db674f8
to
13ea377
Compare
23c9ff2
to
2c51f72
Compare
13ea377
to
e11194d
Compare
2c51f72
to
3a82883
Compare
3a82883
to
36161df
Compare
e11194d
to
3be4fa0
Compare
4df72a2
to
bc6c355
Compare
@@ -1421,6 +1424,35 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { | |||
SetSplitVector(SDValue(N, ResNo), Lo, Hi); | |||
} | |||
|
|||
void DAGTypeLegalizer::SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is missing the Lo / Hi out arguments that all of the other SplitVecRes cases have. You should still be trying to respect the result of DAG.GetSplitDestVTs. With this you are bypassing SetSplitVector.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Bump
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I had tried doing it by splitting Lo and Hi. I ran into an issue though because TokenFactor (what we join our values on) requires glue operands; however, EXTRACT_SUBVECTOR does not have these, nor does DAG.getAtomicLoad.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
TokenFactor does not require glue operands. The most common use case for TokenFactor is pairing a modified real value with a chain
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
EXTRACT_SUBVECTOR does not have chain operands, nor does DAG.getAtomicLoad. I checked a few others too, but do we not need to use a TokenFactor if you are saying we do not have to associate Lo and Hi?
LD->getChain(), LD->getBasePtr(), LD->getMemOperand()); | ||
|
||
// Instead of splitting, put all the elements back into a vector. | ||
SmallVector<SDValue, 4> Ops; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This loop can be replaced with DAG.ExtractVectorElements (but I don't think you should be scalarizing here like this)
} | ||
SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, dl, MemoryVT, Ops); | ||
|
||
ReplaceValueWith(SDValue(LD, 0), Concat); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This replacement should be done piecewise in the caller; you should be inserting casting code to satisfy the two DAG.GetSplitDestVTs pieces.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see. That is done by using a TokenFactor to associate the Lo and Hi values. However, replacing the TokenFactor is not possible because we cannot create it out of operands without glue.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You don't need to associate the Hi and Lo halves, you need to report the Hi and Lo halves to the caller. The only special thing you need to do here is replace the chain uses, as is done in the non-atomic load case
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How do you associate both chain uses at once? Doesn't that require a TokenFactor to replace?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There are not multiple chains. The original atomic_load had one chain input and one chain result. The new coerced load has a different output chain; you replace the uses of the old result chain with that new chain. The type coercion code is chain free and doesn't care.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm talking about chain uses of Lo and Hi. The atomic_load lowering contains this, which looks to be associating the values:
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1));
However, Lo and Hi are uses from getLoad/getAtomicLoad, but getAtomicLoad does not have the chain that TokenFactor wants.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
TokenFactor of a single input is identical to the single input value. But the atomic_load lowering should not contain this. The new lowering has exactly one chain, just like the original load. The atomic_load is specifically not being split; only the type profile is changing.
bc6c355
to
51488b8
Compare
0d766dd
to
bf3f6b0
Compare
51488b8
to
e262387
Compare
bf3f6b0
to
7565845
Compare
e262387
to
fc42ee7
Compare
79fce92
to
2fb47b0
Compare
867af9f
to
340dec5
Compare
2fb47b0
to
2faa227
Compare
5bc5d32
to
8da6708
Compare
2faa227
to
71d49aa
Compare
EVT LoMemVT, HiMemVT; | ||
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These are unused. At most, you should be bitcasting the in-memory type to the target integer memory type; there's no splitting.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Only one of them has to be used to get double that size. May we just remove it all and use MemoryVT.getSizeInBits()?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), LD->getValueType(0).getSizeInBits()); | ||
EVT MemIntVT = EVT::getIntegerVT(*DAG.getContext(), LD->getMemoryVT().getSizeInBits()); | ||
SDValue ALD = DAG.getAtomicLoad(ExtType, dl, MemIntVT, IntVT, Ch, Ptr, | ||
LD->getMemOperand()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe should assert this isn't an extending load, although you could in principle handle it
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This will fail for some reason, on at least atomic_vec2_ptr270 and atomic_vec2_half.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe the right thing to do is just create a non-extending load of the bitcast in-memory type
EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits()); | ||
EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits()); | ||
SDValue ExtractLo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, LoIntVT, ALD, | ||
DAG.getVectorIdxConstant(0, dl)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This isn't a vector extract, so don't use getVectorIdxConstant. I'm not actually sure what the type for the second operand is supposed to be for extract_element
71d49aa
to
db5b862
Compare
8da6708
to
218ce15
Compare
db5b862
to
28f6bf3
Compare
28f6bf3
to
309d817
Compare
EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits()); | ||
EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits()); | ||
SDValue ExtractLo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, LoIntVT, ALD, | ||
DAG.getIntPtrConstant(0, dl)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This probably should be a target constant but this appears to be what existing uses do
309d817
to
cd4402a
Compare
4383732
to
0fcd430
Compare
EVT MemIntVT = | ||
EVT::getIntegerVT(*DAG.getContext(), 2 * LoMemVT.getSizeInBits()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
MemVT should not be a derived quantity; directly bitcast the original memory type.
Vector types that aren't widened are split so that a single ATOMIC_LOAD is issued for the entire vector at once. This change utilizes the load vectorization infrastructure in SelectionDAG in order to group the vectors. This enables SelectionDAG to translate vectors of type bfloat and half. commit-id:3a045357
0fcd430
to
5b5d948
Compare
cd4402a
to
0e4399d
Compare
Vector types that aren't widened are split
so that a single ATOMIC_LOAD is issued for the entire vector at once.
This change utilizes the load vectorization infrastructure in
SelectionDAG in order to group the vectors. This enables SelectionDAG
to translate vectors of type bfloat and half.
Stack: