-
Notifications
You must be signed in to change notification settings - Fork 13.4k
[X86] Manage atomic load of fp -> int promotion in DAG #120386
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: users/jofrn/spr/main/5c36cc8c
Are you sure you want to change the base?
Conversation
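@llvm/pr-subscribers-backend-x86 Author: None (jofrn) Changes: When lowering atomic <1 x T> vector types with floats, selection can fail since this pattern is unsupported. To support this, floats can be cast to an integer type of the same size. Stack: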
Full diff: https://github.com/llvm/llvm-project/pull/120386.diff 2 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2571873dba8483..8006d32d077a65 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2595,6 +2595,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(Op, MVT::f32, Promote);
}
+ setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f16, MVT::i16);
+ setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f32, MVT::i32);
+ setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f64, MVT::i64);
+
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine({ISD::VECTOR_SHUFFLE,
ISD::SCALAR_TO_VECTOR,
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 9cac8167542d8b..2bde0d2ffd06ad 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -1,12 +1,17 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s --check-prefix=CHECK0
define void @test1(ptr %ptr, i32 %val1) {
; CHECK-LABEL: test1:
; CHECK: ## %bb.0:
; CHECK-NEXT: xchgl %esi, (%rdi)
; CHECK-NEXT: retq
+;
+; CHECK0-LABEL: test1:
+; CHECK0: ## %bb.0:
+; CHECK0-NEXT: xchgl %esi, (%rdi)
+; CHECK0-NEXT: retq
store atomic i32 %val1, ptr %ptr seq_cst, align 4
ret void
}
@@ -16,6 +21,11 @@ define void @test2(ptr %ptr, i32 %val1) {
; CHECK: ## %bb.0:
; CHECK-NEXT: movl %esi, (%rdi)
; CHECK-NEXT: retq
+;
+; CHECK0-LABEL: test2:
+; CHECK0: ## %bb.0:
+; CHECK0-NEXT: movl %esi, (%rdi)
+; CHECK0-NEXT: retq
store atomic i32 %val1, ptr %ptr release, align 4
ret void
}
@@ -25,6 +35,11 @@ define i32 @test3(ptr %ptr) {
; CHECK: ## %bb.0:
; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: retq
+;
+; CHECK0-LABEL: test3:
+; CHECK0: ## %bb.0:
+; CHECK0-NEXT: movl (%rdi), %eax
+; CHECK0-NEXT: retq
%val = load atomic i32, ptr %ptr seq_cst, align 4
ret i32 %val
}
@@ -34,6 +49,64 @@ define <1 x i32> @atomic_vec1_i32(ptr %x) {
; CHECK: ## %bb.0:
; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: retq
+;
+; CHECK0-LABEL: atomic_vec1_i32:
+; CHECK0: ## %bb.0:
+; CHECK0-NEXT: movl (%rdi), %eax
+; CHECK0-NEXT: retq
%ret = load atomic <1 x i32>, ptr %x acquire, align 4
ret <1 x i32> %ret
}
+
+define <1 x half> @atomic_vec1_half(ptr %x) {
+; CHECK-LABEL: atomic_vec1_half:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movzwl (%rdi), %eax
+; CHECK-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-NEXT: retq
+;
+; CHECK0-LABEL: atomic_vec1_half:
+; CHECK0: ## %bb.0:
+; CHECK0-NEXT: movw (%rdi), %cx
+; CHECK0-NEXT: ## implicit-def: $eax
+; CHECK0-NEXT: movw %cx, %ax
+; CHECK0-NEXT: ## implicit-def: $xmm0
+; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK0-NEXT: retq
+ %ret = load atomic <1 x half>, ptr %x acquire, align 4
+ ret <1 x half> %ret
+}
+
+define <1 x float> @atomic_vec1_float(ptr %x) {
+; CHECK-LABEL: atomic_vec1_float:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: retq
+;
+; CHECK0-LABEL: atomic_vec1_float:
+; CHECK0: ## %bb.0:
+; CHECK0-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK0-NEXT: retq
+ %ret = load atomic <1 x float>, ptr %x acquire, align 4
+ ret <1 x float> %ret
+}
+
+define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
+; CHECK-LABEL: atomic_vec1_bfloat:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movzwl (%rdi), %eax
+; CHECK-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-NEXT: retq
+;
+; CHECK0-LABEL: atomic_vec1_bfloat:
+; CHECK0: ## %bb.0:
+; CHECK0-NEXT: movw (%rdi), %cx
+; CHECK0-NEXT: ## implicit-def: $eax
+; CHECK0-NEXT: movw %cx, %ax
+; CHECK0-NEXT: ## implicit-def: $xmm0
+; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK0-NEXT: retq
+ %ret = load atomic <1 x bfloat>, ptr %x acquire, align 4
+ ret <1 x bfloat> %ret
+}
+
|
f799ee0
to
141279f
Compare
141279f
to
70bb5b9
Compare
7263545
to
5a3a12d
Compare
dac7f1e
to
df5e28c
Compare
@@ -2595,6 +2595,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, | |||
setOperationAction(Op, MVT::f32, Promote); | |||
} | |||
|
|||
setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f16, MVT::i16); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Presumably similar changes to other backends are also required?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Handle bf16 as well?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
bf16 is already lowered properly without promotion.
And yes, other backends would either have to promote these here or implement them explicitly.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
SelectionDAG making anything legal by default was a terrible mistake but we're not going to fix that here
66eca4b
to
4d3fcb3
Compare
b336c25
to
7ef2576
Compare
4d0be71
to
601c009
Compare
7ef2576
to
b4f0562
Compare
601c009
to
3796cf7
Compare
3796cf7
to
5f30edf
Compare
b4f0562
to
e0a02b6
Compare
5f30edf
to
99296f3
Compare
40392eb
to
7ca91cc
Compare
99296f3
to
245acf7
Compare
7ca91cc
to
0d6882e
Compare
a3e83cb
to
faa0e03
Compare
6ac3c17
to
03a726d
Compare
a1143a0
to
2a1b149
Compare
03a726d
to
f096c88
Compare
f096c88
to
edd2af8
Compare
24d9628
to
e9820bf
Compare
47d8c3a
to
02dd787
Compare
4b9e4d3
to
d6cac89
Compare
5a5f241
to
cb2e5bc
Compare
7f2115c
to
ab9f3f2
Compare
cb2e5bc
to
14c8155
Compare
ab9f3f2
to
4a47d3f
Compare
0824a27
to
6078905
Compare
5189e84
to
e7805ff
Compare
6078905
to
fdc2107
Compare
fdc2107
to
5005b94
Compare
2d7f7dc
to
1e2a179
Compare
5005b94
to
c7d4433
Compare
When lowering atomic <1 x T> vector types with floats, selection can fail since this pattern is unsupported. To support this, floats can be cast to an integer type of the same size. commit-id:f9d761c5
1e2a179
to
08e39f2
Compare
c7d4433
to
531bc05
Compare
When lowering atomic <1 x T> vector types with floats, selection can fail since
this pattern is unsupported. To support this, floats can be cast to
an integer type of the same size.
Stack: