[GlobalISel][AMDGPU] Fix handling of v2i128 type for AND, OR, XOR #138574
Conversation
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-llvm-globalisel

Author: Chinmay Deshpande (chinmaydd)

Changes

The current behavior crashes the compiler. This bug was found using the AMDGPU Fuzzing project.

Fixes SWDEV-508816.

Full diff: https://github.com/llvm/llvm-project/pull/138574.diff

4 Files Affected:
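As context for the diff below, a minimal reproducer in the spirit of the added tests (the function name here is illustrative) previously crashed llc when run through GlobalISel, e.g. with llc -global-isel=true -mtriple=amdgcn -mcpu=gfx900:

define <2 x i128> @crash_and_v2i128(<2 x i128> %a, <2 x i128> %b) {
  %and = and <2 x i128> %a, %b
  ret <2 x i128> %and
}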
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index ff8658ed82a72..e8063d54ac65a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -119,6 +119,18 @@ static LegalizeMutation fewerEltsToSize64Vector(unsigned TypeIdx) {
};
}
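+// Break the elements of a vector into 32-bit or 64-bit pieces: 64-bit if
+// the element size is a multiple of 64 bits, 32-bit otherwise. The result
+// covers the same total number of bits as the input vector.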
+static LegalizeMutation breakCurrentEltsToSize32Or64(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ const LLT Ty = Query.Types[TypeIdx];
+ const LLT EltTy = Ty.getElementType();
+ const int Size = Ty.getSizeInBits();
+ const int EltSize = EltTy.getSizeInBits();
+ const unsigned TargetEltSize = EltSize % 64 == 0 ? 64 : 32;
+ const unsigned NewNumElts = (Size + (TargetEltSize - 1)) / TargetEltSize;
+ return std::pair(TypeIdx, LLT::fixed_vector(NewNumElts, TargetEltSize));
+ };
+}
+
// Increase the number of vector elements to reach the next multiple of 32-bit
// type.
static LegalizeMutation moreEltsToNext32Bit(unsigned TypeIdx) {
@@ -875,7 +887,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.legalFor({S32, S1, S64, V2S32, S16, V2S16, V4S16})
.clampScalar(0, S32, S64)
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
- .fewerElementsIf(vectorWiderThan(0, 64), fewerEltsToSize64Vector(0))
+ .fewerElementsIf(all(vectorWiderThan(0, 64), scalarOrEltNarrowerThan(0, 64)), fewerEltsToSize64Vector(0))
+ .bitcastIf(all(vectorWiderThan(0, 64), scalarOrEltWiderThan(0, 64)), breakCurrentEltsToSize32Or64(0))
.widenScalarToNextPow2(0)
.scalarize(0);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/and.v2i128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/and.v2i128.ll
new file mode 100644
index 0000000000000..532a797094d14
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/and.v2i128.ll
@@ -0,0 +1,117 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
+; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
+; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
+; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX10 %s
+
+define <2 x i128> @v_and_v2i128(<2 x i128> %a, <2 x i128> %b) {
+; GFX7-LABEL: v_and_v2i128:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_and_b32_e32 v0, v0, v8
+; GFX7-NEXT: v_and_b32_e32 v1, v1, v9
+; GFX7-NEXT: v_and_b32_e32 v2, v2, v10
+; GFX7-NEXT: v_and_b32_e32 v3, v3, v11
+; GFX7-NEXT: v_and_b32_e32 v4, v4, v12
+; GFX7-NEXT: v_and_b32_e32 v5, v5, v13
+; GFX7-NEXT: v_and_b32_e32 v6, v6, v14
+; GFX7-NEXT: v_and_b32_e32 v7, v7, v15
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_and_v2i128:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_and_b32_e32 v0, v0, v8
+; GFX9-NEXT: v_and_b32_e32 v1, v1, v9
+; GFX9-NEXT: v_and_b32_e32 v2, v2, v10
+; GFX9-NEXT: v_and_b32_e32 v3, v3, v11
+; GFX9-NEXT: v_and_b32_e32 v4, v4, v12
+; GFX9-NEXT: v_and_b32_e32 v5, v5, v13
+; GFX9-NEXT: v_and_b32_e32 v6, v6, v14
+; GFX9-NEXT: v_and_b32_e32 v7, v7, v15
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_and_v2i128:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_and_b32_e32 v0, v0, v8
+; GFX8-NEXT: v_and_b32_e32 v1, v1, v9
+; GFX8-NEXT: v_and_b32_e32 v2, v2, v10
+; GFX8-NEXT: v_and_b32_e32 v3, v3, v11
+; GFX8-NEXT: v_and_b32_e32 v4, v4, v12
+; GFX8-NEXT: v_and_b32_e32 v5, v5, v13
+; GFX8-NEXT: v_and_b32_e32 v6, v6, v14
+; GFX8-NEXT: v_and_b32_e32 v7, v7, v15
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_and_v2i128:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_and_b32_e32 v0, v0, v8
+; GFX10-NEXT: v_and_b32_e32 v1, v1, v9
+; GFX10-NEXT: v_and_b32_e32 v2, v2, v10
+; GFX10-NEXT: v_and_b32_e32 v3, v3, v11
+; GFX10-NEXT: v_and_b32_e32 v4, v4, v12
+; GFX10-NEXT: v_and_b32_e32 v5, v5, v13
+; GFX10-NEXT: v_and_b32_e32 v6, v6, v14
+; GFX10-NEXT: v_and_b32_e32 v7, v7, v15
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %and = and <2 x i128> %a, %b
+ ret <2 x i128> %and
+}
+
+define <2 x i128> @v_and_v2i128_inline_imm(<2 x i128> %a) {
+; GFX7-LABEL: v_and_v2i128_inline_imm:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_mov_b64 s[4:5], 64
+; GFX7-NEXT: s_mov_b64 s[6:7], 0
+; GFX7-NEXT: s_mov_b64 s[4:5], s[4:5]
+; GFX7-NEXT: s_mov_b64 s[6:7], s[6:7]
+; GFX7-NEXT: v_and_b32_e32 v0, s4, v0
+; GFX7-NEXT: v_and_b32_e32 v1, s5, v1
+; GFX7-NEXT: v_and_b32_e32 v2, s6, v2
+; GFX7-NEXT: v_and_b32_e32 v3, s7, v3
+; GFX7-NEXT: v_and_b32_e32 v4, s4, v4
+; GFX7-NEXT: v_and_b32_e32 v5, s5, v5
+; GFX7-NEXT: v_and_b32_e32 v6, s6, v6
+; GFX7-NEXT: v_and_b32_e32 v7, s7, v7
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_and_v2i128_inline_imm:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_mov_b64 s[4:5], 64
+; GFX9-NEXT: s_mov_b64 s[6:7], 0
+; GFX9-NEXT: s_mov_b64 s[4:5], s[4:5]
+; GFX9-NEXT: s_mov_b64 s[6:7], s[6:7]
+; GFX9-NEXT: v_and_b32_e32 v0, s4, v0
+; GFX9-NEXT: v_and_b32_e32 v1, s5, v1
+; GFX9-NEXT: v_and_b32_e32 v2, s6, v2
+; GFX9-NEXT: v_and_b32_e32 v3, s7, v3
+; GFX9-NEXT: v_and_b32_e32 v4, s4, v4
+; GFX9-NEXT: v_and_b32_e32 v5, s5, v5
+; GFX9-NEXT: v_and_b32_e32 v6, s6, v6
+; GFX9-NEXT: v_and_b32_e32 v7, s7, v7
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_and_v2i128_inline_imm:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_mov_b64 s[4:5], 64
+; GFX8-NEXT: s_mov_b64 s[6:7], 0
+; GFX8-NEXT: s_mov_b64 s[4:5], s[4:5]
+; GFX8-NEXT: s_mov_b64 s[6:7], s[6:7]
+; GFX8-NEXT: v_and_b32_e32 v0, s4, v0
+; GFX8-NEXT: v_and_b32_e32 v1, s5, v1
+; GFX8-NEXT: v_and_b32_e32 v2, s6, v2
+; GFX8-NEXT: v_and_b32_e32 v3, s7, v3
+; GFX8-NEXT: v_and_b32_e32 v4, s4, v4
+; GFX8-NEXT: v_and_b32_e32 v5, s5, v5
+; GFX8-NEXT: v_and_b32_e32 v6, s6, v6
+; GFX8-NEXT: v_and_b32_e32 v7, s7, v7
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+ %and = and <2 x i128> %a, <i128 64, i128 64>
+ ret <2 x i128> %and
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/or.v2i128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/or.v2i128.ll
new file mode 100644
index 0000000000000..eaba0500dc1f3
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/or.v2i128.ll
@@ -0,0 +1,117 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
+; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
+; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
+; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX10 %s
+
+define <2 x i128> @v_or_v2i128(<2 x i128> %a, <2 x i128> %b) {
+; GFX7-LABEL: v_or_v2i128:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_or_b32_e32 v0, v0, v8
+; GFX7-NEXT: v_or_b32_e32 v1, v1, v9
+; GFX7-NEXT: v_or_b32_e32 v2, v2, v10
+; GFX7-NEXT: v_or_b32_e32 v3, v3, v11
+; GFX7-NEXT: v_or_b32_e32 v4, v4, v12
+; GFX7-NEXT: v_or_b32_e32 v5, v5, v13
+; GFX7-NEXT: v_or_b32_e32 v6, v6, v14
+; GFX7-NEXT: v_or_b32_e32 v7, v7, v15
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_or_v2i128:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_or_b32_e32 v0, v0, v8
+; GFX9-NEXT: v_or_b32_e32 v1, v1, v9
+; GFX9-NEXT: v_or_b32_e32 v2, v2, v10
+; GFX9-NEXT: v_or_b32_e32 v3, v3, v11
+; GFX9-NEXT: v_or_b32_e32 v4, v4, v12
+; GFX9-NEXT: v_or_b32_e32 v5, v5, v13
+; GFX9-NEXT: v_or_b32_e32 v6, v6, v14
+; GFX9-NEXT: v_or_b32_e32 v7, v7, v15
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_or_v2i128:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v8
+; GFX8-NEXT: v_or_b32_e32 v1, v1, v9
+; GFX8-NEXT: v_or_b32_e32 v2, v2, v10
+; GFX8-NEXT: v_or_b32_e32 v3, v3, v11
+; GFX8-NEXT: v_or_b32_e32 v4, v4, v12
+; GFX8-NEXT: v_or_b32_e32 v5, v5, v13
+; GFX8-NEXT: v_or_b32_e32 v6, v6, v14
+; GFX8-NEXT: v_or_b32_e32 v7, v7, v15
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_or_v2i128:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_or_b32_e32 v0, v0, v8
+; GFX10-NEXT: v_or_b32_e32 v1, v1, v9
+; GFX10-NEXT: v_or_b32_e32 v2, v2, v10
+; GFX10-NEXT: v_or_b32_e32 v3, v3, v11
+; GFX10-NEXT: v_or_b32_e32 v4, v4, v12
+; GFX10-NEXT: v_or_b32_e32 v5, v5, v13
+; GFX10-NEXT: v_or_b32_e32 v6, v6, v14
+; GFX10-NEXT: v_or_b32_e32 v7, v7, v15
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %or = or <2 x i128> %a, %b
+ ret <2 x i128> %or
+}
+
+define <2 x i128> @v_or_v2i128_inline_imm(<2 x i128> %a) {
+; GFX7-LABEL: v_or_v2i128_inline_imm:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_mov_b64 s[4:5], 64
+; GFX7-NEXT: s_mov_b64 s[6:7], 0
+; GFX7-NEXT: s_mov_b64 s[4:5], s[4:5]
+; GFX7-NEXT: s_mov_b64 s[6:7], s[6:7]
+; GFX7-NEXT: v_or_b32_e32 v0, s4, v0
+; GFX7-NEXT: v_or_b32_e32 v1, s5, v1
+; GFX7-NEXT: v_or_b32_e32 v2, s6, v2
+; GFX7-NEXT: v_or_b32_e32 v3, s7, v3
+; GFX7-NEXT: v_or_b32_e32 v4, s4, v4
+; GFX7-NEXT: v_or_b32_e32 v5, s5, v5
+; GFX7-NEXT: v_or_b32_e32 v6, s6, v6
+; GFX7-NEXT: v_or_b32_e32 v7, s7, v7
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_or_v2i128_inline_imm:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_mov_b64 s[4:5], 64
+; GFX9-NEXT: s_mov_b64 s[6:7], 0
+; GFX9-NEXT: s_mov_b64 s[4:5], s[4:5]
+; GFX9-NEXT: s_mov_b64 s[6:7], s[6:7]
+; GFX9-NEXT: v_or_b32_e32 v0, s4, v0
+; GFX9-NEXT: v_or_b32_e32 v1, s5, v1
+; GFX9-NEXT: v_or_b32_e32 v2, s6, v2
+; GFX9-NEXT: v_or_b32_e32 v3, s7, v3
+; GFX9-NEXT: v_or_b32_e32 v4, s4, v4
+; GFX9-NEXT: v_or_b32_e32 v5, s5, v5
+; GFX9-NEXT: v_or_b32_e32 v6, s6, v6
+; GFX9-NEXT: v_or_b32_e32 v7, s7, v7
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_or_v2i128_inline_imm:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_mov_b64 s[4:5], 64
+; GFX8-NEXT: s_mov_b64 s[6:7], 0
+; GFX8-NEXT: s_mov_b64 s[4:5], s[4:5]
+; GFX8-NEXT: s_mov_b64 s[6:7], s[6:7]
+; GFX8-NEXT: v_or_b32_e32 v0, s4, v0
+; GFX8-NEXT: v_or_b32_e32 v1, s5, v1
+; GFX8-NEXT: v_or_b32_e32 v2, s6, v2
+; GFX8-NEXT: v_or_b32_e32 v3, s7, v3
+; GFX8-NEXT: v_or_b32_e32 v4, s4, v4
+; GFX8-NEXT: v_or_b32_e32 v5, s5, v5
+; GFX8-NEXT: v_or_b32_e32 v6, s6, v6
+; GFX8-NEXT: v_or_b32_e32 v7, s7, v7
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+ %or = or <2 x i128> %a, <i128 64, i128 64>
+ ret <2 x i128> %or
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/xor.v2i128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/xor.v2i128.ll
new file mode 100644
index 0000000000000..291d27b0cf527
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/xor.v2i128.ll
@@ -0,0 +1,117 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
+; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
+; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
+; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX10 %s
+
+define <2 x i128> @v_xor_v2i128(<2 x i128> %a, <2 x i128> %b) {
+; GFX7-LABEL: v_xor_v2i128:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_xor_b32_e32 v0, v0, v8
+; GFX7-NEXT: v_xor_b32_e32 v1, v1, v9
+; GFX7-NEXT: v_xor_b32_e32 v2, v2, v10
+; GFX7-NEXT: v_xor_b32_e32 v3, v3, v11
+; GFX7-NEXT: v_xor_b32_e32 v4, v4, v12
+; GFX7-NEXT: v_xor_b32_e32 v5, v5, v13
+; GFX7-NEXT: v_xor_b32_e32 v6, v6, v14
+; GFX7-NEXT: v_xor_b32_e32 v7, v7, v15
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_xor_v2i128:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_xor_b32_e32 v0, v0, v8
+; GFX9-NEXT: v_xor_b32_e32 v1, v1, v9
+; GFX9-NEXT: v_xor_b32_e32 v2, v2, v10
+; GFX9-NEXT: v_xor_b32_e32 v3, v3, v11
+; GFX9-NEXT: v_xor_b32_e32 v4, v4, v12
+; GFX9-NEXT: v_xor_b32_e32 v5, v5, v13
+; GFX9-NEXT: v_xor_b32_e32 v6, v6, v14
+; GFX9-NEXT: v_xor_b32_e32 v7, v7, v15
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_xor_v2i128:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_xor_b32_e32 v0, v0, v8
+; GFX8-NEXT: v_xor_b32_e32 v1, v1, v9
+; GFX8-NEXT: v_xor_b32_e32 v2, v2, v10
+; GFX8-NEXT: v_xor_b32_e32 v3, v3, v11
+; GFX8-NEXT: v_xor_b32_e32 v4, v4, v12
+; GFX8-NEXT: v_xor_b32_e32 v5, v5, v13
+; GFX8-NEXT: v_xor_b32_e32 v6, v6, v14
+; GFX8-NEXT: v_xor_b32_e32 v7, v7, v15
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_xor_v2i128:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_xor_b32_e32 v0, v0, v8
+; GFX10-NEXT: v_xor_b32_e32 v1, v1, v9
+; GFX10-NEXT: v_xor_b32_e32 v2, v2, v10
+; GFX10-NEXT: v_xor_b32_e32 v3, v3, v11
+; GFX10-NEXT: v_xor_b32_e32 v4, v4, v12
+; GFX10-NEXT: v_xor_b32_e32 v5, v5, v13
+; GFX10-NEXT: v_xor_b32_e32 v6, v6, v14
+; GFX10-NEXT: v_xor_b32_e32 v7, v7, v15
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %xor = xor <2 x i128> %a, %b
+ ret <2 x i128> %xor
+}
+
+define <2 x i128> @v_xor_v2i128_inline_imm(<2 x i128> %a) {
+; GFX7-LABEL: v_xor_v2i128_inline_imm:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_mov_b64 s[4:5], 64
+; GFX7-NEXT: s_mov_b64 s[6:7], 0
+; GFX7-NEXT: s_mov_b64 s[4:5], s[4:5]
+; GFX7-NEXT: s_mov_b64 s[6:7], s[6:7]
+; GFX7-NEXT: v_xor_b32_e32 v0, s4, v0
+; GFX7-NEXT: v_xor_b32_e32 v1, s5, v1
+; GFX7-NEXT: v_xor_b32_e32 v2, s6, v2
+; GFX7-NEXT: v_xor_b32_e32 v3, s7, v3
+; GFX7-NEXT: v_xor_b32_e32 v4, s4, v4
+; GFX7-NEXT: v_xor_b32_e32 v5, s5, v5
+; GFX7-NEXT: v_xor_b32_e32 v6, s6, v6
+; GFX7-NEXT: v_xor_b32_e32 v7, s7, v7
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_xor_v2i128_inline_imm:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_mov_b64 s[4:5], 64
+; GFX9-NEXT: s_mov_b64 s[6:7], 0
+; GFX9-NEXT: s_mov_b64 s[4:5], s[4:5]
+; GFX9-NEXT: s_mov_b64 s[6:7], s[6:7]
+; GFX9-NEXT: v_xor_b32_e32 v0, s4, v0
+; GFX9-NEXT: v_xor_b32_e32 v1, s5, v1
+; GFX9-NEXT: v_xor_b32_e32 v2, s6, v2
+; GFX9-NEXT: v_xor_b32_e32 v3, s7, v3
+; GFX9-NEXT: v_xor_b32_e32 v4, s4, v4
+; GFX9-NEXT: v_xor_b32_e32 v5, s5, v5
+; GFX9-NEXT: v_xor_b32_e32 v6, s6, v6
+; GFX9-NEXT: v_xor_b32_e32 v7, s7, v7
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_xor_v2i128_inline_imm:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_mov_b64 s[4:5], 64
+; GFX8-NEXT: s_mov_b64 s[6:7], 0
+; GFX8-NEXT: s_mov_b64 s[4:5], s[4:5]
+; GFX8-NEXT: s_mov_b64 s[6:7], s[6:7]
+; GFX8-NEXT: v_xor_b32_e32 v0, s4, v0
+; GFX8-NEXT: v_xor_b32_e32 v1, s5, v1
+; GFX8-NEXT: v_xor_b32_e32 v2, s6, v2
+; GFX8-NEXT: v_xor_b32_e32 v3, s7, v3
+; GFX8-NEXT: v_xor_b32_e32 v4, s4, v4
+; GFX8-NEXT: v_xor_b32_e32 v5, s5, v5
+; GFX8-NEXT: v_xor_b32_e32 v6, s6, v6
+; GFX8-NEXT: v_xor_b32_e32 v7, s7, v7
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+ %xor = xor <2 x i128> %a, <i128 64, i128 64>
+ ret <2 x i128> %xor
+}
✅ With the latest revision this PR passed the C/C++ code formatter.
Force-pushed from a10d36d to 73137e3.
.legalFor({S32, S1, S64, V2S32, S16, V2S16, V4S16})
.clampScalar(0, S32, S64)
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
.fewerElementsIf(vectorWiderThan(0, 64), fewerEltsToSize64Vector(0))
I don't see why this was wrong. Why was the rule wrong? Is this just papering over a bug in LegalizerHelper?
I don't believe so. A 2xi128 vector is wider than 64 bits, which satisfies the predicate here. Once the fewerElements action is chosen, the mutation at fewerEltsToSize64Vector chooses a vector of type 0xs128 to break it down. This eventually causes an FPE in LegalizerHelper::getNarrowTypeBreakdown. I think fewerEltsToSize64Vector was not written with vectors of element types wider than 64 bits in mind.
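To make the arithmetic concrete, here is a standalone sketch (plain C++, not LLVM code, assuming nothing beyond the constants shown) of what the new breakCurrentEltsToSize32Or64 mutation computes for the crashing v2i128 case: the 128-bit element size is a multiple of 64, so the 256-bit vector is bitcast to v4s64, which the existing rules can handle.

#include <cstdio>

int main() {
  // The crashing input type from the tests: <2 x i128>.
  const int NumElts = 2;
  const int EltSize = 128;
  const int Size = NumElts * EltSize; // 256 bits total
  // Same computation as breakCurrentEltsToSize32Or64 in the diff above.
  const unsigned TargetEltSize = EltSize % 64 == 0 ? 64 : 32;              // -> 64
  const unsigned NewNumElts = (Size + (TargetEltSize - 1)) / TargetEltSize; // -> 4
  // Prints "v2s128 -> v4s64", the bitcast target the new rule selects.
  std::printf("v%ds%d -> v%us%u\n", NumElts, EltSize, NewNumElts, TargetEltSize);
  return 0;
}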