
LLVM 22.0.0git
AMDGPURegBankLegalizeRules.cpp
//===-- AMDGPURegBankLegalizeRules.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// Definitions of RegBankLegalize Rules for all opcodes.
/// Implementation of the container for all the Rules and of the search.
/// Fast search for the most common case, when Rule.Predicate checks the LLT
/// and uniformity of the register in operand 0.
//
//===----------------------------------------------------------------------===//

#include "AMDGPURegBankLegalizeRules.h"
#include "AMDGPUInstrInfo.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Support/AMDGPUAddrSpace.h"

#define DEBUG_TYPE "amdgpu-regbanklegalize"

using namespace llvm;
using namespace AMDGPU;

bool AMDGPU::isAnyPtr(LLT Ty, unsigned Width) {
  return Ty.isPointer() && Ty.getSizeInBits() == Width;
}

RegBankLLTMapping::RegBankLLTMapping(
    std::initializer_list<RegBankLLTMappingApplyID> DstOpMappingList,
    std::initializer_list<RegBankLLTMappingApplyID> SrcOpMappingList,
    LoweringMethodID LoweringMethod)
    : DstOpMapping(DstOpMappingList), SrcOpMapping(SrcOpMappingList),
      LoweringMethod(LoweringMethod) {}

PredicateMapping::PredicateMapping(
    std::initializer_list<UniformityLLTOpPredicateID> OpList,
    std::function<bool(const MachineInstr &)> TestFunc)
    : OpUniformityAndTypes(OpList), TestFunc(TestFunc) {}

bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
                           const MachineUniformityInfo &MUI,
                           const MachineRegisterInfo &MRI) {
  switch (UniID) {
  case S1:
    return MRI.getType(Reg) == LLT::scalar(1);
  case S16:
    return MRI.getType(Reg) == LLT::scalar(16);
  case S32:
    return MRI.getType(Reg) == LLT::scalar(32);
  case S64:
    return MRI.getType(Reg) == LLT::scalar(64);
  case S128:
    return MRI.getType(Reg) == LLT::scalar(128);
  case P0:
    return MRI.getType(Reg) == LLT::pointer(0, 64);
  case P1:
    return MRI.getType(Reg) == LLT::pointer(1, 64);
  case P3:
    return MRI.getType(Reg) == LLT::pointer(3, 32);
  case P4:
    return MRI.getType(Reg) == LLT::pointer(4, 64);
  case P5:
    return MRI.getType(Reg) == LLT::pointer(5, 32);
  case Ptr32:
    return isAnyPtr(MRI.getType(Reg), 32);
  case Ptr64:
    return isAnyPtr(MRI.getType(Reg), 64);
  case Ptr128:
    return isAnyPtr(MRI.getType(Reg), 128);
  case V2S32:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 32);
  case V4S32:
    return MRI.getType(Reg) == LLT::fixed_vector(4, 32);
  case B32:
    return MRI.getType(Reg).getSizeInBits() == 32;
  case B64:
    return MRI.getType(Reg).getSizeInBits() == 64;
  case B96:
    return MRI.getType(Reg).getSizeInBits() == 96;
  case B128:
    return MRI.getType(Reg).getSizeInBits() == 128;
  case B256:
    return MRI.getType(Reg).getSizeInBits() == 256;
  case B512:
    return MRI.getType(Reg).getSizeInBits() == 512;
  case UniS1:
    return MRI.getType(Reg) == LLT::scalar(1) && MUI.isUniform(Reg);
  case UniS16:
    return MRI.getType(Reg) == LLT::scalar(16) && MUI.isUniform(Reg);
  case UniS32:
    return MRI.getType(Reg) == LLT::scalar(32) && MUI.isUniform(Reg);
  case UniS64:
    return MRI.getType(Reg) == LLT::scalar(64) && MUI.isUniform(Reg);
  case UniS128:
    return MRI.getType(Reg) == LLT::scalar(128) && MUI.isUniform(Reg);
  case UniP0:
    return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isUniform(Reg);
  case UniP1:
    return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isUniform(Reg);
  case UniP3:
    return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isUniform(Reg);
  case UniP4:
    return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isUniform(Reg);
  case UniP5:
    return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isUniform(Reg);
  case UniPtr32:
    return isAnyPtr(MRI.getType(Reg), 32) && MUI.isUniform(Reg);
  case UniPtr64:
    return isAnyPtr(MRI.getType(Reg), 64) && MUI.isUniform(Reg);
  case UniPtr128:
    return isAnyPtr(MRI.getType(Reg), 128) && MUI.isUniform(Reg);
  case UniV2S16:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isUniform(Reg);
  case UniB32:
    return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isUniform(Reg);
  case UniB64:
    return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isUniform(Reg);
  case UniB96:
    return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isUniform(Reg);
  case UniB128:
    return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isUniform(Reg);
  case UniB256:
    return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isUniform(Reg);
  case UniB512:
    return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isUniform(Reg);
  case DivS1:
    return MRI.getType(Reg) == LLT::scalar(1) && MUI.isDivergent(Reg);
  case DivS16:
    return MRI.getType(Reg) == LLT::scalar(16) && MUI.isDivergent(Reg);
  case DivS32:
    return MRI.getType(Reg) == LLT::scalar(32) && MUI.isDivergent(Reg);
  case DivS64:
    return MRI.getType(Reg) == LLT::scalar(64) && MUI.isDivergent(Reg);
  case DivS128:
    return MRI.getType(Reg) == LLT::scalar(128) && MUI.isDivergent(Reg);
  case DivP0:
    return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isDivergent(Reg);
  case DivP1:
    return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isDivergent(Reg);
  case DivP3:
    return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isDivergent(Reg);
  case DivP4:
    return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isDivergent(Reg);
  case DivP5:
    return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isDivergent(Reg);
  case DivPtr32:
    return isAnyPtr(MRI.getType(Reg), 32) && MUI.isDivergent(Reg);
  case DivPtr64:
    return isAnyPtr(MRI.getType(Reg), 64) && MUI.isDivergent(Reg);
  case DivPtr128:
    return isAnyPtr(MRI.getType(Reg), 128) && MUI.isDivergent(Reg);
  case DivV2S16:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isDivergent(Reg);
  case DivB32:
    return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isDivergent(Reg);
  case DivB64:
    return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isDivergent(Reg);
  case DivB96:
    return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isDivergent(Reg);
  case DivB128:
    return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isDivergent(Reg);
  case DivB256:
    return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isDivergent(Reg);
  case DivB512:
    return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isDivergent(Reg);
  case _:
    return true;
  default:
    llvm_unreachable("missing matchUniformityAndLLT");
  }
}

bool PredicateMapping::match(const MachineInstr &MI,
                             const MachineUniformityInfo &MUI,
                             const MachineRegisterInfo &MRI) const {
  // Check LLT signature.
  for (unsigned i = 0; i < OpUniformityAndTypes.size(); ++i) {
    if (OpUniformityAndTypes[i] == _) {
      if (MI.getOperand(i).isReg())
        return false;
      continue;
    }

    // Remaining IDs check registers.
    if (!MI.getOperand(i).isReg())
      return false;

    if (!matchUniformityAndLLT(MI.getOperand(i).getReg(),
                               OpUniformityAndTypes[i], MUI, MRI))
      return false;
  }

  // More complex check.
  if (TestFunc)
    return TestFunc(MI);

  return true;
}
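
// For example, the predicate list {UniS1, _, S32} used for G_ICMP below
// matches a uniform s1 def in operand 0, requires operand 1 (the compare
// predicate) to be a non-register operand, and requires the register in
// operand 2 to be a 32-bit scalar; operands past the end of the list are
// not checked.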

SetOfRulesForOpcode::SetOfRulesForOpcode(FastRulesTypes FastTypes)
    : FastTypes(FastTypes) {}

UniformityLLTOpPredicateID LLTToId(LLT Ty) {
  if (Ty == LLT::scalar(16))
    return S16;
  if (Ty == LLT::scalar(32))
    return S32;
  if (Ty == LLT::scalar(64))
    return S64;
  if (Ty == LLT::fixed_vector(2, 16))
    return V2S16;
  if (Ty == LLT::fixed_vector(2, 32))
    return V2S32;
  if (Ty == LLT::fixed_vector(3, 32))
    return V3S32;
  if (Ty == LLT::fixed_vector(4, 32))
    return V4S32;
  return _;
}

UniformityLLTOpPredicateID LLTToBId(LLT Ty) {
  if (Ty == LLT::scalar(32) || Ty == LLT::fixed_vector(2, 16) ||
      isAnyPtr(Ty, 32))
    return B32;
  if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) ||
      Ty == LLT::fixed_vector(4, 16) || isAnyPtr(Ty, 64))
    return B64;
  if (Ty == LLT::fixed_vector(3, 32))
    return B96;
  if (Ty == LLT::fixed_vector(4, 32) || isAnyPtr(Ty, 128))
    return B128;
  return _;
}

const RegBankLLTMapping &
SetOfRulesForOpcode::findMappingForMI(const MachineInstr &MI,
                                      const MachineRegisterInfo &MRI,
                                      const MachineUniformityInfo &MUI) const {
  // Search in "Fast Rules".
  // Note: if fast rules are enabled, a RegBankLLTMapping must be added in
  // each slot that could "match fast Predicate". If not, InvalidMapping is
  // returned, which results in failure; the "Slow Rules" are not searched.
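  // For example, with Standard fast types getFastPredicateSlot maps S32 to
  // slot 0, S16 to 1, S64 to 2 and V2S16 to 3, so a fast rule registered for
  // a uniform S32 result is read directly from Uni[0] below without scanning
  // the Rules list.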
  if (FastTypes != NoFastRules) {
    Register Reg = MI.getOperand(0).getReg();
    int Slot;
    if (FastTypes == StandardB)
      Slot = getFastPredicateSlot(LLTToBId(MRI.getType(Reg)));
    else
      Slot = getFastPredicateSlot(LLTToId(MRI.getType(Reg)));

    if (Slot != -1)
      return MUI.isUniform(Reg) ? Uni[Slot] : Div[Slot];
  }

  // Slow search for more complex rules.
  for (const RegBankLegalizeRule &Rule : Rules) {
    if (Rule.Predicate.match(MI, MUI, MRI))
      return Rule.OperandMapping;
  }

  LLVM_DEBUG(dbgs() << "MI: "; MI.dump(););
  llvm_unreachable("None of the rules defined for MI's opcode matched MI");
}

void SetOfRulesForOpcode::addRule(RegBankLegalizeRule Rule) {
  Rules.push_back(Rule);
}

void SetOfRulesForOpcode::addFastRuleDivergent(UniformityLLTOpPredicateID Ty,
                                               RegBankLLTMapping RuleApplyIDs) {
  int Slot = getFastPredicateSlot(Ty);
  assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
  Div[Slot] = RuleApplyIDs;
}

void SetOfRulesForOpcode::addFastRuleUniform(UniformityLLTOpPredicateID Ty,
                                             RegBankLLTMapping RuleApplyIDs) {
  int Slot = getFastPredicateSlot(Ty);
  assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
  Uni[Slot] = RuleApplyIDs;
}

int SetOfRulesForOpcode::getFastPredicateSlot(
    UniformityLLTOpPredicateID Ty) const {
  switch (FastTypes) {
  case Standard: {
    switch (Ty) {
    case S32:
      return 0;
    case S16:
      return 1;
    case S64:
      return 2;
    case V2S16:
      return 3;
    default:
      return -1;
    }
  }
  case StandardB: {
    switch (Ty) {
    case B32:
      return 0;
    case B64:
      return 1;
    case B96:
      return 2;
    case B128:
      return 3;
    default:
      return -1;
    }
  }
  case Vector: {
    switch (Ty) {
    case S32:
      return 0;
    case V2S32:
      return 1;
    case V3S32:
      return 2;
    case V4S32:
      return 3;
    default:
      return -1;
    }
  }
  default:
    return -1;
  }
}

RegBankLegalizeRules::RuleSetInitializer
RegBankLegalizeRules::addRulesForGOpcs(std::initializer_list<unsigned> OpcList,
                                       FastRulesTypes FastTypes) {
  return RuleSetInitializer(OpcList, GRulesAlias, GRules, FastTypes);
}

RegBankLegalizeRules::RuleSetInitializer
RegBankLegalizeRules::addRulesForIOpcs(std::initializer_list<unsigned> OpcList,
                                       FastRulesTypes FastTypes) {
  return RuleSetInitializer(OpcList, IRulesAlias, IRules, FastTypes);
}

const SetOfRulesForOpcode &
RegBankLegalizeRules::getRulesForOpc(MachineInstr &MI) const {
  unsigned Opc = MI.getOpcode();
  if (Opc == AMDGPU::G_INTRINSIC || Opc == AMDGPU::G_INTRINSIC_CONVERGENT ||
      Opc == AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS ||
      Opc == AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS) {
    unsigned IntrID = cast<GIntrinsic>(MI).getIntrinsicID();
    auto IRAIt = IRulesAlias.find(IntrID);
    if (IRAIt == IRulesAlias.end()) {
      LLVM_DEBUG(dbgs() << "MI: "; MI.dump(););
      llvm_unreachable("No rules defined for intrinsic opcode");
    }
    return IRules.at(IRAIt->second);
  }

  auto GRAIt = GRulesAlias.find(Opc);
  if (GRAIt == GRulesAlias.end()) {
    LLVM_DEBUG(dbgs() << "MI: "; MI.dump(););
    llvm_unreachable("No rules defined for generic opcode");
  }
  return GRules.at(GRAIt->second);
}

// Syntactic sugar wrapper for predicate lambda that enables '&&', '||' and '!'.
class Predicate {
private:
  struct Elt {
    // Save formula composed of Pred, '&&', '||' and '!' as a jump table.
    // Sink ! to Pred. For example !((A && !B) || C) -> (!A || B) && !C
    // Sequences of && and || will be represented by jumps, for example:
    // (A && B && ... X) or (A && B && ... X) || Y
    // A == true jump to B
    // A == false jump to end or Y, result is A(false) or Y
    // (A || B || ... X) or (A || B || ... X) && Y
    // A == true jump to end or Y, result is A(true) or Y
    // A == false jump to B
    // Notice that when negating expression, we simply flip Neg on each Pred
    // and swap TJumpOffset and FJumpOffset (&& becomes ||, || becomes &&).
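    // For example, building (A && B) from two single-predicate Predicates
    // gives the table:
    //   [ {A, Neg=0, TJump=1, FJump=2}, {B, Neg=0, TJump=1, FJump=1} ]
    // Evaluation starts at index 0: if A is true, fall through to B and
    // return B's result; if A is false, jump to index 2 (one past the end)
    // and return false.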
    std::function<bool(const MachineInstr &)> Pred;
    bool Neg; // Neg of Pred is calculated before jump
    unsigned TJumpOffset;
    unsigned FJumpOffset;
  };

  SmallVector<Elt, 8> Expression;

  Predicate(SmallVectorImpl<Elt> &&Expr) { Expression.swap(Expr); };

public:
  Predicate(std::function<bool(const MachineInstr &)> Pred) {
    Expression.push_back({Pred, false, 1, 1});
  };

  bool operator()(const MachineInstr &MI) const {
    unsigned Idx = 0;
    unsigned ResultIdx = Expression.size();
    bool Result;
    do {
      Result = Expression[Idx].Pred(MI);
      Result = Expression[Idx].Neg ? !Result : Result;
      if (Result) {
        Idx += Expression[Idx].TJumpOffset;
      } else {
        Idx += Expression[Idx].FJumpOffset;
      }
    } while ((Idx != ResultIdx));

    return Result;
  };

  Predicate operator!() const {
    SmallVector<Elt, 8> NegExpression;
    for (const Elt &ExprElt : Expression) {
      NegExpression.push_back({ExprElt.Pred, !ExprElt.Neg, ExprElt.FJumpOffset,
                               ExprElt.TJumpOffset});
    }
    return Predicate(std::move(NegExpression));
  };

  Predicate operator&&(const Predicate &RHS) const {
    SmallVector<Elt, 8> AndExpression = Expression;

    unsigned RHSSize = RHS.Expression.size();
    unsigned ResultIdx = Expression.size();
    for (unsigned i = 0; i < ResultIdx; ++i) {
      // LHS results in false, whole expression results in false.
      if (i + AndExpression[i].FJumpOffset == ResultIdx)
        AndExpression[i].FJumpOffset += RHSSize;
    }

    AndExpression.append(RHS.Expression);

    return Predicate(std::move(AndExpression));
  }

  Predicate operator||(const Predicate &RHS) const {
    SmallVector<Elt, 8> OrExpression = Expression;

    unsigned RHSSize = RHS.Expression.size();
    unsigned ResultIdx = Expression.size();
    for (unsigned i = 0; i < ResultIdx; ++i) {
      // LHS results in true, whole expression results in true.
      if (i + OrExpression[i].TJumpOffset == ResultIdx)
        OrExpression[i].TJumpOffset += RHSSize;
    }

    OrExpression.append(RHS.Expression);

    return Predicate(std::move(OrExpression));
  }
};

// Initialize rules
RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
                                           MachineRegisterInfo &_MRI)
    : ST(&_ST), MRI(&_MRI) {

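  // How to read the rules below: the predicate describes uniformity and LLT
  // of the operands, and the RegBankLLTMapping lists the apply IDs for def
  // operands and use operands (optionally followed by a lowering method).
  // For example, .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}}) below
  // maps a uniform 16-bit result to a truncated 32-bit SGPR and any-extends
  // both sources to 32-bit SGPRs, i.e. the uniform 16-bit add is performed
  // as a 32-bit SALU operation.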
  addRulesForGOpcs({G_ADD, G_SUB}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_MUL}, Standard).Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_XOR, G_OR, G_AND}, StandardB)
      .Any({{DivS1}, {{Vcc}, {Vcc, Vcc}}})
      .Any({{UniS16}, {{Sgpr16}, {Sgpr16, Sgpr16}}})
      .Any({{DivS16}, {{Vgpr16}, {Vgpr16, Vgpr16}}})
      .Uni(B32, {{SgprB32}, {SgprB32, SgprB32}})
      .Div(B32, {{VgprB32}, {VgprB32, VgprB32}})
      .Uni(B64, {{SgprB64}, {SgprB64, SgprB64}})
      .Div(B64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32});

  addRulesForGOpcs({G_SHL}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_LSHR}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_ASHR}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_FRAME_INDEX}).Any({{UniP5, _}, {{SgprP5}, {None}}});

  addRulesForGOpcs({G_UBFX, G_SBFX}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}, S_BFE})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32, Sgpr32}, S_BFE})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32, Vgpr32}, V_BFE});

  // Note: we only write S1 rules for G_IMPLICIT_DEF, G_CONSTANT, G_FCONSTANT
  // and G_FREEZE here; the rest is trivially regbank-selected earlier.
  addRulesForGOpcs({G_IMPLICIT_DEF}).Any({{UniS1}, {{Sgpr32Trunc}, {}}});
  addRulesForGOpcs({G_CONSTANT})
      .Any({{UniS1, _}, {{Sgpr32Trunc}, {None}, UniCstExt}});
  addRulesForGOpcs({G_FREEZE}).Any({{DivS1}, {{Vcc}, {Vcc}}});

  addRulesForGOpcs({G_ICMP})
      .Any({{UniS1, _, S32}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}})
      .Any({{DivS1, _, S32}, {{Vcc}, {None, Vgpr32, Vgpr32}}})
      .Any({{DivS1, _, S64}, {{Vcc}, {None, Vgpr64, Vgpr64}}});

  addRulesForGOpcs({G_FCMP})
      .Any({{UniS1, _, S32}, {{UniInVcc}, {None, Vgpr32, Vgpr32}}})
      .Any({{DivS1, _, S32}, {{Vcc}, {None, Vgpr32, Vgpr32}}});

  addRulesForGOpcs({G_BRCOND})
      .Any({{UniS1}, {{}, {Sgpr32AExtBoolInReg}}})
      .Any({{DivS1}, {{}, {Vcc}}});

  addRulesForGOpcs({G_BR}).Any({{_}, {{}, {None}}});

  addRulesForGOpcs({G_SELECT}, StandardB)
      .Any({{DivS16}, {{Vgpr16}, {Vcc, Vgpr16, Vgpr16}}})
      .Div(B32, {{VgprB32}, {Vcc, VgprB32, VgprB32}})

  addRulesForGOpcs({G_ANYEXT})
      .Any({{UniS16, S1}, {{None}, {None}}}) // should be combined away
      .Any({{UniS32, S1}, {{None}, {None}}}) // should be combined away
      .Any({{UniS64, S1}, {{None}, {None}}}) // should be combined away
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  // In GlobalISel, an in-register G_TRUNC is treated as a no-op and selected
  // into a COPY. It is up to the user to deal with the truncated bits.
  addRulesForGOpcs({G_TRUNC})
      .Any({{UniS1, UniS16}, {{None}, {None}}}) // should be combined away
      .Any({{UniS1, UniS32}, {{None}, {None}}}) // should be combined away
      .Any({{UniS1, UniS64}, {{None}, {None}}}) // should be combined away
      .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}})
      .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
      .Any({{UniS32, S64}, {{Sgpr32}, {Sgpr64}}})
      .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
      .Any({{UniV2S16, V2S32}, {{SgprV2S16}, {SgprV2S32}}})
      .Any({{DivV2S16, V2S32}, {{VgprV2S16}, {VgprV2S32}}})
      // This is non-trivial. VgprToVccCopy is done using a compare instruction.
      .Any({{DivS1, DivS16}, {{Vcc}, {Vgpr16}, VgprToVccCopy}})
      .Any({{DivS1, DivS32}, {{Vcc}, {Vgpr32}, VgprToVccCopy}})
      .Any({{DivS1, DivS64}, {{Vcc}, {Vgpr64}, VgprToVccCopy}});

  addRulesForGOpcs({G_ZEXT})
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      // not extending S16 to S32 is questionable.
      .Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32ZExt}, Ext32To64}})
      .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32ZExt}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  addRulesForGOpcs({G_SEXT})
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      // not extending S16 to S32 is questionable.
      .Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32SExt}, Ext32To64}})
      .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32SExt}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  addRulesForGOpcs({G_SEXT_INREG})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}})
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS64, S64}, {{Sgpr64}, {Sgpr64}}})

  bool hasSMRDx3 = ST->hasScalarDwordx3Loads();
  bool hasSMRDSmall = ST->hasScalarSubwordLoads();
  bool usesTrue16 = ST->useRealTrue16Insts();

  Predicate isAlign16([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getAlign() >= Align(16);
  });

  Predicate isAlign4([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getAlign() >= Align(4);
  });

  Predicate isAtomicMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isAtomic();
  });

  Predicate isUniMMO([](const MachineInstr &MI) -> bool {
    return AMDGPU::isUniformMMO(*MI.memoperands_begin());
  });

  Predicate isConst([](const MachineInstr &MI) -> bool {
    // The address space in the MMO can be different than the address space
    // of the pointer operand.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    const unsigned AS = MMO->getAddrSpace();
    return AS == AMDGPUAS::CONSTANT_ADDRESS ||
           AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
  });

  Predicate isVolatileMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isVolatile();
  });

  Predicate isInvMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isInvariant();
  });

  Predicate isNoClobberMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getFlags() & MONoClobber;
  });

  Predicate isNaturalAligned([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return MMO->getAlign() >= Align(MMO->getSize().getValue());
  });

  Predicate is8Or16BitMMO([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    const unsigned MemSize = 8 * MMO->getSize().getValue();
    return MemSize == 16 || MemSize == 8;
  });

  Predicate is32BitMMO([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return 8 * MMO->getSize().getValue() == 32;
  });

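  // isUL approximates "safe uniform load": the access is not atomic, the MMO
  // is uniform, and the memory is either in a constant address space or is
  // non-volatile and known not to be clobbered (invariant or MONoClobber).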
  auto isUL = !isAtomicMMO && isUniMMO && (isConst || !isVolatileMMO) &&
              (isConst || isInvMMO || isNoClobberMMO);

  // clang-format off
  // TODO: S32Dst, 16-bit any-extending load should not appear on True16 targets
  addRulesForGOpcs({G_LOAD})
      // flat, addrspace(0), never uniform - flat_load
      .Any({{DivS16, P0}, {{Vgpr16}, {VgprP0}}}, usesTrue16)
      .Any({{DivB32, P0}, {{VgprB32}, {VgprP0}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P0}, {{VgprB64}, {VgprP0}}})
      .Any({{DivB96, P0}, {{VgprB96}, {VgprP0}}})
      .Any({{DivB128, P0}, {{VgprB128}, {VgprP0}}})

      // global, addrspace(1)
      // divergent - global_load
      .Any({{DivS16, P1}, {{Vgpr16}, {VgprP1}}}, usesTrue16)
      .Any({{DivB32, P1}, {{VgprB32}, {VgprP1}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P1}, {{VgprB64}, {VgprP1}}})
      .Any({{DivB96, P1}, {{VgprB96}, {VgprP1}}})
      .Any({{DivB128, P1}, {{VgprB128}, {VgprP1}}})
      .Any({{DivB256, P1}, {{VgprB256}, {VgprP1}, SplitLoad}})
      .Any({{DivB512, P1}, {{VgprB512}, {VgprP1}, SplitLoad}})

      // uniform - s_load
      .Any({{{UniS16, P1}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP1}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P1}, isAlign4 && isUL}, {{Sgpr32Trunc}, {SgprP1}, WidenMMOToS32}}, usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
      .Any({{{UniB32, P1}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP1}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      // TODO: SplitLoad when !isNaturalAligned && isUL and target hasSMRDSmall
      .Any({{{UniB32, P1}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP1}, WidenMMOToS32}}, !hasSMRDSmall) // 8-bit and 16-bit any-extending load to 32-bit load
      .Any({{{UniB32, P1}, is32BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP1}}}) // 32-bit load
      .Any({{{UniB64, P1}, isAlign4 && isUL}, {{SgprB64}, {SgprP1}}})
      .Any({{{UniB96, P1}, isAlign16 && isUL}, {{SgprB96}, {SgprP1}, WidenLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P1}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP1}, SplitLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P1}, isAlign4 && isUL}, {{SgprB96}, {SgprP1}}}, hasSMRDx3)
      .Any({{{UniB128, P1}, isAlign4 && isUL}, {{SgprB128}, {SgprP1}}})
      .Any({{{UniB256, P1}, isAlign4 && isUL}, {{SgprB256}, {SgprP1}}})
      .Any({{{UniB512, P1}, isAlign4 && isUL}, {{SgprB512}, {SgprP1}}})

      // Uniform load via global or buffer load, for example a volatile or
      // non-aligned uniform load. Not using the standard {{UniInVgprTy},
      // {VgprP1}} mapping since it would be selected as global_load; use
      // SgprP1 for the pointer instead to match patterns without
      // flat-for-global, the default for GFX7 and older:
      // -> +flat-for-global + {{UniInVgprTy}, {SgprP1}} - global_load
      // -> -flat-for-global + {{UniInVgprTy}, {SgprP1}} - buffer_load
      .Any({{{UniS16, P1}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP1}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P1}, !isAlign4 || !isUL}, {{UniInVgprS16}, {SgprP1}}}, usesTrue16 && !hasSMRDSmall) // s16 load
      .Any({{{UniB32, P1}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP1}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P1}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP1}}}, !hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB64, P1}, !isAlign4 || !isUL}, {{UniInVgprB64}, {SgprP1}}})
      .Any({{{UniB96, P1}, !isAlign4 || !isUL}, {{UniInVgprB96}, {SgprP1}}})
      .Any({{{UniB128, P1}, !isAlign4 || !isUL}, {{UniInVgprB128}, {SgprP1}}})
      .Any({{{UniB256, P1}, !isAlign4 || !isUL}, {{UniInVgprB256}, {SgprP1}, SplitLoad}})
      .Any({{{UniB512, P1}, !isAlign4 || !isUL}, {{UniInVgprB512}, {SgprP1}, SplitLoad}})

      // local, addrspace(3) - ds_load
      .Any({{DivS16, P3}, {{Vgpr16}, {VgprP3}}}, usesTrue16)
      .Any({{DivB32, P3}, {{VgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P3}, {{VgprB64}, {VgprP3}}})
      .Any({{DivB96, P3}, {{VgprB96}, {VgprP3}}})
      .Any({{DivB128, P3}, {{VgprB128}, {VgprP3}}})

      .Any({{UniS16, P3}, {{UniInVgprS16}, {SgprP3}}}, usesTrue16) // 16-bit load
      .Any({{UniB32, P3}, {{UniInVgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{UniB64, P3}, {{UniInVgprB64}, {VgprP3}}})
      .Any({{UniB96, P3}, {{UniInVgprB96}, {VgprP3}}})
      .Any({{UniB128, P3}, {{UniInVgprB128}, {VgprP3}}})

      // constant, addrspace(4)
      // divergent - global_load
      .Any({{DivS16, P4}, {{Vgpr16}, {VgprP4}}}, usesTrue16)
      .Any({{DivB32, P4}, {{VgprB32}, {VgprP4}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P4}, {{VgprB64}, {VgprP4}}})
      .Any({{DivB96, P4}, {{VgprB96}, {VgprP4}}})
      .Any({{DivB128, P4}, {{VgprB128}, {VgprP4}}})
      .Any({{DivB256, P4}, {{VgprB256}, {VgprP4}, SplitLoad}})
      .Any({{DivB512, P4}, {{VgprB512}, {VgprP4}, SplitLoad}})

      // uniform - s_load
      .Any({{{UniS16, P4}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP4}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P4}, isAlign4 && isUL}, {{Sgpr32Trunc}, {SgprP4}, WidenMMOToS32}}, usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
      .Any({{{UniB32, P4}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP4}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P4}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP4}, WidenMMOToS32}}, !hasSMRDSmall) // 8-bit and 16-bit any-extending load to 32-bit load
      .Any({{{UniB32, P4}, is32BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP4}}}) // 32-bit load
      .Any({{{UniB64, P4}, isAlign4 && isUL}, {{SgprB64}, {SgprP4}}})
      .Any({{{UniB96, P4}, isAlign16 && isUL}, {{SgprB96}, {SgprP4}, WidenLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P4}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP4}, SplitLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P4}, isAlign4 && isUL}, {{SgprB96}, {SgprP4}}}, hasSMRDx3)
      .Any({{{UniB128, P4}, isAlign4 && isUL}, {{SgprB128}, {SgprP4}}})
      .Any({{{UniB256, P4}, isAlign4 && isUL}, {{SgprB256}, {SgprP4}}})
      .Any({{{UniB512, P4}, isAlign4 && isUL}, {{SgprB512}, {SgprP4}}})

      // uniform in vgpr - global_load or buffer_load
      .Any({{{UniS16, P4}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP4}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P4}, !isAlign4 || !isUL}, {{UniInVgprS16}, {SgprP4}}}, usesTrue16 && !hasSMRDSmall) // s16 load
      .Any({{{UniB32, P4}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP4}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P4}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP4}}}, !hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB64, P4}, !isAlign4 || !isUL}, {{UniInVgprB64}, {SgprP4}}})
      .Any({{{UniB96, P4}, !isAlign4 || !isUL}, {{UniInVgprB96}, {SgprP4}}})
      .Any({{{UniB128, P4}, !isAlign4 || !isUL}, {{UniInVgprB128}, {SgprP4}}})
      .Any({{{UniB256, P4}, !isAlign4 || !isUL}, {{UniInVgprB256}, {SgprP4}, SplitLoad}})
      .Any({{{UniB512, P4}, !isAlign4 || !isUL}, {{UniInVgprB512}, {SgprP4}, SplitLoad}})

      // private, addrspace(5), never uniform - scratch_load
      .Any({{DivS16, P5}, {{Vgpr16}, {VgprP5}}}, usesTrue16)
      .Any({{DivB32, P5}, {{VgprB32}, {VgprP5}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P5}, {{VgprB64}, {VgprP5}}})
      .Any({{DivB96, P5}, {{VgprB96}, {VgprP5}}})
      .Any({{DivB128, P5}, {{VgprB128}, {VgprP5}}})

      .Any({{DivS32, Ptr128}, {{Vgpr32}, {VgprPtr128}}});


  addRulesForGOpcs({G_ZEXTLOAD, G_SEXTLOAD}) // i8 and i16 zero- and sign-extending loads
      .Any({{DivS32, P0}, {{Vgpr32}, {VgprP0}}})

      .Any({{DivS32, P1}, {{Vgpr32}, {VgprP1}}})
      .Any({{{UniS32, P1}, isAlign4 && isUL}, {{Sgpr32}, {SgprP1}, WidenMMOToS32}}, !hasSMRDSmall)
      .Any({{{UniS32, P1}, isNaturalAligned && isUL}, {{Sgpr32}, {SgprP1}}}, hasSMRDSmall)
      .Any({{{UniS32, P1}, !isAlign4 || !isUL}, {{UniInVgprS32}, {SgprP1}}}, !hasSMRDSmall)
      .Any({{{UniS32, P1}, !isNaturalAligned || !isUL}, {{UniInVgprS32}, {SgprP1}}}, hasSMRDSmall)

      .Any({{DivS32, P3}, {{Vgpr32}, {VgprP3}}})
      .Any({{UniS32, P3}, {{UniInVgprS32}, {VgprP3}}})

      .Any({{DivS32, P4}, {{Vgpr32}, {VgprP4}}})
      .Any({{{UniS32, P4}, isAlign4 && isUL}, {{Sgpr32}, {SgprP4}, WidenMMOToS32}}, !hasSMRDSmall)
      .Any({{{UniS32, P4}, isNaturalAligned && isUL}, {{Sgpr32}, {SgprP4}}}, hasSMRDSmall)
      .Any({{{UniS32, P4}, !isAlign4 || !isUL}, {{UniInVgprS32}, {SgprP4}}}, !hasSMRDSmall)
      .Any({{{UniS32, P4}, !isNaturalAligned || !isUL}, {{UniInVgprS32}, {SgprP4}}}, hasSMRDSmall)

      .Any({{DivS32, P5}, {{Vgpr32}, {VgprP5}}});

  addRulesForGOpcs({G_STORE})
      // addrspace(0)
      .Any({{S16, P0}, {{}, {Vgpr16, VgprP0}}}, usesTrue16) // 16-bit store
      .Any({{B32, P0}, {{}, {VgprB32, VgprP0}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, P0}, {{}, {VgprB64, VgprP0}}})
      .Any({{B96, P0}, {{}, {VgprB96, VgprP0}}})
      .Any({{B128, P0}, {{}, {VgprB128, VgprP0}}})

      // addrspace(1), there are no stores to addrspace(4)
      // For targets:
      // - with "+flat-for-global" - global_store
      // - without(-flat-for-global) - buffer_store addr64
      .Any({{S16, DivP1}, {{}, {Vgpr16, VgprP1}}}, usesTrue16) // 16-bit store
      .Any({{B32, DivP1}, {{}, {VgprB32, VgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, DivP1}, {{}, {VgprB64, VgprP1}}})
      .Any({{B96, DivP1}, {{}, {VgprB96, VgprP1}}})
      .Any({{B128, DivP1}, {{}, {VgprB128, VgprP1}}})

      // For UniP1, use sgpr ptr to match flat-for-global patterns. Targets:
      // - with "+flat-for-global" - global_store for both sgpr and vgpr ptr
      // - without(-flat-for-global) - need sgpr ptr to select buffer_store
      .Any({{S16, UniP1}, {{}, {Vgpr16, SgprP1}}}, usesTrue16) // 16-bit store
      .Any({{B32, UniP1}, {{}, {VgprB32, SgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, UniP1}, {{}, {VgprB64, SgprP1}}})
      .Any({{B96, UniP1}, {{}, {VgprB96, SgprP1}}})
      .Any({{B128, UniP1}, {{}, {VgprB128, SgprP1}}})

      // addrspace(3) and addrspace(5)
      .Any({{S16, Ptr32}, {{}, {Vgpr16, VgprPtr32}}}, usesTrue16) // 16-bit store
      .Any({{B32, Ptr32}, {{}, {VgprB32, VgprPtr32}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, Ptr32}, {{}, {VgprB64, VgprPtr32}}})
      .Any({{B96, Ptr32}, {{}, {VgprB96, VgprPtr32}}})
      .Any({{B128, Ptr32}, {{}, {VgprB128, VgprPtr32}}});
  // clang-format on

  addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD}, StandardB)

  addRulesForGOpcs({G_AMDGPU_BUFFER_STORE})
      .Any({{S32}, {{}, {Vgpr32, SgprV4S32, Vgpr32, Vgpr32, Sgpr32}}});

  addRulesForGOpcs({G_PTR_ADD})
      .Any({{UniPtr32}, {{SgprPtr32}, {SgprPtr32, Sgpr32}}})
      .Any({{DivPtr32}, {{VgprPtr32}, {VgprPtr32, Vgpr32}}})
      .Any({{UniPtr64}, {{SgprPtr64}, {SgprPtr64, Sgpr64}}})
      .Any({{DivPtr64}, {{VgprPtr64}, {VgprPtr64, Vgpr64}}});

  addRulesForGOpcs({G_INTTOPTR})
      .Any({{UniPtr32}, {{SgprPtr32}, {Sgpr32}}})
      .Any({{DivPtr32}, {{VgprPtr32}, {Vgpr32}}})
      .Any({{UniPtr64}, {{SgprPtr64}, {Sgpr64}}})
      .Any({{DivPtr64}, {{VgprPtr64}, {Vgpr64}}})
      .Any({{UniPtr128}, {{SgprPtr128}, {Sgpr128}}})
      .Any({{DivPtr128}, {{VgprPtr128}, {Vgpr128}}});

  addRulesForGOpcs({G_PTRTOINT})
      .Any({{UniS32}, {{Sgpr32}, {SgprPtr32}}})
      .Any({{DivS32}, {{Vgpr32}, {VgprPtr32}}})
      .Any({{UniS64}, {{Sgpr64}, {SgprPtr64}}})
      .Any({{DivS64}, {{Vgpr64}, {VgprPtr64}}})
      .Any({{UniS128}, {{Sgpr128}, {SgprPtr128}}})
      .Any({{DivS128}, {{Vgpr128}, {VgprPtr128}}});

  addRulesForGOpcs({G_ABS}, Standard).Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}});

  bool hasSALUFloat = ST->hasSALUFloatInsts();

  addRulesForGOpcs({G_FADD}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat)
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_FPTOUI})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);

  addRulesForGOpcs({G_UITOFP})
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);

  using namespace Intrinsic;

  addRulesForIOpcs({amdgcn_s_getpc}).Any({{UniS64, _}, {{Sgpr64}, {None}}});

  // This is an "intrinsic lane mask"; it was set to i32/i64 in LLVM IR.
  addRulesForIOpcs({amdgcn_end_cf}).Any({{_, S32}, {{}, {None, Sgpr32}}});

  addRulesForIOpcs({amdgcn_if_break}, Standard)
      .Uni(S32, {{Sgpr32}, {IntrId, Vcc, Sgpr32}});

  addRulesForIOpcs({amdgcn_mbcnt_lo, amdgcn_mbcnt_hi}, Standard)
      .Div(S32, {{}, {Vgpr32, None, Vgpr32, Vgpr32}});

  addRulesForIOpcs({amdgcn_readfirstlane})
      .Any({{UniS32, _, DivS32}, {{}, {Sgpr32, None, Vgpr32}}})
      // This should not exist in the first place; it comes from call lowering,
      // readfirstlane-ing just in case the register is not in an SGPR.
      .Any({{UniS32, _, UniS32}, {{}, {Sgpr32, None, Vgpr32}}});

} // end initialize rules