LLVM 22.0.0git
DAGCombiner.cpp
1//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10// both before and after the DAG is legalized.
11//
12// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13// primarily intended to handle simplification opportunities that are implicit
14// in the LLVM IR and exposed by the various codegen lowering phases.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/APFloat.h"
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/DenseMap.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/SetVector.h"
27#include "llvm/ADT/SmallSet.h"
29#include "llvm/ADT/Statistic.h"
51#include "llvm/IR/Attributes.h"
52#include "llvm/IR/Constant.h"
53#include "llvm/IR/DataLayout.h"
55#include "llvm/IR/Function.h"
56#include "llvm/IR/Metadata.h"
61#include "llvm/Support/Debug.h"
69#include <algorithm>
70#include <cassert>
71#include <cstdint>
72#include <functional>
73#include <iterator>
74#include <optional>
75#include <string>
76#include <tuple>
77#include <utility>
78#include <variant>
79
80#include "MatchContext.h"
81
82using namespace llvm;
83using namespace llvm::SDPatternMatch;
84
85#define DEBUG_TYPE "dagcombine"
86
87STATISTIC(NodesCombined , "Number of dag nodes combined");
88STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
89STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
90STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
91STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
92STATISTIC(SlicedLoads, "Number of load sliced");
93STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
94
95DEBUG_COUNTER(DAGCombineCounter, "dagcombine",
96 "Controls whether a DAG combine is performed for a node");
97
98static cl::opt<bool>
99CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
100 cl::desc("Enable DAG combiner's use of IR alias analysis"));
101
102static cl::opt<bool>
103UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
104 cl::desc("Enable DAG combiner's use of TBAA"));
105
106#ifndef NDEBUG
107static cl::opt<std::string>
108CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
109 cl::desc("Only use DAG-combiner alias analysis in this"
110 " function"));
111#endif
112
113/// Hidden option to stress test load slicing, i.e., when this option
114/// is enabled, load slicing bypasses most of its profitability guards.
115static cl::opt<bool>
116StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
117 cl::desc("Bypass the profitability model of load slicing"),
118 cl::init(false));
119
120static cl::opt<bool>
121 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
122 cl::desc("DAG combiner may split indexing from loads"));
123
124static cl::opt<bool>
125 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
126 cl::desc("DAG combiner enable merging multiple stores "
127 "into a wider store"));
128
130 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
131 cl::desc("Limit the number of operands to inline for Token Factors"));
132
134 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
135 cl::desc("Limit the number of times for the same StoreNode and RootNode "
136 "to bail out in store merging dependence check"));
137
139 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
140 cl::desc("DAG combiner enable reducing the width of load/op/store "
141 "sequence"));
143 "combiner-reduce-load-op-store-width-force-narrowing-profitable",
144 cl::Hidden, cl::init(false),
145 cl::desc("DAG combiner force override the narrowing profitable check when "
146 "reducing the width of load/op/store sequences"));
147
149 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
150 cl::desc("DAG combiner enable load/<replace bytes>/store with "
151 "a narrower store"));
152
153static cl::opt<bool> DisableCombines("combiner-disabled", cl::Hidden,
154 cl::init(false),
155 cl::desc("Disable the DAG combiner"));
156
157namespace {
158
159 class DAGCombiner {
160 SelectionDAG &DAG;
161 const TargetLowering &TLI;
162 const SelectionDAGTargetInfo *STI;
163 CombineLevel Level = BeforeLegalizeTypes;
164 CodeGenOptLevel OptLevel;
165 bool LegalDAG = false;
166 bool LegalOperations = false;
167 bool LegalTypes = false;
168 bool ForCodeSize;
169 bool DisableGenericCombines;
170
171 /// Worklist of all of the nodes that need to be simplified.
172 ///
173 /// This must behave as a stack -- new nodes to process are pushed onto the
174 /// back and when processing we pop off of the back.
175 ///
176 /// The worklist will not contain duplicates but may contain null entries
177 /// due to nodes being deleted from the underlying DAG. For fast lookup and
178 /// deduplication, the index of the node in this vector is stored in the
179 /// node in SDNode::CombinerWorklistIndex.
180 SmallVector<SDNode *, 64> Worklist;
181
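 // In short: a negative CombinerWorklistIndex means the node is not currently
 // in the Worklist; -2 specifically marks nodes that were already popped and
 // combined (see getNextWorklistEntry), which AddToWorklist can use to skip
 // re-adding them when SkipIfCombinedBefore is set.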
182 /// This records all nodes attempted to be added to the worklist since we
183 /// considered a new worklist entry. Because we do not add duplicate nodes
184 /// to the worklist, this is different from the tail of the worklist.
185 SmallSetVector<SDNode *, 32> PruningList;
186
187 /// Map from candidate StoreNode to the pair of RootNode and count.
188 /// The count is used to track how many times we have seen the StoreNode
189 /// with the same RootNode bail out in dependence check. If we have seen
190 /// the bail out for the same pair many times over a limit, we won't
191 /// consider the StoreNode with the same RootNode as store merging
192 /// candidate again.
193 DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
194
195 // BatchAA - Used for DAG load/store alias analysis.
196 BatchAAResults *BatchAA;
197
198 /// This caches all chains that have already been processed in
199 /// DAGCombiner::getStoreMergeCandidates() and found to have no mergeable
200 /// stores candidates.
201 SmallPtrSet<SDNode *, 4> ChainsWithoutMergeableStores;
202
203 /// When an instruction is simplified, add all users of the instruction to
204 /// the work lists because they might get more simplified now.
205 void AddUsersToWorklist(SDNode *N) {
206 for (SDNode *Node : N->users())
207 AddToWorklist(Node);
208 }
209
210 /// Convenient shorthand to add a node and all of its users to the worklist.
211 void AddToWorklistWithUsers(SDNode *N) {
212 AddUsersToWorklist(N);
213 AddToWorklist(N);
214 }
215
216 // Prune potentially dangling nodes. This is called after
217 // any visit to a node, but should also be called during a visit after any
218 // failed combine which may have created a DAG node.
219 void clearAddedDanglingWorklistEntries() {
220 // Check any nodes added to the worklist to see if they are prunable.
221 while (!PruningList.empty()) {
222 auto *N = PruningList.pop_back_val();
223 if (N->use_empty())
224 recursivelyDeleteUnusedNodes(N);
225 }
226 }
227
228 SDNode *getNextWorklistEntry() {
229 // Before we do any work, remove nodes that are not in use.
230 clearAddedDanglingWorklistEntries();
231 SDNode *N = nullptr;
232 // The Worklist holds the SDNodes in order, but it may contain null
233 // entries.
234 while (!N && !Worklist.empty()) {
235 N = Worklist.pop_back_val();
236 }
237
238 if (N) {
239 assert(N->getCombinerWorklistIndex() >= 0 &&
240 "Found a worklist entry without a corresponding map entry!");
241 // Set to -2 to indicate that we combined the node.
242 N->setCombinerWorklistIndex(-2);
243 }
244 return N;
245 }
246
247 /// Call the node-specific routine that folds each particular type of node.
248 SDValue visit(SDNode *N);
249
250 public:
251 DAGCombiner(SelectionDAG &D, BatchAAResults *BatchAA, CodeGenOptLevel OL)
252 : DAG(D), TLI(D.getTargetLoweringInfo()),
253 STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL),
254 BatchAA(BatchAA) {
255 ForCodeSize = DAG.shouldOptForSize();
256 DisableGenericCombines =
257 DisableCombines || (STI && STI->disableGenericCombines(OptLevel));
258
259 MaximumLegalStoreInBits = 0;
260 // We use the minimum store size here, since that's all we can guarantee
261 // for the scalable vector types.
262 for (MVT VT : MVT::all_valuetypes())
263 if (EVT(VT).isSimple() && VT != MVT::Other &&
264 TLI.isTypeLegal(EVT(VT)) &&
265 VT.getSizeInBits().getKnownMinValue() >= MaximumLegalStoreInBits)
266 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinValue();
267 }
268
269 void ConsiderForPruning(SDNode *N) {
270 // Mark this for potential pruning.
271 PruningList.insert(N);
272 }
273
274 /// Add to the worklist making sure its instance is at the back (next to be
275 /// processed.)
276 void AddToWorklist(SDNode *N, bool IsCandidateForPruning = true,
277 bool SkipIfCombinedBefore = false) {
278 assert(N->getOpcode() != ISD::DELETED_NODE &&
279 "Deleted Node added to Worklist");
280
281 // Skip handle nodes as they can't usefully be combined and confuse the
282 // zero-use deletion strategy.
283 if (N->getOpcode() == ISD::HANDLENODE)
284 return;
285
286 if (SkipIfCombinedBefore && N->getCombinerWorklistIndex() == -2)
287 return;
288
289 if (IsCandidateForPruning)
290 ConsiderForPruning(N);
291
292 if (N->getCombinerWorklistIndex() < 0) {
293 N->setCombinerWorklistIndex(Worklist.size());
294 Worklist.push_back(N);
295 }
296 }
297
298 /// Remove all instances of N from the worklist.
299 void removeFromWorklist(SDNode *N) {
300 PruningList.remove(N);
301 StoreRootCountMap.erase(N);
302
303 int WorklistIndex = N->getCombinerWorklistIndex();
304 // If not in the worklist, the index might be -1 or -2 (was combined
305 // before). As the node gets deleted anyway, there's no need to update
306 // the index.
307 if (WorklistIndex < 0)
308 return; // Not in the worklist.
309
310 // Null out the entry rather than erasing it to avoid a linear operation.
311 Worklist[WorklistIndex] = nullptr;
312 N->setCombinerWorklistIndex(-1);
313 }
314
315 void deleteAndRecombine(SDNode *N);
316 bool recursivelyDeleteUnusedNodes(SDNode *N);
317
318 /// Replaces all uses of the results of one DAG node with new values.
319 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
320 bool AddTo = true);
321
322 /// Replaces all uses of the results of one DAG node with new values.
323 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
324 return CombineTo(N, &Res, 1, AddTo);
325 }
326
327 /// Replaces all uses of the results of one DAG node with new values.
328 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
329 bool AddTo = true) {
330 SDValue To[] = { Res0, Res1 };
331 return CombineTo(N, To, 2, AddTo);
332 }
333
334 SDValue CombineTo(SDNode *N, SmallVectorImpl<SDValue> *To,
335 bool AddTo = true) {
336 return CombineTo(N, To->data(), To->size(), AddTo);
337 }
338
339 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
340
341 private:
342 unsigned MaximumLegalStoreInBits;
343
344 /// Check the specified integer node value to see if it can be simplified or
345 /// if things it uses can be simplified by bit propagation.
346 /// If so, return true.
347 bool SimplifyDemandedBits(SDValue Op) {
348 unsigned BitWidth = Op.getScalarValueSizeInBits();
349 APInt DemandedBits = APInt::getAllOnes(BitWidth);
350 return SimplifyDemandedBits(Op, DemandedBits);
351 }
352
353 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
354 EVT VT = Op.getValueType();
355 APInt DemandedElts = VT.isFixedLengthVector()
356 ? APInt::getAllOnes(VT.getVectorNumElements())
357 : APInt(1, 1);
358 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, false);
359 }
360
361 /// Check the specified vector node value to see if it can be simplified or
362 /// if things it uses can be simplified as it only uses some of the
363 /// elements. If so, return true.
364 bool SimplifyDemandedVectorElts(SDValue Op) {
365 // TODO: For now just pretend it cannot be simplified.
366 if (Op.getValueType().isScalableVector())
367 return false;
368
369 unsigned NumElts = Op.getValueType().getVectorNumElements();
370 APInt DemandedElts = APInt::getAllOnes(NumElts);
371 return SimplifyDemandedVectorElts(Op, DemandedElts);
372 }
373
374 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
375 const APInt &DemandedElts,
376 bool AssumeSingleUse = false);
377 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
378 bool AssumeSingleUse = false);
379
380 bool CombineToPreIndexedLoadStore(SDNode *N);
381 bool CombineToPostIndexedLoadStore(SDNode *N);
382 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
383 bool SliceUpLoad(SDNode *N);
384
385 // Looks up the chain to find a unique (unaliased) store feeding the passed
386 // load. If no such store is found, returns a nullptr.
387 // Note: This will look past a CALLSEQ_START if the load is chained to it
388 // so that it can find stack stores for byval params.
389 StoreSDNode *getUniqueStoreFeeding(LoadSDNode *LD, int64_t &Offset);
390 // Scalars have size 0 to distinguish from singleton vectors.
391 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
392 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
393 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
394
395 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
396 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
397 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
398 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
399 SDValue PromoteIntBinOp(SDValue Op);
400 SDValue PromoteIntShiftOp(SDValue Op);
401 SDValue PromoteExtend(SDValue Op);
402 bool PromoteLoad(SDValue Op);
403
404 SDValue foldShiftToAvg(SDNode *N, const SDLoc &DL);
405 // Fold `a bitwiseop (~b +/- c)` -> `a bitwiseop ~(b -/+ c)`
406 SDValue foldBitwiseOpWithNeg(SDNode *N, const SDLoc &DL, EVT VT);
407
408 SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
409 SDValue RHS, SDValue True, SDValue False,
410 ISD::CondCode CC);
411
412 /// Call the node-specific routine that knows how to fold each
413 /// particular type of node. If that doesn't do anything, try the
414 /// target-specific DAG combines.
415 SDValue combine(SDNode *N);
416
417 // Visitation implementation - Implement dag node combining for different
418 // node types. The semantics are as follows:
419 // Return Value:
420 // SDValue.getNode() == 0 - No change was made
421 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
422 // otherwise - N should be replaced by the returned Operand.
423 //
424 SDValue visitTokenFactor(SDNode *N);
425 SDValue visitMERGE_VALUES(SDNode *N);
426 SDValue visitADD(SDNode *N);
427 SDValue visitADDLike(SDNode *N);
428 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1,
429 SDNode *LocReference);
430 SDValue visitPTRADD(SDNode *N);
431 SDValue visitSUB(SDNode *N);
432 SDValue visitADDSAT(SDNode *N);
433 SDValue visitSUBSAT(SDNode *N);
434 SDValue visitADDC(SDNode *N);
435 SDValue visitADDO(SDNode *N);
436 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
437 SDValue visitSUBC(SDNode *N);
438 SDValue visitSUBO(SDNode *N);
439 SDValue visitADDE(SDNode *N);
440 SDValue visitUADDO_CARRY(SDNode *N);
441 SDValue visitSADDO_CARRY(SDNode *N);
442 SDValue visitUADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
443 SDNode *N);
444 SDValue visitSADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
445 SDNode *N);
446 SDValue visitSUBE(SDNode *N);
447 SDValue visitUSUBO_CARRY(SDNode *N);
448 SDValue visitSSUBO_CARRY(SDNode *N);
449 template <class MatchContextClass> SDValue visitMUL(SDNode *N);
450 SDValue visitMULFIX(SDNode *N);
451 SDValue useDivRem(SDNode *N);
452 SDValue visitSDIV(SDNode *N);
453 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
454 SDValue visitUDIV(SDNode *N);
455 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
456 SDValue visitREM(SDNode *N);
457 SDValue visitMULHU(SDNode *N);
458 SDValue visitMULHS(SDNode *N);
459 SDValue visitAVG(SDNode *N);
460 SDValue visitABD(SDNode *N);
461 SDValue visitSMUL_LOHI(SDNode *N);
462 SDValue visitUMUL_LOHI(SDNode *N);
463 SDValue visitMULO(SDNode *N);
464 SDValue visitIMINMAX(SDNode *N);
465 SDValue visitAND(SDNode *N);
466 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
467 SDValue visitOR(SDNode *N);
468 SDValue visitORLike(SDValue N0, SDValue N1, const SDLoc &DL);
469 SDValue visitXOR(SDNode *N);
470 SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL);
471 SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
472 SDValue visitSHL(SDNode *N);
473 SDValue visitSRA(SDNode *N);
474 SDValue visitSRL(SDNode *N);
475 SDValue visitFunnelShift(SDNode *N);
476 SDValue visitSHLSAT(SDNode *N);
477 SDValue visitRotate(SDNode *N);
478 SDValue visitABS(SDNode *N);
479 SDValue visitBSWAP(SDNode *N);
480 SDValue visitBITREVERSE(SDNode *N);
481 SDValue visitCTLZ(SDNode *N);
482 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
483 SDValue visitCTTZ(SDNode *N);
484 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
485 SDValue visitCTPOP(SDNode *N);
486 SDValue visitSELECT(SDNode *N);
487 SDValue visitVSELECT(SDNode *N);
488 SDValue visitVP_SELECT(SDNode *N);
489 SDValue visitSELECT_CC(SDNode *N);
490 SDValue visitSETCC(SDNode *N);
491 SDValue visitSETCCCARRY(SDNode *N);
492 SDValue visitSIGN_EXTEND(SDNode *N);
493 SDValue visitZERO_EXTEND(SDNode *N);
494 SDValue visitANY_EXTEND(SDNode *N);
495 SDValue visitAssertExt(SDNode *N);
496 SDValue visitAssertAlign(SDNode *N);
497 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
498 SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
499 SDValue visitTRUNCATE(SDNode *N);
500 SDValue visitTRUNCATE_USAT_U(SDNode *N);
501 SDValue visitBITCAST(SDNode *N);
502 SDValue visitFREEZE(SDNode *N);
503 SDValue visitBUILD_PAIR(SDNode *N);
504 SDValue visitFADD(SDNode *N);
505 SDValue visitVP_FADD(SDNode *N);
506 SDValue visitVP_FSUB(SDNode *N);
507 SDValue visitSTRICT_FADD(SDNode *N);
508 SDValue visitFSUB(SDNode *N);
509 SDValue visitFMUL(SDNode *N);
510 template <class MatchContextClass> SDValue visitFMA(SDNode *N);
511 SDValue visitFMAD(SDNode *N);
512 SDValue visitFDIV(SDNode *N);
513 SDValue visitFREM(SDNode *N);
514 SDValue visitFSQRT(SDNode *N);
515 SDValue visitFCOPYSIGN(SDNode *N);
516 SDValue visitFPOW(SDNode *N);
517 SDValue visitFCANONICALIZE(SDNode *N);
518 SDValue visitSINT_TO_FP(SDNode *N);
519 SDValue visitUINT_TO_FP(SDNode *N);
520 SDValue visitFP_TO_SINT(SDNode *N);
521 SDValue visitFP_TO_UINT(SDNode *N);
522 SDValue visitXROUND(SDNode *N);
523 SDValue visitFP_ROUND(SDNode *N);
524 SDValue visitFP_EXTEND(SDNode *N);
525 SDValue visitFNEG(SDNode *N);
526 SDValue visitFABS(SDNode *N);
527 SDValue visitFCEIL(SDNode *N);
528 SDValue visitFTRUNC(SDNode *N);
529 SDValue visitFFREXP(SDNode *N);
530 SDValue visitFFLOOR(SDNode *N);
531 SDValue visitFMinMax(SDNode *N);
532 SDValue visitBRCOND(SDNode *N);
533 SDValue visitBR_CC(SDNode *N);
534 SDValue visitLOAD(SDNode *N);
535
536 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
537 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
538 SDValue replaceStoreOfInsertLoad(StoreSDNode *ST);
539
540 bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N);
541
542 SDValue visitSTORE(SDNode *N);
543 SDValue visitATOMIC_STORE(SDNode *N);
544 SDValue visitLIFETIME_END(SDNode *N);
545 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
546 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
547 SDValue visitBUILD_VECTOR(SDNode *N);
548 SDValue visitCONCAT_VECTORS(SDNode *N);
549 SDValue visitVECTOR_INTERLEAVE(SDNode *N);
550 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
551 SDValue visitVECTOR_SHUFFLE(SDNode *N);
552 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
553 SDValue visitINSERT_SUBVECTOR(SDNode *N);
554 SDValue visitVECTOR_COMPRESS(SDNode *N);
555 SDValue visitMLOAD(SDNode *N);
556 SDValue visitMSTORE(SDNode *N);
557 SDValue visitMGATHER(SDNode *N);
558 SDValue visitMSCATTER(SDNode *N);
559 SDValue visitMHISTOGRAM(SDNode *N);
560 SDValue visitPARTIAL_REDUCE_MLA(SDNode *N);
561 SDValue visitVPGATHER(SDNode *N);
562 SDValue visitVPSCATTER(SDNode *N);
563 SDValue visitVP_STRIDED_LOAD(SDNode *N);
564 SDValue visitVP_STRIDED_STORE(SDNode *N);
565 SDValue visitFP_TO_FP16(SDNode *N);
566 SDValue visitFP16_TO_FP(SDNode *N);
567 SDValue visitFP_TO_BF16(SDNode *N);
568 SDValue visitBF16_TO_FP(SDNode *N);
569 SDValue visitVECREDUCE(SDNode *N);
570 SDValue visitVPOp(SDNode *N);
571 SDValue visitGET_FPENV_MEM(SDNode *N);
572 SDValue visitSET_FPENV_MEM(SDNode *N);
573
574 template <class MatchContextClass>
575 SDValue visitFADDForFMACombine(SDNode *N);
576 template <class MatchContextClass>
577 SDValue visitFSUBForFMACombine(SDNode *N);
578 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
579
580 SDValue XformToShuffleWithZero(SDNode *N);
581 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
582 const SDLoc &DL,
583 SDNode *N,
584 SDValue N0,
585 SDValue N1);
586 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
587 SDValue N1, SDNodeFlags Flags);
588 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
589 SDValue N1, SDNodeFlags Flags);
590 SDValue reassociateReduction(unsigned RedOpc, unsigned Opc, const SDLoc &DL,
591 EVT VT, SDValue N0, SDValue N1,
592 SDNodeFlags Flags = SDNodeFlags());
593
594 SDValue visitShiftByConstant(SDNode *N);
595
596 SDValue foldSelectOfConstants(SDNode *N);
597 SDValue foldVSelectOfConstants(SDNode *N);
598 SDValue foldBinOpIntoSelect(SDNode *BO);
599 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
600 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
601 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
602 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
603 SDValue N2, SDValue N3, ISD::CondCode CC,
604 bool NotExtCompare = false);
605 SDValue convertSelectOfFPConstantsToLoadOffset(
606 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
607 ISD::CondCode CC);
608 SDValue foldSignChangeInBitcast(SDNode *N);
609 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
610 SDValue N2, SDValue N3, ISD::CondCode CC);
611 SDValue foldSelectOfBinops(SDNode *N);
612 SDValue foldSextSetcc(SDNode *N);
613 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
614 const SDLoc &DL);
615 SDValue foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL);
616 SDValue foldABSToABD(SDNode *N, const SDLoc &DL);
617 SDValue foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
618 SDValue False, ISD::CondCode CC, const SDLoc &DL);
619 SDValue foldSelectToUMin(SDValue LHS, SDValue RHS, SDValue True,
620 SDValue False, ISD::CondCode CC, const SDLoc &DL);
621 SDValue unfoldMaskedMerge(SDNode *N);
622 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
623 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
624 const SDLoc &DL, bool foldBooleans);
625 SDValue rebuildSetCC(SDValue N);
626
627 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
628 SDValue &CC, bool MatchStrict = false) const;
629 bool isOneUseSetCC(SDValue N) const;
630
631 SDValue foldAddToAvg(SDNode *N, const SDLoc &DL);
632 SDValue foldSubToAvg(SDNode *N, const SDLoc &DL);
633
634 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
635 unsigned HiOp);
636 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
637 SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
638 const TargetLowering &TLI);
639 SDValue foldPartialReduceMLAMulOp(SDNode *N);
640 SDValue foldPartialReduceAdd(SDNode *N);
641
642 SDValue CombineExtLoad(SDNode *N);
643 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
644 SDValue combineRepeatedFPDivisors(SDNode *N);
645 SDValue combineFMulOrFDivWithIntPow2(SDNode *N);
646 SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf);
647 SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
648 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
649 SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
650 SDValue BuildSDIV(SDNode *N);
651 SDValue BuildSDIVPow2(SDNode *N);
652 SDValue BuildUDIV(SDNode *N);
653 SDValue BuildSREMPow2(SDNode *N);
654 SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
655 SDValue BuildLogBase2(SDValue V, const SDLoc &DL,
656 bool KnownNeverZero = false,
657 bool InexpensiveOnly = false,
658 std::optional<EVT> OutVT = std::nullopt);
659 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
660 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
661 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
662 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
663 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
664 SDNodeFlags Flags, bool Reciprocal);
665 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
666 SDNodeFlags Flags, bool Reciprocal);
667 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
668 bool DemandHighBits = true);
669 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
670 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
671 SDValue InnerPos, SDValue InnerNeg, bool FromAdd,
672 bool HasPos, unsigned PosOpcode,
673 unsigned NegOpcode, const SDLoc &DL);
674 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
675 SDValue InnerPos, SDValue InnerNeg, bool FromAdd,
676 bool HasPos, unsigned PosOpcode,
677 unsigned NegOpcode, const SDLoc &DL);
678 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL,
679 bool FromAdd);
680 SDValue MatchLoadCombine(SDNode *N);
681 SDValue mergeTruncStores(StoreSDNode *N);
682 SDValue reduceLoadWidth(SDNode *N);
683 SDValue ReduceLoadOpStoreWidth(SDNode *N);
684 SDValue splitMergedValStore(StoreSDNode *ST);
685 SDValue TransformFPLoadStorePair(SDNode *N);
686 SDValue convertBuildVecZextToZext(SDNode *N);
687 SDValue convertBuildVecZextToBuildVecWithZeros(SDNode *N);
688 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
689 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
690 SDValue reduceBuildVecToShuffle(SDNode *N);
691 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
692 ArrayRef<int> VectorMask, SDValue VecIn1,
693 SDValue VecIn2, unsigned LeftIdx,
694 bool DidSplitVec);
695 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
696
697 /// Walk up chain skipping non-aliasing memory nodes,
698 /// looking for aliasing nodes and adding them to the Aliases vector.
699 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
700 SmallVectorImpl<SDValue> &Aliases);
701
702 /// Return true if there is any possibility that the two addresses overlap.
703 bool mayAlias(SDNode *Op0, SDNode *Op1) const;
704
705 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
706 /// chain (aliasing node.)
707 SDValue FindBetterChain(SDNode *N, SDValue Chain);
708
709 /// Try to replace a store and any possibly adjacent stores on
710 /// consecutive chains with better chains. Return true only if St is
711 /// replaced.
712 ///
713 /// Notice that other chains may still be replaced even if the function
714 /// returns false.
715 bool findBetterNeighborChains(StoreSDNode *St);
716
717 // Helper for findBetterNeighborChains. Walk up the store chain and add
718 // additional chained stores that do not overlap and can be parallelized.
719 bool parallelizeChainedStores(StoreSDNode *St);
720
721 /// Holds a pointer to an LSBaseSDNode as well as information on where it
722 /// is located in a sequence of memory operations connected by a chain.
723 struct MemOpLink {
724 // Ptr to the mem node.
725 LSBaseSDNode *MemNode;
726
727 // Offset from the base ptr.
728 int64_t OffsetFromBase;
729
730 MemOpLink(LSBaseSDNode *N, int64_t Offset)
731 : MemNode(N), OffsetFromBase(Offset) {}
732 };
733
734 // Classify the origin of a stored value.
735 enum class StoreSource { Unknown, Constant, Extract, Load };
736 StoreSource getStoreSource(SDValue StoreVal) {
737 switch (StoreVal.getOpcode()) {
738 case ISD::Constant:
739 case ISD::ConstantFP:
740 return StoreSource::Constant;
741 case ISD::BUILD_VECTOR:
742 if (ISD::isBuildVectorOfConstantSDNodes(StoreVal.getNode()) ||
743 ISD::isBuildVectorOfConstantFPSDNodes(StoreVal.getNode()))
744 return StoreSource::Constant;
745 return StoreSource::Unknown;
746 case ISD::EXTRACT_VECTOR_ELT:
747 case ISD::EXTRACT_SUBVECTOR:
748 return StoreSource::Extract;
749 case ISD::LOAD:
750 return StoreSource::Load;
751 default:
752 return StoreSource::Unknown;
753 }
754 }
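 // For example, a store of (Constant 42) or (ConstantFP 1.0) is classified
 // as StoreSource::Constant, a store of an extract_vector_elt or
 // extract_subvector as StoreSource::Extract, and a store of a loaded value
 // as StoreSource::Load; everything else is StoreSource::Unknown.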
755
756 /// This is a helper function for visitMUL to check the profitability
757 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
758 /// MulNode is the original multiply, AddNode is (add x, c1),
759 /// and ConstNode is c2.
760 bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
761 SDValue ConstNode);
762
763 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
764 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
765 /// the type of the loaded value to be extended.
766 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
767 EVT LoadResultTy, EVT &ExtVT);
768
769 /// Helper function to calculate whether the given Load/Store can have its
770 /// width reduced to ExtVT.
771 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
772 EVT &MemVT, unsigned ShAmt = 0);
773
774 /// Used by BackwardsPropagateMask to find suitable loads.
775 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
776 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
777 ConstantSDNode *Mask, SDNode *&NodeToMask);
778 /// Attempt to propagate a given AND node back to load leaves so that they
779 /// can be combined into narrow loads.
780 bool BackwardsPropagateMask(SDNode *N);
781
782 /// Helper function for mergeConsecutiveStores which merges the component
783 /// store chains.
784 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
785 unsigned NumStores);
786
787 /// Helper function for mergeConsecutiveStores which checks if all the store
788 /// nodes have the same underlying object. We can still reuse the first
789 /// store's pointer info if all the stores are from the same object.
790 bool hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes);
791
792 /// This is a helper function for mergeConsecutiveStores. When the source
793 /// elements of the consecutive stores are all constants or all extracted
794 /// vector elements, try to merge them into one larger store introducing
795 /// bitcasts if necessary. \return True if a merged store was created.
796 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
797 EVT MemVT, unsigned NumStores,
798 bool IsConstantSrc, bool UseVector,
799 bool UseTrunc);
800
801 /// This is a helper function for mergeConsecutiveStores. Stores that
802 /// potentially may be merged with St are placed in StoreNodes. On success,
803 /// returns a chain predecessor to all store candidates.
804 SDNode *getStoreMergeCandidates(StoreSDNode *St,
805 SmallVectorImpl<MemOpLink> &StoreNodes);
806
807 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
808 /// have indirect dependency through their operands. RootNode is the
809 /// predecessor to all stores calculated by getStoreMergeCandidates and is
810 /// used to prune the dependency check. \return True if safe to merge.
811 bool checkMergeStoreCandidatesForDependencies(
812 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
813 SDNode *RootNode);
814
815 /// Helper function for tryStoreMergeOfLoads. Checks if the load/store
816 /// chain has a call in it. \return True if a call is found.
817 bool hasCallInLdStChain(StoreSDNode *St, LoadSDNode *Ld);
818
819 /// This is a helper function for mergeConsecutiveStores. Given a list of
820 /// store candidates, find the first N that are consecutive in memory.
821 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
822 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
823 int64_t ElementSizeBytes) const;
824
825 /// This is a helper function for mergeConsecutiveStores. It is used for
826 /// store chains that are composed entirely of constant values.
827 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
828 unsigned NumConsecutiveStores,
829 EVT MemVT, SDNode *Root, bool AllowVectors);
830
831 /// This is a helper function for mergeConsecutiveStores. It is used for
832 /// store chains that are composed entirely of extracted vector elements.
833 /// When extracting multiple vector elements, try to store them in one
834 /// vector store rather than a sequence of scalar stores.
835 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
836 unsigned NumConsecutiveStores, EVT MemVT,
837 SDNode *Root);
838
839 /// This is a helper function for mergeConsecutiveStores. It is used for
840 /// store chains that are composed entirely of loaded values.
841 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
842 unsigned NumConsecutiveStores, EVT MemVT,
843 SDNode *Root, bool AllowVectors,
844 bool IsNonTemporalStore, bool IsNonTemporalLoad);
845
846 /// Merge consecutive store operations into a wide store.
847 /// This optimization uses wide integers or vectors when possible.
848 /// \return true if stores were merged.
849 bool mergeConsecutiveStores(StoreSDNode *St);
850
851 /// Try to transform a truncation where C is a constant:
852 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
853 ///
854 /// \p N needs to be a truncation and its first operand an AND. Other
855 /// requirements are checked by the function (e.g. that the trunc is
856 /// single-use); if any are not met, an empty SDValue is returned.
857 SDValue distributeTruncateThroughAnd(SDNode *N);
858
859 /// Helper function to determine whether the target supports operation
860 /// given by \p Opcode for type \p VT, that is, whether the operation
861 /// is legal or custom before legalizing operations, and whether it is
862 /// legal (but not custom) after legalization.
863 bool hasOperation(unsigned Opcode, EVT VT) {
864 return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
865 }
866
867 bool hasUMin(EVT VT) const {
868 auto LK = TLI.getTypeConversion(*DAG.getContext(), VT);
869 return (LK.first == TargetLoweringBase::TypeLegal ||
870 LK.first == TargetLoweringBase::TypePromoteInteger) &&
871 TLI.isOperationLegalOrCustom(ISD::UMIN, LK.second);
872 }
873
874 public:
875 /// Runs the dag combiner on all nodes in the work list
876 void Run(CombineLevel AtLevel);
877
878 SelectionDAG &getDAG() const { return DAG; }
879
880 /// Convenience wrapper around TargetLowering::getShiftAmountTy.
881 EVT getShiftAmountTy(EVT LHSTy) {
882 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout());
883 }
884
885 /// This method returns true if we are running before type legalization or
886 /// if the specified VT is legal.
887 bool isTypeLegal(const EVT &VT) {
888 if (!LegalTypes) return true;
889 return TLI.isTypeLegal(VT);
890 }
891
892 /// Convenience wrapper around TargetLowering::getSetCCResultType
893 EVT getSetCCResultType(EVT VT) const {
894 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
895 }
896
897 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
898 SDValue OrigLoad, SDValue ExtLoad,
899 ISD::NodeType ExtType);
900 };
901
902/// This class is a DAGUpdateListener that removes any deleted
903/// nodes from the worklist.
904class WorklistRemover : public SelectionDAG::DAGUpdateListener {
905 DAGCombiner &DC;
906
907public:
908 explicit WorklistRemover(DAGCombiner &dc)
909 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
910
911 void NodeDeleted(SDNode *N, SDNode *E) override {
912 DC.removeFromWorklist(N);
913 }
914};
915
916class WorklistInserter : public SelectionDAG::DAGUpdateListener {
917 DAGCombiner &DC;
918
919public:
920 explicit WorklistInserter(DAGCombiner &dc)
921 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
922
923 // FIXME: Ideally we could add N to the worklist, but this causes exponential
924 // compile time costs in large DAGs, e.g. Halide.
925 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
926};
927
928} // end anonymous namespace
929
930//===----------------------------------------------------------------------===//
931// TargetLowering::DAGCombinerInfo implementation
932//===----------------------------------------------------------------------===//
933
934void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
935 ((DAGCombiner*)DC)->AddToWorklist(N);
936}
937
938SDValue TargetLowering::DAGCombinerInfo::
939CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
940 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
941}
942
943SDValue TargetLowering::DAGCombinerInfo::
944CombineTo(SDNode *N, SDValue Res, bool AddTo) {
945 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
946}
947
948SDValue TargetLowering::DAGCombinerInfo::
949CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
950 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
951}
952
953bool TargetLowering::DAGCombinerInfo::
954recursivelyDeleteUnusedNodes(SDNode *N) {
955 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
956}
957
958void TargetLowering::DAGCombinerInfo::
959CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
960 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
961}
962
963//===----------------------------------------------------------------------===//
964// Helper Functions
965//===----------------------------------------------------------------------===//
966
967void DAGCombiner::deleteAndRecombine(SDNode *N) {
968 removeFromWorklist(N);
969
970 // If the operands of this node are only used by the node, they will now be
971 // dead. Make sure to re-visit them and recursively delete dead nodes.
972 for (const SDValue &Op : N->ops())
973 // For an operand generating multiple values, one of the values may
974 // become dead allowing further simplification (e.g. split index
975 // arithmetic from an indexed load).
976 if (Op->hasOneUse() || Op->getNumValues() > 1)
977 AddToWorklist(Op.getNode());
978
979 DAG.DeleteNode(N);
980}
981
982 // APInts must be the same size for most operations; this helper
983// function zero extends the shorter of the pair so that they match.
984// We provide an Offset so that we can create bitwidths that won't overflow.
985static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
986 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
987 LHS = LHS.zext(Bits);
988 RHS = RHS.zext(Bits);
989}
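// For example, given LHS = APInt(8, 0x80) and RHS = APInt(16, 0x100),
// zeroExtendToMatch(LHS, RHS, /*Offset=*/1) zero extends both values to
// 17 bits, leaving one spare bit of headroom for later arithmetic.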
990
991// Return true if this node is a setcc, or is a select_cc
992// that selects between the target values used for true and false, making it
993// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
994// the appropriate nodes based on the type of node we are checking. This
995// simplifies life a bit for the callers.
996bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
997 SDValue &CC, bool MatchStrict) const {
998 if (N.getOpcode() == ISD::SETCC) {
999 LHS = N.getOperand(0);
1000 RHS = N.getOperand(1);
1001 CC = N.getOperand(2);
1002 return true;
1003 }
1004
1005 if (MatchStrict &&
1006 (N.getOpcode() == ISD::STRICT_FSETCC ||
1007 N.getOpcode() == ISD::STRICT_FSETCCS)) {
1008 LHS = N.getOperand(1);
1009 RHS = N.getOperand(2);
1010 CC = N.getOperand(3);
1011 return true;
1012 }
1013
1014 if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
1015 !TLI.isConstFalseVal(N.getOperand(3)))
1016 return false;
1017
1018 if (TLI.getBooleanContents(N.getValueType()) ==
1019 TargetLowering::UndefinedBooleanContent)
1020 return false;
1021
1022 LHS = N.getOperand(0);
1023 RHS = N.getOperand(1);
1024 CC = N.getOperand(4);
1025 return true;
1026}
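// For example, (select_cc lhs, rhs, TrueVal, FalseVal, cc) where TrueVal and
// FalseVal are the target's canonical true and false constants is treated
// here just like (setcc lhs, rhs, cc).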
1027
1028/// Return true if this is a SetCC-equivalent operation with only one use.
1029/// If this is true, it allows the users to invert the operation for free when
1030/// it is profitable to do so.
1031bool DAGCombiner::isOneUseSetCC(SDValue N) const {
1032 SDValue N0, N1, N2;
1033 if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
1034 return true;
1035 return false;
1036}
1037
1038static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
1039 if (!ScalarTy.isSimple())
1040 return false;
1041
1042 uint64_t MaskForTy = 0ULL;
1043 switch (ScalarTy.getSimpleVT().SimpleTy) {
1044 case MVT::i8:
1045 MaskForTy = 0xFFULL;
1046 break;
1047 case MVT::i16:
1048 MaskForTy = 0xFFFFULL;
1049 break;
1050 case MVT::i32:
1051 MaskForTy = 0xFFFFFFFFULL;
1052 break;
1053 default:
1054 return false;
1055 break;
1056 }
1057
1058 APInt Val;
1059 if (ISD::isConstantSplatVector(N, Val))
1060 return Val.getLimitedValue() == MaskForTy;
1061
1062 return false;
1063}
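// For example, a vector splat of the constant 0xFFFF matches ScalarTy ==
// MVT::i16, while a splat of 0xFF00 matches none of the handled scalar types
// and the function returns false.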
1064
1065// Determines if it is a constant integer or a splat/build vector of constant
1066// integers (and undefs).
1067// Do not permit build vector implicit truncation.
1068static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
1069 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
1070 return !(Const->isOpaque() && NoOpaques);
1071 if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
1072 return false;
1073 unsigned BitWidth = N.getScalarValueSizeInBits();
1074 for (const SDValue &Op : N->op_values()) {
1075 if (Op.isUndef())
1076 continue;
1077 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
1078 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
1079 (Const->isOpaque() && NoOpaques))
1080 return false;
1081 }
1082 return true;
1083}
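// For example, a plain ConstantSDNode, a BUILD_VECTOR of same-width constants
// (possibly with undef elements), and a constant SPLAT_VECTOR all return
// true, but a BUILD_VECTOR whose constant operands are wider than the vector
// element type (implicit truncation) returns false.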
1084
1085 // Determines if a BUILD_VECTOR is composed entirely of constants, possibly
1086 // mixed with undefs.
1087static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
1088 if (V.getOpcode() != ISD::BUILD_VECTOR)
1089 return false;
1090 return isConstantOrConstantVector(V, NoOpaques) ||
1091 ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
1092}
1093
1094// Determine if this is an indexed load with an opaque target constant index.
1095static bool canSplitIdx(LoadSDNode *LD) {
1096 return MaySplitLoadIndex &&
1097 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
1098 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
1099}
1100
1101bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1102 const SDLoc &DL,
1103 SDNode *N,
1104 SDValue N0,
1105 SDValue N1) {
1106 // Currently this only tries to ensure we don't undo the GEP splits done by
1107 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1108 // we check if the following transformation would be problematic:
1109 // (load/store (add, (add, x, offset1), offset2)) ->
1110 // (load/store (add, x, offset1+offset2)).
1111
1112 // (load/store (add, (add, x, y), offset2)) ->
1113 // (load/store (add, (add, x, offset2), y)).
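 // For example, if offset1 and offset2 are each legal immediate offsets for
 // the target's addressing mode but offset1+offset2 is not, folding the two
 // adds together would undo a split that CodeGenPrepare made on purpose.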
1114
1115 if (!N0.isAnyAdd())
1116 return false;
1117
1118 // Check for vscale addressing modes.
1119 // (load/store (add/sub (add x, y), vscale))
1120 // (load/store (add/sub (add x, y), (lsl vscale, C)))
1121 // (load/store (add/sub (add x, y), (mul vscale, C)))
1122 if ((N1.getOpcode() == ISD::VSCALE ||
1123 ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::MUL) &&
1124 N1.getOperand(0).getOpcode() == ISD::VSCALE &&
1125 isa<ConstantSDNode>(N1.getOperand(1)))) &&
1126 N1.getValueType().getFixedSizeInBits() <= 64) {
1127 int64_t ScalableOffset = N1.getOpcode() == ISD::VSCALE
1128 ? N1.getConstantOperandVal(0)
1129 : (N1.getOperand(0).getConstantOperandVal(0) *
1130 (N1.getOpcode() == ISD::SHL
1131 ? (1LL << N1.getConstantOperandVal(1))
1132 : N1.getConstantOperandVal(1)));
1133 if (Opc == ISD::SUB)
1134 ScalableOffset = -ScalableOffset;
1135 if (all_of(N->users(), [&](SDNode *Node) {
1136 if (auto *LoadStore = dyn_cast<MemSDNode>(Node);
1137 LoadStore && LoadStore->getBasePtr().getNode() == N) {
1138 TargetLoweringBase::AddrMode AM;
1139 AM.HasBaseReg = true;
1140 AM.ScalableOffset = ScalableOffset;
1141 EVT VT = LoadStore->getMemoryVT();
1142 unsigned AS = LoadStore->getAddressSpace();
1143 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1144 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy,
1145 AS);
1146 }
1147 return false;
1148 }))
1149 return true;
1150 }
1151
1152 if (Opc != ISD::ADD && Opc != ISD::PTRADD)
1153 return false;
1154
1155 auto *C2 = dyn_cast<ConstantSDNode>(N1);
1156 if (!C2)
1157 return false;
1158
1159 const APInt &C2APIntVal = C2->getAPIntValue();
1160 if (C2APIntVal.getSignificantBits() > 64)
1161 return false;
1162
1163 if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
1164 if (N0.hasOneUse())
1165 return false;
1166
1167 const APInt &C1APIntVal = C1->getAPIntValue();
1168 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1169 if (CombinedValueIntVal.getSignificantBits() > 64)
1170 return false;
1171 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1172
1173 for (SDNode *Node : N->users()) {
1174 if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
1175 // Is x[offset2] already not a legal addressing mode? If so then
1176 // reassociating the constants breaks nothing (we test offset2 because
1177 // that's the one we hope to fold into the load or store).
1178 TargetLoweringBase::AddrMode AM;
1179 AM.HasBaseReg = true;
1180 AM.BaseOffs = C2APIntVal.getSExtValue();
1181 EVT VT = LoadStore->getMemoryVT();
1182 unsigned AS = LoadStore->getAddressSpace();
1183 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1184 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1185 continue;
1186
1187 // Would x[offset1+offset2] still be a legal addressing mode?
1188 AM.BaseOffs = CombinedValue;
1189 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1190 return true;
1191 }
1192 }
1193 } else {
1194 if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
1195 if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
1196 return false;
1197
1198 for (SDNode *Node : N->users()) {
1199 auto *LoadStore = dyn_cast<MemSDNode>(Node);
1200 if (!LoadStore)
1201 return false;
1202
1203 // Is x[offset2] a legal addressing mode? If so then
1204 // reassociating the constants breaks the address pattern.
1205 TargetLoweringBase::AddrMode AM;
1206 AM.HasBaseReg = true;
1207 AM.BaseOffs = C2APIntVal.getSExtValue();
1208 EVT VT = LoadStore->getMemoryVT();
1209 unsigned AS = LoadStore->getAddressSpace();
1210 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1211 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1212 return false;
1213 }
1214 return true;
1215 }
1216
1217 return false;
1218}
1219
1220/// Helper for DAGCombiner::reassociateOps. Try to reassociate (Opc N0, N1) if
1221/// \p N0 is the same kind of operation as \p Opc.
1222SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1223 SDValue N0, SDValue N1,
1224 SDNodeFlags Flags) {
1225 EVT VT = N0.getValueType();
1226
1227 if (N0.getOpcode() != Opc)
1228 return SDValue();
1229
1230 SDValue N00 = N0.getOperand(0);
1231 SDValue N01 = N0.getOperand(1);
1232
1233 if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1234 SDNodeFlags NewFlags;
1235 if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
1236 Flags.hasNoUnsignedWrap())
1237 NewFlags |= SDNodeFlags::NoUnsignedWrap;
1238
1239 if (DAG.isConstantIntBuildVectorOrConstantInt(N01)) {
1240 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1241 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1})) {
1242 NewFlags.setDisjoint(Flags.hasDisjoint() &&
1243 N0->getFlags().hasDisjoint());
1244 return DAG.getNode(Opc, DL, VT, N00, OpNode, NewFlags);
1245 }
1246 return SDValue();
1247 }
1248 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1249 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1250 // iff (op x, c1) has one use
1251 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, NewFlags);
1252 return DAG.getNode(Opc, DL, VT, OpNode, N01, NewFlags);
1253 }
1254 }
1255
1256 // Check for repeated operand logic simplifications.
1257 if (Opc == ISD::AND || Opc == ISD::OR) {
1258 // (N00 & N01) & N00 --> N00 & N01
1259 // (N00 & N01) & N01 --> N00 & N01
1260 // (N00 | N01) | N00 --> N00 | N01
1261 // (N00 | N01) | N01 --> N00 | N01
1262 if (N1 == N00 || N1 == N01)
1263 return N0;
1264 }
1265 if (Opc == ISD::XOR) {
1266 // (N00 ^ N01) ^ N00 --> N01
1267 if (N1 == N00)
1268 return N01;
1269 // (N00 ^ N01) ^ N01 --> N00
1270 if (N1 == N01)
1271 return N00;
1272 }
1273
1274 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1275 if (N1 != N01) {
1276 // Reassociate if (op N00, N1) already exists.
1277 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
1278 // If (Op (Op N00, N1), N01) already exists, we need to stop
1279 // reassociating here to avoid an infinite loop.
1280 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
1281 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
1282 }
1283 }
1284
1285 if (N1 != N00) {
1286 // Reassociate if (op N01, N1) already exists.
1287 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
1288 // If (Op (Op N01, N1), N00) already exists, we need to stop
1289 // reassociating here to avoid an infinite loop.
1290 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
1291 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
1292 }
1293 }
1294
1295 // Reassociate the operands from (OR/AND (OR/AND(N00, N01)), N1) to (OR/AND
1296 // (OR/AND(N00, N1)), N01) when N00 and N1 are comparisons with the same
1297 // predicate or to (OR/AND (OR/AND(N1, N01)), N00) when N01 and N1 are
1298 // comparisons with the same predicate. This enables optimizations as the
1299 // following one:
1300 // CMP(A,C)||CMP(B,C) => CMP(MIN/MAX(A,B), C)
1301 // CMP(A,C)&&CMP(B,C) => CMP(MIN/MAX(A,B), C)
1302 if (Opc == ISD::AND || Opc == ISD::OR) {
1303 if (N1->getOpcode() == ISD::SETCC && N00->getOpcode() == ISD::SETCC &&
1304 N01->getOpcode() == ISD::SETCC) {
1305 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
1306 ISD::CondCode CC00 = cast<CondCodeSDNode>(N00.getOperand(2))->get();
1307 ISD::CondCode CC01 = cast<CondCodeSDNode>(N01.getOperand(2))->get();
1308 if (CC1 == CC00 && CC1 != CC01) {
1309 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, Flags);
1310 return DAG.getNode(Opc, DL, VT, OpNode, N01, Flags);
1311 }
1312 if (CC1 == CC01 && CC1 != CC00) {
1313 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N01, N1, Flags);
1314 return DAG.getNode(Opc, DL, VT, OpNode, N00, Flags);
1315 }
1316 }
1317 }
1318 }
1319
1320 return SDValue();
1321}
1322
1323/// Try to reassociate commutative (Opc N0, N1) if either \p N0 or \p N1 is the
1324/// same kind of operation as \p Opc.
1325SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1326 SDValue N1, SDNodeFlags Flags) {
1327 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1328
1329 // Floating-point reassociation is not allowed without loose FP math.
1330 if (N0.getValueType().isFloatingPoint() ||
1331 N1.getValueType().isFloatingPoint())
1332 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1333 return SDValue();
1334
1335 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1, Flags))
1336 return Combined;
1337 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0, Flags))
1338 return Combined;
1339 return SDValue();
1340}
1341
1342// Try to fold Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y))
1343// Note that we only expect Flags to be passed from FP operations. For integer
1344// operations they need to be dropped.
1345SDValue DAGCombiner::reassociateReduction(unsigned RedOpc, unsigned Opc,
1346 const SDLoc &DL, EVT VT, SDValue N0,
1347 SDValue N1, SDNodeFlags Flags) {
1348 if (N0.getOpcode() == RedOpc && N1.getOpcode() == RedOpc &&
1349 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
1350 N0->hasOneUse() && N1->hasOneUse() &&
1351 hasOperation(Opc, N0.getOperand(0).getValueType()) &&
1352 TLI.shouldReassociateReduction(RedOpc, N0.getOperand(0).getValueType())) {
1353 SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
1354 return DAG.getNode(RedOpc, DL, VT,
1355 DAG.getNode(Opc, DL, N0.getOperand(0).getValueType(),
1356 N0.getOperand(0), N1.getOperand(0)));
1357 }
1358
1359 // Reassociate op(op(vecreduce(a), b), op(vecreduce(c), d)) into
1360 // op(vecreduce(op(a, c)), op(b, d)), to combine the reductions into a
1361 // single node.
1362 SDValue A, B, C, D, RedA, RedB;
1363 if (sd_match(N0, m_OneUse(m_c_BinOp(
1364 Opc,
1365 m_AllOf(m_OneUse(m_UnaryOp(RedOpc, m_Value(A))),
1366 m_Value(RedA)),
1367 m_Value(B)))) &&
1368 sd_match(N1, m_OneUse(m_c_BinOp(
1369 Opc,
1370 m_AllOf(m_OneUse(m_UnaryOp(RedOpc, m_Value(C))),
1371 m_Value(RedB)),
1372 m_Value(D)))) &&
1373 !sd_match(B, m_UnaryOp(RedOpc, m_Value())) &&
1374 !sd_match(D, m_UnaryOp(RedOpc, m_Value())) &&
1375 A.getValueType() == C.getValueType() &&
1376 hasOperation(Opc, A.getValueType()) &&
1377 TLI.shouldReassociateReduction(RedOpc, VT)) {
1378 if ((Opc == ISD::FADD || Opc == ISD::FMUL) &&
1379 (!N0->getFlags().hasAllowReassociation() ||
1380 !N1->getFlags().hasAllowReassociation() ||
1381 !RedA->getFlags().hasAllowReassociation() ||
1382 !RedB->getFlags().hasAllowReassociation()))
1383 return SDValue();
1384 SelectionDAG::FlagInserter FlagsInserter(
1385 DAG, Flags & N0->getFlags() & N1->getFlags() & RedA->getFlags() &
1386 RedB->getFlags());
1387 SDValue Op = DAG.getNode(Opc, DL, A.getValueType(), A, C);
1388 SDValue Red = DAG.getNode(RedOpc, DL, VT, Op);
1389 SDValue Op2 = DAG.getNode(Opc, DL, VT, B, D);
1390 return DAG.getNode(Opc, DL, VT, Red, Op2);
1391 }
1392 return SDValue();
1393}
1394
1395SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1396 bool AddTo) {
1397 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1398 ++NodesCombined;
1399 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1400 To[0].dump(&DAG);
1401 dbgs() << " and " << NumTo - 1 << " other values\n");
1402 for (unsigned i = 0, e = NumTo; i != e; ++i)
1403 assert((!To[i].getNode() ||
1404 N->getValueType(i) == To[i].getValueType()) &&
1405 "Cannot combine value to value of different type!");
1406
1407 WorklistRemover DeadNodes(*this);
1408 DAG.ReplaceAllUsesWith(N, To);
1409 if (AddTo) {
1410 // Push the new nodes and any users onto the worklist
1411 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1412 if (To[i].getNode())
1413 AddToWorklistWithUsers(To[i].getNode());
1414 }
1415 }
1416
1417 // Finally, if the node is now dead, remove it from the graph. The node
1418 // may not be dead if the replacement process recursively simplified to
1419 // something else needing this node.
1420 if (N->use_empty())
1421 deleteAndRecombine(N);
1422 return SDValue(N, 0);
1423}
1424
1425void DAGCombiner::
1426CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1427 // Replace the old value with the new one.
1428 ++NodesCombined;
1429 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
1430 dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
1431
1432 // Replace all uses.
1433 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1434
1435 // Push the new node and any (possibly new) users onto the worklist.
1436 AddToWorklistWithUsers(TLO.New.getNode());
1437
1438 // Finally, if the node is now dead, remove it from the graph.
1439 recursivelyDeleteUnusedNodes(TLO.Old.getNode());
1440}
1441
1442/// Check the specified integer node value to see if it can be simplified or if
1443/// things it uses can be simplified by bit propagation. If so, return true.
1444bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1445 const APInt &DemandedElts,
1446 bool AssumeSingleUse) {
1447 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1448 KnownBits Known;
1449 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1450 AssumeSingleUse))
1451 return false;
1452
1453 // Revisit the node.
1454 AddToWorklist(Op.getNode());
1455
1456 CommitTargetLoweringOpt(TLO);
1457 return true;
1458}
1459
1460/// Check the specified vector node value to see if it can be simplified or
1461/// if things it uses can be simplified as it only uses some of the elements.
1462/// If so, return true.
1463bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1464 const APInt &DemandedElts,
1465 bool AssumeSingleUse) {
1466 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1467 APInt KnownUndef, KnownZero;
1468 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1469 TLO, 0, AssumeSingleUse))
1470 return false;
1471
1472 // Revisit the node.
1473 AddToWorklist(Op.getNode());
1474
1475 CommitTargetLoweringOpt(TLO);
1476 return true;
1477}
1478
1479void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1480 SDLoc DL(Load);
1481 EVT VT = Load->getValueType(0);
1482 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1483
1484 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1485 Trunc.dump(&DAG); dbgs() << '\n');
1486
1487 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1488 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1489
1490 AddToWorklist(Trunc.getNode());
1491 recursivelyDeleteUnusedNodes(Load);
1492}
1493
1494SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1495 Replace = false;
1496 SDLoc DL(Op);
1497 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1498 LoadSDNode *LD = cast<LoadSDNode>(Op);
1499 EVT MemVT = LD->getMemoryVT();
1500 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1501 : LD->getExtensionType();
1502 Replace = true;
1503 return DAG.getExtLoad(ExtType, DL, PVT,
1504 LD->getChain(), LD->getBasePtr(),
1505 MemVT, LD->getMemOperand());
1506 }
1507
1508 unsigned Opc = Op.getOpcode();
1509 switch (Opc) {
1510 default: break;
1511 case ISD::AssertSext:
1512 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1513 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1514 break;
1515 case ISD::AssertZext:
1516 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1517 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1518 break;
1519 case ISD::Constant: {
1520 unsigned ExtOpc =
1521 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1522 return DAG.getNode(ExtOpc, DL, PVT, Op);
1523 }
1524 }
1525
1526 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1527 return SDValue();
1528 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1529}
1530
1531SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1532 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1533 return SDValue();
1534 EVT OldVT = Op.getValueType();
1535 SDLoc DL(Op);
1536 bool Replace = false;
1537 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1538 if (!NewOp.getNode())
1539 return SDValue();
1540 AddToWorklist(NewOp.getNode());
1541
1542 if (Replace)
1543 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1544 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1545 DAG.getValueType(OldVT));
1546}
1547
1548SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1549 EVT OldVT = Op.getValueType();
1550 SDLoc DL(Op);
1551 bool Replace = false;
1552 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1553 if (!NewOp.getNode())
1554 return SDValue();
1555 AddToWorklist(NewOp.getNode());
1556
1557 if (Replace)
1558 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1559 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1560}
1561
1562/// Promote the specified integer binary operation if the target indicates it is
1563/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1564/// i32 since i16 instructions are longer.
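// Rough sketch of the rewrite, assuming the target reports i16 as undesirable
// and picks i32 as the promoted type:
//   (add i16 a, b) -> (trunc i32 (add i32 (anyext a), (anyext b)))
// Loads feeding the operation are promoted to extending loads rather than
// being wrapped in a separate extend.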
1565SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1566 if (!LegalOperations)
1567 return SDValue();
1568
1569 EVT VT = Op.getValueType();
1570 if (VT.isVector() || !VT.isInteger())
1571 return SDValue();
1572
1573 // If operation type is 'undesirable', e.g. i16 on x86, consider
1574 // promoting it.
1575 unsigned Opc = Op.getOpcode();
1576 if (TLI.isTypeDesirableForOp(Opc, VT))
1577 return SDValue();
1578
1579 EVT PVT = VT;
1580 // Consult target whether it is a good idea to promote this operation and
1581 // what's the right type to promote it to.
1582 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1583 assert(PVT != VT && "Don't know what type to promote to!");
1584
1585 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1586
1587 bool Replace0 = false;
1588 SDValue N0 = Op.getOperand(0);
1589 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1590
1591 bool Replace1 = false;
1592 SDValue N1 = Op.getOperand(1);
1593 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1594 SDLoc DL(Op);
1595
1596 SDValue RV =
1597 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1598
1599 // We are always replacing N0/N1's use in N and only need additional
1600 // replacements if there are additional uses.
1601 // Note: We are checking uses of the *nodes* (SDNode) rather than values
1602 // (SDValue) here because the node may reference multiple values
1603 // (for example, the chain value of a load node).
1604 Replace0 &= !N0->hasOneUse();
1605 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1606
1607 // Combine Op here so it is preserved past replacements.
1608 CombineTo(Op.getNode(), RV);
1609
1610 // If operands have a use ordering, make sure we deal with
1611 // predecessor first.
1612 if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
1613 std::swap(N0, N1);
1614 std::swap(NN0, NN1);
1615 }
1616
1617 if (Replace0) {
1618 AddToWorklist(NN0.getNode());
1619 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1620 }
1621 if (Replace1) {
1622 AddToWorklist(NN1.getNode());
1623 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1624 }
1625 return Op;
1626 }
1627 return SDValue();
1628}
1629
1630/// Promote the specified integer shift operation if the target indicates it is
1631/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1632/// i32 since i16 instructions are longer.
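// Rough sketch, assuming an i16 -> i32 promotion:
//   (srl i16 x, c) -> (trunc (srl i32 (zext x), c))
// SRA promotes its operand with sext and SRL with zext so the bits shifted
// into the low half remain correct; other shifts may use any-extend.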
1633SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1634 if (!LegalOperations)
1635 return SDValue();
1636
1637 EVT VT = Op.getValueType();
1638 if (VT.isVector() || !VT.isInteger())
1639 return SDValue();
1640
1641 // If operation type is 'undesirable', e.g. i16 on x86, consider
1642 // promoting it.
1643 unsigned Opc = Op.getOpcode();
1644 if (TLI.isTypeDesirableForOp(Opc, VT))
1645 return SDValue();
1646
1647 EVT PVT = VT;
1648 // Consult target whether it is a good idea to promote this operation and
1649 // what's the right type to promote it to.
1650 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1651 assert(PVT != VT && "Don't know what type to promote to!");
1652
1653 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1654
1655 bool Replace = false;
1656 SDValue N0 = Op.getOperand(0);
1657 if (Opc == ISD::SRA)
1658 N0 = SExtPromoteOperand(N0, PVT);
1659 else if (Opc == ISD::SRL)
1660 N0 = ZExtPromoteOperand(N0, PVT);
1661 else
1662 N0 = PromoteOperand(N0, PVT, Replace);
1663
1664 if (!N0.getNode())
1665 return SDValue();
1666
1667 SDLoc DL(Op);
1668 SDValue N1 = Op.getOperand(1);
1669 SDValue RV =
1670 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1671
1672 if (Replace)
1673 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1674
1675 // Deal with Op being deleted.
1676 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1677 return RV;
1678 }
1679 return SDValue();
1680}
1681
1682SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1683 if (!LegalOperations)
1684 return SDValue();
1685
1686 EVT VT = Op.getValueType();
1687 if (VT.isVector() || !VT.isInteger())
1688 return SDValue();
1689
1690 // If operation type is 'undesirable', e.g. i16 on x86, consider
1691 // promoting it.
1692 unsigned Opc = Op.getOpcode();
1693 if (TLI.isTypeDesirableForOp(Opc, VT))
1694 return SDValue();
1695
1696 EVT PVT = VT;
1697 // Consult target whether it is a good idea to promote this operation and
1698 // what's the right type to promote it to.
1699 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1700 assert(PVT != VT && "Don't know what type to promote to!");
1701 // fold (aext (aext x)) -> (aext x)
1702 // fold (aext (zext x)) -> (zext x)
1703 // fold (aext (sext x)) -> (sext x)
1704 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1705 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1706 }
1707 return SDValue();
1708}
1709
1710bool DAGCombiner::PromoteLoad(SDValue Op) {
1711 if (!LegalOperations)
1712 return false;
1713
1714 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1715 return false;
1716
1717 EVT VT = Op.getValueType();
1718 if (VT.isVector() || !VT.isInteger())
1719 return false;
1720
1721 // If operation type is 'undesirable', e.g. i16 on x86, consider
1722 // promoting it.
1723 unsigned Opc = Op.getOpcode();
1724 if (TLI.isTypeDesirableForOp(Opc, VT))
1725 return false;
1726
1727 EVT PVT = VT;
1728 // Consult target whether it is a good idea to promote this operation and
1729 // what's the right type to promote it to.
1730 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1731 assert(PVT != VT && "Don't know what type to promote to!");
1732
1733 SDLoc DL(Op);
1734 SDNode *N = Op.getNode();
1735 LoadSDNode *LD = cast<LoadSDNode>(N);
1736 EVT MemVT = LD->getMemoryVT();
1737 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1738 : LD->getExtensionType();
1739 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1740 LD->getChain(), LD->getBasePtr(),
1741 MemVT, LD->getMemOperand());
1742 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1743
1744 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1745 Result.dump(&DAG); dbgs() << '\n');
1746
1747 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1748 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1749
1750 AddToWorklist(Result.getNode());
1751 recursivelyDeleteUnusedNodes(N);
1752 return true;
1753 }
1754
1755 return false;
1756}
1757
1758/// Recursively delete a node which has no uses and any operands for
1759/// which it is the only use.
1760///
1761/// Note that this both deletes the nodes and removes them from the worklist.
1762 /// It also adds any nodes that have had a user deleted to the worklist, as they
1763 /// may now have only one use and be subject to other combines.
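// For example, if a dead ADD was the sole user of a LOAD, the LOAD is deleted
// along with it; nodes that merely lost a user are re-queued so later combines
// can take advantage of the reduced use count.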
1764bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1765 if (!N->use_empty())
1766 return false;
1767
1768 SmallSetVector<SDNode *, 16> Nodes;
1769 Nodes.insert(N);
1770 do {
1771 N = Nodes.pop_back_val();
1772 if (!N)
1773 continue;
1774
1775 if (N->use_empty()) {
1776 for (const SDValue &ChildN : N->op_values())
1777 Nodes.insert(ChildN.getNode());
1778
1779 removeFromWorklist(N);
1780 DAG.DeleteNode(N);
1781 } else {
1782 AddToWorklist(N);
1783 }
1784 } while (!Nodes.empty());
1785 return true;
1786}
1787
1788//===----------------------------------------------------------------------===//
1789// Main DAG Combiner implementation
1790//===----------------------------------------------------------------------===//
1791
1792void DAGCombiner::Run(CombineLevel AtLevel) {
1793 // Set the instance variables so that the various visit routines may use them.
1794 Level = AtLevel;
1795 LegalDAG = Level >= AfterLegalizeDAG;
1796 LegalOperations = Level >= AfterLegalizeVectorOps;
1797 LegalTypes = Level >= AfterLegalizeTypes;
1798
1799 WorklistInserter AddNodes(*this);
1800
1801 // Add all the dag nodes to the worklist.
1802 //
1803 // Note: Not all nodes are added to PruningList here, because the only nodes
1804 // which can be deleted are those which have no uses, and all other nodes
1805 // which would otherwise be added to the worklist by the first call to
1806 // getNextWorklistEntry are already present in it.
1807 for (SDNode &Node : DAG.allnodes())
1808 AddToWorklist(&Node, /* IsCandidateForPruning */ Node.use_empty());
1809
1810 // Create a dummy node (which is not added to allnodes), that adds a reference
1811 // to the root node, preventing it from being deleted, and tracking any
1812 // changes of the root.
1813 HandleSDNode Dummy(DAG.getRoot());
1814
1815 // While we have a valid worklist entry node, try to combine it.
1816 while (SDNode *N = getNextWorklistEntry()) {
1817 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1818 // N is deleted from the DAG, since they too may now be dead or may have a
1819 // reduced number of uses, allowing other xforms.
1820 if (recursivelyDeleteUnusedNodes(N))
1821 continue;
1822
1823 WorklistRemover DeadNodes(*this);
1824
1825 // If this combine is running after legalizing the DAG, re-legalize any
1826 // nodes pulled off the worklist.
1827 if (LegalDAG) {
1828 SmallSetVector<SDNode *, 16> UpdatedNodes;
1829 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1830
1831 for (SDNode *LN : UpdatedNodes)
1832 AddToWorklistWithUsers(LN);
1833
1834 if (!NIsValid)
1835 continue;
1836 }
1837
1838 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1839
1840 // Add any operands of the new node which have not yet been combined to the
1841 // worklist as well. getNextWorklistEntry flags nodes that have been
1842 // combined before. Because the worklist uniques things already, this won't
1843 // repeatedly process the same operand.
1844 for (const SDValue &ChildN : N->op_values())
1845 AddToWorklist(ChildN.getNode(), /*IsCandidateForPruning=*/true,
1846 /*SkipIfCombinedBefore=*/true);
1847
1848 SDValue RV = combine(N);
1849
1850 if (!RV.getNode())
1851 continue;
1852
1853 ++NodesCombined;
1854
1855 // Invalidate cached info.
1856 ChainsWithoutMergeableStores.clear();
1857
1858 // If we get back the same node we passed in, rather than a new node or
1859 // zero, we know that the node must have defined multiple values and
1860 // CombineTo was used. Since CombineTo takes care of the worklist
1861 // mechanics for us, we have no work to do in this case.
1862 if (RV.getNode() == N)
1863 continue;
1864
1865 assert(N->getOpcode() != ISD::DELETED_NODE &&
1866 RV.getOpcode() != ISD::DELETED_NODE &&
1867 "Node was deleted but visit returned new node!");
1868
1869 LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));
1870
1871 if (N->getNumValues() == RV->getNumValues())
1872 DAG.ReplaceAllUsesWith(N, RV.getNode());
1873 else {
1874 assert(N->getValueType(0) == RV.getValueType() &&
1875 N->getNumValues() == 1 && "Type mismatch");
1876 DAG.ReplaceAllUsesWith(N, &RV);
1877 }
1878
1879 // Push the new node and any users onto the worklist. Omit this if the
1880 // new node is the EntryToken (e.g. if a store managed to get optimized
1881 // out), because re-visiting the EntryToken and its users will not uncover
1882 // any additional opportunities, but there may be a large number of such
1883 // users, potentially causing compile time explosion.
1884 if (RV.getOpcode() != ISD::EntryToken)
1885 AddToWorklistWithUsers(RV.getNode());
1886
1887 // Finally, if the node is now dead, remove it from the graph. The node
1888 // may not be dead if the replacement process recursively simplified to
1889 // something else needing this node. This will also take care of adding any
1890 // operands which have lost a user to the worklist.
1891 recursivelyDeleteUnusedNodes(N);
1892 }
1893
1894 // If the root changed (e.g. it was a dead load), update the root.
1895 DAG.setRoot(Dummy.getValue());
1896 DAG.RemoveDeadNodes();
1897}
1898
1899SDValue DAGCombiner::visit(SDNode *N) {
1900 // clang-format off
1901 switch (N->getOpcode()) {
1902 default: break;
1903 case ISD::TokenFactor: return visitTokenFactor(N);
1904 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1905 case ISD::ADD: return visitADD(N);
1906 case ISD::PTRADD: return visitPTRADD(N);
1907 case ISD::SUB: return visitSUB(N);
1908 case ISD::SADDSAT:
1909 case ISD::UADDSAT: return visitADDSAT(N);
1910 case ISD::SSUBSAT:
1911 case ISD::USUBSAT: return visitSUBSAT(N);
1912 case ISD::ADDC: return visitADDC(N);
1913 case ISD::SADDO:
1914 case ISD::UADDO: return visitADDO(N);
1915 case ISD::SUBC: return visitSUBC(N);
1916 case ISD::SSUBO:
1917 case ISD::USUBO: return visitSUBO(N);
1918 case ISD::ADDE: return visitADDE(N);
1919 case ISD::UADDO_CARRY: return visitUADDO_CARRY(N);
1920 case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1921 case ISD::SUBE: return visitSUBE(N);
1922 case ISD::USUBO_CARRY: return visitUSUBO_CARRY(N);
1923 case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1924 case ISD::SMULFIX:
1925 case ISD::SMULFIXSAT:
1926 case ISD::UMULFIX:
1927 case ISD::UMULFIXSAT: return visitMULFIX(N);
1928 case ISD::MUL: return visitMUL<EmptyMatchContext>(N);
1929 case ISD::SDIV: return visitSDIV(N);
1930 case ISD::UDIV: return visitUDIV(N);
1931 case ISD::SREM:
1932 case ISD::UREM: return visitREM(N);
1933 case ISD::MULHU: return visitMULHU(N);
1934 case ISD::MULHS: return visitMULHS(N);
1935 case ISD::AVGFLOORS:
1936 case ISD::AVGFLOORU:
1937 case ISD::AVGCEILS:
1938 case ISD::AVGCEILU: return visitAVG(N);
1939 case ISD::ABDS:
1940 case ISD::ABDU: return visitABD(N);
1941 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1942 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1943 case ISD::SMULO:
1944 case ISD::UMULO: return visitMULO(N);
1945 case ISD::SMIN:
1946 case ISD::SMAX:
1947 case ISD::UMIN:
1948 case ISD::UMAX: return visitIMINMAX(N);
1949 case ISD::AND: return visitAND(N);
1950 case ISD::OR: return visitOR(N);
1951 case ISD::XOR: return visitXOR(N);
1952 case ISD::SHL: return visitSHL(N);
1953 case ISD::SRA: return visitSRA(N);
1954 case ISD::SRL: return visitSRL(N);
1955 case ISD::ROTR:
1956 case ISD::ROTL: return visitRotate(N);
1957 case ISD::FSHL:
1958 case ISD::FSHR: return visitFunnelShift(N);
1959 case ISD::SSHLSAT:
1960 case ISD::USHLSAT: return visitSHLSAT(N);
1961 case ISD::ABS: return visitABS(N);
1962 case ISD::BSWAP: return visitBSWAP(N);
1963 case ISD::BITREVERSE: return visitBITREVERSE(N);
1964 case ISD::CTLZ: return visitCTLZ(N);
1965 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1966 case ISD::CTTZ: return visitCTTZ(N);
1967 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1968 case ISD::CTPOP: return visitCTPOP(N);
1969 case ISD::SELECT: return visitSELECT(N);
1970 case ISD::VSELECT: return visitVSELECT(N);
1971 case ISD::SELECT_CC: return visitSELECT_CC(N);
1972 case ISD::SETCC: return visitSETCC(N);
1973 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1974 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1975 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1976 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1977 case ISD::AssertSext:
1978 case ISD::AssertZext: return visitAssertExt(N);
1979 case ISD::AssertAlign: return visitAssertAlign(N);
1980 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1981 case ISD::SIGN_EXTEND_VECTOR_INREG:
1982 case ISD::ZERO_EXTEND_VECTOR_INREG:
1983 case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1984 case ISD::TRUNCATE: return visitTRUNCATE(N);
1985 case ISD::TRUNCATE_USAT_U: return visitTRUNCATE_USAT_U(N);
1986 case ISD::BITCAST: return visitBITCAST(N);
1987 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1988 case ISD::FADD: return visitFADD(N);
1989 case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1990 case ISD::FSUB: return visitFSUB(N);
1991 case ISD::FMUL: return visitFMUL(N);
1992 case ISD::FMA: return visitFMA<EmptyMatchContext>(N);
1993 case ISD::FMAD: return visitFMAD(N);
1994 case ISD::FDIV: return visitFDIV(N);
1995 case ISD::FREM: return visitFREM(N);
1996 case ISD::FSQRT: return visitFSQRT(N);
1997 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1998 case ISD::FPOW: return visitFPOW(N);
1999 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
2000 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
2001 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
2002 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
2003 case ISD::LROUND:
2004 case ISD::LLROUND:
2005 case ISD::LRINT:
2006 case ISD::LLRINT: return visitXROUND(N);
2007 case ISD::FP_ROUND: return visitFP_ROUND(N);
2008 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
2009 case ISD::FNEG: return visitFNEG(N);
2010 case ISD::FABS: return visitFABS(N);
2011 case ISD::FFLOOR: return visitFFLOOR(N);
2012 case ISD::FMINNUM:
2013 case ISD::FMAXNUM:
2014 case ISD::FMINIMUM:
2015 case ISD::FMAXIMUM:
2016 case ISD::FMINIMUMNUM:
2017 case ISD::FMAXIMUMNUM: return visitFMinMax(N);
2018 case ISD::FCEIL: return visitFCEIL(N);
2019 case ISD::FTRUNC: return visitFTRUNC(N);
2020 case ISD::FFREXP: return visitFFREXP(N);
2021 case ISD::BRCOND: return visitBRCOND(N);
2022 case ISD::BR_CC: return visitBR_CC(N);
2023 case ISD::LOAD: return visitLOAD(N);
2024 case ISD::STORE: return visitSTORE(N);
2025 case ISD::ATOMIC_STORE: return visitATOMIC_STORE(N);
2026 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
2027 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
2028 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
2029 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
2030 case ISD::VECTOR_INTERLEAVE: return visitVECTOR_INTERLEAVE(N);
2031 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
2032 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
2033 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
2034 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
2035 case ISD::MGATHER: return visitMGATHER(N);
2036 case ISD::MLOAD: return visitMLOAD(N);
2037 case ISD::MSCATTER: return visitMSCATTER(N);
2038 case ISD::MSTORE: return visitMSTORE(N);
2039 case ISD::EXPERIMENTAL_VECTOR_HISTOGRAM: return visitMHISTOGRAM(N);
2040 case ISD::PARTIAL_REDUCE_SMLA:
2041 case ISD::PARTIAL_REDUCE_UMLA:
2042 case ISD::PARTIAL_REDUCE_SUMLA:
2043 return visitPARTIAL_REDUCE_MLA(N);
2044 case ISD::VECTOR_COMPRESS: return visitVECTOR_COMPRESS(N);
2045 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
2046 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
2047 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
2048 case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
2049 case ISD::BF16_TO_FP: return visitBF16_TO_FP(N);
2050 case ISD::FREEZE: return visitFREEZE(N);
2051 case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N);
2052 case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N);
2053 case ISD::FCANONICALIZE: return visitFCANONICALIZE(N);
2054 case ISD::VECREDUCE_FADD:
2055 case ISD::VECREDUCE_FMUL:
2056 case ISD::VECREDUCE_ADD:
2057 case ISD::VECREDUCE_MUL:
2058 case ISD::VECREDUCE_AND:
2059 case ISD::VECREDUCE_OR:
2060 case ISD::VECREDUCE_XOR:
2061 case ISD::VECREDUCE_SMAX:
2062 case ISD::VECREDUCE_SMIN:
2063 case ISD::VECREDUCE_UMAX:
2064 case ISD::VECREDUCE_UMIN:
2065 case ISD::VECREDUCE_FMAX:
2066 case ISD::VECREDUCE_FMIN:
2067 case ISD::VECREDUCE_FMAXIMUM:
2068 case ISD::VECREDUCE_FMINIMUM: return visitVECREDUCE(N);
2069#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
2070#include "llvm/IR/VPIntrinsics.def"
2071 return visitVPOp(N);
2072 }
2073 // clang-format on
2074 return SDValue();
2075}
2076
2077SDValue DAGCombiner::combine(SDNode *N) {
2078 if (!DebugCounter::shouldExecute(DAGCombineCounter))
2079 return SDValue();
2080
2081 SDValue RV;
2082 if (!DisableGenericCombines)
2083 RV = visit(N);
2084
2085 // If nothing happened, try a target-specific DAG combine.
2086 if (!RV.getNode()) {
2087 assert(N->getOpcode() != ISD::DELETED_NODE &&
2088 "Node was deleted but visit returned NULL!");
2089
2090 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
2091 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
2092
2093 // Expose the DAG combiner to the target combiner impls.
2094 TargetLowering::DAGCombinerInfo
2095 DagCombineInfo(DAG, Level, false, this);
2096
2097 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
2098 }
2099 }
2100
2101 // If nothing happened still, try promoting the operation.
2102 if (!RV.getNode()) {
2103 switch (N->getOpcode()) {
2104 default: break;
2105 case ISD::ADD:
2106 case ISD::SUB:
2107 case ISD::MUL:
2108 case ISD::AND:
2109 case ISD::OR:
2110 case ISD::XOR:
2111 RV = PromoteIntBinOp(SDValue(N, 0));
2112 break;
2113 case ISD::SHL:
2114 case ISD::SRA:
2115 case ISD::SRL:
2116 RV = PromoteIntShiftOp(SDValue(N, 0));
2117 break;
2118 case ISD::SIGN_EXTEND:
2119 case ISD::ZERO_EXTEND:
2120 case ISD::ANY_EXTEND:
2121 RV = PromoteExtend(SDValue(N, 0));
2122 break;
2123 case ISD::LOAD:
2124 if (PromoteLoad(SDValue(N, 0)))
2125 RV = SDValue(N, 0);
2126 break;
2127 }
2128 }
2129
2130 // If N is a commutative binary node, try to eliminate it if the commuted
2131 // version is already present in the DAG.
2132 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) {
2133 SDValue N0 = N->getOperand(0);
2134 SDValue N1 = N->getOperand(1);
2135
2136 // Constant operands are canonicalized to RHS.
2137 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
2138 SDValue Ops[] = {N1, N0};
2139 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
2140 N->getFlags());
2141 if (CSENode)
2142 return SDValue(CSENode, 0);
2143 }
2144 }
2145
2146 return RV;
2147}
2148
2149/// Given a node, return its input chain if it has one, otherwise return a null
2150/// sd operand.
2151 static SDValue getInputChainForNode(SDNode *N) {
2152 if (unsigned NumOps = N->getNumOperands()) {
2153 if (N->getOperand(0).getValueType() == MVT::Other)
2154 return N->getOperand(0);
2155 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
2156 return N->getOperand(NumOps-1);
2157 for (unsigned i = 1; i < NumOps-1; ++i)
2158 if (N->getOperand(i).getValueType() == MVT::Other)
2159 return N->getOperand(i);
2160 }
2161 return SDValue();
2162}
2163
2164SDValue DAGCombiner::visitFCANONICALIZE(SDNode *N) {
2165 SDValue Operand = N->getOperand(0);
2166 EVT VT = Operand.getValueType();
2167 SDLoc dl(N);
2168
2169 // Canonicalize undef to quiet NaN.
2170 if (Operand.isUndef()) {
2171 APFloat CanonicalQNaN = APFloat::getQNaN(VT.getFltSemantics());
2172 return DAG.getConstantFP(CanonicalQNaN, dl, VT);
2173 }
2174 return SDValue();
2175}
2176
2177SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
2178 // If N has two operands, where one has an input chain equal to the other,
2179 // the 'other' chain is redundant.
2180 if (N->getNumOperands() == 2) {
2181 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
2182 return N->getOperand(0);
2183 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
2184 return N->getOperand(1);
2185 }
2186
2187 // Don't simplify token factors if optnone.
2188 if (OptLevel == CodeGenOptLevel::None)
2189 return SDValue();
2190
2191 // Don't simplify the token factor if the node itself has too many operands.
2192 if (N->getNumOperands() > TokenFactorInlineLimit)
2193 return SDValue();
2194
2195 // If the sole user is a token factor, we should make sure we have a
2196 // chance to merge them together. This prevents TF chains from inhibiting
2197 // optimizations.
2198 if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::TokenFactor)
2199 AddToWorklist(*(N->user_begin()));
2200
2201 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
2202 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
2203 SmallPtrSet<SDNode*, 16> SeenOps;
2204 bool Changed = false; // If we should replace this token factor.
2205
2206 // Start out with this token factor.
2207 TFs.push_back(N);
2208
2209 // Iterate through token factors. The TFs list grows when new token factors
2210 // are encountered.
2211 for (unsigned i = 0; i < TFs.size(); ++i) {
2212 // Limit number of nodes to inline, to avoid quadratic compile times.
2213 // We have to add the outstanding Token Factors to Ops, otherwise we might
2214 // drop Ops from the resulting Token Factors.
2215 if (Ops.size() > TokenFactorInlineLimit) {
2216 for (unsigned j = i; j < TFs.size(); j++)
2217 Ops.emplace_back(TFs[j], 0);
2218 // Drop unprocessed Token Factors from TFs, so we do not add them to the
2219 // combiner worklist later.
2220 TFs.resize(i);
2221 break;
2222 }
2223
2224 SDNode *TF = TFs[i];
2225 // Check each of the operands.
2226 for (const SDValue &Op : TF->op_values()) {
2227 switch (Op.getOpcode()) {
2228 case ISD::EntryToken:
2229 // Entry tokens don't need to be added to the list. They are
2230 // redundant.
2231 Changed = true;
2232 break;
2233
2234 case ISD::TokenFactor:
2235 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
2236 // Queue up for processing.
2237 TFs.push_back(Op.getNode());
2238 Changed = true;
2239 break;
2240 }
2241 [[fallthrough]];
2242
2243 default:
2244 // Only add if it isn't already in the list.
2245 if (SeenOps.insert(Op.getNode()).second)
2246 Ops.push_back(Op);
2247 else
2248 Changed = true;
2249 break;
2250 }
2251 }
2252 }
2253
2254 // Re-visit inlined Token Factors, to clean them up in case they have been
2255 // removed. Skip the first Token Factor, as this is the current node.
2256 for (unsigned i = 1, e = TFs.size(); i < e; i++)
2257 AddToWorklist(TFs[i]);
2258
2259 // Remove Nodes that are chained to another node in the list. Do so
2260 // by walking up chains breadth-first, stopping when we've seen
2261 // another operand. In general we must climb to the EntryNode, but we can exit
2262 // early if we find all remaining work is associated with just one operand as
2263 // no further pruning is possible.
2264
2265 // List of nodes to search through and original Ops from which they originate.
2266 SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
2267 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
2268 SmallPtrSet<SDNode *, 16> SeenChains;
2269 bool DidPruneOps = false;
2270
2271 unsigned NumLeftToConsider = 0;
2272 for (const SDValue &Op : Ops) {
2273 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
2274 OpWorkCount.push_back(1);
2275 }
2276
2277 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
2278 // If this is an Op, we can remove the op from the list. Re-mark any
2279 // search associated with it as from the current OpNumber.
2280 if (SeenOps.contains(Op)) {
2281 Changed = true;
2282 DidPruneOps = true;
2283 unsigned OrigOpNumber = 0;
2284 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
2285 OrigOpNumber++;
2286 assert((OrigOpNumber != Ops.size()) &&
2287 "expected to find TokenFactor Operand");
2288 // Re-mark worklist from OrigOpNumber to OpNumber
2289 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
2290 if (Worklist[i].second == OrigOpNumber) {
2291 Worklist[i].second = OpNumber;
2292 }
2293 }
2294 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
2295 OpWorkCount[OrigOpNumber] = 0;
2296 NumLeftToConsider--;
2297 }
2298 // Add if it's a new chain
2299 if (SeenChains.insert(Op).second) {
2300 OpWorkCount[OpNumber]++;
2301 Worklist.push_back(std::make_pair(Op, OpNumber));
2302 }
2303 };
2304
2305 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
2306 // We need to consider at least 2 Ops to prune.
2307 if (NumLeftToConsider <= 1)
2308 break;
2309 auto CurNode = Worklist[i].first;
2310 auto CurOpNumber = Worklist[i].second;
2311 assert((OpWorkCount[CurOpNumber] > 0) &&
2312 "Node should not appear in worklist");
2313 switch (CurNode->getOpcode()) {
2314 case ISD::EntryToken:
2315 // Hitting EntryToken is the only way for the search to terminate without
2316 // hitting
2317 // another operand's search. Prevent us from marking this operand
2318 // considered.
2319 NumLeftToConsider++;
2320 break;
2321 case ISD::TokenFactor:
2322 for (const SDValue &Op : CurNode->op_values())
2323 AddToWorklist(i, Op.getNode(), CurOpNumber);
2324 break;
2325 case ISD::LIFETIME_START:
2326 case ISD::LIFETIME_END:
2327 case ISD::CopyFromReg:
2328 case ISD::CopyToReg:
2329 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2330 break;
2331 default:
2332 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2333 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2334 break;
2335 }
2336 OpWorkCount[CurOpNumber]--;
2337 if (OpWorkCount[CurOpNumber] == 0)
2338 NumLeftToConsider--;
2339 }
2340
2341 // If we've changed things around then replace token factor.
2342 if (Changed) {
2343 SDValue Result;
2344 if (Ops.empty()) {
2345 // The entry token is the only possible outcome.
2346 Result = DAG.getEntryNode();
2347 } else {
2348 if (DidPruneOps) {
2349 SmallVector<SDValue, 8> PrunedOps;
2350 //
2351 for (const SDValue &Op : Ops) {
2352 if (SeenChains.count(Op.getNode()) == 0)
2353 PrunedOps.push_back(Op);
2354 }
2355 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2356 } else {
2357 Result = DAG.getTokenFactor(SDLoc(N), Ops);
2358 }
2359 }
2360 return Result;
2361 }
2362 return SDValue();
2363}
2364
2365/// MERGE_VALUES can always be eliminated.
2366SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2367 WorklistRemover DeadNodes(*this);
2368 // Replacing results may cause a different MERGE_VALUES to suddenly
2369 // be CSE'd with N, and carry its uses with it. Iterate until no
2370 // uses remain, to ensure that the node can be safely deleted.
2371 // First add the users of this node to the work list so that they
2372 // can be tried again once they have new operands.
2373 AddUsersToWorklist(N);
2374 do {
2375 // Do as a single replacement to avoid rewalking use lists.
2376 SmallVector<SDValue, 8> Ops(N->ops());
2377 DAG.ReplaceAllUsesWith(N, Ops.data());
2378 } while (!N->use_empty());
2379 deleteAndRecombine(N);
2380 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2381}
2382
2383 /// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
2384 /// ConstantSDNode pointer, else nullptr.
2385 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2386 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2387 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2388}
2389
2390// isTruncateOf - If N is a truncate of some other value, return true, record
2391// the value being truncated in Op and which of Op's bits are zero/one in Known.
2392// This function computes KnownBits to avoid a duplicated call to
2393// computeKnownBits in the caller.
2394 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
2395 KnownBits &Known) {
2396 if (N->getOpcode() == ISD::TRUNCATE) {
2397 Op = N->getOperand(0);
2398 Known = DAG.computeKnownBits(Op);
2399 if (N->getFlags().hasNoUnsignedWrap())
2400 Known.Zero.setBitsFrom(N.getScalarValueSizeInBits());
2401 return true;
2402 }
2403
2404 if (N.getValueType().getScalarType() != MVT::i1 ||
2405 !sd_match(
2406 N, m_c_SetCC(m_Value(Op), m_Zero(), m_SpecificCondCode(ISD::SETNE))))
2407 return false;
2408
2409 Known = DAG.computeKnownBits(Op);
2410 return (Known.Zero | 1).isAllOnes();
2411}
2412
2413/// Return true if 'Use' is a load or a store that uses N as its base pointer
2414/// and that N may be folded in the load / store addressing mode.
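// Illustrative, target-dependent example: with [reg + imm] addressing, an
// (add p, 16) whose user is the address of a load can usually be folded into
// the load's addressing mode, so such an ADD is worth keeping.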
2415 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2416 const TargetLowering &TLI) {
2417 EVT VT;
2418 unsigned AS;
2419
2420 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2421 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2422 return false;
2423 VT = LD->getMemoryVT();
2424 AS = LD->getAddressSpace();
2425 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2426 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2427 return false;
2428 VT = ST->getMemoryVT();
2429 AS = ST->getAddressSpace();
2430 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2431 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2432 return false;
2433 VT = LD->getMemoryVT();
2434 AS = LD->getAddressSpace();
2435 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2436 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2437 return false;
2438 VT = ST->getMemoryVT();
2439 AS = ST->getAddressSpace();
2440 } else {
2441 return false;
2442 }
2443
2444 TargetLowering::AddrMode AM;
2445 if (N->isAnyAdd()) {
2446 AM.HasBaseReg = true;
2447 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2448 if (Offset)
2449 // [reg +/- imm]
2450 AM.BaseOffs = Offset->getSExtValue();
2451 else
2452 // [reg +/- reg]
2453 AM.Scale = 1;
2454 } else if (N->getOpcode() == ISD::SUB) {
2455 AM.HasBaseReg = true;
2456 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2457 if (Offset)
2458 // [reg +/- imm]
2459 AM.BaseOffs = -Offset->getSExtValue();
2460 else
2461 // [reg +/- reg]
2462 AM.Scale = 1;
2463 } else {
2464 return false;
2465 }
2466
2467 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2468 VT.getTypeForEVT(*DAG.getContext()), AS);
2469}
2470
2471/// This inverts a canonicalization in IR that replaces a variable select arm
2472/// with an identity constant. Codegen improves if we re-use the variable
2473/// operand rather than load a constant. This can also be converted into a
2474/// masked vector operation if the target supports it.
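// Sketch of the rewrite, using ISD::ADD (identity constant 0) as an example:
//   (add X, (vselect Cond, 0, Y)) -> (vselect Cond, freeze(X), (add freeze(X), Y))
// X is frozen because it gains an extra use.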
2475 static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
2476 bool ShouldCommuteOperands) {
2477 // Match a select as operand 1. The identity constant that we are looking for
2478 // is only valid as operand 1 of a non-commutative binop.
2479 SDValue N0 = N->getOperand(0);
2480 SDValue N1 = N->getOperand(1);
2481 if (ShouldCommuteOperands)
2482 std::swap(N0, N1);
2483
2484 unsigned SelOpcode = N1.getOpcode();
2485 if ((SelOpcode != ISD::VSELECT && SelOpcode != ISD::SELECT) ||
2486 !N1.hasOneUse())
2487 return SDValue();
2488
2489 // We can't hoist all instructions because of immediate UB (not speculatable).
2490 // For example div/rem by zero.
2491 if (!DAG.isSafeToSpeculativelyExecuteNode(N))
2492 return SDValue();
2493
2494 unsigned Opcode = N->getOpcode();
2495 EVT VT = N->getValueType(0);
2496 SDValue Cond = N1.getOperand(0);
2497 SDValue TVal = N1.getOperand(1);
2498 SDValue FVal = N1.getOperand(2);
2499 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2500
2501 // This transform increases uses of N0, so freeze it to be safe.
2502 // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2503 unsigned OpNo = ShouldCommuteOperands ? 0 : 1;
2504 if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo) &&
2505 TLI.shouldFoldSelectWithIdentityConstant(Opcode, VT, SelOpcode, N0,
2506 FVal)) {
2507 SDValue F0 = DAG.getFreeze(N0);
2508 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2509 return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
2510 }
2511 // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2512 if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo) &&
2513 TLI.shouldFoldSelectWithIdentityConstant(Opcode, VT, SelOpcode, N0,
2514 TVal)) {
2515 SDValue F0 = DAG.getFreeze(N0);
2516 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2517 return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
2518 }
2519
2520 return SDValue();
2521}
2522
2523SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2524 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2525 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2526 "Unexpected binary operator");
2527
2528 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
2529 return Sel;
2530
2531 if (TLI.isCommutativeBinOp(BO->getOpcode()))
2532 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
2533 return Sel;
2534
2535 // Don't do this unless the old select is going away. We want to eliminate the
2536 // binary operator, not replace a binop with a select.
2537 // TODO: Handle ISD::SELECT_CC.
2538 unsigned SelOpNo = 0;
2539 SDValue Sel = BO->getOperand(0);
2540 auto BinOpcode = BO->getOpcode();
2541 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2542 SelOpNo = 1;
2543 Sel = BO->getOperand(1);
2544
2545 // Peek through trunc to shift amount type.
2546 if ((BinOpcode == ISD::SHL || BinOpcode == ISD::SRA ||
2547 BinOpcode == ISD::SRL) && Sel.hasOneUse()) {
2548 // This is valid when the truncated bits of x are already zero.
2549 SDValue Op;
2550 KnownBits Known;
2551 if (isTruncateOf(DAG, Sel, Op, Known) &&
2552 Known.countMaxActiveBits() <= Sel.getScalarValueSizeInBits())
2553 Sel = Op;
2554 }
2555 }
2556
2557 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2558 return SDValue();
2559
2560 SDValue CT = Sel.getOperand(1);
2561 if (!isConstantOrConstantVector(CT, true) &&
2562 !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2563 return SDValue();
2564
2565 SDValue CF = Sel.getOperand(2);
2566 if (!isConstantOrConstantVector(CF, true) &&
2567 !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2568 return SDValue();
2569
2570 // Bail out if any constants are opaque because we can't constant fold those.
2571 // The exception is "and" and "or" with either 0 or -1 in which case we can
2572 // propagate non constant operands into select. I.e.:
2573 // and (select Cond, 0, -1), X --> select Cond, 0, X
2574 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2575 bool CanFoldNonConst =
2576 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2577 ((isNullOrNullSplat(CT) && isAllOnesOrAllOnesSplat(CF)) ||
2578 (isNullOrNullSplat(CF) && isAllOnesOrAllOnesSplat(CT)));
2579
2580 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2581 if (!CanFoldNonConst &&
2582 !isConstantOrConstantVector(CBO, true) &&
2583 !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2584 return SDValue();
2585
2586 SDLoc DL(Sel);
2587 SDValue NewCT, NewCF;
2588 EVT VT = BO->getValueType(0);
2589
2590 if (CanFoldNonConst) {
2591 // If CBO is an opaque constant, we can't rely on getNode to constant fold.
2592 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CT)) ||
2593 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CT)))
2594 NewCT = CT;
2595 else
2596 NewCT = CBO;
2597
2598 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CF)) ||
2599 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CF)))
2600 NewCF = CF;
2601 else
2602 NewCF = CBO;
2603 } else {
2604 // We have a select-of-constants followed by a binary operator with a
2605 // constant. Eliminate the binop by pulling the constant math into the
2606 // select. Example: add (select Cond, CT, CF), CBO --> select Cond, CT +
2607 // CBO, CF + CBO
2608 NewCT = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CT})
2609 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CT, CBO});
2610 if (!NewCT)
2611 return SDValue();
2612
2613 NewCF = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CF})
2614 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CF, CBO});
2615 if (!NewCF)
2616 return SDValue();
2617 }
2618
2619 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF, BO->getFlags());
2620}
2621
2622 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL,
2623 SelectionDAG &DAG) {
2624 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2625 "Expecting add or sub");
2626
2627 // Match a constant operand and a zext operand for the math instruction:
2628 // add Z, C
2629 // sub C, Z
2630 bool IsAdd = N->getOpcode() == ISD::ADD;
2631 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2632 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2633 auto *CN = dyn_cast<ConstantSDNode>(C);
2634 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2635 return SDValue();
2636
2637 // Match the zext operand as a setcc of a boolean.
2638 if (Z.getOperand(0).getValueType() != MVT::i1)
2639 return SDValue();
2640
2641 // Match the compare as: setcc (X & 1), 0, eq.
2642 if (!sd_match(Z.getOperand(0), m_SetCC(m_And(m_Value(), m_One()), m_Zero(),
2643 m_SpecificCondCode(ISD::SETEQ))))
2644 return SDValue();
2645
2646 // We are adding/subtracting a constant and an inverted low bit. Turn that
2647 // into a subtract/add of the low bit with incremented/decremented constant:
2648 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2649 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2650 EVT VT = C.getValueType();
2651 SDValue LowBit = DAG.getZExtOrTrunc(Z.getOperand(0).getOperand(0), DL, VT);
2652 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT)
2653 : DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2654 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2655}
2656
2657// Attempt to form avgceil(A, B) from (A | B) - ((A ^ B) >> 1)
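// Derivation: A + B == 2*(A | B) - (A ^ B), so
// ceil((A + B) / 2) == (A | B) - ((A ^ B) >> 1); the subtraction cannot wrap
// because (A ^ B) >> 1 never exceeds (A | B).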
2658SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc &DL) {
2659 SDValue N0 = N->getOperand(0);
2660 EVT VT = N0.getValueType();
2661 SDValue A, B;
2662
2663 if ((!LegalOperations || hasOperation(ISD::AVGCEILU, VT)) &&
2664 sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2665 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2666 return DAG.getNode(ISD::AVGCEILU, DL, VT, A, B);
2667 }
2668 if ((!LegalOperations || hasOperation(ISD::AVGCEILS, VT)) &&
2669 sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2670 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2671 return DAG.getNode(ISD::AVGCEILS, DL, VT, A, B);
2672 }
2673 return SDValue();
2674}
2675
2676/// Try to fold a pointer arithmetic node.
2677/// This needs to be done separately from normal addition, because pointer
2678/// addition is not commutative.
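// Illustrative example of the reassociations below:
//   (ptradd (ptradd p, 8), 16) -> (ptradd p, (add 8, 16))
// Constant offsets are pushed towards the outermost ptradd so they can later
// be folded into memory addressing modes.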
2679SDValue DAGCombiner::visitPTRADD(SDNode *N) {
2680 SDValue N0 = N->getOperand(0);
2681 SDValue N1 = N->getOperand(1);
2682 EVT PtrVT = N0.getValueType();
2683 EVT IntVT = N1.getValueType();
2684 SDLoc DL(N);
2685
2686 // This is already ensured by an assert in SelectionDAG::getNode(). Several
2687 // combines here depend on this assumption.
2688 assert(PtrVT == IntVT &&
2689 "PTRADD with different operand types is not supported");
2690
2691 // fold (ptradd x, 0) -> x
2692 if (isNullConstant(N1))
2693 return N0;
2694
2695 // fold (ptradd 0, x) -> x
2696 if (PtrVT == IntVT && isNullConstant(N0))
2697 return N1;
2698
2699 if (N0.getOpcode() == ISD::PTRADD &&
2700 !reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1)) {
2701 SDValue X = N0.getOperand(0);
2702 SDValue Y = N0.getOperand(1);
2703 SDValue Z = N1;
2704 bool N0OneUse = N0.hasOneUse();
2705 bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2706 bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2707
2708 // (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
2709 // * y is a constant and (ptradd x, y) has one use; or
2710 // * y and z are both constants.
2711 if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) {
2712 // If both additions in the original were NUW, the new ones are as well.
2713 SDNodeFlags Flags =
2714 (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2715 SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
2716 AddToWorklist(Add.getNode());
2717 return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
2718 }
2719 }
2720
2721 // The following combines can turn in-bounds pointer arithmetic out of bounds.
2722 // That is problematic for settings like AArch64's CPA, which checks that
2723 // intermediate results of pointer arithmetic remain in bounds. The target
2724 // therefore needs to opt-in to enable them.
2726 DAG.getMachineFunction().getFunction(), PtrVT))
2727 return SDValue();
2728
2729 if (N0.getOpcode() == ISD::PTRADD && isa<ConstantSDNode>(N1)) {
2730 // Fold (ptradd (ptradd GA, v), c) -> (ptradd (ptradd GA, c) v) with
2731 // global address GA and constant c, such that c can be folded into GA.
2732 // TODO: Support constant vector splats.
2733 SDValue GAValue = N0.getOperand(0);
2734 if (const GlobalAddressSDNode *GA =
2735 dyn_cast<GlobalAddressSDNode>(GAValue)) {
2736 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2737 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2738 // If both additions in the original were NUW, reassociation preserves
2739 // that.
2740 SDNodeFlags Flags =
2741 (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2742 SDValue Inner = DAG.getMemBasePlusOffset(GAValue, N1, DL, Flags);
2743 AddToWorklist(Inner.getNode());
2744 return DAG.getMemBasePlusOffset(Inner, N0.getOperand(1), DL, Flags);
2745 }
2746 }
2747 }
2748
2749 if (N1.getOpcode() == ISD::ADD && N1.hasOneUse()) {
2750 // (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
2751 // y is not, and (add y, z) is used only once.
2752 // (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
2753 // z is not, and (add y, z) is used only once.
2754 // The goal is to move constant offsets to the outermost ptradd, to create
2755 // more opportunities to fold offsets into memory instructions.
2756 // Together with another combine above, this also implements
2757 // (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y).
2758 SDValue X = N0;
2759 SDValue Y = N1.getOperand(0);
2760 SDValue Z = N1.getOperand(1);
2761 bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2762 bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2763
2764 // If both additions in the original were NUW, reassociation preserves that.
2765 SDNodeFlags ReassocFlags =
2766 (N->getFlags() & N1->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2767
2768 if (ZIsConstant != YIsConstant) {
2769 if (YIsConstant)
2770 std::swap(Y, Z);
2771 SDValue Inner = DAG.getMemBasePlusOffset(X, Y, DL, ReassocFlags);
2772 AddToWorklist(Inner.getNode());
2773 return DAG.getMemBasePlusOffset(Inner, Z, DL, ReassocFlags);
2774 }
2775 }
2776
2777 // Transform (ptradd a, b) -> (or disjoint a, b) if it is equivalent and if
2778 // that transformation can't block an offset folding at any use of the ptradd.
2779 // This should be done late, after legalization, so that it doesn't block
2780 // other ptradd combines that could enable more offset folding.
2781 if (LegalOperations && DAG.haveNoCommonBitsSet(N0, N1)) {
2782 bool TransformCannotBreakAddrMode = none_of(N->users(), [&](SDNode *User) {
2783 return canFoldInAddressingMode(N, User, DAG, TLI);
2784 });
2785
2786 if (TransformCannotBreakAddrMode)
2787 return DAG.getNode(ISD::OR, DL, PtrVT, N0, N1, SDNodeFlags::Disjoint);
2788 }
2789
2790 return SDValue();
2791}
2792
2793/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2794/// a shift and add with a different constant.
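// Reasoning sketch: (srl (not X), BW-1) is 1 exactly when X is non-negative,
// and shifting X itself (sra or srl) yields the same information without the
// 'not', so the fold only has to adjust the add/sub constant by one.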
2795 static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL,
2796 SelectionDAG &DAG) {
2797 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2798 "Expecting add or sub");
2799
2800 // We need a constant operand for the add/sub, and the other operand is a
2801 // logical shift right: add (srl), C or sub C, (srl).
2802 bool IsAdd = N->getOpcode() == ISD::ADD;
2803 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2804 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2805 if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2806 ShiftOp.getOpcode() != ISD::SRL)
2807 return SDValue();
2808
2809 // The shift must be of a 'not' value.
2810 SDValue Not = ShiftOp.getOperand(0);
2811 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2812 return SDValue();
2813
2814 // The shift must be moving the sign bit to the least-significant-bit.
2815 EVT VT = ShiftOp.getValueType();
2816 SDValue ShAmt = ShiftOp.getOperand(1);
2817 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2818 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2819 return SDValue();
2820
2821 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2822 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2823 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2824 if (SDValue NewC = DAG.FoldConstantArithmetic(
2825 IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2826 {ConstantOp, DAG.getConstant(1, DL, VT)})) {
2827 SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
2828 Not.getOperand(0), ShAmt);
2829 return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2830 }
2831
2832 return SDValue();
2833}
2834
2835static bool
2836 areBitwiseNotOfEachother(SDValue Op0, SDValue Op1) {
2837 return (isBitwiseNot(Op0) && Op0.getOperand(0) == Op1) ||
2838 (isBitwiseNot(Op1) && Op1.getOperand(0) == Op0);
2839}
2840
2841/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2842/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2843/// are no common bits set in the operands).
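// For instance, (or x, 1) with the low bit of x known to be zero behaves
// exactly like (add x, 1), so the folds below can be applied to it as well.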
2844SDValue DAGCombiner::visitADDLike(SDNode *N) {
2845 SDValue N0 = N->getOperand(0);
2846 SDValue N1 = N->getOperand(1);
2847 EVT VT = N0.getValueType();
2848 SDLoc DL(N);
2849
2850 // fold (add x, undef) -> undef
2851 if (N0.isUndef())
2852 return N0;
2853 if (N1.isUndef())
2854 return N1;
2855
2856 // fold (add c1, c2) -> c1+c2
2857 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2858 return C;
2859
2860 // canonicalize constant to RHS
2861 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2862 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2863 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2864
2865 if (areBitwiseNotOfEachother(N0, N1))
2866 return DAG.getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()), DL, VT);
2867
2868 // fold vector ops
2869 if (VT.isVector()) {
2870 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2871 return FoldedVOp;
2872
2873 // fold (add x, 0) -> x, vector edition
2874 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2875 return N0;
2876 }
2877
2878 // fold (add x, 0) -> x
2879 if (isNullConstant(N1))
2880 return N0;
2881
2882 if (N0.getOpcode() == ISD::SUB) {
2883 SDValue N00 = N0.getOperand(0);
2884 SDValue N01 = N0.getOperand(1);
2885
2886 // fold ((A-c1)+c2) -> (A+(c2-c1))
2887 if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
2888 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2889
2890 // fold ((c1-A)+c2) -> (c1+c2)-A
2891 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
2892 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2893 }
2894
2895 // add (sext i1 X), 1 -> zext (not i1 X)
2896 // We don't transform this pattern:
2897 // add (zext i1 X), -1 -> sext (not i1 X)
2898 // because most (?) targets generate better code for the zext form.
2899 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2900 isOneOrOneSplat(N1)) {
2901 SDValue X = N0.getOperand(0);
2902 if ((!LegalOperations ||
2903 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2904 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2905 X.getScalarValueSizeInBits() == 1) {
2906 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2907 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2908 }
2909 }
2910
2911 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
2912 // iff (or x, c0) is equivalent to (add x, c0).
2913 // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
2914 // iff (xor x, c0) is equivalent to (add x, c0).
2915 if (DAG.isADDLike(N0)) {
2916 SDValue N01 = N0.getOperand(1);
2917 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
2918 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
2919 }
2920
2921 if (SDValue NewSel = foldBinOpIntoSelect(N))
2922 return NewSel;
2923
2924 // reassociate add
2925 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
2926 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2927 return RADD;
2928
2929 // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2930 // equivalent to (add x, c).
2931 // Reassociate (add (xor x, c), y) -> (add add(x, y), c)) if (xor x, c) is
2932 // equivalent to (add x, c).
2933 // Do this optimization only when adding c does not introduce instructions
2934 // for adding carries.
2935 auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2936 if (DAG.isADDLike(N0) && N0.hasOneUse() &&
2937 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2938 // If N0's type does not split or is a sign mask, it does not introduce
2939 // add carry.
2940 auto TyActn = TLI.getTypeAction(*DAG.getContext(), N0.getValueType());
2941 bool NoAddCarry = TyActn == TargetLoweringBase::TypeLegal ||
2942 TyActn == TargetLoweringBase::TypePromoteInteger ||
2943 isMinSignedConstant(N0.getOperand(1));
2944 if (NoAddCarry)
2945 return DAG.getNode(
2946 ISD::ADD, DL, VT,
2947 DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2948 N0.getOperand(1));
2949 }
2950 return SDValue();
2951 };
2952 if (SDValue Add = ReassociateAddOr(N0, N1))
2953 return Add;
2954 if (SDValue Add = ReassociateAddOr(N1, N0))
2955 return Add;
2956
2957 // Fold add(vecreduce(x), vecreduce(y)) -> vecreduce(add(x, y))
2958 if (SDValue SD =
2959 reassociateReduction(ISD::VECREDUCE_ADD, ISD::ADD, DL, VT, N0, N1))
2960 return SD;
2961 }
2962
2963 SDValue A, B, C, D;
2964
2965 // fold ((0-A) + B) -> B-A
2966 if (sd_match(N0, m_Neg(m_Value(A))))
2967 return DAG.getNode(ISD::SUB, DL, VT, N1, A);
2968
2969 // fold (A + (0-B)) -> A-B
2970 if (sd_match(N1, m_Neg(m_Value(B))))
2971 return DAG.getNode(ISD::SUB, DL, VT, N0, B);
2972
2973 // fold (A+(B-A)) -> B
2974 if (sd_match(N1, m_Sub(m_Value(B), m_Specific(N0))))
2975 return B;
2976
2977 // fold ((B-A)+A) -> B
2978 if (sd_match(N0, m_Sub(m_Value(B), m_Specific(N1))))
2979 return B;
2980
2981 // fold ((A-B)+(C-A)) -> (C-B)
2982 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2983 sd_match(N1, m_Sub(m_Value(C), m_Specific(A))))
2984 return DAG.getNode(ISD::SUB, DL, VT, C, B);
2985
2986 // fold ((A-B)+(B-C)) -> (A-C)
2987 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2988 sd_match(N1, m_Sub(m_Specific(B), m_Value(C))))
2989 return DAG.getNode(ISD::SUB, DL, VT, A, C);
2990
2991 // fold (A+(B-(A+C))) to (B-C)
2992 // fold (A+(B-(C+A))) to (B-C)
2993 if (sd_match(N1, m_Sub(m_Value(B), m_Add(m_Specific(N0), m_Value(C)))))
2994 return DAG.getNode(ISD::SUB, DL, VT, B, C);
2995
2996 // fold (A+((B-A)+or-C)) to (B+or-C)
2997 if (sd_match(N1,
2998 m_AnyOf(m_Add(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)),
2999 m_Sub(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)))))
3000 return DAG.getNode(N1.getOpcode(), DL, VT, B, C);
3001
3002 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
3003 if (sd_match(N0, m_OneUse(m_Sub(m_Value(A), m_Value(B)))) &&
3004 sd_match(N1, m_OneUse(m_Sub(m_Value(C), m_Value(D)))) &&
3005 (isConstantOrConstantVector(A) || isConstantOrConstantVector(C)))
3006 return DAG.getNode(ISD::SUB, DL, VT,
3007 DAG.getNode(ISD::ADD, SDLoc(N0), VT, A, C),
3008 DAG.getNode(ISD::ADD, SDLoc(N1), VT, B, D));
3009
3010 // fold (add (umax X, C), -C) --> (usubsat X, C)
3011 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
3012 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
3013 return (!Max && !Op) ||
3014 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
3015 };
3016 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
3017 /*AllowUndefs*/ true))
3018 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
3019 N0.getOperand(1));
3020 }
3021
3022 if (SimplifyDemandedBits(SDValue(N, 0)))
3023 return SDValue(N, 0);
3024
3025 if (isOneOrOneSplat(N1)) {
3026 // fold (add (xor a, -1), 1) -> (sub 0, a)
3027 if (isBitwiseNot(N0))
3028 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
3029 N0.getOperand(0));
3030
3031 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
3032 if (N0.getOpcode() == ISD::ADD) {
3033 SDValue A, Xor;
3034
3035 if (isBitwiseNot(N0.getOperand(0))) {
3036 A = N0.getOperand(1);
3037 Xor = N0.getOperand(0);
3038 } else if (isBitwiseNot(N0.getOperand(1))) {
3039 A = N0.getOperand(0);
3040 Xor = N0.getOperand(1);
3041 }
3042
3043 if (Xor)
3044 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
3045 }
3046
3047 // Look for:
3048 // add (add x, y), 1
3049 // And if the target does not like this form then turn into:
3050 // sub y, (xor x, -1)
3051 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3052 N0.hasOneUse() &&
3053 // Limit this to after legalization if the add has wrap flags
3054 (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
3055 !N->getFlags().hasNoSignedWrap()))) {
3056 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
3057 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
3058 }
3059 }
3060
3061 // (x - y) + -1 -> add (xor y, -1), x
3062 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
3063 isAllOnesOrAllOnesSplat(N1, /*AllowUndefs=*/true)) {
3064 SDValue Not = DAG.getNOT(DL, N0.getOperand(1), VT);
3065 return DAG.getNode(ISD::ADD, DL, VT, Not, N0.getOperand(0));
3066 }
3067
3068 // Fold add(mul(add(A, CA), CM), CB) -> add(mul(A, CM), CM*CA+CB).
3069 // This can help if the inner add has multiple uses.
3070 APInt CM, CA;
3071 if (ConstantSDNode *CB = dyn_cast<ConstantSDNode>(N1)) {
3072 if (VT.getScalarSizeInBits() <= 64) {
3073 if (sd_match(N0, m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
3074 m_ConstInt(CM)))) &&
3075 TLI.isLegalAddImmediate(
3076 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
3077 SDNodeFlags Flags;
3078 // If all the inputs are nuw, the outputs can be nuw. If all the inputs
3079 // are _also_ nsw, the outputs can be too.
3080 if (N->getFlags().hasNoUnsignedWrap() &&
3081 N0->getFlags().hasNoUnsignedWrap() &&
3082     N0.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
3083   Flags.setNoUnsignedWrap(true);
3084 if (N->getFlags().hasNoSignedWrap() &&
3085 N0->getFlags().hasNoSignedWrap() &&
3086     N0.getOperand(0)->getFlags().hasNoSignedWrap())
3087   Flags.setNoSignedWrap(true);
3088 }
3089 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
3090 DAG.getConstant(CM, DL, VT), Flags);
3091 return DAG.getNode(
3092 ISD::ADD, DL, VT, Mul,
3093 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
3094 }
3095 // Also look in case there is an intermediate add.
3096 if (sd_match(N0, m_OneUse(m_Add(
3097     m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
3098 m_ConstInt(CM))),
3099 m_Value(B)))) &&
3100         TLI.isLegalAddImmediate(
3101 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
3102 SDNodeFlags Flags;
3103 // If all the inputs are nuw, the outputs can be nuw. If all the inputs
3104 // are _also_ nsw, the outputs can be too.
3105 SDValue OMul =
3106 N0.getOperand(0) == B ? N0.getOperand(1) : N0.getOperand(0);
3107 if (N->getFlags().hasNoUnsignedWrap() &&
3108 N0->getFlags().hasNoUnsignedWrap() &&
3109 OMul->getFlags().hasNoUnsignedWrap() &&
3110 OMul.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
3111   Flags.setNoUnsignedWrap(true);
3112 if (N->getFlags().hasNoSignedWrap() &&
3113 N0->getFlags().hasNoSignedWrap() &&
3114 OMul->getFlags().hasNoSignedWrap() &&
3115 OMul.getOperand(0)->getFlags().hasNoSignedWrap())
3116   Flags.setNoSignedWrap(true);
3117 }
3118 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
3119 DAG.getConstant(CM, DL, VT), Flags);
3120 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N1), VT, Mul, B, Flags);
3121 return DAG.getNode(
3122 ISD::ADD, DL, VT, Add,
3123 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
3124 }
3125 }
3126 }
3127
3128 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
3129 return Combined;
3130
3131 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
3132 return Combined;
3133
3134 return SDValue();
3135}
3136
3137// Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1)
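// This rests on the carry-save identity A + B == 2*(A & B) + (A ^ B): the and
// collects the carries and the xor the partial sum, so the floored average
// (A + B) >> 1 equals (A & B) + ((A ^ B) >> 1) without needing a wider type
// (an arithmetic shift of the xor gives the signed form).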
3138SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) {
3139 SDValue N0 = N->getOperand(0);
3140 EVT VT = N0.getValueType();
3141 SDValue A, B;
3142
3143 if ((!LegalOperations || hasOperation(ISD::AVGFLOORU, VT)) &&
3144     sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
3145 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
3146 return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
3147 }
3148 if ((!LegalOperations || hasOperation(ISD::AVGFLOORS, VT)) &&
3149     sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
3150 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
3151 return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
3152 }
3153
3154 return SDValue();
3155}
3156
3157SDValue DAGCombiner::visitADD(SDNode *N) {
3158 SDValue N0 = N->getOperand(0);
3159 SDValue N1 = N->getOperand(1);
3160 EVT VT = N0.getValueType();
3161 SDLoc DL(N);
3162
3163 if (SDValue Combined = visitADDLike(N))
3164 return Combined;
3165
3166 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
3167 return V;
3168
3169 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
3170 return V;
3171
3172 if (SDValue V = MatchRotate(N0, N1, SDLoc(N), /*FromAdd=*/true))
3173 return V;
3174
3175 // Try to match AVGFLOOR fixedwidth pattern
3176 if (SDValue V = foldAddToAvg(N, DL))
3177 return V;
3178
3179 // fold (a+b) -> (a|b) iff a and b share no bits.
3180 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
3181 DAG.haveNoCommonBitsSet(N0, N1))
3182 return DAG.getNode(ISD::OR, DL, VT, N0, N1, SDNodeFlags::Disjoint);
3183
3184 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
3185 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
3186 const APInt &C0 = N0->getConstantOperandAPInt(0);
3187 const APInt &C1 = N1->getConstantOperandAPInt(0);
3188 return DAG.getVScale(DL, VT, C0 + C1);
3189 }
3190
3191 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
3192 if (N0.getOpcode() == ISD::ADD &&
3193 N0.getOperand(1).getOpcode() == ISD::VSCALE &&
3194 N1.getOpcode() == ISD::VSCALE) {
3195 const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3196 const APInt &VS1 = N1->getConstantOperandAPInt(0);
3197 SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
3198 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
3199 }
3200
3201 // Fold (add step_vector(c1), step_vector(c2)) to step_vector(c1+c2)
3202 if (N0.getOpcode() == ISD::STEP_VECTOR &&
3203 N1.getOpcode() == ISD::STEP_VECTOR) {
3204 const APInt &C0 = N0->getConstantOperandAPInt(0);
3205 const APInt &C1 = N1->getConstantOperandAPInt(0);
3206 APInt NewStep = C0 + C1;
3207 return DAG.getStepVector(DL, VT, NewStep);
3208 }
3209
3210 // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
3211 if (N0.getOpcode() == ISD::ADD &&
3212     N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR &&
3213 N1.getOpcode() == ISD::STEP_VECTOR) {
3214 const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3215 const APInt &SV1 = N1->getConstantOperandAPInt(0);
3216 APInt NewStep = SV0 + SV1;
3217 SDValue SV = DAG.getStepVector(DL, VT, NewStep);
3218 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
3219 }
3220
3221 return SDValue();
3222}
3223
3224SDValue DAGCombiner::visitADDSAT(SDNode *N) {
3225 unsigned Opcode = N->getOpcode();
3226 SDValue N0 = N->getOperand(0);
3227 SDValue N1 = N->getOperand(1);
3228 EVT VT = N0.getValueType();
3229 bool IsSigned = Opcode == ISD::SADDSAT;
3230 SDLoc DL(N);
3231
3232 // fold (add_sat x, undef) -> -1
3233 if (N0.isUndef() || N1.isUndef())
3234 return DAG.getAllOnesConstant(DL, VT);
3235
3236 // fold (add_sat c1, c2) -> c3
3237 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
3238 return C;
3239
3240 // canonicalize constant to RHS
3241 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3242     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3243 return DAG.getNode(Opcode, DL, VT, N1, N0);
3244
3245 // fold vector ops
3246 if (VT.isVector()) {
3247 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3248 return FoldedVOp;
3249
3250 // fold (add_sat x, 0) -> x, vector edition
3251 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3252 return N0;
3253 }
3254
3255 // fold (add_sat x, 0) -> x
3256 if (isNullConstant(N1))
3257 return N0;
3258
3259 // If it cannot overflow, transform into an add.
3260 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3261 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
3262
3263 return SDValue();
3264}
3265 
3266 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V,
3267 bool ForceCarryReconstruction = false) {
3268 bool Masked = false;
3269
3270 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
3271 while (true) {
3272 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
3273 V = V.getOperand(0);
3274 continue;
3275 }
3276
3277 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
3278 if (ForceCarryReconstruction)
3279 return V;
3280
3281 Masked = true;
3282 V = V.getOperand(0);
3283 continue;
3284 }
3285
3286 if (ForceCarryReconstruction && V.getValueType() == MVT::i1)
3287 return V;
3288
3289 break;
3290 }
3291
3292 // If this is not a carry, return.
3293 if (V.getResNo() != 1)
3294 return SDValue();
3295
3296 if (V.getOpcode() != ISD::UADDO_CARRY && V.getOpcode() != ISD::USUBO_CARRY &&
3297 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
3298 return SDValue();
3299
3300 EVT VT = V->getValueType(0);
3301 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
3302 return SDValue();
3303
3304 // If the result is masked, then no matter what kind of bool it is we can
3305 // return. If it isn't, then we need to make sure the bool type is either 0 or
3306 // 1 and not other values.
3307 if (Masked ||
3308 TLI.getBooleanContents(V.getValueType()) ==
3309     TargetLowering::ZeroOrOneBooleanContent)
3310 return V;
3311
3312 return SDValue();
3313}
3314
3315/// Given the operands of an add/sub operation, see if the 2nd operand is a
3316/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
3317/// the opcode and bypass the mask operation.
3318static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
3319 SelectionDAG &DAG, const SDLoc &DL) {
3320 if (N1.getOpcode() == ISD::ZERO_EXTEND)
3321 N1 = N1.getOperand(0);
3322
3323 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
3324 return SDValue();
3325
3326 EVT VT = N0.getValueType();
3327 SDValue N10 = N1.getOperand(0);
3328 if (N10.getValueType() != VT && N10.getOpcode() == ISD::TRUNCATE)
3329 N10 = N10.getOperand(0);
3330
3331 if (N10.getValueType() != VT)
3332 return SDValue();
3333
3334 if (DAG.ComputeNumSignBits(N10) != VT.getScalarSizeInBits())
3335 return SDValue();
3336
3337 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
3338 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
3339 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N10);
3340}
3341
3342/// Helper for doing combines based on N0 and N1 being added to each other.
3343SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
3344 SDNode *LocReference) {
3345 EVT VT = N0.getValueType();
3346 SDLoc DL(LocReference);
3347
3348 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
3349 SDValue Y, N;
3350 if (sd_match(N1, m_Shl(m_Neg(m_Value(Y)), m_Value(N))))
3351 return DAG.getNode(ISD::SUB, DL, VT, N0,
3352 DAG.getNode(ISD::SHL, DL, VT, Y, N));
3353
3354 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
3355 return V;
3356
3357 // Look for:
3358 // add (add x, 1), y
3359 // And if the target does not like this form then turn into:
3360 // sub y, (xor x, -1)
3361 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3362 N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1)) &&
3363 // Limit this to after legalization if the add has wrap flags
3364 (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
3365 !N0->getFlags().hasNoSignedWrap()))) {
3366 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
3367 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
3368 }
3369
3370 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
3371 // Hoist one-use subtraction by non-opaque constant:
3372 // (x - C) + y -> (x + y) - C
3373 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3374 if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3375 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
3376 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
3377 }
3378 // Hoist one-use subtraction from non-opaque constant:
3379 // (C - x) + y -> (y - x) + C
3380 if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3381 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
3382 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
3383 }
3384 }
3385
3386 // add (mul x, C), x -> mul x, C+1
3387 if (N0.getOpcode() == ISD::MUL && N0.getOperand(0) == N1 &&
3388 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true) &&
3389 N0.hasOneUse()) {
3390 SDValue NewC = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
3391 DAG.getConstant(1, DL, VT));
3392 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), NewC);
3393 }
3394
3395 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
3396 // rather than 'add 0/-1' (the zext should get folded).
3397 // add (sext i1 Y), X --> sub X, (zext i1 Y)
3398 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
3399 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
3400     TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
3401 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
3402 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
3403 }
3404
3405 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
3406 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3407 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3408 if (TN->getVT() == MVT::i1) {
3409 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3410 DAG.getConstant(1, DL, VT));
3411 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
3412 }
3413 }
3414
3415 // (add X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3416 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1)) &&
3417 N1.getResNo() == 0)
3418 return DAG.getNode(ISD::UADDO_CARRY, DL, N1->getVTList(),
3419 N0, N1.getOperand(0), N1.getOperand(2));
3420
3421 // (add X, Carry) -> (uaddo_carry X, 0, Carry)
3422 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3423 if (SDValue Carry = getAsCarry(TLI, N1))
3424 return DAG.getNode(ISD::UADDO_CARRY, DL,
3425 DAG.getVTList(VT, Carry.getValueType()), N0,
3426 DAG.getConstant(0, DL, VT), Carry);
3427
3428 return SDValue();
3429}
3430
3431SDValue DAGCombiner::visitADDC(SDNode *N) {
3432 SDValue N0 = N->getOperand(0);
3433 SDValue N1 = N->getOperand(1);
3434 EVT VT = N0.getValueType();
3435 SDLoc DL(N);
3436
3437 // If the flag result is dead, turn this into an ADD.
3438 if (!N->hasAnyUseOfValue(1))
3439 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3440 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3441
3442 // canonicalize constant to RHS.
3443 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3444 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3445 if (N0C && !N1C)
3446 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
3447
3448 // fold (addc x, 0) -> x + no carry out
3449 if (isNullConstant(N1))
3450 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
3451 DL, MVT::Glue));
3452
3453 // If it cannot overflow, transform into an add.
3455 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3456 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3457
3458 return SDValue();
3459}
3460
3461/**
3462 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
3463 * then the flip also occurs if computing the inverse is the same cost.
3464 * This function returns an empty SDValue in case it cannot flip the boolean
3465 * without increasing the cost of the computation. If you want to flip a boolean
3466 * no matter what, use DAG.getLogicalNOT.
3467 */
3468 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
3469 const TargetLowering &TLI,
3470 bool Force) {
3471 if (Force && isa<ConstantSDNode>(V))
3472 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3473
3474 if (V.getOpcode() != ISD::XOR)
3475 return SDValue();
3476
3477 if (DAG.isBoolConstant(V.getOperand(1)) == true)
3478 return V.getOperand(0);
3479 if (Force && isConstOrConstSplat(V.getOperand(1), false))
3480 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3481 return SDValue();
3482}
3483
3484SDValue DAGCombiner::visitADDO(SDNode *N) {
3485 SDValue N0 = N->getOperand(0);
3486 SDValue N1 = N->getOperand(1);
3487 EVT VT = N0.getValueType();
3488 bool IsSigned = (ISD::SADDO == N->getOpcode());
3489
3490 EVT CarryVT = N->getValueType(1);
3491 SDLoc DL(N);
3492
3493 // If the flag result is dead, turn this into an ADD.
3494 if (!N->hasAnyUseOfValue(1))
3495 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3496 DAG.getUNDEF(CarryVT));
3497
3498 // canonicalize constant to RHS.
3499 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3500     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3501 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
3502
3503 // fold (addo x, 0) -> x + no carry out
3504 if (isNullOrNullSplat(N1))
3505 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3506
3507 // If it cannot overflow, transform into an add.
3508 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3509 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3510 DAG.getConstant(0, DL, CarryVT));
3511
3512 if (IsSigned) {
3513 // fold (saddo (xor a, -1), 1) -> (ssub 0, a).
3514 if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
3515 return DAG.getNode(ISD::SSUBO, DL, N->getVTList(),
3516 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3517 } else {
3518 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
3519 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
3520 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
3521 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3522 return CombineTo(
3523 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3524 }
3525
3526 if (SDValue Combined = visitUADDOLike(N0, N1, N))
3527 return Combined;
3528
3529 if (SDValue Combined = visitUADDOLike(N1, N0, N))
3530 return Combined;
3531 }
3532
3533 return SDValue();
3534}
3535
3536SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
3537 EVT VT = N0.getValueType();
3538 if (VT.isVector())
3539 return SDValue();
3540
3541 // (uaddo X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3542 // If Y + 1 cannot overflow.
3543 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1))) {
3544 SDValue Y = N1.getOperand(0);
3545 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
3547 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0, Y,
3548 N1.getOperand(2));
3549 }
3550
3551 // (uaddo X, Carry) -> (uaddo_carry X, 0, Carry)
3552 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3553 if (SDValue Carry = getAsCarry(TLI, N1))
3554 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0,
3555 DAG.getConstant(0, SDLoc(N), VT), Carry);
3556
3557 return SDValue();
3558}
3559
3560SDValue DAGCombiner::visitADDE(SDNode *N) {
3561 SDValue N0 = N->getOperand(0);
3562 SDValue N1 = N->getOperand(1);
3563 SDValue CarryIn = N->getOperand(2);
3564
3565 // canonicalize constant to RHS
3566 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3567 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3568 if (N0C && !N1C)
3569 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
3570 N1, N0, CarryIn);
3571
3572 // fold (adde x, y, false) -> (addc x, y)
3573 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3574 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
3575
3576 return SDValue();
3577}
3578
3579SDValue DAGCombiner::visitUADDO_CARRY(SDNode *N) {
3580 SDValue N0 = N->getOperand(0);
3581 SDValue N1 = N->getOperand(1);
3582 SDValue CarryIn = N->getOperand(2);
3583 SDLoc DL(N);
3584
3585 // canonicalize constant to RHS
3586 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3587 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3588 if (N0C && !N1C)
3589 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3590
3591 // fold (uaddo_carry x, y, false) -> (uaddo x, y)
3592 if (isNullConstant(CarryIn)) {
3593 if (!LegalOperations ||
3594 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
3595 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
3596 }
3597
3598 // fold (uaddo_carry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
3599 if (isNullConstant(N0) && isNullConstant(N1)) {
3600 EVT VT = N0.getValueType();
3601 EVT CarryVT = CarryIn.getValueType();
3602 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
3603 AddToWorklist(CarryExt.getNode());
3604 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
3605 DAG.getConstant(1, DL, VT)),
3606 DAG.getConstant(0, DL, CarryVT));
3607 }
3608
3609 if (SDValue Combined = visitUADDO_CARRYLike(N0, N1, CarryIn, N))
3610 return Combined;
3611
3612 if (SDValue Combined = visitUADDO_CARRYLike(N1, N0, CarryIn, N))
3613 return Combined;
3614
3615 // We want to avoid useless duplication.
3616 // TODO: This is done automatically for binary operations. As UADDO_CARRY is
3617 // not a binary operation, it is not really possible to leverage this
3618 // existing mechanism for it. However, if more operations require the same
3619 // deduplication logic, then it may be worth generalizing.
3620 SDValue Ops[] = {N1, N0, CarryIn};
3621 SDNode *CSENode =
3622 DAG.getNodeIfExists(ISD::UADDO_CARRY, N->getVTList(), Ops, N->getFlags());
3623 if (CSENode)
3624 return SDValue(CSENode, 0);
3625
3626 return SDValue();
3627}
3628
3629/**
3630 * If we are facing some sort of diamond carry propagation pattern try to
3631 * break it up to generate something like:
3632 * (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry)
3633 *
3634 * The end result is usually an increase in the number of operations required, but because the
3635 * carry is now linearized, other transforms can kick in and optimize the DAG.
3636 *
3637 * Patterns typically look something like
3638 * (uaddo A, B)
3639 * / \
3640 * Carry Sum
3641 * | \
3642 * | (uaddo_carry *, 0, Z)
3643 * | /
3644 * \ Carry
3645 * | /
3646 * (uaddo_carry X, *, *)
3647 *
3648 * But numerous variations exist. Our goal is to identify A, B, X and Z and
3649 * produce a combine with a single path for carry propagation.
3650 */
3651 static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner,
3652 SelectionDAG &DAG, SDValue X,
3653 SDValue Carry0, SDValue Carry1,
3654 SDNode *N) {
3655 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3656 return SDValue();
3657 if (Carry1.getOpcode() != ISD::UADDO)
3658 return SDValue();
3659
3660 SDValue Z;
3661
3662 /**
3663 * First look for a suitable Z. It will present itself in the form of
3664 * (uaddo_carry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3665 */
3666 if (Carry0.getOpcode() == ISD::UADDO_CARRY &&
3667 isNullConstant(Carry0.getOperand(1))) {
3668 Z = Carry0.getOperand(2);
3669 } else if (Carry0.getOpcode() == ISD::UADDO &&
3670 isOneConstant(Carry0.getOperand(1))) {
3671 EVT VT = Carry0->getValueType(1);
3672 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3673 } else {
3674 // We couldn't find a suitable Z.
3675 return SDValue();
3676 }
3677
3678
3679 auto cancelDiamond = [&](SDValue A,SDValue B) {
3680 SDLoc DL(N);
3681 SDValue NewY =
3682 DAG.getNode(ISD::UADDO_CARRY, DL, Carry0->getVTList(), A, B, Z);
3683 Combiner.AddToWorklist(NewY.getNode());
3684 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), X,
3685 DAG.getConstant(0, DL, X.getValueType()),
3686 NewY.getValue(1));
3687 };
3688
3689 /**
3690 * (uaddo A, B)
3691 * |
3692 * Sum
3693 * |
3694 * (uaddo_carry *, 0, Z)
3695 */
3696 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3697 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3698 }
3699
3700 /**
3701 * (uaddo_carry A, 0, Z)
3702 * |
3703 * Sum
3704 * |
3705 * (uaddo *, B)
3706 */
3707 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3708 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3709 }
3710
3711 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3712 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3713 }
3714
3715 return SDValue();
3716}
3717
3718// If we are facing some sort of diamond carry/borrow in/out pattern try to
3719// match patterns like:
3720//
3721// (uaddo A, B) CarryIn
3722// | \ |
3723// | \ |
3724// PartialSum PartialCarryOutX /
3725// | | /
3726// | ____|____________/
3727// | / |
3728// (uaddo *, *) \________
3729// | \ \
3730// | \ |
3731// | PartialCarryOutY |
3732// | \ |
3733// | \ /
3734// AddCarrySum | ______/
3735// | /
3736// CarryOut = (or *, *)
3737//
3738// And generate UADDO_CARRY (or USUBO_CARRY) with two result values:
3739//
3740// {AddCarrySum, CarryOut} = (uaddo_carry A, B, CarryIn)
3741//
3742// Our goal is to identify A, B, and CarryIn and produce UADDO_CARRY/USUBO_CARRY
3743// with a single path for carry/borrow out propagation.
3745 SDValue N0, SDValue N1, SDNode *N) {
3746 SDValue Carry0 = getAsCarry(TLI, N0);
3747 if (!Carry0)
3748 return SDValue();
3749 SDValue Carry1 = getAsCarry(TLI, N1);
3750 if (!Carry1)
3751 return SDValue();
3752
3753 unsigned Opcode = Carry0.getOpcode();
3754 if (Opcode != Carry1.getOpcode())
3755 return SDValue();
3756 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3757 return SDValue();
3758 // Guarantee identical type of CarryOut
3759 EVT CarryOutType = N->getValueType(0);
3760 if (CarryOutType != Carry0.getValue(1).getValueType() ||
3761 CarryOutType != Carry1.getValue(1).getValueType())
3762 return SDValue();
3763
3764 // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
3765 // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
3766 if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
3767 std::swap(Carry0, Carry1);
3768
3769 // Check if nodes are connected in expected way.
3770 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3771 Carry1.getOperand(1) != Carry0.getValue(0))
3772 return SDValue();
3773
3774 // The carry-in value must be on the right-hand side for subtraction.
3775 unsigned CarryInOperandNum =
3776 Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3777 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3778 return SDValue();
3779 SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3780
3781 unsigned NewOp = Opcode == ISD::UADDO ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
3782 if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3783 return SDValue();
3784
3785 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3786 CarryIn = getAsCarry(TLI, CarryIn, true);
3787 if (!CarryIn)
3788 return SDValue();
3789
3790 SDLoc DL(N);
3791 CarryIn = DAG.getBoolExtOrTrunc(CarryIn, DL, Carry1->getValueType(1),
3792 Carry1->getValueType(0));
3793 SDValue Merged =
3794 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3795 Carry0.getOperand(1), CarryIn);
3796
3797 // Please note that because we have proven that the result of the UADDO/USUBO
3798 // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
3799 // therefore prove that if the first UADDO/USUBO overflows, the second
3800 // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
3801 // maximum value.
3802 //
3803 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3804 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3805 //
3806 // This is important because it means that OR and XOR can be used to merge
3807 // carry flags; and that AND can return a constant zero.
3808 //
3809 // TODO: match other operations that can merge flags (ADD, etc)
3810 DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3811 if (N->getOpcode() == ISD::AND)
3812 return DAG.getConstant(0, DL, CarryOutType);
3813 return Merged.getValue(1);
3814}
3815
3816SDValue DAGCombiner::visitUADDO_CARRYLike(SDValue N0, SDValue N1,
3817 SDValue CarryIn, SDNode *N) {
3818 // fold (uaddo_carry (xor a, -1), b, c) -> (usubo_carry b, a, !c) and flip
3819 // carry.
3820 if (isBitwiseNot(N0))
3821 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3822 SDLoc DL(N);
3823 SDValue Sub = DAG.getNode(ISD::USUBO_CARRY, DL, N->getVTList(), N1,
3824 N0.getOperand(0), NotC);
3825 return CombineTo(
3826 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3827 }
3828
3829 // Iff the flag result is dead:
3830 // (uaddo_carry (add|uaddo X, Y), 0, Carry) -> (uaddo_carry X, Y, Carry)
3831 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3832 // or the dependency between the instructions.
3833 if ((N0.getOpcode() == ISD::ADD ||
3834 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3835 N0.getValue(1) != CarryIn)) &&
3836 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3837 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(),
3838 N0.getOperand(0), N0.getOperand(1), CarryIn);
3839
3840 /**
3841 * When one of the uaddo_carry arguments is itself a carry, we may be facing
3842 * a diamond carry propagation. In that case we try to transform the DAG
3843 * to ensure linear carry propagation if that is possible.
3844 */
3845 if (auto Y = getAsCarry(TLI, N1)) {
3846 // Because both are carries, Y and Z can be swapped.
3847 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3848 return R;
3849 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3850 return R;
3851 }
3852
3853 return SDValue();
3854}
3855
3856SDValue DAGCombiner::visitSADDO_CARRYLike(SDValue N0, SDValue N1,
3857 SDValue CarryIn, SDNode *N) {
3858 // fold (saddo_carry (xor a, -1), b, c) -> (ssubo_carry b, a, !c)
3859 if (isBitwiseNot(N0)) {
3860 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true))
3861 return DAG.getNode(ISD::SSUBO_CARRY, SDLoc(N), N->getVTList(), N1,
3862 N0.getOperand(0), NotC);
3863 }
3864
3865 return SDValue();
3866}
3867
3868SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
3869 SDValue N0 = N->getOperand(0);
3870 SDValue N1 = N->getOperand(1);
3871 SDValue CarryIn = N->getOperand(2);
3872 SDLoc DL(N);
3873
3874 // canonicalize constant to RHS
3875 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3876 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3877 if (N0C && !N1C)
3878 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3879
3880 // fold (saddo_carry x, y, false) -> (saddo x, y)
3881 if (isNullConstant(CarryIn)) {
3882 if (!LegalOperations ||
3883 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3884 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3885 }
3886
3887 if (SDValue Combined = visitSADDO_CARRYLike(N0, N1, CarryIn, N))
3888 return Combined;
3889
3890 if (SDValue Combined = visitSADDO_CARRYLike(N1, N0, CarryIn, N))
3891 return Combined;
3892
3893 return SDValue();
3894}
3895
3896// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3897// clamp/truncation if necessary.
3898 static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3899 SDValue RHS, SelectionDAG &DAG,
3900 const SDLoc &DL) {
3901 assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3902 "Illegal truncation");
3903
3904 if (DstVT == SrcVT)
3905 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3906
3907 // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3908 // clamping RHS.
3909   APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3910 DstVT.getScalarSizeInBits());
3911 if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3912 return SDValue();
3913
3914 SDValue SatLimit =
3915       DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3916 DstVT.getScalarSizeInBits()),
3917 DL, SrcVT);
3918 RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3919 RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3920 LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3921 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3922}
3923
3924// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3925// usubsat(a,b), optionally as a truncated type.
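// Rationale: umax(a,b) - b is a - b when a > b and 0 otherwise, and
// a - umin(a,b) is likewise a - b when a > b and 0 otherwise, which is the
// definition of usubsat(a,b). getTruncatedUSUBSAT above additionally narrows
// the node to DstVT, clamping the RHS with umin first so no bits are lost.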
3926SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL) {
3927 if (N->getOpcode() != ISD::SUB ||
3928 !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3929 return SDValue();
3930
3931 EVT SubVT = N->getValueType(0);
3932 SDValue Op0 = N->getOperand(0);
3933 SDValue Op1 = N->getOperand(1);
3934
3935 // Try to find umax(a,b) - b or a - umin(a,b) patterns, as
3936 // they may be converted to usubsat(a,b).
3937 if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3938 SDValue MaxLHS = Op0.getOperand(0);
3939 SDValue MaxRHS = Op0.getOperand(1);
3940 if (MaxLHS == Op1)
3941 return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, DL);
3942 if (MaxRHS == Op1)
3943 return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, DL);
3944 }
3945
3946 if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3947 SDValue MinLHS = Op1.getOperand(0);
3948 SDValue MinRHS = Op1.getOperand(1);
3949 if (MinLHS == Op0)
3950 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, DL);
3951 if (MinRHS == Op0)
3952 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, DL);
3953 }
3954
3955 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3956 if (Op1.getOpcode() == ISD::TRUNCATE &&
3957 Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3958 Op1.getOperand(0).hasOneUse()) {
3959 SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3960 SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3961 if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3962 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3963 DAG, DL);
3964 if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3965 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3966 DAG, DL);
3967 }
3968
3969 return SDValue();
3970}
3971
3972// Refinement of DAG/Type Legalisation (promotion) when CTLZ is used for
3973// counting leading ones. Broadly, it replaces the subtraction with a left
3974// shift.
3975//
3976// * DAG Legalisation Pattern:
3977//
3978// (sub (ctlz (zeroextend (not Src)))
3979// BitWidthDiff)
3980//
3981// if BitWidthDiff == BitWidth(Node) - BitWidth(Src)
3982// -->
3983//
3984// (ctlz_zero_undef (not (shl (anyextend Src)
3985// BitWidthDiff)))
3986//
3987// * Type Legalisation Pattern:
3988//
3989// (sub (ctlz (and (xor Src XorMask)
3990// AndMask))
3991// BitWidthDiff)
3992//
3993// if AndMask has only trailing ones
3994// and MaskBitWidth(AndMask) == BitWidth(Node) - BitWidthDiff
3995// and XorMask has more trailing ones than AndMask
3996// -->
3997//
3998// (ctlz_zero_undef (not (shl Src BitWidthDiff)))
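// Worked example (i8 Src promoted to i32, BitWidthDiff == 24):
// Src = 0b11100101 has 3 leading ones. (shl (anyextend Src), 24) gives
// 0xE5000000, inverting gives 0x1AFFFFFF, and its ctlz is 3 -- the
// leading-ones count of the original i8 -- with no ctlz/sub round trip.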
3999template <class MatchContextClass>
4001 const SDLoc DL(N);
4002 SDValue N0 = N->getOperand(0);
4003 EVT VT = N0.getValueType();
4004 unsigned BitWidth = VT.getScalarSizeInBits();
4005
4006 MatchContextClass Matcher(DAG, DAG.getTargetLoweringInfo(), N);
4007
4008 APInt AndMask;
4009 APInt XorMask;
4010 APInt BitWidthDiff;
4011
4012 SDValue CtlzOp;
4013 SDValue Src;
4014
4015 if (!sd_context_match(
4016 N, Matcher, m_Sub(m_Ctlz(m_Value(CtlzOp)), m_ConstInt(BitWidthDiff))))
4017 return SDValue();
4018
4019 if (sd_context_match(CtlzOp, Matcher, m_ZExt(m_Not(m_Value(Src))))) {
4020 // DAG Legalisation Pattern:
4021 // (sub (ctlz (zero_extend (not Op)) BitWidthDiff))
4022 if ((BitWidth - Src.getValueType().getScalarSizeInBits()) != BitWidthDiff)
4023 return SDValue();
4024
4025 Src = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Src);
4026 } else if (sd_context_match(CtlzOp, Matcher,
4027 m_And(m_Xor(m_Value(Src), m_ConstInt(XorMask)),
4028 m_ConstInt(AndMask)))) {
4029 // Type Legalisation Pattern:
4030 // (sub (ctlz (and (xor Op XorMask) AndMask)) BitWidthDiff)
4031 unsigned AndMaskWidth = BitWidth - BitWidthDiff.getZExtValue();
4032 if (!(AndMask.isMask(AndMaskWidth) && XorMask.countr_one() >= AndMaskWidth))
4033 return SDValue();
4034 } else
4035 return SDValue();
4036
4037 SDValue ShiftConst = DAG.getShiftAmountConstant(BitWidthDiff, VT, DL);
4038 SDValue LShift = Matcher.getNode(ISD::SHL, DL, VT, Src, ShiftConst);
4039 SDValue Not =
4040 Matcher.getNode(ISD::XOR, DL, VT, LShift, DAG.getAllOnesConstant(DL, VT));
4041
4042 return Matcher.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, Not);
4043}
4044
4045// Fold sub(x, mul(divrem(x,y)[0], y)) to divrem(x, y)[1]
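// This relies on x - (x / y) * y == x % y: when a divrem of (x, y) already
// exists, the whole subtraction collapses to its remainder result (result
// number 1). The shl case below handles y == 1 << C, where the multiply by y
// was already strength-reduced to a shift.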
4046 static SDValue foldRemainderIdiom(SDNode *N, SelectionDAG &DAG,
4047 const SDLoc &DL) {
4048 assert(N->getOpcode() == ISD::SUB && "Node must be a SUB");
4049 SDValue Sub0 = N->getOperand(0);
4050 SDValue Sub1 = N->getOperand(1);
4051
4052 auto CheckAndFoldMulCase = [&](SDValue DivRem, SDValue MaybeY) -> SDValue {
4053 if ((DivRem.getOpcode() == ISD::SDIVREM ||
4054 DivRem.getOpcode() == ISD::UDIVREM) &&
4055 DivRem.getResNo() == 0 && DivRem.getOperand(0) == Sub0 &&
4056 DivRem.getOperand(1) == MaybeY) {
4057 return SDValue(DivRem.getNode(), 1);
4058 }
4059 return SDValue();
4060 };
4061
4062 if (Sub1.getOpcode() == ISD::MUL) {
4063 // (sub x, (mul divrem(x,y)[0], y))
4064 SDValue Mul0 = Sub1.getOperand(0);
4065 SDValue Mul1 = Sub1.getOperand(1);
4066
4067 if (SDValue Res = CheckAndFoldMulCase(Mul0, Mul1))
4068 return Res;
4069
4070 if (SDValue Res = CheckAndFoldMulCase(Mul1, Mul0))
4071 return Res;
4072
4073 } else if (Sub1.getOpcode() == ISD::SHL) {
4074 // Handle (sub x, (shl divrem(x,y)[0], C)) where y = 1 << C
4075 SDValue Shl0 = Sub1.getOperand(0);
4076 SDValue Shl1 = Sub1.getOperand(1);
4077 // Check if Shl0 is divrem(x, Y)[0]
4078 if ((Shl0.getOpcode() == ISD::SDIVREM ||
4079 Shl0.getOpcode() == ISD::UDIVREM) &&
4080 Shl0.getResNo() == 0 && Shl0.getOperand(0) == Sub0) {
4081
4082 SDValue Divisor = Shl0.getOperand(1);
4083
4084 ConstantSDNode *DivC = isConstOrConstSplat(Divisor);
4085       ConstantSDNode *ShC = isConstOrConstSplat(Shl1);
4086 if (!DivC || !ShC)
4087 return SDValue();
4088
4089 if (DivC->getAPIntValue().isPowerOf2() &&
4090 DivC->getAPIntValue().logBase2() == ShC->getAPIntValue())
4091 return SDValue(Shl0.getNode(), 1);
4092 }
4093 }
4094 return SDValue();
4095}
4096
4097// Since it may not be valid to emit a fold to zero for vector initializers,
4098// check if we can before folding.
4099static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
4100 SelectionDAG &DAG, bool LegalOperations) {
4101 if (!VT.isVector())
4102 return DAG.getConstant(0, DL, VT);
4103 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
4104 return DAG.getConstant(0, DL, VT);
4105 return SDValue();
4106}
4107
4108SDValue DAGCombiner::visitSUB(SDNode *N) {
4109 SDValue N0 = N->getOperand(0);
4110 SDValue N1 = N->getOperand(1);
4111 EVT VT = N0.getValueType();
4112 unsigned BitWidth = VT.getScalarSizeInBits();
4113 SDLoc DL(N);
4114
4116 return V;
4117
4118 // fold (sub x, x) -> 0
4119 if (N0 == N1)
4120 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4121
4122 // fold (sub c1, c2) -> c3
4123 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
4124 return C;
4125
4126 // fold vector ops
4127 if (VT.isVector()) {
4128 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4129 return FoldedVOp;
4130
4131 // fold (sub x, 0) -> x, vector edition
4133 return N0;
4134 }
4135
4136 // (sub x, ([v]select (ult x, y), 0, y)) -> (umin x, (sub x, y))
4137 // (sub x, ([v]select (uge x, y), y, 0)) -> (umin x, (sub x, y))
4138 if (N1.hasOneUse() && hasUMin(VT)) {
4139 SDValue Y;
4140 auto MS0 = m_Specific(N0);
4141 auto MVY = m_Value(Y);
4142 auto MZ = m_Zero();
4143 auto MCC1 = m_SpecificCondCode(ISD::SETULT);
4144 auto MCC2 = m_SpecificCondCode(ISD::SETUGE);
4145
4146 if (sd_match(N1, m_SelectCCLike(MS0, MVY, MZ, m_Deferred(Y), MCC1)) ||
4147 sd_match(N1, m_SelectCCLike(MS0, MVY, m_Deferred(Y), MZ, MCC2)) ||
4148 sd_match(N1, m_VSelect(m_SetCC(MS0, MVY, MCC1), MZ, m_Deferred(Y))) ||
4149 sd_match(N1, m_VSelect(m_SetCC(MS0, MVY, MCC2), m_Deferred(Y), MZ)))
4150
4151 return DAG.getNode(ISD::UMIN, DL, VT, N0,
4152 DAG.getNode(ISD::SUB, DL, VT, N0, Y));
4153 }
4154
4155 if (SDValue NewSel = foldBinOpIntoSelect(N))
4156 return NewSel;
4157
4158 // fold (sub x, c) -> (add x, -c)
4159 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
4160 return DAG.getNode(ISD::ADD, DL, VT, N0,
4161 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4162
4163 if (isNullOrNullSplat(N0)) {
4164 // Right-shifting everything out but the sign bit followed by negation is
4165 // the same as flipping arithmetic/logical shift type without the negation:
4166 // -(X >>u 31) -> (X >>s 31)
4167 // -(X >>s 31) -> (X >>u 31)
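    // Both shifts isolate the sign bit: the logical shift yields 0 or 1 and
    // the arithmetic shift yields 0 or -1, and negation maps one onto the
    // other.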
4168 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
4169 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
4170 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
4171 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
4172 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
4173 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
4174 }
4175 }
4176
4177 // 0 - X --> 0 if the sub is NUW.
4178 if (N->getFlags().hasNoUnsignedWrap())
4179 return N0;
4180
4182 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
4183 // N1 must be 0 because negating the minimum signed value is undefined.
4184 if (N->getFlags().hasNoSignedWrap())
4185 return N0;
4186
4187 // 0 - X --> X if X is 0 or the minimum signed value.
4188 return N1;
4189 }
4190
4191 // Convert 0 - abs(x).
4192 if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
4193       !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
4194 if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
4195 return Result;
4196
4197 // Similar to the previous rule, but this time targeting an expanded abs.
4198 // (sub 0, (max X, (sub 0, X))) --> (min X, (sub 0, X))
4199 // as well as
4200 // (sub 0, (min X, (sub 0, X))) --> (max X, (sub 0, X))
4201 // Note that these two are applicable to both signed and unsigned min/max.
4202 SDValue X;
4203 SDValue S0;
4204 auto NegPat = m_AllOf(m_Neg(m_Deferred(X)), m_Value(S0));
4205 if (sd_match(N1, m_OneUse(m_AnyOf(m_SMax(m_Value(X), NegPat),
4206 m_UMax(m_Value(X), NegPat),
4207 m_SMin(m_Value(X), NegPat),
4208 m_UMin(m_Value(X), NegPat))))) {
4209 unsigned NewOpc = ISD::getInverseMinMaxOpcode(N1->getOpcode());
4210 if (hasOperation(NewOpc, VT))
4211 return DAG.getNode(NewOpc, DL, VT, X, S0);
4212 }
4213
4214 // Fold neg(splat(neg(x)) -> splat(x)
4215 if (VT.isVector()) {
4216 SDValue N1S = DAG.getSplatValue(N1, true);
4217 if (N1S && N1S.getOpcode() == ISD::SUB &&
4218 isNullConstant(N1S.getOperand(0)))
4219 return DAG.getSplat(VT, DL, N1S.getOperand(1));
4220 }
4221
4222 // sub 0, (and x, 1) --> SIGN_EXTEND_INREG x, i1
4223 if (N1.getOpcode() == ISD::AND && N1.hasOneUse() &&
4224 isOneOrOneSplat(N1->getOperand(1))) {
4225 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), 1);
4226 if (VT.isVector())
4227 ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT,
4231 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N1->getOperand(0),
4232 DAG.getValueType(ExtVT));
4233 }
4234 }
4235 }
4236
4237 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
4238   if (isAllOnesOrAllOnesSplat(N0))
4239 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
4240
4241 // fold (A - (0-B)) -> A+B
4242 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
4243 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
4244
4245 // fold A-(A-B) -> B
4246 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
4247 return N1.getOperand(1);
4248
4249 // fold (A+B)-A -> B
4250 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
4251 return N0.getOperand(1);
4252
4253 // fold (A+B)-B -> A
4254 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
4255 return N0.getOperand(0);
4256
4257 // fold (A+C1)-C2 -> A+(C1-C2)
4258 if (N0.getOpcode() == ISD::ADD) {
4259 SDValue N01 = N0.getOperand(1);
4260 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
4261 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
4262 }
4263
4264 // fold C2-(A+C1) -> (C2-C1)-A
4265 if (N1.getOpcode() == ISD::ADD) {
4266 SDValue N11 = N1.getOperand(1);
4267 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
4268 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
4269 }
4270
4271 // fold (A-C1)-C2 -> A-(C1+C2)
4272 if (N0.getOpcode() == ISD::SUB) {
4273 SDValue N01 = N0.getOperand(1);
4274 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
4275 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
4276 }
4277
4278 // fold (c1-A)-c2 -> (c1-c2)-A
4279 if (N0.getOpcode() == ISD::SUB) {
4280 SDValue N00 = N0.getOperand(0);
4281 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
4282 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
4283 }
4284
4285 SDValue A, B, C;
4286
4287 // fold ((A+(B+C))-B) -> A+C
4288 if (sd_match(N0, m_Add(m_Value(A), m_Add(m_Specific(N1), m_Value(C)))))
4289 return DAG.getNode(ISD::ADD, DL, VT, A, C);
4290
4291 // fold ((A+(B-C))-B) -> A-C
4292 if (sd_match(N0, m_Add(m_Value(A), m_Sub(m_Specific(N1), m_Value(C)))))
4293 return DAG.getNode(ISD::SUB, DL, VT, A, C);
4294
4295 // fold ((A-(B-C))-C) -> A-B
4296 if (sd_match(N0, m_Sub(m_Value(A), m_Sub(m_Value(B), m_Specific(N1)))))
4297 return DAG.getNode(ISD::SUB, DL, VT, A, B);
4298
4299 // fold (A-(B-C)) -> A+(C-B)
4300 if (sd_match(N1, m_OneUse(m_Sub(m_Value(B), m_Value(C)))))
4301 return DAG.getNode(ISD::ADD, DL, VT, N0,
4302 DAG.getNode(ISD::SUB, DL, VT, C, B));
4303
4304 // A - (A & B) -> A & (~B)
4305 if (sd_match(N1, m_And(m_Specific(N0), m_Value(B))) &&
4306 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true)))
4307 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getNOT(DL, B, VT));
4308
4309 // fold (A - (-B * C)) -> (A + (B * C))
4310 if (sd_match(N1, m_OneUse(m_Mul(m_Neg(m_Value(B)), m_Value(C)))))
4311 return DAG.getNode(ISD::ADD, DL, VT, N0,
4312 DAG.getNode(ISD::MUL, DL, VT, B, C));
4313
4314 // If either operand of a sub is undef, the result is undef
4315 if (N0.isUndef())
4316 return N0;
4317 if (N1.isUndef())
4318 return N1;
4319
4320 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
4321 return V;
4322
4323 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
4324 return V;
4325
4326 // Try to match AVGCEIL fixedwidth pattern
4327 if (SDValue V = foldSubToAvg(N, DL))
4328 return V;
4329
4330 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, DL))
4331 return V;
4332
4333 if (SDValue V = foldSubToUSubSat(VT, N, DL))
4334 return V;
4335
4336 if (SDValue V = foldRemainderIdiom(N, DAG, DL))
4337 return V;
4338
4339 // (A - B) - 1 -> add (xor B, -1), A
4340   if (sd_match(N, m_Sub(m_OneUse(m_Sub(m_Value(A), m_Value(B))),
4341 m_One(/*AllowUndefs=*/true))))
4342 return DAG.getNode(ISD::ADD, DL, VT, A, DAG.getNOT(DL, B, VT));
4343
4344 // Look for:
4345 // sub y, (xor x, -1)
4346 // And if the target does not like this form then turn into:
4347 // add (add x, y), 1
4348 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
4349 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
4350 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
4351 }
4352
4353 // Hoist one-use addition by non-opaque constant:
4354 // (x + C) - y -> (x - y) + C
4355 if (!reassociationCanBreakAddressingModePattern(ISD::SUB, DL, N, N0, N1) &&
4356 N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
4357 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4358 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4359 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
4360 }
4361 // y - (x + C) -> (y - x) - C
4362 if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() &&
4363 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
4364 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
4365 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
4366 }
4367 // (x - C) - y -> (x - y) - C
4368 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
4369 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4370 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4371 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4372 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
4373 }
4374 // (C - x) - y -> C - (x + y)
4375 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4376 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
4377 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
4378 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
4379 }
4380
4381 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
4382 // rather than 'sub 0/1' (the sext should get folded).
4383 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
4384 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
4385 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
4386 TLI.getBooleanContents(VT) ==
4387           TargetLowering::ZeroOrNegativeOneBooleanContent) {
4388 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
4389 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
4390 }
4391
4392 // fold B = sra (A, size(A)-1); sub (xor (A, B), B) -> (abs A)
4393 if ((!LegalOperations || hasOperation(ISD::ABS, VT)) &&
4394       sd_match(N1, m_Sra(m_Value(A), m_SpecificInt(BitWidth - 1))) &&
4395 sd_match(N0, m_Xor(m_Specific(A), m_Specific(N1))))
4396 return DAG.getNode(ISD::ABS, DL, VT, A);
4397
4398 // If the relocation model supports it, consider symbol offsets.
4399 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
4400 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
4401 // fold (sub Sym+c1, Sym+c2) -> c1-c2
4402 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
4403 if (GA->getGlobal() == GB->getGlobal())
4404 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
4405 DL, VT);
4406 }
4407
4408 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
4409 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
4410 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
4411 if (TN->getVT() == MVT::i1) {
4412 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
4413 DAG.getConstant(1, DL, VT));
4414 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
4415 }
4416 }
4417
4418 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
4419 if (N1.getOpcode() == ISD::VSCALE && N1.hasOneUse()) {
4420 const APInt &IntVal = N1.getConstantOperandAPInt(0);
4421 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
4422 }
4423
4424 // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
4425 if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
4426 APInt NewStep = -N1.getConstantOperandAPInt(0);
4427 return DAG.getNode(ISD::ADD, DL, VT, N0,
4428 DAG.getStepVector(DL, VT, NewStep));
4429 }
4430
4431 // Prefer an add for more folding potential and possibly better codegen:
4432 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
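  // (lshr N10, width-1) is 0 or 1 while (ashr N10, width-1) is 0 or -1, so
  // subtracting the former is the same as adding the latter.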
4433 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
4434 SDValue ShAmt = N1.getOperand(1);
4435 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
4436 if (ShAmtC && ShAmtC->getAPIntValue() == (BitWidth - 1)) {
4437 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
4438 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
4439 }
4440 }
4441
4442 // As with the previous fold, prefer add for more folding potential.
4443 // Subtracting SMIN/0 is the same as adding SMIN/0:
4444 // N0 - (X << BW-1) --> N0 + (X << BW-1)
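  // X << (BW-1) is either 0 or the minimum signed value, and in two's
  // complement SMIN is its own negation, so subtracting it equals adding it.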
4445 if (N1.getOpcode() == ISD::SHL) {
4446 ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
4447 if (ShlC && ShlC->getAPIntValue() == (BitWidth - 1))
4448 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
4449 }
4450
4451 // (sub (usubo_carry X, 0, Carry), Y) -> (usubo_carry X, Y, Carry)
4452 if (N0.getOpcode() == ISD::USUBO_CARRY && isNullConstant(N0.getOperand(1)) &&
4453 N0.getResNo() == 0 && N0.hasOneUse())
4454 return DAG.getNode(ISD::USUBO_CARRY, DL, N0->getVTList(),
4455 N0.getOperand(0), N1, N0.getOperand(2));
4456
4457   if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT)) {
4458 // (sub Carry, X) -> (uaddo_carry (sub 0, X), 0, Carry)
4459 if (SDValue Carry = getAsCarry(TLI, N0)) {
4460 SDValue X = N1;
4461 SDValue Zero = DAG.getConstant(0, DL, VT);
4462 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
4463 return DAG.getNode(ISD::UADDO_CARRY, DL,
4464 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
4465 Carry);
4466 }
4467 }
4468
4469 // If there's no chance of borrowing from adjacent bits, then sub is xor:
4470 // sub C0, X --> xor X, C0
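  // Example: C0 = 0b1010 and X can only have bits within 0b1010 set; every
  // bit of X then subtracts from a set bit of C0, no borrow is generated,
  // and a borrow-free subtraction is just a bitwise xor. The known-bits
  // check below verifies exactly that condition.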
4471 if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) {
4472 if (!C0->isOpaque()) {
4473 const APInt &C0Val = C0->getAPIntValue();
4474 const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero;
4475 if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
4476 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
4477 }
4478 }
4479
4480 // smax(a,b) - smin(a,b) --> abds(a,b)
4481 if ((!LegalOperations || hasOperation(ISD::ABDS, VT)) &&
4482 sd_match(N0, m_SMaxLike(m_Value(A), m_Value(B))) &&
4483       sd_match(N1, m_SMinLike(m_Specific(A), m_Specific(B))))
4484 return DAG.getNode(ISD::ABDS, DL, VT, A, B);
4485
4486 // smin(a,b) - smax(a,b) --> neg(abds(a,b))
4487 if (hasOperation(ISD::ABDS, VT) &&
4488 sd_match(N0, m_SMinLike(m_Value(A), m_Value(B))) &&
4489       sd_match(N1, m_SMaxLike(m_Specific(A), m_Specific(B))))
4490 return DAG.getNegative(DAG.getNode(ISD::ABDS, DL, VT, A, B), DL, VT);
4491
4492 // umax(a,b) - umin(a,b) --> abdu(a,b)
4493 if ((!LegalOperations || hasOperation(ISD::ABDU, VT)) &&
4494 sd_match(N0, m_UMaxLike(m_Value(A), m_Value(B))) &&
4495       sd_match(N1, m_UMinLike(m_Specific(A), m_Specific(B))))
4496 return DAG.getNode(ISD::ABDU, DL, VT, A, B);
4497
4498 // umin(a,b) - umax(a,b) --> neg(abdu(a,b))
4499 if (hasOperation(ISD::ABDU, VT) &&
4500 sd_match(N0, m_UMinLike(m_Value(A), m_Value(B))) &&
4501       sd_match(N1, m_UMaxLike(m_Specific(A), m_Specific(B))))
4502 return DAG.getNegative(DAG.getNode(ISD::ABDU, DL, VT, A, B), DL, VT);
4503
4504 return SDValue();
4505}
4506
4507SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
4508 unsigned Opcode = N->getOpcode();
4509 SDValue N0 = N->getOperand(0);
4510 SDValue N1 = N->getOperand(1);
4511 EVT VT = N0.getValueType();
4512 bool IsSigned = Opcode == ISD::SSUBSAT;
4513 SDLoc DL(N);
4514
4515 // fold (sub_sat x, undef) -> 0
4516 if (N0.isUndef() || N1.isUndef())
4517 return DAG.getConstant(0, DL, VT);
4518
4519 // fold (sub_sat x, x) -> 0
4520 if (N0 == N1)
4521 return DAG.getConstant(0, DL, VT);
4522
4523 // fold (sub_sat c1, c2) -> c3
4524 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4525 return C;
4526
4527 // fold vector ops
4528 if (VT.isVector()) {
4529 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4530 return FoldedVOp;
4531
4532 // fold (sub_sat x, 0) -> x, vector edition
4533     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4534 return N0;
4535 }
4536
4537 // fold (sub_sat x, 0) -> x
4538 if (isNullConstant(N1))
4539 return N0;
4540
4541 // If it cannot overflow, transform into a sub.
4542 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4543 return DAG.getNode(ISD::SUB, DL, VT, N0, N1);
4544
4545 return SDValue();
4546}
4547
4548SDValue DAGCombiner::visitSUBC(SDNode *N) {
4549 SDValue N0 = N->getOperand(0);
4550 SDValue N1 = N->getOperand(1);
4551 EVT VT = N0.getValueType();
4552 SDLoc DL(N);
4553
4554 // If the flag result is dead, turn this into an SUB.
4555 if (!N->hasAnyUseOfValue(1))
4556 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4557 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4558
4559 // fold (subc x, x) -> 0 + no borrow
4560 if (N0 == N1)
4561 return CombineTo(N, DAG.getConstant(0, DL, VT),
4562 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4563
4564 // fold (subc x, 0) -> x + no borrow
4565 if (isNullConstant(N1))
4566 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4567
4568 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4569 if (isAllOnesConstant(N0))
4570 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4571 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4572
4573 return SDValue();
4574}
4575
4576SDValue DAGCombiner::visitSUBO(SDNode *N) {
4577 SDValue N0 = N->getOperand(0);
4578 SDValue N1 = N->getOperand(1);
4579 EVT VT = N0.getValueType();
4580 bool IsSigned = (ISD::SSUBO == N->getOpcode());
4581
4582 EVT CarryVT = N->getValueType(1);
4583 SDLoc DL(N);
4584
4585 // If the flag result is dead, turn this into an SUB.
4586 if (!N->hasAnyUseOfValue(1))
4587 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4588 DAG.getUNDEF(CarryVT));
4589
4590 // fold (subo x, x) -> 0 + no borrow
4591 if (N0 == N1)
4592 return CombineTo(N, DAG.getConstant(0, DL, VT),
4593 DAG.getConstant(0, DL, CarryVT));
4594
4595 // fold (subo x, c) -> (addo x, -c)
4596 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
4597 if (IsSigned && !N1C->isMinSignedValue())
4598 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
4599 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4600
4601 // fold (subo x, 0) -> x + no borrow
4602 if (isNullOrNullSplat(N1))
4603 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
4604
4605 // If it cannot overflow, transform into a sub.
4606 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4607 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4608 DAG.getConstant(0, DL, CarryVT));
4609
4610 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4611 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
4612 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4613 DAG.getConstant(0, DL, CarryVT));
4614
4615 return SDValue();
4616}
4617
4618SDValue DAGCombiner::visitSUBE(SDNode *N) {
4619 SDValue N0 = N->getOperand(0);
4620 SDValue N1 = N->getOperand(1);
4621 SDValue CarryIn = N->getOperand(2);
4622
4623 // fold (sube x, y, false) -> (subc x, y)
4624 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
4625 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
4626
4627 return SDValue();
4628}
4629
4630SDValue DAGCombiner::visitUSUBO_CARRY(SDNode *N) {
4631 SDValue N0 = N->getOperand(0);
4632 SDValue N1 = N->getOperand(1);
4633 SDValue CarryIn = N->getOperand(2);
4634
4635 // fold (usubo_carry x, y, false) -> (usubo x, y)
4636 if (isNullConstant(CarryIn)) {
4637 if (!LegalOperations ||
4638 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
4639 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
4640 }
4641
4642 return SDValue();
4643}
4644
4645SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
4646 SDValue N0 = N->getOperand(0);
4647 SDValue N1 = N->getOperand(1);
4648 SDValue CarryIn = N->getOperand(2);
4649
4650 // fold (ssubo_carry x, y, false) -> (ssubo x, y)
4651 if (isNullConstant(CarryIn)) {
4652 if (!LegalOperations ||
4653 TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
4654 return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
4655 }
4656
4657 return SDValue();
4658}
4659
4660// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
4661// UMULFIXSAT here.
4662SDValue DAGCombiner::visitMULFIX(SDNode *N) {
4663 SDValue N0 = N->getOperand(0);
4664 SDValue N1 = N->getOperand(1);
4665 SDValue Scale = N->getOperand(2);
4666 EVT VT = N0.getValueType();
4667
4668 // fold (mulfix x, undef, scale) -> 0
4669 if (N0.isUndef() || N1.isUndef())
4670 return DAG.getConstant(0, SDLoc(N), VT);
4671
4672 // Canonicalize constant to RHS (vector doesn't have to splat)
4673   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4674       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4675 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
4676
4677 // fold (mulfix x, 0, scale) -> 0
4678 if (isNullConstant(N1))
4679 return DAG.getConstant(0, SDLoc(N), VT);
4680
4681 return SDValue();
4682}
4683
4684template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) {
4685 SDValue N0 = N->getOperand(0);
4686 SDValue N1 = N->getOperand(1);
4687 EVT VT = N0.getValueType();
4688 unsigned BitWidth = VT.getScalarSizeInBits();
4689 SDLoc DL(N);
4690 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
4691 MatchContextClass Matcher(DAG, TLI, N);
4692
4693 // fold (mul x, undef) -> 0
4694 if (N0.isUndef() || N1.isUndef())
4695 return DAG.getConstant(0, DL, VT);
4696
4697 // fold (mul c1, c2) -> c1*c2
4698 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, DL, VT, {N0, N1}))
4699 return C;
4700
4701 // canonicalize constant to RHS (vector doesn't have to splat)
4702  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4703      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4704    return Matcher.getNode(ISD::MUL, DL, VT, N1, N0);
4705
4706 bool N1IsConst = false;
4707 bool N1IsOpaqueConst = false;
4708 APInt ConstValue1;
4709
4710 // fold vector ops
4711 if (VT.isVector()) {
4712 // TODO: Change this to use SimplifyVBinOp when it supports VP op.
4713 if (!UseVP)
4714 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4715 return FoldedVOp;
4716
4717 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
4718 assert((!N1IsConst || ConstValue1.getBitWidth() == BitWidth) &&
4719 "Splat APInt should be element width");
4720 } else {
4721 N1IsConst = isa<ConstantSDNode>(N1);
4722 if (N1IsConst) {
4723 ConstValue1 = N1->getAsAPIntVal();
4724 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
4725 }
4726 }
4727
4728 // fold (mul x, 0) -> 0
4729 if (N1IsConst && ConstValue1.isZero())
4730 return N1;
4731
4732 // fold (mul x, 1) -> x
4733 if (N1IsConst && ConstValue1.isOne())
4734 return N0;
4735
4736 if (!UseVP)
4737 if (SDValue NewSel = foldBinOpIntoSelect(N))
4738 return NewSel;
4739
4740 // fold (mul x, -1) -> 0-x
4741 if (N1IsConst && ConstValue1.isAllOnes())
4742 return Matcher.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4743
4744 // fold (mul x, (1 << c)) -> x << c
4745 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4746 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
4747 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
4748 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4749 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4750 SDNodeFlags Flags;
4751 Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap());
4752 // TODO: Preserve setNoSignedWrap if LogBase2 isn't BitWidth - 1.
4753 return Matcher.getNode(ISD::SHL, DL, VT, N0, Trunc, Flags);
4754 }
4755 }
4756
4757 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
4758 if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
4759 unsigned Log2Val = (-ConstValue1).logBase2();
4760
4761 // FIXME: If the input is something that is easily negated (e.g. a
4762 // single-use add), we should put the negate there.
4763 return Matcher.getNode(
4764 ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
4765 Matcher.getNode(ISD::SHL, DL, VT, N0,
4766 DAG.getShiftAmountConstant(Log2Val, VT, DL)));
4767 }
4768
4769 // Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
4770 // hi result is in use in case we hit this mid-legalization.
4771 if (!UseVP) {
4772 for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
4773 if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
4774 SDVTList LoHiVT = DAG.getVTList(VT, VT);
4775 // TODO: Can we match commutable operands with getNodeIfExists?
4776 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
4777 if (LoHi->hasAnyUseOfValue(1))
4778 return SDValue(LoHi, 0);
4779 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
4780 if (LoHi->hasAnyUseOfValue(1))
4781 return SDValue(LoHi, 0);
4782 }
4783 }
4784 }
4785
4786 // Try to transform:
4787 // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
4788 // mul x, (2^N + 1) --> add (shl x, N), x
4789 // mul x, (2^N - 1) --> sub (shl x, N), x
4790 // Examples: x * 33 --> (x << 5) + x
4791 // x * 15 --> (x << 4) - x
4792 // x * -33 --> -((x << 5) + x)
4793 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
4794 // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
4795 // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
4796 // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
4797 // Examples: x * 0x8800 --> (x << 15) + (x << 11)
4798 // x * 0xf800 --> (x << 16) - (x << 11)
4799 // x * -0x8800 --> -((x << 15) + (x << 11))
4800 // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
4801 if (!UseVP && N1IsConst &&
4802 TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
4803 // TODO: We could handle more general decomposition of any constant by
4804 // having the target set a limit on number of ops and making a
4805 // callback to determine that sequence (similar to sqrt expansion).
4806 unsigned MathOp = ISD::DELETED_NODE;
4807 APInt MulC = ConstValue1.abs();
4808 // The constant `2` should be treated as (2^0 + 1).
4809 unsigned TZeros = MulC == 2 ? 0 : MulC.countr_zero();
4810 MulC.lshrInPlace(TZeros);
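    // Illustrative worked example (editorial note, not from the original
    // source): for x * 20, MulC = 20 has TZeros = 2 and MulC >> 2 = 5 = 4 + 1,
    // so the code below picks ISD::ADD with ShAmt = 2 + 2 = 4 and emits
    // (x << 4) + (x << 2), i.e. 16*x + 4*x = 20*x.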
4811 if ((MulC - 1).isPowerOf2())
4812 MathOp = ISD::ADD;
4813 else if ((MulC + 1).isPowerOf2())
4814 MathOp = ISD::SUB;
4815
4816 if (MathOp != ISD::DELETED_NODE) {
4817 unsigned ShAmt =
4818 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
4819 ShAmt += TZeros;
4820 assert(ShAmt < BitWidth &&
4821 "multiply-by-constant generated out of bounds shift");
4822 SDValue Shl =
4823 DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
4824 SDValue R =
4825 TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
4826 DAG.getNode(ISD::SHL, DL, VT, N0,
4827 DAG.getConstant(TZeros, DL, VT)))
4828 : DAG.getNode(MathOp, DL, VT, Shl, N0);
4829 if (ConstValue1.isNegative())
4830 R = DAG.getNegative(R, DL, VT);
4831 return R;
4832 }
4833 }
4834
4835 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
4836 if (sd_context_match(N0, Matcher, m_Opc(ISD::SHL))) {
4837 SDValue N01 = N0.getOperand(1);
4838 if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
4839 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
4840 }
4841
4842 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
4843 // use.
4844 {
4845 SDValue Sh, Y;
4846
4847 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
4848 if (sd_context_match(N0, Matcher, m_OneUse(m_Opc(ISD::SHL))) &&
4850 Sh = N0; Y = N1;
4851 } else if (sd_context_match(N1, Matcher, m_OneUse(m_Opc(ISD::SHL))) &&
4853 Sh = N1; Y = N0;
4854 }
4855
4856 if (Sh.getNode()) {
4857 SDValue Mul = Matcher.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
4858 return Matcher.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
4859 }
4860 }
4861
4862 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
4863 if (sd_context_match(N0, Matcher, m_Opc(ISD::ADD)) &&
4867 return Matcher.getNode(
4868 ISD::ADD, DL, VT,
4869 Matcher.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
4870 Matcher.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
4871
4872 // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
4873 ConstantSDNode *NC1 = isConstOrConstSplat(N1);
4874 if (!UseVP && N0.getOpcode() == ISD::VSCALE && NC1) {
4875 const APInt &C0 = N0.getConstantOperandAPInt(0);
4876 const APInt &C1 = NC1->getAPIntValue();
4877 return DAG.getVScale(DL, VT, C0 * C1);
4878 }
4879
4880 // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
4881 APInt MulVal;
4882 if (!UseVP && N0.getOpcode() == ISD::STEP_VECTOR &&
4883 ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
4884 const APInt &C0 = N0.getConstantOperandAPInt(0);
4885 APInt NewStep = C0 * MulVal;
4886 return DAG.getStepVector(DL, VT, NewStep);
4887 }
4888
4889 // Fold Y = sra (X, size(X)-1); mul (or (Y, 1), X) -> (abs X)
4890 SDValue X;
4891 if (!UseVP && (!LegalOperations || hasOperation(ISD::ABS, VT)) &&
4893 N, Matcher,
4895 m_Deferred(X)))) {
4896 return Matcher.getNode(ISD::ABS, DL, VT, X);
4897 }
4898
4899  // Fold, per vector element, (mul x, 0/undef) -> 0 and (mul x, 1) -> x
4900  // by rewriting the whole multiply as
4901  // -> and(x, mask)
4902 // We can replace vectors with '0' and '1' factors with a clearing mask.
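  // Illustrative example (editorial note): mul <4 x i32> X, <0, 1, 0, 1>
  // becomes and X, <0, -1, 0, -1>, clearing the lanes multiplied by 0 and
  // keeping the lanes multiplied by 1.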
4903 if (VT.isFixedLengthVector()) {
4904 unsigned NumElts = VT.getVectorNumElements();
4905 SmallBitVector ClearMask;
4906 ClearMask.reserve(NumElts);
4907 auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
4908 if (!V || V->isZero()) {
4909 ClearMask.push_back(true);
4910 return true;
4911 }
4912 ClearMask.push_back(false);
4913 return V->isOne();
4914 };
4915 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
4916 ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
4917 assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
4918 EVT LegalSVT = N1.getOperand(0).getValueType();
4919 SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
4920 SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
4921      SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
4922      for (unsigned I = 0; I != NumElts; ++I)
4923 if (ClearMask[I])
4924 Mask[I] = Zero;
4925 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
4926 }
4927 }
4928
4929 // reassociate mul
4930 // TODO: Change reassociateOps to support vp ops.
4931 if (!UseVP)
4932 if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
4933 return RMUL;
4934
4935 // Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y))
4936 // TODO: Change reassociateReduction to support vp ops.
4937 if (!UseVP)
4938 if (SDValue SD =
4939 reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
4940 return SD;
4941
4942 // Simplify the operands using demanded-bits information.
4943  if (SimplifyDemandedBits(SDValue(N, 0)))
4944    return SDValue(N, 0);
4945
4946 return SDValue();
4947}
4948
4949/// Return true if divmod libcall is available.
4950static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
4951                                     const TargetLowering &TLI) {
4952 RTLIB::Libcall LC;
4953 EVT NodeType = Node->getValueType(0);
4954 if (!NodeType.isSimple())
4955 return false;
4956 switch (NodeType.getSimpleVT().SimpleTy) {
4957 default: return false; // No libcall for vector types.
4958 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
4959 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4960 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4961 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4962 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4963 }
4964
4965 return TLI.getLibcallName(LC) != nullptr;
4966}
4967
4968/// Issue divrem if both quotient and remainder are needed.
4969SDValue DAGCombiner::useDivRem(SDNode *Node) {
4970 if (Node->use_empty())
4971 return SDValue(); // This is a dead node, leave it alone.
4972
4973 unsigned Opcode = Node->getOpcode();
4974 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4975 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4976
4977  // DivMod lib calls can still work on non-legal types; the libcall handles them.
4978 EVT VT = Node->getValueType(0);
4979 if (VT.isVector() || !VT.isInteger())
4980 return SDValue();
4981
4982 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4983 return SDValue();
4984
4985 // If DIVREM is going to get expanded into a libcall,
4986 // but there is no libcall available, then don't combine.
4987 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4988      !isDivRemLibcallAvailable(Node, isSigned, TLI))
4989    return SDValue();
4990
4991 // If div is legal, it's better to do the normal expansion
4992 unsigned OtherOpcode = 0;
4993 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4994 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4995 if (TLI.isOperationLegalOrCustom(Opcode, VT))
4996 return SDValue();
4997 } else {
4998 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4999 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
5000 return SDValue();
5001 }
5002
5003 SDValue Op0 = Node->getOperand(0);
5004 SDValue Op1 = Node->getOperand(1);
5005 SDValue combined;
5006 for (SDNode *User : Op0->users()) {
5007 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
5008 User->use_empty())
5009 continue;
5010 // Convert the other matching node(s), too;
5011 // otherwise, the DIVREM may get target-legalized into something
5012 // target-specific that we won't be able to recognize.
5013 unsigned UserOpc = User->getOpcode();
5014 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
5015 User->getOperand(0) == Op0 &&
5016 User->getOperand(1) == Op1) {
5017 if (!combined) {
5018 if (UserOpc == OtherOpcode) {
5019 SDVTList VTs = DAG.getVTList(VT, VT);
5020 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
5021 } else if (UserOpc == DivRemOpc) {
5022 combined = SDValue(User, 0);
5023 } else {
5024 assert(UserOpc == Opcode);
5025 continue;
5026 }
5027 }
5028 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
5029 CombineTo(User, combined);
5030 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
5031 CombineTo(User, combined.getValue(1));
5032 }
5033 }
5034 return combined;
5035}
5036
5037static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
5038  SDValue N0 = N->getOperand(0);
5039 SDValue N1 = N->getOperand(1);
5040 EVT VT = N->getValueType(0);
5041 SDLoc DL(N);
5042
5043 unsigned Opc = N->getOpcode();
5044 bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
5045  ConstantSDNode *N1C = isConstOrConstSplat(N1);
5046
5047 // X / undef -> undef
5048 // X % undef -> undef
5049 // X / 0 -> undef
5050 // X % 0 -> undef
5051 // NOTE: This includes vectors where any divisor element is zero/undef.
5052 if (DAG.isUndef(Opc, {N0, N1}))
5053 return DAG.getUNDEF(VT);
5054
5055 // undef / X -> 0
5056 // undef % X -> 0
5057 if (N0.isUndef())
5058 return DAG.getConstant(0, DL, VT);
5059
5060 // 0 / X -> 0
5061 // 0 % X -> 0
5062  ConstantSDNode *N0C = isConstOrConstSplat(N0);
5063  if (N0C && N0C->isZero())
5064 return N0;
5065
5066 // X / X -> 1
5067 // X % X -> 0
5068 if (N0 == N1)
5069 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
5070
5071 // X / 1 -> X
5072 // X % 1 -> 0
5073 // If this is a boolean op (single-bit element type), we can't have
5074 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
5075 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
5076 // it's a 1.
5077 if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
5078 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
5079
5080 return SDValue();
5081}
5082
5083SDValue DAGCombiner::visitSDIV(SDNode *N) {
5084 SDValue N0 = N->getOperand(0);
5085 SDValue N1 = N->getOperand(1);
5086 EVT VT = N->getValueType(0);
5087 EVT CCVT = getSetCCResultType(VT);
5088 SDLoc DL(N);
5089
5090 // fold (sdiv c1, c2) -> c1/c2
5091 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
5092 return C;
5093
5094 // fold vector ops
5095 if (VT.isVector())
5096 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5097 return FoldedVOp;
5098
5099 // fold (sdiv X, -1) -> 0-X
5100 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5101 if (N1C && N1C->isAllOnes())
5102 return DAG.getNegative(N0, DL, VT);
5103
5104 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
5105 if (N1C && N1C->isMinSignedValue())
5106 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
5107 DAG.getConstant(1, DL, VT),
5108 DAG.getConstant(0, DL, VT));
5109
5110 if (SDValue V = simplifyDivRem(N, DAG))
5111 return V;
5112
5113 if (SDValue NewSel = foldBinOpIntoSelect(N))
5114 return NewSel;
5115
5116 // If we know the sign bits of both operands are zero, strength reduce to a
5117 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
5118 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
5119 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
5120
5121 if (SDValue V = visitSDIVLike(N0, N1, N)) {
5122 // If the corresponding remainder node exists, update its users with
5123    // (Dividend - (Quotient * Divisor)).
5124 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
5125 { N0, N1 })) {
5126 // If the sdiv has the exact flag we shouldn't propagate it to the
5127 // remainder node.
5128 if (!N->getFlags().hasExact()) {
5129 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
5130 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5131 AddToWorklist(Mul.getNode());
5132 AddToWorklist(Sub.getNode());
5133 CombineTo(RemNode, Sub);
5134 }
5135 }
5136 return V;
5137 }
5138
5139 // sdiv, srem -> sdivrem
5140 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
5141 // true. Otherwise, we break the simplification logic in visitREM().
5142 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5143 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
5144 if (SDValue DivRem = useDivRem(N))
5145 return DivRem;
5146
5147 return SDValue();
5148}
5149
5150static bool isDivisorPowerOfTwo(SDValue Divisor) {
5151 // Helper for determining whether a value is a power-2 constant scalar or a
5152 // vector of such elements.
5153 auto IsPowerOfTwo = [](ConstantSDNode *C) {
5154 if (C->isZero() || C->isOpaque())
5155 return false;
5156 if (C->getAPIntValue().isPowerOf2())
5157 return true;
5158 if (C->getAPIntValue().isNegatedPowerOf2())
5159 return true;
5160 return false;
5161 };
5162
5163 return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo);
5164}
5165
5166SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
5167 SDLoc DL(N);
5168 EVT VT = N->getValueType(0);
5169 EVT CCVT = getSetCCResultType(VT);
5170 unsigned BitWidth = VT.getScalarSizeInBits();
5171
5172 // fold (sdiv X, pow2) -> simple ops after legalize
5173 // FIXME: We check for the exact bit here because the generic lowering gives
5174 // better results in that case. The target-specific lowering should learn how
5175 // to handle exact sdivs efficiently.
5176 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) {
5177 // Target-specific implementation of sdiv x, pow2.
5178 if (SDValue Res = BuildSDIVPow2(N))
5179 return Res;
5180
5181 // Create constants that are functions of the shift amount value.
5182 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
5183 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
5184 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
5185 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
5186 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
5187 if (!isConstantOrConstantVector(Inexact))
5188 return SDValue();
5189
5190 // Splat the sign bit into the register
5191 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
5192 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
5193 AddToWorklist(Sign.getNode());
5194
5195 // Add (N0 < 0) ? abs2 - 1 : 0;
5196 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
5197 AddToWorklist(Srl.getNode());
5198 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
5199 AddToWorklist(Add.getNode());
5200 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
5201 AddToWorklist(Sra.getNode());
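    // Illustrative example (editorial note, not from the original source):
    // for i32 X sdiv 8, C1 = cttz(8) = 3 and Inexact = 32 - 3 = 29, so Srl is
    // 7 when X is negative and 0 otherwise; adding it before the arithmetic
    // shift right by 3 rounds the division toward zero as required.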
5202
5203 // Special case: (sdiv X, 1) -> X
5204 // Special Case: (sdiv X, -1) -> 0-X
5205 SDValue One = DAG.getConstant(1, DL, VT);
5206    SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
5207    SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
5208 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
5209 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
5210 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
5211
5212 // If dividing by a positive value, we're done. Otherwise, the result must
5213 // be negated.
5214 SDValue Zero = DAG.getConstant(0, DL, VT);
5215 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
5216
5217 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
5218 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
5219 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
5220 return Res;
5221 }
5222
5223 // If integer divide is expensive and we satisfy the requirements, emit an
5224 // alternate sequence. Targets may check function attributes for size/speed
5225 // trade-offs.
5226 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5227  if (isConstantOrConstantVector(N1) &&
5228      !TLI.isIntDivCheap(N->getValueType(0), Attr))
5229 if (SDValue Op = BuildSDIV(N))
5230 return Op;
5231
5232 return SDValue();
5233}
5234
5235SDValue DAGCombiner::visitUDIV(SDNode *N) {
5236 SDValue N0 = N->getOperand(0);
5237 SDValue N1 = N->getOperand(1);
5238 EVT VT = N->getValueType(0);
5239 EVT CCVT = getSetCCResultType(VT);
5240 SDLoc DL(N);
5241
5242 // fold (udiv c1, c2) -> c1/c2
5243 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
5244 return C;
5245
5246 // fold vector ops
5247 if (VT.isVector())
5248 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5249 return FoldedVOp;
5250
5251 // fold (udiv X, -1) -> select(X == -1, 1, 0)
5252 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5253 if (N1C && N1C->isAllOnes() && CCVT.isVector() == VT.isVector()) {
5254 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
5255 DAG.getConstant(1, DL, VT),
5256 DAG.getConstant(0, DL, VT));
5257 }
5258
5259 if (SDValue V = simplifyDivRem(N, DAG))
5260 return V;
5261
5262 if (SDValue NewSel = foldBinOpIntoSelect(N))
5263 return NewSel;
5264
5265 if (SDValue V = visitUDIVLike(N0, N1, N)) {
5266 // If the corresponding remainder node exists, update its users with
5267    // (Dividend - (Quotient * Divisor)).
5268 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
5269 { N0, N1 })) {
5270 // If the udiv has the exact flag we shouldn't propagate it to the
5271 // remainder node.
5272 if (!N->getFlags().hasExact()) {
5273 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
5274 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5275 AddToWorklist(Mul.getNode());
5276 AddToWorklist(Sub.getNode());
5277 CombineTo(RemNode, Sub);
5278 }
5279 }
5280 return V;
5281 }
5282
5283  // udiv, urem -> udivrem
5284 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
5285 // true. Otherwise, we break the simplification logic in visitREM().
5286 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5287 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
5288 if (SDValue DivRem = useDivRem(N))
5289 return DivRem;
5290
5291 // Simplify the operands using demanded-bits information.
5292 // We don't have demanded bits support for UDIV so this just enables constant
5293 // folding based on known bits.
5294  if (SimplifyDemandedBits(SDValue(N, 0)))
5295    return SDValue(N, 0);
5296
5297 return SDValue();
5298}
5299
5300SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
5301 SDLoc DL(N);
5302 EVT VT = N->getValueType(0);
5303
5304 // fold (udiv x, (1 << c)) -> x >>u c
5305 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true)) {
5306 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5307 AddToWorklist(LogBase2.getNode());
5308
5309 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5310 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
5311 AddToWorklist(Trunc.getNode());
5312 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5313 }
5314 }
5315
5316 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
5317 if (N1.getOpcode() == ISD::SHL) {
5318 SDValue N10 = N1.getOperand(0);
5319 if (isConstantOrConstantVector(N10, /*NoOpaques*/ true)) {
5320 if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
5321 AddToWorklist(LogBase2.getNode());
5322
5323 EVT ADDVT = N1.getOperand(1).getValueType();
5324 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
5325 AddToWorklist(Trunc.getNode());
5326 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
5327 AddToWorklist(Add.getNode());
5328 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
5329 }
5330 }
5331 }
5332
5333 // fold (udiv x, c) -> alternate
5334 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5335  if (isConstantOrConstantVector(N1) &&
5336      !TLI.isIntDivCheap(N->getValueType(0), Attr))
5337 if (SDValue Op = BuildUDIV(N))
5338 return Op;
5339
5340 return SDValue();
5341}
5342
5343SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) {
5344 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1) &&
5345 !DAG.doesNodeExist(ISD::SDIV, N->getVTList(), {N0, N1})) {
5346 // Target-specific implementation of srem x, pow2.
5347 if (SDValue Res = BuildSREMPow2(N))
5348 return Res;
5349 }
5350 return SDValue();
5351}
5352
5353// handles ISD::SREM and ISD::UREM
5354SDValue DAGCombiner::visitREM(SDNode *N) {
5355 unsigned Opcode = N->getOpcode();
5356 SDValue N0 = N->getOperand(0);
5357 SDValue N1 = N->getOperand(1);
5358 EVT VT = N->getValueType(0);
5359 EVT CCVT = getSetCCResultType(VT);
5360
5361 bool isSigned = (Opcode == ISD::SREM);
5362 SDLoc DL(N);
5363
5364 // fold (rem c1, c2) -> c1%c2
5365 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5366 return C;
5367
5368 // fold (urem X, -1) -> select(FX == -1, 0, FX)
5369 // Freeze the numerator to avoid a miscompile with an undefined value.
5370 if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false) &&
5371 CCVT.isVector() == VT.isVector()) {
5372 SDValue F0 = DAG.getFreeze(N0);
5373 SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
5374 return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);
5375 }
5376
5377 if (SDValue V = simplifyDivRem(N, DAG))
5378 return V;
5379
5380 if (SDValue NewSel = foldBinOpIntoSelect(N))
5381 return NewSel;
5382
5383 if (isSigned) {
5384 // If we know the sign bits of both operands are zero, strength reduce to a
5385 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
5386 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
5387 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
5388 } else {
5389 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
5390 // fold (urem x, pow2) -> (and x, pow2-1)
5391 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5392 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
5393 AddToWorklist(Add.getNode());
5394 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
5395 }
5396 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
5397 // fold (urem x, (lshr pow2, y)) -> (and x, (add (lshr pow2, y), -1))
5398 // TODO: We should sink the following into isKnownToBePowerOfTwo
5399 // using a OrZero parameter analogous to our handling in ValueTracking.
5400 if ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) &&
5401        DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
5402      SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5403 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
5404 AddToWorklist(Add.getNode());
5405 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
5406 }
5407 }
5408
5409 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5410
5411 // If X/C can be simplified by the division-by-constant logic, lower
5412 // X%C to the equivalent of X-X/C*C.
5413 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
5414 // speculative DIV must not cause a DIVREM conversion. We guard against this
5415 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
5416 // combine will not return a DIVREM. Regardless, checking cheapness here
5417 // makes sense since the simplification results in fatter code.
5418 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
5419 if (isSigned) {
5420 // check if we can build faster implementation for srem
5421 if (SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N))
5422 return OptimizedRem;
5423 }
5424
5425 SDValue OptimizedDiv =
5426 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
5427 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
5428 // If the equivalent Div node also exists, update its users.
5429 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
5430 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
5431 { N0, N1 }))
5432 CombineTo(DivNode, OptimizedDiv);
5433 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
5434 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5435 AddToWorklist(OptimizedDiv.getNode());
5436 AddToWorklist(Mul.getNode());
5437 return Sub;
5438 }
5439 }
5440
5441 // sdiv, srem -> sdivrem
5442 if (SDValue DivRem = useDivRem(N))
5443 return DivRem.getValue(1);
5444
5445 return SDValue();
5446}
5447
5448SDValue DAGCombiner::visitMULHS(SDNode *N) {
5449 SDValue N0 = N->getOperand(0);
5450 SDValue N1 = N->getOperand(1);
5451 EVT VT = N->getValueType(0);
5452 SDLoc DL(N);
5453
5454 // fold (mulhs c1, c2)
5455 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
5456 return C;
5457
5458 // canonicalize constant to RHS.
5459  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5460      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5461    return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
5462
5463 if (VT.isVector()) {
5464 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5465 return FoldedVOp;
5466
5467 // fold (mulhs x, 0) -> 0
5468 // do not return N1, because undef node may exist.
5469    if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5470      return DAG.getConstant(0, DL, VT);
5471 }
5472
5473 // fold (mulhs x, 0) -> 0
5474 if (isNullConstant(N1))
5475 return N1;
5476
5477 // fold (mulhs x, 1) -> (sra x, size(x)-1)
5478 if (isOneConstant(N1))
5479 return DAG.getNode(
5480 ISD::SRA, DL, VT, N0,
5481        DAG.getShiftAmountConstant(N0.getScalarValueSizeInBits() - 1, VT, DL));
5482
5483 // fold (mulhs x, undef) -> 0
5484 if (N0.isUndef() || N1.isUndef())
5485 return DAG.getConstant(0, DL, VT);
5486
5487 // If the type twice as wide is legal, transform the mulhs to a wider multiply
5488 // plus a shift.
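  // For example (editorial note): an i16 mulhs where i32 MUL is legal can be
  // rewritten as: sign-extend both operands to i32, multiply, logical-shift
  // the product right by 16, and truncate back to i16.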
5489 if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
5490 !VT.isVector()) {
5491 MVT Simple = VT.getSimpleVT();
5492 unsigned SimpleSize = Simple.getSizeInBits();
5493 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5494 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5495 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5496 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5497 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5498 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5499 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5500 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5501 }
5502 }
5503
5504 return SDValue();
5505}
5506
5507SDValue DAGCombiner::visitMULHU(SDNode *N) {
5508 SDValue N0 = N->getOperand(0);
5509 SDValue N1 = N->getOperand(1);
5510 EVT VT = N->getValueType(0);
5511 SDLoc DL(N);
5512
5513 // fold (mulhu c1, c2)
5514 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
5515 return C;
5516
5517 // canonicalize constant to RHS.
5518  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5519      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5520    return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
5521
5522 if (VT.isVector()) {
5523 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5524 return FoldedVOp;
5525
5526 // fold (mulhu x, 0) -> 0
5527 // do not return N1, because undef node may exist.
5528    if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5529      return DAG.getConstant(0, DL, VT);
5530 }
5531
5532 // fold (mulhu x, 0) -> 0
5533 if (isNullConstant(N1))
5534 return N1;
5535
5536 // fold (mulhu x, 1) -> 0
5537 if (isOneConstant(N1))
5538 return DAG.getConstant(0, DL, VT);
5539
5540 // fold (mulhu x, undef) -> 0
5541 if (N0.isUndef() || N1.isUndef())
5542 return DAG.getConstant(0, DL, VT);
5543
5544 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
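  // (x * 2^c) >> bitwidth == x >> (bitwidth - c), hence the SRL built below
  // (editorial note).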
5545 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
5546 hasOperation(ISD::SRL, VT)) {
5547 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5548 unsigned NumEltBits = VT.getScalarSizeInBits();
5549 SDValue SRLAmt = DAG.getNode(
5550 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
5551 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5552 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
5553 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5554 }
5555 }
5556
5557 // If the type twice as wide is legal, transform the mulhu to a wider multiply
5558 // plus a shift.
5559 if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
5560 !VT.isVector()) {
5561 MVT Simple = VT.getSimpleVT();
5562 unsigned SimpleSize = Simple.getSizeInBits();
5563 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5564 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5565 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5566 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5567 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5568 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5569 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5570 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5571 }
5572 }
5573
5574 // Simplify the operands using demanded-bits information.
5575 // We don't have demanded bits support for MULHU so this just enables constant
5576 // folding based on known bits.
5577  if (SimplifyDemandedBits(SDValue(N, 0)))
5578    return SDValue(N, 0);
5579
5580 return SDValue();
5581}
5582
5583SDValue DAGCombiner::visitAVG(SDNode *N) {
5584 unsigned Opcode = N->getOpcode();
5585 SDValue N0 = N->getOperand(0);
5586 SDValue N1 = N->getOperand(1);
5587 EVT VT = N->getValueType(0);
5588 SDLoc DL(N);
5589 bool IsSigned = Opcode == ISD::AVGCEILS || Opcode == ISD::AVGFLOORS;
5590
5591 // fold (avg c1, c2)
5592 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5593 return C;
5594
5595 // canonicalize constant to RHS.
5596  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5597      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5598    return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5599
5600 if (VT.isVector())
5601 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5602 return FoldedVOp;
5603
5604 // fold (avg x, undef) -> x
5605 if (N0.isUndef())
5606 return N1;
5607 if (N1.isUndef())
5608 return N0;
5609
5610 // fold (avg x, x) --> x
5611 if (N0 == N1 && Level >= AfterLegalizeTypes)
5612 return N0;
5613
5614 // fold (avgfloor x, 0) -> x >> 1
5615 SDValue X, Y;
5617 return DAG.getNode(ISD::SRA, DL, VT, X,
5618 DAG.getShiftAmountConstant(1, VT, DL));
5620 return DAG.getNode(ISD::SRL, DL, VT, X,
5621 DAG.getShiftAmountConstant(1, VT, DL));
5622
5623 // fold avgu(zext(x), zext(y)) -> zext(avgu(x, y))
5624 // fold avgs(sext(x), sext(y)) -> sext(avgs(x, y))
5625 if (!IsSigned &&
5626 sd_match(N, m_BinOp(Opcode, m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) &&
5627 X.getValueType() == Y.getValueType() &&
5628 hasOperation(Opcode, X.getValueType())) {
5629 SDValue AvgU = DAG.getNode(Opcode, DL, X.getValueType(), X, Y);
5630 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, AvgU);
5631 }
5632 if (IsSigned &&
5633 sd_match(N, m_BinOp(Opcode, m_SExt(m_Value(X)), m_SExt(m_Value(Y)))) &&
5634 X.getValueType() == Y.getValueType() &&
5635 hasOperation(Opcode, X.getValueType())) {
5636 SDValue AvgS = DAG.getNode(Opcode, DL, X.getValueType(), X, Y);
5637 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, AvgS);
5638 }
5639
5640 // Fold avgflooru(x,y) -> avgceilu(x,y-1) iff y != 0
5641 // Fold avgflooru(x,y) -> avgceilu(x-1,y) iff x != 0
5642 // Check if avgflooru isn't legal/custom but avgceilu is.
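  // This rewrite is valid because avgflooru(x, y) == (x + y) >> 1 and
  // avgceilu(a, b) == (a + b + 1) >> 1, so avgceilu(x, y - 1) == (x + y) >> 1
  // provided y != 0 (resp. x != 0), which avoids the wrap of y - 1
  // (editorial note).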
5643 if (Opcode == ISD::AVGFLOORU && !hasOperation(ISD::AVGFLOORU, VT) &&
5644 (!LegalOperations || hasOperation(ISD::AVGCEILU, VT))) {
5645 if (DAG.isKnownNeverZero(N1))
5646 return DAG.getNode(
5647 ISD::AVGCEILU, DL, VT, N0,
5648 DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getAllOnesConstant(DL, VT)));
5649 if (DAG.isKnownNeverZero(N0))
5650 return DAG.getNode(
5651 ISD::AVGCEILU, DL, VT, N1,
5652 DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getAllOnesConstant(DL, VT)));
5653 }
5654
5655 // Fold avgfloor((add nw x,y), 1) -> avgceil(x,y)
5656 // Fold avgfloor((add nw x,1), y) -> avgceil(x,y)
5657 if ((Opcode == ISD::AVGFLOORU && hasOperation(ISD::AVGCEILU, VT)) ||
5658 (Opcode == ISD::AVGFLOORS && hasOperation(ISD::AVGCEILS, VT))) {
5659 SDValue Add;
5660 if (sd_match(N,
5661 m_c_BinOp(Opcode,
5663 m_One())) ||
5664 sd_match(N, m_c_BinOp(Opcode,
5666 m_Value(Y)))) {
5667
5668 if (IsSigned && Add->getFlags().hasNoSignedWrap())
5669 return DAG.getNode(ISD::AVGCEILS, DL, VT, X, Y);
5670
5671 if (!IsSigned && Add->getFlags().hasNoUnsignedWrap())
5672 return DAG.getNode(ISD::AVGCEILU, DL, VT, X, Y);
5673 }
5674 }
5675
5676 // Fold avgfloors(x,y) -> avgflooru(x,y) if both x and y are non-negative
5677 if (Opcode == ISD::AVGFLOORS && hasOperation(ISD::AVGFLOORU, VT)) {
5678 if (DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
5679 return DAG.getNode(ISD::AVGFLOORU, DL, VT, N0, N1);
5680 }
5681
5682 return SDValue();
5683}
5684
5685SDValue DAGCombiner::visitABD(SDNode *N) {
5686 unsigned Opcode = N->getOpcode();
5687 SDValue N0 = N->getOperand(0);
5688 SDValue N1 = N->getOperand(1);
5689 EVT VT = N->getValueType(0);
5690 SDLoc DL(N);
5691
5692 // fold (abd c1, c2)
5693 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5694 return C;
5695
5696 // canonicalize constant to RHS.
5697  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5698      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5699    return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5700
5701 if (VT.isVector())
5702 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5703 return FoldedVOp;
5704
5705 // fold (abd x, undef) -> 0
5706 if (N0.isUndef() || N1.isUndef())
5707 return DAG.getConstant(0, DL, VT);
5708
5709 // fold (abd x, x) -> 0
5710 if (N0 == N1)
5711 return DAG.getConstant(0, DL, VT);
5712
5713 SDValue X;
5714
5715 // fold (abds x, 0) -> abs x
5717 (!LegalOperations || hasOperation(ISD::ABS, VT)))
5718 return DAG.getNode(ISD::ABS, DL, VT, X);
5719
5720 // fold (abdu x, 0) -> x
5722 return X;
5723
5724 // fold (abds x, y) -> (abdu x, y) iff both args are known positive
5725 if (Opcode == ISD::ABDS && hasOperation(ISD::ABDU, VT) &&
5726 DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
5727 return DAG.getNode(ISD::ABDU, DL, VT, N1, N0);
5728
5729 return SDValue();
5730}
5731
5732/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
5733/// give the opcodes for the two computations that are being performed. Returns
5734/// the simplified value if a simplification was made.
5735SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
5736 unsigned HiOp) {
5737 // If the high half is not needed, just compute the low half.
5738 bool HiExists = N->hasAnyUseOfValue(1);
5739 if (!HiExists && (!LegalOperations ||
5740 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
5741 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5742 return CombineTo(N, Res, Res);
5743 }
5744
5745 // If the low half is not needed, just compute the high half.
5746 bool LoExists = N->hasAnyUseOfValue(0);
5747 if (!LoExists && (!LegalOperations ||
5748 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
5749 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5750 return CombineTo(N, Res, Res);
5751 }
5752
5753 // If both halves are used, return as it is.
5754 if (LoExists && HiExists)
5755 return SDValue();
5756
5757 // If the two computed results can be simplified separately, separate them.
5758 if (LoExists) {
5759 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5760 AddToWorklist(Lo.getNode());
5761 SDValue LoOpt = combine(Lo.getNode());
5762 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
5763 (!LegalOperations ||
5764 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
5765 return CombineTo(N, LoOpt, LoOpt);
5766 }
5767
5768 if (HiExists) {
5769 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5770 AddToWorklist(Hi.getNode());
5771 SDValue HiOpt = combine(Hi.getNode());
5772 if (HiOpt.getNode() && HiOpt != Hi &&
5773 (!LegalOperations ||
5774 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
5775 return CombineTo(N, HiOpt, HiOpt);
5776 }
5777
5778 return SDValue();
5779}
5780
5781SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
5782 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
5783 return Res;
5784
5785 SDValue N0 = N->getOperand(0);
5786 SDValue N1 = N->getOperand(1);
5787 EVT VT = N->getValueType(0);
5788 SDLoc DL(N);
5789
5790 // Constant fold.
5791  if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5792    return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N0, N1);
5793
5794 // canonicalize constant to RHS (vector doesn't have to splat)
5795  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5796      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5797    return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N1, N0);
5798
5799  // If the type twice as wide is legal, transform the smul_lohi into a
5800  // wider multiply plus a shift.
5801 if (VT.isSimple() && !VT.isVector()) {
5802 MVT Simple = VT.getSimpleVT();
5803 unsigned SimpleSize = Simple.getSizeInBits();
5804 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5805 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5806 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5807 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5808 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5809 // Compute the high part as N1.
5810 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5811 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5812 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5813 // Compute the low part as N0.
5814 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5815 return CombineTo(N, Lo, Hi);
5816 }
5817 }
5818
5819 return SDValue();
5820}
5821
5822SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
5823 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
5824 return Res;
5825
5826 SDValue N0 = N->getOperand(0);
5827 SDValue N1 = N->getOperand(1);
5828 EVT VT = N->getValueType(0);
5829 SDLoc DL(N);
5830
5831 // Constant fold.
5832  if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5833    return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N0, N1);
5834
5835 // canonicalize constant to RHS (vector doesn't have to splat)
5836  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5837      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5838    return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N1, N0);
5839
5840 // (umul_lohi N0, 0) -> (0, 0)
5841 if (isNullConstant(N1)) {
5842 SDValue Zero = DAG.getConstant(0, DL, VT);
5843 return CombineTo(N, Zero, Zero);
5844 }
5845
5846 // (umul_lohi N0, 1) -> (N0, 0)
5847 if (isOneConstant(N1)) {
5848 SDValue Zero = DAG.getConstant(0, DL, VT);
5849 return CombineTo(N, N0, Zero);
5850 }
5851
5852  // If the type twice as wide is legal, transform the umul_lohi into a
5853  // wider multiply plus a shift.
5854 if (VT.isSimple() && !VT.isVector()) {
5855 MVT Simple = VT.getSimpleVT();
5856 unsigned SimpleSize = Simple.getSizeInBits();
5857 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5858 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5859 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5860 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5861 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5862 // Compute the high part as N1.
5863 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5864 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5865 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5866 // Compute the low part as N0.
5867 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5868 return CombineTo(N, Lo, Hi);
5869 }
5870 }
5871
5872 return SDValue();
5873}
5874
5875SDValue DAGCombiner::visitMULO(SDNode *N) {
5876 SDValue N0 = N->getOperand(0);
5877 SDValue N1 = N->getOperand(1);
5878 EVT VT = N0.getValueType();
5879 bool IsSigned = (ISD::SMULO == N->getOpcode());
5880
5881 EVT CarryVT = N->getValueType(1);
5882 SDLoc DL(N);
5883
5884 ConstantSDNode *N0C = isConstOrConstSplat(N0);
5885 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5886
5887 // fold operation with constant operands.
5888 // TODO: Move this to FoldConstantArithmetic when it supports nodes with
5889 // multiple results.
5890 if (N0C && N1C) {
5891 bool Overflow;
5892 APInt Result =
5893 IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
5894 : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
5895 return CombineTo(N, DAG.getConstant(Result, DL, VT),
5896 DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
5897 }
5898
5899 // canonicalize constant to RHS.
5900  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5901      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5902    return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
5903
5904 // fold (mulo x, 0) -> 0 + no carry out
5905 if (isNullOrNullSplat(N1))
5906 return CombineTo(N, DAG.getConstant(0, DL, VT),
5907 DAG.getConstant(0, DL, CarryVT));
5908
5909 // (mulo x, 2) -> (addo x, x)
5910 // FIXME: This needs a freeze.
5911 if (N1C && N1C->getAPIntValue() == 2 &&
5912 (!IsSigned || VT.getScalarSizeInBits() > 2))
5913 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
5914 N->getVTList(), N0, N0);
5915
5916 // A 1 bit SMULO overflows if both inputs are 1.
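  // The only i1 signed values are 0 and -1, and (-1) * (-1) == +1 is not
  // representable, so the multiply overflows exactly when both bits are set
  // (editorial note).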
5917 if (IsSigned && VT.getScalarSizeInBits() == 1) {
5918 SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
5919 SDValue Cmp = DAG.getSetCC(DL, CarryVT, And,
5920 DAG.getConstant(0, DL, VT), ISD::SETNE);
5921 return CombineTo(N, And, Cmp);
5922 }
5923
5924 // If it cannot overflow, transform into a mul.
5925 if (DAG.willNotOverflowMul(IsSigned, N0, N1))
5926 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
5927 DAG.getConstant(0, DL, CarryVT));
5928 return SDValue();
5929}
5930
5931// Determine whether the Min/Max pair of SDNodes (potentially swapped around)
5932// forms a signed saturate pattern, clamping to between a signed saturate of
5933// -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW-1.
5934// Returns the node being clamped and the bitwidth of the clamp in BW. Works
5935// with both SMIN/SMAX nodes and a setcc/select combo. The operands are the
5936// same as SimplifySelectCC: N0<N1 ? N2 : N3.
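// Illustrative example (editorial note, not from the original source):
// smin(smax(X, -128), 127) clamps X to the signed 8-bit range, giving BW = 8
// and Unsigned = false, while smin(smax(X, 0), 255) clamps to the unsigned
// 8-bit range, giving BW = 8 and Unsigned = true.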
5937static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
5938                                  SDValue N3, ISD::CondCode CC, unsigned &BW,
5939                                  bool &Unsigned, SelectionDAG &DAG) {
5940 auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
5941 ISD::CondCode CC) {
5942 // The compare and select operand should be the same or the select operands
5943 // should be truncated versions of the comparison.
5944 if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
5945 return 0;
5946 // The constants need to be the same or a truncated version of each other.
5947    ConstantSDNode *N1C = isConstOrConstSplat(N1);
5948    ConstantSDNode *N3C = isConstOrConstSplat(N3);
5949    if (!N1C || !N3C)
5950 return 0;
5951 const APInt &C1 = N1C->getAPIntValue().trunc(N1.getScalarValueSizeInBits());
5952 const APInt &C2 = N3C->getAPIntValue().trunc(N3.getScalarValueSizeInBits());
5953 if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(C1.getBitWidth()))
5954 return 0;
5955 return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
5956 };
5957
5958 // Check the initial value is a SMIN/SMAX equivalent.
5959 unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
5960 if (!Opcode0)
5961 return SDValue();
5962
5963  // We may only need one range check if the fptosi can never produce
5964  // the upper value.
5965 if (N0.getOpcode() == ISD::FP_TO_SINT && Opcode0 == ISD::SMAX) {
5966 if (isNullOrNullSplat(N3)) {
5967 EVT IntVT = N0.getValueType().getScalarType();
5968 EVT FPVT = N0.getOperand(0).getValueType().getScalarType();
5969 if (FPVT.isSimple()) {
5970 Type *InputTy = FPVT.getTypeForEVT(*DAG.getContext());
5971 const fltSemantics &Semantics = InputTy->getFltSemantics();
5972 uint32_t MinBitWidth =
5973 APFloatBase::semanticsIntSizeInBits(Semantics, /*isSigned*/ true);
5974 if (IntVT.getSizeInBits() >= MinBitWidth) {
5975 Unsigned = true;
5976 BW = PowerOf2Ceil(MinBitWidth);
5977 return N0;
5978 }
5979 }
5980 }
5981 }
5982
5983 SDValue N00, N01, N02, N03;
5984 ISD::CondCode N0CC;
5985 switch (N0.getOpcode()) {
5986 case ISD::SMIN:
5987 case ISD::SMAX:
5988 N00 = N02 = N0.getOperand(0);
5989 N01 = N03 = N0.getOperand(1);
5990 N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
5991 break;
5992 case ISD::SELECT_CC:
5993 N00 = N0.getOperand(0);
5994 N01 = N0.getOperand(1);
5995 N02 = N0.getOperand(2);
5996 N03 = N0.getOperand(3);
5997 N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
5998 break;
5999 case ISD::SELECT:
6000 case ISD::VSELECT:
6001 if (N0.getOperand(0).getOpcode() != ISD::SETCC)
6002 return SDValue();
6003 N00 = N0.getOperand(0).getOperand(0);
6004 N01 = N0.getOperand(0).getOperand(1);
6005 N02 = N0.getOperand(1);
6006 N03 = N0.getOperand(2);
6007 N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
6008 break;
6009 default:
6010 return SDValue();
6011 }
6012
6013 unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
6014 if (!Opcode1 || Opcode0 == Opcode1)
6015 return SDValue();
6016
6017 ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
6018 ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
6019 if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
6020 return SDValue();
6021
6022 const APInt &MinC = MinCOp->getAPIntValue();
6023 const APInt &MaxC = MaxCOp->getAPIntValue();
6024 APInt MinCPlus1 = MinC + 1;
6025 if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
6026 BW = MinCPlus1.exactLogBase2() + 1;
6027 Unsigned = false;
6028 return N02;
6029 }
6030
6031 if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
6032 BW = MinCPlus1.exactLogBase2();
6033 Unsigned = true;
6034 return N02;
6035 }
6036
6037 return SDValue();
6038}
6039
6040static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
6041                                           SDValue N3, ISD::CondCode CC,
6042                                           SelectionDAG &DAG) {
6043 unsigned BW;
6044 bool Unsigned;
6045 SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned, DAG);
6046 if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
6047 return SDValue();
6048 EVT FPVT = Fp.getOperand(0).getValueType();
6049 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
6050 if (FPVT.isVector())
6051 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
6052 FPVT.getVectorElementCount());
6053 unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
6054 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
6055 return SDValue();
6056 SDLoc DL(Fp);
6057 SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
6058 DAG.getValueType(NewVT.getScalarType()));
6059 return DAG.getExtOrTrunc(!Unsigned, Sat, DL, N2->getValueType(0));
6060}
6061
6062static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
6063                                         SDValue N3, ISD::CondCode CC,
6064                                         SelectionDAG &DAG) {
6065 // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
6066 // select/vselect/select_cc. The two operands pairs for the select (N2/N3) may
6067 // be truncated versions of the setcc (N0/N1).
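  // Illustrative example (editorial note): umin(fptoui(X), 255), e.g. written
  // as (fptoui(X) ult 255 ? fptoui(X) : 255), becomes an 8-bit FP_TO_UINT_SAT
  // of X, extended or truncated back to the original result type.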
6068 if ((N0 != N2 &&
6069 (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
6070 N0.getOpcode() != ISD::FP_TO_UINT || CC != ISD::SETULT)
6071 return SDValue();
6072  ConstantSDNode *N1C = isConstOrConstSplat(N1);
6073  ConstantSDNode *N3C = isConstOrConstSplat(N3);
6074  if (!N1C || !N3C)
6075 return SDValue();
6076 const APInt &C1 = N1C->getAPIntValue();
6077 const APInt &C3 = N3C->getAPIntValue();
6078 if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
6079 C1 != C3.zext(C1.getBitWidth()))
6080 return SDValue();
6081
6082 unsigned BW = (C1 + 1).exactLogBase2();
6083 EVT FPVT = N0.getOperand(0).getValueType();
6084 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
6085 if (FPVT.isVector())
6086 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
6087 FPVT.getVectorElementCount());
6088  if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(ISD::FP_TO_UINT_SAT,
6089                                                        FPVT, NewVT))
6090 return SDValue();
6091
6092 SDValue Sat =
6093 DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0),
6094 DAG.getValueType(NewVT.getScalarType()));
6095 return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType());
6096}
6097
6098SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
6099 SDValue N0 = N->getOperand(0);
6100 SDValue N1 = N->getOperand(1);
6101 EVT VT = N0.getValueType();
6102 unsigned Opcode = N->getOpcode();
6103 SDLoc DL(N);
6104
6105 // fold operation with constant operands.
6106 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
6107 return C;
6108
6109 // If the operands are the same, this is a no-op.
6110 if (N0 == N1)
6111 return N0;
6112
6113 // Fold operation with vscale operands.
6114 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
6115 uint64_t C0 = N0->getConstantOperandVal(0);
6116 uint64_t C1 = N1->getConstantOperandVal(0);
6117 if (Opcode == ISD::UMAX)
6118 return C0 > C1 ? N0 : N1;
6119 else if (Opcode == ISD::UMIN)
6120 return C0 > C1 ? N1 : N0;
6121 }
6122
6123 // canonicalize constant to RHS
6124  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6125      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6126    return DAG.getNode(Opcode, DL, VT, N1, N0);
6127
6128 // fold vector ops
6129 if (VT.isVector())
6130 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
6131 return FoldedVOp;
6132
6133 // reassociate minmax
6134 if (SDValue RMINMAX = reassociateOps(Opcode, DL, N0, N1, N->getFlags()))
6135 return RMINMAX;
6136
6137  // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
6138 // Only do this if:
6139 // 1. The current op isn't legal and the flipped is.
6140 // 2. The saturation pattern is broken by canonicalization in InstCombine.
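  // When both operands are known non-negative, signed and unsigned comparisons
  // agree, so e.g. umin(x, y) == smin(x, y) in that case (editorial note).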
6141 bool IsOpIllegal = !TLI.isOperationLegal(Opcode, VT);
6142 bool IsSatBroken = Opcode == ISD::UMIN && N0.getOpcode() == ISD::SMAX;
6143 if ((IsSatBroken || IsOpIllegal) && (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
6144 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
6145 unsigned AltOpcode;
6146 switch (Opcode) {
6147 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
6148 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
6149 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
6150 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
6151 default: llvm_unreachable("Unknown MINMAX opcode");
6152 }
6153 if ((IsSatBroken && IsOpIllegal) || TLI.isOperationLegal(AltOpcode, VT))
6154 return DAG.getNode(AltOpcode, DL, VT, N0, N1);
6155 }
6156
6157 if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
6158    if (SDValue S = PerformMinMaxFpToSatCombine(
6159            N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
6160 return S;
6161 if (Opcode == ISD::UMIN)
6162 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
6163 return S;
6164
6165 // Fold min/max(vecreduce(x), vecreduce(y)) -> vecreduce(min/max(x, y))
6166 auto ReductionOpcode = [](unsigned Opcode) {
6167 switch (Opcode) {
6168 case ISD::SMIN:
6169 return ISD::VECREDUCE_SMIN;
6170 case ISD::SMAX:
6171 return ISD::VECREDUCE_SMAX;
6172 case ISD::UMIN:
6173 return ISD::VECREDUCE_UMIN;
6174 case ISD::UMAX:
6175 return ISD::VECREDUCE_UMAX;
6176 default:
6177 llvm_unreachable("Unexpected opcode");
6178 }
6179 };
6180 if (SDValue SD = reassociateReduction(ReductionOpcode(Opcode), Opcode,
6181 SDLoc(N), VT, N0, N1))
6182 return SD;
6183
6184 // Simplify the operands using demanded-bits information.
6185  if (SimplifyDemandedBits(SDValue(N, 0)))
6186    return SDValue(N, 0);
6187
6188 return SDValue();
6189}
6190
6191/// If this is a bitwise logic instruction and both operands have the same
6192/// opcode, try to sink the other opcode after the logic instruction.
6193SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
6194 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
6195 EVT VT = N0.getValueType();
6196 unsigned LogicOpcode = N->getOpcode();
6197 unsigned HandOpcode = N0.getOpcode();
6198 assert(ISD::isBitwiseLogicOp(LogicOpcode) && "Expected logic opcode");
6199 assert(HandOpcode == N1.getOpcode() && "Bad input!");
6200
6201 // Bail early if none of these transforms apply.
6202 if (N0.getNumOperands() == 0)
6203 return SDValue();
6204
6205 // FIXME: We should check number of uses of the operands to not increase
6206 // the instruction count for all transforms.
6207
6208 // Handle size-changing casts (or sign_extend_inreg).
6209 SDValue X = N0.getOperand(0);
6210 SDValue Y = N1.getOperand(0);
6211 EVT XVT = X.getValueType();
6212 SDLoc DL(N);
6213 if (ISD::isExtOpcode(HandOpcode) || ISD::isExtVecInRegOpcode(HandOpcode) ||
6214 (HandOpcode == ISD::SIGN_EXTEND_INREG &&
6215 N0.getOperand(1) == N1.getOperand(1))) {
6216 // If both operands have other uses, this transform would create extra
6217 // instructions without eliminating anything.
6218 if (!N0.hasOneUse() && !N1.hasOneUse())
6219 return SDValue();
6220 // We need matching integer source types.
6221 if (XVT != Y.getValueType())
6222 return SDValue();
6223 // Don't create an illegal op during or after legalization. Don't ever
6224 // create an unsupported vector op.
6225 if ((VT.isVector() || LegalOperations) &&
6226 !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
6227 return SDValue();
6228 // Avoid infinite looping with PromoteIntBinOp.
6229 // TODO: Should we apply desirable/legal constraints to all opcodes?
6230 if ((HandOpcode == ISD::ANY_EXTEND ||
6231 HandOpcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
6232 LegalTypes && !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
6233 return SDValue();
6234 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
6235 SDNodeFlags LogicFlags;
6236 LogicFlags.setDisjoint(N->getFlags().hasDisjoint() &&
6237 ISD::isExtOpcode(HandOpcode));
6238 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y, LogicFlags);
6239 if (HandOpcode == ISD::SIGN_EXTEND_INREG)
6240 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
6241 return DAG.getNode(HandOpcode, DL, VT, Logic);
6242 }
6243
6244 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
6245 if (HandOpcode == ISD::TRUNCATE) {
6246 // If both operands have other uses, this transform would create extra
6247 // instructions without eliminating anything.
6248 if (!N0.hasOneUse() && !N1.hasOneUse())
6249 return SDValue();
6250 // We need matching source types.
6251 if (XVT != Y.getValueType())
6252 return SDValue();
6253 // Don't create an illegal op during or after legalization.
6254 if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
6255 return SDValue();
6256 // Be extra careful sinking truncate. If it's free, there's no benefit in
6257 // widening a binop. Also, don't create a logic op on an illegal type.
6258 if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
6259 return SDValue();
6260 if (!TLI.isTypeLegal(XVT))
6261 return SDValue();
6262 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6263 return DAG.getNode(HandOpcode, DL, VT, Logic);
6264 }
6265
6266 // For binops SHL/SRL/SRA/AND:
6267 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
6268 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
6269 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
6270 N0.getOperand(1) == N1.getOperand(1)) {
6271 // If either operand has other uses, this transform is not an improvement.
6272 if (!N0.hasOneUse() || !N1.hasOneUse())
6273 return SDValue();
6274 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6275 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
6276 }
6277
6278 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
6279 if (HandOpcode == ISD::BSWAP) {
6280 // If either operand has other uses, this transform is not an improvement.
6281 if (!N0.hasOneUse() || !N1.hasOneUse())
6282 return SDValue();
6283 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6284 return DAG.getNode(HandOpcode, DL, VT, Logic);
6285 }
6286
6287 // For funnel shifts FSHL/FSHR:
6288 // logic_op (OP x, x1, s), (OP y, y1, s) -->
6290 // --> OP (logic_op x, y), (logic_op x1, y1), s
6290 if ((HandOpcode == ISD::FSHL || HandOpcode == ISD::FSHR) &&
6291 N0.getOperand(2) == N1.getOperand(2)) {
6292 if (!N0.hasOneUse() || !N1.hasOneUse())
6293 return SDValue();
6294 SDValue X1 = N0.getOperand(1);
6295 SDValue Y1 = N1.getOperand(1);
6296 SDValue S = N0.getOperand(2);
6297 SDValue Logic0 = DAG.getNode(LogicOpcode, DL, VT, X, Y);
6298 SDValue Logic1 = DAG.getNode(LogicOpcode, DL, VT, X1, Y1);
6299 return DAG.getNode(HandOpcode, DL, VT, Logic0, Logic1, S);
6300 }
6301
6302 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
6303 // Only perform this optimization up until type legalization, before
6304 // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
6305 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
6306 // we don't want to undo this promotion.
6307 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
6308 // on scalars.
6309 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
6310 Level <= AfterLegalizeTypes) {
6311 // Input types must be integer and the same.
6312 if (XVT.isInteger() && XVT == Y.getValueType() &&
6313 !(VT.isVector() && TLI.isTypeLegal(VT) &&
6314 !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
6315 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6316 return DAG.getNode(HandOpcode, DL, VT, Logic);
6317 }
6318 }
6319
6320 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
6321 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
6322 // If both shuffles use the same mask, and both shuffle within a single
6323 // vector, then it is worthwhile to move the swizzle after the operation.
6324 // The type-legalizer generates this pattern when loading illegal
6325 // vector types from memory. In many cases this allows additional shuffle
6326 // optimizations.
6327 // There are other cases where moving the shuffle after the xor/and/or
6328 // is profitable even if shuffles don't perform a swizzle.
6329 // If both shuffles use the same mask, and both shuffles have the same first
6330 // or second operand, then it might still be profitable to move the shuffle
6331 // after the xor/and/or operation.
6332 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
6333 auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
6334 auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
6335 assert(X.getValueType() == Y.getValueType() &&
6336 "Inputs to shuffles are not the same type");
6337
6338 // Check that both shuffles use the same mask. The masks are known to be of
6339 // the same length because the result vector type is the same.
6340 // Check also that shuffles have only one use to avoid introducing extra
6341 // instructions.
6342 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
6343 !SVN0->getMask().equals(SVN1->getMask()))
6344 return SDValue();
6345
6346 // Don't try to fold this node if it requires introducing a
6347 // build vector of all zeros that might be illegal at this stage.
6348 SDValue ShOp = N0.getOperand(1);
6349 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
6350 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
6351
6352 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
6353 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
6354 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
6355 N0.getOperand(0), N1.getOperand(0));
6356 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
6357 }
6358
6359 // Don't try to fold this node if it requires introducing a
6360 // build vector of all zeros that might be illegal at this stage.
6361 ShOp = N0.getOperand(0);
6362 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
6363 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
6364
6365 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
6366 if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
6367 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
6368 N1.getOperand(1));
6369 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
6370 }
6371 }
6372
6373 return SDValue();
6374}
6375
6376/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
6377SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
6378 const SDLoc &DL) {
6379 SDValue LL, LR, RL, RR, N0CC, N1CC;
6380 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
6381 !isSetCCEquivalent(N1, RL, RR, N1CC))
6382 return SDValue();
6383
6384 assert(N0.getValueType() == N1.getValueType() &&
6385 "Unexpected operand types for bitwise logic op");
6386 assert(LL.getValueType() == LR.getValueType() &&
6387 RL.getValueType() == RR.getValueType() &&
6388 "Unexpected operand types for setcc");
6389
6390 // If we're here post-legalization or the logic op type is not i1, the logic
6391 // op type must match a setcc result type. Also, all folds require new
6392 // operations on the left and right operands, so those types must match.
6393 EVT VT = N0.getValueType();
6394 EVT OpVT = LL.getValueType();
6395 if (LegalOperations || VT.getScalarType() != MVT::i1)
6396 if (VT != getSetCCResultType(OpVT))
6397 return SDValue();
6398 if (OpVT != RL.getValueType())
6399 return SDValue();
6400
6401 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
6402 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
6403 bool IsInteger = OpVT.isInteger();
6404 if (LR == RR && CC0 == CC1 && IsInteger) {
6405 bool IsZero = isNullOrNullSplat(LR);
6406 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
6407
6408 // All bits clear?
6409 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
6410 // All sign bits clear?
6411 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
6412 // Any bits set?
6413 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
6414 // Any sign bits set?
6415 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
6416
6417 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
6418 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
6419 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
6420 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
6421 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
6422 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
6423 AddToWorklist(Or.getNode());
6424 return DAG.getSetCC(DL, VT, Or, LR, CC1);
6425 }
6426
6427 // All bits set?
6428 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
6429 // All sign bits set?
6430 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
6431 // Any bits clear?
6432 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
6433 // Any sign bits clear?
6434 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
6435
6436 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
6437 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
6438 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
6439 // (or (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
6440 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
6441 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
6442 AddToWorklist(And.getNode());
6443 return DAG.getSetCC(DL, VT, And, LR, CC1);
6444 }
6445 }
6446
6447 // TODO: What is the 'or' equivalent of this fold?
6448 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
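// Adding 1 wraps 0 to 1 and -1 to 0, so X + 1 is unsigned >= 2 exactly when X is neither 0 nor -1.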
6449 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
6450 IsInteger && CC0 == ISD::SETNE &&
6451 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
6452 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
6453 SDValue One = DAG.getConstant(1, DL, OpVT);
6454 SDValue Two = DAG.getConstant(2, DL, OpVT);
6455 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
6456 AddToWorklist(Add.getNode());
6457 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
6458 }
6459
6460 // Try more general transforms if the predicates match and the only user of
6461 // the compares is the 'and' or 'or'.
6462 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
6463 N0.hasOneUse() && N1.hasOneUse()) {
6464 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
6465 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
6466 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
6467 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
6468 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
6469 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
6470 SDValue Zero = DAG.getConstant(0, DL, OpVT);
6471 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
6472 }
6473
6474 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
6475 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
6476 // Match a shared variable operand and 2 non-opaque constant operands.
6477 auto MatchDiffPow2 = [&](ConstantSDNode *C0, ConstantSDNode *C1) {
6478 // The difference of the constants must be a single bit.
6479 const APInt &CMax =
6480 APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
6481 const APInt &CMin =
6482 APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
6483 return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2();
6484 };
6485 if (LL == RL && ISD::matchBinaryPredicate(LR, RR, MatchDiffPow2)) {
6486 // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
6487 // setcc (and (sub X, CMin), ~(CMax - CMin)), 0, ne/eq
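// e.g. CMin = 8, CMax = 12: (X - 8) & ~4 is 0 exactly for X in {8, 12}.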
6488 SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
6489 SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
6490 SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
6491 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
6492 SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
6493 SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
6494 SDValue Zero = DAG.getConstant(0, DL, OpVT);
6495 return DAG.getSetCC(DL, VT, And, Zero, CC0);
6496 }
6497 }
6498 }
6499
6500 // Canonicalize equivalent operands to LL == RL.
6501 if (LL == RR && LR == RL) {
6502 CC1 = ISD::getSetCCSwappedOperands(CC1);
6503 std::swap(RL, RR);
6504 }
6505
6506 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6507 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6508 if (LL == RL && LR == RR) {
6509 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
6510 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
6511 if (NewCC != ISD::SETCC_INVALID &&
6512 (!LegalOperations ||
6513 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
6514 TLI.isOperationLegal(ISD::SETCC, OpVT))))
6515 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
6516 }
6517
6518 return SDValue();
6519}
6520
6521static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2,
6522 SelectionDAG &DAG) {
6523 return DAG.isKnownNeverSNaN(Operand2) && DAG.isKnownNeverSNaN(Operand1);
6524}
6525
6526static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2,
6527 SelectionDAG &DAG) {
6528 return DAG.isKnownNeverNaN(Operand2) && DAG.isKnownNeverNaN(Operand1);
6529}
6530
6531// FIXME: use FMINIMUMNUM if possible, such as for RISC-V.
6532static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
6533 ISD::CondCode CC, unsigned OrAndOpcode,
6534 SelectionDAG &DAG,
6535 bool isFMAXNUMFMINNUM_IEEE,
6536 bool isFMAXNUMFMINNUM) {
6537 // The optimization cannot be applied for all the predicates because
6538 // of the way FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle
6539 // NaNs. For FMINNUM_IEEE/FMAXNUM_IEEE, the optimization cannot be
6540 // applied at all if one of the operands is a signaling NaN.
6541
6542 // It is safe to use FMINNUM_IEEE/FMAXNUM_IEEE if all the operands
6543 // are non NaN values.
6544 if (((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::OR)) ||
6545 ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND))) {
6546 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6547 isFMAXNUMFMINNUM_IEEE
6548 ? ISD::FMINNUM_IEEE
6549 : ISD::DELETED_NODE;
6550 }
6551
6552 if (((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::OR)) ||
6553 ((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::AND))) {
6554 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6555 isFMAXNUMFMINNUM_IEEE
6556 ? ISD::FMAXNUM_IEEE
6557 : ISD::DELETED_NODE;
6558 }
6559
6560 // Both FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle quiet
6561 // NaNs in the same way. But, FMINNUM/FMAXNUM and FMINNUM_IEEE/
6562 // FMAXNUM_IEEE handle signaling NaNs differently. If we cannot prove
6563 // that there are not any sNaNs, then the optimization is not valid
6564 // for FMINNUM_IEEE/FMAXNUM_IEEE. In the presence of sNaNs, we apply
6565 // the optimization using FMINNUM/FMAXNUM for the following cases. If
6566 // we can prove that we do not have any sNaNs, then we can do the
6567 // optimization using FMINNUM_IEEE/FMAXNUM_IEEE for the following
6568 // cases.
6569 if (((CC == ISD::SETOLT || CC == ISD::SETOLE) && (OrAndOpcode == ISD::OR)) ||
6570 ((CC == ISD::SETUGT || CC == ISD::SETUGE) && (OrAndOpcode == ISD::AND))) {
6571 return isFMAXNUMFMINNUM ? ISD::FMINNUM
6572 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6573 isFMAXNUMFMINNUM_IEEE
6574 ? ISD::FMINNUM_IEEE
6575 : ISD::DELETED_NODE;
6576 }
6577
6578 if (((CC == ISD::SETOGT || CC == ISD::SETOGE) && (OrAndOpcode == ISD::OR)) ||
6579 ((CC == ISD::SETULT || CC == ISD::SETULE) && (OrAndOpcode == ISD::AND))) {
6580 return isFMAXNUMFMINNUM ? ISD::FMAXNUM
6581 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6582 isFMAXNUMFMINNUM_IEEE
6583 ? ISD::FMAXNUM_IEEE
6584 : ISD::DELETED_NODE;
6585 }
6586
6587 return ISD::DELETED_NODE;
6588}
6589
6590 static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
6591 using AndOrSETCCFoldKind = TargetLowering::AndOrSETCCFoldKind;
6592 assert(
6593 (LogicOp->getOpcode() == ISD::AND || LogicOp->getOpcode() == ISD::OR) &&
6594 "Invalid Op to combine SETCC with");
6595
6596 // TODO: Search past casts/truncates.
6597 SDValue LHS = LogicOp->getOperand(0);
6598 SDValue RHS = LogicOp->getOperand(1);
6599 if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC ||
6600 !LHS->hasOneUse() || !RHS->hasOneUse())
6601 return SDValue();
6602
6603 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6604 AndOrSETCCFoldKind TargetPreference = TLI.isDesirableToCombineLogicOpOfSETCC(
6605 LogicOp, LHS.getNode(), RHS.getNode());
6606
6607 SDValue LHS0 = LHS->getOperand(0);
6608 SDValue RHS0 = RHS->getOperand(0);
6609 SDValue LHS1 = LHS->getOperand(1);
6610 SDValue RHS1 = RHS->getOperand(1);
6611 // TODO: We don't actually need a splat here, for vectors we just need the
6612 // invariants to hold for each element.
6613 auto *LHS1C = isConstOrConstSplat(LHS1);
6614 auto *RHS1C = isConstOrConstSplat(RHS1);
6615 ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6616 ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
6617 EVT VT = LogicOp->getValueType(0);
6618 EVT OpVT = LHS0.getValueType();
6619 SDLoc DL(LogicOp);
6620
6621 // Check if the operands of an and/or operation are comparisons and if they
6622 // compare against the same value. Replace the and/or-cmp-cmp sequence with
6623 // min/max cmp sequence. If LHS1 is equal to RHS1, then the or-cmp-cmp
6624 // sequence will be replaced with min-cmp sequence:
6625 // (LHS0 < LHS1) | (RHS0 < RHS1) -> min(LHS0, RHS0) < LHS1
6626 // and and-cmp-cmp will be replaced with max-cmp sequence:
6627 // (LHS0 < LHS1) & (RHS0 < RHS1) -> max(LHS0, RHS0) < LHS1
6628 // The optimization does not work for `==` or `!=`.
6629 // The two comparisons must either use the same predicate, or one
6630 // predicate must be the swapped-operand form of the other.
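// This is valid because min(LHS0, RHS0) < LHS1 holds iff at least one operand
// is below the shared bound, and max(LHS0, RHS0) < LHS1 holds iff both are.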
6631 bool isFMAXNUMFMINNUM_IEEE = TLI.isOperationLegal(ISD::FMAXNUM_IEEE, OpVT) &&
6632 TLI.isOperationLegal(ISD::FMINNUM_IEEE, OpVT);
6633 bool isFMAXNUMFMINNUM = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, OpVT) &&
6634 TLI.isOperationLegalOrCustom(ISD::FMINNUM, OpVT);
6635 if (((OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) &&
6636 TLI.isOperationLegal(ISD::SMAX, OpVT) &&
6637 TLI.isOperationLegal(ISD::UMIN, OpVT) &&
6638 TLI.isOperationLegal(ISD::SMIN, OpVT)) ||
6639 (OpVT.isFloatingPoint() &&
6640 (isFMAXNUMFMINNUM_IEEE || isFMAXNUMFMINNUM))) &&
6642 CCL != ISD::SETFALSE && CCL != ISD::SETO && CCL != ISD::SETUO &&
6643 CCL != ISD::SETTRUE &&
6644 (CCL == CCR || CCL == ISD::getSetCCSwappedOperands(CCR))) {
6645
6646 SDValue CommonValue, Operand1, Operand2;
6647 ISD::CondCode CC = ISD::SETCC_INVALID;
6648 if (CCL == CCR) {
6649 if (LHS0 == RHS0) {
6650 CommonValue = LHS0;
6651 Operand1 = LHS1;
6652 Operand2 = RHS1;
6653 CC = CCL;
6654 } else if (LHS1 == RHS1) {
6655 CommonValue = LHS1;
6656 Operand1 = LHS0;
6657 Operand2 = RHS0;
6658 CC = CCL;
6659 }
6660 } else {
6661 assert(CCL == ISD::getSetCCSwappedOperands(CCR) && "Unexpected CC");
6662 if (LHS0 == RHS1) {
6663 CommonValue = LHS0;
6664 Operand1 = LHS1;
6665 Operand2 = RHS0;
6666 CC = CCR;
6667 } else if (RHS0 == LHS1) {
6668 CommonValue = LHS1;
6669 Operand1 = LHS0;
6670 Operand2 = RHS1;
6671 CC = CCL;
6672 }
6673 }
6674
6675 // Don't do this transform for sign bit tests. Let foldLogicOfSetCCs
6676 // handle it using OR/AND.
6677 if (CC == ISD::SETLT && isNullOrNullSplat(CommonValue))
6678 CC = ISD::SETCC_INVALID;
6679 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CommonValue))
6680 CC = ISD::SETCC_INVALID;
6681
6682 if (CC != ISD::SETCC_INVALID) {
6683 unsigned NewOpcode = ISD::DELETED_NODE;
6684 bool IsSigned = isSignedIntSetCC(CC);
6685 if (OpVT.isInteger()) {
6686 bool IsLess = (CC == ISD::SETLE || CC == ISD::SETULE ||
6687 CC == ISD::SETLT || CC == ISD::SETULT);
6688 bool IsOr = (LogicOp->getOpcode() == ISD::OR);
6689 if (IsLess == IsOr)
6690 NewOpcode = IsSigned ? ISD::SMIN : ISD::UMIN;
6691 else
6692 NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
6693 } else if (OpVT.isFloatingPoint())
6694 NewOpcode =
6695 getMinMaxOpcodeForFP(Operand1, Operand2, CC, LogicOp->getOpcode(),
6696 DAG, isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);
6697
6698 if (NewOpcode != ISD::DELETED_NODE) {
6699 SDValue MinMaxValue =
6700 DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2);
6701 return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC);
6702 }
6703 }
6704 }
6705
6706 if (LHS0 == LHS1 && RHS0 == RHS1 && CCL == CCR &&
6707 LHS0.getValueType() == RHS0.getValueType() &&
6708 ((LogicOp->getOpcode() == ISD::AND && CCL == ISD::SETO) ||
6709 (LogicOp->getOpcode() == ISD::OR && CCL == ISD::SETUO)))
6710 return DAG.getSetCC(DL, VT, LHS0, RHS0, CCL);
6711
6712 if (TargetPreference == AndOrSETCCFoldKind::None)
6713 return SDValue();
6714
6715 if (CCL == CCR &&
6716 CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
6717 LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger()) {
6718 const APInt &APLhs = LHS1C->getAPIntValue();
6719 const APInt &APRhs = RHS1C->getAPIntValue();
6720
6721 // Preference is to use ISD::ABS or we already have an ISD::ABS (in which
6722 // case this is just a compare).
6723 if (APLhs == (-APRhs) &&
6724 ((TargetPreference & AndOrSETCCFoldKind::ABS) ||
6725 DAG.doesNodeExist(ISD::ABS, DAG.getVTList(OpVT), {LHS0}))) {
6726 const APInt &C = APLhs.isNegative() ? APRhs : APLhs;
6727 // (icmp eq A, C) | (icmp eq A, -C)
6728 // -> (icmp eq Abs(A), C)
6729 // (icmp ne A, C) & (icmp ne A, -C)
6730 // -> (icmp ne Abs(A), C)
6731 SDValue AbsOp = DAG.getNode(ISD::ABS, DL, OpVT, LHS0);
6732 return DAG.getNode(ISD::SETCC, DL, VT, AbsOp,
6733 DAG.getConstant(C, DL, OpVT), LHS.getOperand(2));
6734 } else if (TargetPreference &
6735 (AndOrSETCCFoldKind::AddAnd | AndOrSETCCFoldKind::NotAnd)) {
6736 
6737 // AndOrSETCCFoldKind::AddAnd:
6738 // A == C0 | A == C1
6739 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6740 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) == 0
6741 // A != C0 & A != C1
6742 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6743 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) != 0
6744
6745 // AndOrSETCCFoldKind::NotAnd:
6746 // A == C0 | A == C1
6747 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6748 // -> ~A & smin(C0, C1) == 0
6749 // A != C0 & A != C1
6750 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6751 // -> ~A & smin(C0, C1) != 0
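// e.g. AddAnd with C0 = 1, C1 = 5: the difference is 4 (a power of 2), so
// A == 1 | A == 5 becomes ((A - 1) & ~4) == 0.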
6752
6753 const APInt &MaxC = APIntOps::smax(APRhs, APLhs);
6754 const APInt &MinC = APIntOps::smin(APRhs, APLhs);
6755 APInt Dif = MaxC - MinC;
6756 if (!Dif.isZero() && Dif.isPowerOf2()) {
6757 if (MaxC.isAllOnes() &&
6758 (TargetPreference & AndOrSETCCFoldKind::NotAnd)) {
6759 SDValue NotOp = DAG.getNOT(DL, LHS0, OpVT);
6760 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, NotOp,
6761 DAG.getConstant(MinC, DL, OpVT));
6762 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6763 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6764 } else if (TargetPreference & AndOrSETCCFoldKind::AddAnd) {
6765
6766 SDValue AddOp = DAG.getNode(ISD::ADD, DL, OpVT, LHS0,
6767 DAG.getConstant(-MinC, DL, OpVT));
6768 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, AddOp,
6769 DAG.getConstant(~Dif, DL, OpVT));
6770 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6771 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6772 }
6773 }
6774 }
6775 }
6776
6777 return SDValue();
6778}
6779
6780// Combine `(select c, (X & 1), 0)` -> `(and (zext c), X)`.
6781// We canonicalize to the `select` form in the middle end, but the `and` form
6782 // gets better codegen and all tested targets (arm, x86, riscv) prefer it.
6783 static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F,
6784 const SDLoc &DL, SelectionDAG &DAG) {
6785 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6786 if (!isNullConstant(F))
6787 return SDValue();
6788
6789 EVT CondVT = Cond.getValueType();
6790 if (TLI.getBooleanContents(CondVT) !=
6791 TargetLowering::ZeroOrOneBooleanContent)
6792 return SDValue();
6793
6794 if (T.getOpcode() != ISD::AND)
6795 return SDValue();
6796
6797 if (!isOneConstant(T.getOperand(1)))
6798 return SDValue();
6799
6800 EVT OpVT = T.getValueType();
6801
6802 SDValue CondMask =
6803 OpVT == CondVT ? Cond : DAG.getBoolExtOrTrunc(Cond, DL, OpVT, CondVT);
6804 return DAG.getNode(ISD::AND, DL, OpVT, CondMask, T.getOperand(0));
6805}
6806
6807/// This contains all DAGCombine rules which reduce two values combined by
6808/// an And operation to a single value. This makes them reusable in the context
6809/// of visitSELECT(). Rules involving constants are not included as
6810/// visitSELECT() already handles those cases.
6811SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
6812 EVT VT = N1.getValueType();
6813 SDLoc DL(N);
6814
6815 // fold (and x, undef) -> 0
6816 if (N0.isUndef() || N1.isUndef())
6817 return DAG.getConstant(0, DL, VT);
6818
6819 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
6820 return V;
6821
6822 // Canonicalize:
6823 // and(x, add) -> and(add, x)
6824 if (N1.getOpcode() == ISD::ADD)
6825 std::swap(N0, N1);
6826
6827 // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
6828 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
6829 VT.isScalarInteger() && VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
6830 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
6831 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
6832 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
6833 // immediate for an add, but it is legal if its top c2 bits are set,
6834 // transform the ADD so the immediate doesn't need to be materialized
6835 // in a register.
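// This is safe because the (lshr y, c2) operand has its top c2 bits clear, so
// those bits of the 'and' result are zero regardless of what the add produces.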
6836 APInt ADDC = ADDI->getAPIntValue();
6837 APInt SRLC = SRLI->getAPIntValue();
6838 if (ADDC.getSignificantBits() <= 64 && SRLC.ult(VT.getSizeInBits()) &&
6839 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6840 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
6841 SRLC.getZExtValue());
6842 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
6843 ADDC |= Mask;
6844 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6845 SDLoc DL0(N0);
6846 SDValue NewAdd =
6847 DAG.getNode(ISD::ADD, DL0, VT,
6848 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
6849 CombineTo(N0.getNode(), NewAdd);
6850 // Return N so it doesn't get rechecked!
6851 return SDValue(N, 0);
6852 }
6853 }
6854 }
6855 }
6856 }
6857 }
6858
6859 return SDValue();
6860}
6861
6862bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
6863 EVT LoadResultTy, EVT &ExtVT) {
6864 if (!AndC->getAPIntValue().isMask())
6865 return false;
6866
6867 unsigned ActiveBits = AndC->getAPIntValue().countr_one();
6868
6869 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6870 EVT LoadedVT = LoadN->getMemoryVT();
6871
6872 if (ExtVT == LoadedVT &&
6873 (!LegalOperations ||
6874 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
6875 // ZEXTLOAD will match without needing to change the size of the value being
6876 // loaded.
6877 return true;
6878 }
6879
6880 // Do not change the width of a volatile or atomic load.
6881 if (!LoadN->isSimple())
6882 return false;
6883
6884 // Do not generate loads of non-round integer types since these can
6885 // be expensive (and would be wrong if the type is not byte sized).
6886 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
6887 return false;
6888
6889 if (LegalOperations &&
6890 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
6891 return false;
6892
6893 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT, /*ByteOffset=*/0))
6894 return false;
6895
6896 return true;
6897}
6898
6899bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
6900 ISD::LoadExtType ExtType, EVT &MemVT,
6901 unsigned ShAmt) {
6902 if (!LDST)
6903 return false;
6904
6905 // Only allow byte offsets.
6906 if (ShAmt % 8)
6907 return false;
6908 const unsigned ByteShAmt = ShAmt / 8;
6909
6910 // Do not generate loads of non-round integer types since these can
6911 // be expensive (and would be wrong if the type is not byte sized).
6912 if (!MemVT.isRound())
6913 return false;
6914
6915 // Don't change the width of a volatile or atomic load.
6916 if (!LDST->isSimple())
6917 return false;
6918
6919 EVT LdStMemVT = LDST->getMemoryVT();
6920
6921 // Bail out when changing the scalable property, since we can't be sure that
6922 // we're actually narrowing here.
6923 if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
6924 return false;
6925
6926 // Verify that we are actually reducing a load width here.
6927 if (LdStMemVT.bitsLT(MemVT))
6928 return false;
6929
6930 // Ensure that this isn't going to produce an unsupported memory access.
6931 if (ShAmt) {
6932 const Align LDSTAlign = LDST->getAlign();
6933 const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
6934 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
6935 LDST->getAddressSpace(), NarrowAlign,
6936 LDST->getMemOperand()->getFlags()))
6937 return false;
6938 }
6939
6940 // It's not possible to generate a constant of extended or untyped type.
6941 EVT PtrType = LDST->getBasePtr().getValueType();
6942 if (PtrType == MVT::Untyped || PtrType.isExtended())
6943 return false;
6944
6945 if (isa<LoadSDNode>(LDST)) {
6946 LoadSDNode *Load = cast<LoadSDNode>(LDST);
6947 // Don't transform one with multiple uses; this would require adding a new
6948 // load.
6949 if (!SDValue(Load, 0).hasOneUse())
6950 return false;
6951
6952 if (LegalOperations &&
6953 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
6954 return false;
6955
6956 // For the transform to be legal, the load must produce only two values
6957 // (the value loaded and the chain). Don't transform a pre-increment
6958 // load, for example, which produces an extra value. Otherwise the
6959 // transformation is not equivalent, and the downstream logic to replace
6960 // uses gets things wrong.
6961 if (Load->getNumValues() > 2)
6962 return false;
6963
6964 // If the load that we're shrinking is an extload and we're not just
6965 // discarding the extension we can't simply shrink the load. Bail.
6966 // TODO: It would be possible to merge the extensions in some cases.
6967 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
6968 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6969 return false;
6970
6971 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT, ByteShAmt))
6972 return false;
6973 } else {
6974 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
6975 StoreSDNode *Store = cast<StoreSDNode>(LDST);
6976 // Can't write outside the original store
6977 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6978 return false;
6979
6980 if (LegalOperations &&
6981 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
6982 return false;
6983 }
6984 return true;
6985}
6986
6987bool DAGCombiner::SearchForAndLoads(SDNode *N,
6988 SmallVectorImpl<LoadSDNode*> &Loads,
6989 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
6990 ConstantSDNode *Mask,
6991 SDNode *&NodeToMask) {
6992 // Recursively search for the operands, looking for loads which can be
6993 // narrowed.
6994 for (SDValue Op : N->op_values()) {
6995 if (Op.getValueType().isVector())
6996 return false;
6997
6998 // Some constants may need fixing up later if they are too large.
6999 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
7000 assert(ISD::isBitwiseLogicOp(N->getOpcode()) &&
7001 "Expected bitwise logic operation");
7002 if (!C->getAPIntValue().isSubsetOf(Mask->getAPIntValue()))
7003 NodesWithConsts.insert(N);
7004 continue;
7005 }
7006
7007 if (!Op.hasOneUse())
7008 return false;
7009
7010 switch(Op.getOpcode()) {
7011 case ISD::LOAD: {
7012 auto *Load = cast<LoadSDNode>(Op);
7013 EVT ExtVT;
7014 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
7015 isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
7016
7017 // ZEXTLOAD is already small enough.
7018 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
7019 ExtVT.bitsGE(Load->getMemoryVT()))
7020 continue;
7021
7022 // Use LE to convert equal sized loads to zext.
7023 if (ExtVT.bitsLE(Load->getMemoryVT()))
7024 Loads.push_back(Load);
7025
7026 continue;
7027 }
7028 return false;
7029 }
7030 case ISD::ZERO_EXTEND:
7031 case ISD::AssertZext: {
7032 unsigned ActiveBits = Mask->getAPIntValue().countr_one();
7033 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
7034 EVT VT = Op.getOpcode() == ISD::AssertZext ?
7035 cast<VTSDNode>(Op.getOperand(1))->getVT() :
7036 Op.getOperand(0).getValueType();
7037
7038 // We can accept extending nodes if the mask is wider or an equal
7039 // width to the original type.
7040 if (ExtVT.bitsGE(VT))
7041 continue;
7042 break;
7043 }
7044 case ISD::OR:
7045 case ISD::XOR:
7046 case ISD::AND:
7047 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
7048 NodeToMask))
7049 return false;
7050 continue;
7051 }
7052
7053 // Allow one node which will be masked along with any loads found.
7054 if (NodeToMask)
7055 return false;
7056
7057 // Also ensure that the node to be masked only produces one data result.
7058 NodeToMask = Op.getNode();
7059 if (NodeToMask->getNumValues() > 1) {
7060 bool HasValue = false;
7061 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
7062 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
7063 if (VT != MVT::Glue && VT != MVT::Other) {
7064 if (HasValue) {
7065 NodeToMask = nullptr;
7066 return false;
7067 }
7068 HasValue = true;
7069 }
7070 }
7071 assert(HasValue && "Node to be masked has no data result?");
7072 }
7073 }
7074 return true;
7075}
7076
7077bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
7078 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
7079 if (!Mask)
7080 return false;
7081
7082 if (!Mask->getAPIntValue().isMask())
7083 return false;
7084
7085 // No need to do anything if the and directly uses a load.
7086 if (isa<LoadSDNode>(N->getOperand(0)))
7087 return false;
7088
7089 SmallVector<LoadSDNode*, 8> Loads;
7090 SmallPtrSet<SDNode*, 2> NodesWithConsts;
7091 SDNode *FixupNode = nullptr;
7092 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
7093 if (Loads.empty())
7094 return false;
7095
7096 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
7097 SDValue MaskOp = N->getOperand(1);
7098
7099 // If it exists, fixup the single node we allow in the tree that needs
7100 // masking.
7101 if (FixupNode) {
7102 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
7103 SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
7104 FixupNode->getValueType(0),
7105 SDValue(FixupNode, 0), MaskOp);
7106 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
7107 if (And.getOpcode() == ISD::AND)
7108 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
7109 }
7110
7111 // Narrow any constants that need it.
7112 for (auto *LogicN : NodesWithConsts) {
7113 SDValue Op0 = LogicN->getOperand(0);
7114 SDValue Op1 = LogicN->getOperand(1);
7115
7116 // We only need to fix AND if both inputs are constants. And we only need
7117 // to fix one of the constants.
7118 if (LogicN->getOpcode() == ISD::AND &&
7119 (!isa<ConstantSDNode>(Op0) || !isa<ConstantSDNode>(Op1)))
7120 continue;
7121
7122 if (isa<ConstantSDNode>(Op0) && LogicN->getOpcode() != ISD::AND)
7123 Op0 =
7124 DAG.getNode(ISD::AND, SDLoc(Op0), Op0.getValueType(), Op0, MaskOp);
7125
7126 if (isa<ConstantSDNode>(Op1))
7127 Op1 =
7128 DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOp);
7129
7130 if (isa<ConstantSDNode>(Op0) && !isa<ConstantSDNode>(Op1))
7131 std::swap(Op0, Op1);
7132
7133 DAG.UpdateNodeOperands(LogicN, Op0, Op1);
7134 }
7135
7136 // Create narrow loads.
7137 for (auto *Load : Loads) {
7138 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
7139 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
7140 SDValue(Load, 0), MaskOp);
7141 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
7142 if (And.getOpcode() == ISD::AND)
7143 And = SDValue(
7144 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
7145 SDValue NewLoad = reduceLoadWidth(And.getNode());
7146 assert(NewLoad &&
7147 "Shouldn't be masking the load if it can't be narrowed");
7148 CombineTo(Load, NewLoad, NewLoad.getValue(1));
7149 }
7150 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
7151 return true;
7152 }
7153 return false;
7154}
7155
7156// Unfold
7157// x & (-1 'logical shift' y)
7158// To
7159// (x 'opposite logical shift' y) 'logical shift' y
7160// if it is better for performance.
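// e.g. x & (-1 << y) --> (x >> y) << y, with >> a logical shift right.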
7161SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
7162 assert(N->getOpcode() == ISD::AND);
7163
7164 SDValue N0 = N->getOperand(0);
7165 SDValue N1 = N->getOperand(1);
7166
7167 // Do we actually prefer shifts over mask?
7168 if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
7169 return SDValue();
7170
7171 // Try to match (-1 '[outer] logical shift' y)
7172 unsigned OuterShift;
7173 unsigned InnerShift; // The opposite direction to the OuterShift.
7174 SDValue Y; // Shift amount.
7175 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
7176 if (!M.hasOneUse())
7177 return false;
7178 OuterShift = M->getOpcode();
7179 if (OuterShift == ISD::SHL)
7180 InnerShift = ISD::SRL;
7181 else if (OuterShift == ISD::SRL)
7182 InnerShift = ISD::SHL;
7183 else
7184 return false;
7185 if (!isAllOnesConstant(M->getOperand(0)))
7186 return false;
7187 Y = M->getOperand(1);
7188 return true;
7189 };
7190
7191 SDValue X;
7192 if (matchMask(N1))
7193 X = N0;
7194 else if (matchMask(N0))
7195 X = N1;
7196 else
7197 return SDValue();
7198
7199 SDLoc DL(N);
7200 EVT VT = N->getValueType(0);
7201
7202 // tmp = x 'opposite logical shift' y
7203 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
7204 // ret = tmp 'logical shift' y
7205 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
7206
7207 return T1;
7208}
7209
7210/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
7211/// For a target with a bit test, this is expected to become test + set and save
7212/// at least 1 instruction.
7213 static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
7214 assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
7215
7216 // Look through an optional extension.
7217 SDValue And0 = And->getOperand(0), And1 = And->getOperand(1);
7218 if (And0.getOpcode() == ISD::ANY_EXTEND && And0.hasOneUse())
7219 And0 = And0.getOperand(0);
7220 if (!isOneConstant(And1) || !And0.hasOneUse())
7221 return SDValue();
7222
7223 SDValue Src = And0;
7224
7225 // Attempt to find a 'not' op.
7226 // TODO: Should we favor test+set even without the 'not' op?
7227 bool FoundNot = false;
7228 if (isBitwiseNot(Src)) {
7229 FoundNot = true;
7230 Src = Src.getOperand(0);
7231
7232 // Look through an optional truncation. The source operand may not be the
7233 // same type as the original 'and', but that is ok because we are masking
7234 // off everything but the low bit.
7235 if (Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse())
7236 Src = Src.getOperand(0);
7237 }
7238
7239 // Match a shift-right by constant.
7240 if (Src.getOpcode() != ISD::SRL || !Src.hasOneUse())
7241 return SDValue();
7242
7243 // This is probably not worthwhile without a supported type.
7244 EVT SrcVT = Src.getValueType();
7245 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7246 if (!TLI.isTypeLegal(SrcVT))
7247 return SDValue();
7248
7249 // We might have looked through casts that make this transform invalid.
7250 unsigned BitWidth = SrcVT.getScalarSizeInBits();
7251 SDValue ShiftAmt = Src.getOperand(1);
7252 auto *ShiftAmtC = dyn_cast<ConstantSDNode>(ShiftAmt);
7253 if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(BitWidth))
7254 return SDValue();
7255
7256 // Set source to shift source.
7257 Src = Src.getOperand(0);
7258
7259 // Try again to find a 'not' op.
7260 // TODO: Should we favor test+set even with two 'not' ops?
7261 if (!FoundNot) {
7262 if (!isBitwiseNot(Src))
7263 return SDValue();
7264 Src = Src.getOperand(0);
7265 }
7266
7267 if (!TLI.hasBitTest(Src, ShiftAmt))
7268 return SDValue();
7269
7270 // Turn this into a bit-test pattern using mask op + setcc:
7271 // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
7272 // and (srl (not X), C)), 1 --> (and X, 1<<C) == 0
7273 SDLoc DL(And);
7274 SDValue X = DAG.getZExtOrTrunc(Src, DL, SrcVT);
7275 EVT CCVT =
7276 TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
7277 SDValue Mask = DAG.getConstant(
7278 APInt::getOneBitSet(BitWidth, ShiftAmtC->getZExtValue()), DL, SrcVT);
7279 SDValue NewAnd = DAG.getNode(ISD::AND, DL, SrcVT, X, Mask);
7280 SDValue Zero = DAG.getConstant(0, DL, SrcVT);
7281 SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
7282 return DAG.getZExtOrTrunc(Setcc, DL, And->getValueType(0));
7283}
7284
7285/// For targets that support usubsat, match a bit-hack form of that operation
7286/// that ends in 'and' and convert it.
7287 static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL) {
7288 EVT VT = N->getValueType(0);
7289 unsigned BitWidth = VT.getScalarSizeInBits();
7290 APInt SignMask = APInt::getSignMask(BitWidth);
7291
7292 // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
7293 // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
7294 // xor/add with SMIN (signmask) are logically equivalent.
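// When the sign bit of X is clear, the sra gives 0 and the 'and' result is 0;
// when it is set, the sra gives all-ones and the xor/add clears the sign bit,
// yielding X - 128. Both cases match usubsat X, 128.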
7295 SDValue X;
7296 if (!sd_match(N, m_And(m_OneUse(m_Xor(m_Value(X), m_SpecificInt(SignMask))),
7297 m_OneUse(m_Sra(m_Deferred(X),
7298 m_SpecificInt(BitWidth - 1))))) &&
7299 !sd_match(N, m_And(m_OneUse(m_Add(m_Value(X), m_SpecificInt(SignMask))),
7300 m_OneUse(m_Sra(m_Deferred(X),
7301 m_SpecificInt(BitWidth - 1))))))
7302 return SDValue();
7303
7304 return DAG.getNode(ISD::USUBSAT, DL, VT, X,
7305 DAG.getConstant(SignMask, DL, VT));
7306}
7307
7308/// Given a bitwise logic operation N with a matching bitwise logic operand,
7309/// fold a pattern where 2 of the source operands are identically shifted
7310/// values. For example:
7311/// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
7312 static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
7313 SelectionDAG &DAG) {
7314 unsigned LogicOpcode = N->getOpcode();
7315 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
7316 "Expected bitwise logic operation");
7317
7318 if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
7319 return SDValue();
7320
7321 // Match another bitwise logic op and a shift.
7322 unsigned ShiftOpcode = ShiftOp.getOpcode();
7323 if (LogicOp.getOpcode() != LogicOpcode ||
7324 !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL ||
7325 ShiftOpcode == ISD::SRA))
7326 return SDValue();
7327
7328 // Match another shift op inside the first logic operand. Handle both commuted
7329 // possibilities.
7330 // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
7331 // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
7332 SDValue X1 = ShiftOp.getOperand(0);
7333 SDValue Y = ShiftOp.getOperand(1);
7334 SDValue X0, Z;
7335 if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode &&
7336 LogicOp.getOperand(0).getOperand(1) == Y) {
7337 X0 = LogicOp.getOperand(0).getOperand(0);
7338 Z = LogicOp.getOperand(1);
7339 } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode &&
7340 LogicOp.getOperand(1).getOperand(1) == Y) {
7341 X0 = LogicOp.getOperand(1).getOperand(0);
7342 Z = LogicOp.getOperand(0);
7343 } else {
7344 return SDValue();
7345 }
7346
7347 EVT VT = N->getValueType(0);
7348 SDLoc DL(N);
7349 SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1);
7350 SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y);
7351 return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
7352}
7353
7354/// Given a tree of logic operations with shape like
7355/// (LOGIC (LOGIC (X, Y), LOGIC (Z, Y)))
7356/// try to match and fold shift operations with the same shift amount.
7357/// For example:
7358/// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) -->
7359/// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W)
7360 static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand,
7361 SDValue RightHand, SelectionDAG &DAG) {
7362 unsigned LogicOpcode = N->getOpcode();
7363 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
7364 "Expected bitwise logic operation");
7365 if (LeftHand.getOpcode() != LogicOpcode ||
7366 RightHand.getOpcode() != LogicOpcode)
7367 return SDValue();
7368 if (!LeftHand.hasOneUse() || !RightHand.hasOneUse())
7369 return SDValue();
7370
7371 // Try to match one of following patterns:
7372 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W)
7373 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC W, (SH X1, Y))
7374 // Note that foldLogicOfShifts will handle commuted versions of the left hand
7375 // itself.
7376 SDValue CombinedShifts, W;
7377 SDValue R0 = RightHand.getOperand(0);
7378 SDValue R1 = RightHand.getOperand(1);
7379 if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R0, DAG)))
7380 W = R1;
7381 else if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R1, DAG)))
7382 W = R0;
7383 else
7384 return SDValue();
7385
7386 EVT VT = N->getValueType(0);
7387 SDLoc DL(N);
7388 return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W);
7389}
7390
7391/// Fold "masked merge" expressions like `(m & x) | (~m & y)` and its DeMorgan
7392 /// variant `(~m | x) & (m | y)` into the equivalent `((x ^ y) & m) ^ y`
7393/// pattern. This is typically a better representation for targets without a
7394/// fused "and-not" operation.
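/// For each set bit of m, ((x ^ y) & m) ^ y picks the bit from x; for each
/// clear bit it picks the bit from y, which is exactly the masked merge.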
7395 static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG,
7396 const TargetLowering &TLI, const SDLoc &DL) {
7397 // Note that masked-merge variants using XOR or ADD expressions are
7398 // normalized to OR by InstCombine so we only check for OR or AND.
7399 assert((Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::AND) &&
7400 "Must be called with ISD::OR or ISD::AND node");
7401
7402 // If the target supports and-not, don't fold this.
7403 if (TLI.hasAndNot(SDValue(Node, 0)))
7404 return SDValue();
7405
7406 SDValue M, X, Y;
7407
7408 if (sd_match(Node,
7409 m_Or(m_OneUse(m_And(m_OneUse(m_Not(m_Value(M))), m_Value(Y))),
7410 m_OneUse(m_And(m_Deferred(M), m_Value(X))))) ||
7411 sd_match(Node,
7412 m_And(m_OneUse(m_Or(m_OneUse(m_Not(m_Value(M))), m_Value(X))),
7413 m_OneUse(m_Or(m_Deferred(M), m_Value(Y)))))) {
7414 EVT VT = M.getValueType();
7415 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, X, Y);
7416 SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor, M);
7417 return DAG.getNode(ISD::XOR, DL, VT, And, Y);
7418 }
7419 return SDValue();
7420}
7421
7422SDValue DAGCombiner::visitAND(SDNode *N) {
7423 SDValue N0 = N->getOperand(0);
7424 SDValue N1 = N->getOperand(1);
7425 EVT VT = N1.getValueType();
7426 SDLoc DL(N);
7427
7428 // x & x --> x
7429 if (N0 == N1)
7430 return N0;
7431
7432 // fold (and c1, c2) -> c1&c2
7433 if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N0, N1}))
7434 return C;
7435
7436 // canonicalize constant to RHS
7437 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7438 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7439 return DAG.getNode(ISD::AND, DL, VT, N1, N0);
7440
7441 if (areBitwiseNotOfEachother(N0, N1))
7442 return DAG.getConstant(APInt::getZero(VT.getScalarSizeInBits()), DL, VT);
7443
7444 // fold vector ops
7445 if (VT.isVector()) {
7446 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
7447 return FoldedVOp;
7448
7449 // fold (and x, 0) -> 0, vector edition
7450 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
7451 // do not return N1, because an undef node may exist in N1
7452 return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()), DL,
7453 N1.getValueType());
7454
7455 // fold (and x, -1) -> x, vector edition
7456 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
7457 return N0;
7458
7459 // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
7460 auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
7461 ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
7462 if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat) {
7463 EVT LoadVT = MLoad->getMemoryVT();
7464 EVT ExtVT = VT;
7465 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
7466 // For this AND to be a zero extension of the masked load the elements
7467 // of the BuildVec must mask the bottom bits of the extended element
7468 // type
7469 uint64_t ElementSize =
7470 LoadVT.getVectorElementType().getScalarSizeInBits();
7471 if (Splat->getAPIntValue().isMask(ElementSize)) {
7472 SDValue NewLoad = DAG.getMaskedLoad(
7473 ExtVT, DL, MLoad->getChain(), MLoad->getBasePtr(),
7474 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
7475 LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
7476 ISD::ZEXTLOAD, MLoad->isExpandingLoad());
7477 bool LoadHasOtherUsers = !N0.hasOneUse();
7478 CombineTo(N, NewLoad);
7479 if (LoadHasOtherUsers)
7480 CombineTo(MLoad, NewLoad.getValue(0), NewLoad.getValue(1));
7481 return SDValue(N, 0);
7482 }
7483 }
7484 }
7485 }
7486
7487 // fold (and x, -1) -> x
7488 if (isAllOnesConstant(N1))
7489 return N0;
7490
7491 // if (and x, c) is known to be zero, return 0
7492 unsigned BitWidth = VT.getScalarSizeInBits();
7493 ConstantSDNode *N1C = isConstOrConstSplat(N1);
7494 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
7495 return DAG.getConstant(0, DL, VT);
7496
7497 if (SDValue R = foldAndOrOfSETCC(N, DAG))
7498 return R;
7499
7500 if (SDValue NewSel = foldBinOpIntoSelect(N))
7501 return NewSel;
7502
7503 // reassociate and
7504 if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags()))
7505 return RAND;
7506
7507 // Fold and(vecreduce(x), vecreduce(y)) -> vecreduce(and(x, y))
7508 if (SDValue SD =
7509 reassociateReduction(ISD::VECREDUCE_AND, ISD::AND, DL, VT, N0, N1))
7510 return SD;
7511
7512 // fold (and (or x, C), D) -> D if (C & D) == D
7513 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7514 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
7515 };
7516 if (N0.getOpcode() == ISD::OR &&
7517 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
7518 return N1;
7519
7520 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
7521 SDValue N0Op0 = N0.getOperand(0);
7522 EVT SrcVT = N0Op0.getValueType();
7523 unsigned SrcBitWidth = SrcVT.getScalarSizeInBits();
7524 APInt Mask = ~N1C->getAPIntValue();
7525 Mask = Mask.trunc(SrcBitWidth);
7526
7527 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
7528 if (DAG.MaskedValueIsZero(N0Op0, Mask))
7529 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0Op0);
7530
7531 // fold (and (any_ext V), c) -> (zero_ext (and (trunc V), c)) if profitable.
7532 if (N1C->getAPIntValue().countLeadingZeros() >= (BitWidth - SrcBitWidth) &&
7533 TLI.isTruncateFree(VT, SrcVT) && TLI.isZExtFree(SrcVT, VT) &&
7534 TLI.isTypeDesirableForOp(ISD::AND, SrcVT) &&
7535 TLI.isNarrowingProfitable(N, VT, SrcVT))
7536 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
7537 DAG.getNode(ISD::AND, DL, SrcVT, N0Op0,
7538 DAG.getZExtOrTrunc(N1, DL, SrcVT)));
7539 }
7540
7541 // fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2)))
7542 if (ISD::isExtOpcode(N0.getOpcode())) {
7543 unsigned ExtOpc = N0.getOpcode();
7544 SDValue N0Op0 = N0.getOperand(0);
7545 if (N0Op0.getOpcode() == ISD::AND &&
7546 (ExtOpc != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0Op0, VT)) &&
7547 N0->hasOneUse() && N0Op0->hasOneUse()) {
7548 if (SDValue NewExt = DAG.FoldConstantArithmetic(ExtOpc, DL, VT,
7549 {N0Op0.getOperand(1)})) {
7550 if (SDValue NewMask =
7551 DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N1, NewExt})) {
7552 return DAG.getNode(ISD::AND, DL, VT,
7553 DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(0)),
7554 NewMask);
7555 }
7556 }
7557 }
7558 }
7559
7560 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
7561 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
7562 // already be zero by virtue of the width of the base type of the load.
7563 //
7564 // the 'X' node here can either be nothing or an extract_vector_elt to catch
7565 // more cases.
7566 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7568 N0.getOperand(0).getOpcode() == ISD::LOAD &&
7569 N0.getOperand(0).getResNo() == 0) ||
7570 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
7571 auto *Load =
7572 cast<LoadSDNode>((N0.getOpcode() == ISD::LOAD) ? N0 : N0.getOperand(0));
7573
7574 // Get the constant (if applicable) the zero'th operand is being ANDed with.
7575 // This can be a pure constant or a vector splat, in which case we treat the
7576 // vector as a scalar and use the splat value.
7577 APInt Constant = APInt::getZero(1);
7578 if (const ConstantSDNode *C = isConstOrConstSplat(
7579 N1, /*AllowUndefs=*/false, /*AllowTruncation=*/true)) {
7580 Constant = C->getAPIntValue();
7581 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
7582 unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
7583 APInt SplatValue, SplatUndef;
7584 unsigned SplatBitSize;
7585 bool HasAnyUndefs;
7586 // Endianness should not matter here. Code below makes sure that we only
7587 // use the result if the SplatBitSize is a multiple of the vector element
7588 // size. And after that we AND all element sized parts of the splat
7589 // together. So the end result should be the same regardless of in which
7590 // order we do those operations.
7591 const bool IsBigEndian = false;
7592 bool IsSplat =
7593 Vector->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
7594 HasAnyUndefs, EltBitWidth, IsBigEndian);
7595
7596 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
7597 // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
7598 if (IsSplat && (SplatBitSize % EltBitWidth) == 0) {
7599 // Undef bits can contribute to a possible optimisation if set, so
7600 // set them.
7601 SplatValue |= SplatUndef;
7602
7603 // The splat value may be something like "0x00FFFFFF", which means 0 for
7604 // the first vector value and FF for the rest, repeating. We need a mask
7605 // that will apply equally to all members of the vector, so AND all the
7606 // lanes of the constant together.
7607 Constant = APInt::getAllOnes(EltBitWidth);
7608 for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
7609 Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
7610 }
7611 }
7612
7613 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
7614 // actually legal and isn't going to get expanded, else this is a false
7615 // optimisation.
7616 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
7617 Load->getValueType(0),
7618 Load->getMemoryVT());
7619
7620 // Resize the constant to the same size as the original memory access before
7621 // extension. If it is still the AllOnesValue then this AND is completely
7622 // unneeded.
7623 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
7624
7625 bool B;
7626 switch (Load->getExtensionType()) {
7627 default: B = false; break;
7628 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
7629 case ISD::ZEXTLOAD:
7630 case ISD::NON_EXTLOAD: B = true; break;
7631 }
7632
7633 if (B && Constant.isAllOnes()) {
7634 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
7635 // preserve semantics once we get rid of the AND.
7636 SDValue NewLoad(Load, 0);
7637
7638 // Fold the AND away. NewLoad may get replaced immediately.
7639 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
7640
7641 if (Load->getExtensionType() == ISD::EXTLOAD) {
7642 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
7643 Load->getValueType(0), SDLoc(Load),
7644 Load->getChain(), Load->getBasePtr(),
7645 Load->getOffset(), Load->getMemoryVT(),
7646 Load->getMemOperand());
7647 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
7648 if (Load->getNumValues() == 3) {
7649 // PRE/POST_INC loads have 3 values.
7650 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
7651 NewLoad.getValue(2) };
7652 CombineTo(Load, To, 3, true);
7653 } else {
7654 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
7655 }
7656 }
7657
7658 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7659 }
7660 }
7661
7662 // Try to convert a constant mask AND into a shuffle clear mask.
7663 if (VT.isVector())
7664 if (SDValue Shuffle = XformToShuffleWithZero(N))
7665 return Shuffle;
7666
7667 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7668 return Combined;
7669
7670 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && N1C &&
7671 ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
7672 SDValue Ext = N0.getOperand(0);
7673 EVT ExtVT = Ext->getValueType(0);
7674 SDValue Extendee = Ext->getOperand(0);
7675
7676 unsigned ScalarWidth = Extendee.getValueType().getScalarSizeInBits();
7677 if (N1C->getAPIntValue().isMask(ScalarWidth) &&
7678 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, ExtVT))) {
7679 // (and (extract_subvector (zext|anyext|sext v) _) iN_mask)
7680 // => (extract_subvector (iN_zeroext v))
7681 SDValue ZeroExtExtendee =
7682 DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Extendee);
7683
7684 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ZeroExtExtendee,
7685 N0.getOperand(1));
7686 }
7687 }
7688
7689 // fold (and (masked_gather x)) -> (zext_masked_gather x)
7690 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
7691 EVT MemVT = GN0->getMemoryVT();
7692 EVT ScalarVT = MemVT.getScalarType();
7693
7694 if (SDValue(GN0, 0).hasOneUse() &&
7695 isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
7696 TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
7697 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
7698 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
7699
7700 SDValue ZExtLoad = DAG.getMaskedGather(
7701 DAG.getVTList(VT, MVT::Other), MemVT, DL, Ops, GN0->getMemOperand(),
7702 GN0->getIndexType(), ISD::ZEXTLOAD);
7703
7704 CombineTo(N, ZExtLoad);
7705 AddToWorklist(ZExtLoad.getNode());
7706 // Avoid recheck of N.
7707 return SDValue(N, 0);
7708 }
7709 }
7710
7711 // fold (and (load x), 255) -> (zextload x, i8)
7712 // fold (and (extload x, i16), 255) -> (zextload x, i8)
7713 if (N1C && N0.getOpcode() == ISD::LOAD && !VT.isVector())
7714 if (SDValue Res = reduceLoadWidth(N))
7715 return Res;
7716
7717 if (LegalTypes) {
7718 // Attempt to propagate the AND back up to the leaves which, if they're
7719 // loads, can be combined to narrow loads and the AND node can be removed.
7720 // Perform after legalization so that extend nodes will already be
7721 // combined into the loads.
7722 if (BackwardsPropagateMask(N))
7723 return SDValue(N, 0);
7724 }
7725
7726 if (SDValue Combined = visitANDLike(N0, N1, N))
7727 return Combined;
7728
7729 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
7730 if (N0.getOpcode() == N1.getOpcode())
7731 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7732 return V;
7733
7734 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7735 return R;
7736 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
7737 return R;
7738
7739 // Fold (and X, (bswap (not Y))) -> (and X, (not (bswap Y)))
7740 // Fold (and X, (bitreverse (not Y))) -> (and X, (not (bitreverse Y)))
7741 SDValue X, Y, Z, NotY;
7742 for (unsigned Opc : {ISD::BSWAP, ISD::BITREVERSE})
7743 if (sd_match(N,
7744 m_And(m_Value(X), m_OneUse(m_UnaryOp(Opc, m_Value(NotY))))) &&
7745 sd_match(NotY, m_Not(m_Value(Y))) &&
7746 (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()))
7747 return DAG.getNode(ISD::AND, DL, VT, X,
7748 DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y), VT));
7749
7750 // Fold (and X, (rot (not Y), Z)) -> (and X, (not (rot Y, Z)))
7751 for (unsigned Opc : {ISD::ROTL, ISD::ROTR})
7752 if (sd_match(N, m_And(m_Value(X),
7753 m_OneUse(m_BinOp(Opc, m_Value(NotY), m_Value(Z))))) &&
7754 sd_match(NotY, m_Not(m_Value(Y))) &&
7755 (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()))
7756 return DAG.getNode(ISD::AND, DL, VT, X,
7757 DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y, Z), VT));
7758
7759 // Fold (and X, (add (not Y), Z)) -> (and X, (not (sub Y, Z)))
7760 // Fold (and X, (sub (not Y), Z)) -> (and X, (not (add Y, Z)))
7761 if (TLI.hasAndNot(SDValue(N, 0)))
7762 if (SDValue Folded = foldBitwiseOpWithNeg(N, DL, VT))
7763 return Folded;
7764
7765 // Fold (and (srl X, C), 1) -> (srl X, BW-1) for signbit extraction
7766 // If we are shifting down an extended sign bit, see if we can simplify
7767 // this to shifting the MSB directly to expose further simplifications.
7768 // This pattern often appears after sext_inreg legalization.
7769 APInt Amt;
7770 if (sd_match(N, m_And(m_Srl(m_Value(X), m_ConstInt(Amt)), m_One())) &&
7771 Amt.ult(BitWidth - 1) && Amt.uge(BitWidth - DAG.ComputeNumSignBits(X)))
7772 return DAG.getNode(ISD::SRL, DL, VT, X,
7773 DAG.getShiftAmountConstant(BitWidth - 1, VT, DL));
7774
7775 // Masking the negated extension of a boolean is just the zero-extended
7776 // boolean:
7777 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
7778 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
7779 //
7780 // Note: the SimplifyDemandedBits fold below can make an information-losing
7781 // transform, and then we have no way to find this better fold.
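// Illustrative example: for an i1 X, zext(X) is 0 or 1, so (sub 0, zext(X))
// is 0 or all-ones; masking with 1 therefore yields exactly zext(X) again.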
7782 if (sd_match(N, m_And(m_Sub(m_Zero(), m_Value(X)), m_One()))) {
7783 if (X.getOpcode() == ISD::ZERO_EXTEND &&
7784 X.getOperand(0).getScalarValueSizeInBits() == 1)
7785 return X;
7786 if (X.getOpcode() == ISD::SIGN_EXTEND &&
7787 X.getOperand(0).getScalarValueSizeInBits() == 1)
7788 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, X.getOperand(0));
7789 }
7790
7791 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
7792 // fold (and (sra)) -> (and (srl)) when possible.
7793 if (SimplifyDemandedBits(SDValue(N, 0)))
7794 return SDValue(N, 0);
7795
7796 // fold (zext_inreg (extload x)) -> (zextload x)
7797 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
7798 if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
7799 (ISD::isEXTLoad(N0.getNode()) ||
7800 (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
7801 auto *LN0 = cast<LoadSDNode>(N0);
7802 EVT MemVT = LN0->getMemoryVT();
7803 // If we zero all the possible extended bits, then we can turn this into
7804 // a zextload if we are running before legalize or the operation is legal.
7805 unsigned ExtBitSize = N1.getScalarValueSizeInBits();
7806 unsigned MemBitSize = MemVT.getScalarSizeInBits();
7807 APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
7808 if (DAG.MaskedValueIsZero(N1, ExtBits) &&
7809 ((!LegalOperations && LN0->isSimple()) ||
7810 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
7811 SDValue ExtLoad =
7812 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
7813 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
7814 AddToWorklist(N);
7815 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
7816 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7817 }
7818 }
7819
7820 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
7821 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
7822 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
7823 N0.getOperand(1), false))
7824 return BSwap;
7825 }
7826
7827 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
7828 return Shifts;
7829
7830 if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
7831 return V;
7832
7833 // Recognize the following pattern:
7834 //
7835 // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
7836 //
7837 // where bitmask is a mask that clears the upper bits of AndVT. The
7838 // number of bits in bitmask must be a power of two.
7839 auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
7840 if (LHS->getOpcode() != ISD::SIGN_EXTEND)
7841 return false;
7842
7843 auto *C = isConstOrConstSplat(RHS);
7844 if (!C)
7845 return false;
7846
7847 if (!C->getAPIntValue().isMask(
7848 LHS.getOperand(0).getValueType().getFixedSizeInBits()))
7849 return false;
7850
7851 return true;
7852 };
7853
7854 // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
7855 if (IsAndZeroExtMask(N0, N1))
7856 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
7857
7858 if (hasOperation(ISD::USUBSAT, VT))
7859 if (SDValue V = foldAndToUsubsat(N, DAG, DL))
7860 return V;
7861
7862 // Postpone until legalization completed to avoid interference with bswap
7863 // folding
7864 if (LegalOperations || VT.isVector())
7865 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
7866 return R;
7867
7868 if (VT.isScalarInteger() && VT != MVT::i1)
7869 if (SDValue R = foldMaskedMerge(N, DAG, TLI, DL))
7870 return R;
7871
7872 return SDValue();
7873}
7874
7875/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
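/// Illustrative i32 example (values chosen for exposition): for a = 0xAABBCCDD,
/// ((a >> 8) & 0xff) = 0xCC and ((a << 8) & 0xff00) = 0xDD00, so their OR is
/// 0x0000DDCC, which equals (bswap a) >> 16 = 0xDDCCBBAA >> 16.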
7876SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
7877 bool DemandHighBits) {
7878 if (!LegalOperations)
7879 return SDValue();
7880
7881 EVT VT = N->getValueType(0);
7882 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
7883 return SDValue();
7884 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7885 return SDValue();
7886
7887 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
7888 bool LookPassAnd0 = false;
7889 bool LookPassAnd1 = false;
7890 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
7891 std::swap(N0, N1);
7892 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
7893 std::swap(N0, N1);
7894 if (N0.getOpcode() == ISD::AND) {
7895 if (!N0->hasOneUse())
7896 return SDValue();
7897 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7898 // Also handle 0xffff since the LHS is guaranteed to have zeros there.
7899 // This is needed for X86.
7900 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
7901 N01C->getZExtValue() != 0xFFFF))
7902 return SDValue();
7903 N0 = N0.getOperand(0);
7904 LookPassAnd0 = true;
7905 }
7906
7907 if (N1.getOpcode() == ISD::AND) {
7908 if (!N1->hasOneUse())
7909 return SDValue();
7910 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7911 if (!N11C || N11C->getZExtValue() != 0xFF)
7912 return SDValue();
7913 N1 = N1.getOperand(0);
7914 LookPassAnd1 = true;
7915 }
7916
7917 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
7918 std::swap(N0, N1);
7919 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
7920 return SDValue();
7921 if (!N0->hasOneUse() || !N1->hasOneUse())
7922 return SDValue();
7923
7924 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7925 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7926 if (!N01C || !N11C)
7927 return SDValue();
7928 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
7929 return SDValue();
7930
7931 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
7932 SDValue N00 = N0->getOperand(0);
7933 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
7934 if (!N00->hasOneUse())
7935 return SDValue();
7936 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
7937 if (!N001C || N001C->getZExtValue() != 0xFF)
7938 return SDValue();
7939 N00 = N00.getOperand(0);
7940 LookPassAnd0 = true;
7941 }
7942
7943 SDValue N10 = N1->getOperand(0);
7944 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
7945 if (!N10->hasOneUse())
7946 return SDValue();
7947 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
7948 // Also allow 0xFFFF since the bits will be shifted out. This is needed
7949 // for X86.
7950 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
7951 N101C->getZExtValue() != 0xFFFF))
7952 return SDValue();
7953 N10 = N10.getOperand(0);
7954 LookPassAnd1 = true;
7955 }
7956
7957 if (N00 != N10)
7958 return SDValue();
7959
7960 // Make sure everything beyond the low halfword gets set to zero since the SRL
7961 // 16 will clear the top bits.
7962 unsigned OpSizeInBits = VT.getSizeInBits();
7963 if (OpSizeInBits > 16) {
7964 // If the left-shift isn't masked out then the only way this is a bswap is
7965 // if all bits beyond the low 8 are 0. In that case the entire pattern
7966 // reduces to a left shift anyway: leave it for other parts of the combiner.
7967 if (DemandHighBits && !LookPassAnd0)
7968 return SDValue();
7969
7970 // However, if the right shift isn't masked out then it might be because
7971 // it's not needed. See if we can spot that too. If the high bits aren't
7972 // demanded, we only need bits 23:16 to be zero. Otherwise, we need all
7973 // upper bits to be zero.
7974 if (!LookPassAnd1) {
7975 unsigned HighBit = DemandHighBits ? OpSizeInBits : 24;
7976 if (!DAG.MaskedValueIsZero(N10,
7977 APInt::getBitsSet(OpSizeInBits, 16, HighBit)))
7978 return SDValue();
7979 }
7980 }
7981
7982 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
7983 if (OpSizeInBits > 16) {
7984 SDLoc DL(N);
7985 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
7986 DAG.getShiftAmountConstant(OpSizeInBits - 16, VT, DL));
7987 }
7988 return Res;
7989}
7990
7991/// Return true if the specified node is an element that makes up a 32-bit
7992/// packed halfword byteswap.
7993/// ((x & 0x000000ff) << 8) |
7994/// ((x & 0x0000ff00) >> 8) |
7995/// ((x & 0x00ff0000) << 8) |
7996/// ((x & 0xff000000) >> 8)
7997static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
7998 if (!N->hasOneUse())
7999 return false;
8000
8001 unsigned Opc = N.getOpcode();
8002 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
8003 return false;
8004
8005 SDValue N0 = N.getOperand(0);
8006 unsigned Opc0 = N0.getOpcode();
8007 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
8008 return false;
8009
8010 ConstantSDNode *N1C = nullptr;
8011 // SHL or SRL: look upstream for AND mask operand
8012 if (Opc == ISD::AND)
8013 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
8014 else if (Opc0 == ISD::AND)
8015 N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
8016 if (!N1C)
8017 return false;
8018
8019 unsigned MaskByteOffset;
8020 switch (N1C->getZExtValue()) {
8021 default:
8022 return false;
8023 case 0xFF: MaskByteOffset = 0; break;
8024 case 0xFF00: MaskByteOffset = 1; break;
8025 case 0xFFFF:
8026 // In case demanded bits didn't clear the bits that will be shifted out.
8027 // This is needed for X86.
8028 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
8029 MaskByteOffset = 1;
8030 break;
8031 }
8032 return false;
8033 case 0xFF0000: MaskByteOffset = 2; break;
8034 case 0xFF000000: MaskByteOffset = 3; break;
8035 }
8036
8037 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
8038 if (Opc == ISD::AND) {
8039 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
8040 // (x >> 8) & 0xff
8041 // (x >> 8) & 0xff0000
8042 if (Opc0 != ISD::SRL)
8043 return false;
8044 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
8045 if (!C || C->getZExtValue() != 8)
8046 return false;
8047 } else {
8048 // (x << 8) & 0xff00
8049 // (x << 8) & 0xff000000
8050 if (Opc0 != ISD::SHL)
8051 return false;
8052 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
8053 if (!C || C->getZExtValue() != 8)
8054 return false;
8055 }
8056 } else if (Opc == ISD::SHL) {
8057 // (x & 0xff) << 8
8058 // (x & 0xff0000) << 8
8059 if (MaskByteOffset != 0 && MaskByteOffset != 2)
8060 return false;
8061 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
8062 if (!C || C->getZExtValue() != 8)
8063 return false;
8064 } else { // Opc == ISD::SRL
8065 // (x & 0xff00) >> 8
8066 // (x & 0xff000000) >> 8
8067 if (MaskByteOffset != 1 && MaskByteOffset != 3)
8068 return false;
8069 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
8070 if (!C || C->getZExtValue() != 8)
8071 return false;
8072 }
8073
8074 if (Parts[MaskByteOffset])
8075 return false;
8076
8077 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
8078 return true;
8079}
8080
8081// Match 2 elements of a packed halfword bswap.
8082static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
8083 if (N.getOpcode() == ISD::OR)
8084 return isBSwapHWordElement(N.getOperand(0), Parts) &&
8085 isBSwapHWordElement(N.getOperand(1), Parts);
8086
8087 if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
8088 ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
8089 if (!C || C->getAPIntValue() != 16)
8090 return false;
8091 Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
8092 return true;
8093 }
8094
8095 return false;
8096}
8097
8098// Match this pattern:
8099// (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
8100// And rewrite this to:
8101// (rotr (bswap A), 16)
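// Illustrative i32 example (values chosen for exposition): for A = 0x11223344,
// (shl A, 8) & 0xff00ff00 = 0x22004400 and (srl A, 8) & 0x00ff00ff = 0x00110033,
// whose OR is 0x22114433 == rotr(bswap(A) = 0x44332211, 16).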
8102static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
8103 SelectionDAG &DAG, SDNode *N, SDValue N0,
8104 SDValue N1, EVT VT) {
8105 assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
8106 "MatchBSwapHWordOrAndAnd: expecting i32");
8107 if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
8108 return SDValue();
8109 if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
8110 return SDValue();
8111 // TODO: this is too restrictive; lifting this restriction requires more tests
8112 if (!N0->hasOneUse() || !N1->hasOneUse())
8113 return SDValue();
8114 ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
8115 ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
8116 if (!Mask0 || !Mask1)
8117 return SDValue();
8118 if (Mask0->getAPIntValue() != 0xff00ff00 ||
8119 Mask1->getAPIntValue() != 0x00ff00ff)
8120 return SDValue();
8121 SDValue Shift0 = N0.getOperand(0);
8122 SDValue Shift1 = N1.getOperand(0);
8123 if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
8124 return SDValue();
8125 ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
8126 ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
8127 if (!ShiftAmt0 || !ShiftAmt1)
8128 return SDValue();
8129 if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
8130 return SDValue();
8131 if (Shift0.getOperand(0) != Shift1.getOperand(0))
8132 return SDValue();
8133
8134 SDLoc DL(N);
8135 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
8136 SDValue ShAmt = DAG.getShiftAmountConstant(16, VT, DL);
8137 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
8138}
8139
8140/// Match a 32-bit packed halfword bswap. That is
8141/// ((x & 0x000000ff) << 8) |
8142/// ((x & 0x0000ff00) >> 8) |
8143/// ((x & 0x00ff0000) << 8) |
8144/// ((x & 0xff000000) >> 8)
8145/// => (rotl (bswap x), 16)
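/// Illustrative example (values chosen for exposition): x = 0xAABBCCDD yields
/// 0x0000DD00 | 0x000000CC | 0xBB000000 | 0x00AA0000 = 0xBBAADDCC, which is
/// rotl(bswap(x) = 0xDDCCBBAA, 16).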
8146SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
8147 if (!LegalOperations)
8148 return SDValue();
8149
8150 EVT VT = N->getValueType(0);
8151 if (VT != MVT::i32)
8152 return SDValue();
8153 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
8154 return SDValue();
8155
8156 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT))
8157 return BSwap;
8158
8159 // Try again with commuted operands.
8160 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT))
8161 return BSwap;
8162
8163
8164 // Look for either
8165 // (or (bswaphpair), (bswaphpair))
8166 // (or (or (bswaphpair), (and)), (and))
8167 // (or (or (and), (bswaphpair)), (and))
8168 SDNode *Parts[4] = {};
8169
8170 if (isBSwapHWordPair(N0, Parts)) {
8171 // (or (or (and), (and)), (or (and), (and)))
8172 if (!isBSwapHWordPair(N1, Parts))
8173 return SDValue();
8174 } else if (N0.getOpcode() == ISD::OR) {
8175 // (or (or (or (and), (and)), (and)), (and))
8176 if (!isBSwapHWordElement(N1, Parts))
8177 return SDValue();
8178 SDValue N00 = N0.getOperand(0);
8179 SDValue N01 = N0.getOperand(1);
8180 if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
8181 !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
8182 return SDValue();
8183 } else {
8184 return SDValue();
8185 }
8186
8187 // Make sure the parts are all coming from the same node.
8188 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
8189 return SDValue();
8190
8191 SDLoc DL(N);
8192 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
8193 SDValue(Parts[0], 0));
8194
8195 // Result of the bswap should be rotated by 16. If it's not legal, then
8196 // do (x << 16) | (x >> 16).
8197 SDValue ShAmt = DAG.getShiftAmountConstant(16, VT, DL);
8198 if (hasOperation(ISD::ROTL, VT))
8199 return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
8200 if (hasOperation(ISD::ROTR, VT))
8201 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
8202 return DAG.getNode(ISD::OR, DL, VT,
8203 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
8204 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
8205}
8206
8207/// This contains all DAGCombine rules which reduce two values combined by
8208/// an Or operation to a single value \see visitANDLike().
8209SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, const SDLoc &DL) {
8210 EVT VT = N1.getValueType();
8211
8212 // fold (or x, undef) -> -1
8213 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
8214 return DAG.getAllOnesConstant(DL, VT);
8215
8216 if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
8217 return V;
8218
8219 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
8220 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
8221 // Don't increase # computations.
8222 (N0->hasOneUse() || N1->hasOneUse())) {
8223 // We can only do this xform if we know that bits from X that are set in C2
8224 // but not in C1 are already zero. Likewise for Y.
8225 if (const ConstantSDNode *N0O1C =
8226 getAsNonOpaqueConstant(N0.getOperand(1))) {
8227 if (const ConstantSDNode *N1O1C =
8228 getAsNonOpaqueConstant(N1.getOperand(1))) {
8229 // We can only do this xform if we know that bits from X that are set in
8230 // C2 but not in C1 are already zero. Likewise for Y.
8231 const APInt &LHSMask = N0O1C->getAPIntValue();
8232 const APInt &RHSMask = N1O1C->getAPIntValue();
8233
8234 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
8235 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
8236 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
8237 N0.getOperand(0), N1.getOperand(0));
8238 return DAG.getNode(ISD::AND, DL, VT, X,
8239 DAG.getConstant(LHSMask | RHSMask, DL, VT));
8240 }
8241 }
8242 }
8243 }
8244
8245 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
8246 if (N0.getOpcode() == ISD::AND &&
8247 N1.getOpcode() == ISD::AND &&
8248 N0.getOperand(0) == N1.getOperand(0) &&
8249 // Don't increase # computations.
8250 (N0->hasOneUse() || N1->hasOneUse())) {
8251 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
8252 N0.getOperand(1), N1.getOperand(1));
8253 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
8254 }
8255
8256 return SDValue();
8257}
8258
8259/// OR combines for which the commuted variant will be tried as well.
8260static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
8261 SDNode *N) {
8262 EVT VT = N0.getValueType();
8263 unsigned BW = VT.getScalarSizeInBits();
8264 SDLoc DL(N);
8265
8266 auto peekThroughResize = [](SDValue V) {
8267 if (V->getOpcode() == ISD::ZERO_EXTEND || V->getOpcode() == ISD::TRUNCATE)
8268 return V->getOperand(0);
8269 return V;
8270 };
8271
8272 SDValue N0Resized = peekThroughResize(N0);
8273 if (N0Resized.getOpcode() == ISD::AND) {
8274 SDValue N1Resized = peekThroughResize(N1);
8275 SDValue N00 = N0Resized.getOperand(0);
8276 SDValue N01 = N0Resized.getOperand(1);
8277
8278 // fold or (and x, y), x --> x
8279 if (N00 == N1Resized || N01 == N1Resized)
8280 return N1;
8281
8282 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
8283 // TODO: Set AllowUndefs = true.
8284 if (SDValue NotOperand = getBitwiseNotOperand(N01, N00,
8285 /* AllowUndefs */ false)) {
8286 if (peekThroughResize(NotOperand) == N1Resized)
8287 return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N00, DL, VT),
8288 N1);
8289 }
8290
8291 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
8292 if (SDValue NotOperand = getBitwiseNotOperand(N00, N01,
8293 /* AllowUndefs */ false)) {
8294 if (peekThroughResize(NotOperand) == N1Resized)
8295 return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N01, DL, VT),
8296 N1);
8297 }
8298 }
8299
8300 SDValue X, Y;
8301
8302 // fold or (xor X, N1), N1 --> or X, N1
8303 if (sd_match(N0, m_Xor(m_Value(X), m_Specific(N1))))
8304 return DAG.getNode(ISD::OR, DL, VT, X, N1);
8305
8306 // fold or (xor x, y), (x and/or y) --> or x, y
8307 if (sd_match(N0, m_Xor(m_Value(X), m_Value(Y))) &&
8308 (sd_match(N1, m_And(m_Specific(X), m_Specific(Y))) ||
8309 sd_match(N1, m_Or(m_Specific(X), m_Specific(Y)))))
8310 return DAG.getNode(ISD::OR, DL, VT, X, Y);
8311
8312 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
8313 return R;
8314
8315 auto peekThroughZext = [](SDValue V) {
8316 if (V->getOpcode() == ISD::ZERO_EXTEND)
8317 return V->getOperand(0);
8318 return V;
8319 };
8320
8321 // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
8322 if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL &&
8323 N0.getOperand(0) == N1.getOperand(0) &&
8324 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
8325 return N0;
8326
8327 // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
8328 if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL &&
8329 N0.getOperand(1) == N1.getOperand(0) &&
8330 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
8331 return N0;
8332
8333 // Attempt to match a legalized build_pair-esque pattern:
8334 // or(shl(aext(Hi),BW/2),zext(Lo))
8335 SDValue Lo, Hi;
8336 if (sd_match(N0,
8337 m_OneUse(m_Shl(m_AnyExt(m_Value(Hi)), m_SpecificInt(BW / 2)))) &&
8338 sd_match(N1, m_ZExt(m_Value(Lo))) &&
8339 Lo.getScalarValueSizeInBits() == (BW / 2) &&
8340 Lo.getValueType() == Hi.getValueType()) {
8341 // Fold build_pair(not(Lo),not(Hi)) -> not(build_pair(Lo,Hi)).
8342 SDValue NotLo, NotHi;
8343 if (sd_match(Lo, m_OneUse(m_Not(m_Value(NotLo)))) &&
8344 sd_match(Hi, m_OneUse(m_Not(m_Value(NotHi))))) {
8345 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotLo);
8346 Hi = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NotHi);
8347 Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
8348 DAG.getShiftAmountConstant(BW / 2, VT, DL));
8349 return DAG.getNOT(DL, DAG.getNode(ISD::OR, DL, VT, Lo, Hi), VT);
8350 }
8351 }
8352
8353 return SDValue();
8354}
8355
8356SDValue DAGCombiner::visitOR(SDNode *N) {
8357 SDValue N0 = N->getOperand(0);
8358 SDValue N1 = N->getOperand(1);
8359 EVT VT = N1.getValueType();
8360 SDLoc DL(N);
8361
8362 // x | x --> x
8363 if (N0 == N1)
8364 return N0;
8365
8366 // fold (or c1, c2) -> c1|c2
8367 if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, DL, VT, {N0, N1}))
8368 return C;
8369
8370 // canonicalize constant to RHS
8371 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
8372 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
8373 return DAG.getNode(ISD::OR, DL, VT, N1, N0);
8374
8375 // fold vector ops
8376 if (VT.isVector()) {
8377 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
8378 return FoldedVOp;
8379
8380 // fold (or x, 0) -> x, vector edition
8381 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
8382 return N0;
8383
8384 // fold (or x, -1) -> -1, vector edition
8385 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
8386 // do not return N1, because undef node may exist in N1
8387 return DAG.getAllOnesConstant(DL, N1.getValueType());
8388
8389 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
8390 // Do this only if the resulting type / shuffle is legal.
8391 auto *SV0 = dyn_cast<ShuffleVectorSDNode>(N0);
8392 auto *SV1 = dyn_cast<ShuffleVectorSDNode>(N1);
8393 if (SV0 && SV1 && TLI.isTypeLegal(VT)) {
8394 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
8395 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
8396 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
8397 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
8398 // Ensure both shuffles have a zero input.
8399 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
8400 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
8401 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
8402 bool CanFold = true;
8403 int NumElts = VT.getVectorNumElements();
8404 SmallVector<int, 4> Mask(NumElts, -1);
8405
8406 for (int i = 0; i != NumElts; ++i) {
8407 int M0 = SV0->getMaskElt(i);
8408 int M1 = SV1->getMaskElt(i);
8409
8410 // Determine if either index is pointing to a zero vector.
8411 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
8412 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
8413
8414 // If one element is zero and the other side is undef, keep undef.
8415 // This also handles the case that both are undef.
8416 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0))
8417 continue;
8418
8419 // Make sure only one of the elements is zero.
8420 if (M0Zero == M1Zero) {
8421 CanFold = false;
8422 break;
8423 }
8424
8425 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
8426
8427 // We have a zero and non-zero element. If the non-zero came from
8428 // SV0 make the index a LHS index. If it came from SV1, make it
8429 // a RHS index. We need to mod by NumElts because we don't care
8430 // which operand it came from in the original shuffles.
8431 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
8432 }
8433
8434 if (CanFold) {
8435 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
8436 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
8437 SDValue LegalShuffle =
8438 TLI.buildLegalVectorShuffle(VT, DL, NewLHS, NewRHS, Mask, DAG);
8439 if (LegalShuffle)
8440 return LegalShuffle;
8441 }
8442 }
8443 }
8444 }
8445
8446 // fold (or x, 0) -> x
8447 if (isNullConstant(N1))
8448 return N0;
8449
8450 // fold (or x, -1) -> -1
8451 if (isAllOnesConstant(N1))
8452 return N1;
8453
8454 if (SDValue NewSel = foldBinOpIntoSelect(N))
8455 return NewSel;
8456
8457 // fold (or x, c) -> c iff (x & ~c) == 0
8458 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
8459 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
8460 return N1;
8461
8462 if (SDValue R = foldAndOrOfSETCC(N, DAG))
8463 return R;
8464
8465 if (SDValue Combined = visitORLike(N0, N1, DL))
8466 return Combined;
8467
8468 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
8469 return Combined;
8470
8471 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
8472 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
8473 return BSwap;
8474 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
8475 return BSwap;
8476
8477 // reassociate or
8478 if (SDValue ROR = reassociateOps(ISD::OR, DL, N0, N1, N->getFlags()))
8479 return ROR;
8480
8481 // Fold or(vecreduce(x), vecreduce(y)) -> vecreduce(or(x, y))
8482 if (SDValue SD =
8483 reassociateReduction(ISD::VECREDUCE_OR, ISD::OR, DL, VT, N0, N1))
8484 return SD;
8485
8486 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
8487 // iff (c1 & c2) != 0 or c1/c2 are undef.
8488 auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
8489 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
8490 };
8491 if (N0.getOpcode() == ISD::AND && N0->hasOneUse() &&
8492 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
8493 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
8494 {N1, N0.getOperand(1)})) {
8495 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
8496 AddToWorklist(IOR.getNode());
8497 return DAG.getNode(ISD::AND, DL, VT, COR, IOR);
8498 }
8499 }
8500
8501 if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
8502 return Combined;
8503 if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
8504 return Combined;
8505
8506 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
8507 if (N0.getOpcode() == N1.getOpcode())
8508 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
8509 return V;
8510
8511 // See if this is some rotate idiom.
8512 if (SDValue Rot = MatchRotate(N0, N1, DL, /*FromAdd=*/false))
8513 return Rot;
8514
8515 if (SDValue Load = MatchLoadCombine(N))
8516 return Load;
8517
8518 // Simplify the operands using demanded-bits information.
8519 if (SimplifyDemandedBits(SDValue(N, 0)))
8520 return SDValue(N, 0);
8521
8522 // If OR can be rewritten into ADD, try combines based on ADD.
8523 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
8524 DAG.isADDLike(SDValue(N, 0)))
8525 if (SDValue Combined = visitADDLike(N))
8526 return Combined;
8527
8528 // Postpone until legalization completed to avoid interference with bswap
8529 // folding
8530 if (LegalOperations || VT.isVector())
8531 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
8532 return R;
8533
8534 if (VT.isScalarInteger() && VT != MVT::i1)
8535 if (SDValue R = foldMaskedMerge(N, DAG, TLI, DL))
8536 return R;
8537
8538 return SDValue();
8539}
8540
8541static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op,
8542 SDValue &Mask) {
8543 if (Op.getOpcode() == ISD::AND &&
8544 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
8545 Mask = Op.getOperand(1);
8546 return Op.getOperand(0);
8547 }
8548 return Op;
8549}
8550
8551/// Match "(X shl/srl V1) & V2" where V2 may not be present.
8552static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift,
8553 SDValue &Mask) {
8554 Op = stripConstantMask(DAG, Op, Mask);
8555 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
8556 Shift = Op;
8557 return true;
8558 }
8559 return false;
8560}
8561
8562/// Helper function for visitOR to extract the needed side of a rotate idiom
8563/// from a shl/srl/mul/udiv. This is meant to handle cases where
8564/// InstCombine merged some outside op with one of the shifts from
8565/// the rotate pattern.
8566/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
8567/// Otherwise, returns an expansion of \p ExtractFrom based on the following
8568/// patterns:
8569///
8570/// (or (add v v) (shrl v bitwidth-1)):
8571/// expands (add v v) -> (shl v 1)
8572///
8573/// (or (mul v c0) (shrl (mul v c1) c2)):
8574/// expands (mul v c0) -> (shl (mul v c1) c3)
8575///
8576/// (or (udiv v c0) (shl (udiv v c1) c2)):
8577/// expands (udiv v c0) -> (shrl (udiv v c1) c3)
8578///
8579/// (or (shl v c0) (shrl (shl v c1) c2)):
8580/// expands (shl v c0) -> (shl (shl v c1) c3)
8581///
8582/// (or (shrl v c0) (shl (shrl v c1) c2)):
8583/// expands (shrl v c0) -> (shrl (shrl v c1) c3)
8584///
8585/// Such that in all cases, c3+c2==bitwidth(op v c1).
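/// Illustrative i32 example (constants chosen for exposition): in
/// (or (mul v, 768), (srl (mul v, 3), 24)) the left term can be expanded to
/// (shl (mul v, 3), 8) because 768 == 3 << 8 and 8 + 24 == 32, exposing a
/// rotate of (mul v, 3) by 8.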
8586static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
8587 SDValue ExtractFrom, SDValue &Mask,
8588 const SDLoc &DL) {
8589 assert(OppShift && ExtractFrom && "Empty SDValue");
8590 if (OppShift.getOpcode() != ISD::SHL && OppShift.getOpcode() != ISD::SRL)
8591 return SDValue();
8592
8593 ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
8594
8595 // Value and Type of the shift.
8596 SDValue OppShiftLHS = OppShift.getOperand(0);
8597 EVT ShiftedVT = OppShiftLHS.getValueType();
8598
8599 // Amount of the existing shift.
8600 ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
8601
8602 // (add v v) -> (shl v 1)
8603 // TODO: Should this be a general DAG canonicalization?
8604 if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
8605 ExtractFrom.getOpcode() == ISD::ADD &&
8606 ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
8607 ExtractFrom.getOperand(0) == OppShiftLHS &&
8608 OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
8609 return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
8610 DAG.getShiftAmountConstant(1, ShiftedVT, DL));
8611
8612 // Preconditions:
8613 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
8614 //
8615 // Find opcode of the needed shift to be extracted from (op0 v c0).
8616 unsigned Opcode = ISD::DELETED_NODE;
8617 bool IsMulOrDiv = false;
8618 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
8619 // opcode or its arithmetic (mul or udiv) variant.
8620 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
8621 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
8622 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
8623 return false;
8624 Opcode = NeededShift;
8625 return true;
8626 };
8627 // op0 must be either the needed shift opcode or the mul/udiv equivalent
8628 // that the needed shift can be extracted from.
8629 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
8630 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
8631 return SDValue();
8632
8633 // op0 must be the same opcode on both sides, have the same LHS argument,
8634 // and produce the same value type.
8635 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
8636 OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
8637 ShiftedVT != ExtractFrom.getValueType())
8638 return SDValue();
8639
8640 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
8641 ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
8642 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
8643 ConstantSDNode *ExtractFromCst =
8644 isConstOrConstSplat(ExtractFrom.getOperand(1));
8645 // TODO: We should be able to handle non-uniform constant vectors for these values
8646 // Check that we have constant values.
8647 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
8648 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
8649 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
8650 return SDValue();
8651
8652 // Compute the shift amount we need to extract to complete the rotate.
8653 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
8654 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
8655 return SDValue();
8656 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
8657 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
8658 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
8659 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
8660 zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
8661
8662 // Now try extract the needed shift from the ExtractFrom op and see if the
8663 // result matches up with the existing shift's LHS op.
8664 if (IsMulOrDiv) {
8665 // Op to extract from is a mul or udiv by a constant.
8666 // Check:
8667 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
8668 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
8669 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
8670 NeededShiftAmt.getZExtValue());
8671 APInt ResultAmt;
8672 APInt Rem;
8673 APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
8674 if (Rem != 0 || ResultAmt != OppLHSAmt)
8675 return SDValue();
8676 } else {
8677 // Op to extract from is a shift by a constant.
8678 // Check:
8679 // c2 - (bitwidth(op0 v c0) - c1) == c0
8680 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
8681 ExtractFromAmt.getBitWidth()))
8682 return SDValue();
8683 }
8684
8685 // Return the expanded shift op that should allow a rotate to be formed.
8686 EVT ShiftVT = OppShift.getOperand(1).getValueType();
8687 EVT ResVT = ExtractFrom.getValueType();
8688 SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
8689 return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
8690}
8691
8692// Return true if we can prove that, whenever Neg and Pos are both in the
8693// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
8694// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
8695//
8696// (or (shift1 X, Neg), (shift2 X, Pos))
8697//
8698// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
8699// in direction shift1 by Neg. The range [0, EltSize) means that we only need
8700// to consider shift amounts with defined behavior.
8701//
8702// The IsRotate flag should be set when the LHS of both shifts is the same.
8703// Otherwise if matching a general funnel shift, it should be clear.
8704static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
8705 SelectionDAG &DAG, bool IsRotate, bool FromAdd) {
8706 const auto &TLI = DAG.getTargetLoweringInfo();
8707 // If EltSize is a power of 2 then:
8708 //
8709 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
8710 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
8711 //
8712 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
8713 // for the stronger condition:
8714 //
8715 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
8716 //
8717 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
8718 // we can just replace Neg with Neg' for the rest of the function.
8719 //
8720 // In other cases we check for the even stronger condition:
8721 //
8722 // Neg == EltSize - Pos [B]
8723 //
8724 // for all Neg and Pos. Note that the (or ...) then invokes undefined
8725 // behavior if Pos == 0 (and consequently Neg == EltSize).
8726 //
8727 // We could actually use [A] whenever EltSize is a power of 2, but the
8728 // only extra cases that it would match are those uninteresting ones
8729 // where Neg and Pos are never in range at the same time. E.g. for
8730 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
8731 // as well as (sub 32, Pos), but:
8732 //
8733 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
8734 //
8735 // always invokes undefined behavior for 32-bit X.
8736 //
8737 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
8738 // This allows us to peek through any operations that only affect Mask's
8739 // un-demanded bits.
8740 //
8741 // NOTE: We can only do this when matching operations which won't modify the
8742 // least Log2(EltSize) significant bits and not a general funnel shift.
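// Illustrative example: for EltSize == 32, Neg == (sub 32, Pos) satisfies [B],
// and Neg == (and (sub 0, Pos), 31) satisfies [A] because (0 - Pos) and
// (32 - Pos) agree in their low 5 bits.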
8743 unsigned MaskLoBits = 0;
8744 if (IsRotate && !FromAdd && isPowerOf2_64(EltSize)) {
8745 unsigned Bits = Log2_64(EltSize);
8746 unsigned NegBits = Neg.getScalarValueSizeInBits();
8747 if (NegBits >= Bits) {
8748 APInt DemandedBits = APInt::getLowBitsSet(NegBits, Bits);
8749 if (SDValue Inner =
8750 TLI.SimplifyMultipleUseDemandedBits(Neg, DemandedBits, DAG)) {
8751 Neg = Inner;
8752 MaskLoBits = Bits;
8753 }
8754 }
8755 }
8756
8757 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
8758 if (Neg.getOpcode() != ISD::SUB)
8759 return false;
8760 ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
8761 if (!NegC)
8762 return false;
8763 SDValue NegOp1 = Neg.getOperand(1);
8764
8765 // On the RHS of [A], if Pos is the result of operation on Pos' that won't
8766 // affect Mask's demanded bits, just replace Pos with Pos'. These operations
8767 // are redundant for the purpose of the equality.
8768 if (MaskLoBits) {
8769 unsigned PosBits = Pos.getScalarValueSizeInBits();
8770 if (PosBits >= MaskLoBits) {
8771 APInt DemandedBits = APInt::getLowBitsSet(PosBits, MaskLoBits);
8772 if (SDValue Inner =
8773 TLI.SimplifyMultipleUseDemandedBits(Pos, DemandedBits, DAG)) {
8774 Pos = Inner;
8775 }
8776 }
8777 }
8778
8779 // The condition we need is now:
8780 //
8781 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
8782 //
8783 // If NegOp1 == Pos then we need:
8784 //
8785 // EltSize & Mask == NegC & Mask
8786 //
8787 // (because "x & Mask" is a truncation and distributes through subtraction).
8788 //
8789 // We also need to account for a potential truncation of NegOp1 if the amount
8790 // has already been legalized to a shift amount type.
8791 APInt Width;
8792 if ((Pos == NegOp1) ||
8793 (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
8794 Width = NegC->getAPIntValue();
8795
8796 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
8797 // Then the condition we want to prove becomes:
8798 //
8799 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
8800 //
8801 // which, again because "x & Mask" is a truncation, becomes:
8802 //
8803 // NegC & Mask == (EltSize - PosC) & Mask
8804 // EltSize & Mask == (NegC + PosC) & Mask
8805 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
8806 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
8807 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
8808 else
8809 return false;
8810 } else
8811 return false;
8812
8813 // Now we just need to check that EltSize & Mask == Width & Mask.
8814 if (MaskLoBits)
8815 // EltSize & Mask is 0 since Mask is EltSize - 1.
8816 return Width.getLoBits(MaskLoBits) == 0;
8817 return Width == EltSize;
8818}
8819
8820// A subroutine of MatchRotate used once we have found an OR of two opposite
8821// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
8822// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
8823// former being preferred if supported. InnerPos and InnerNeg are Pos and
8824// Neg with outer conversions stripped away.
8825SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
8826 SDValue Neg, SDValue InnerPos,
8827 SDValue InnerNeg, bool FromAdd,
8828 bool HasPos, unsigned PosOpcode,
8829 unsigned NegOpcode, const SDLoc &DL) {
8830 // fold (or/add (shl x, (*ext y)),
8831 // (srl x, (*ext (sub 32, y)))) ->
8832 // (rotl x, y) or (rotr x, (sub 32, y))
8833 //
8834 // fold (or/add (shl x, (*ext (sub 32, y))),
8835 // (srl x, (*ext y))) ->
8836 // (rotr x, y) or (rotl x, (sub 32, y))
8837 EVT VT = Shifted.getValueType();
8838 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
8839 /*IsRotate*/ true, FromAdd))
8840 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
8841 HasPos ? Pos : Neg);
8842
8843 return SDValue();
8844}
8845
8846// A subroutine of MatchRotate used once we have found an OR of two opposite
8847// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
8848// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
8849// former being preferred if supported. InnerPos and InnerNeg are Pos and
8850// Neg with outer conversions stripped away.
8851// TODO: Merge with MatchRotatePosNeg.
8852SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
8853 SDValue Neg, SDValue InnerPos,
8854 SDValue InnerNeg, bool FromAdd,
8855 bool HasPos, unsigned PosOpcode,
8856 unsigned NegOpcode, const SDLoc &DL) {
8857 EVT VT = N0.getValueType();
8858 unsigned EltBits = VT.getScalarSizeInBits();
8859
8860 // fold (or/add (shl x0, (*ext y)),
8861 // (srl x1, (*ext (sub 32, y)))) ->
8862 // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
8863 //
8864 // fold (or/add (shl x0, (*ext (sub 32, y))),
8865 // (srl x1, (*ext y))) ->
8866 // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
8867 if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1,
8868 FromAdd))
8869 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
8870 HasPos ? Pos : Neg);
8871
8872 // Matching the shift+xor cases, we can't easily use the xor'd shift amount
8873 // so for now just use the PosOpcode case if its legal.
8874 // TODO: When can we use the NegOpcode case?
8875 if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
8876 SDValue X;
8877 // fold (or/add (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
8878 // -> (fshl x0, x1, y)
8879 if (sd_match(N1, m_Srl(m_Value(X), m_One())) &&
8880 sd_match(InnerNeg,
8881 m_Xor(m_Specific(InnerPos), m_SpecificInt(EltBits - 1))) &&
8883 return DAG.getNode(ISD::FSHL, DL, VT, N0, X, Pos);
8884 }
8885
8886 // fold (or/add (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
8887 // -> (fshr x0, x1, y)
8888 if (sd_match(N0, m_Shl(m_Value(X), m_One())) &&
8889 sd_match(InnerPos,
8890 m_Xor(m_Specific(InnerNeg), m_SpecificInt(EltBits - 1))) &&
8892 return DAG.getNode(ISD::FSHR, DL, VT, X, N1, Neg);
8893 }
8894
8895 // fold (or/add (shl (add x0, x0), (xor y, 31)), (srl x1, y))
8896 // -> (fshr x0, x1, y)
8897 // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
8898 if (sd_match(N0, m_Add(m_Value(X), m_Deferred(X))) &&
8899 sd_match(InnerPos,
8900 m_Xor(m_Specific(InnerNeg), m_SpecificInt(EltBits - 1))) &&
8902 return DAG.getNode(ISD::FSHR, DL, VT, X, N1, Neg);
8903 }
8904 }
8905
8906 return SDValue();
8907}
8908
8909// MatchRotate - Handle an 'or' or 'add' of two operands. If this is one of the
8910// many idioms for rotate, and if the target supports rotation instructions,
8911// generate a rot[lr]. This also matches funnel shift patterns, similar to
8912// rotation but with different shifted sources.
8913SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL,
8914 bool FromAdd) {
8915 EVT VT = LHS.getValueType();
8916
8917 // The target must have at least one rotate/funnel flavor.
8918 // We still try to match rotate by constant pre-legalization.
8919 // TODO: Support pre-legalization funnel-shift by constant.
8920 bool HasROTL = hasOperation(ISD::ROTL, VT);
8921 bool HasROTR = hasOperation(ISD::ROTR, VT);
8922 bool HasFSHL = hasOperation(ISD::FSHL, VT);
8923 bool HasFSHR = hasOperation(ISD::FSHR, VT);
8924
8925 // If the type is going to be promoted and the target has enabled custom
8926 // lowering for rotate, allow matching rotate by non-constants. Only allow
8927 // this for scalar types.
8928 if (VT.isScalarInteger() && TLI.getTypeAction(*DAG.getContext(), VT) ==
8929 TargetLowering::TypePromoteInteger) {
8930 HasROTL |= TLI.getOperationAction(ISD::ROTL, VT) == TargetLowering::Custom;
8931 HasROTR |= TLI.getOperationAction(ISD::ROTR, VT) == TargetLowering::Custom;
8932 }
8933
8934 if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
8935 return SDValue();
8936
8937 // Check for truncated rotate.
8938 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
8939 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
8940 assert(LHS.getValueType() == RHS.getValueType());
8941 if (SDValue Rot =
8942 MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL, FromAdd))
8943 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
8944 }
8945
8946 // Match "(X shl/srl V1) & V2" where V2 may not be present.
8947 SDValue LHSShift; // The shift.
8948 SDValue LHSMask; // AND value if any.
8949 matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
8950
8951 SDValue RHSShift; // The shift.
8952 SDValue RHSMask; // AND value if any.
8953 matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
8954
8955 // If neither side matched a rotate half, bail
8956 if (!LHSShift && !RHSShift)
8957 return SDValue();
8958
8959 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
8960 // side of the rotate, so try to handle that here. In all cases we need to
8961 // pass the matched shift from the opposite side to compute the opcode and
8962 // needed shift amount to extract. We still want to do this if both sides
8963 // matched a rotate half because one half may be a potential overshift that
8964 // can be broken down (ie if InstCombine merged two shl or srl ops into a
8965 // single one).
8966
8967 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
8968 if (LHSShift)
8969 if (SDValue NewRHSShift =
8970 extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
8971 RHSShift = NewRHSShift;
8972 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
8973 if (RHSShift)
8974 if (SDValue NewLHSShift =
8975 extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
8976 LHSShift = NewLHSShift;
8977
8978 // If a side is still missing, nothing else we can do.
8979 if (!RHSShift || !LHSShift)
8980 return SDValue();
8981
8982 // At this point we've matched or extracted a shift op on each side.
8983
8984 if (LHSShift.getOpcode() == RHSShift.getOpcode())
8985 return SDValue(); // Shifts must disagree.
8986
8987 // Canonicalize shl to left side in a shl/srl pair.
8988 if (RHSShift.getOpcode() == ISD::SHL) {
8989 std::swap(LHS, RHS);
8990 std::swap(LHSShift, RHSShift);
8991 std::swap(LHSMask, RHSMask);
8992 }
8993
8994 // Something has gone wrong - we've lost the shl/srl pair - bail.
8995 if (LHSShift.getOpcode() != ISD::SHL || RHSShift.getOpcode() != ISD::SRL)
8996 return SDValue();
8997
8998 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8999 SDValue LHSShiftArg = LHSShift.getOperand(0);
9000 SDValue LHSShiftAmt = LHSShift.getOperand(1);
9001 SDValue RHSShiftArg = RHSShift.getOperand(0);
9002 SDValue RHSShiftAmt = RHSShift.getOperand(1);
9003
9004 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
9005 ConstantSDNode *RHS) {
9006 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
9007 };
9008
9009 auto ApplyMasks = [&](SDValue Res) {
9010 // If there is an AND of either shifted operand, apply it to the result.
9011 if (LHSMask.getNode() || RHSMask.getNode()) {
9012 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
9013 SDValue Mask = AllOnes;
9014
9015 if (LHSMask.getNode()) {
9016 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
9017 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
9018 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
9019 }
9020 if (RHSMask.getNode()) {
9021 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
9022 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
9023 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
9024 }
9025
9026 Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
9027 }
9028
9029 return Res;
9030 };
9031
9032 // TODO: Support pre-legalization funnel-shift by constant.
9033 bool IsRotate = LHSShiftArg == RHSShiftArg;
9034 if (!IsRotate && !(HasFSHL || HasFSHR)) {
9035 if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
9036 ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
9037 // Look for a disguised rotate by constant.
9038 // The common shifted operand X may be hidden inside another 'or'.
9039 SDValue X, Y;
9040 auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) {
9041 if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR)
9042 return false;
9043 if (CommonOp == Or.getOperand(0)) {
9044 X = CommonOp;
9045 Y = Or.getOperand(1);
9046 return true;
9047 }
9048 if (CommonOp == Or.getOperand(1)) {
9049 X = CommonOp;
9050 Y = Or.getOperand(0);
9051 return true;
9052 }
9053 return false;
9054 };
9055
9056 SDValue Res;
9057 if (matchOr(LHSShiftArg, RHSShiftArg)) {
9058 // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
9059 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
9060 SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt);
9061 Res = DAG.getNode(ISD::OR, DL, VT, RotX, ShlY);
9062 } else if (matchOr(RHSShiftArg, LHSShiftArg)) {
9063 // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
9064 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
9065 SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt);
9066 Res = DAG.getNode(ISD::OR, DL, VT, RotX, SrlY);
9067 } else {
9068 return SDValue();
9069 }
9070
9071 return ApplyMasks(Res);
9072 }
9073
9074 return SDValue(); // Requires funnel shift support.
9075 }
9076
9077 // fold (or/add (shl x, C1), (srl x, C2)) -> (rotl x, C1)
9078 // fold (or/add (shl x, C1), (srl x, C2)) -> (rotr x, C2)
9079 // fold (or/add (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
9080 // fold (or/add (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
9081 // iff C1+C2 == EltSizeInBits
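// Illustrative i32 example: (or (shl x, 8), (srl x, 24)) becomes (rotl x, 8)
// or, equivalently, (rotr x, 24) since 8 + 24 == 32.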
9082 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
9083 SDValue Res;
9084 if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
9085 bool UseROTL = !LegalOperations || HasROTL;
9086 Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
9087 UseROTL ? LHSShiftAmt : RHSShiftAmt);
9088 } else {
9089 bool UseFSHL = !LegalOperations || HasFSHL;
9090 Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
9091 RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
9092 }
9093
9094 return ApplyMasks(Res);
9095 }
9096
9097 // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
9098 // shift.
9099 if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
9100 return SDValue();
9101
9102 // If there is a mask here, and we have a variable shift, we can't be sure
9103 // that we're masking out the right stuff.
9104 if (LHSMask.getNode() || RHSMask.getNode())
9105 return SDValue();
9106
9107 // If the shift amount is sign/zext/any-extended just peel it off.
9108 SDValue LExtOp0 = LHSShiftAmt;
9109 SDValue RExtOp0 = RHSShiftAmt;
9110 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
9111 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
9112 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
9113 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
9114 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
9115 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
9116 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
9117 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
9118 LExtOp0 = LHSShiftAmt.getOperand(0);
9119 RExtOp0 = RHSShiftAmt.getOperand(0);
9120 }
9121
9122 if (IsRotate && (HasROTL || HasROTR)) {
9123 if (SDValue TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
9124 LExtOp0, RExtOp0, FromAdd, HasROTL,
9125 ISD::ROTL, ISD::ROTR, DL))
9126 return TryL;
9127
9128 if (SDValue TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
9129 RExtOp0, LExtOp0, FromAdd, HasROTR,
9130 ISD::ROTR, ISD::ROTL, DL))
9131 return TryR;
9132 }
9133
9134 if (SDValue TryL = MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt,
9135 RHSShiftAmt, LExtOp0, RExtOp0, FromAdd,
9136 HasFSHL, ISD::FSHL, ISD::FSHR, DL))
9137 return TryL;
9138
9139 if (SDValue TryR = MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt,
9140 LHSShiftAmt, RExtOp0, LExtOp0, FromAdd,
9141 HasFSHR, ISD::FSHR, ISD::FSHL, DL))
9142 return TryR;
9143
9144 return SDValue();
9145}
9146
9147/// Recursively traverses the expression calculating the origin of the requested
9148/// byte of the given value. Returns std::nullopt if the provider can't be
9149/// calculated.
9150///
9151/// For all the values except the root of the expression, we verify that the
9152/// value has exactly one use and if not then return std::nullopt. This way if
9153/// the origin of the byte is returned it's guaranteed that the values which
9154/// contribute to the byte are not used outside of this expression.
9155///
9156/// However, there is a special case when dealing with vector loads -- we allow
9157/// more than one use if the load is a vector type. Since the values that
9158/// contribute to the byte ultimately come from the ExtractVectorElements of the
9159/// Load, we don't care if the Load has uses other than ExtractVectorElements,
9160/// because those operations are independent from the pattern to be combined.
9161/// For vector loads, we simply care that the ByteProviders are adjacent
9162/// positions of the same vector, and their index matches the byte that is being
9163/// provided. This is captured by the \p VectorIndex algorithm. \p VectorIndex
9164/// is the index used in an ExtractVectorElement, and \p StartingIndex is the
9165/// byte position we are trying to provide for the LoadCombine. If these do
9166/// not match, then we can not combine the vector loads. \p Index uses the
9167/// byte position we are trying to provide for and is matched against the
9168/// shl and load size. The \p Index algorithm ensures the requested byte is
9169/// provided for by the pattern, and the pattern does not over provide bytes.
9170///
9171///
9172/// The supported LoadCombine pattern for vector loads is as follows
9173/// or
9174/// / \
9175/// or shl
9176/// / \ |
9177/// or shl zext
9178/// / \ | |
9179/// shl zext zext EVE*
9180/// | | | |
9181/// zext EVE* EVE* LOAD
9182/// | | |
9183/// EVE* LOAD LOAD
9184/// |
9185/// LOAD
9186///
9187/// *ExtractVectorElement
9188using SDByteProvider = ByteProvider<SDNode *>;
9189
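// Illustrative example (values chosen for exposition): in
// (or (zext i8 (load p) to i32), (shl (zext i8 (load p+1) to i32), 8)),
// byte 0 is provided by the load of p, byte 1 by the load of p+1, and bytes
// 2 and 3 are constant zero coming from the zero-extends.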
9190static std::optional<SDByteProvider>
9191calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
9192 std::optional<uint64_t> VectorIndex,
9193 unsigned StartingIndex = 0) {
9194
9195 // Typical i64 by i8 pattern requires recursion up to 8 calls depth
9196 if (Depth == 10)
9197 return std::nullopt;
9198
9199 // Only allow multiple uses if the instruction is a vector load (in which
9200 // case we will use the load for every ExtractVectorElement)
9201 if (Depth && !Op.hasOneUse() &&
9202 (Op.getOpcode() != ISD::LOAD || !Op.getValueType().isVector()))
9203 return std::nullopt;
9204
9205 // Fail to combine if we have encountered anything but a LOAD after handling
9206 // an ExtractVectorElement.
9207 if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value())
9208 return std::nullopt;
9209
9210 unsigned BitWidth = Op.getScalarValueSizeInBits();
9211 if (BitWidth % 8 != 0)
9212 return std::nullopt;
9213 unsigned ByteWidth = BitWidth / 8;
9214 assert(Index < ByteWidth && "invalid index requested");
9215 (void) ByteWidth;
9216
9217 switch (Op.getOpcode()) {
9218 case ISD::OR: {
9219 auto LHS =
9220 calculateByteProvider(Op->getOperand(0), Index, Depth + 1, VectorIndex);
9221 if (!LHS)
9222 return std::nullopt;
9223 auto RHS =
9224 calculateByteProvider(Op->getOperand(1), Index, Depth + 1, VectorIndex);
9225 if (!RHS)
9226 return std::nullopt;
9227
9228 if (LHS->isConstantZero())
9229 return RHS;
9230 if (RHS->isConstantZero())
9231 return LHS;
9232 return std::nullopt;
9233 }
9234 case ISD::SHL: {
9235 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
9236 if (!ShiftOp)
9237 return std::nullopt;
9238
9239 uint64_t BitShift = ShiftOp->getZExtValue();
9240
9241 if (BitShift % 8 != 0)
9242 return std::nullopt;
9243 uint64_t ByteShift = BitShift / 8;
9244
9245 // If we are shifting by an amount greater than the index we are trying to
9246 // provide, then do not provide anything. Otherwise, subtract the index by
9247 // the amount we shifted by.
9248 return Index < ByteShift
9249 ? SDByteProvider::getConstantZero()
9250 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
9251 Depth + 1, VectorIndex, Index);
9252 }
9253 case ISD::ANY_EXTEND:
9254 case ISD::SIGN_EXTEND:
9255 case ISD::ZERO_EXTEND: {
9256 SDValue NarrowOp = Op->getOperand(0);
9257 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
9258 if (NarrowBitWidth % 8 != 0)
9259 return std::nullopt;
9260 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
9261
9262 if (Index >= NarrowByteWidth)
9263 return Op.getOpcode() == ISD::ZERO_EXTEND
9264 ? std::optional<SDByteProvider>(
9265 SDByteProvider::getConstantZero())
9266 : std::nullopt;
9267 return calculateByteProvider(NarrowOp, Index, Depth + 1, VectorIndex,
9268 StartingIndex);
9269 }
9270 case ISD::BSWAP:
9271 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
9272 Depth + 1, VectorIndex, StartingIndex);
9273 case ISD::EXTRACT_VECTOR_ELT: {
9274 auto OffsetOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
9275 if (!OffsetOp)
9276 return std::nullopt;
9277
9278 VectorIndex = OffsetOp->getZExtValue();
9279
9280 SDValue NarrowOp = Op->getOperand(0);
9281 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
9282 if (NarrowBitWidth % 8 != 0)
9283 return std::nullopt;
9284 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
9285 // EXTRACT_VECTOR_ELT can extend the element type to the width of the return
9286 // type, leaving the high bits undefined.
9287 if (Index >= NarrowByteWidth)
9288 return std::nullopt;
9289
9290 // Check to see if the position of the element in the vector corresponds
9291 // with the byte we are trying to provide for. In the case of a vector of
9292 // i8, this simply means the VectorIndex == StartingIndex. For non i8 cases,
9293 // the element will provide a range of bytes. For example, if we have a
9294 // vector of i16s, each element provides two bytes (V[1] provides byte 2 and
9295 // 3).
9296 if (*VectorIndex * NarrowByteWidth > StartingIndex)
9297 return std::nullopt;
9298 if ((*VectorIndex + 1) * NarrowByteWidth <= StartingIndex)
9299 return std::nullopt;
9300
9301 return calculateByteProvider(Op->getOperand(0), Index, Depth + 1,
9302 VectorIndex, StartingIndex);
9303 }
9304 case ISD::LOAD: {
9305 auto L = cast<LoadSDNode>(Op.getNode());
9306 if (!L->isSimple() || L->isIndexed())
9307 return std::nullopt;
9308
9309 unsigned NarrowBitWidth = L->getMemoryVT().getScalarSizeInBits();
9310 if (NarrowBitWidth % 8 != 0)
9311 return std::nullopt;
9312 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
9313
9314    // If the width of the load does not reach the byte we are trying to provide
9315    // for, and it is not a ZEXTLOAD, then the load does not provide the byte in
9316    // question.
9317 if (Index >= NarrowByteWidth)
9318 return L->getExtensionType() == ISD::ZEXTLOAD
9319 ? std::optional<SDByteProvider>(
9320                       SDByteProvider::getConstantZero())
9321 : std::nullopt;
9322
9323 unsigned BPVectorIndex = VectorIndex.value_or(0U);
9324 return SDByteProvider::getSrc(L, Index, BPVectorIndex);
9325 }
9326 }
9327
9328 return std::nullopt;
9329}
9330
9331static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
9332 return i;
9333}
9334
9335static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
9336 return BW - i - 1;
9337}
9338
9339// Check if the byte offsets we are looking at match either a big- or a
9340// little-endian value load. Return true for big endian, false for little
9341// endian, and std::nullopt if the match failed.
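// For example, with FirstOffset == 0, ByteOffsets == {0, 1, 2, 3} matches
// littleEndianByteAt for every index and returns false (little endian),
// ByteOffsets == {3, 2, 1, 0} matches bigEndianByteAt and returns true, and a
// mixed order such as {1, 0, 3, 2} matches neither and returns std::nullopt.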
9342static std::optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
9343 int64_t FirstOffset) {
9344  // The endianness can be decided only when there are at least 2 bytes.
9345 unsigned Width = ByteOffsets.size();
9346 if (Width < 2)
9347 return std::nullopt;
9348
9349 bool BigEndian = true, LittleEndian = true;
9350 for (unsigned i = 0; i < Width; i++) {
9351 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
9352 LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
9353 BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
9354 if (!BigEndian && !LittleEndian)
9355 return std::nullopt;
9356 }
9357
9358  assert((BigEndian != LittleEndian) && "It should be either big endian or "
9359                                        "little endian");
9360 return BigEndian;
9361}
9362
9363// Look through one layer of truncate or extend.
9364 static SDValue stripTruncAndExt(SDValue Value) {
9365 switch (Value.getOpcode()) {
9366 case ISD::TRUNCATE:
9367 case ISD::ZERO_EXTEND:
9368 case ISD::SIGN_EXTEND:
9369 case ISD::ANY_EXTEND:
9370 return Value.getOperand(0);
9371 }
9372 return SDValue();
9373}
9374
9375/// Match a pattern where a wide type scalar value is stored by several narrow
9376/// stores. Fold it into a single store or a BSWAP and a store if the target
9377/// supports it.
9378///
9379/// Assuming little endian target:
9380/// i8 *p = ...
9381/// i32 val = ...
9382/// p[0] = (val >> 0) & 0xFF;
9383/// p[1] = (val >> 8) & 0xFF;
9384/// p[2] = (val >> 16) & 0xFF;
9385/// p[3] = (val >> 24) & 0xFF;
9386/// =>
9387/// *((i32)p) = val;
9388///
9389/// i8 *p = ...
9390/// i32 val = ...
9391/// p[0] = (val >> 24) & 0xFF;
9392/// p[1] = (val >> 16) & 0xFF;
9393/// p[2] = (val >> 8) & 0xFF;
9394/// p[3] = (val >> 0) & 0xFF;
9395/// =>
9396/// *((i32)p) = BSWAP(val);
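///
/// Similarly, if the two halves of the value are stored in swapped order
/// (again assuming a little endian target):
/// i16 *p = ...
/// i32 val = ...
/// p[0] = (val >> 16) & 0xFFFF;
/// p[1] = (val >> 0) & 0xFFFF;
/// =>
/// *((i32)p) = ROTR(val, 16);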
9397SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
9398 // The matching looks for "store (trunc x)" patterns that appear early but are
9399 // likely to be replaced by truncating store nodes during combining.
9400 // TODO: If there is evidence that running this later would help, this
9401 // limitation could be removed. Legality checks may need to be added
9402 // for the created store and optional bswap/rotate.
9403 if (LegalOperations || OptLevel == CodeGenOptLevel::None)
9404 return SDValue();
9405
9406 // We only handle merging simple stores of 1-4 bytes.
9407 // TODO: Allow unordered atomics when wider type is legal (see D66309)
9408 EVT MemVT = N->getMemoryVT();
9409 if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
9410 !N->isSimple() || N->isIndexed())
9411 return SDValue();
9412
9413  // Collect all of the stores in the chain, up to the maximum store width (i64).
9414 SDValue Chain = N->getChain();
9415  SmallVector<StoreSDNode *, 8> Stores;
9416 unsigned NarrowNumBits = MemVT.getScalarSizeInBits();
9417 unsigned MaxWideNumBits = 64;
9418 unsigned MaxStores = MaxWideNumBits / NarrowNumBits;
9419 while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
9420 // All stores must be the same size to ensure that we are writing all of the
9421 // bytes in the wide value.
9422 // This store should have exactly one use as a chain operand for another
9423 // store in the merging set. If there are other chain uses, then the
9424 // transform may not be safe because order of loads/stores outside of this
9425 // set may not be preserved.
9426 // TODO: We could allow multiple sizes by tracking each stored byte.
9427 if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
9428 Store->isIndexed() || !Store->hasOneUse())
9429 return SDValue();
9430 Stores.push_back(Store);
9431 Chain = Store->getChain();
9432 if (MaxStores < Stores.size())
9433 return SDValue();
9434 }
9435 // There is no reason to continue if we do not have at least a pair of stores.
9436 if (Stores.size() < 2)
9437 return SDValue();
9438
9439 // Handle simple types only.
9440 LLVMContext &Context = *DAG.getContext();
9441 unsigned NumStores = Stores.size();
9442 unsigned WideNumBits = NumStores * NarrowNumBits;
9443 if (WideNumBits != 16 && WideNumBits != 32 && WideNumBits != 64)
9444 return SDValue();
9445
9446 // Check if all bytes of the source value that we are looking at are stored
9447 // to the same base address. Collect offsets from Base address into OffsetMap.
9448 SDValue SourceValue;
9449 SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
9450 int64_t FirstOffset = INT64_MAX;
9451 StoreSDNode *FirstStore = nullptr;
9452 std::optional<BaseIndexOffset> Base;
9453 for (auto *Store : Stores) {
9454 // All the stores store different parts of the CombinedValue. A truncate is
9455 // required to get the partial value.
9456 SDValue Trunc = Store->getValue();
9457 if (Trunc.getOpcode() != ISD::TRUNCATE)
9458 return SDValue();
9459 // Other than the first/last part, a shift operation is required to get the
9460 // offset.
9461 int64_t Offset = 0;
9462 SDValue WideVal = Trunc.getOperand(0);
9463 if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
9464 isa<ConstantSDNode>(WideVal.getOperand(1))) {
9465 // The shift amount must be a constant multiple of the narrow type.
9466 // It is translated to the offset address in the wide source value "y".
9467 //
9468 // x = srl y, ShiftAmtC
9469 // i8 z = trunc x
9470 // store z, ...
9471 uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
9472 if (ShiftAmtC % NarrowNumBits != 0)
9473 return SDValue();
9474
9475 // Make sure we aren't reading bits that are shifted in.
9476 if (ShiftAmtC > WideVal.getScalarValueSizeInBits() - NarrowNumBits)
9477 return SDValue();
9478
9479 Offset = ShiftAmtC / NarrowNumBits;
9480 WideVal = WideVal.getOperand(0);
9481 }
9482
9483 // Stores must share the same source value with different offsets.
9484 if (!SourceValue)
9485 SourceValue = WideVal;
9486 else if (SourceValue != WideVal) {
9487 // Truncate and extends can be stripped to see if the values are related.
9488 if (stripTruncAndExt(SourceValue) != WideVal &&
9489 stripTruncAndExt(WideVal) != SourceValue)
9490 return SDValue();
9491
9492 if (WideVal.getScalarValueSizeInBits() >
9493 SourceValue.getScalarValueSizeInBits())
9494 SourceValue = WideVal;
9495
9496 // Give up if the source value type is smaller than the store size.
9497 if (SourceValue.getScalarValueSizeInBits() < WideNumBits)
9498 return SDValue();
9499 }
9500
9501 // Stores must share the same base address.
9502 BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
9503 int64_t ByteOffsetFromBase = 0;
9504 if (!Base)
9505 Base = Ptr;
9506 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9507 return SDValue();
9508
9509 // Remember the first store.
9510 if (ByteOffsetFromBase < FirstOffset) {
9511 FirstStore = Store;
9512 FirstOffset = ByteOffsetFromBase;
9513 }
9514 // Map the offset in the store and the offset in the combined value, and
9515 // early return if it has been set before.
9516 if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
9517 return SDValue();
9518 OffsetMap[Offset] = ByteOffsetFromBase;
9519 }
9520
9521 EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
9522
9523 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9524 assert(FirstStore && "First store must be set");
9525
9526 // Check that a store of the wide type is both allowed and fast on the target
9527 const DataLayout &Layout = DAG.getDataLayout();
9528 unsigned Fast = 0;
9529 bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
9530 *FirstStore->getMemOperand(), &Fast);
9531 if (!Allowed || !Fast)
9532 return SDValue();
9533
9534 // Check if the pieces of the value are going to the expected places in memory
9535 // to merge the stores.
9536 auto checkOffsets = [&](bool MatchLittleEndian) {
9537 if (MatchLittleEndian) {
9538 for (unsigned i = 0; i != NumStores; ++i)
9539 if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
9540 return false;
9541 } else { // MatchBigEndian by reversing loop counter.
9542 for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
9543 if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
9544 return false;
9545 }
9546 return true;
9547 };
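  // For example, with four one-byte stores and FirstOffset == 0, an OffsetMap
  // of {0, 1, 2, 3} matches the little-endian layout and {3, 2, 1, 0} matches
  // the big-endian layout.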
9548
9549 // Check if the offsets line up for the native data layout of this target.
9550 bool NeedBswap = false;
9551 bool NeedRotate = false;
9552 if (!checkOffsets(Layout.isLittleEndian())) {
9553 // Special-case: check if byte offsets line up for the opposite endian.
9554 if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
9555 NeedBswap = true;
9556 else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
9557 NeedRotate = true;
9558 else
9559 return SDValue();
9560 }
9561
9562 SDLoc DL(N);
9563 if (WideVT != SourceValue.getValueType()) {
9564 assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
9565 "Unexpected store value to merge");
9566 SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
9567 }
9568
9569 // Before legalize we can introduce illegal bswaps/rotates which will be later
9570 // converted to an explicit bswap sequence. This way we end up with a single
9571 // store and byte shuffling instead of several stores and byte shuffling.
9572 if (NeedBswap) {
9573 SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
9574 } else if (NeedRotate) {
9575 assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
9576 SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
9577 SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
9578 }
9579
9580 SDValue NewStore =
9581 DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
9582 FirstStore->getPointerInfo(), FirstStore->getAlign());
9583
9584 // Rely on other DAG combine rules to remove the other individual stores.
9585 DAG.ReplaceAllUsesWith(N, NewStore.getNode());
9586 return NewStore;
9587}
9588
9589/// Match a pattern where a wide type scalar value is loaded by several narrow
9590/// loads and combined by shifts and ors. Fold it into a single load or a load
9591/// and a BSWAP if the target supports it.
9592///
9593/// Assuming little endian target:
9594/// i8 *a = ...
9595/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
9596/// =>
9597/// i32 val = *((i32)a)
9598///
9599/// i8 *a = ...
9600/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
9601/// =>
9602/// i32 val = BSWAP(*((i32)a))
9603///
9604/// TODO: This rule matches complex patterns with OR node roots and doesn't
9605/// interact well with the worklist mechanism. When a part of the pattern is
9606/// updated (e.g. one of the loads) its direct users are put into the worklist,
9607/// but the root node of the pattern which triggers the load combine is not
9608/// necessarily a direct user of the changed node. For example, once the address
9609/// of the t28 load is reassociated, load combine won't be triggered:
9610/// t25: i32 = add t4, Constant:i32<2>
9611/// t26: i64 = sign_extend t25
9612/// t27: i64 = add t2, t26
9613/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
9614/// t29: i32 = zero_extend t28
9615/// t32: i32 = shl t29, Constant:i8<8>
9616/// t33: i32 = or t23, t32
9617/// As a possible fix visitLoad can check if the load can be a part of a load
9618/// combine pattern and add corresponding OR roots to the worklist.
9619SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
9620 assert(N->getOpcode() == ISD::OR &&
9621 "Can only match load combining against OR nodes");
9622
9623 // Handles simple types only
9624 EVT VT = N->getValueType(0);
9625 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
9626 return SDValue();
9627 unsigned ByteWidth = VT.getSizeInBits() / 8;
9628
9629 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
9630 auto MemoryByteOffset = [&](SDByteProvider P) {
9631 assert(P.hasSrc() && "Must be a memory byte provider");
9632 auto *Load = cast<LoadSDNode>(P.Src.value());
9633
9634 unsigned LoadBitWidth = Load->getMemoryVT().getScalarSizeInBits();
9635
9636 assert(LoadBitWidth % 8 == 0 &&
9637           "can only analyze providers for individual bytes, not bits");
9638 unsigned LoadByteWidth = LoadBitWidth / 8;
9639 return IsBigEndianTarget ? bigEndianByteAt(LoadByteWidth, P.DestOffset)
9640 : littleEndianByteAt(LoadByteWidth, P.DestOffset);
9641 };
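  // For example, byte 0 (DestOffset == 0) of a 4-byte load lives at memory
  // offset 0 on a little-endian target but at memory offset 3 on a big-endian
  // target.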
9642
9643 std::optional<BaseIndexOffset> Base;
9644 SDValue Chain;
9645
9646 SmallPtrSet<LoadSDNode *, 8> Loads;
9647 std::optional<SDByteProvider> FirstByteProvider;
9648 int64_t FirstOffset = INT64_MAX;
9649
9650 // Check if all the bytes of the OR we are looking at are loaded from the same
9651  // base address. Collect byte offsets from the Base address in ByteOffsets.
9652 SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
9653 unsigned ZeroExtendedBytes = 0;
9654 for (int i = ByteWidth - 1; i >= 0; --i) {
9655 auto P =
9656 calculateByteProvider(SDValue(N, 0), i, 0, /*VectorIndex*/ std::nullopt,
9657 /*StartingIndex*/ i);
9658 if (!P)
9659 return SDValue();
9660
9661 if (P->isConstantZero()) {
9662 // It's OK for the N most significant bytes to be 0, we can just
9663 // zero-extend the load.
9664 if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
9665 return SDValue();
9666 continue;
9667 }
9668 assert(P->hasSrc() && "provenance should either be memory or zero");
9669 auto *L = cast<LoadSDNode>(P->Src.value());
9670
9671 // All loads must share the same chain
9672 SDValue LChain = L->getChain();
9673 if (!Chain)
9674 Chain = LChain;
9675 else if (Chain != LChain)
9676 return SDValue();
9677
9678 // Loads must share the same base address
9679 BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
9680 int64_t ByteOffsetFromBase = 0;
9681
9682 // For vector loads, the expected load combine pattern will have an
9683 // ExtractElement for each index in the vector. While each of these
9684 // ExtractElements will be accessing the same base address as determined
9685 // by the load instruction, the actual bytes they interact with will differ
9686 // due to different ExtractElement indices. To accurately determine the
9687 // byte position of an ExtractElement, we offset the base load ptr with
9688 // the index multiplied by the byte size of each element in the vector.
9689 if (L->getMemoryVT().isVector()) {
9690 unsigned LoadWidthInBit = L->getMemoryVT().getScalarSizeInBits();
9691 if (LoadWidthInBit % 8 != 0)
9692 return SDValue();
9693 unsigned ByteOffsetFromVector = P->SrcOffset * LoadWidthInBit / 8;
9694 Ptr.addToOffset(ByteOffsetFromVector);
9695 }
9696
9697 if (!Base)
9698 Base = Ptr;
9699
9700 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9701 return SDValue();
9702
9703 // Calculate the offset of the current byte from the base address
9704 ByteOffsetFromBase += MemoryByteOffset(*P);
9705 ByteOffsets[i] = ByteOffsetFromBase;
9706
9707 // Remember the first byte load
9708 if (ByteOffsetFromBase < FirstOffset) {
9709 FirstByteProvider = P;
9710 FirstOffset = ByteOffsetFromBase;
9711 }
9712
9713 Loads.insert(L);
9714 }
9715
9716 assert(!Loads.empty() && "All the bytes of the value must be loaded from "
9717 "memory, so there must be at least one load which produces the value");
9718 assert(Base && "Base address of the accessed memory location must be set");
9719 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9720
9721 bool NeedsZext = ZeroExtendedBytes > 0;
9722
9723 EVT MemVT =
9724 EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
9725
9726 if (!MemVT.isSimple())
9727 return SDValue();
9728
9729 // Before legalize we can introduce too wide illegal loads which will be later
9730 // split into legal sized loads. This enables us to combine i64 load by i8
9731 // patterns to a couple of i32 loads on 32 bit targets.
9732 if (LegalOperations &&
9733 !TLI.isLoadExtLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, VT,
9734 MemVT))
9735 return SDValue();
9736
9737 // Check if the bytes of the OR we are looking at match with either big or
9738 // little endian value load
9739 std::optional<bool> IsBigEndian = isBigEndian(
9740 ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
9741 if (!IsBigEndian)
9742 return SDValue();
9743
9744 assert(FirstByteProvider && "must be set");
9745
9746 // Ensure that the first byte is loaded from zero offset of the first load.
9747 // So the combined value can be loaded from the first load address.
9748 if (MemoryByteOffset(*FirstByteProvider) != 0)
9749 return SDValue();
9750 auto *FirstLoad = cast<LoadSDNode>(FirstByteProvider->Src.value());
9751
9752 // The node we are looking at matches with the pattern, check if we can
9753 // replace it with a single (possibly zero-extended) load and bswap + shift if
9754 // needed.
9755
9756 // If the load needs byte swap check if the target supports it
9757 bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
9758
9759 // Before legalize we can introduce illegal bswaps which will be later
9760 // converted to an explicit bswap sequence. This way we end up with a single
9761 // load and byte shuffling instead of several loads and byte shuffling.
9762 // We do not introduce illegal bswaps when zero-extending as this tends to
9763 // introduce too many arithmetic instructions.
9764 if (NeedsBswap && (LegalOperations || NeedsZext) &&
9765 !TLI.isOperationLegal(ISD::BSWAP, VT))
9766 return SDValue();
9767
9768 // If we need to bswap and zero extend, we have to insert a shift. Check that
9769 // it is legal.
9770 if (NeedsBswap && NeedsZext && LegalOperations &&
9771 !TLI.isOperationLegal(ISD::SHL, VT))
9772 return SDValue();
9773
9774 // Check that a load of the wide type is both allowed and fast on the target
9775 unsigned Fast = 0;
9776 bool Allowed =
9777 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
9778 *FirstLoad->getMemOperand(), &Fast);
9779 if (!Allowed || !Fast)
9780 return SDValue();
9781
9782 SDValue NewLoad =
9783 DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
9784 Chain, FirstLoad->getBasePtr(),
9785 FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
9786
9787 // Transfer chain users from old loads to the new load.
9788 for (LoadSDNode *L : Loads)
9789 DAG.makeEquivalentMemoryOrdering(L, NewLoad);
9790
9791 if (!NeedsBswap)
9792 return NewLoad;
9793
9794 SDValue ShiftedLoad =
9795 NeedsZext ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
9796 DAG.getShiftAmountConstant(ZeroExtendedBytes * 8,
9797 VT, SDLoc(N)))
9798 : NewLoad;
9799 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
9800}
9801
9802// If the target has andn, bsl, or a similar bit-select instruction,
9803// we want to unfold masked merge, with canonical pattern of:
9804//   |        A        |  |B|
9805//   ((x ^ y) & m) ^ y
9806//    |  D  |
9807// Into:
9808// (x & m) | (y & ~m)
9809// If y is a constant, m is not a 'not', and the 'andn' does not work with
9810// immediates, we unfold into a different pattern:
9811// ~(~x & m) & (m | y)
9812// If x is a constant, m is a 'not', and the 'andn' does not work with
9813// immediates, we unfold into a different pattern:
9814// (x | ~m) & ~(~m & ~y)
9815// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
9816// the very least that breaks andnpd / andnps patterns, and because those
9817// patterns are simplified in IR and shouldn't be created in the DAG
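// For example, with x = 0b1100, y = 0b1010, m = 0b0110:
//   ((x ^ y) & m) ^ y  ==  (0b0110 & 0b0110) ^ 0b1010  ==  0b1100
//   (x & m) | (y & ~m) ==   0b0100 | 0b1000             ==  0b1100
// i.e. the result takes its bits from x where m is set and from y elsewhere.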
9818SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
9819 assert(N->getOpcode() == ISD::XOR);
9820
9821 // Don't touch 'not' (i.e. where y = -1).
9822 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
9823 return SDValue();
9824
9825 EVT VT = N->getValueType(0);
9826
9827 // There are 3 commutable operators in the pattern,
9828 // so we have to deal with 8 possible variants of the basic pattern.
9829 SDValue X, Y, M;
9830 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
9831 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
9832 return false;
9833 SDValue Xor = And.getOperand(XorIdx);
9834 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
9835 return false;
9836 SDValue Xor0 = Xor.getOperand(0);
9837 SDValue Xor1 = Xor.getOperand(1);
9838 // Don't touch 'not' (i.e. where y = -1).
9839 if (isAllOnesOrAllOnesSplat(Xor1))
9840 return false;
9841 if (Other == Xor0)
9842 std::swap(Xor0, Xor1);
9843 if (Other != Xor1)
9844 return false;
9845 X = Xor0;
9846 Y = Xor1;
9847 M = And.getOperand(XorIdx ? 0 : 1);
9848 return true;
9849 };
9850
9851 SDValue N0 = N->getOperand(0);
9852 SDValue N1 = N->getOperand(1);
9853 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
9854 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
9855 return SDValue();
9856
9857 // Don't do anything if the mask is constant. This should not be reachable.
9858 // InstCombine should have already unfolded this pattern, and DAGCombiner
9859 // probably shouldn't produce it, too.
9860 if (isa<ConstantSDNode>(M.getNode()))
9861 return SDValue();
9862
9863 // We can transform if the target has AndNot
9864 if (!TLI.hasAndNot(M))
9865 return SDValue();
9866
9867 SDLoc DL(N);
9868
9869 // If Y is a constant, check that 'andn' works with immediates. Unless M is
9870 // a bitwise not that would already allow ANDN to be used.
9871 if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
9872 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
9873 // If not, we need to do a bit more work to make sure andn is still used.
9874 SDValue NotX = DAG.getNOT(DL, X, VT);
9875 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
9876 SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
9877 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
9878 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
9879 }
9880
9881 // If X is a constant and M is a bitwise not, check that 'andn' works with
9882 // immediates.
9883 if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
9884 assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
9885 // If not, we need to do a bit more work to make sure andn is still used.
9886 SDValue NotM = M.getOperand(0);
9887 SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
9888 SDValue NotY = DAG.getNOT(DL, Y, VT);
9889 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
9890 SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
9891 return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
9892 }
9893
9894 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
9895 SDValue NotM = DAG.getNOT(DL, M, VT);
9896 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
9897
9898 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
9899}
9900
9901SDValue DAGCombiner::visitXOR(SDNode *N) {
9902 SDValue N0 = N->getOperand(0);
9903 SDValue N1 = N->getOperand(1);
9904 EVT VT = N0.getValueType();
9905 SDLoc DL(N);
9906
9907 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
9908 if (N0.isUndef() && N1.isUndef())
9909 return DAG.getConstant(0, DL, VT);
9910
9911 // fold (xor x, undef) -> undef
9912 if (N0.isUndef())
9913 return N0;
9914 if (N1.isUndef())
9915 return N1;
9916
9917 // fold (xor c1, c2) -> c1^c2
9918 if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
9919 return C;
9920
9921 // canonicalize constant to RHS
9922  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
9923      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
9924 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
9925
9926 // fold vector ops
9927 if (VT.isVector()) {
9928 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
9929 return FoldedVOp;
9930
9931 // fold (xor x, 0) -> x, vector edition
9932    if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
9933 return N0;
9934 }
9935
9936 // fold (xor x, 0) -> x
9937 if (isNullConstant(N1))
9938 return N0;
9939
9940 if (SDValue NewSel = foldBinOpIntoSelect(N))
9941 return NewSel;
9942
9943 // reassociate xor
9944 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
9945 return RXOR;
9946
9947 // Fold xor(vecreduce(x), vecreduce(y)) -> vecreduce(xor(x, y))
9948 if (SDValue SD =
9949 reassociateReduction(ISD::VECREDUCE_XOR, ISD::XOR, DL, VT, N0, N1))
9950 return SD;
9951
9952 // fold (a^b) -> (a|b) iff a and b share no bits.
9953 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
9954 DAG.haveNoCommonBitsSet(N0, N1))
9955 return DAG.getNode(ISD::OR, DL, VT, N0, N1, SDNodeFlags::Disjoint);
9956
9957 // look for 'add-like' folds:
9958 // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE)
9959 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
9960      isMinSignedConstant(N1))
9961 if (SDValue Combined = visitADDLike(N))
9962 return Combined;
9963
9964 // fold not (setcc x, y, cc) -> setcc x y !cc
9965 // Avoid breaking: and (not(setcc x, y, cc), z) -> andn for vec
9966 unsigned N0Opcode = N0.getOpcode();
9967 SDValue LHS, RHS, CC;
9968 if (TLI.isConstTrueVal(N1) &&
9969 isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true) &&
9970 !(VT.isVector() && TLI.hasAndNot(SDValue(N, 0)) && N->hasOneUse() &&
9971 N->use_begin()->getUser()->getOpcode() == ISD::AND)) {
9972    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
9973 LHS.getValueType());
9974 if (!LegalOperations ||
9975 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
9976 switch (N0Opcode) {
9977 default:
9978 llvm_unreachable("Unhandled SetCC Equivalent!");
9979 case ISD::SETCC:
9980 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
9981 case ISD::SELECT_CC:
9982 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
9983 N0.getOperand(3), NotCC);
9984 case ISD::STRICT_FSETCC:
9985 case ISD::STRICT_FSETCCS: {
9986 if (N0.hasOneUse()) {
9987 // FIXME Can we handle multiple uses? Could we token factor the chain
9988 // results from the new/old setcc?
9989 SDValue SetCC =
9990 DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
9991 N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
9992 CombineTo(N, SetCC);
9993 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
9994 recursivelyDeleteUnusedNodes(N0.getNode());
9995 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9996 }
9997 break;
9998 }
9999 }
10000 }
10001 }
10002
10003 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
10004 if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
10005 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
10006 SDValue V = N0.getOperand(0);
10007 SDLoc DL0(N0);
10008 V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
10009 DAG.getConstant(1, DL0, V.getValueType()));
10010 AddToWorklist(V.getNode());
10011 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
10012 }
10013
10014 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
10015 // fold (not (and x, y)) -> (or (not x), (not y)) iff x or y are setcc
10016 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
10017 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
10018 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
10019 if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
10020 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
10021 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
10022 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
10023 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
10024 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
10025 }
10026 }
10027 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
10028 // fold (not (and x, y)) -> (or (not x), (not y)) iff x or y are constants
10029 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
10030 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
10031 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
10032 if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
10033 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
10034 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
10035 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
10036 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
10037 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
10038 }
10039 }
10040
10041 // fold (not (sub Y, X)) -> (add X, ~Y) if Y is a constant
10042 if (N0.getOpcode() == ISD::SUB && isAllOnesConstant(N1)) {
10043 SDValue Y = N0.getOperand(0);
10044 SDValue X = N0.getOperand(1);
10045
10046 if (auto *YConst = dyn_cast<ConstantSDNode>(Y)) {
10047 APInt NotYValue = ~YConst->getAPIntValue();
10048 SDValue NotY = DAG.getConstant(NotYValue, DL, VT);
10049 return DAG.getNode(ISD::ADD, DL, VT, X, NotY, N->getFlags());
10050 }
10051 }
10052
10053 // fold (not (add X, -1)) -> (neg X)
10054 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && isAllOnesConstant(N1) &&
10055      isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
10056 return DAG.getNegative(N0.getOperand(0), DL, VT);
10057 }
10058
10059 // fold (xor (and x, y), y) -> (and (not x), y)
10060 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
10061 SDValue X = N0.getOperand(0);
10062 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
10063 AddToWorklist(NotX.getNode());
10064 return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
10065 }
10066
10067 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
10068 if (!LegalOperations || hasOperation(ISD::ABS, VT)) {
10069 SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
10070 SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
10071 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
10072 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
10073 SDValue S0 = S.getOperand(0);
10074 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
10075 if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
10076 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
10077 return DAG.getNode(ISD::ABS, DL, VT, S0);
10078 }
10079 }
10080
10081 // fold (xor x, x) -> 0
10082 if (N0 == N1)
10083 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
10084
10085 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
10086 // Here is a concrete example of this equivalence:
10087 // i16 x == 14
10088 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
10089 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
10090 //
10091 // =>
10092 //
10093 // i16 ~1 == 0b1111111111111110
10094 // i16 rol(~1, 14) == 0b1011111111111111
10095 //
10096 // Some additional tips to help conceptualize this transform:
10097 // - Try to see the operation as placing a single zero in a value of all ones.
10098 // - There exists no value for x which would allow the result to contain zero.
10099 // - Values of x larger than the bitwidth are undefined and do not require a
10100 // consistent result.
10101 // - Pushing the zero left requires shifting one bits in from the right.
10102 // A rotate left of ~1 is a nice way of achieving the desired result.
10103 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
10104      isAllOnesOrAllOnesSplat(N1) && isOneOrOneSplat(N0.getOperand(0))) {
10105 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getSignedConstant(~1, DL, VT),
10106 N0.getOperand(1));
10107 }
10108
10109 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
10110 if (N0Opcode == N1.getOpcode())
10111 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
10112 return V;
10113
10114 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
10115 return R;
10116 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
10117 return R;
10118 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
10119 return R;
10120
10121 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
10122 if (SDValue MM = unfoldMaskedMerge(N))
10123 return MM;
10124
10125 // Simplify the expression using non-local knowledge.
10126  if (SimplifyDemandedBits(SDValue(N, 0)))
10127 return SDValue(N, 0);
10128
10129 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
10130 return Combined;
10131
10132 // fold (xor (smin(x, C), C)) -> select (x < C), xor(x, C), 0
10133 // fold (xor (smax(x, C), C)) -> select (x > C), xor(x, C), 0
10134 // fold (xor (umin(x, C), C)) -> select (x < C), xor(x, C), 0
10135 // fold (xor (umax(x, C), C)) -> select (x > C), xor(x, C), 0
10136 SDValue Op0;
10137 if (sd_match(N0, m_OneUse(m_AnyOf(m_SMin(m_Value(Op0), m_Specific(N1)),
10138 m_SMax(m_Value(Op0), m_Specific(N1)),
10139 m_UMin(m_Value(Op0), m_Specific(N1)),
10140 m_UMax(m_Value(Op0), m_Specific(N1)))))) {
10141
10142 if (isa<ConstantSDNode>(N1) ||
10143        ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) {
10144 // For vectors, only optimize when the constant is zero or all-ones to
10145 // avoid generating more instructions
10146 if (VT.isVector()) {
10147 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10148 if (!N1C || (!N1C->isZero() && !N1C->isAllOnes()))
10149 return SDValue();
10150 }
10151
10152 // Avoid the fold if the minmax operation is legal and select is expensive
10153 if (TLI.isOperationLegal(N0.getOpcode(), VT) &&
10154        TLI.isPredictableSelectExpensive())
10155 return SDValue();
10156
10157 EVT CCVT = getSetCCResultType(VT);
10158 ISD::CondCode CC;
10159 switch (N0.getOpcode()) {
10160 case ISD::SMIN:
10161 CC = ISD::SETLT;
10162 break;
10163 case ISD::SMAX:
10164 CC = ISD::SETGT;
10165 break;
10166 case ISD::UMIN:
10167 CC = ISD::SETULT;
10168 break;
10169 case ISD::UMAX:
10170 CC = ISD::SETUGT;
10171 break;
10172 }
10173 SDValue FN1 = DAG.getFreeze(N1);
10174 SDValue Cmp = DAG.getSetCC(DL, CCVT, Op0, FN1, CC);
10175 SDValue XorXC = DAG.getNode(ISD::XOR, DL, VT, Op0, FN1);
10176 SDValue Zero = DAG.getConstant(0, DL, VT);
10177 return DAG.getSelect(DL, VT, Cmp, XorXC, Zero);
10178 }
10179 }
10180
10181 return SDValue();
10182}
10183
10184/// If we have a shift-by-constant of a bitwise logic op that itself has a
10185/// shift-by-constant operand with identical opcode, we may be able to convert
10186/// that into 2 independent shifts followed by the logic op. This is a
10187/// throughput improvement.
10188 static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
10189 // Match a one-use bitwise logic op.
10190 SDValue LogicOp = Shift->getOperand(0);
10191 if (!LogicOp.hasOneUse())
10192 return SDValue();
10193
10194 unsigned LogicOpcode = LogicOp.getOpcode();
10195 if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
10196 LogicOpcode != ISD::XOR)
10197 return SDValue();
10198
10199 // Find a matching one-use shift by constant.
10200 unsigned ShiftOpcode = Shift->getOpcode();
10201 SDValue C1 = Shift->getOperand(1);
10202 ConstantSDNode *C1Node = isConstOrConstSplat(C1);
10203 assert(C1Node && "Expected a shift with constant operand");
10204 const APInt &C1Val = C1Node->getAPIntValue();
10205 auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
10206 const APInt *&ShiftAmtVal) {
10207 if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
10208 return false;
10209
10210 ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
10211 if (!ShiftCNode)
10212 return false;
10213
10214 // Capture the shifted operand and shift amount value.
10215 ShiftOp = V.getOperand(0);
10216 ShiftAmtVal = &ShiftCNode->getAPIntValue();
10217
10218 // Shift amount types do not have to match their operand type, so check that
10219 // the constants are the same width.
10220 if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
10221 return false;
10222
10223 // The fold is not valid if the sum of the shift values doesn't fit in the
10224 // given shift amount type.
10225 bool Overflow = false;
10226 APInt NewShiftAmt = C1Val.uadd_ov(*ShiftAmtVal, Overflow);
10227 if (Overflow)
10228 return false;
10229
10230 // The fold is not valid if the sum of the shift values exceeds bitwidth.
10231 if (NewShiftAmt.uge(V.getScalarValueSizeInBits()))
10232 return false;
10233
10234 return true;
10235 };
10236
10237 // Logic ops are commutative, so check each operand for a match.
10238 SDValue X, Y;
10239 const APInt *C0Val;
10240 if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
10241 Y = LogicOp.getOperand(1);
10242 else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
10243 Y = LogicOp.getOperand(0);
10244 else
10245 return SDValue();
10246
10247 // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
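  // For example: shl (xor (shl X, 2), Y), 3 --> xor (shl X, 5), (shl Y, 3).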
10248 SDLoc DL(Shift);
10249 EVT VT = Shift->getValueType(0);
10250 EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
10251 SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
10252 SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
10253 SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
10254 return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2,
10255 LogicOp->getFlags());
10256}
10257
10258/// Handle transforms common to the three shifts, when the shift amount is a
10259/// constant.
10260/// We are looking for: (shift being one of shl/sra/srl)
10261/// shift (binop X, C0), C1
10262/// And want to transform into:
10263/// binop (shift X, C1), (shift C0, C1)
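/// For example: shl (and X, 0xF0), 4 --> and (shl X, 4), 0xF00.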
10264SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
10265 assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
10266
10267 // Do not turn a 'not' into a regular xor.
10268 if (isBitwiseNot(N->getOperand(0)))
10269 return SDValue();
10270
10271 // The inner binop must be one-use, since we want to replace it.
10272 SDValue LHS = N->getOperand(0);
10273 if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
10274 return SDValue();
10275
10276 // Fold shift(bitop(shift(x,c1),y), c2) -> bitop(shift(x,c1+c2),shift(y,c2)).
10277 if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
10278 return R;
10279
10280 // We want to pull some binops through shifts, so that we have (and (shift))
10281 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
10282 // thing happens with address calculations, so it's important to canonicalize
10283 // it.
10284 switch (LHS.getOpcode()) {
10285 default:
10286 return SDValue();
10287 case ISD::OR:
10288 case ISD::XOR:
10289 case ISD::AND:
10290 break;
10291 case ISD::ADD:
10292 if (N->getOpcode() != ISD::SHL)
10293 return SDValue(); // only shl(add) not sr[al](add).
10294 break;
10295 }
10296
10297 // FIXME: disable this unless the input to the binop is a shift by a constant
10298  // or is copy/select. Enable this in other cases once we figure out it's exactly
10299 // profitable.
10300 SDValue BinOpLHSVal = LHS.getOperand(0);
10301 bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
10302 BinOpLHSVal.getOpcode() == ISD::SRA ||
10303 BinOpLHSVal.getOpcode() == ISD::SRL) &&
10304 isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
10305 bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
10306 BinOpLHSVal.getOpcode() == ISD::SELECT;
10307
10308 if (!IsShiftByConstant && !IsCopyOrSelect)
10309 return SDValue();
10310
10311 if (IsCopyOrSelect && N->hasOneUse())
10312 return SDValue();
10313
10314 // Attempt to fold the constants, shifting the binop RHS by the shift amount.
10315 SDLoc DL(N);
10316 EVT VT = N->getValueType(0);
10317 if (SDValue NewRHS = DAG.FoldConstantArithmetic(
10318 N->getOpcode(), DL, VT, {LHS.getOperand(1), N->getOperand(1)})) {
10319 SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
10320 N->getOperand(1));
10321 return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
10322 }
10323
10324 return SDValue();
10325}
10326
10327SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
10328 assert(N->getOpcode() == ISD::TRUNCATE);
10329 assert(N->getOperand(0).getOpcode() == ISD::AND);
10330
10331 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
10332 EVT TruncVT = N->getValueType(0);
10333 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
10334 TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
10335 SDValue N01 = N->getOperand(0).getOperand(1);
10336 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
10337 SDLoc DL(N);
10338 SDValue N00 = N->getOperand(0).getOperand(0);
10339 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
10340 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
10341 AddToWorklist(Trunc00.getNode());
10342 AddToWorklist(Trunc01.getNode());
10343 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
10344 }
10345 }
10346
10347 return SDValue();
10348}
10349
10350SDValue DAGCombiner::visitRotate(SDNode *N) {
10351 SDLoc dl(N);
10352 SDValue N0 = N->getOperand(0);
10353 SDValue N1 = N->getOperand(1);
10354 EVT VT = N->getValueType(0);
10355 unsigned Bitsize = VT.getScalarSizeInBits();
10356
10357 // fold (rot x, 0) -> x
10358 if (isNullOrNullSplat(N1))
10359 return N0;
10360
10361 // fold (rot x, c) -> x iff (c % BitSize) == 0
10362 if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
10363 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
10364 if (DAG.MaskedValueIsZero(N1, ModuloMask))
10365 return N0;
10366 }
10367
10368 // fold (rot x, c) -> (rot x, c % BitSize)
10369 bool OutOfRange = false;
10370 auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
10371 OutOfRange |= C->getAPIntValue().uge(Bitsize);
10372 return true;
10373 };
10374 if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
10375 EVT AmtVT = N1.getValueType();
10376 SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
10377 if (SDValue Amt =
10378 DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
10379 return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
10380 }
10381
10382 // rot i16 X, 8 --> bswap X
10383 auto *RotAmtC = isConstOrConstSplat(N1);
10384 if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
10385 VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
10386 return DAG.getNode(ISD::BSWAP, dl, VT, N0);
10387
10388 // Simplify the operands using demanded-bits information.
10389  if (SimplifyDemandedBits(SDValue(N, 0)))
10390 return SDValue(N, 0);
10391
10392 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
10393 if (N1.getOpcode() == ISD::TRUNCATE &&
10394 N1.getOperand(0).getOpcode() == ISD::AND) {
10395 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10396 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
10397 }
10398
10399 unsigned NextOp = N0.getOpcode();
10400
10401 // fold (rot* (rot* x, c2), c1)
10402 // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize) + bitsize) % bitsize)
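  // For example, on i32: rotl (rotr X, 3), 5 --> rotl X, ((5 - 3) + 32) % 32
  //                                          --> rotl X, 2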
10403 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
10404 bool C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
10405    bool C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
10406 if (C1 && C2 && N1.getValueType() == N0.getOperand(1).getValueType()) {
10407 EVT ShiftVT = N1.getValueType();
10408 bool SameSide = (N->getOpcode() == NextOp);
10409 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
10410 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
10411 SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
10412 {N1, BitsizeC});
10413 SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
10414 {N0.getOperand(1), BitsizeC});
10415 if (Norm1 && Norm2)
10416 if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
10417 CombineOp, dl, ShiftVT, {Norm1, Norm2})) {
10418 CombinedShift = DAG.FoldConstantArithmetic(ISD::ADD, dl, ShiftVT,
10419 {CombinedShift, BitsizeC});
10420 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
10421 ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC});
10422 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
10423 CombinedShiftNorm);
10424 }
10425 }
10426 }
10427 return SDValue();
10428}
10429
10430SDValue DAGCombiner::visitSHL(SDNode *N) {
10431 SDValue N0 = N->getOperand(0);
10432 SDValue N1 = N->getOperand(1);
10433 if (SDValue V = DAG.simplifyShift(N0, N1))
10434 return V;
10435
10436 SDLoc DL(N);
10437 EVT VT = N0.getValueType();
10438 EVT ShiftVT = N1.getValueType();
10439 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10440
10441 // fold (shl c1, c2) -> c1<<c2
10442 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N0, N1}))
10443 return C;
10444
10445 // fold vector ops
10446 if (VT.isVector()) {
10447 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10448 return FoldedVOp;
10449
10450 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
10451 // If setcc produces all-one true value then:
10452 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
10453 if (N1CV && N1CV->isConstant()) {
10454 if (N0.getOpcode() == ISD::AND) {
10455 SDValue N00 = N0->getOperand(0);
10456 SDValue N01 = N0->getOperand(1);
10457 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
10458
10459 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
10460            TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
10461                TargetLowering::ZeroOrNegativeOneBooleanContent) {
10462 if (SDValue C =
10463 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N01, N1}))
10464 return DAG.getNode(ISD::AND, DL, VT, N00, C);
10465 }
10466 }
10467 }
10468 }
10469
10470 if (SDValue NewSel = foldBinOpIntoSelect(N))
10471 return NewSel;
10472
10473 // if (shl x, c) is known to be zero, return 0
10474 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
10475 return DAG.getConstant(0, DL, VT);
10476
10477 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
10478 if (N1.getOpcode() == ISD::TRUNCATE &&
10479 N1.getOperand(0).getOpcode() == ISD::AND) {
10480 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10481 return DAG.getNode(ISD::SHL, DL, VT, N0, NewOp1);
10482 }
10483
10484 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
10485 if (N0.getOpcode() == ISD::SHL) {
10486 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
10487 ConstantSDNode *RHS) {
10488 APInt c1 = LHS->getAPIntValue();
10489 APInt c2 = RHS->getAPIntValue();
10490 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10491 return (c1 + c2).uge(OpSizeInBits);
10492 };
10493 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
10494 return DAG.getConstant(0, DL, VT);
10495
10496 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
10497 ConstantSDNode *RHS) {
10498 APInt c1 = LHS->getAPIntValue();
10499 APInt c2 = RHS->getAPIntValue();
10500 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10501 return (c1 + c2).ult(OpSizeInBits);
10502 };
10503 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
10504 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
10505 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
10506 }
10507 }
10508
10509 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
10510 // For this to be valid, the second form must not preserve any of the bits
10511 // that are shifted out by the inner shift in the first form. This means
10512 // the outer shift size must be >= the number of bits added by the ext.
10513 // As a corollary, we don't care what kind of ext it is.
10514 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
10515 N0.getOpcode() == ISD::ANY_EXTEND ||
10516 N0.getOpcode() == ISD::SIGN_EXTEND) &&
10517 N0.getOperand(0).getOpcode() == ISD::SHL) {
10518 SDValue N0Op0 = N0.getOperand(0);
10519 SDValue InnerShiftAmt = N0Op0.getOperand(1);
10520 EVT InnerVT = N0Op0.getValueType();
10521 uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
10522
10523 auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
10524 ConstantSDNode *RHS) {
10525 APInt c1 = LHS->getAPIntValue();
10526 APInt c2 = RHS->getAPIntValue();
10527 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10528 return c2.uge(OpSizeInBits - InnerBitwidth) &&
10529 (c1 + c2).uge(OpSizeInBits);
10530 };
10531 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
10532 /*AllowUndefs*/ false,
10533 /*AllowTypeMismatch*/ true))
10534 return DAG.getConstant(0, DL, VT);
10535
10536 auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
10537 ConstantSDNode *RHS) {
10538 APInt c1 = LHS->getAPIntValue();
10539 APInt c2 = RHS->getAPIntValue();
10540 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10541 return c2.uge(OpSizeInBits - InnerBitwidth) &&
10542 (c1 + c2).ult(OpSizeInBits);
10543 };
10544 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
10545 /*AllowUndefs*/ false,
10546 /*AllowTypeMismatch*/ true)) {
10547 SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
10548 SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
10549 Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
10550 return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
10551 }
10552 }
10553
10554 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
10555 // Only fold this if the inner zext has no other uses to avoid increasing
10556 // the total number of instructions.
10557 if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
10558 N0.getOperand(0).getOpcode() == ISD::SRL) {
10559 SDValue N0Op0 = N0.getOperand(0);
10560 SDValue InnerShiftAmt = N0Op0.getOperand(1);
10561
10562 auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
10563 APInt c1 = LHS->getAPIntValue();
10564 APInt c2 = RHS->getAPIntValue();
10565 zeroExtendToMatch(c1, c2);
10566 return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
10567 };
10568 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
10569 /*AllowUndefs*/ false,
10570 /*AllowTypeMismatch*/ true)) {
10571 EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
10572 SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
10573 NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
10574 AddToWorklist(NewSHL.getNode());
10575 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
10576 }
10577 }
10578
10579 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) {
10580 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
10581 ConstantSDNode *RHS) {
10582 const APInt &LHSC = LHS->getAPIntValue();
10583 const APInt &RHSC = RHS->getAPIntValue();
10584 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
10585 LHSC.getZExtValue() <= RHSC.getZExtValue();
10586 };
10587
10588 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
10589 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2
10590 if (N0->getFlags().hasExact()) {
10591 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10592 /*AllowUndefs*/ false,
10593 /*AllowTypeMismatch*/ true)) {
10594 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10595 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10596 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10597 }
10598 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10599 /*AllowUndefs*/ false,
10600 /*AllowTypeMismatch*/ true)) {
10601 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10602 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10603 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Diff);
10604 }
10605 }
10606
10607      // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
10608      //                               (and (srl x, (sub c1, c2)), MASK)
10609 // Only fold this if the inner shift has no other uses -- if it does,
10610 // folding this will increase the total number of instructions.
10611 if (N0.getOpcode() == ISD::SRL &&
10612 (N0.getOperand(1) == N1 || N0.hasOneUse()) &&
10613        TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
10614 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10615 /*AllowUndefs*/ false,
10616 /*AllowTypeMismatch*/ true)) {
10617 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10618 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10619 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10620 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N01);
10621 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, Diff);
10622 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
10623 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10624 }
10625 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10626 /*AllowUndefs*/ false,
10627 /*AllowTypeMismatch*/ true)) {
10628 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10629 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10630 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10631 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N1);
10632 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10633 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10634 }
10635 }
10636 }
10637
10638 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
10639 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
10640 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
10641 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
10642 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
10643 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
10644 }
10645
10646 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
10647 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
10648 // Variant of version done on multiply, except mul by a power of 2 is turned
10649 // into a shift.
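  // For example: shl (add X, 3), 2 --> add (shl X, 2), 12.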
10650 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
10651 TLI.isDesirableToCommuteWithShift(N, Level)) {
10652 SDValue N01 = N0.getOperand(1);
10653 if (SDValue Shl1 =
10654 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) {
10655 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
10656 AddToWorklist(Shl0.getNode());
10657 SDNodeFlags Flags;
10658 // Preserve the disjoint flag for Or.
10659 if (N0.getOpcode() == ISD::OR && N0->getFlags().hasDisjoint())
10660        Flags.setDisjoint(true);
10661 return DAG.getNode(N0.getOpcode(), DL, VT, Shl0, Shl1, Flags);
10662 }
10663 }
10664
10665 // fold (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2)
10666 // TODO: Add zext/add_nuw variant with suitable test coverage
10667 // TODO: Should we limit this with isLegalAddImmediate?
10668 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
10669 N0.getOperand(0).getOpcode() == ISD::ADD &&
10670 N0.getOperand(0)->getFlags().hasNoSignedWrap() &&
10671 TLI.isDesirableToCommuteWithShift(N, Level)) {
10672 SDValue Add = N0.getOperand(0);
10673 SDLoc DL(N0);
10674 if (SDValue ExtC = DAG.FoldConstantArithmetic(N0.getOpcode(), DL, VT,
10675 {Add.getOperand(1)})) {
10676 if (SDValue ShlC =
10677 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {ExtC, N1})) {
10678 SDValue ExtX = DAG.getNode(N0.getOpcode(), DL, VT, Add.getOperand(0));
10679 SDValue ShlX = DAG.getNode(ISD::SHL, DL, VT, ExtX, N1);
10680 return DAG.getNode(ISD::ADD, DL, VT, ShlX, ShlC);
10681 }
10682 }
10683 }
10684
10685 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
10686 if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) {
10687 SDValue N01 = N0.getOperand(1);
10688 if (SDValue Shl =
10689 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1}))
10690 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), Shl);
10691 }
10692
10693 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10694 if (N1C && !N1C->isOpaque())
10695 if (SDValue NewSHL = visitShiftByConstant(N))
10696 return NewSHL;
10697
10698 // fold (shl X, cttz(Y)) -> (mul (Y & -Y), X) if cttz is unsupported on the
10699 // target.
10700 if (((N1.getOpcode() == ISD::CTTZ &&
10701 VT.getScalarSizeInBits() <= ShiftVT.getScalarSizeInBits()) ||
10702        N1.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
10703 N1.hasOneUse() && !TLI.isOperationLegalOrCustom(ISD::CTTZ, ShiftVT) &&
10704      TLI.isOperationLegalOrCustom(ISD::MUL, VT)) {
10705 SDValue Y = N1.getOperand(0);
10706 SDLoc DL(N);
10707 SDValue NegY = DAG.getNegative(Y, DL, ShiftVT);
10708 SDValue And =
10709 DAG.getZExtOrTrunc(DAG.getNode(ISD::AND, DL, ShiftVT, Y, NegY), DL, VT);
10710 return DAG.getNode(ISD::MUL, DL, VT, And, N0);
10711 }
10712
10713  if (SimplifyDemandedBits(SDValue(N, 0)))
10714 return SDValue(N, 0);
10715
10716 // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
10717 if (N0.getOpcode() == ISD::VSCALE && N1C) {
10718 const APInt &C0 = N0.getConstantOperandAPInt(0);
10719 const APInt &C1 = N1C->getAPIntValue();
10720 return DAG.getVScale(DL, VT, C0 << C1);
10721 }
10722
10723 SDValue X;
10724 APInt VS0;
10725
10726 // fold (shl (X * vscale(VS0)), C1) -> (X * vscale(VS0 << C1))
10727 if (N1C && sd_match(N0, m_Mul(m_Value(X), m_VScale(m_ConstInt(VS0))))) {
10728 SDNodeFlags Flags;
10729 Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap() &&
10730 N0->getFlags().hasNoUnsignedWrap());
10731
10732 SDValue VScale = DAG.getVScale(DL, VT, VS0 << N1C->getAPIntValue());
10733 return DAG.getNode(ISD::MUL, DL, VT, X, VScale, Flags);
10734 }
10735
10736 // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
10737 APInt ShlVal;
10738 if (N0.getOpcode() == ISD::STEP_VECTOR &&
10739 ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
10740 const APInt &C0 = N0.getConstantOperandAPInt(0);
10741 if (ShlVal.ult(C0.getBitWidth())) {
10742 APInt NewStep = C0 << ShlVal;
10743 return DAG.getStepVector(DL, VT, NewStep);
10744 }
10745 }
10746
10747 return SDValue();
10748}
10749
10750// Transform a right shift of a multiply into a multiply-high.
10751// Examples:
10752 // (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
10753 // (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
10754 static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
10755 const TargetLowering &TLI) {
10756 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
10757 "SRL or SRA node is required here!");
10758
10759 // Check the shift amount. Proceed with the transformation if the shift
10760 // amount is constant.
10761 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
10762 if (!ShiftAmtSrc)
10763 return SDValue();
10764
10765 // The operation feeding into the shift must be a multiply.
10766 SDValue ShiftOperand = N->getOperand(0);
10767 if (ShiftOperand.getOpcode() != ISD::MUL)
10768 return SDValue();
10769
10770 // Both operands must be equivalent extend nodes.
10771 SDValue LeftOp = ShiftOperand.getOperand(0);
10772 SDValue RightOp = ShiftOperand.getOperand(1);
10773
10774 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
10775 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
10776
10777 if (!IsSignExt && !IsZeroExt)
10778 return SDValue();
10779
10780 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
10781 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
10782
10783 // return true if U may use the lower bits of its operands
10784 auto UserOfLowerBits = [NarrowVTSize](SDNode *U) {
10785 if (U->getOpcode() != ISD::SRL && U->getOpcode() != ISD::SRA) {
10786 return true;
10787 }
10788 ConstantSDNode *UShiftAmtSrc = isConstOrConstSplat(U->getOperand(1));
10789 if (!UShiftAmtSrc) {
10790 return true;
10791 }
10792 unsigned UShiftAmt = UShiftAmtSrc->getZExtValue();
10793 return UShiftAmt < NarrowVTSize;
10794 };
10795
10796 // If the lower part of the MUL is also used and MUL_LOHI is supported, do
10797 // not introduce the MULH in favor of MUL_LOHI.
10798 unsigned MulLoHiOp = IsSignExt ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
10799 if (!ShiftOperand.hasOneUse() &&
10800 TLI.isOperationLegalOrCustom(MulLoHiOp, NarrowVT) &&
10801 llvm::any_of(ShiftOperand->users(), UserOfLowerBits)) {
10802 return SDValue();
10803 }
10804
10805 SDValue MulhRightOp;
10806 if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
10807 unsigned ActiveBits = IsSignExt
10808 ? Constant->getAPIntValue().getSignificantBits()
10809 : Constant->getAPIntValue().getActiveBits();
10810 if (ActiveBits > NarrowVTSize)
10811 return SDValue();
10812 MulhRightOp = DAG.getConstant(
10813 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
10814 NarrowVT);
10815 } else {
10816 if (LeftOp.getOpcode() != RightOp.getOpcode())
10817 return SDValue();
10818 // Check that the two extend nodes are the same type.
10819 if (NarrowVT != RightOp.getOperand(0).getValueType())
10820 return SDValue();
10821 MulhRightOp = RightOp.getOperand(0);
10822 }
10823
10824 EVT WideVT = LeftOp.getValueType();
10825 // Proceed with the transformation if the wide types match.
10826 assert((WideVT == RightOp.getValueType()) &&
10827 "Cannot have a multiply node with two different operand types.");
10828
10829 // Proceed with the transformation if the wide type is twice as large
10830 // as the narrow type.
10831 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
10832 return SDValue();
10833
10834 // Check the shift amount with the narrow type size.
10835 // Proceed with the transformation if the shift amount is the width
10836 // of the narrow type.
10837 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
10838 if (ShiftAmt != NarrowVTSize)
10839 return SDValue();
10840
10841 // If the operation feeding into the MUL is a sign extend (sext),
10842 // we use mulhs. Otherwise, zero extends (zext) use mulhu.
10843 unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
10844
10845 // Combine to mulh if mulh is legal/custom for the narrow type on the target,
10846 // or, if it is a vector type, transform it to an acceptable type and
10847 // rely on legalization to split/combine the result.
10848 if (NarrowVT.isVector()) {
10849 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), NarrowVT);
10850 if (TransformVT.getVectorElementType() != NarrowVT.getVectorElementType() ||
10851 !TLI.isOperationLegalOrCustom(MulhOpcode, TransformVT))
10852 return SDValue();
10853 } else {
10854 if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
10855 return SDValue();
10856 }
10857
10858 SDValue Result =
10859 DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
10860 bool IsSigned = N->getOpcode() == ISD::SRA;
10861 return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
10862}
10863
10864// fold (bswap (logic_op(bswap(x),y))) -> logic_op(x,bswap(y))
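// This holds because bswap/bitreverse distribute over bitwise logic ops, so the
// outer reorder cancels the inner reorder of x and is pushed onto y instead.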
10865 // This helper function accepts SDNodes with opcode ISD::BSWAP or ISD::BITREVERSE.
10866 static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG) {
10867 unsigned Opcode = N->getOpcode();
10868 if (Opcode != ISD::BSWAP && Opcode != ISD::BITREVERSE)
10869 return SDValue();
10870
10871 SDValue N0 = N->getOperand(0);
10872 EVT VT = N->getValueType(0);
10873 SDLoc DL(N);
10874 SDValue X, Y;
10875
10876 // If both operands are bswap/bitreverse, ignore the multiuse
10878 m_UnaryOp(Opcode, m_Value(Y))))))
10879 return DAG.getNode(N0.getOpcode(), DL, VT, X, Y);
10880
10881 // Otherwise we need to ensure the logic_op and bswap/bitreverse(x) have one use.
10882 if (sd_match(N0, m_OneUse(m_BitwiseLogic(
10883 m_OneUse(m_UnaryOp(Opcode, m_Value(X))), m_Value(Y))))) {
10884 SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, Y);
10885 return DAG.getNode(N0.getOpcode(), DL, VT, X, NewBitReorder);
10886 }
10887
10888 return SDValue();
10889}
10890
10891SDValue DAGCombiner::visitSRA(SDNode *N) {
10892 SDValue N0 = N->getOperand(0);
10893 SDValue N1 = N->getOperand(1);
10894 if (SDValue V = DAG.simplifyShift(N0, N1))
10895 return V;
10896
10897 SDLoc DL(N);
10898 EVT VT = N0.getValueType();
10899 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10900
10901 // fold (sra c1, c2) -> c1 >>s c2
10902 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, DL, VT, {N0, N1}))
10903 return C;
10904
10905 // Arithmetic shifting an all-sign-bit value is a no-op.
10906 // fold (sra 0, x) -> 0
10907 // fold (sra -1, x) -> -1
10908 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
10909 return N0;
10910
10911 // fold vector ops
10912 if (VT.isVector())
10913 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10914 return FoldedVOp;
10915
10916 if (SDValue NewSel = foldBinOpIntoSelect(N))
10917 return NewSel;
10918
10919 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10920
10921 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
10922 // clamp (add c1, c2) to max shift.
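// For example: (sra (sra x, 3), 5) --> (sra x, 8); if the sum would reach the
// bit width it is clamped to bitwidth - 1, which still just replicates the sign bit.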
10923 if (N0.getOpcode() == ISD::SRA) {
10924 EVT ShiftVT = N1.getValueType();
10925 EVT ShiftSVT = ShiftVT.getScalarType();
10926 SmallVector<SDValue, 16> ShiftValues;
10927
10928 auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
10929 APInt c1 = LHS->getAPIntValue();
10930 APInt c2 = RHS->getAPIntValue();
10931 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10932 APInt Sum = c1 + c2;
10933 unsigned ShiftSum =
10934 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
10935 ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
10936 return true;
10937 };
10938 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
10939 SDValue ShiftValue;
10940 if (N1.getOpcode() == ISD::BUILD_VECTOR)
10941 ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
10942 else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
10943 assert(ShiftValues.size() == 1 &&
10944 "Expected matchBinaryPredicate to return one element for "
10945 "SPLAT_VECTORs");
10946 ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
10947 } else
10948 ShiftValue = ShiftValues[0];
10949 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
10950 }
10951 }
10952
10953 // fold (sra (shl X, m), (sub result_size, n))
10954 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
10955 // result_size - n != m.
10956 // If truncate is free for the target, sext(shl) is likely to result in
10957 // better code.
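// For instance, on i32: (sra (shl X, 20), 24) --> (sext (trunc (srl X, 4) to i8)),
// i.e. bits 4..11 of X sign-extended.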
10958 if (N0.getOpcode() == ISD::SHL && N1C) {
10959 // Get the two constants of the shifts, CN0 = m, CN = n.
10960 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
10961 if (N01C) {
10962 LLVMContext &Ctx = *DAG.getContext();
10963 // Determine what the truncate's result bitsize and type would be.
10964 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
10965
10966 if (VT.isVector())
10967 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10968
10969 // Determine the residual right-shift amount.
10970 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
10971
10972 // If the shift is not a no-op (in which case this should be just a sign
10973 // extend already), the truncated-to type is legal, sign_extend is legal
10974 // on that type, and the truncate to that type is both legal and free,
10975 // perform the transform.
10976 if ((ShiftAmt > 0) &&
10979 TLI.isTruncateFree(VT, TruncVT)) {
10980 SDValue Amt = DAG.getShiftAmountConstant(ShiftAmt, VT, DL);
10981 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
10982 N0.getOperand(0), Amt);
10983 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
10984 Shift);
10985 return DAG.getNode(ISD::SIGN_EXTEND, DL,
10986 N->getValueType(0), Trunc);
10987 }
10988 }
10989 }
10990
10991 // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
10992 // sra (add (shl X, N1C), AddC), N1C -->
10993 // sext (add (trunc X to (width - N1C)), AddC')
10994 // sra (sub AddC, (shl X, N1C)), N1C -->
10995 // sext (sub AddC1',(trunc X to (width - N1C)))
10996 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB) && N1C &&
10997 N0.hasOneUse()) {
10998 bool IsAdd = N0.getOpcode() == ISD::ADD;
10999 SDValue Shl = N0.getOperand(IsAdd ? 0 : 1);
11000 if (Shl.getOpcode() == ISD::SHL && Shl.getOperand(1) == N1 &&
11001 Shl.hasOneUse()) {
11002 // TODO: AddC does not need to be a splat.
11003 if (ConstantSDNode *AddC =
11004 isConstOrConstSplat(N0.getOperand(IsAdd ? 1 : 0))) {
11005 // Determine what the truncate's type would be and ask the target if
11006 // that is a free operation.
11007 LLVMContext &Ctx = *DAG.getContext();
11008 unsigned ShiftAmt = N1C->getZExtValue();
11009 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
11010 if (VT.isVector())
11011 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
11012
11013 // TODO: The simple type check probably belongs in the default hook
11014 // implementation and/or target-specific overrides (because
11015 // non-simple types likely require masking when legalized), but
11016 // that restriction may conflict with other transforms.
11017 if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
11018 TLI.isTruncateFree(VT, TruncVT)) {
11019 SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
11020 SDValue ShiftC =
11021 DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
11022 TruncVT.getScalarSizeInBits()),
11023 DL, TruncVT);
11024 SDValue Add;
11025 if (IsAdd)
11026 Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
11027 else
11028 Add = DAG.getNode(ISD::SUB, DL, TruncVT, ShiftC, Trunc);
11029 return DAG.getSExtOrTrunc(Add, DL, VT);
11030 }
11031 }
11032 }
11033 }
11034
11035 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
11036 if (N1.getOpcode() == ISD::TRUNCATE &&
11037 N1.getOperand(0).getOpcode() == ISD::AND) {
11038 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
11039 return DAG.getNode(ISD::SRA, DL, VT, N0, NewOp1);
11040 }
11041
11042 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
11043 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
11044 // if c1 is equal to the number of bits the trunc removes
11045 // TODO - support non-uniform vector shift amounts.
11046 if (N0.getOpcode() == ISD::TRUNCATE &&
11047 (N0.getOperand(0).getOpcode() == ISD::SRL ||
11048 N0.getOperand(0).getOpcode() == ISD::SRA) &&
11049 N0.getOperand(0).hasOneUse() &&
11050 N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
11051 SDValue N0Op0 = N0.getOperand(0);
11052 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
11053 EVT LargeVT = N0Op0.getValueType();
11054 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
11055 if (LargeShift->getAPIntValue() == TruncBits) {
11056 EVT LargeShiftVT = getShiftAmountTy(LargeVT);
11057 SDValue Amt = DAG.getZExtOrTrunc(N1, DL, LargeShiftVT);
11058 Amt = DAG.getNode(ISD::ADD, DL, LargeShiftVT, Amt,
11059 DAG.getConstant(TruncBits, DL, LargeShiftVT));
11060 SDValue SRA =
11061 DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
11062 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
11063 }
11064 }
11065 }
11066
11067 // Simplify, based on bits shifted out of the LHS.
11068 if (SimplifyDemandedBits(SDValue(N, 0)))
11069 return SDValue(N, 0);
11070
11071 // If the sign bit is known to be zero, switch this to a SRL.
11072 if (DAG.SignBitIsZero(N0))
11073 return DAG.getNode(ISD::SRL, DL, VT, N0, N1);
11074
11075 if (N1C && !N1C->isOpaque())
11076 if (SDValue NewSRA = visitShiftByConstant(N))
11077 return NewSRA;
11078
11079 // Try to transform this shift into a multiply-high if
11080 // it matches the appropriate pattern detected in combineShiftToMULH.
11081 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
11082 return MULH;
11083
11084 // Attempt to convert a sra of a load into a narrower sign-extending load.
11085 if (SDValue NarrowLoad = reduceLoadWidth(N))
11086 return NarrowLoad;
11087
11088 if (SDValue AVG = foldShiftToAvg(N, DL))
11089 return AVG;
11090
11091 return SDValue();
11092}
11093
11094SDValue DAGCombiner::visitSRL(SDNode *N) {
11095 SDValue N0 = N->getOperand(0);
11096 SDValue N1 = N->getOperand(1);
11097 if (SDValue V = DAG.simplifyShift(N0, N1))
11098 return V;
11099
11100 SDLoc DL(N);
11101 EVT VT = N0.getValueType();
11102 EVT ShiftVT = N1.getValueType();
11103 unsigned OpSizeInBits = VT.getScalarSizeInBits();
11104
11105 // fold (srl c1, c2) -> c1 >>u c2
11106 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, DL, VT, {N0, N1}))
11107 return C;
11108
11109 // fold vector ops
11110 if (VT.isVector())
11111 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
11112 return FoldedVOp;
11113
11114 if (SDValue NewSel = foldBinOpIntoSelect(N))
11115 return NewSel;
11116
11117 // if (srl x, c) is known to be zero, return 0
11118 ConstantSDNode *N1C = isConstOrConstSplat(N1);
11119 if (N1C &&
11120 DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
11121 return DAG.getConstant(0, DL, VT);
11122
11123 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
11124 if (N0.getOpcode() == ISD::SRL) {
11125 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
11126 ConstantSDNode *RHS) {
11127 APInt c1 = LHS->getAPIntValue();
11128 APInt c2 = RHS->getAPIntValue();
11129 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
11130 return (c1 + c2).uge(OpSizeInBits);
11131 };
11132 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
11133 return DAG.getConstant(0, DL, VT);
11134
11135 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
11136 ConstantSDNode *RHS) {
11137 APInt c1 = LHS->getAPIntValue();
11138 APInt c2 = RHS->getAPIntValue();
11139 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
11140 return (c1 + c2).ult(OpSizeInBits);
11141 };
11142 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
11143 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
11144 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
11145 }
11146 }
11147
11148 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
11149 N0.getOperand(0).getOpcode() == ISD::SRL) {
11150 SDValue InnerShift = N0.getOperand(0);
11151 // TODO - support non-uniform vector shift amounts.
11152 if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
11153 uint64_t c1 = N001C->getZExtValue();
11154 uint64_t c2 = N1C->getZExtValue();
11155 EVT InnerShiftVT = InnerShift.getValueType();
11156 EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
11157 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
11158 // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
11159 // This is only valid if the OpSizeInBits + c1 = size of inner shift.
11160 if (c1 + OpSizeInBits == InnerShiftSize) {
11161 if (c1 + c2 >= InnerShiftSize)
11162 return DAG.getConstant(0, DL, VT);
11163 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
11164 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
11165 InnerShift.getOperand(0), NewShiftAmt);
11166 return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
11167 }
11168 // In the more general case, we can clear the high bits after the shift:
11169 // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
11170 if (N0.hasOneUse() && InnerShift.hasOneUse() &&
11171 c1 + c2 < InnerShiftSize) {
11172 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
11173 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
11174 InnerShift.getOperand(0), NewShiftAmt);
11175 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
11176 OpSizeInBits - c2),
11177 DL, InnerShiftVT);
11178 SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
11179 return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
11180 }
11181 }
11182 }
11183
11184 if (N0.getOpcode() == ISD::SHL) {
11185 // fold (srl (shl nuw x, c), c) -> x
11186 if (N0.getOperand(1) == N1 && N0->getFlags().hasNoUnsignedWrap())
11187 return N0.getOperand(0);
11188
11189 // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2)), MASK) or
11190 // (and (srl x, (sub c2, c1)), MASK)
11191 if ((N0.getOperand(1) == N1 || N0->hasOneUse()) &&
11193 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
11194 ConstantSDNode *RHS) {
11195 const APInt &LHSC = LHS->getAPIntValue();
11196 const APInt &RHSC = RHS->getAPIntValue();
11197 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
11198 LHSC.getZExtValue() <= RHSC.getZExtValue();
11199 };
11200 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
11201 /*AllowUndefs*/ false,
11202 /*AllowTypeMismatch*/ true)) {
11203 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
11204 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
11205 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
11206 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
11207 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
11208 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
11209 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
11210 }
11211 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
11212 /*AllowUndefs*/ false,
11213 /*AllowTypeMismatch*/ true)) {
11214 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
11215 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
11216 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
11217 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
11218 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
11219 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
11220 }
11221 }
11222 }
11223
11224 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
11225 // TODO - support non-uniform vector shift amounts.
11226 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
11227 // Shifting in all undef bits?
11228 EVT SmallVT = N0.getOperand(0).getValueType();
11229 unsigned BitSize = SmallVT.getScalarSizeInBits();
11230 if (N1C->getAPIntValue().uge(BitSize))
11231 return DAG.getUNDEF(VT);
11232
11233 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
11234 uint64_t ShiftAmt = N1C->getZExtValue();
11235 SDLoc DL0(N0);
11236 SDValue SmallShift =
11237 DAG.getNode(ISD::SRL, DL0, SmallVT, N0.getOperand(0),
11238 DAG.getShiftAmountConstant(ShiftAmt, SmallVT, DL0));
11239 AddToWorklist(SmallShift.getNode());
11240 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
11241 return DAG.getNode(ISD::AND, DL, VT,
11242 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
11243 DAG.getConstant(Mask, DL, VT));
11244 }
11245 }
11246
11247 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
11248 // bit, which is unmodified by sra.
11249 if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
11250 if (N0.getOpcode() == ISD::SRA)
11251 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), N1);
11252 }
11253
11254 // fold (srl (ctlz x), "5") -> (x == 0 ? 1 : 0), where x has a power-of-two
11255 // bitwidth. The "5" represents (log2 (bitwidth x)).
11256 if (N1C && N0.getOpcode() == ISD::CTLZ &&
11257 isPowerOf2_32(OpSizeInBits) &&
11258 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
11259 KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
11260
11261 // If any of the input bits are KnownOne, then the input couldn't be all
11262 // zeros, thus the result of the srl will always be zero.
11263 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
11264
11265 // If all of the bits input to the ctlz node are known to be zero, then
11266 // the result of the ctlz is "32" and the result of the shift is one.
11267 APInt UnknownBits = ~Known.Zero;
11268 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
11269
11270 // Otherwise, check to see if there is exactly one bit input to the ctlz.
11271 if (UnknownBits.isPowerOf2()) {
11272 // Okay, we know that only the single bit specified by UnknownBits
11273 // could be set on input to the CTLZ node. If this bit is set, the SRL
11274 // will return 0; if it is clear, it returns 1. Change the CTLZ/SRL pair
11275 // to an SRL/XOR pair, which is likely to simplify more.
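// For instance, on i32 where only bit 3 of x may be set: ctlz(x) is 28 or 32,
// so (srl (ctlz x), 5) is 0 or 1, which equals (xor (srl x, 3), 1).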
11276 unsigned ShAmt = UnknownBits.countr_zero();
11277 SDValue Op = N0.getOperand(0);
11278
11279 if (ShAmt) {
11280 SDLoc DL(N0);
11281 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
11282 DAG.getShiftAmountConstant(ShAmt, VT, DL));
11283 AddToWorklist(Op.getNode());
11284 }
11285 return DAG.getNode(ISD::XOR, DL, VT, Op, DAG.getConstant(1, DL, VT));
11286 }
11287 }
11288
11289 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
11290 if (N1.getOpcode() == ISD::TRUNCATE &&
11291 N1.getOperand(0).getOpcode() == ISD::AND) {
11292 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
11293 return DAG.getNode(ISD::SRL, DL, VT, N0, NewOp1);
11294 }
11295
11296 // fold (srl (logic_op x, (shl (zext y), c1)), c1)
11297 // -> (logic_op (srl x, c1), (zext y))
11298 // c1 <= leadingzeros(zext(y))
11299 SDValue X, ZExtY;
11300 if (N1C && sd_match(N0, m_OneUse(m_BitwiseLogic(
11301 m_Value(X),
11304 m_Specific(N1))))))) {
11305 unsigned NumLeadingZeros = ZExtY.getScalarValueSizeInBits() -
11306 ZExtY.getOperand(0).getScalarValueSizeInBits();
11307 if (N1C->getZExtValue() <= NumLeadingZeros)
11308 return DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
11309 DAG.getNode(ISD::SRL, SDLoc(N0), VT, X, N1), ZExtY);
11310 }
11311
11312 // fold operands of srl based on knowledge that the low bits are not
11313 // demanded.
11314 if (SimplifyDemandedBits(SDValue(N, 0)))
11315 return SDValue(N, 0);
11316
11317 if (N1C && !N1C->isOpaque())
11318 if (SDValue NewSRL = visitShiftByConstant(N))
11319 return NewSRL;
11320
11321 // Attempt to convert a srl of a load into a narrower zero-extending load.
11322 if (SDValue NarrowLoad = reduceLoadWidth(N))
11323 return NarrowLoad;
11324
11325 // Here is a common situation. We want to optimize:
11326 //
11327 // %a = ...
11328 // %b = and i32 %a, 2
11329 // %c = srl i32 %b, 1
11330 // brcond i32 %c ...
11331 //
11332 // into
11333 //
11334 // %a = ...
11335 // %b = and %a, 2
11336 // %c = setcc eq %b, 0
11337 // brcond %c ...
11338 //
11339 // However, after the source operand of SRL is optimized into AND, the SRL
11340 // itself may not be optimized further. Look for it and add the BRCOND into
11341 // the worklist.
11342 //
11343 // This also tends to happen for binary operations when SimplifyDemandedBits
11344 // is involved.
11345 //
11346 // FIXME: This is unnecessary if we process the DAG in topological order,
11347 // which we plan to do. This workaround can be removed once the DAG is
11348 // processed in topological order.
11349 if (N->hasOneUse()) {
11350 SDNode *User = *N->user_begin();
11351
11352 // Look past the truncate.
11353 if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse())
11354 User = *User->user_begin();
11355
11356 if (User->getOpcode() == ISD::BRCOND || User->getOpcode() == ISD::AND ||
11357 User->getOpcode() == ISD::OR || User->getOpcode() == ISD::XOR)
11358 AddToWorklist(User);
11359 }
11360
11361 // Try to transform this shift into a multiply-high if
11362 // it matches the appropriate pattern detected in combineShiftToMULH.
11363 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
11364 return MULH;
11365
11366 if (SDValue AVG = foldShiftToAvg(N, DL))
11367 return AVG;
11368
11369 return SDValue();
11370}
11371
11372SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
11373 EVT VT = N->getValueType(0);
11374 SDValue N0 = N->getOperand(0);
11375 SDValue N1 = N->getOperand(1);
11376 SDValue N2 = N->getOperand(2);
11377 bool IsFSHL = N->getOpcode() == ISD::FSHL;
11378 unsigned BitWidth = VT.getScalarSizeInBits();
11379 SDLoc DL(N);
11380
11381 // fold (fshl/fshr C0, C1, C2) -> C3
11382 if (SDValue C =
11383 DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1, N2}))
11384 return C;
11385
11386 // fold (fshl N0, N1, 0) -> N0
11387 // fold (fshr N0, N1, 0) -> N1
11388 if (isPowerOf2_32(BitWidth))
11389 if (DAG.MaskedValueIsZero(
11390 N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
11391 return IsFSHL ? N0 : N1;
11392
11393 auto IsUndefOrZero = [](SDValue V) {
11394 return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
11395 };
11396
11397 // TODO - support non-uniform vector shift amounts.
11398 if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
11399 EVT ShAmtTy = N2.getValueType();
11400
11401 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
11402 if (Cst->getAPIntValue().uge(BitWidth)) {
11403 uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
11404 return DAG.getNode(N->getOpcode(), DL, VT, N0, N1,
11405 DAG.getConstant(RotAmt, DL, ShAmtTy));
11406 }
11407
11408 unsigned ShAmt = Cst->getZExtValue();
11409 if (ShAmt == 0)
11410 return IsFSHL ? N0 : N1;
11411
11412 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
11413 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
11414 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
11415 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
11416 if (IsUndefOrZero(N0))
11417 return DAG.getNode(
11418 ISD::SRL, DL, VT, N1,
11419 DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt, DL, ShAmtTy));
11420 if (IsUndefOrZero(N1))
11421 return DAG.getNode(
11422 ISD::SHL, DL, VT, N0,
11423 DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt, DL, ShAmtTy));
11424
11425 // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
11426 // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
11427 // TODO - bigendian support once we have test coverage.
11428 // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
11429 // TODO - permit LHS EXTLOAD if extensions are shifted out.
11430 if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
11431 !DAG.getDataLayout().isBigEndian()) {
11432 auto *LHS = dyn_cast<LoadSDNode>(N0);
11433 auto *RHS = dyn_cast<LoadSDNode>(N1);
11434 if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
11435 LHS->getAddressSpace() == RHS->getAddressSpace() &&
11436 (LHS->hasNUsesOfValue(1, 0) || RHS->hasNUsesOfValue(1, 0)) &&
11438 if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
11439 SDLoc DL(RHS);
11440 uint64_t PtrOff =
11441 IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
11442 Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
11443 unsigned Fast = 0;
11444 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
11445 RHS->getAddressSpace(), NewAlign,
11446 RHS->getMemOperand()->getFlags(), &Fast) &&
11447 Fast) {
11448 SDValue NewPtr = DAG.getMemBasePlusOffset(
11449 RHS->getBasePtr(), TypeSize::getFixed(PtrOff), DL);
11450 AddToWorklist(NewPtr.getNode());
11451 SDValue Load = DAG.getLoad(
11452 VT, DL, RHS->getChain(), NewPtr,
11453 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
11454 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
11455 DAG.makeEquivalentMemoryOrdering(LHS, Load.getValue(1));
11456 DAG.makeEquivalentMemoryOrdering(RHS, Load.getValue(1));
11457 return Load;
11458 }
11459 }
11460 }
11461 }
11462 }
11463
11464 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
11465 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
11466 // iff we know the shift amount is in range.
11467 // TODO: when is it worth doing SUB(BW, N2) as well?
11468 if (isPowerOf2_32(BitWidth)) {
11469 APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
11470 if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
11471 return DAG.getNode(ISD::SRL, DL, VT, N1, N2);
11472 if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
11473 return DAG.getNode(ISD::SHL, DL, VT, N0, N2);
11474 }
11475
11476 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
11477 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
11478 // TODO: Investigate flipping this rotate if only one is legal.
11479 // If funnel shift is legal as well we might be better off avoiding
11480 // non-constant (BW - N2).
11481 unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
11482 if (N0 == N1 && hasOperation(RotOpc, VT))
11483 return DAG.getNode(RotOpc, DL, VT, N0, N2);
11484
11485 // Simplify, based on bits shifted out of N0/N1.
11486 if (SimplifyDemandedBits(SDValue(N, 0)))
11487 return SDValue(N, 0);
11488
11489 return SDValue();
11490}
11491
11492SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
11493 SDValue N0 = N->getOperand(0);
11494 SDValue N1 = N->getOperand(1);
11495 if (SDValue V = DAG.simplifyShift(N0, N1))
11496 return V;
11497
11498 SDLoc DL(N);
11499 EVT VT = N0.getValueType();
11500
11501 // fold (*shlsat c1, c2) -> c1<<c2
11502 if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
11503 return C;
11504
11505 ConstantSDNode *N1C = isConstOrConstSplat(N1);
11506
11507 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) {
11508 // fold (sshlsat x, c) -> (shl x, c)
11509 if (N->getOpcode() == ISD::SSHLSAT && N1C &&
11510 N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0)))
11511 return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
11512
11513 // fold (ushlsat x, c) -> (shl x, c)
11514 if (N->getOpcode() == ISD::USHLSAT && N1C &&
11515 N1C->getAPIntValue().ule(
11516 DAG.computeKnownBits(N0).countMinLeadingZeros()))
11517 return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
11518 }
11519
11520 return SDValue();
11521}
11522
11523 // Given an ABS node, detect the following patterns:
11524// (ABS (SUB (EXTEND a), (EXTEND b))).
11525// (TRUNC (ABS (SUB (EXTEND a), (EXTEND b)))).
11526// Generates UABD/SABD instruction.
11527SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
11528 EVT SrcVT = N->getValueType(0);
11529
11530 if (N->getOpcode() == ISD::TRUNCATE)
11531 N = N->getOperand(0).getNode();
11532
11533 EVT VT = N->getValueType(0);
11534 SDValue Op0, Op1;
11535
11536 if (!sd_match(N, m_Abs(m_Sub(m_Value(Op0), m_Value(Op1)))))
11537 return SDValue();
11538
11539 SDValue AbsOp0 = N->getOperand(0);
11540 unsigned Opc0 = Op0.getOpcode();
11541
11542 // Check if the operands of the sub are (zero|sign)-extended; otherwise
11543 // fall back to ValueTracking.
11544 if (Opc0 != Op1.getOpcode() ||
11545 (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
11546 Opc0 != ISD::SIGN_EXTEND_INREG)) {
11547 // fold (abs (sub nsw x, y)) -> abds(x, y)
11548 // Don't fold this for unsupported types as we lose the NSW handling.
11549 if (hasOperation(ISD::ABDS, VT) && TLI.preferABDSToABSWithNSW(VT) &&
11550 (AbsOp0->getFlags().hasNoSignedWrap() ||
11551 DAG.willNotOverflowSub(/*IsSigned=*/true, Op0, Op1))) {
11552 SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
11553 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11554 }
11555 // fold (abs (sub x, y)) -> abdu(x, y)
11556 if (hasOperation(ISD::ABDU, VT) && DAG.SignBitIsZero(Op0) &&
11557 DAG.SignBitIsZero(Op1)) {
11558 SDValue ABD = DAG.getNode(ISD::ABDU, DL, VT, Op0, Op1);
11559 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11560 }
11561 return SDValue();
11562 }
11563
11564 EVT VT0, VT1;
11565 if (Opc0 == ISD::SIGN_EXTEND_INREG) {
11566 VT0 = cast<VTSDNode>(Op0.getOperand(1))->getVT();
11567 VT1 = cast<VTSDNode>(Op1.getOperand(1))->getVT();
11568 } else {
11569 VT0 = Op0.getOperand(0).getValueType();
11570 VT1 = Op1.getOperand(0).getValueType();
11571 }
11572 unsigned ABDOpcode = (Opc0 == ISD::ZERO_EXTEND) ? ISD::ABDU : ISD::ABDS;
11573
11574 // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
11575 // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
11576 EVT MaxVT = VT0.bitsGT(VT1) ? VT0 : VT1;
11577 if ((VT0 == MaxVT || Op0->hasOneUse()) &&
11578 (VT1 == MaxVT || Op1->hasOneUse()) &&
11579 (!LegalTypes || hasOperation(ABDOpcode, MaxVT))) {
11580 SDValue ABD = DAG.getNode(ABDOpcode, DL, MaxVT,
11581 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op0),
11582 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op1));
11583 ABD = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ABD);
11584 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11585 }
11586
11587 // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
11588 // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
11589 if (!LegalOperations || hasOperation(ABDOpcode, VT)) {
11590 SDValue ABD = DAG.getNode(ABDOpcode, DL, VT, Op0, Op1);
11591 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11592 }
11593
11594 return SDValue();
11595}
11596
11597SDValue DAGCombiner::visitABS(SDNode *N) {
11598 SDValue N0 = N->getOperand(0);
11599 EVT VT = N->getValueType(0);
11600 SDLoc DL(N);
11601
11602 // fold (abs c1) -> c2
11603 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ABS, DL, VT, {N0}))
11604 return C;
11605 // fold (abs (abs x)) -> (abs x)
11606 if (N0.getOpcode() == ISD::ABS)
11607 return N0;
11608 // fold (abs x) -> x iff not-negative
11609 if (DAG.SignBitIsZero(N0))
11610 return N0;
11611
11612 if (SDValue ABD = foldABSToABD(N, DL))
11613 return ABD;
11614
11615 // fold (abs (sign_extend_inreg x)) -> (zero_extend (abs (truncate x)))
11616 // iff zero_extend/truncate are free.
11617 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
11618 EVT ExtVT = cast<VTSDNode>(N0.getOperand(1))->getVT();
11619 if (TLI.isTruncateFree(VT, ExtVT) && TLI.isZExtFree(ExtVT, VT) &&
11620 TLI.isTypeDesirableForOp(ISD::ABS, ExtVT) &&
11621 hasOperation(ISD::ABS, ExtVT)) {
11622 return DAG.getNode(
11623 ISD::ZERO_EXTEND, DL, VT,
11624 DAG.getNode(ISD::ABS, DL, ExtVT,
11625 DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N0.getOperand(0))));
11626 }
11627 }
11628
11629 return SDValue();
11630}
11631
11632SDValue DAGCombiner::visitBSWAP(SDNode *N) {
11633 SDValue N0 = N->getOperand(0);
11634 EVT VT = N->getValueType(0);
11635 SDLoc DL(N);
11636
11637 // fold (bswap c1) -> c2
11638 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BSWAP, DL, VT, {N0}))
11639 return C;
11640 // fold (bswap (bswap x)) -> x
11641 if (N0.getOpcode() == ISD::BSWAP)
11642 return N0.getOperand(0);
11643
11644 // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
11645 // isn't supported, it will be expanded to bswap followed by a manual reversal
11646 // of bits in each byte. By placing bswaps before bitreverse, we can remove
11647 // the two bswaps if the bitreverse gets expanded.
11648 if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
11649 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
11650 return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
11651 }
11652
11653 // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2))))))
11654 // iff c >= bw/2 (i.e. lower half is known zero)
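// For instance, on i64: (bswap (shl x, 48)) -->
// (zext (bswap (trunc (shl x, 16) to i32)) to i64), since only the low 16 bits
// of x reach the result.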
11655 unsigned BW = VT.getScalarSizeInBits();
11656 if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) {
11657 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11658 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), BW / 2);
11659 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11660 ShAmt->getZExtValue() >= (BW / 2) &&
11661 (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(HalfVT) &&
11662 TLI.isTruncateFree(VT, HalfVT) &&
11663 (!LegalOperations || hasOperation(ISD::BSWAP, HalfVT))) {
11664 SDValue Res = N0.getOperand(0);
11665 if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
11666 Res = DAG.getNode(ISD::SHL, DL, VT, Res,
11667 DAG.getShiftAmountConstant(NewShAmt, VT, DL));
11668 Res = DAG.getZExtOrTrunc(Res, DL, HalfVT);
11669 Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res);
11670 return DAG.getZExtOrTrunc(Res, DL, VT);
11671 }
11672 }
11673
11674 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
11675 // inverse-shift-of-bswap:
11676 // bswap (X u<< C) --> (bswap X) u>> C
11677 // bswap (X u>> C) --> (bswap X) u<< C
11678 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
11679 N0.hasOneUse()) {
11680 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11681 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11682 ShAmt->getZExtValue() % 8 == 0) {
11683 SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
11684 unsigned InverseShift = N0.getOpcode() == ISD::SHL ? ISD::SRL : ISD::SHL;
11685 return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1));
11686 }
11687 }
11688
11689 if (SDValue V = foldBitOrderCrossLogicOp(N, DAG))
11690 return V;
11691
11692 return SDValue();
11693}
11694
11695SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
11696 SDValue N0 = N->getOperand(0);
11697 EVT VT = N->getValueType(0);
11698 SDLoc DL(N);
11699
11700 // fold (bitreverse c1) -> c2
11701 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BITREVERSE, DL, VT, {N0}))
11702 return C;
11703
11704 // fold (bitreverse (bitreverse x)) -> x
11705 if (N0.getOpcode() == ISD::BITREVERSE)
11706 return N0.getOperand(0);
11707
11708 SDValue X, Y;
11709
11710 // fold (bitreverse (lshr (bitreverse x), y)) -> (shl x, y)
11711 if ((!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
11713 return DAG.getNode(ISD::SHL, DL, VT, X, Y);
11714
11715 // fold (bitreverse (shl (bitreverse x), y)) -> (lshr x, y)
11716 if ((!LegalOperations || TLI.isOperationLegal(ISD::SRL, VT)) &&
11718 return DAG.getNode(ISD::SRL, DL, VT, X, Y);
11719
11720 return SDValue();
11721}
11722
11723SDValue DAGCombiner::visitCTLZ(SDNode *N) {
11724 SDValue N0 = N->getOperand(0);
11725 EVT VT = N->getValueType(0);
11726 SDLoc DL(N);
11727
11728 // fold (ctlz c1) -> c2
11729 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTLZ, DL, VT, {N0}))
11730 return C;
11731
11732 // If the value is known never to be zero, switch to the undef version.
11733 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT))
11734 if (DAG.isKnownNeverZero(N0))
11735 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, N0);
11736
11737 return SDValue();
11738}
11739
11740SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
11741 SDValue N0 = N->getOperand(0);
11742 EVT VT = N->getValueType(0);
11743 SDLoc DL(N);
11744
11745 // fold (ctlz_zero_undef c1) -> c2
11746 if (SDValue C =
11747 DAG.FoldConstantArithmetic(ISD::CTLZ_ZERO_UNDEF, DL, VT, {N0}))
11748 return C;
11749 return SDValue();
11750}
11751
11752SDValue DAGCombiner::visitCTTZ(SDNode *N) {
11753 SDValue N0 = N->getOperand(0);
11754 EVT VT = N->getValueType(0);
11755 SDLoc DL(N);
11756
11757 // fold (cttz c1) -> c2
11758 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTTZ, DL, VT, {N0}))
11759 return C;
11760
11761 // If the value is known never to be zero, switch to the undef version.
11762 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT))
11763 if (DAG.isKnownNeverZero(N0))
11764 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, DL, VT, N0);
11765
11766 return SDValue();
11767}
11768
11769SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
11770 SDValue N0 = N->getOperand(0);
11771 EVT VT = N->getValueType(0);
11772 SDLoc DL(N);
11773
11774 // fold (cttz_zero_undef c1) -> c2
11775 if (SDValue C =
11776 DAG.FoldConstantArithmetic(ISD::CTTZ_ZERO_UNDEF, DL, VT, {N0}))
11777 return C;
11778 return SDValue();
11779}
11780
11781SDValue DAGCombiner::visitCTPOP(SDNode *N) {
11782 SDValue N0 = N->getOperand(0);
11783 EVT VT = N->getValueType(0);
11784 unsigned NumBits = VT.getScalarSizeInBits();
11785 SDLoc DL(N);
11786
11787 // fold (ctpop c1) -> c2
11788 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTPOP, DL, VT, {N0}))
11789 return C;
11790
11791 // If the source is being shifted, but doesn't affect any active bits,
11792 // then we can call CTPOP on the shift source directly.
11793 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SHL) {
11794 if (ConstantSDNode *AmtC = isConstOrConstSplat(N0.getOperand(1))) {
11795 const APInt &Amt = AmtC->getAPIntValue();
11796 if (Amt.ult(NumBits)) {
11797 KnownBits KnownSrc = DAG.computeKnownBits(N0.getOperand(0));
11798 if ((N0.getOpcode() == ISD::SRL &&
11799 Amt.ule(KnownSrc.countMinTrailingZeros())) ||
11800 (N0.getOpcode() == ISD::SHL &&
11801 Amt.ule(KnownSrc.countMinLeadingZeros()))) {
11802 return DAG.getNode(ISD::CTPOP, DL, VT, N0.getOperand(0));
11803 }
11804 }
11805 }
11806 }
11807
11808 // If the upper bits are known to be zero, then see if it's profitable to
11809 // only count the lower bits.
11810 if (VT.isScalarInteger() && NumBits > 8 && (NumBits & 1) == 0) {
11811 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), NumBits / 2);
11812 if (hasOperation(ISD::CTPOP, HalfVT) &&
11813 TLI.isTypeDesirableForOp(ISD::CTPOP, HalfVT) &&
11814 TLI.isTruncateFree(N0, HalfVT) && TLI.isZExtFree(HalfVT, VT)) {
11815 APInt UpperBits = APInt::getHighBitsSet(NumBits, NumBits / 2);
11816 if (DAG.MaskedValueIsZero(N0, UpperBits)) {
11817 SDValue PopCnt = DAG.getNode(ISD::CTPOP, DL, HalfVT,
11818 DAG.getZExtOrTrunc(N0, DL, HalfVT));
11819 return DAG.getZExtOrTrunc(PopCnt, DL, VT);
11820 }
11821 }
11822 }
11823
11824 return SDValue();
11825}
11826
11828 SDValue RHS, const SDNodeFlags Flags,
11829 const TargetLowering &TLI) {
11830 EVT VT = LHS.getValueType();
11831 if (!VT.isFloatingPoint())
11832 return false;
11833
11834 const TargetOptions &Options = DAG.getTarget().Options;
11835
11836 return (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) &&
11838 (Flags.hasNoNaNs() ||
11839 (DAG.isKnownNeverNaN(RHS) && DAG.isKnownNeverNaN(LHS)));
11840}
11841
11842 static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
11843 SDValue RHS, SDValue True, SDValue False,
11844 ISD::CondCode CC,
11845 const TargetLowering &TLI,
11846 SelectionDAG &DAG) {
11847 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
11848 switch (CC) {
11849 case ISD::SETOLT:
11850 case ISD::SETOLE:
11851 case ISD::SETLT:
11852 case ISD::SETLE:
11853 case ISD::SETULT:
11854 case ISD::SETULE: {
11855 // Since it is already known never-NaN to get here, either fminnum or
11856 // fminnum_ieee is OK. Try the ieee version first, since fminnum is
11857 // expanded in terms of it.
11858 unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
11859 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11860 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11861
11862 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
11863 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11864 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11865 return SDValue();
11866 }
11867 case ISD::SETOGT:
11868 case ISD::SETOGE:
11869 case ISD::SETGT:
11870 case ISD::SETGE:
11871 case ISD::SETUGT:
11872 case ISD::SETUGE: {
11873 unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
11874 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11875 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11876
11877 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
11878 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11879 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11880 return SDValue();
11881 }
11882 default:
11883 return SDValue();
11884 }
11885}
11886
11887// Convert (sr[al] (add n[su]w x, y)) -> (avgfloor[su] x, y)
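// For example: (srl (add nuw x, y), 1) --> (avgflooru x, y) and
// (sra (add nsw x, y), 1) --> (avgfloors x, y); the no-wrap flag is what makes
// the single-bit shift of the add equal to the full-precision average.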
11888SDValue DAGCombiner::foldShiftToAvg(SDNode *N, const SDLoc &DL) {
11889 const unsigned Opcode = N->getOpcode();
11890 if (Opcode != ISD::SRA && Opcode != ISD::SRL)
11891 return SDValue();
11892
11893 EVT VT = N->getValueType(0);
11894 bool IsUnsigned = Opcode == ISD::SRL;
11895
11896 // Captured values.
11897 SDValue A, B, Add;
11898
11899 // Match floor average as it is common to both floor/ceil avgs.
11900 if (sd_match(N, m_BinOp(Opcode,
11902 m_One()))) {
11903 // Decide whether signed or unsigned.
11904 unsigned FloorISD = IsUnsigned ? ISD::AVGFLOORU : ISD::AVGFLOORS;
11905 if (!hasOperation(FloorISD, VT))
11906 return SDValue();
11907
11908 // Can't optimize adds that may wrap.
11909 if ((IsUnsigned && !Add->getFlags().hasNoUnsignedWrap()) ||
11910 (!IsUnsigned && !Add->getFlags().hasNoSignedWrap()))
11911 return SDValue();
11912
11913 return DAG.getNode(FloorISD, DL, N->getValueType(0), {A, B});
11914 }
11915
11916 return SDValue();
11917}
11918
11919SDValue DAGCombiner::foldBitwiseOpWithNeg(SDNode *N, const SDLoc &DL, EVT VT) {
11920 unsigned Opc = N->getOpcode();
11921 SDValue X, Y, Z;
11922 if (sd_match(
11924 return DAG.getNode(Opc, DL, VT, X,
11925 DAG.getNOT(DL, DAG.getNode(ISD::SUB, DL, VT, Y, Z), VT));
11926
11928 m_Value(Z)))))
11929 return DAG.getNode(Opc, DL, VT, X,
11930 DAG.getNOT(DL, DAG.getNode(ISD::ADD, DL, VT, Y, Z), VT));
11931
11932 return SDValue();
11933}
11934
11935/// Generate Min/Max node
11936SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
11937 SDValue RHS, SDValue True,
11938 SDValue False, ISD::CondCode CC) {
11939 if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
11940 return combineMinNumMaxNumImpl(DL, VT, LHS, RHS, True, False, CC, TLI, DAG);
11941
11942 // If we can't directly match this, try to see if we can pull an fneg out of
11943 // the select.
11944 SDValue NegTrue = TLI.getCheaperNegatedExpression(
11945 True, DAG, LegalOperations, ForCodeSize);
11946 if (!NegTrue)
11947 return SDValue();
11948
11949 HandleSDNode NegTrueHandle(NegTrue);
11950
11951 // Try to unfold an fneg from the select if we are comparing the negated
11952 // constant.
11953 //
11954 // select (setcc x, K) (fneg x), -K -> fneg(minnum(x, K))
11955 //
11956 // TODO: Handle fabs
11957 if (LHS == NegTrue) {
11958 // If we can't directly match this, try to see if we can pull an fneg out of
11959 // the select.
11960 SDValue NegRHS = TLI.getCheaperNegatedExpression(
11961 RHS, DAG, LegalOperations, ForCodeSize);
11962 if (NegRHS) {
11963 HandleSDNode NegRHSHandle(NegRHS);
11964 if (NegRHS == False) {
11965 SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue,
11966 False, CC, TLI, DAG);
11967 if (Combined)
11968 return DAG.getNode(ISD::FNEG, DL, VT, Combined);
11969 }
11970 }
11971 }
11972
11973 return SDValue();
11974}
11975
11976/// If a (v)select has a condition value that is a sign-bit test, try to smear
11977/// the condition operand sign-bit across the value width and use it as a mask.
11978 static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL,
11979 SelectionDAG &DAG) {
11980 SDValue Cond = N->getOperand(0);
11981 SDValue C1 = N->getOperand(1);
11982 SDValue C2 = N->getOperand(2);
11984 return SDValue();
11985
11986 EVT VT = N->getValueType(0);
11987 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
11988 VT != Cond.getOperand(0).getValueType())
11989 return SDValue();
11990
11991 // The inverted-condition + commuted-select variants of these patterns are
11992 // canonicalized to these forms in IR.
11993 SDValue X = Cond.getOperand(0);
11994 SDValue CondC = Cond.getOperand(1);
11995 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11996 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
11997 isAllOnesOrAllOnesSplat(C2)) {
11998 // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
11999 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
12000 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
12001 return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
12002 }
12003 if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
12004 // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
12005 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
12006 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
12007 return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
12008 }
12009 return SDValue();
12010}
12011
12013 const TargetLowering &TLI) {
12014 if (!TLI.convertSelectOfConstantsToMath(VT))
12015 return false;
12016
12017 if (Cond.getOpcode() != ISD::SETCC || !Cond->hasOneUse())
12018 return true;
12020 return true;
12021
12022 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
12023 if (CC == ISD::SETLT && isNullOrNullSplat(Cond.getOperand(1)))
12024 return true;
12025 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond.getOperand(1)))
12026 return true;
12027
12028 return false;
12029}
12030
12031SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
12032 SDValue Cond = N->getOperand(0);
12033 SDValue N1 = N->getOperand(1);
12034 SDValue N2 = N->getOperand(2);
12035 EVT VT = N->getValueType(0);
12036 EVT CondVT = Cond.getValueType();
12037 SDLoc DL(N);
12038
12039 if (!VT.isInteger())
12040 return SDValue();
12041
12042 auto *C1 = dyn_cast<ConstantSDNode>(N1);
12043 auto *C2 = dyn_cast<ConstantSDNode>(N2);
12044 if (!C1 || !C2)
12045 return SDValue();
12046
12047 if (CondVT != MVT::i1 || LegalOperations) {
12048 // fold (select Cond, 0, 1) -> (xor Cond, 1)
12049 // We can't do this reliably if integer-based booleans have different contents
12050 // from floating-point-based booleans. This is because we can't tell whether we
12051 // have an integer-based boolean or a floating-point-based boolean unless we
12052 // can find the SETCC that produced it and inspect its operands. This is
12053 // fairly easy if C is the SETCC node, but it can potentially be
12054 // undiscoverable (or not reasonably discoverable). For example, it could be
12055 // in another basic block or it could require searching a complicated
12056 // expression.
12057 if (CondVT.isInteger() &&
12058 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
12059 TargetLowering::ZeroOrOneBooleanContent &&
12060 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
12061 TargetLowering::ZeroOrOneBooleanContent &&
12062 C1->isZero() && C2->isOne()) {
12063 SDValue NotCond =
12064 DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
12065 if (VT.bitsEq(CondVT))
12066 return NotCond;
12067 return DAG.getZExtOrTrunc(NotCond, DL, VT);
12068 }
12069
12070 return SDValue();
12071 }
12072
12073 // Only do this before legalization to avoid conflicting with target-specific
12074 // transforms in the other direction (create a select from a zext/sext). There
12075 // is also a target-independent combine here in DAGCombiner in the other
12076 // direction for (select Cond, -1, 0) when the condition is not i1.
12077 assert(CondVT == MVT::i1 && !LegalOperations);
12078
12079 // select Cond, 1, 0 --> zext (Cond)
12080 if (C1->isOne() && C2->isZero())
12081 return DAG.getZExtOrTrunc(Cond, DL, VT);
12082
12083 // select Cond, -1, 0 --> sext (Cond)
12084 if (C1->isAllOnes() && C2->isZero())
12085 return DAG.getSExtOrTrunc(Cond, DL, VT);
12086
12087 // select Cond, 0, 1 --> zext (!Cond)
12088 if (C1->isZero() && C2->isOne()) {
12089 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
12090 NotCond = DAG.getZExtOrTrunc(NotCond, DL, VT);
12091 return NotCond;
12092 }
12093
12094 // select Cond, 0, -1 --> sext (!Cond)
12095 if (C1->isZero() && C2->isAllOnes()) {
12096 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
12097 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
12098 return NotCond;
12099 }
12100
12101 // Use a target hook because some targets may prefer to transform in the
12102 // other direction.
12104 return SDValue();
12105
12106 // For any constants that differ by 1, we can transform the select into
12107 // an extend and add.
12108 const APInt &C1Val = C1->getAPIntValue();
12109 const APInt &C2Val = C2->getAPIntValue();
12110
12111 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
12112 if (C1Val - 1 == C2Val) {
12113 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
12114 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
12115 }
12116
12117 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
12118 if (C1Val + 1 == C2Val) {
12119 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
12120 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
12121 }
12122
12123 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
12124 if (C1Val.isPowerOf2() && C2Val.isZero()) {
12125 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
12126 SDValue ShAmtC =
12127 DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL);
12128 return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
12129 }
12130
12131 // select Cond, -1, C --> or (sext Cond), C
12132 if (C1->isAllOnes()) {
12133 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
12134 return DAG.getNode(ISD::OR, DL, VT, Cond, N2);
12135 }
12136
12137 // select Cond, C, -1 --> or (sext (not Cond)), C
12138 if (C2->isAllOnes()) {
12139 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
12140 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
12141 return DAG.getNode(ISD::OR, DL, VT, NotCond, N1);
12142 }
12143
12144 if (SDValue V = foldSelectOfConstantsUsingSra(N, DL, DAG))
12145 return V;
12146
12147 return SDValue();
12148}
12149
12150template <class MatchContextClass>
12152 SelectionDAG &DAG) {
12153 assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT ||
12154 N->getOpcode() == ISD::VP_SELECT) &&
12155 "Expected a (v)(vp.)select");
12156 SDValue Cond = N->getOperand(0);
12157 SDValue T = N->getOperand(1), F = N->getOperand(2);
12158 EVT VT = N->getValueType(0);
12159 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12160 MatchContextClass matcher(DAG, TLI, N);
12161
12162 if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
12163 return SDValue();
12164
12165 // select Cond, Cond, F --> or Cond, freeze(F)
12166 // select Cond, 1, F --> or Cond, freeze(F)
12167 if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
12168 return matcher.getNode(ISD::OR, DL, VT, Cond, DAG.getFreeze(F));
12169
12170 // select Cond, T, Cond --> and Cond, freeze(T)
12171 // select Cond, T, 0 --> and Cond, freeze(T)
12172 if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
12173 return matcher.getNode(ISD::AND, DL, VT, Cond, DAG.getFreeze(T));
12174
12175 // select Cond, T, 1 --> or (not Cond), freeze(T)
12176 if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
12177 SDValue NotCond =
12178 matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
12179 return matcher.getNode(ISD::OR, DL, VT, NotCond, DAG.getFreeze(T));
12180 }
12181
12182 // select Cond, 0, F --> and (not Cond), freeze(F)
12183 if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
12184 SDValue NotCond =
12185 matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
12186 return matcher.getNode(ISD::AND, DL, VT, NotCond, DAG.getFreeze(F));
12187 }
12188
12189 return SDValue();
12190}
12191
12193 SDValue N0 = N->getOperand(0);
12194 SDValue N1 = N->getOperand(1);
12195 SDValue N2 = N->getOperand(2);
12196 EVT VT = N->getValueType(0);
12197 unsigned EltSizeInBits = VT.getScalarSizeInBits();
12198
12199 SDValue Cond0, Cond1;
12200 ISD::CondCode CC;
12201 if (!sd_match(N0, m_OneUse(m_SetCC(m_Value(Cond0), m_Value(Cond1),
12202 m_CondCode(CC)))) ||
12203 VT != Cond0.getValueType())
12204 return SDValue();
12205
12206 // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
12207 // compare is inverted from that pattern ("Cond0 s> -1").
12208 if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
12209 ; // This is the pattern we are looking for.
12210 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
12211 std::swap(N1, N2);
12212 else
12213 return SDValue();
12214
12215 // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & freeze(N1)
12216 if (isNullOrNullSplat(N2)) {
12217 SDLoc DL(N);
12218 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
12219 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
12220 return DAG.getNode(ISD::AND, DL, VT, Sra, DAG.getFreeze(N1));
12221 }
12222
12223 // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | freeze(N2)
12224 if (isAllOnesOrAllOnesSplat(N1)) {
12225 SDLoc DL(N);
12226 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
12227 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
12228 return DAG.getNode(ISD::OR, DL, VT, Sra, DAG.getFreeze(N2));
12229 }
12230
12231 // If we have to invert the sign bit mask, only do that transform if the
12232 // target has a bitwise 'and not' instruction (the invert is free).
12233 // (Cond0 s< 0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & freeze(N2)
12234 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12235 if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
12236 SDLoc DL(N);
12237 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
12238 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
12239 SDValue Not = DAG.getNOT(DL, Sra, VT);
12240 return DAG.getNode(ISD::AND, DL, VT, Not, DAG.getFreeze(N2));
12241 }
12242
12243 // TODO: There's another pattern in this family, but it may require
12244 // implementing hasOrNot() to check for profitability:
12245 // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | freeze(N2)
12246
12247 return SDValue();
12248}
12249
12250// Match SELECTs with absolute difference patterns.
12251// (select (setcc a, b, set?gt), (sub a, b), (sub b, a)) --> (abd? a, b)
12252// (select (setcc a, b, set?ge), (sub a, b), (sub b, a)) --> (abd? a, b)
12253// (select (setcc a, b, set?lt), (sub b, a), (sub a, b)) --> (abd? a, b)
12254// (select (setcc a, b, set?le), (sub b, a), (sub a, b)) --> (abd? a, b)
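// For example, with unsigned i8 values a=3 and b=10, (setult a, b) is true,
// so the select yields (sub b, a) = 7, which is exactly (abdu a, b); the
// signed comparisons map to ISD::ABDS in the same way.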
12255SDValue DAGCombiner::foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
12256 SDValue False, ISD::CondCode CC,
12257 const SDLoc &DL) {
12258 bool IsSigned = isSignedIntSetCC(CC);
12259 unsigned ABDOpc = IsSigned ? ISD::ABDS : ISD::ABDU;
12260 EVT VT = LHS.getValueType();
12261
12262 if (LegalOperations && !hasOperation(ABDOpc, VT))
12263 return SDValue();
12264
12265 switch (CC) {
12266 case ISD::SETGT:
12267 case ISD::SETGE:
12268 case ISD::SETUGT:
12269 case ISD::SETUGE:
12270 if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
12271 sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS)))))
12272 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12273 if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
12274 sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
12275 hasOperation(ABDOpc, VT))
12276 return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
12277 break;
12278 case ISD::SETLT:
12279 case ISD::SETLE:
12280 case ISD::SETULT:
12281 case ISD::SETULE:
12282 if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
12283 sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS)))))
12284 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12285 if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
12286 sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
12287 hasOperation(ABDOpc, VT))
12288 return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
12289 break;
12290 default:
12291 break;
12292 }
12293
12294 return SDValue();
12295}
12296
12297// ([v]select (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
12298// ([v]select (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))
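// For example, with i8 x and C=200 (so ~C=55): when x=210 the add wraps to 9
// and both the select and (umin (add x, 55), x) produce 9; when x=100 both
// produce 100, so the two forms agree for every x.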
12299SDValue DAGCombiner::foldSelectToUMin(SDValue LHS, SDValue RHS, SDValue True,
12300 SDValue False, ISD::CondCode CC,
12301 const SDLoc &DL) {
12302 APInt C;
12303 EVT VT = True.getValueType();
12304 if (sd_match(RHS, m_ConstInt(C)) && hasUMin(VT)) {
12305 if (CC == ISD::SETUGT && LHS == False &&
12306 sd_match(True, m_Add(m_Specific(False), m_SpecificInt(~C)))) {
12307 SDValue AddC = DAG.getConstant(~C, DL, VT);
12308 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, False, AddC);
12309 return DAG.getNode(ISD::UMIN, DL, VT, Add, False);
12310 }
12311 if (CC == ISD::SETULT && LHS == True &&
12312 sd_match(False, m_Add(m_Specific(True), m_SpecificInt(-C)))) {
12313 SDValue AddC = DAG.getConstant(-C, DL, VT);
12314 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, True, AddC);
12315 return DAG.getNode(ISD::UMIN, DL, VT, True, Add);
12316 }
12317 }
12318 return SDValue();
12319}
12320
12321SDValue DAGCombiner::visitSELECT(SDNode *N) {
12322 SDValue N0 = N->getOperand(0);
12323 SDValue N1 = N->getOperand(1);
12324 SDValue N2 = N->getOperand(2);
12325 EVT VT = N->getValueType(0);
12326 EVT VT0 = N0.getValueType();
12327 SDLoc DL(N);
12328 SDNodeFlags Flags = N->getFlags();
12329
12330 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12331 return V;
12332
12334 return V;
12335
12336 // select (not Cond), N1, N2 -> select Cond, N2, N1
12337 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
12338 return DAG.getSelect(DL, VT, F, N2, N1, Flags);
12339
12340 if (SDValue V = foldSelectOfConstants(N))
12341 return V;
12342
12343 // If we can fold this based on the true/false value, do so.
12344 if (SimplifySelectOps(N, N1, N2))
12345 return SDValue(N, 0); // Don't revisit N.
12346
12347 if (VT0 == MVT::i1) {
12348 // The code in this block deals with the following 2 equivalences:
12349 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
12350 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
12351 // The target can specify its preferred form with the
12352 // shouldNormalizeToSelectSequence() callback. However we always transform
12353 // to the right anyway if we find the inner select exists in the DAG anyway
12354 // and we always transform to the left side if we know that we can further
12355 // optimize the combination of the conditions.
12356 bool normalizeToSequence =
12357 TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
12358 // select (and Cond0, Cond1), X, Y
12359 // -> select Cond0, (select Cond1, X, Y), Y
12360 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
12361 SDValue Cond0 = N0->getOperand(0);
12362 SDValue Cond1 = N0->getOperand(1);
12363 SDValue InnerSelect =
12364 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
12365 if (normalizeToSequence || !InnerSelect.use_empty())
12366 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
12367 InnerSelect, N2, Flags);
12368 // Cleanup on failure.
12369 if (InnerSelect.use_empty())
12370 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
12371 }
12372 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
12373 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
12374 SDValue Cond0 = N0->getOperand(0);
12375 SDValue Cond1 = N0->getOperand(1);
12376 SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
12377 Cond1, N1, N2, Flags);
12378 if (normalizeToSequence || !InnerSelect.use_empty())
12379 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
12380 InnerSelect, Flags);
12381 // Cleanup on failure.
12382 if (InnerSelect.use_empty())
12383 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
12384 }
12385
12386 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
12387 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
12388 SDValue N1_0 = N1->getOperand(0);
12389 SDValue N1_1 = N1->getOperand(1);
12390 SDValue N1_2 = N1->getOperand(2);
12391 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
12392 // Create the actual and node if we can generate good code for it.
12393 if (!normalizeToSequence) {
12394 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
12395 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
12396 N2, Flags);
12397 }
12398 // Otherwise see if we can optimize the "and" to a better pattern.
12399 if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
12400 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
12401 N2, Flags);
12402 }
12403 }
12404 }
12405 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
12406 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
12407 SDValue N2_0 = N2->getOperand(0);
12408 SDValue N2_1 = N2->getOperand(1);
12409 SDValue N2_2 = N2->getOperand(2);
12410 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
12411 // Create the actual or node if we can generate good code for it.
12412 if (!normalizeToSequence) {
12413 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
12414 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
12415 N2_2, Flags);
12416 }
12417 // Otherwise see if we can optimize to a better pattern.
12418 if (SDValue Combined = visitORLike(N0, N2_0, DL))
12419 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
12420 N2_2, Flags);
12421 }
12422 }
12423
12424 // select usubo(x, y).overflow, (sub y, x), (usubo x, y) -> abdu(x, y)
12425 if (N0.getOpcode() == ISD::USUBO && N0.getResNo() == 1 &&
12426 N2.getNode() == N0.getNode() && N2.getResNo() == 0 &&
12427 N1.getOpcode() == ISD::SUB && N2.getOperand(0) == N1.getOperand(1) &&
12428 N2.getOperand(1) == N1.getOperand(0) &&
12429 (!LegalOperations || TLI.isOperationLegal(ISD::ABDU, VT)))
12430 return DAG.getNode(ISD::ABDU, DL, VT, N0.getOperand(0), N0.getOperand(1));
12431
12432 // select usubo(x, y).overflow, (usubo x, y), (sub y, x) -> neg (abdu x, y)
12433 if (N0.getOpcode() == ISD::USUBO && N0.getResNo() == 1 &&
12434 N1.getNode() == N0.getNode() && N1.getResNo() == 0 &&
12435 N2.getOpcode() == ISD::SUB && N2.getOperand(0) == N1.getOperand(1) &&
12436 N2.getOperand(1) == N1.getOperand(0) &&
12437 (!LegalOperations || TLI.isOperationLegal(ISD::ABDU, VT)))
12438 return DAG.getNegative(
12439 DAG.getNode(ISD::ABDU, DL, VT, N0.getOperand(0), N0.getOperand(1)),
12440 DL, VT);
12441 }
12442
12443 // Fold selects based on a setcc into other things, such as min/max/abs.
12444 if (N0.getOpcode() == ISD::SETCC) {
12445 SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
12446 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
12447
12448 // select (fcmp lt x, y), x, y -> fminnum x, y
12449 // select (fcmp gt x, y), x, y -> fmaxnum x, y
12450 //
12451 // This is OK if we don't care what happens if either operand is a NaN.
12452 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, Flags, TLI))
12453 if (SDValue FMinMax =
12454 combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC))
12455 return FMinMax;
12456
12457 // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
12458 // This is conservatively limited to pre-legal-operations to give targets
12459 // a chance to reverse the transform if they want to do that. Also, it is
12460 // unlikely that the pattern would be formed late, so it's probably not
12461 // worth going through the other checks.
12462 if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
12463 CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
12464 N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
12465 auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
12466 auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
12467 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
12468 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
12469 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
12470 //
12471 // The IR equivalent of this transform would have this form:
12472 // %a = add %x, C
12473 // %c = icmp ugt %x, ~C
12474 // %r = select %c, -1, %a
12475 // =>
12476 // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
12477 // %u0 = extractvalue %u, 0
12478 // %u1 = extractvalue %u, 1
12479 // %r = select %u1, -1, %u0
12480 SDVTList VTs = DAG.getVTList(VT, VT0);
12481 SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
12482 return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
12483 }
12484 }
12485
12486 if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
12487 (!LegalOperations &&
12488 TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
12489 // Any flags available in a select/setcc fold will be on the setcc as they
12490 // migrated from fcmp
12491 return DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, N2,
12492 N0.getOperand(2), N0->getFlags());
12493 }
12494
12495 if (SDValue ABD = foldSelectToABD(Cond0, Cond1, N1, N2, CC, DL))
12496 return ABD;
12497
12498 if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
12499 return NewSel;
12500
12501 // (select (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
12502 // (select (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))
12503 if (SDValue UMin = foldSelectToUMin(Cond0, Cond1, N1, N2, CC, DL))
12504 return UMin;
12505 }
12506
12507 if (!VT.isVector())
12508 if (SDValue BinOp = foldSelectOfBinops(N))
12509 return BinOp;
12510
12511 if (SDValue R = combineSelectAsExtAnd(N0, N1, N2, DL, DAG))
12512 return R;
12513
12514 return SDValue();
12515}
12516
12517// This function assumes all the vselect's arguments are CONCAT_VECTOR
12518// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
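// For example, if the condition is the constant vector <-1,-1,0,0> and the
// arms are concat_vectors(A,B) and concat_vectors(C,D), the bottom half
// always picks from A and the top half from D, so the whole vselect becomes
// concat_vectors(A, D).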
12519 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
12520 SDLoc DL(N);
12521 SDValue Cond = N->getOperand(0);
12522 SDValue LHS = N->getOperand(1);
12523 SDValue RHS = N->getOperand(2);
12524 EVT VT = N->getValueType(0);
12525 int NumElems = VT.getVectorNumElements();
12526 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
12527 RHS.getOpcode() == ISD::CONCAT_VECTORS &&
12528 Cond.getOpcode() == ISD::BUILD_VECTOR);
12529
12530 // CONCAT_VECTORS can take an arbitrary number of arguments. We only care about
12531 // binary ones here.
12532 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
12533 return SDValue();
12534
12535 // We're sure we have an even number of elements due to the
12536 // concat_vectors we have as arguments to vselect.
12537 // Skip BV elements until we find one that's not an UNDEF.
12538 // After we find a non-UNDEF element, keep looping until we get to half the
12539 // length of the BV and check that all the non-undef nodes are the same.
12540 ConstantSDNode *BottomHalf = nullptr;
12541 for (int i = 0; i < NumElems / 2; ++i) {
12542 if (Cond->getOperand(i)->isUndef())
12543 continue;
12544
12545 if (BottomHalf == nullptr)
12546 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
12547 else if (Cond->getOperand(i).getNode() != BottomHalf)
12548 return SDValue();
12549 }
12550
12551 // Do the same for the second half of the BuildVector
12552 ConstantSDNode *TopHalf = nullptr;
12553 for (int i = NumElems / 2; i < NumElems; ++i) {
12554 if (Cond->getOperand(i)->isUndef())
12555 continue;
12556
12557 if (TopHalf == nullptr)
12558 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
12559 else if (Cond->getOperand(i).getNode() != TopHalf)
12560 return SDValue();
12561 }
12562
12563 assert(TopHalf && BottomHalf &&
12564 "One half of the selector was all UNDEFs and the other was all the "
12565 "same value. This should have been addressed before this function.");
12566 return DAG.getNode(
12567 ISD::CONCAT_VECTORS, DL, VT,
12568 BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
12569 TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
12570}
12571
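// Try to split a uniform (splat) contribution out of a gather/scatter index
// and fold it into the scalar base pointer. For example, an index of
// (splat(B) + V) becomes BasePtr' = BasePtr + B with index V, so the uniform
// part is added once instead of in every vector lane.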
12572bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
12573 SelectionDAG &DAG, const SDLoc &DL) {
12574
12575 // Only perform the transformation when existing operands can be reused.
12576 if (IndexIsScaled)
12577 return false;
12578
12579 if (!isNullConstant(BasePtr) && !Index.hasOneUse())
12580 return false;
12581
12582 EVT VT = BasePtr.getValueType();
12583
12584 if (SDValue SplatVal = DAG.getSplatValue(Index);
12585 SplatVal && !isNullConstant(SplatVal) &&
12586 SplatVal.getValueType() == VT) {
12587 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12588 Index = DAG.getSplat(Index.getValueType(), DL, DAG.getConstant(0, DL, VT));
12589 return true;
12590 }
12591
12592 if (Index.getOpcode() != ISD::ADD)
12593 return false;
12594
12595 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(0));
12596 SplatVal && SplatVal.getValueType() == VT) {
12597 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12598 Index = Index.getOperand(1);
12599 return true;
12600 }
12601 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(1));
12602 SplatVal && SplatVal.getValueType() == VT) {
12603 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12604 Index = Index.getOperand(0);
12605 return true;
12606 }
12607 return false;
12608}
12609
12610// Fold sext/zext of index into index type.
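// For example, a gather whose index is zext(<n x i32> %idx to <n x i64>) can
// use %idx directly with an unsigned index type when the target reports the
// extend as redundant, avoiding the explicit widening of the index vector.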
12611bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
12612 SelectionDAG &DAG) {
12613 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12614
12615 // It's always safe to look through zero extends.
12616 if (Index.getOpcode() == ISD::ZERO_EXTEND) {
12617 if (TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
12618 IndexType = ISD::UNSIGNED_SCALED;
12619 Index = Index.getOperand(0);
12620 return true;
12621 }
12622 if (ISD::isIndexTypeSigned(IndexType)) {
12623 IndexType = ISD::UNSIGNED_SCALED;
12624 return true;
12625 }
12626 }
12627
12628 // It's only safe to look through sign extends when Index is signed.
12629 if (Index.getOpcode() == ISD::SIGN_EXTEND &&
12630 ISD::isIndexTypeSigned(IndexType) &&
12631 TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
12632 Index = Index.getOperand(0);
12633 return true;
12634 }
12635
12636 return false;
12637}
12638
12639SDValue DAGCombiner::visitVPSCATTER(SDNode *N) {
12640 VPScatterSDNode *MSC = cast<VPScatterSDNode>(N);
12641 SDValue Mask = MSC->getMask();
12642 SDValue Chain = MSC->getChain();
12643 SDValue Index = MSC->getIndex();
12644 SDValue Scale = MSC->getScale();
12645 SDValue StoreVal = MSC->getValue();
12646 SDValue BasePtr = MSC->getBasePtr();
12647 SDValue VL = MSC->getVectorLength();
12648 ISD::MemIndexType IndexType = MSC->getIndexType();
12649 SDLoc DL(N);
12650
12651 // Zap scatters with a zero mask.
12652 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12653 return Chain;
12654
12655 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
12656 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
12657 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12658 DL, Ops, MSC->getMemOperand(), IndexType);
12659 }
12660
12661 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
12662 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
12663 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12664 DL, Ops, MSC->getMemOperand(), IndexType);
12665 }
12666
12667 return SDValue();
12668}
12669
12670SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
12671 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
12672 SDValue Mask = MSC->getMask();
12673 SDValue Chain = MSC->getChain();
12674 SDValue Index = MSC->getIndex();
12675 SDValue Scale = MSC->getScale();
12676 SDValue StoreVal = MSC->getValue();
12677 SDValue BasePtr = MSC->getBasePtr();
12678 ISD::MemIndexType IndexType = MSC->getIndexType();
12679 SDLoc DL(N);
12680
12681 // Zap scatters with a zero mask.
12682 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12683 return Chain;
12684
12685 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
12686 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
12687 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12688 DL, Ops, MSC->getMemOperand(), IndexType,
12689 MSC->isTruncatingStore());
12690 }
12691
12692 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
12693 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
12694 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12695 DL, Ops, MSC->getMemOperand(), IndexType,
12696 MSC->isTruncatingStore());
12697 }
12698
12699 return SDValue();
12700}
12701
12702SDValue DAGCombiner::visitMSTORE(SDNode *N) {
12703 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
12704 SDValue Mask = MST->getMask();
12705 SDValue Chain = MST->getChain();
12706 SDValue Value = MST->getValue();
12707 SDValue Ptr = MST->getBasePtr();
12708
12709 // Zap masked stores with a zero mask.
12710 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12711 return Chain;
12712
12713 // Remove a masked store if base pointers and masks are equal.
12714 if (MaskedStoreSDNode *MST1 = dyn_cast<MaskedStoreSDNode>(Chain)) {
12715 if (MST->isUnindexed() && MST->isSimple() && MST1->isUnindexed() &&
12716 MST1->isSimple() && MST1->getBasePtr() == Ptr &&
12717 !MST->getBasePtr().isUndef() &&
12718 ((Mask == MST1->getMask() && MST->getMemoryVT().getStoreSize() ==
12719 MST1->getMemoryVT().getStoreSize()) ||
12721 TypeSize::isKnownLE(MST1->getMemoryVT().getStoreSize(),
12722 MST->getMemoryVT().getStoreSize())) {
12723 CombineTo(MST1, MST1->getChain());
12724 if (N->getOpcode() != ISD::DELETED_NODE)
12725 AddToWorklist(N);
12726 return SDValue(N, 0);
12727 }
12728 }
12729
12730 // If this is a masked store with an all ones mask, we can use an unmasked store.
12731 // FIXME: Can we do this for indexed, compressing, or truncating stores?
12732 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
12733 !MST->isCompressingStore() && !MST->isTruncatingStore())
12734 return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
12735 MST->getBasePtr(), MST->getPointerInfo(),
12736 MST->getBaseAlign(), MST->getMemOperand()->getFlags(),
12737 MST->getAAInfo());
12738
12739 // Try transforming N to an indexed store.
12740 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12741 return SDValue(N, 0);
12742
12743 if (MST->isTruncatingStore() && MST->isUnindexed() &&
12744 Value.getValueType().isInteger() &&
12745 (!isa<ConstantSDNode>(Value) ||
12746 !cast<ConstantSDNode>(Value)->isOpaque())) {
12747 APInt TruncDemandedBits =
12748 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
12749 MST->getMemoryVT().getScalarSizeInBits());
12750
12751 // See if we can simplify the operation with
12752 // SimplifyDemandedBits, which only works if the value has a single use.
12753 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
12754 // Re-visit the store if anything changed and the store hasn't been merged
12755 // with another node (N is deleted). SimplifyDemandedBits will add Value's
12756 // node back to the worklist if necessary, but we also need to re-visit
12757 // the Store node itself.
12758 if (N->getOpcode() != ISD::DELETED_NODE)
12759 AddToWorklist(N);
12760 return SDValue(N, 0);
12761 }
12762 }
12763
12764 // If this is a TRUNC followed by a masked store, fold this into a masked
12765 // truncating store. We can do this even if this is already a masked
12766 // truncstore.
12767 // TODO: Try combining to a masked compress store if possible.
12768 if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
12769 MST->isUnindexed() && !MST->isCompressingStore() &&
12770 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
12771 MST->getMemoryVT(), LegalOperations)) {
12772 auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
12773 Value.getOperand(0).getValueType());
12774 return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
12775 MST->getOffset(), Mask, MST->getMemoryVT(),
12776 MST->getMemOperand(), MST->getAddressingMode(),
12777 /*IsTruncating=*/true);
12778 }
12779
12780 return SDValue();
12781}
12782
12783SDValue DAGCombiner::visitVP_STRIDED_STORE(SDNode *N) {
12784 auto *SST = cast<VPStridedStoreSDNode>(N);
12785 EVT EltVT = SST->getValue().getValueType().getVectorElementType();
12786 // Combine strided stores with unit-stride to a regular VP store.
12787 if (auto *CStride = dyn_cast<ConstantSDNode>(SST->getStride());
12788 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
12789 return DAG.getStoreVP(SST->getChain(), SDLoc(N), SST->getValue(),
12790 SST->getBasePtr(), SST->getOffset(), SST->getMask(),
12791 SST->getVectorLength(), SST->getMemoryVT(),
12792 SST->getMemOperand(), SST->getAddressingMode(),
12793 SST->isTruncatingStore(), SST->isCompressingStore());
12794 }
12795 return SDValue();
12796}
12797
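// For VECTOR_COMPRESS with a constant mask, the combine below expands the
// node into a plain build_vector: e.g. compressing <a,b,c,d> with mask
// <1,0,1,0> yields <a,c,p2,p3>, where p2/p3 come from the passthru operand
// (or are undef when there is no passthru).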
12798SDValue DAGCombiner::visitVECTOR_COMPRESS(SDNode *N) {
12799 SDLoc DL(N);
12800 SDValue Vec = N->getOperand(0);
12801 SDValue Mask = N->getOperand(1);
12802 SDValue Passthru = N->getOperand(2);
12803 EVT VecVT = Vec.getValueType();
12804
12805 bool HasPassthru = !Passthru.isUndef();
12806
12807 APInt SplatVal;
12808 if (ISD::isConstantSplatVector(Mask.getNode(), SplatVal))
12809 return TLI.isConstTrueVal(Mask) ? Vec : Passthru;
12810
12811 if (Vec.isUndef() || Mask.isUndef())
12812 return Passthru;
12813
12814 // No need for potentially expensive compress if the mask is constant.
12817 EVT ScalarVT = VecVT.getVectorElementType();
12818 unsigned NumSelected = 0;
12819 unsigned NumElmts = VecVT.getVectorNumElements();
12820 for (unsigned I = 0; I < NumElmts; ++I) {
12821 SDValue MaskI = Mask.getOperand(I);
12822 // We treat undef mask entries as "false".
12823 if (MaskI.isUndef())
12824 continue;
12825
12826 if (TLI.isConstTrueVal(MaskI)) {
12827 SDValue VecI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Vec,
12828 DAG.getVectorIdxConstant(I, DL));
12829 Ops.push_back(VecI);
12830 NumSelected++;
12831 }
12832 }
12833 for (unsigned Rest = NumSelected; Rest < NumElmts; ++Rest) {
12834 SDValue Val =
12835 HasPassthru
12836 ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Passthru,
12837 DAG.getVectorIdxConstant(Rest, DL))
12838 : DAG.getUNDEF(ScalarVT);
12839 Ops.push_back(Val);
12840 }
12841 return DAG.getBuildVector(VecVT, DL, Ops);
12842 }
12843
12844 return SDValue();
12845}
12846
12847SDValue DAGCombiner::visitVPGATHER(SDNode *N) {
12848 VPGatherSDNode *MGT = cast<VPGatherSDNode>(N);
12849 SDValue Mask = MGT->getMask();
12850 SDValue Chain = MGT->getChain();
12851 SDValue Index = MGT->getIndex();
12852 SDValue Scale = MGT->getScale();
12853 SDValue BasePtr = MGT->getBasePtr();
12854 SDValue VL = MGT->getVectorLength();
12855 ISD::MemIndexType IndexType = MGT->getIndexType();
12856 SDLoc DL(N);
12857
12858 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12859 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
12860 return DAG.getGatherVP(
12861 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12862 Ops, MGT->getMemOperand(), IndexType);
12863 }
12864
12865 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12866 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
12867 return DAG.getGatherVP(
12868 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12869 Ops, MGT->getMemOperand(), IndexType);
12870 }
12871
12872 return SDValue();
12873}
12874
12875SDValue DAGCombiner::visitMGATHER(SDNode *N) {
12876 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
12877 SDValue Mask = MGT->getMask();
12878 SDValue Chain = MGT->getChain();
12879 SDValue Index = MGT->getIndex();
12880 SDValue Scale = MGT->getScale();
12881 SDValue PassThru = MGT->getPassThru();
12882 SDValue BasePtr = MGT->getBasePtr();
12883 ISD::MemIndexType IndexType = MGT->getIndexType();
12884 SDLoc DL(N);
12885
12886 // Zap gathers with a zero mask.
12887 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12888 return CombineTo(N, PassThru, MGT->getChain());
12889
12890 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12891 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
12892 return DAG.getMaskedGather(
12893 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12894 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12895 }
12896
12897 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12898 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
12899 return DAG.getMaskedGather(
12900 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12901 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12902 }
12903
12904 return SDValue();
12905}
12906
12907SDValue DAGCombiner::visitMLOAD(SDNode *N) {
12908 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
12909 SDValue Mask = MLD->getMask();
12910
12911 // Zap masked loads with a zero mask.
12912 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12913 return CombineTo(N, MLD->getPassThru(), MLD->getChain());
12914
12915 // If this is a masked load with an all ones mask, we can use an unmasked load.
12916 // FIXME: Can we do this for indexed, expanding, or extending loads?
12917 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
12918 !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
12919 SDValue NewLd = DAG.getLoad(
12920 N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
12921 MLD->getPointerInfo(), MLD->getBaseAlign(),
12922 MLD->getMemOperand()->getFlags(), MLD->getAAInfo(), MLD->getRanges());
12923 return CombineTo(N, NewLd, NewLd.getValue(1));
12924 }
12925
12926 // Try transforming N to an indexed load.
12927 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12928 return SDValue(N, 0);
12929
12930 return SDValue();
12931}
12932
12933SDValue DAGCombiner::visitMHISTOGRAM(SDNode *N) {
12934 MaskedHistogramSDNode *HG = cast<MaskedHistogramSDNode>(N);
12935 SDValue Chain = HG->getChain();
12936 SDValue Inc = HG->getInc();
12937 SDValue Mask = HG->getMask();
12938 SDValue BasePtr = HG->getBasePtr();
12939 SDValue Index = HG->getIndex();
12940 SDLoc DL(HG);
12941
12942 EVT MemVT = HG->getMemoryVT();
12943 EVT DataVT = Index.getValueType();
12944 MachineMemOperand *MMO = HG->getMemOperand();
12945 ISD::MemIndexType IndexType = HG->getIndexType();
12946
12947 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12948 return Chain;
12949
12950 if (refineUniformBase(BasePtr, Index, HG->isIndexScaled(), DAG, DL) ||
12951 refineIndexType(Index, IndexType, DataVT, DAG)) {
12952 SDValue Ops[] = {Chain, Inc, Mask, BasePtr, Index,
12953 HG->getScale(), HG->getIntID()};
12954 return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL, Ops,
12955 MMO, IndexType);
12956 }
12957
12958 return SDValue();
12959}
12960
12961SDValue DAGCombiner::visitPARTIAL_REDUCE_MLA(SDNode *N) {
12962 if (SDValue Res = foldPartialReduceMLAMulOp(N))
12963 return Res;
12964 if (SDValue Res = foldPartialReduceAdd(N))
12965 return Res;
12966 return SDValue();
12967}
12968
12969// partial_reduce_*mla(acc, mul(ext(a), ext(b)), splat(1))
12970// -> partial_reduce_*mla(acc, a, b)
12971//
12972// partial_reduce_*mla(acc, mul(ext(x), splat(C)), splat(1))
12973// -> partial_reduce_*mla(acc, x, C)
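// For example, a partial reduction of mul(zext(<16 x i8> a), zext(<16 x i8> b))
// into a <4 x i32> accumulator with a splat(1) multiplier becomes
// PARTIAL_REDUCE_UMLA(acc, a, b), letting targets with dot-product style
// instructions consume the narrow inputs directly.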
12974SDValue DAGCombiner::foldPartialReduceMLAMulOp(SDNode *N) {
12975 SDLoc DL(N);
12976 auto *Context = DAG.getContext();
12977 SDValue Acc = N->getOperand(0);
12978 SDValue Op1 = N->getOperand(1);
12979 SDValue Op2 = N->getOperand(2);
12980
12981 APInt C;
12982 if (Op1->getOpcode() != ISD::MUL ||
12983 !ISD::isConstantSplatVector(Op2.getNode(), C) || !C.isOne())
12984 return SDValue();
12985
12986 SDValue LHS = Op1->getOperand(0);
12987 SDValue RHS = Op1->getOperand(1);
12988 unsigned LHSOpcode = LHS->getOpcode();
12989 if (!ISD::isExtOpcode(LHSOpcode))
12990 return SDValue();
12991
12992 SDValue LHSExtOp = LHS->getOperand(0);
12993 EVT LHSExtOpVT = LHSExtOp.getValueType();
12994
12995 // partial_reduce_*mla(acc, mul(ext(x), splat(C)), splat(1))
12996 // -> partial_reduce_*mla(acc, x, C)
12997 if (ISD::isConstantSplatVector(RHS.getNode(), C)) {
12998 // TODO: Make use of partial_reduce_sumla here
12999 APInt CTrunc = C.trunc(LHSExtOpVT.getScalarSizeInBits());
13000 unsigned LHSBits = LHS.getValueType().getScalarSizeInBits();
13001 if ((LHSOpcode != ISD::ZERO_EXTEND || CTrunc.zext(LHSBits) != C) &&
13002 (LHSOpcode != ISD::SIGN_EXTEND || CTrunc.sext(LHSBits) != C))
13003 return SDValue();
13004
13005 unsigned NewOpcode = LHSOpcode == ISD::SIGN_EXTEND
13006 ? ISD::PARTIAL_REDUCE_SMLA
13007 : ISD::PARTIAL_REDUCE_UMLA;
13008
13009 // Only perform these combines if the target supports folding
13010 // the extends into the operation.
13012 NewOpcode, TLI.getTypeToTransformTo(*Context, N->getValueType(0)),
13013 TLI.getTypeToTransformTo(*Context, LHSExtOpVT)))
13014 return SDValue();
13015
13016 return DAG.getNode(NewOpcode, DL, N->getValueType(0), Acc, LHSExtOp,
13017 DAG.getConstant(CTrunc, DL, LHSExtOpVT));
13018 }
13019
13020 unsigned RHSOpcode = RHS->getOpcode();
13021 if (!ISD::isExtOpcode(RHSOpcode))
13022 return SDValue();
13023
13024 SDValue RHSExtOp = RHS->getOperand(0);
13025 if (LHSExtOpVT != RHSExtOp.getValueType())
13026 return SDValue();
13027
13028 unsigned NewOpc;
13029 if (LHSOpcode == ISD::SIGN_EXTEND && RHSOpcode == ISD::SIGN_EXTEND)
13030 NewOpc = ISD::PARTIAL_REDUCE_SMLA;
13031 else if (LHSOpcode == ISD::ZERO_EXTEND && RHSOpcode == ISD::ZERO_EXTEND)
13032 NewOpc = ISD::PARTIAL_REDUCE_UMLA;
13033 else if (LHSOpcode == ISD::SIGN_EXTEND && RHSOpcode == ISD::ZERO_EXTEND)
13034 NewOpc = ISD::PARTIAL_REDUCE_SUMLA;
13035 else if (LHSOpcode == ISD::ZERO_EXTEND && RHSOpcode == ISD::SIGN_EXTEND) {
13036 NewOpc = ISD::PARTIAL_REDUCE_SUMLA;
13037 std::swap(LHSExtOp, RHSExtOp);
13038 } else
13039 return SDValue();
13040 // For a 2-stage extend the signedness of both of the extends must match
13041 // If the mul has the same type, there is no outer extend, and thus we
13042 // can simply use the inner extends to pick the result node.
13043 // TODO: extend to handle nonneg zext as sext
13044 EVT AccElemVT = Acc.getValueType().getVectorElementType();
13045 if (Op1.getValueType().getVectorElementType() != AccElemVT &&
13046 NewOpc != N->getOpcode())
13047 return SDValue();
13048
13049 // Only perform these combines if the target supports folding
13050 // the extends into the operation.
13052 NewOpc, TLI.getTypeToTransformTo(*Context, N->getValueType(0)),
13053 TLI.getTypeToTransformTo(*Context, LHSExtOpVT)))
13054 return SDValue();
13055
13056 return DAG.getNode(NewOpc, DL, N->getValueType(0), Acc, LHSExtOp, RHSExtOp);
13057}
13058
13059// partial.reduce.umla(acc, zext(op), splat(1))
13060// -> partial.reduce.umla(acc, op, splat(trunc(1)))
13061// partial.reduce.smla(acc, sext(op), splat(1))
13062// -> partial.reduce.smla(acc, op, splat(trunc(1)))
13063// partial.reduce.sumla(acc, sext(op), splat(1))
13064// -> partial.reduce.smla(acc, op, splat(trunc(1)))
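// For example, partial.reduce.umla(<4 x i32> acc, zext(<16 x i8> op), splat(1))
// becomes partial.reduce.umla(acc, op, splat(i8 1)), so the extend is folded
// into the accumulating operation instead of being materialized.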
13065SDValue DAGCombiner::foldPartialReduceAdd(SDNode *N) {
13066 SDLoc DL(N);
13067 SDValue Acc = N->getOperand(0);
13068 SDValue Op1 = N->getOperand(1);
13069 SDValue Op2 = N->getOperand(2);
13070
13071 APInt ConstantOne;
13072 if (!ISD::isConstantSplatVector(Op2.getNode(), ConstantOne) ||
13073 !ConstantOne.isOne())
13074 return SDValue();
13075
13076 unsigned Op1Opcode = Op1.getOpcode();
13077 if (!ISD::isExtOpcode(Op1Opcode))
13078 return SDValue();
13079
13080 bool Op1IsSigned = Op1Opcode == ISD::SIGN_EXTEND;
13081 bool NodeIsSigned = N->getOpcode() != ISD::PARTIAL_REDUCE_UMLA;
13082 EVT AccElemVT = Acc.getValueType().getVectorElementType();
13083 if (Op1IsSigned != NodeIsSigned &&
13084 Op1.getValueType().getVectorElementType() != AccElemVT)
13085 return SDValue();
13086
13087 unsigned NewOpcode =
13088 Op1IsSigned ? ISD::PARTIAL_REDUCE_SMLA : ISD::PARTIAL_REDUCE_UMLA;
13089
13090 SDValue UnextOp1 = Op1.getOperand(0);
13091 EVT UnextOp1VT = UnextOp1.getValueType();
13092 auto *Context = DAG.getContext();
13094 NewOpcode, TLI.getTypeToTransformTo(*Context, N->getValueType(0)),
13095 TLI.getTypeToTransformTo(*Context, UnextOp1VT)))
13096 return SDValue();
13097
13098 return DAG.getNode(NewOpcode, DL, N->getValueType(0), Acc, UnextOp1,
13099 DAG.getConstant(1, DL, UnextOp1VT));
13100}
13101
13102SDValue DAGCombiner::visitVP_STRIDED_LOAD(SDNode *N) {
13103 auto *SLD = cast<VPStridedLoadSDNode>(N);
13104 EVT EltVT = SLD->getValueType(0).getVectorElementType();
13105 // Combine strided loads with unit-stride to a regular VP load.
13106 if (auto *CStride = dyn_cast<ConstantSDNode>(SLD->getStride());
13107 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
13108 SDValue NewLd = DAG.getLoadVP(
13109 SLD->getAddressingMode(), SLD->getExtensionType(), SLD->getValueType(0),
13110 SDLoc(N), SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(),
13111 SLD->getMask(), SLD->getVectorLength(), SLD->getMemoryVT(),
13112 SLD->getMemOperand(), SLD->isExpandingLoad());
13113 return CombineTo(N, NewLd, NewLd.getValue(1));
13114 }
13115 return SDValue();
13116}
13117
13118/// A vector select of 2 constant vectors can be simplified to math/logic to
13119/// avoid a variable select instruction and possibly avoid constant loads.
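/// For example, vselect Cond, <3,3,3,3>, <2,2,2,2> becomes
/// (add (zext Cond), <2,2,2,2>), and vselect Cond, <8,8,8,8>, <0,0,0,0>
/// becomes (shl (zext Cond), 3).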
13120SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
13121 SDValue Cond = N->getOperand(0);
13122 SDValue N1 = N->getOperand(1);
13123 SDValue N2 = N->getOperand(2);
13124 EVT VT = N->getValueType(0);
13125 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
13129 return SDValue();
13130
13131 // Check if we can use the condition value to increment/decrement a single
13132 // constant value. This simplifies a select to an add and removes a constant
13133 // load/materialization from the general case.
13134 bool AllAddOne = true;
13135 bool AllSubOne = true;
13136 unsigned Elts = VT.getVectorNumElements();
13137 for (unsigned i = 0; i != Elts; ++i) {
13138 SDValue N1Elt = N1.getOperand(i);
13139 SDValue N2Elt = N2.getOperand(i);
13140 if (N1Elt.isUndef())
13141 continue;
13142 // N2 should not contain undef values since it will be reused in the fold.
13143 if (N2Elt.isUndef() || N1Elt.getValueType() != N2Elt.getValueType()) {
13144 AllAddOne = false;
13145 AllSubOne = false;
13146 break;
13147 }
13148
13149 const APInt &C1 = N1Elt->getAsAPIntVal();
13150 const APInt &C2 = N2Elt->getAsAPIntVal();
13151 if (C1 != C2 + 1)
13152 AllAddOne = false;
13153 if (C1 != C2 - 1)
13154 AllSubOne = false;
13155 }
13156
13157 // Further simplifications for the extra-special cases where the constants are
13158 // all 0 or all -1 should be implemented as folds of these patterns.
13159 SDLoc DL(N);
13160 if (AllAddOne || AllSubOne) {
13161 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
13162 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
13163 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
13164 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
13165 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
13166 }
13167
13168 // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
13169 APInt Pow2C;
13170 if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
13171 isNullOrNullSplat(N2)) {
13172 SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
13173 SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
13174 return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
13175 }
13176
13178 return V;
13179
13180 // The general case for select-of-constants:
13181 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
13182 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
13183 // leave that to a machine-specific pass.
13184 return SDValue();
13185}
13186
13187SDValue DAGCombiner::visitVP_SELECT(SDNode *N) {
13188 SDValue N0 = N->getOperand(0);
13189 SDValue N1 = N->getOperand(1);
13190 SDValue N2 = N->getOperand(2);
13191 SDLoc DL(N);
13192
13193 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
13194 return V;
13195
13197 return V;
13198
13199 return SDValue();
13200}
13201
13202 static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal,
13203 SDValue FVal,
13204 const TargetLowering &TLI,
13205 SelectionDAG &DAG,
13206 const SDLoc &DL) {
13207 EVT VT = TVal.getValueType();
13208 if (!TLI.isTypeLegal(VT))
13209 return SDValue();
13210
13211 EVT CondVT = Cond.getValueType();
13212 assert(CondVT.isVector() && "Vector select expects a vector selector!");
13213
13214 bool IsTAllZero = ISD::isConstantSplatVectorAllZeros(TVal.getNode());
13215 bool IsTAllOne = ISD::isConstantSplatVectorAllOnes(TVal.getNode());
13216 bool IsFAllZero = ISD::isConstantSplatVectorAllZeros(FVal.getNode());
13217 bool IsFAllOne = ISD::isConstantSplatVectorAllOnes(FVal.getNode());
13218
13219 // If this is not a vselect(cond, 0/-1, X) or vselect(cond, X, 0/-1), bail out.
13220 if (!IsTAllZero && !IsTAllOne && !IsFAllZero && !IsFAllOne)
13221 return SDValue();
13222
13223 // select Cond, 0, 0 → 0
13224 if (IsTAllZero && IsFAllZero) {
13225 return VT.isFloatingPoint() ? DAG.getConstantFP(0.0, DL, VT)
13226 : DAG.getConstant(0, DL, VT);
13227 }
13228
13229 // check select(setgt lhs, -1), 1, -1 --> or (sra lhs, bitwidth - 1), 1
13230 APInt TValAPInt;
13231 if (Cond.getOpcode() == ISD::SETCC &&
13232 Cond.getOperand(2) == DAG.getCondCode(ISD::SETGT) &&
13233 Cond.getOperand(0).getValueType() == VT && VT.isSimple() &&
13234 ISD::isConstantSplatVector(TVal.getNode(), TValAPInt) &&
13235 TValAPInt.isOne() &&
13236 ISD::isConstantSplatVectorAllOnes(Cond.getOperand(1).getNode()) &&
13238 return SDValue();
13239 }
13240
13241 // To use the condition operand as a bitwise mask, it must have elements that
13242 // are the same size as the select elements. i.e, the condition operand must
13243 // have already been promoted from the IR select condition type <N x i1>.
13244 // Don't check if the types themselves are equal because that excludes
13245 // vector floating-point selects.
13246 if (CondVT.getScalarSizeInBits() != VT.getScalarSizeInBits())
13247 return SDValue();
13248
13249 // Cond value must be 'sign splat' to be converted to a logical op.
13250 if (DAG.ComputeNumSignBits(Cond) != CondVT.getScalarSizeInBits())
13251 return SDValue();
13252
13253 // Try inverting Cond and swapping T/F if it gives all-ones/all-zeros form
13254 if (!IsTAllOne && !IsFAllZero && Cond.hasOneUse() &&
13255 Cond.getOpcode() == ISD::SETCC &&
13256 TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT) ==
13257 CondVT) {
13258 if (IsTAllZero || IsFAllOne) {
13259 SDValue CC = Cond.getOperand(2);
13260 ISD::CondCode InverseCC = ISD::getSetCCInverse(
13261 cast<CondCodeSDNode>(CC)->get(), Cond.getOperand(0).getValueType());
13262 Cond = DAG.getSetCC(DL, CondVT, Cond.getOperand(0), Cond.getOperand(1),
13263 InverseCC);
13264 std::swap(TVal, FVal);
13265 std::swap(IsTAllOne, IsFAllOne);
13266 std::swap(IsTAllZero, IsFAllZero);
13267 }
13268 }
13269
13271 "Select condition no longer all-sign bits");
13272
13273 // select Cond, -1, 0 → bitcast Cond
13274 if (IsTAllOne && IsFAllZero)
13275 return DAG.getBitcast(VT, Cond);
13276
13277 // select Cond, -1, x → or Cond, x
13278 if (IsTAllOne) {
13279 SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(FVal));
13280 SDValue Or = DAG.getNode(ISD::OR, DL, CondVT, Cond, X);
13281 return DAG.getBitcast(VT, Or);
13282 }
13283
13284 // select Cond, x, 0 → and Cond, x
13285 if (IsFAllZero) {
13286 SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(TVal));
13287 SDValue And = DAG.getNode(ISD::AND, DL, CondVT, Cond, X);
13288 return DAG.getBitcast(VT, And);
13289 }
13290
13291 // select Cond, 0, x -> and not(Cond), x
13292 if (IsTAllZero &&
13294 SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(FVal));
13295 SDValue And =
13296 DAG.getNode(ISD::AND, DL, CondVT, DAG.getNOT(DL, Cond, CondVT), X);
13297 return DAG.getBitcast(VT, And);
13298 }
13299
13300 return SDValue();
13301}
13302
13303SDValue DAGCombiner::visitVSELECT(SDNode *N) {
13304 SDValue N0 = N->getOperand(0);
13305 SDValue N1 = N->getOperand(1);
13306 SDValue N2 = N->getOperand(2);
13307 EVT VT = N->getValueType(0);
13308 SDLoc DL(N);
13309
13310 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
13311 return V;
13312
13314 return V;
13315
13316 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
13317 if (!TLI.isTargetCanonicalSelect(N))
13318 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
13319 return DAG.getSelect(DL, VT, F, N2, N1);
13320
13321 // select (sext m), (add X, C), X --> (add X, (and C, (sext m))))
13322 if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N2 && N1->hasOneUse() &&
13325 TLI.getBooleanContents(N0.getValueType()) ==
13327 return DAG.getNode(
13328 ISD::ADD, DL, N1.getValueType(), N2,
13329 DAG.getNode(ISD::AND, DL, N0.getValueType(), N1.getOperand(1), N0));
13330 }
13331
13332 // Canonicalize integer abs.
13333 // vselect (setg[te] X, 0), X, -X ->
13334 // vselect (setgt X, -1), X, -X ->
13335 // vselect (setl[te] X, 0), -X, X ->
13336 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
13337 if (N0.getOpcode() == ISD::SETCC) {
13338 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
13339 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
13340 bool isAbs = false;
13341 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
13342
13343 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
13344 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
13345 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
13346 isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
13347 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
13348 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
13349 isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
13350
13351 if (isAbs) {
13353 return DAG.getNode(ISD::ABS, DL, VT, LHS);
13354
13355 SDValue Shift = DAG.getNode(
13356 ISD::SRA, DL, VT, LHS,
13357 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, DL));
13358 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
13359 AddToWorklist(Shift.getNode());
13360 AddToWorklist(Add.getNode());
13361 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
13362 }
13363
13364 // vselect x, y (fcmp lt x, y) -> fminnum x, y
13365 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
13366 //
13367 // This is OK if we don't care about what happens if either operand is a
13368 // NaN.
13369 //
13370 if (N0.hasOneUse() &&
13371 isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, N->getFlags(), TLI)) {
13372 if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC))
13373 return FMinMax;
13374 }
13375
13376 if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
13377 return S;
13378 if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
13379 return S;
13380
13381 // If this select has a condition (setcc) with narrower operands than the
13382 // select, try to widen the compare to match the select width.
13383 // TODO: This should be extended to handle any constant.
13384 // TODO: This could be extended to handle non-loading patterns, but that
13385 // requires thorough testing to avoid regressions.
13386 if (isNullOrNullSplat(RHS)) {
13387 EVT NarrowVT = LHS.getValueType();
13389 EVT SetCCVT = getSetCCResultType(LHS.getValueType());
13390 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
13391 unsigned WideWidth = WideVT.getScalarSizeInBits();
13392 bool IsSigned = isSignedIntSetCC(CC);
13393 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
13394 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
13395 SetCCWidth != 1 && SetCCWidth < WideWidth &&
13396 TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
13397 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
13398 // Both compare operands can be widened for free. The LHS can use an
13399 // extended load, and the RHS is a constant:
13400 // vselect (ext (setcc load(X), C)), N1, N2 -->
13401 // vselect (setcc extload(X), C'), N1, N2
13402 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13403 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
13404 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
13405 EVT WideSetCCVT = getSetCCResultType(WideVT);
13406 SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
13407 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
13408 }
13409 }
13410
13411 if (SDValue ABD = foldSelectToABD(LHS, RHS, N1, N2, CC, DL))
13412 return ABD;
13413
13414 // Match VSELECTs into add with unsigned saturation.
13415 if (hasOperation(ISD::UADDSAT, VT)) {
13416 // Check if one of the arms of the VSELECT is vector with all bits set.
13417 // If it's on the left side invert the predicate to simplify logic below.
13418 SDValue Other;
13419 ISD::CondCode SatCC = CC;
13420 if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
13421 Other = N2;
13422 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
13423 } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
13424 Other = N1;
13425 }
13426
13427 if (Other && Other.getOpcode() == ISD::ADD) {
13428 SDValue CondLHS = LHS, CondRHS = RHS;
13429 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
13430
13431 // Canonicalize condition operands.
13432 if (SatCC == ISD::SETUGE) {
13433 std::swap(CondLHS, CondRHS);
13434 SatCC = ISD::SETULE;
13435 }
13436
13437 // We can test against either of the addition operands.
13438 // x <= x+y ? x+y : ~0 --> uaddsat x, y
13439 // x+y >= x ? x+y : ~0 --> uaddsat x, y
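 // (e.g. for i8, x+y u>= x exactly when the addition did not wrap, so the
 // select keeps the sum and otherwise clamps to 255, which is uaddsat x, y.)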
13440 if (SatCC == ISD::SETULE && Other == CondRHS &&
13441 (OpLHS == CondLHS || OpRHS == CondLHS))
13442 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
13443
13444 if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
13445 (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
13446 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
13447 CondLHS == OpLHS) {
13448 // If the RHS is a constant we have to reverse the const
13449 // canonicalization.
13450 // x >= ~C ? x+C : ~0 --> uaddsat x, C
13451 auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
13452 return Cond->getAPIntValue() == ~Op->getAPIntValue();
13453 };
13454 if (SatCC == ISD::SETULE &&
13455 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
13456 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
13457 }
13458 }
13459 }
13460
13461 // Match VSELECTs into sub with unsigned saturation.
13462 if (hasOperation(ISD::USUBSAT, VT)) {
13463 // Check if one of the arms of the VSELECT is a zero vector. If it's on
13464 // the left side invert the predicate to simplify logic below.
13465 SDValue Other;
13466 ISD::CondCode SatCC = CC;
13467 if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
13468 Other = N2;
13469 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
13470 } else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) {
13471 Other = N1;
13472 }
13473
13474 // zext(x) >= y ? trunc(zext(x) - y) : 0
13475 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
13476 // zext(x) > y ? trunc(zext(x) - y) : 0
13477 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
13478 if (Other && Other.getOpcode() == ISD::TRUNCATE &&
13479 Other.getOperand(0).getOpcode() == ISD::SUB &&
13480 (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)) {
13481 SDValue OpLHS = Other.getOperand(0).getOperand(0);
13482 SDValue OpRHS = Other.getOperand(0).getOperand(1);
13483 if (LHS == OpLHS && RHS == OpRHS && LHS.getOpcode() == ISD::ZERO_EXTEND)
13484 if (SDValue R = getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS,
13485 DAG, DL))
13486 return R;
13487 }
13488
13489 if (Other && Other.getNumOperands() == 2) {
13490 SDValue CondRHS = RHS;
13491 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
13492
13493 if (OpLHS == LHS) {
13494 // Look for a general sub with unsigned saturation first.
13495 // x >= y ? x-y : 0 --> usubsat x, y
13496 // x > y ? x-y : 0 --> usubsat x, y
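 // (e.g. for i8, x=5, y=9: the compare fails and the select yields 0,
 // matching usubsat's clamp at zero; for x=9, y=5 both forms yield 4.)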
13497 if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
13498 Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
13499 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
13500
13501 if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
13502 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
13503 if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
13504 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
13505 // If the RHS is a constant we have to reverse the const
13506 // canonicalization.
13507 // x > C-1 ? x+-C : 0 --> usubsat x, C
13508 auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
13509 return (!Op && !Cond) ||
13510 (Op && Cond &&
13511 Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
13512 };
13513 if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
13514 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
13515 /*AllowUndefs*/ true)) {
13516 OpRHS = DAG.getNegative(OpRHS, DL, VT);
13517 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
13518 }
13519
13520 // Another special case: If C was a sign bit, the sub has been
13521 // canonicalized into a xor.
13522 // FIXME: Would it be better to use computeKnownBits to
13523 // determine whether it's safe to decanonicalize the xor?
13524 // x s< 0 ? x^C : 0 --> usubsat x, C
13525 APInt SplatValue;
13526 if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
13527 ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
13529 SplatValue.isSignMask()) {
13530 // Note that we have to rebuild the RHS constant here to
13531 // ensure we don't rely on particular values of undef lanes.
13532 OpRHS = DAG.getConstant(SplatValue, DL, VT);
13533 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
13534 }
13535 }
13536 }
13537 }
13538 }
13539 }
13540
13541 // (vselect (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
13542 // (vselect (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))
13543 if (SDValue UMin = foldSelectToUMin(LHS, RHS, N1, N2, CC, DL))
13544 return UMin;
13545 }
13546
13547 if (SimplifySelectOps(N, N1, N2))
13548 return SDValue(N, 0); // Don't revisit N.
13549
13550 // Fold (vselect all_ones, N1, N2) -> N1
13551 if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
13552 return N1;
13553 // Fold (vselect all_zeros, N1, N2) -> N2
13554 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
13555 return N2;
13556
13557 // The ConvertSelectToConcatVector function assumes both the above
13558 // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
13559 // and addressed.
13560 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
13561 N2.getOpcode() == ISD::CONCAT_VECTORS &&
13562 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
13563 if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
13564 return CV;
13565 }
13566
13567 if (SDValue V = foldVSelectOfConstants(N))
13568 return V;
13569
13570 if (hasOperation(ISD::SRA, VT))
13572 return V;
13573
13575 return SDValue(N, 0);
13576
13577 if (SDValue V = combineVSelectWithAllOnesOrZeros(N0, N1, N2, TLI, DAG, DL))
13578 return V;
13579
13580 return SDValue();
13581}
13582
13583SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
13584 SDValue N0 = N->getOperand(0);
13585 SDValue N1 = N->getOperand(1);
13586 SDValue N2 = N->getOperand(2);
13587 SDValue N3 = N->getOperand(3);
13588 SDValue N4 = N->getOperand(4);
13589 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
13590 SDLoc DL(N);
13591
13592 // fold select_cc lhs, rhs, x, x, cc -> x
13593 if (N2 == N3)
13594 return N2;
13595
13596 // select_cc bool, 0, x, y, seteq -> select bool, y, x
13597 if (CC == ISD::SETEQ && !LegalTypes && N0.getValueType() == MVT::i1 &&
13598 isNullConstant(N1))
13599 return DAG.getSelect(DL, N2.getValueType(), N0, N3, N2);
13600
13601 // Determine if the condition we're dealing with is constant
13602 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
13603 CC, DL, false)) {
13604 AddToWorklist(SCC.getNode());
13605
13606 // cond always true -> true val
13607 // cond always false -> false val
13608 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode()))
13609 return SCCC->isZero() ? N3 : N2;
13610
13611 // When the condition is UNDEF, just return the first operand. This is
13612 // coherent with DAG creation; no setcc node is created in this case.
13613 if (SCC->isUndef())
13614 return N2;
13615
13616 // Fold to a simpler select_cc
13617 if (SCC.getOpcode() == ISD::SETCC) {
13618 return DAG.getNode(ISD::SELECT_CC, DL, N2.getValueType(),
13619 SCC.getOperand(0), SCC.getOperand(1), N2, N3,
13620 SCC.getOperand(2), SCC->getFlags());
13621 }
13622 }
13623
13624 // If we can fold this based on the true/false value, do so.
13625 if (SimplifySelectOps(N, N2, N3))
13626 return SDValue(N, 0); // Don't revisit N.
13627
13628 // fold select_cc into other things, such as min/max/abs
13629 return SimplifySelectCC(DL, N0, N1, N2, N3, CC);
13630}
13631
13632SDValue DAGCombiner::visitSETCC(SDNode *N) {
13633 // setcc is very commonly used as an argument to brcond. This pattern
13634 // also lends itself to numerous combines and, as a result, it is desirable
13635 // to keep the argument to a brcond as a setcc as much as possible.
13636 bool PreferSetCC =
13637 N->hasOneUse() && N->user_begin()->getOpcode() == ISD::BRCOND;
13638
13639 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
13640 EVT VT = N->getValueType(0);
13641 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
13642 SDLoc DL(N);
13643
13644 if (SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, DL, !PreferSetCC)) {
13645 // If we prefer to have a setcc, and we don't, we'll try our best to
13646 // recreate one using rebuildSetCC.
13647 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
13648 SDValue NewSetCC = rebuildSetCC(Combined);
13649
13650 // We don't have anything interesting to combine to.
13651 if (NewSetCC.getNode() == N)
13652 return SDValue();
13653
13654 if (NewSetCC)
13655 return NewSetCC;
13656 }
13657 return Combined;
13658 }
13659
13660 // Optimize
13661 // 1) (icmp eq/ne (and X, C0), (shift X, C1))
13662 // or
13663 // 2) (icmp eq/ne X, (rotate X, C1))
13664 // If C0 is a mask or shifted mask and the shift amt (C1) isolates the
13665 // remaining bits (i.e. something like `(x64 & UINT32_MAX) == (x64 >> 32)`),
13666 // then:
13667 // If C1 is a power of 2, the rotate and shift+and versions are
13668 // equivalent, so we can interchange them depending on target preference.
13669 // Otherwise, if we have the shift+and version we can interchange srl/shl,
13670 // which in turn affects the constant C0. We can use this to get better
13671 // constants again determined by target preference.
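// For example, with a 64-bit value x,
//   (x & 0xFFFFFFFF) == (x >> 32)
// compares the low and high halves and is equivalent to
//   x == (rotr x, 32)
// so either form can be chosen based on target preference; here C1 = 32 is a
// power of 2, so the two forms are interchangeable.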
13672 if (Cond == ISD::SETNE || Cond == ISD::SETEQ) {
13673 auto IsAndWithShift = [](SDValue A, SDValue B) {
13674 return A.getOpcode() == ISD::AND &&
13675 (B.getOpcode() == ISD::SRL || B.getOpcode() == ISD::SHL) &&
13676 A.getOperand(0) == B.getOperand(0);
13677 };
13678 auto IsRotateWithOp = [](SDValue A, SDValue B) {
13679 return (B.getOpcode() == ISD::ROTL || B.getOpcode() == ISD::ROTR) &&
13680 B.getOperand(0) == A;
13681 };
13682 SDValue AndOrOp = SDValue(), ShiftOrRotate = SDValue();
13683 bool IsRotate = false;
13684
13685 // Find either shift+and or rotate pattern.
13686 if (IsAndWithShift(N0, N1)) {
13687 AndOrOp = N0;
13688 ShiftOrRotate = N1;
13689 } else if (IsAndWithShift(N1, N0)) {
13690 AndOrOp = N1;
13691 ShiftOrRotate = N0;
13692 } else if (IsRotateWithOp(N0, N1)) {
13693 IsRotate = true;
13694 AndOrOp = N0;
13695 ShiftOrRotate = N1;
13696 } else if (IsRotateWithOp(N1, N0)) {
13697 IsRotate = true;
13698 AndOrOp = N1;
13699 ShiftOrRotate = N0;
13700 }
13701
13702 if (AndOrOp && ShiftOrRotate && ShiftOrRotate.hasOneUse() &&
13703 (IsRotate || AndOrOp.hasOneUse())) {
13704 EVT OpVT = N0.getValueType();
13705 // Get constant shift/rotate amount and possibly mask (if its shift+and
13706 // variant).
13707 auto GetAPIntValue = [](SDValue Op) -> std::optional<APInt> {
13708 ConstantSDNode *CNode = isConstOrConstSplat(Op, /*AllowUndefs*/ false,
13709 /*AllowTrunc*/ false);
13710 if (CNode == nullptr)
13711 return std::nullopt;
13712 return CNode->getAPIntValue();
13713 };
13714 std::optional<APInt> AndCMask =
13715 IsRotate ? std::nullopt : GetAPIntValue(AndOrOp.getOperand(1));
13716 std::optional<APInt> ShiftCAmt =
13717 GetAPIntValue(ShiftOrRotate.getOperand(1));
13718 unsigned NumBits = OpVT.getScalarSizeInBits();
13719
13720 // We found constants.
13721 if (ShiftCAmt && (IsRotate || AndCMask) && ShiftCAmt->ult(NumBits)) {
13722 unsigned ShiftOpc = ShiftOrRotate.getOpcode();
13723 // Check that the constants meet the constraints.
13724 bool CanTransform = IsRotate;
13725 if (!CanTransform) {
13726          // Check that the mask and shift complement each other.
13727 CanTransform = *ShiftCAmt == (~*AndCMask).popcount();
13728 // Check that we are comparing all bits
13729 CanTransform &= (*ShiftCAmt + AndCMask->popcount()) == NumBits;
13730 // Check that the and mask is correct for the shift
13731 CanTransform &=
13732 ShiftOpc == ISD::SHL ? (~*AndCMask).isMask() : AndCMask->isMask();
13733 }
13734
13735 // See if target prefers another shift/rotate opcode.
13736 unsigned NewShiftOpc = TLI.preferedOpcodeForCmpEqPiecesOfOperand(
13737 OpVT, ShiftOpc, ShiftCAmt->isPowerOf2(), *ShiftCAmt, AndCMask);
13738 // Transform is valid and we have a new preference.
13739 if (CanTransform && NewShiftOpc != ShiftOpc) {
13740 SDValue NewShiftOrRotate =
13741 DAG.getNode(NewShiftOpc, DL, OpVT, ShiftOrRotate.getOperand(0),
13742 ShiftOrRotate.getOperand(1));
13743 SDValue NewAndOrOp = SDValue();
13744
13745 if (NewShiftOpc == ISD::SHL || NewShiftOpc == ISD::SRL) {
13746 APInt NewMask =
13747 NewShiftOpc == ISD::SHL
13748 ? APInt::getHighBitsSet(NumBits,
13749 NumBits - ShiftCAmt->getZExtValue())
13750 : APInt::getLowBitsSet(NumBits,
13751 NumBits - ShiftCAmt->getZExtValue());
13752 NewAndOrOp =
13753 DAG.getNode(ISD::AND, DL, OpVT, ShiftOrRotate.getOperand(0),
13754 DAG.getConstant(NewMask, DL, OpVT));
13755 } else {
13756 NewAndOrOp = ShiftOrRotate.getOperand(0);
13757 }
13758
13759 return DAG.getSetCC(DL, VT, NewAndOrOp, NewShiftOrRotate, Cond);
13760 }
13761 }
13762 }
13763 }
13764 return SDValue();
13765}
13766
13767SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
13768 SDValue LHS = N->getOperand(0);
13769 SDValue RHS = N->getOperand(1);
13770 SDValue Carry = N->getOperand(2);
13771 SDValue Cond = N->getOperand(3);
13772
13773 // If Carry is false, fold to a regular SETCC.
13774 if (isNullConstant(Carry))
13775 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
13776
13777 return SDValue();
13778}
13779
13780/// Check if N satisfies:
13781/// N is used once.
13782/// N is a Load.
13783///   The load is compatible with ExtOpcode, meaning:
13784///     if the load has an explicit zero/sign extension, ExtOpcode must perform
13785///     the same kind of extension;
13786///     otherwise, any ExtOpcode is compatible.
13787static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
13788 if (!N.hasOneUse())
13789 return false;
13790
13791 if (!isa<LoadSDNode>(N))
13792 return false;
13793
13794 LoadSDNode *Load = cast<LoadSDNode>(N);
13795 ISD::LoadExtType LoadExt = Load->getExtensionType();
13796 if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
13797 return true;
13798
13799 // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
13800 // extension.
13801 if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
13802 (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
13803 return false;
13804
13805 return true;
13806}
13807
13808/// Fold
13809/// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
13810/// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
13811/// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
13812/// This function is called by the DAGCombiner when visiting sext/zext/aext
13813/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
13814 static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
13815                                          SelectionDAG &DAG, const SDLoc &DL,
13816 CombineLevel Level) {
13817 unsigned Opcode = N->getOpcode();
13818 SDValue N0 = N->getOperand(0);
13819 EVT VT = N->getValueType(0);
13820 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
13821 Opcode == ISD::ANY_EXTEND) &&
13822 "Expected EXTEND dag node in input!");
13823
13824 if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
13825 !N0.hasOneUse())
13826 return SDValue();
13827
13828 SDValue Op1 = N0->getOperand(1);
13829 SDValue Op2 = N0->getOperand(2);
13830 if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
13831 return SDValue();
13832
13833 auto ExtLoadOpcode = ISD::EXTLOAD;
13834 if (Opcode == ISD::SIGN_EXTEND)
13835 ExtLoadOpcode = ISD::SEXTLOAD;
13836 else if (Opcode == ISD::ZERO_EXTEND)
13837 ExtLoadOpcode = ISD::ZEXTLOAD;
13838
13839  // An illegal VSELECT may fail in ISel if it is created after legalization
13840  // (DAG Combine2), so we should conservatively check the OperationAction.
13841 LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
13842 LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
13843 if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
13844 !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()) ||
13845 (N0->getOpcode() == ISD::VSELECT && Level >= AfterLegalizeTypes &&
13847 return SDValue();
13848
13849 SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
13850 SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
13851 return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
13852}
13853
13854/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
13855/// a build_vector of constants.
13856/// This function is called by the DAGCombiner when visiting sext/zext/aext
13857/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
13858/// Vector extends are not folded if operations are legal; this is to
13859/// avoid introducing illegal build_vector dag nodes.
13860 static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL,
13861                                          const TargetLowering &TLI,
13862 SelectionDAG &DAG, bool LegalTypes) {
13863 unsigned Opcode = N->getOpcode();
13864 SDValue N0 = N->getOperand(0);
13865 EVT VT = N->getValueType(0);
13866
13867 assert((ISD::isExtOpcode(Opcode) || ISD::isExtVecInRegOpcode(Opcode)) &&
13868 "Expected EXTEND dag node in input!");
13869
13870 // fold (sext c1) -> c1
13871 // fold (zext c1) -> c1
13872 // fold (aext c1) -> c1
13873 if (isa<ConstantSDNode>(N0))
13874 return DAG.getNode(Opcode, DL, VT, N0);
13875
13876 // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
13877 // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
13878 // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
13879 if (N0->getOpcode() == ISD::SELECT) {
13880 SDValue Op1 = N0->getOperand(1);
13881 SDValue Op2 = N0->getOperand(2);
13882 if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
13883 (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
13884      // For any_extend, choose sign extension of the constants to allow a
13885      // possible further transform to sign_extend_inreg, i.e.:
13886 //
13887 // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
13888 // t2: i64 = any_extend t1
13889 // -->
13890 // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
13891 // -->
13892 // t4: i64 = sign_extend_inreg t3
13893 unsigned FoldOpc = Opcode;
13894 if (FoldOpc == ISD::ANY_EXTEND)
13895 FoldOpc = ISD::SIGN_EXTEND;
13896 return DAG.getSelect(DL, VT, N0->getOperand(0),
13897 DAG.getNode(FoldOpc, DL, VT, Op1),
13898 DAG.getNode(FoldOpc, DL, VT, Op2));
13899 }
13900 }
13901
13902 // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
13903 // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
13904 // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
13905 EVT SVT = VT.getScalarType();
13906 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
13908 return SDValue();
13909
13910 // We can fold this node into a build_vector.
13911 unsigned VTBits = SVT.getSizeInBits();
13912 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
13913   SmallVector<SDValue, 8> Elts;
13914   unsigned NumElts = VT.getVectorNumElements();
13915
13916 for (unsigned i = 0; i != NumElts; ++i) {
13917 SDValue Op = N0.getOperand(i);
13918 if (Op.isUndef()) {
13919 if (Opcode == ISD::ANY_EXTEND || Opcode == ISD::ANY_EXTEND_VECTOR_INREG)
13920 Elts.push_back(DAG.getUNDEF(SVT));
13921 else
13922 Elts.push_back(DAG.getConstant(0, DL, SVT));
13923 continue;
13924 }
13925
13926 SDLoc DL(Op);
13927 // Get the constant value and if needed trunc it to the size of the type.
13928 // Nodes like build_vector might have constants wider than the scalar type.
13929 APInt C = Op->getAsAPIntVal().zextOrTrunc(EVTBits);
13930 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
13931 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
13932 else
13933 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
13934 }
13935
13936 return DAG.getBuildVector(VT, DL, Elts);
13937}
13938
13939// ExtendUsesToFormExtLoad - Try to extend the uses of a load to enable this:
13940// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
13941// transformation. Returns true if the extensions are possible and the
13942// above-mentioned transformation is profitable.
13943 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
13944                                     unsigned ExtOpc,
13945 SmallVectorImpl<SDNode *> &ExtendNodes,
13946 const TargetLowering &TLI) {
13947 bool HasCopyToRegUses = false;
13948 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
13949 for (SDUse &Use : N0->uses()) {
13950 SDNode *User = Use.getUser();
13951 if (User == N)
13952 continue;
13953 if (Use.getResNo() != N0.getResNo())
13954 continue;
13955 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
13956 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
13957       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
13958       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
13959 // Sign bits will be lost after a zext.
13960 return false;
13961 bool Add = false;
13962 for (unsigned i = 0; i != 2; ++i) {
13963 SDValue UseOp = User->getOperand(i);
13964 if (UseOp == N0)
13965 continue;
13966 if (!isa<ConstantSDNode>(UseOp))
13967 return false;
13968 Add = true;
13969 }
13970 if (Add)
13971 ExtendNodes.push_back(User);
13972 continue;
13973 }
13974 // If truncates aren't free and there are users we can't
13975 // extend, it isn't worthwhile.
13976 if (!isTruncFree)
13977 return false;
13978 // Remember if this value is live-out.
13979 if (User->getOpcode() == ISD::CopyToReg)
13980 HasCopyToRegUses = true;
13981 }
13982
13983 if (HasCopyToRegUses) {
13984 bool BothLiveOut = false;
13985 for (SDUse &Use : N->uses()) {
13986 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
13987 BothLiveOut = true;
13988 break;
13989 }
13990 }
13991 if (BothLiveOut)
13992 // Both unextended and extended values are live out. There had better be
13993 // a good reason for the transformation.
13994 return !ExtendNodes.empty();
13995 }
13996 return true;
13997}
13998
13999void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
14000 SDValue OrigLoad, SDValue ExtLoad,
14001 ISD::NodeType ExtType) {
14002 // Extend SetCC uses if necessary.
14003 SDLoc DL(ExtLoad);
14004 for (SDNode *SetCC : SetCCs) {
14005     SmallVector<SDValue, 4> Ops;
14006
14007 for (unsigned j = 0; j != 2; ++j) {
14008 SDValue SOp = SetCC->getOperand(j);
14009 if (SOp == OrigLoad)
14010 Ops.push_back(ExtLoad);
14011 else
14012 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
14013 }
14014
14015 Ops.push_back(SetCC->getOperand(2));
14016 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
14017 }
14018}
14019
14020// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
14021SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
14022 SDValue N0 = N->getOperand(0);
14023 EVT DstVT = N->getValueType(0);
14024 EVT SrcVT = N0.getValueType();
14025
14026 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
14027 N->getOpcode() == ISD::ZERO_EXTEND) &&
14028 "Unexpected node type (not an extend)!");
14029
14030 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
14031 // For example, on a target with legal v4i32, but illegal v8i32, turn:
14032 // (v8i32 (sext (v8i16 (load x))))
14033 // into:
14034 // (v8i32 (concat_vectors (v4i32 (sextload x)),
14035 // (v4i32 (sextload (x + 16)))))
14036 // Where uses of the original load, i.e.:
14037 // (v8i16 (load x))
14038 // are replaced with:
14039 // (v8i16 (truncate
14040 // (v8i32 (concat_vectors (v4i32 (sextload x)),
14041 // (v4i32 (sextload (x + 16)))))))
14042 //
14043 // This combine is only applicable to illegal, but splittable, vectors.
14044 // All legal types, and illegal non-vector types, are handled elsewhere.
14045 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
14046 //
14047 if (N0->getOpcode() != ISD::LOAD)
14048 return SDValue();
14049
14050 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14051
14052 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
14053 !N0.hasOneUse() || !LN0->isSimple() ||
14054 !DstVT.isVector() || !DstVT.isPow2VectorType() ||
14056 return SDValue();
14057
14059 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
14060 return SDValue();
14061
14062 ISD::LoadExtType ExtType =
14063 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
14064
14065 // Try to split the vector types to get down to legal types.
14066 EVT SplitSrcVT = SrcVT;
14067 EVT SplitDstVT = DstVT;
14068 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
14069 SplitSrcVT.getVectorNumElements() > 1) {
14070 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
14071 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
14072 }
14073
14074 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
14075 return SDValue();
14076
14077 assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
14078
14079 SDLoc DL(N);
14080 const unsigned NumSplits =
14081 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
14082 const unsigned Stride = SplitSrcVT.getStoreSize();
14083   SmallVector<SDValue, 4> Loads;
14084   SmallVector<SDValue, 4> Chains;
14085
14086 SDValue BasePtr = LN0->getBasePtr();
14087 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
14088 const unsigned Offset = Idx * Stride;
14089
14090     SDValue SplitLoad =
14091         DAG.getExtLoad(ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(),
14092 BasePtr, LN0->getPointerInfo().getWithOffset(Offset),
14093 SplitSrcVT, LN0->getBaseAlign(),
14094 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14095
14096 BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::getFixed(Stride), DL);
14097
14098 Loads.push_back(SplitLoad.getValue(0));
14099 Chains.push_back(SplitLoad.getValue(1));
14100 }
14101
14102 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
14103 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
14104
14105 // Simplify TF.
14106 AddToWorklist(NewChain.getNode());
14107
14108 CombineTo(N, NewValue);
14109
14110 // Replace uses of the original load (before extension)
14111 // with a truncate of the concatenated sextloaded vectors.
14112 SDValue Trunc =
14113 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
14114 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
14115 CombineTo(N0.getNode(), Trunc, NewChain);
14116 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14117}
14118
14119// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
14120// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
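// For instance (illustrative types, assuming an i64 zextload from p is legal):
//   (zext i64 (and (srl (load i32 p), 8), 0xFF))
// becomes
//   (and (srl (zextload i64 p), 8), 0xFF)
// with the shift and mask now performed in the wider type.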
14121SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
14122 assert(N->getOpcode() == ISD::ZERO_EXTEND);
14123 EVT VT = N->getValueType(0);
14124 EVT OrigVT = N->getOperand(0).getValueType();
14125 if (TLI.isZExtFree(OrigVT, VT))
14126 return SDValue();
14127
14128 // and/or/xor
14129 SDValue N0 = N->getOperand(0);
14130 if (!ISD::isBitwiseLogicOp(N0.getOpcode()) ||
14131 N0.getOperand(1).getOpcode() != ISD::Constant ||
14132 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
14133 return SDValue();
14134
14135 // shl/shr
14136 SDValue N1 = N0->getOperand(0);
14137 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
14138 N1.getOperand(1).getOpcode() != ISD::Constant ||
14139 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
14140 return SDValue();
14141
14142 // load
14143 if (!isa<LoadSDNode>(N1.getOperand(0)))
14144 return SDValue();
14145 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
14146 EVT MemVT = Load->getMemoryVT();
14147 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
14148 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
14149 return SDValue();
14150
14151
14152 // If the shift op is SHL, the logic op must be AND, otherwise the result
14153 // will be wrong.
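  // (With SHL, bits of the loaded value that would have been shifted out of
  // the original narrow type survive in the wider type. An AND with the
  // zero-extended mask clears them, but an OR/XOR would leave them set and
  // change the result.)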
14154 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
14155 return SDValue();
14156
14157 if (!N0.hasOneUse() || !N1.hasOneUse())
14158 return SDValue();
14159
14161 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
14162 ISD::ZERO_EXTEND, SetCCs, TLI))
14163 return SDValue();
14164
14165 // Actually do the transformation.
14166 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
14167 Load->getChain(), Load->getBasePtr(),
14168 Load->getMemoryVT(), Load->getMemOperand());
14169
14170 SDLoc DL1(N1);
14171 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
14172 N1.getOperand(1));
14173
14174 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
14175 SDLoc DL0(N0);
14176 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
14177 DAG.getConstant(Mask, DL0, VT));
14178
14179 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
14180 CombineTo(N, And);
14181 if (SDValue(Load, 0).hasOneUse()) {
14182 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
14183 } else {
14184 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
14185 Load->getValueType(0), ExtLoad);
14186 CombineTo(Load, Trunc, ExtLoad.getValue(1));
14187 }
14188
14189 // N0 is dead at this point.
14190 recursivelyDeleteUnusedNodes(N0.getNode());
14191
14192 return SDValue(N,0); // Return N so it doesn't get rechecked!
14193}
14194
14195/// If we're narrowing or widening the result of a vector select and the final
14196/// size is the same size as a setcc (compare) feeding the select, then try to
14197/// apply the cast operation to the select's operands because matching vector
14198/// sizes for a select condition and other operands should be more efficient.
14199SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
14200 unsigned CastOpcode = Cast->getOpcode();
14201 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
14202 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
14203 CastOpcode == ISD::FP_ROUND) &&
14204 "Unexpected opcode for vector select narrowing/widening");
14205
14206 // We only do this transform before legal ops because the pattern may be
14207 // obfuscated by target-specific operations after legalization. Do not create
14208 // an illegal select op, however, because that may be difficult to lower.
14209 EVT VT = Cast->getValueType(0);
14210 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
14211 return SDValue();
14212
14213 SDValue VSel = Cast->getOperand(0);
14214 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
14215 VSel.getOperand(0).getOpcode() != ISD::SETCC)
14216 return SDValue();
14217
14218 // Does the setcc have the same vector size as the casted select?
14219 SDValue SetCC = VSel.getOperand(0);
14220 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
14221 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
14222 return SDValue();
14223
14224 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
14225 SDValue A = VSel.getOperand(1);
14226 SDValue B = VSel.getOperand(2);
14227 SDValue CastA, CastB;
14228 SDLoc DL(Cast);
14229 if (CastOpcode == ISD::FP_ROUND) {
14230 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
14231 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
14232 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
14233 } else {
14234 CastA = DAG.getNode(CastOpcode, DL, VT, A);
14235 CastB = DAG.getNode(CastOpcode, DL, VT, B);
14236 }
14237 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
14238}
14239
14240// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
14241// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
14242 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
14243                                      const TargetLowering &TLI, EVT VT,
14244 bool LegalOperations, SDNode *N,
14245 SDValue N0, ISD::LoadExtType ExtLoadType) {
14246 SDNode *N0Node = N0.getNode();
14247 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
14248 : ISD::isZEXTLoad(N0Node);
14249 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
14250 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
14251 return SDValue();
14252
14253 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14254 EVT MemVT = LN0->getMemoryVT();
14255 if ((LegalOperations || !LN0->isSimple() ||
14256 VT.isVector()) &&
14257 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
14258 return SDValue();
14259
14260 SDValue ExtLoad =
14261 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
14262 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
14263 Combiner.CombineTo(N, ExtLoad);
14264 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14265 if (LN0->use_empty())
14266 Combiner.recursivelyDeleteUnusedNodes(LN0);
14267 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14268}
14269
14270// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
14271// Only generate vector extloads when 1) they're legal, and 2) they are
14272// deemed desirable by the target. NonNegZExt can be set to true if a zero
14273// extend has the nonneg flag, to allow use of a sextload if profitable.
14274 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
14275                                   const TargetLowering &TLI, EVT VT,
14276 bool LegalOperations, SDNode *N, SDValue N0,
14277 ISD::LoadExtType ExtLoadType,
14278 ISD::NodeType ExtOpc,
14279 bool NonNegZExt = false) {
14280   if (!ISD::isNON_EXTLoad(N0.getNode()) || !ISD::isUNINDEXEDLoad(N0.getNode()))
14281     return {};
14282
14283 // If this is zext nneg, see if it would make sense to treat it as a sext.
14284 if (NonNegZExt) {
14285 assert(ExtLoadType == ISD::ZEXTLOAD && ExtOpc == ISD::ZERO_EXTEND &&
14286 "Unexpected load type or opcode");
14287 for (SDNode *User : N0->users()) {
14288 if (User->getOpcode() == ISD::SETCC) {
14289         ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
14290         if (ISD::isSignedIntSetCC(CC)) {
14291 ExtLoadType = ISD::SEXTLOAD;
14292 ExtOpc = ISD::SIGN_EXTEND;
14293 break;
14294 }
14295 }
14296 }
14297 }
14298
14299  // TODO: isFixedLengthVector() should be removed, with any negative effects
14300  // on code generation being addressed through the target's implementation of
14301  // isVectorLoadExtDesirable().
14302 if ((LegalOperations || VT.isFixedLengthVector() ||
14303 !cast<LoadSDNode>(N0)->isSimple()) &&
14304 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))
14305 return {};
14306
14307 bool DoXform = true;
14308   SmallVector<SDNode *, 4> SetCCs;
14309   if (!N0.hasOneUse())
14310 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
14311 if (VT.isVector())
14312 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
14313 if (!DoXform)
14314 return {};
14315
14316 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14317 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
14318 LN0->getBasePtr(), N0.getValueType(),
14319 LN0->getMemOperand());
14320 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
14321 // If the load value is used only by N, replace it via CombineTo N.
14322 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
14323 Combiner.CombineTo(N, ExtLoad);
14324 if (NoReplaceTrunc) {
14325 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14326 Combiner.recursivelyDeleteUnusedNodes(LN0);
14327 } else {
14328 SDValue Trunc =
14329 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
14330 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
14331 }
14332 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14333}
14334
14335static SDValue
14336 tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT,
14337                          bool LegalOperations, SDNode *N, SDValue N0,
14338 ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc) {
14339 if (!N0.hasOneUse())
14340 return SDValue();
14341
14342   auto *Ld = dyn_cast<MaskedLoadSDNode>(N0);
14343   if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
14344 return SDValue();
14345
14346 if ((LegalOperations || !cast<MaskedLoadSDNode>(N0)->isSimple()) &&
14347 !TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
14348 return SDValue();
14349
14350 if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
14351 return SDValue();
14352
14353 SDLoc dl(Ld);
14354 SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
14355 SDValue NewLoad = DAG.getMaskedLoad(
14356 VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
14357 PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
14358 ExtLoadType, Ld->isExpandingLoad());
14359 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
14360 return NewLoad;
14361}
14362
14363// fold ([s|z]ext (atomic_load)) -> ([s|z]ext (truncate ([s|z]ext atomic_load)))
14364 static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG,
14365                                         const TargetLowering &TLI, EVT VT,
14366 SDValue N0,
14367 ISD::LoadExtType ExtLoadType) {
14368 auto *ALoad = dyn_cast<AtomicSDNode>(N0);
14369 if (!ALoad || ALoad->getOpcode() != ISD::ATOMIC_LOAD)
14370 return {};
14371 EVT MemoryVT = ALoad->getMemoryVT();
14372 if (!TLI.isAtomicLoadExtLegal(ExtLoadType, VT, MemoryVT))
14373 return {};
14374 // Can't fold into ALoad if it is already extending differently.
14375 ISD::LoadExtType ALoadExtTy = ALoad->getExtensionType();
14376 if ((ALoadExtTy == ISD::ZEXTLOAD && ExtLoadType == ISD::SEXTLOAD) ||
14377 (ALoadExtTy == ISD::SEXTLOAD && ExtLoadType == ISD::ZEXTLOAD))
14378 return {};
14379
14380 EVT OrigVT = ALoad->getValueType(0);
14381 assert(OrigVT.getSizeInBits() < VT.getSizeInBits() && "VT should be wider.");
14382 auto *NewALoad = cast<AtomicSDNode>(DAG.getAtomicLoad(
14383 ExtLoadType, SDLoc(ALoad), MemoryVT, VT, ALoad->getChain(),
14384 ALoad->getBasePtr(), ALoad->getMemOperand()));
14385   DAG.ReplaceAllUsesOfValueWith(
14386       SDValue(ALoad, 0),
14387 DAG.getNode(ISD::TRUNCATE, SDLoc(ALoad), OrigVT, SDValue(NewALoad, 0)));
14388 // Update the chain uses.
14389 DAG.ReplaceAllUsesOfValueWith(SDValue(ALoad, 1), SDValue(NewALoad, 1));
14390 return SDValue(NewALoad, 0);
14391}
14392
14393 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
14394                                        bool LegalOperations) {
14395 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
14396 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
14397
14398 SDValue SetCC = N->getOperand(0);
14399 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
14400 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
14401 return SDValue();
14402
14403 SDValue X = SetCC.getOperand(0);
14404 SDValue Ones = SetCC.getOperand(1);
14405 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
14406 EVT VT = N->getValueType(0);
14407 EVT XVT = X.getValueType();
14408 // setge X, C is canonicalized to setgt, so we do not need to match that
14409 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
14410 // not require the 'not' op.
14411 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
14412 // Invert and smear/shift the sign bit:
14413 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
14414 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
14415 SDLoc DL(N);
14416 unsigned ShCt = VT.getSizeInBits() - 1;
14417 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14418 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
14419 SDValue NotX = DAG.getNOT(DL, X, VT);
14420 SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
14421 auto ShiftOpcode =
14422 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
14423 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
14424 }
14425 }
14426 return SDValue();
14427}
14428
14429SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
14430 SDValue N0 = N->getOperand(0);
14431 if (N0.getOpcode() != ISD::SETCC)
14432 return SDValue();
14433
14434 SDValue N00 = N0.getOperand(0);
14435 SDValue N01 = N0.getOperand(1);
14436   ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
14437   EVT VT = N->getValueType(0);
14438 EVT N00VT = N00.getValueType();
14439 SDLoc DL(N);
14440
14441 // Propagate fast-math-flags.
14442 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14443
14444 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
14445 // the same size as the compared operands. Try to optimize sext(setcc())
14446 // if this is the case.
14447 if (VT.isVector() && !LegalOperations &&
14448 TLI.getBooleanContents(N00VT) ==
14449           TargetLowering::ZeroOrNegativeOneBooleanContent) {
14450     EVT SVT = getSetCCResultType(N00VT);
14451
14452 // If we already have the desired type, don't change it.
14453 if (SVT != N0.getValueType()) {
14454      // We know that the number of elements of the result is the same as the
14455      // number of elements of the compare (and of the compare result, for that
14456      // matter). Check to see that they are the same size. If so, we know that
14457      // the element size of the sext'd result matches the element size of the
14458      // compare operands.
14459 if (VT.getSizeInBits() == SVT.getSizeInBits())
14460 return DAG.getSetCC(DL, VT, N00, N01, CC);
14461
14462 // If the desired elements are smaller or larger than the source
14463 // elements, we can use a matching integer vector type and then
14464 // truncate/sign extend.
14465 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
14466 if (SVT == MatchingVecType) {
14467 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
14468 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
14469 }
14470 }
14471
14472 // Try to eliminate the sext of a setcc by zexting the compare operands.
14473 if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
14475 bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
14476 unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
14477 unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
14478
14479 // We have an unsupported narrow vector compare op that would be legal
14480 // if extended to the destination type. See if the compare operands
14481 // can be freely extended to the destination type.
14482 auto IsFreeToExtend = [&](SDValue V) {
14483 if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
14484 return true;
14485 // Match a simple, non-extended load that can be converted to a
14486 // legal {z/s}ext-load.
14487 // TODO: Allow widening of an existing {z/s}ext-load?
14488 if (!(ISD::isNON_EXTLoad(V.getNode()) &&
14489 ISD::isUNINDEXEDLoad(V.getNode()) &&
14490 cast<LoadSDNode>(V)->isSimple() &&
14491 TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
14492 return false;
14493
14494 // Non-chain users of this value must either be the setcc in this
14495 // sequence or extends that can be folded into the new {z/s}ext-load.
14496 for (SDUse &Use : V->uses()) {
14497 // Skip uses of the chain and the setcc.
14498 SDNode *User = Use.getUser();
14499 if (Use.getResNo() != 0 || User == N0.getNode())
14500 continue;
14501 // Extra users must have exactly the same cast we are about to create.
14502 // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
14503 // is enhanced similarly.
14504 if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
14505 return false;
14506 }
14507 return true;
14508 };
14509
14510 if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
14511 SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
14512 SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
14513 return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
14514 }
14515 }
14516 }
14517
14518 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
14519 // Here, T can be 1 or -1, depending on the type of the setcc and
14520 // getBooleanContents().
14521 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
14522
14523 // To determine the "true" side of the select, we need to know the high bit
14524 // of the value returned by the setcc if it evaluates to true.
14525 // If the type of the setcc is i1, then the true case of the select is just
14526 // sext(i1 1), that is, -1.
14527 // If the type of the setcc is larger (say, i8) then the value of the high
14528 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
14529 // of the appropriate width.
14530 SDValue ExtTrueVal = (SetCCWidth == 1)
14531 ? DAG.getAllOnesConstant(DL, VT)
14532 : DAG.getBoolConstant(true, DL, VT, N00VT);
14533 SDValue Zero = DAG.getConstant(0, DL, VT);
14534 if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
14535 return SCC;
14536
14537 if (!VT.isVector() && !shouldConvertSelectOfConstantsToMath(N0, VT, TLI)) {
14538 EVT SetCCVT = getSetCCResultType(N00VT);
14539 // Don't do this transform for i1 because there's a select transform
14540 // that would reverse it.
14541 // TODO: We should not do this transform at all without a target hook
14542 // because a sext is likely cheaper than a select?
14543 if (SetCCVT.getScalarSizeInBits() != 1 &&
14544 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
14545 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
14546 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
14547 }
14548 }
14549
14550 return SDValue();
14551}
14552
14553SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
14554 SDValue N0 = N->getOperand(0);
14555 EVT VT = N->getValueType(0);
14556 SDLoc DL(N);
14557
14558 if (VT.isVector())
14559 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
14560 return FoldedVOp;
14561
14562  // sext(undef) = 0 because the top bits will all be the same.
14563 if (N0.isUndef())
14564 return DAG.getConstant(0, DL, VT);
14565
14566 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14567 return Res;
14568
14569 // fold (sext (sext x)) -> (sext x)
14570 // fold (sext (aext x)) -> (sext x)
14571 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
14572 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
14573
14574 // fold (sext (aext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
14575 // fold (sext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
14578 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
14579 N0.getOperand(0));
14580
14581 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
14582 SDValue N00 = N0.getOperand(0);
14583 EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT();
14584 if (N00.getOpcode() == ISD::TRUNCATE || TLI.isTruncateFree(N00, ExtVT)) {
14585 // fold (sext (sext_inreg x)) -> (sext (trunc x))
14586 if ((!LegalTypes || TLI.isTypeLegal(ExtVT))) {
14587 SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00);
14588 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T);
14589 }
14590
14591 // If the trunc wasn't legal, try to fold to (sext_inreg (anyext x))
14592 if (!LegalTypes || TLI.isTypeLegal(VT)) {
14593 SDValue ExtSrc = DAG.getAnyExtOrTrunc(N00, DL, VT);
14594 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, ExtSrc,
14595 N0->getOperand(1));
14596 }
14597 }
14598 }
14599
14600 if (N0.getOpcode() == ISD::TRUNCATE) {
14601 // fold (sext (truncate (load x))) -> (sext (smaller load x))
14602 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
14603 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
14604 SDNode *oye = N0.getOperand(0).getNode();
14605 if (NarrowLoad.getNode() != N0.getNode()) {
14606 CombineTo(N0.getNode(), NarrowLoad);
14607 // CombineTo deleted the truncate, if needed, but not what's under it.
14608 AddToWorklist(oye);
14609 }
14610 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14611 }
14612
14613 // See if the value being truncated is already sign extended. If so, just
14614 // eliminate the trunc/sext pair.
14615 SDValue Op = N0.getOperand(0);
14616 unsigned OpBits = Op.getScalarValueSizeInBits();
14617 unsigned MidBits = N0.getScalarValueSizeInBits();
14618 unsigned DestBits = VT.getScalarSizeInBits();
14619
14620 if (N0->getFlags().hasNoSignedWrap() ||
14621 DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
14622 if (OpBits == DestBits) {
14623        // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
14624        // bits, it is already fully sign extended and can be used directly.
14625 return Op;
14626 }
14627
14628 if (OpBits < DestBits) {
14629 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
14630 // bits, just sext from i32.
14631 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
14632 }
14633
14634 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
14635 // bits, just truncate to i32.
14636 SDNodeFlags Flags;
14637 Flags.setNoSignedWrap(true);
14638 Flags.setNoUnsignedWrap(N0->getFlags().hasNoUnsignedWrap());
14639 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op, Flags);
14640 }
14641
14642 // fold (sext (truncate x)) -> (sextinreg x).
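    // e.g. (sext (trunc i32 X to i8) to i32) -> (sign_extend_inreg X, i8),
    // with an any_extend or truncate of X first if the widths differ.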
14643 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
14644 N0.getValueType())) {
14645 if (OpBits < DestBits)
14646 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
14647 else if (OpBits > DestBits)
14648 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
14649 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
14650 DAG.getValueType(N0.getValueType()));
14651 }
14652 }
14653
14654 // Try to simplify (sext (load x)).
14655 if (SDValue foldedExt =
14656 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
14658 return foldedExt;
14659
14660 if (SDValue foldedExt =
14661 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
14663 return foldedExt;
14664
14665 // fold (sext (load x)) to multiple smaller sextloads.
14666 // Only on illegal but splittable vectors.
14667 if (SDValue ExtLoad = CombineExtLoad(N))
14668 return ExtLoad;
14669
14670 // Try to simplify (sext (sextload x)).
14671 if (SDValue foldedExt = tryToFoldExtOfExtload(
14672 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
14673 return foldedExt;
14674
14675 // Try to simplify (sext (atomic_load x)).
14676 if (SDValue foldedExt =
14677 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::SEXTLOAD))
14678 return foldedExt;
14679
14680 // fold (sext (and/or/xor (load x), cst)) ->
14681 // (and/or/xor (sextload x), (sext cst))
14682 if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
14683 isa<LoadSDNode>(N0.getOperand(0)) &&
14684 N0.getOperand(1).getOpcode() == ISD::Constant &&
14685 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
14686 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
14687 EVT MemVT = LN00->getMemoryVT();
14688 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
14689 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
14690       SmallVector<SDNode *, 4> SetCCs;
14691       bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
14692 ISD::SIGN_EXTEND, SetCCs, TLI);
14693 if (DoXform) {
14694 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
14695 LN00->getChain(), LN00->getBasePtr(),
14696 LN00->getMemoryVT(),
14697 LN00->getMemOperand());
14698 APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
14699 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
14700 ExtLoad, DAG.getConstant(Mask, DL, VT));
14701 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
14702 bool NoReplaceTruncAnd = !N0.hasOneUse();
14703 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
14704 CombineTo(N, And);
14705 // If N0 has multiple uses, change other uses as well.
14706 if (NoReplaceTruncAnd) {
14707 SDValue TruncAnd =
14708               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
14709           CombineTo(N0.getNode(), TruncAnd);
14710 }
14711 if (NoReplaceTrunc) {
14712 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
14713 } else {
14714 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
14715 LN00->getValueType(0), ExtLoad);
14716 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
14717 }
14718 return SDValue(N,0); // Return N so it doesn't get rechecked!
14719 }
14720 }
14721 }
14722
14723 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
14724 return V;
14725
14726 if (SDValue V = foldSextSetcc(N))
14727 return V;
14728
14729 // fold (sext x) -> (zext x) if the sign bit is known zero.
14730 if (!TLI.isSExtCheaperThanZExt(N0.getValueType(), VT) &&
14731 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
14732 DAG.SignBitIsZero(N0))
14733 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0, SDNodeFlags::NonNeg);
14734
14735 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14736 return NewVSel;
14737
14738 // Eliminate this sign extend by doing a negation in the destination type:
14739 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
14740 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
14744 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
14745 return DAG.getNegative(Zext, DL, VT);
14746 }
14747 // Eliminate this sign extend by doing a decrement in the destination type:
14748 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
14749 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
14753 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
14754 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
14755 }
14756
14757 // fold sext (not i1 X) -> add (zext i1 X), -1
14758 // TODO: This could be extended to handle bool vectors.
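  // (For an i1 X, sext (not X) is -1 when X is 0 and 0 when X is 1, which is
  // exactly (zext X) - 1.)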
14759 if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
14760 (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
14761 TLI.isOperationLegal(ISD::ADD, VT)))) {
14762 // If we can eliminate the 'not', the sext form should be better
14763 if (SDValue NewXor = visitXOR(N0.getNode())) {
14764 // Returning N0 is a form of in-visit replacement that may have
14765 // invalidated N0.
14766 if (NewXor.getNode() == N0.getNode()) {
14767 // Return SDValue here as the xor should have already been replaced in
14768 // this sext.
14769 return SDValue();
14770 }
14771
14772 // Return a new sext with the new xor.
14773 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
14774 }
14775
14776 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
14777 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
14778 }
14779
14780 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
14781 return Res;
14782
14783 return SDValue();
14784}
14785
14786/// Given an extending node with a pop-count operand, if the target does not
14787/// support a pop-count in the narrow source type but does support it in the
14788/// destination type, widen the pop-count to the destination type.
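/// For example (illustrative): if CTPOP is not supported on i16 but is legal
/// on i32, (i32 (zext (ctpop i16 X))) can become (ctpop (zext i16 X to i32)),
/// since zero extension does not change the number of set bits.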
14789static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG, const SDLoc &DL) {
14790 assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
14791 Extend->getOpcode() == ISD::ANY_EXTEND) &&
14792 "Expected extend op");
14793
14794 SDValue CtPop = Extend->getOperand(0);
14795 if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
14796 return SDValue();
14797
14798 EVT VT = Extend->getValueType(0);
14799 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14802 return SDValue();
14803
14804 // zext (ctpop X) --> ctpop (zext X)
14805 SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
14806 return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
14807}
14808
14809// If we have (zext (abs X)) where X is a type that will be promoted by type
14810// legalization, convert to (abs (sext X)). But don't extend past a legal type.
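// For example (illustrative): with i8 promoted to i32,
//   (zext (abs i8 X) to i32) --> (zext/trunc (abs (sext i8 X to i32)))
// This also holds for X = INT8_MIN, where i8 abs wraps to 0x80 and the
// widened abs yields 128.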
14811static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG) {
14812 assert(Extend->getOpcode() == ISD::ZERO_EXTEND && "Expected zero extend.");
14813
14814 EVT VT = Extend->getValueType(0);
14815 if (VT.isVector())
14816 return SDValue();
14817
14818 SDValue Abs = Extend->getOperand(0);
14819 if (Abs.getOpcode() != ISD::ABS || !Abs.hasOneUse())
14820 return SDValue();
14821
14822 EVT AbsVT = Abs.getValueType();
14823 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14824 if (TLI.getTypeAction(*DAG.getContext(), AbsVT) !=
14826 return SDValue();
14827
14828 EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), AbsVT);
14829
14830 SDValue SExt =
14831 DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Abs), LegalVT, Abs.getOperand(0));
14832 SDValue NewAbs = DAG.getNode(ISD::ABS, SDLoc(Abs), LegalVT, SExt);
14833 return DAG.getZExtOrTrunc(NewAbs, SDLoc(Extend), VT);
14834}
14835
14836SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
14837 SDValue N0 = N->getOperand(0);
14838 EVT VT = N->getValueType(0);
14839 SDLoc DL(N);
14840
14841 if (VT.isVector())
14842 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
14843 return FoldedVOp;
14844
14845 // zext(undef) = 0
14846 if (N0.isUndef())
14847 return DAG.getConstant(0, DL, VT);
14848
14849 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14850 return Res;
14851
14852 // fold (zext (zext x)) -> (zext x)
14853 // fold (zext (aext x)) -> (zext x)
14854 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
14855 SDNodeFlags Flags;
14856 if (N0.getOpcode() == ISD::ZERO_EXTEND)
14857 Flags.setNonNeg(N0->getFlags().hasNonNeg());
14858 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0), Flags);
14859 }
14860
14861 // fold (zext (aext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
14862 // fold (zext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
14865 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, N0.getOperand(0));
14866
14867 // fold (zext (truncate x)) -> (zext x) or
14868 // (zext (truncate x)) -> (truncate x)
14869 // This is valid when the truncated bits of x are already zero.
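  // For example (illustrative): if x is i64 with its top 32 bits known zero,
  // (zext i64 (trunc x to i32)) is just x.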
14870 SDValue Op;
14871 KnownBits Known;
14872 if (isTruncateOf(DAG, N0, Op, Known)) {
14873 APInt TruncatedBits =
14874 (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
14875 APInt(Op.getScalarValueSizeInBits(), 0) :
14876 APInt::getBitsSet(Op.getScalarValueSizeInBits(),
14877 N0.getScalarValueSizeInBits(),
14878 std::min(Op.getScalarValueSizeInBits(),
14879 VT.getScalarSizeInBits()));
14880 if (TruncatedBits.isSubsetOf(Known.Zero)) {
14881 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
14882 DAG.salvageDebugInfo(*N0.getNode());
14883
14884 return ZExtOrTrunc;
14885 }
14886 }
14887
14888 // fold (zext (truncate x)) -> (and x, mask)
14889 if (N0.getOpcode() == ISD::TRUNCATE) {
14890 // fold (zext (truncate (load x))) -> (zext (smaller load x))
14891 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
14892 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
14893 SDNode *oye = N0.getOperand(0).getNode();
14894 if (NarrowLoad.getNode() != N0.getNode()) {
14895 CombineTo(N0.getNode(), NarrowLoad);
14896 // CombineTo deleted the truncate, if needed, but not what's under it.
14897 AddToWorklist(oye);
14898 }
14899 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14900 }
14901
14902 EVT SrcVT = N0.getOperand(0).getValueType();
14903 EVT MinVT = N0.getValueType();
14904
14905 if (N->getFlags().hasNonNeg()) {
14906 SDValue Op = N0.getOperand(0);
14907 unsigned OpBits = SrcVT.getScalarSizeInBits();
14908 unsigned MidBits = MinVT.getScalarSizeInBits();
14909 unsigned DestBits = VT.getScalarSizeInBits();
14910
14911 if (N0->getFlags().hasNoSignedWrap() ||
14912 DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
14913 if (OpBits == DestBits) {
14914          // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
14915          // bits, it is already fully sign extended and can be used directly.
14916 return Op;
14917 }
14918
14919 if (OpBits < DestBits) {
14920 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
14921 // bits, just sext from i32.
14922 // FIXME: This can probably be ZERO_EXTEND nneg?
14923 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
14924 }
14925
14926 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
14927 // bits, just truncate to i32.
14928 SDNodeFlags Flags;
14929 Flags.setNoSignedWrap(true);
14930 Flags.setNoUnsignedWrap(true);
14931 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op, Flags);
14932 }
14933 }
14934
14935 // Try to mask before the extension to avoid having to generate a larger mask,
14936 // possibly over several sub-vectors.
14937 if (SrcVT.bitsLT(VT) && VT.isVector()) {
14938 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
14940 SDValue Op = N0.getOperand(0);
14941 Op = DAG.getZeroExtendInReg(Op, DL, MinVT);
14942 AddToWorklist(Op.getNode());
14943 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
14944 // Transfer the debug info; the new node is equivalent to N0.
14945 DAG.transferDbgValues(N0, ZExtOrTrunc);
14946 return ZExtOrTrunc;
14947 }
14948 }
14949
14950 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
14951 SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
14952 AddToWorklist(Op.getNode());
14953 SDValue And = DAG.getZeroExtendInReg(Op, DL, MinVT);
14954 // We may safely transfer the debug info describing the truncate node over
14955 // to the equivalent and operation.
14956 DAG.transferDbgValues(N0, And);
14957 return And;
14958 }
14959 }
14960
14961 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
14962 // if either of the casts is not free.
14963 if (N0.getOpcode() == ISD::AND &&
14964 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
14965 N0.getOperand(1).getOpcode() == ISD::Constant &&
14966 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType()) ||
14967 !TLI.isZExtFree(N0.getValueType(), VT))) {
14968 SDValue X = N0.getOperand(0).getOperand(0);
14969 X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
14970 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
14971 return DAG.getNode(ISD::AND, DL, VT,
14972 X, DAG.getConstant(Mask, DL, VT));
14973 }
14974
14975 // Try to simplify (zext (load x)).
14976 if (SDValue foldedExt = tryToFoldExtOfLoad(
14977 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD,
14978 ISD::ZERO_EXTEND, N->getFlags().hasNonNeg()))
14979 return foldedExt;
14980
14981 if (SDValue foldedExt =
14982 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
14984 return foldedExt;
14985
14986 // fold (zext (load x)) to multiple smaller zextloads.
14987 // Only on illegal but splittable vectors.
14988 if (SDValue ExtLoad = CombineExtLoad(N))
14989 return ExtLoad;
14990
14991 // Try to simplify (zext (atomic_load x)).
14992 if (SDValue foldedExt =
14993 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::ZEXTLOAD))
14994 return foldedExt;
14995
14996 // fold (zext (and/or/xor (load x), cst)) ->
14997 // (and/or/xor (zextload x), (zext cst))
14998 // Unless (and (load x) cst) will match as a zextload already and has
14999 // additional users, or the zext is already free.
15000 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && !TLI.isZExtFree(N0, VT) &&
15001 isa<LoadSDNode>(N0.getOperand(0)) &&
15002 N0.getOperand(1).getOpcode() == ISD::Constant &&
15003 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
15004 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
15005 EVT MemVT = LN00->getMemoryVT();
15006 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
15007 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
15008 bool DoXform = true;
15009       SmallVector<SDNode *, 4> SetCCs;
15010       if (!N0.hasOneUse()) {
15011 if (N0.getOpcode() == ISD::AND) {
15012 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
15013 EVT LoadResultTy = AndC->getValueType(0);
15014 EVT ExtVT;
15015 if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
15016 DoXform = false;
15017 }
15018 }
15019 if (DoXform)
15020 DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
15021 ISD::ZERO_EXTEND, SetCCs, TLI);
15022 if (DoXform) {
15023 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
15024 LN00->getChain(), LN00->getBasePtr(),
15025 LN00->getMemoryVT(),
15026 LN00->getMemOperand());
15027 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
15028 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
15029 ExtLoad, DAG.getConstant(Mask, DL, VT));
15030 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
15031 bool NoReplaceTruncAnd = !N0.hasOneUse();
15032 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
15033 CombineTo(N, And);
15034 // If N0 has multiple uses, change other uses as well.
15035 if (NoReplaceTruncAnd) {
15036 SDValue TruncAnd =
15037               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
15038           CombineTo(N0.getNode(), TruncAnd);
15039 }
15040 if (NoReplaceTrunc) {
15041 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
15042 } else {
15043 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
15044 LN00->getValueType(0), ExtLoad);
15045 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
15046 }
15047 return SDValue(N,0); // Return N so it doesn't get rechecked!
15048 }
15049 }
15050 }
15051
15052 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
15053 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
15054 if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
15055 return ZExtLoad;
15056
15057 // Try to simplify (zext (zextload x)).
15058 if (SDValue foldedExt = tryToFoldExtOfExtload(
15059 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
15060 return foldedExt;
15061
15062 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
15063 return V;
15064
15065 if (N0.getOpcode() == ISD::SETCC) {
15066 // Propagate fast-math-flags.
15067 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
15068
15069 // Only do this before legalize for now.
15070 if (!LegalOperations && VT.isVector() &&
15071 N0.getValueType().getVectorElementType() == MVT::i1) {
15072 EVT N00VT = N0.getOperand(0).getValueType();
15073 if (getSetCCResultType(N00VT) == N0.getValueType())
15074 return SDValue();
15075
15076      // We know that the number of elements of the result is the same as the
15077      // number of elements of the compare (and of the compare result, for that
15078      // matter). Check to see that they are the same size. If so, we know that
15079      // the element size of the sext'd result matches the element size of the
15080      // compare operands.
15081 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
15082 // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
15083 SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
15084 N0.getOperand(1), N0.getOperand(2));
15085 return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
15086 }
15087
15088 // If the desired elements are smaller or larger than the source
15089 // elements we can use a matching integer vector type and then
15090 // truncate/any extend followed by zext_in_reg.
15091 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
15092 SDValue VsetCC =
15093 DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
15094 N0.getOperand(1), N0.getOperand(2));
15095 return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
15096 N0.getValueType());
15097 }
15098
15099 // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
15100 EVT N0VT = N0.getValueType();
15101 EVT N00VT = N0.getOperand(0).getValueType();
15102 if (SDValue SCC = SimplifySelectCC(
15103 DL, N0.getOperand(0), N0.getOperand(1),
15104 DAG.getBoolConstant(true, DL, N0VT, N00VT),
15105 DAG.getBoolConstant(false, DL, N0VT, N00VT),
15106 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
15107 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
15108 }
15109
15110  // (zext (shl/srl (zext x), cst)) -> (shl/srl (zext x), cst)
15111 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
15112 !TLI.isZExtFree(N0, VT)) {
15113 SDValue ShVal = N0.getOperand(0);
15114 SDValue ShAmt = N0.getOperand(1);
15115 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt)) {
15116 if (ShVal.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse()) {
15117 if (N0.getOpcode() == ISD::SHL) {
15118 // If the original shl may be shifting out bits, do not perform this
15119 // transformation.
15120 unsigned KnownZeroBits = ShVal.getValueSizeInBits() -
15121 ShVal.getOperand(0).getValueSizeInBits();
15122 if (ShAmtC->getAPIntValue().ugt(KnownZeroBits)) {
15123 // If the shift is too large, then see if we can deduce that the
15124 // shift is safe anyway.
15125
15126 // Check if the bits being shifted out are known to be zero.
15127 KnownBits KnownShVal = DAG.computeKnownBits(ShVal);
15128 if (ShAmtC->getAPIntValue().ugt(KnownShVal.countMinLeadingZeros()))
15129 return SDValue();
15130 }
15131 }
15132
15133 // Ensure that the shift amount is wide enough for the shifted value.
15134 if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
15135 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
15136
15137 return DAG.getNode(N0.getOpcode(), DL, VT,
15138 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ShVal), ShAmt);
15139 }
15140 }
15141 }
15142
15143 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
15144 return NewVSel;
15145
15146 if (SDValue NewCtPop = widenCtPop(N, DAG, DL))
15147 return NewCtPop;
15148
15149 if (SDValue V = widenAbs(N, DAG))
15150 return V;
15151
15152 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
15153 return Res;
15154
15155 // CSE zext nneg with sext if the zext is not free.
15156 if (N->getFlags().hasNonNeg() && !TLI.isZExtFree(N0.getValueType(), VT)) {
15157 SDNode *CSENode = DAG.getNodeIfExists(ISD::SIGN_EXTEND, N->getVTList(), N0);
15158 if (CSENode)
15159 return SDValue(CSENode, 0);
15160 }
15161
15162 return SDValue();
15163}
15164
15165SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
15166 SDValue N0 = N->getOperand(0);
15167 EVT VT = N->getValueType(0);
15168 SDLoc DL(N);
15169
15170 // aext(undef) = undef
15171 if (N0.isUndef())
15172 return DAG.getUNDEF(VT);
15173
15174 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
15175 return Res;
15176
15177 // fold (aext (aext x)) -> (aext x)
15178 // fold (aext (zext x)) -> (zext x)
15179 // fold (aext (sext x)) -> (sext x)
15180 if (N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::ZERO_EXTEND ||
15181 N0.getOpcode() == ISD::SIGN_EXTEND) {
15182 SDNodeFlags Flags;
15183 if (N0.getOpcode() == ISD::ZERO_EXTEND)
15184 Flags.setNonNeg(N0->getFlags().hasNonNeg());
15185 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
15186 }
15187
15188 // fold (aext (aext_extend_vector_inreg x)) -> (aext_extend_vector_inreg x)
15189 // fold (aext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
15190 // fold (aext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
15191 if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
15192 N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
15193 N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG)
15194 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
15195
15196 // fold (aext (truncate (load x))) -> (aext (smaller load x))
15197 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
15198 if (N0.getOpcode() == ISD::TRUNCATE) {
15199 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
15200 SDNode *oye = N0.getOperand(0).getNode();
15201 if (NarrowLoad.getNode() != N0.getNode()) {
15202 CombineTo(N0.getNode(), NarrowLoad);
15203 // CombineTo deleted the truncate, if needed, but not what's under it.
15204 AddToWorklist(oye);
15205 }
15206 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15207 }
15208 }
15209
15210 // fold (aext (truncate x))
15211 if (N0.getOpcode() == ISD::TRUNCATE)
15212 return DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
15213
15214 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
15215 // if the trunc is not free.
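// e.g. (i64 aext (i32 and (i32 trunc i64:x), 255)) -> (i64 and x, 255)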
15216 if (N0.getOpcode() == ISD::AND &&
15217 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
15218 N0.getOperand(1).getOpcode() == ISD::Constant &&
15219 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType())) {
15220 SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
15221 SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1));
15222 assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!");
15223 return DAG.getNode(ISD::AND, DL, VT, X, Y);
15224 }
15225
15226 // fold (aext (load x)) -> (aext (truncate (extload x)))
15227 // None of the supported targets knows how to perform load and any_ext
15228 // on vectors in one instruction, so attempt to fold to zext instead.
15229 if (VT.isVector()) {
15230 // Try to simplify (zext (load x)).
15231 if (SDValue foldedExt =
15232 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
15233 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
15234 return foldedExt;
15235 } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
15236 ISD::isUNINDEXEDLoad(N0.getNode()) &&
15237 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
15238 bool DoXform = true;
15239 SmallVector<SDNode *, 4> SetCCs;
15240 if (!N0.hasOneUse())
15241 DoXform =
15242 ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
15243 if (DoXform) {
15244 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15245 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, LN0->getChain(),
15246 LN0->getBasePtr(), N0.getValueType(),
15247 LN0->getMemOperand());
15248 ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
15249 // If the load value is used only by N, replace it via CombineTo N.
15250 bool NoReplaceTrunc = N0.hasOneUse();
15251 CombineTo(N, ExtLoad);
15252 if (NoReplaceTrunc) {
15253 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
15254 recursivelyDeleteUnusedNodes(LN0);
15255 } else {
15256 SDValue Trunc =
15257 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
15258 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
15259 }
15260 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15261 }
15262 }
15263
15264 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
15265 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
15266 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
15267 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
15268 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
15269 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15270 ISD::LoadExtType ExtType = LN0->getExtensionType();
15271 EVT MemVT = LN0->getMemoryVT();
15272 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
15273 SDValue ExtLoad =
15274 DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), LN0->getBasePtr(),
15275 MemVT, LN0->getMemOperand());
15276 CombineTo(N, ExtLoad);
15277 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
15278 recursivelyDeleteUnusedNodes(LN0);
15279 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15280 }
15281 }
15282
15283 if (N0.getOpcode() == ISD::SETCC) {
15284 // Propagate fast-math-flags.
15285 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
15286
15287 // For vectors:
15288 // aext(setcc) -> vsetcc
15289 // aext(setcc) -> truncate(vsetcc)
15290 // aext(setcc) -> aext(vsetcc)
15291 // Only do this before legalize for now.
15292 if (VT.isVector() && !LegalOperations) {
15293 EVT N00VT = N0.getOperand(0).getValueType();
15294 if (getSetCCResultType(N00VT) == N0.getValueType())
15295 return SDValue();
15296
15297 // We know that the # elements of the result is the same as the
15298 // # elements of the compare (and the # elements of the compare result
15299 // for that matter). Check to see that they are the same size. If so,
15300 // we know that the element size of the extended result matches the
15301 // element size of the compare operands.
15302 if (VT.getSizeInBits() == N00VT.getSizeInBits())
15303 return DAG.getSetCC(DL, VT, N0.getOperand(0), N0.getOperand(1),
15304 cast<CondCodeSDNode>(N0.getOperand(2))->get());
15305
15306 // If the desired elements are smaller or larger than the source
15307 // elements we can use a matching integer vector type and then
15308 // truncate/any extend
15309 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
15310 SDValue VsetCC = DAG.getSetCC(
15311 DL, MatchingVectorType, N0.getOperand(0), N0.getOperand(1),
15312 cast<CondCodeSDNode>(N0.getOperand(2))->get());
15313 return DAG.getAnyExtOrTrunc(VsetCC, DL, VT);
15314 }
15315
15316 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
15317 if (SDValue SCC = SimplifySelectCC(
15318 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
15319 DAG.getConstant(0, DL, VT),
15320 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
15321 return SCC;
15322 }
15323
15324 if (SDValue NewCtPop = widenCtPop(N, DAG, DL))
15325 return NewCtPop;
15326
15327 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
15328 return Res;
15329
15330 return SDValue();
15331}
15332
15333SDValue DAGCombiner::visitAssertExt(SDNode *N) {
15334 unsigned Opcode = N->getOpcode();
15335 SDValue N0 = N->getOperand(0);
15336 SDValue N1 = N->getOperand(1);
15337 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
15338
15339 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
15340 if (N0.getOpcode() == Opcode &&
15341 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
15342 return N0;
15343
15344 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
15345 N0.getOperand(0).getOpcode() == Opcode) {
15346 // We have an assert, truncate, assert sandwich. Make one stronger assert
15347 // by asserting with the smaller of the two asserted types on the larger
15348 // source value. This eliminates the later assert:
15349 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
15350 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
15351 SDLoc DL(N);
15352 SDValue BigA = N0.getOperand(0);
15353 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
15354 EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
15355 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
15356 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
15357 BigA.getOperand(0), MinAssertVTVal);
15358 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
15359 }
15360
15361 // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
15362 // than X, just move the AssertZext in front of the truncate and drop the
15363 // AssertSext.
15364 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
15365 N0.getOperand(0).getOpcode() == ISD::AssertSext &&
15366 Opcode == ISD::AssertZext) {
15367 SDValue BigA = N0.getOperand(0);
15368 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
15369 if (AssertVT.bitsLT(BigA_AssertVT)) {
15370 SDLoc DL(N);
15371 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
15372 BigA.getOperand(0), N1);
15373 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
15374 }
15375 }
15376
15377 if (Opcode == ISD::AssertZext && N0.getOpcode() == ISD::AND &&
15378 isa<ConstantSDNode>(N0.getOperand(1))) {
15379 const APInt &Mask = N0.getConstantOperandAPInt(1);
15380
15381 // If we have (AssertZext (and (AssertSext X, iX), M), iY) and Y is smaller
15382 // than X, and the And doesn't change the lower iX bits, we can move the
15383 // AssertZext in front of the And and drop the AssertSext.
15384 if (N0.getOperand(0).getOpcode() == ISD::AssertSext && N0.hasOneUse()) {
15385 SDValue BigA = N0.getOperand(0);
15386 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
15387 if (AssertVT.bitsLT(BigA_AssertVT) &&
15388 Mask.countr_one() >= BigA_AssertVT.getScalarSizeInBits()) {
15389 SDLoc DL(N);
15390 SDValue NewAssert =
15391 DAG.getNode(Opcode, DL, N->getValueType(0), BigA.getOperand(0), N1);
15392 return DAG.getNode(ISD::AND, DL, N->getValueType(0), NewAssert,
15393 N0.getOperand(1));
15394 }
15395 }
15396
15397 // Remove AssertZext entirely if the mask guarantees the assertion cannot
15398 // fail.
15399 // TODO: Use KB countMinLeadingZeros to handle non-constant masks?
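// e.g. (AssertZext (and x, 0xFF), i16) -> (and x, 0xFF)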
15400 if (Mask.isIntN(AssertVT.getScalarSizeInBits()))
15401 return N0;
15402 }
15403
15404 return SDValue();
15405}
15406
15407SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
15408 SDLoc DL(N);
15409
15410 Align AL = cast<AssertAlignSDNode>(N)->getAlign();
15411 SDValue N0 = N->getOperand(0);
15412
15413 // Fold (assertalign (assertalign x, AL0), AL1) ->
15414 // (assertalign x, max(AL0, AL1))
15415 if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
15416 return DAG.getAssertAlign(DL, N0.getOperand(0),
15417 std::max(AL, AAN->getAlign()));
15418
15419 // In rare cases, there are trivial arithmetic ops in source operands. Sink
15420 // this assert down to source operands so that those arithmetic ops could be
15421 // exposed to the DAG combining.
15422 switch (N0.getOpcode()) {
15423 default:
15424 break;
15425 case ISD::ADD:
15426 case ISD::PTRADD:
15427 case ISD::SUB: {
15428 unsigned AlignShift = Log2(AL);
15429 SDValue LHS = N0.getOperand(0);
15430 SDValue RHS = N0.getOperand(1);
15431 unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
15432 unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
15433 if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
15434 if (LHSAlignShift < AlignShift)
15435 LHS = DAG.getAssertAlign(DL, LHS, AL);
15436 if (RHSAlignShift < AlignShift)
15437 RHS = DAG.getAssertAlign(DL, RHS, AL);
15438 return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
15439 }
15440 break;
15441 }
15442 }
15443
15444 return SDValue();
15445}
15446
15447/// If the result of a load is shifted/masked/truncated to an effectively
15448/// narrower type, try to transform the load to a narrower type and/or
15449/// use an extending load.
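/// e.g. on a little-endian target, (i32 (srl (load i32 *p), 16)) can become a
/// zero-extending i16 load from p+2.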
15450SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
15451 unsigned Opc = N->getOpcode();
15452
15453 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
15454 SDValue N0 = N->getOperand(0);
15455 EVT VT = N->getValueType(0);
15456 EVT ExtVT = VT;
15457
15458 // This transformation isn't valid for vector loads.
15459 if (VT.isVector())
15460 return SDValue();
15461
15462 // The ShAmt variable is used to indicate that we've consumed a right
15463 // shift. I.e. we want to narrow the width of the load by skipping over the
15464 // ShAmt least significant bits.
15465 unsigned ShAmt = 0;
15466 // A special case is when the least significant bits from the load are masked
15467 // away, but using an AND rather than a right shift. ShiftedOffset records how
15468 // many bits the narrowed load must be shifted left to reconstruct the
15469 // result.
15470 unsigned ShiftedOffset = 0;
15471 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
15472 // extended to VT.
15473 if (Opc == ISD::SIGN_EXTEND_INREG) {
15474 ExtType = ISD::SEXTLOAD;
15475 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
15476 } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
15477 // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
15478 // value, or it may be shifting a higher subword, half or byte into the
15479 // lowest bits.
15480
15481 // Only handle shift with constant shift amount, and the shiftee must be a
15482 // load.
15483 auto *LN = dyn_cast<LoadSDNode>(N0);
15484 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
15485 if (!N1C || !LN)
15486 return SDValue();
15487 // If the shift amount is larger than the memory type then we're not
15488 // accessing any of the loaded bytes.
15489 ShAmt = N1C->getZExtValue();
15490 uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
15491 if (MemoryWidth <= ShAmt)
15492 return SDValue();
15493 // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
15494 ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
15495 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
15496 // If the original load is a SEXTLOAD then we can't simply replace it by a
15497 // ZEXTLOAD (we could potentially replace it by a narrower SEXTLOAD
15498 // followed by a ZEXT, but that is not handled at the moment). Similarly if
15499 // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
15500 if ((LN->getExtensionType() == ISD::SEXTLOAD ||
15501 LN->getExtensionType() == ISD::ZEXTLOAD) &&
15502 LN->getExtensionType() != ExtType)
15503 return SDValue();
15504 } else if (Opc == ISD::AND) {
15505 // An AND with a constant mask is the same as a truncate + zero-extend.
15506 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
15507 if (!AndC)
15508 return SDValue();
15509
15510 const APInt &Mask = AndC->getAPIntValue();
15511 unsigned ActiveBits = 0;
15512 if (Mask.isMask()) {
15513 ActiveBits = Mask.countr_one();
15514 } else if (Mask.isShiftedMask(ShAmt, ActiveBits)) {
15515 ShiftedOffset = ShAmt;
15516 } else {
15517 return SDValue();
15518 }
15519
15520 ExtType = ISD::ZEXTLOAD;
15521 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
15522 }
15523
15524 // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
15525 // a right shift. Here we redo some of those checks, to possibly adjust the
15526 // ExtVT even further based on "a masking AND". We could also end up here for
15527 // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks
15528 // need to be done here as well.
15529 if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
15530 SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
15531 // Bail out when the SRL has more than one use. This is done for historical
15532 // (undocumented) reasons. Maybe the intent was to guard the AND-masking
15533 // check below? And maybe it could be non-profitable to do the transform in
15534 // case the SRL has multiple uses and we get here with Opc!=ISD::SRL?
15535 // FIXME: Can't we just skip this check for the Opc==ISD::SRL case?
15536 if (!SRL.hasOneUse())
15537 return SDValue();
15538
15539 // Only handle shift with constant shift amount, and the shiftee must be a
15540 // load.
15541 auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0));
15542 auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
15543 if (!SRL1C || !LN)
15544 return SDValue();
15545
15546 // If the shift amount is larger than the input type then we're not
15547 // accessing any of the loaded bytes. If the load was a zextload/extload
15548 // then the result of the shift+trunc is zero/undef (handled elsewhere).
15549 ShAmt = SRL1C->getZExtValue();
15550 uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
15551 if (ShAmt >= MemoryWidth)
15552 return SDValue();
15553
15554 // Because a SRL must be assumed to *need* to zero-extend the high bits
15555 // (as opposed to anyext the high bits), we can't combine the zextload
15556 // lowering of SRL and an sextload.
15557 if (LN->getExtensionType() == ISD::SEXTLOAD)
15558 return SDValue();
15559
15560 // Avoid reading outside the memory accessed by the original load (could
15561 // happen if we only adjust the load base pointer by ShAmt). Instead we
15562 // try to narrow the load even further. The typical scenario here is:
15563 // (i64 (truncate (i96 (srl (load x), 64)))) ->
15564 // (i64 (truncate (i96 (zextload (load i32 + offset) from i32))))
15565 if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
15566 // Don't replace sextload by zextload.
15567 if (ExtType == ISD::SEXTLOAD)
15568 return SDValue();
15569 // Narrow the load.
15570 ExtType = ISD::ZEXTLOAD;
15571 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
15572 }
15573
15574 // If the SRL is only used by a masking AND, we may be able to adjust
15575 // the ExtVT to make the AND redundant.
15576 SDNode *Mask = *(SRL->user_begin());
15577 if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
15578 isa<ConstantSDNode>(Mask->getOperand(1))) {
15579 unsigned Offset, ActiveBits;
15580 const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
15581 if (ShiftMask.isMask()) {
15582 EVT MaskedVT =
15583 EVT::getIntegerVT(*DAG.getContext(), ShiftMask.countr_one());
15584 // If the mask is smaller, recompute the type.
15585 if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
15586 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
15587 ExtVT = MaskedVT;
15588 } else if (ExtType == ISD::ZEXTLOAD &&
15589 ShiftMask.isShiftedMask(Offset, ActiveBits) &&
15590 (Offset + ShAmt) < VT.getScalarSizeInBits()) {
15591 EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
15592 // If the mask is shifted we can use a narrower load and a shl to insert
15593 // the trailing zeros.
15594 if (((Offset + ActiveBits) <= ExtVT.getScalarSizeInBits()) &&
15595 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT)) {
15596 ExtVT = MaskedVT;
15597 ShAmt = Offset + ShAmt;
15598 ShiftedOffset = Offset;
15599 }
15600 }
15601 }
15602
15603 N0 = SRL.getOperand(0);
15604 }
15605
15606 // If the load is shifted left (and the result isn't shifted back right), we
15607 // can fold a truncate through the shift. The typical scenario is that N
15608 // points at a TRUNCATE here so the attempted fold is:
15609 // (truncate (shl (load x), c)) -> (shl (narrow load x), c)
15610 // ShLeftAmt will indicate how much a narrowed load should be shifted left.
15611 unsigned ShLeftAmt = 0;
15612 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
15613 ExtVT == VT && TLI.isNarrowingProfitable(N, N0.getValueType(), VT)) {
15614 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
15615 ShLeftAmt = N01->getZExtValue();
15616 N0 = N0.getOperand(0);
15617 }
15618 }
15619
15620 // If we haven't found a load, we can't narrow it.
15621 if (!isa<LoadSDNode>(N0))
15622 return SDValue();
15623
15624 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15625 // Reducing the width of a volatile load is illegal. For atomics, we may be
15626 // able to reduce the width provided we never widen again. (see D66309)
15627 if (!LN0->isSimple() ||
15628 !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
15629 return SDValue();
15630
15631 auto AdjustBigEndianShift = [&](unsigned ShAmt) {
15632 unsigned LVTStoreBits =
15633 LN0->getMemoryVT().getStoreSizeInBits().getFixedValue();
15634 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedValue();
15635 return LVTStoreBits - EVTStoreBits - ShAmt;
15636 };
15637
15638 // We need to adjust the pointer to the load by ShAmt bits in order to load
15639 // the correct bytes.
15640 unsigned PtrAdjustmentInBits =
15641 DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;
15642
15643 uint64_t PtrOff = PtrAdjustmentInBits / 8;
15644 SDLoc DL(LN0);
15645 // The original load itself didn't wrap, so an offset within it doesn't.
15646 SDValue NewPtr =
15647 DAG.getMemBasePlusOffset(LN0->getBasePtr(), TypeSize::getFixed(PtrOff), DL,
15648 SDNodeFlags::NoUnsignedWrap);
15649 AddToWorklist(NewPtr.getNode());
15650
15651 SDValue Load;
15652 if (ExtType == ISD::NON_EXTLOAD) {
15653 const MDNode *OldRanges = LN0->getRanges();
15654 const MDNode *NewRanges = nullptr;
15655 // If LSBs are loaded and the truncated ConstantRange for the OldRanges
15656 // metadata is not the full set for the new width, then create NewRanges
15657 // metadata for the truncated load.
15658 if (ShAmt == 0 && OldRanges) {
15659 ConstantRange CR = getConstantRangeFromMetadata(*OldRanges);
15660 unsigned BitSize = VT.getScalarSizeInBits();
15661
15662 // It is possible for an 8-bit extending load with 8-bit range
15663 // metadata to be narrowed to an 8-bit load. This guard is necessary to
15664 // ensure that truncation is strictly smaller.
15665 if (CR.getBitWidth() > BitSize) {
15666 ConstantRange TruncatedCR = CR.truncate(BitSize);
15667 if (!TruncatedCR.isFullSet()) {
15668 Metadata *Bounds[2] = {
15669 ConstantAsMetadata::get(
15670 ConstantInt::get(*DAG.getContext(), TruncatedCR.getLower())),
15671 ConstantAsMetadata::get(
15672 ConstantInt::get(*DAG.getContext(), TruncatedCR.getUpper()))};
15673 NewRanges = MDNode::get(*DAG.getContext(), Bounds);
15674 }
15675 } else if (CR.getBitWidth() == BitSize)
15676 NewRanges = OldRanges;
15677 }
15678 Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
15679 LN0->getPointerInfo().getWithOffset(PtrOff),
15680 LN0->getBaseAlign(), LN0->getMemOperand()->getFlags(),
15681 LN0->getAAInfo(), NewRanges);
15682 } else
15683 Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
15684 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
15685 LN0->getBaseAlign(), LN0->getMemOperand()->getFlags(),
15686 LN0->getAAInfo());
15687
15688 // Replace the old load's chain with the new load's chain.
15689 WorklistRemover DeadNodes(*this);
15690 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
15691
15692 // Shift the result left, if we've swallowed a left shift.
15693 SDValue Result = Load;
15694 if (ShLeftAmt != 0) {
15695 // If the shift amount is as large as the result size (but, presumably,
15696 // no larger than the source) then the useful bits of the result are
15697 // zero; we can't simply return the shortened shift, because the result
15698 // of that operation is undefined.
15699 if (ShLeftAmt >= VT.getScalarSizeInBits())
15700 Result = DAG.getConstant(0, DL, VT);
15701 else
15702 Result = DAG.getNode(ISD::SHL, DL, VT, Result,
15703 DAG.getShiftAmountConstant(ShLeftAmt, VT, DL));
15704 }
15705
15706 if (ShiftedOffset != 0) {
15707 // We're using a shifted mask, so the load now has an offset. This means
15708 // the data has been loaded into lower bits than it originally occupied, so
15709 // we need to shl the loaded data back into the correct position in the
15710 // register.
15711 SDValue ShiftC = DAG.getConstant(ShiftedOffset, DL, VT);
15712 Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
15713 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
15714 }
15715
15716 // Return the new loaded value.
15717 return Result;
15718}
15719
15720SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
15721 SDValue N0 = N->getOperand(0);
15722 SDValue N1 = N->getOperand(1);
15723 EVT VT = N->getValueType(0);
15724 EVT ExtVT = cast<VTSDNode>(N1)->getVT();
15725 unsigned VTBits = VT.getScalarSizeInBits();
15726 unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
15727 SDLoc DL(N);
15728
15729 // sext_in_reg(undef) = 0 because the top bits will all be the same.
15730 if (N0.isUndef())
15731 return DAG.getConstant(0, DL, VT);
15732
15733 // fold (sext_in_reg c1) -> c1
15734 if (SDValue C =
15735 DAG.FoldConstantArithmetic(ISD::SIGN_EXTEND_INREG, DL, VT, {N0, N1}))
15736 return C;
15737
15738 // If the input is already sign extended, just drop the extension.
15739 if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0))
15740 return N0;
15741
15742 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
15743 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
15744 ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
15745 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N0.getOperand(0), N1);
15746
15747 // fold (sext_in_reg (sext x)) -> (sext x)
15748 // fold (sext_in_reg (aext x)) -> (sext x)
15749 // if x is small enough or if we know that x has more than 1 sign bit and the
15750 // sign_extend_inreg is extending from one of them.
15751 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
15752 SDValue N00 = N0.getOperand(0);
15753 unsigned N00Bits = N00.getScalarValueSizeInBits();
15754 if ((N00Bits <= ExtVTBits ||
15755 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) &&
15756 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
15757 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N00);
15758 }
15759
15760 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
15761 // if x is small enough or if we know that x has more than 1 sign bit and the
15762 // sign_extend_inreg is extending from one of them.
15763 if (ISD::isExtVecInRegOpcode(N0.getOpcode())) {
15764 SDValue N00 = N0.getOperand(0);
15765 unsigned N00Bits = N00.getScalarValueSizeInBits();
15766 bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
15767 if ((N00Bits == ExtVTBits ||
15768 (!IsZext && (N00Bits < ExtVTBits ||
15769 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) &&
15770 (!LegalOperations ||
15771 TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
15772 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, N00);
15773 }
15774
15775 // fold (sext_in_reg (zext x)) -> (sext x)
15776 // iff we are extending the source sign bit.
15777 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
15778 SDValue N00 = N0.getOperand(0);
15779 if (N00.getScalarValueSizeInBits() == ExtVTBits &&
15780 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
15781 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N00);
15782 }
15783
15784 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
15785 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
15786 return DAG.getZeroExtendInReg(N0, DL, ExtVT);
15787
15788 // fold operands of sext_in_reg based on knowledge that the top bits are not
15789 // demanded.
15790 if (SimplifyDemandedBits(SDValue(N, 0)))
15791 return SDValue(N, 0);
15792
15793 // fold (sext_in_reg (load x)) -> (smaller sextload x)
15794 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
15795 if (SDValue NarrowLoad = reduceLoadWidth(N))
15796 return NarrowLoad;
15797
15798 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
15799 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
15800 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
15801 if (N0.getOpcode() == ISD::SRL) {
15802 if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
15803 if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
15804 // We can turn this into an SRA iff the input to the SRL is already sign
15805 // extended enough.
15806 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
15807 if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
15808 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
15809 N0.getOperand(1));
15810 }
15811 }
15812
15813 // fold (sext_inreg (extload x)) -> (sextload x)
15814 // If sextload is not supported by target, we can only do the combine when
15815 // load has one use. Doing otherwise can block folding the extload with other
15816 // extends that the target does support.
15817 if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
15818 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
15819 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
15820 N0.hasOneUse()) ||
15821 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
15822 auto *LN0 = cast<LoadSDNode>(N0);
15823 SDValue ExtLoad =
15824 DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
15825 LN0->getBasePtr(), ExtVT, LN0->getMemOperand());
15826 CombineTo(N, ExtLoad);
15827 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
15828 AddToWorklist(ExtLoad.getNode());
15829 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15830 }
15831
15832 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
15833 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
15834 N0.hasOneUse() && ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
15835 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
15836 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
15837 auto *LN0 = cast<LoadSDNode>(N0);
15838 SDValue ExtLoad =
15839 DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
15840 LN0->getBasePtr(), ExtVT, LN0->getMemOperand());
15841 CombineTo(N, ExtLoad);
15842 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
15843 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15844 }
15845
15846 // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
15847 // ignore it if the masked load is already sign extended
15848 if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
15849 if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
15850 Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
15851 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
15852 SDValue ExtMaskedLoad = DAG.getMaskedLoad(
15853 VT, DL, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
15854 Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
15855 Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
15856 CombineTo(N, ExtMaskedLoad);
15857 CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
15858 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15859 }
15860 }
15861
15862 // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
15863 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
15864 if (SDValue(GN0, 0).hasOneUse() && ExtVT == GN0->getMemoryVT() &&
15865 TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
15866 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
15867 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
15868
15869 SDValue ExtLoad = DAG.getMaskedGather(
15870 DAG.getVTList(VT, MVT::Other), ExtVT, DL, Ops, GN0->getMemOperand(),
15871 GN0->getIndexType(), ISD::SEXTLOAD);
15872
15873 CombineTo(N, ExtLoad);
15874 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
15875 AddToWorklist(ExtLoad.getNode());
15876 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15877 }
15878 }
15879
15880 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
15881 if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
15882 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
15883 N0.getOperand(1), false))
15884 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, BSwap, N1);
15885 }
15886
15887 // Fold (iM_signext_inreg
15888 // (extract_subvector (zext|anyext|sext iN_v to _) _)
15889 // from iN)
15890 // -> (extract_subvector (signext iN_v to iM))
15891 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() &&
15892 ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
15893 SDValue InnerExt = N0.getOperand(0);
15894 EVT InnerExtVT = InnerExt->getValueType(0);
15895 SDValue Extendee = InnerExt->getOperand(0);
15896
15897 if (ExtVTBits == Extendee.getValueType().getScalarSizeInBits() &&
15898 (!LegalOperations ||
15899 TLI.isOperationLegal(ISD::SIGN_EXTEND, InnerExtVT))) {
15900 SDValue SignExtExtendee =
15901 DAG.getNode(ISD::SIGN_EXTEND, DL, InnerExtVT, Extendee);
15902 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SignExtExtendee,
15903 N0.getOperand(1));
15904 }
15905 }
15906
15907 return SDValue();
15908}
15909
15910 static SDValue foldExtendVectorInregToExtendOfSubvector(
15911 SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG,
15912 bool LegalOperations) {
15913 unsigned InregOpcode = N->getOpcode();
15914 unsigned Opcode = DAG.getOpcode_EXTEND(InregOpcode);
15915
15916 SDValue Src = N->getOperand(0);
15917 EVT VT = N->getValueType(0);
15918 EVT SrcVT = EVT::getVectorVT(*DAG.getContext(),
15919 Src.getValueType().getVectorElementType(),
15920 VT.getVectorElementCount());
15921
15922 assert(ISD::isExtVecInRegOpcode(InregOpcode) &&
15923 "Expected EXTEND_VECTOR_INREG dag node in input!");
15924
15925 // Profitability check: our operand must be a one-use CONCAT_VECTORS.
15926 // FIXME: one-use check may be overly restrictive
15927 if (!Src.hasOneUse() || Src.getOpcode() != ISD::CONCAT_VECTORS)
15928 return SDValue();
15929
15930 // Profitability check: we must be extending exactly one of its operands.
15931 // FIXME: this is probably overly restrictive.
15932 Src = Src.getOperand(0);
15933 if (Src.getValueType() != SrcVT)
15934 return SDValue();
15935
15936 if (LegalOperations && !TLI.isOperationLegal(Opcode, VT))
15937 return SDValue();
15938
15939 return DAG.getNode(Opcode, DL, VT, Src);
15940}
15941
15942SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
15943 SDValue N0 = N->getOperand(0);
15944 EVT VT = N->getValueType(0);
15945 SDLoc DL(N);
15946
15947 if (N0.isUndef()) {
15948 // aext_vector_inreg(undef) = undef because the top bits are undefined.
15949 // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
15950 return N->getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG
15951 ? DAG.getUNDEF(VT)
15952 : DAG.getConstant(0, DL, VT);
15953 }
15954
15955 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
15956 return Res;
15957
15958 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
15959 return SDValue(N, 0);
15960
15961 if (SDValue R = foldExtendVectorInregToExtendOfSubvector(N, DL, TLI, DAG,
15962 LegalOperations))
15963 return R;
15964
15965 return SDValue();
15966}
15967
15968SDValue DAGCombiner::visitTRUNCATE_USAT_U(SDNode *N) {
15969 EVT VT = N->getValueType(0);
15970 SDValue N0 = N->getOperand(0);
15971
15972 SDValue FPVal;
15973 if (sd_match(N0, m_FPToUI(m_Value(FPVal))) &&
15974 DAG.getTargetLoweringInfo().shouldConvertFpToSat(
15975 ISD::FP_TO_UINT_SAT, FPVal.getValueType(), VT))
15976 return DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), VT, FPVal,
15977 DAG.getValueType(VT.getScalarType()));
15978
15979 return SDValue();
15980}
15981
15982/// Detect patterns of truncation with unsigned saturation:
15983///
15984/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
15985/// Return the source value x to be truncated or SDValue() if the pattern was
15986/// not matched.
15987///
15988 static SDValue detectUSatUPattern(SDValue In, EVT VT) {
15989 unsigned NumDstBits = VT.getScalarSizeInBits();
15990 unsigned NumSrcBits = In.getScalarValueSizeInBits();
15991 // Saturation with truncation. We truncate from InVT to VT.
15992 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
15993
15994 SDValue Min;
15995 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
15996 if (sd_match(In, m_UMin(m_Value(Min), m_SpecificInt(UnsignedMax))))
15997 return Min;
15998
15999 return SDValue();
16000}
16001
16002/// Detect patterns of truncation with signed saturation:
16003/// (truncate (smin (smax (x, signed_min_of_dest_type),
16004/// signed_max_of_dest_type)) to dest_type)
16005/// or:
16006/// (truncate (smax (smin (x, signed_max_of_dest_type),
16007/// signed_min_of_dest_type)) to dest_type).
16008///
16009/// Return the source value to be truncated or SDValue() if the pattern was not
16010/// matched.
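/// e.g. for (i8 (truncate (smin (smax i32:x, -128), 127))) this returns x.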
16011 static SDValue detectSSatSPattern(SDValue In, EVT VT) {
16012 unsigned NumDstBits = VT.getScalarSizeInBits();
16013 unsigned NumSrcBits = In.getScalarValueSizeInBits();
16014 // Saturation with truncation. We truncate from InVT to VT.
16015 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
16016
16017 SDValue Val;
16018 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
16019 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
16020
16021 if (sd_match(In, m_SMin(m_SMax(m_Value(Val), m_SpecificInt(SignedMin)),
16022 m_SpecificInt(SignedMax))))
16023 return Val;
16024
16025 if (sd_match(In, m_SMax(m_SMin(m_Value(Val), m_SpecificInt(SignedMax)),
16026 m_SpecificInt(SignedMin))))
16027 return Val;
16028
16029 return SDValue();
16030}
16031
16032/// Detect patterns of truncation with unsigned saturation:
16033 static SDValue detectSSatUPattern(SDValue In, EVT VT, SelectionDAG &DAG,
16034 const SDLoc &DL) {
16035 unsigned NumDstBits = VT.getScalarSizeInBits();
16036 unsigned NumSrcBits = In.getScalarValueSizeInBits();
16037 // Saturation with truncation. We truncate from InVT to VT.
16038 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
16039
16040 SDValue Val;
16041 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
16042 // Min == 0, Max is unsigned max of destination type.
16043 if (sd_match(In, m_SMax(m_SMin(m_Value(Val), m_SpecificInt(UnsignedMax)),
16044 m_Zero())))
16045 return Val;
16046
16047 if (sd_match(In, m_SMin(m_SMax(m_Value(Val), m_Zero()),
16048 m_SpecificInt(UnsignedMax))))
16049 return Val;
16050
16051 if (sd_match(In, m_UMin(m_SMax(m_Value(Val), m_Zero()),
16052 m_SpecificInt(UnsignedMax))))
16053 return Val;
16054
16055 return SDValue();
16056}
16057
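/// Try to replace a truncate of a min/max clamp pattern with one of the
/// saturating truncate nodes (TRUNCATE_SSAT_S, TRUNCATE_SSAT_U or
/// TRUNCATE_USAT_U), when the target considers that operation legal/custom and
/// the type desirable.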
16058static SDValue foldToSaturated(SDNode *N, EVT &VT, SDValue &Src, EVT &SrcVT,
16059 SDLoc &DL, const TargetLowering &TLI,
16060 SelectionDAG &DAG) {
16061 auto AllowedTruncateSat = [&](unsigned Opc, EVT SrcVT, EVT VT) -> bool {
16062 return (TLI.isOperationLegalOrCustom(Opc, SrcVT) &&
16063 TLI.isTypeDesirableForOp(Opc, VT));
16064 };
16065
16066 if (Src.getOpcode() == ISD::SMIN || Src.getOpcode() == ISD::SMAX) {
16067 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_S, SrcVT, VT))
16068 if (SDValue SSatVal = detectSSatSPattern(Src, VT))
16069 return DAG.getNode(ISD::TRUNCATE_SSAT_S, DL, VT, SSatVal);
16070 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_U, SrcVT, VT))
16071 if (SDValue SSatVal = detectSSatUPattern(Src, VT, DAG, DL))
16072 return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, VT, SSatVal);
16073 } else if (Src.getOpcode() == ISD::UMIN) {
16074 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_U, SrcVT, VT))
16075 if (SDValue SSatVal = detectSSatUPattern(Src, VT, DAG, DL))
16076 return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, VT, SSatVal);
16077 if (AllowedTruncateSat(ISD::TRUNCATE_USAT_U, SrcVT, VT))
16078 if (SDValue USatVal = detectUSatUPattern(Src, VT))
16079 return DAG.getNode(ISD::TRUNCATE_USAT_U, DL, VT, USatVal);
16080 }
16081
16082 return SDValue();
16083}
16084
16085SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
16086 SDValue N0 = N->getOperand(0);
16087 EVT VT = N->getValueType(0);
16088 EVT SrcVT = N0.getValueType();
16089 bool isLE = DAG.getDataLayout().isLittleEndian();
16090 SDLoc DL(N);
16091
16092 // trunc(undef) = undef
16093 if (N0.isUndef())
16094 return DAG.getUNDEF(VT);
16095
16096 // fold (truncate (truncate x)) -> (truncate x)
16097 if (N0.getOpcode() == ISD::TRUNCATE)
16098 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16099
16100 // fold saturated truncate
16101 if (SDValue SaturatedTR = foldToSaturated(N, VT, N0, SrcVT, DL, TLI, DAG))
16102 return SaturatedTR;
16103
16104 // fold (truncate c1) -> c1
16105 if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, DL, VT, {N0}))
16106 return C;
16107
16108 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
16109 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
16110 N0.getOpcode() == ISD::SIGN_EXTEND ||
16111 N0.getOpcode() == ISD::ANY_EXTEND) {
16112 // if the source is smaller than the dest, we still need an extend.
16113 if (N0.getOperand(0).getValueType().bitsLT(VT)) {
16114 SDNodeFlags Flags;
16115 if (N0.getOpcode() == ISD::ZERO_EXTEND)
16116 Flags.setNonNeg(N0->getFlags().hasNonNeg());
16117 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
16118 }
16119 // if the source is larger than the dest, then we just need the truncate.
16120 if (N0.getOperand(0).getValueType().bitsGT(VT))
16121 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16122 // if the source and dest are the same type, we can drop both the extend
16123 // and the truncate.
16124 return N0.getOperand(0);
16125 }
16126
16127 // Try to narrow a truncate-of-sext_in_reg to the destination type:
16128 // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM
16129 if (!LegalTypes && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
16130 N0.hasOneUse()) {
16131 SDValue X = N0.getOperand(0);
16132 SDValue ExtVal = N0.getOperand(1);
16133 EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
16134 if (ExtVT.bitsLT(VT) && TLI.preferSextInRegOfTruncate(VT, SrcVT, ExtVT)) {
16135 SDValue TrX = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
16136 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, TrX, ExtVal);
16137 }
16138 }
16139
16140 // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
16141 if (N->hasOneUse() && (N->user_begin()->getOpcode() == ISD::ANY_EXTEND))
16142 return SDValue();
16143
16144 // Fold extract-and-trunc into a narrow extract. For example:
16145 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
16146 // i32 y = TRUNCATE(i64 x)
16147 // -- becomes --
16148 // v16i8 b = BITCAST (v2i64 val)
16149 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
16150 //
16151 // Note: We only run this optimization after type legalization (which often
16152 // creates this pattern) and before operation legalization after which
16153 // we need to be more careful about the vector instructions that we generate.
16154 if (LegalTypes && !LegalOperations && VT.isScalarInteger() && VT != MVT::i1 &&
16155 N0->hasOneUse()) {
16156 EVT TrTy = N->getValueType(0);
16157 SDValue Src = N0;
16158
16159 // Check for cases where we shift down an upper element before truncation.
16160 int EltOffset = 0;
16161 if (Src.getOpcode() == ISD::SRL && Src.getOperand(0)->hasOneUse()) {
16162 if (auto ShAmt = DAG.getValidShiftAmount(Src)) {
16163 if ((*ShAmt % TrTy.getSizeInBits()) == 0) {
16164 Src = Src.getOperand(0);
16165 EltOffset = *ShAmt / TrTy.getSizeInBits();
16166 }
16167 }
16168 }
16169
16170 if (Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16171 EVT VecTy = Src.getOperand(0).getValueType();
16172 EVT ExTy = Src.getValueType();
16173
16174 auto EltCnt = VecTy.getVectorElementCount();
16175 unsigned SizeRatio = ExTy.getSizeInBits() / TrTy.getSizeInBits();
16176 auto NewEltCnt = EltCnt * SizeRatio;
16177
16178 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
16179 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
16180
16181 SDValue EltNo = Src->getOperand(1);
16182 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
16183 int Elt = EltNo->getAsZExtVal();
16184 int Index = isLE ? (Elt * SizeRatio + EltOffset)
16185 : (Elt * SizeRatio + (SizeRatio - 1) - EltOffset);
16186 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
16187 DAG.getBitcast(NVT, Src.getOperand(0)),
16188 DAG.getVectorIdxConstant(Index, DL));
16189 }
16190 }
16191 }
16192
16193 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
16194 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse() &&
16195 TLI.isTruncateFree(SrcVT, VT)) {
16196 if (!LegalOperations ||
16197 (TLI.isOperationLegal(ISD::SELECT, SrcVT) &&
16198 TLI.isNarrowingProfitable(N0.getNode(), SrcVT, VT))) {
16199 SDLoc SL(N0);
16200 SDValue Cond = N0.getOperand(0);
16201 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
16202 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
16203 return DAG.getNode(ISD::SELECT, DL, VT, Cond, TruncOp0, TruncOp1);
16204 }
16205 }
16206
16207 // trunc (shl x, K) -> shl (trunc x), K, provided K < VT.getScalarSizeInBits()
16208 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
16209 (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
16210 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
16211 SDValue Amt = N0.getOperand(1);
16212 KnownBits Known = DAG.computeKnownBits(Amt);
16213 unsigned Size = VT.getScalarSizeInBits();
16214 if (Known.countMaxActiveBits() <= Log2_32(Size)) {
16215 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
16216 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16217 if (AmtVT != Amt.getValueType()) {
16218 Amt = DAG.getZExtOrTrunc(Amt, DL, AmtVT);
16219 AddToWorklist(Amt.getNode());
16220 }
16221 return DAG.getNode(ISD::SHL, DL, VT, Trunc, Amt);
16222 }
16223 }
16224
16225 if (SDValue V = foldSubToUSubSat(VT, N0.getNode(), DL))
16226 return V;
16227
16228 if (SDValue ABD = foldABSToABD(N, DL))
16229 return ABD;
16230
16231 // Attempt to pre-truncate BUILD_VECTOR sources.
16232 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
16233 N0.hasOneUse() &&
16234 TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
16235 // Avoid creating illegal types if running after type legalizer.
16236 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
16237 EVT SVT = VT.getScalarType();
16238 SmallVector<SDValue, 8> TruncOps;
16239 for (const SDValue &Op : N0->op_values()) {
16240 SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
16241 TruncOps.push_back(TruncOp);
16242 }
16243 return DAG.getBuildVector(VT, DL, TruncOps);
16244 }
16245
16246 // trunc (splat_vector x) -> splat_vector (trunc x)
16247 if (N0.getOpcode() == ISD::SPLAT_VECTOR &&
16248 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType())) &&
16249 (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT))) {
16250 EVT SVT = VT.getScalarType();
16251 return DAG.getSplatVector(
16252 VT, DL, DAG.getNode(ISD::TRUNCATE, DL, SVT, N0->getOperand(0)));
16253 }
16254
16255 // Fold a series of buildvector, bitcast, and truncate if possible.
16256 // For example fold
16257 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
16258 // (2xi32 (buildvector x, y)).
16259 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
16260 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
16261 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
16262 N0.getOperand(0).hasOneUse()) {
16263 SDValue BuildVect = N0.getOperand(0);
16264 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
16265 EVT TruncVecEltTy = VT.getVectorElementType();
16266
16267 // Check that the element types match.
16268 if (BuildVectEltTy == TruncVecEltTy) {
16269 // Now we only need to compute the offset of the truncated elements.
16270 unsigned BuildVecNumElts = BuildVect.getNumOperands();
16271 unsigned TruncVecNumElts = VT.getVectorNumElements();
16272 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
16273 unsigned FirstElt = isLE ? 0 : (TruncEltOffset - 1);
16274
16275 assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
16276 "Invalid number of elements");
16277
16278 SmallVector<SDValue, 8> Opnds;
16279 for (unsigned i = FirstElt, e = BuildVecNumElts; i < e;
16280 i += TruncEltOffset)
16281 Opnds.push_back(BuildVect.getOperand(i));
16282
16283 return DAG.getBuildVector(VT, DL, Opnds);
16284 }
16285 }
16286
16287 // fold (truncate (load x)) -> (smaller load x)
16288 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
16289 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
16290 if (SDValue Reduced = reduceLoadWidth(N))
16291 return Reduced;
16292
16293 // Handle the case where the truncated result is at least as wide as the
16294 // loaded type.
16295 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
16296 auto *LN0 = cast<LoadSDNode>(N0);
16297 if (LN0->isSimple() && LN0->getMemoryVT().bitsLE(VT)) {
16298 SDValue NewLoad = DAG.getExtLoad(
16299 LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(),
16300 LN0->getBasePtr(), LN0->getMemoryVT(), LN0->getMemOperand());
16301 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
16302 return NewLoad;
16303 }
16304 }
16305 }
16306
16307 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
16308 // where ... are all 'undef'.
16309 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
16310 SmallVector<EVT, 8> VTs;
16311 SDValue V;
16312 unsigned Idx = 0;
16313 unsigned NumDefs = 0;
16314
16315 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
16316 SDValue X = N0.getOperand(i);
16317 if (!X.isUndef()) {
16318 V = X;
16319 Idx = i;
16320 NumDefs++;
16321 }
16322 // Stop if more than one member is non-undef.
16323 if (NumDefs > 1)
16324 break;
16325
16326 VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
16327 VT.getVectorElementType(),
16328 X.getValueType().getVectorElementCount()));
16329 }
16330
16331 if (NumDefs == 0)
16332 return DAG.getUNDEF(VT);
16333
16334 if (NumDefs == 1) {
16335 assert(V.getNode() && "The single defined operand is empty!");
16336 SmallVector<SDValue, 8> Opnds;
16337 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
16338 if (i != Idx) {
16339 Opnds.push_back(DAG.getUNDEF(VTs[i]));
16340 continue;
16341 }
16342 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
16343 AddToWorklist(NV.getNode());
16344 Opnds.push_back(NV);
16345 }
16346 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
16347 }
16348 }
16349
16350 // Fold truncate of a bitcast of a vector to an extract of the low vector
16351 // element.
16352 //
16353 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
16354 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
16355 SDValue VecSrc = N0.getOperand(0);
16356 EVT VecSrcVT = VecSrc.getValueType();
16357 if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
16358 (!LegalOperations ||
16359 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
16360 unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
16361 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VecSrc,
16362 DAG.getVectorIdxConstant(Idx, DL));
16363 }
16364 }
16365
16366 // Simplify the operands using demanded-bits information.
16367 if (SimplifyDemandedBits(SDValue(N, 0)))
16368 return SDValue(N, 0);
16369
16370 // fold (truncate (extract_subvector(ext x))) ->
16371 // (extract_subvector x)
16372 // TODO: This can be generalized to cover cases where the truncate and extract
16373 // do not fully cancel each other out.
16374 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
16375 SDValue N00 = N0.getOperand(0);
16376 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
16377 N00.getOpcode() == ISD::ZERO_EXTEND ||
16378 N00.getOpcode() == ISD::ANY_EXTEND) {
16379 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
16380 VT.getVectorElementType())
16381 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
16382 N00.getOperand(0), N0.getOperand(1));
16383 }
16384 }
16385
16386 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
16387 return NewVSel;
16388
16389 // Narrow a suitable binary operation with a non-opaque constant operand by
16390 // moving it ahead of the truncate. This is limited to pre-legalization
16391 // because targets may prefer a wider type during later combines and invert
16392 // this transform.
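// e.g. (i32 (trunc (i64 add x, 17))) -> (i32 add (i32 (trunc x)), 17)
// when the add has one use.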
16393 switch (N0.getOpcode()) {
16394 case ISD::ADD:
16395 case ISD::SUB:
16396 case ISD::MUL:
16397 case ISD::AND:
16398 case ISD::OR:
16399 case ISD::XOR:
16400 if (!LegalOperations && N0.hasOneUse() &&
16401 (isConstantOrConstantVector(N0.getOperand(0), true) ||
16402 isConstantOrConstantVector(N0.getOperand(1), true))) {
16403 // TODO: We already restricted this to pre-legalization, but for vectors
16404 // we are extra cautious to not create an unsupported operation.
16405 // Target-specific changes are likely needed to avoid regressions here.
16406 if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
16407 SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16408 SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
16409 SDNodeFlags Flags;
16410 // Propagate nuw for sub.
16411 if (N0->getOpcode() == ISD::SUB && N0->getFlags().hasNoUnsignedWrap() &&
16412 DAG.MaskedValueIsZero(
16413 N0->getOperand(0),
16414 APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
16415 VT.getScalarSizeInBits())))
16416 Flags.setNoUnsignedWrap(true);
16417 return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR, Flags);
16418 }
16419 }
16420 break;
16421 case ISD::ADDE:
16422 case ISD::UADDO_CARRY:
16423 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
16424 // (trunc uaddo_carry(X, Y, Carry)) ->
16425 // (uaddo_carry trunc(X), trunc(Y), Carry)
16426 // When the adde's carry is not used.
16427 // We only do this for uaddo_carry before operation legalization.
16428 if (((!LegalOperations && N0.getOpcode() == ISD::UADDO_CARRY) ||
16429 TLI.isOperationLegal(N0.getOpcode(), VT)) &&
16430 N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) {
16431 SDValue X = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16432 SDValue Y = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
16433 SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1));
16434 return DAG.getNode(N0.getOpcode(), DL, VTs, X, Y, N0.getOperand(2));
16435 }
16436 break;
16437 case ISD::USUBSAT:
16438 // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
16439 // enough to know that the upper bits are zero, we must ensure that we don't
16440 // introduce an extra truncate.
16441 if (!LegalOperations && N0.hasOneUse() &&
16442 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
16443 N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
16444 VT.getScalarSizeInBits() &&
16445 hasOperation(N0.getOpcode(), VT)) {
16446 return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
16447 DAG, DL);
16448 }
16449 break;
16450 case ISD::AVGFLOORS:
16451 case ISD::AVGFLOORU:
16452 case ISD::AVGCEILS:
16453 case ISD::AVGCEILU:
16454 case ISD::ABDS:
16455 case ISD::ABDU:
16456 // (trunc (avg a, b)) -> (avg (trunc a), (trunc b))
16457 // (trunc (abdu/abds a, b)) -> (abdu/abds (trunc a), (trunc b))
16458 if (!LegalOperations && N0.hasOneUse() &&
16459 TLI.isOperationLegal(N0.getOpcode(), VT)) {
16460 EVT TruncVT = VT;
16461 unsigned SrcBits = SrcVT.getScalarSizeInBits();
16462 unsigned TruncBits = TruncVT.getScalarSizeInBits();
16463
16464 SDValue A = N0.getOperand(0);
16465 SDValue B = N0.getOperand(1);
16466 bool CanFold = false;
16467
16468 if (N0.getOpcode() == ISD::AVGFLOORU || N0.getOpcode() == ISD::AVGCEILU ||
16469 N0.getOpcode() == ISD::ABDU) {
16470 APInt UpperBits = APInt::getBitsSetFrom(SrcBits, TruncBits);
16471 CanFold = DAG.MaskedValueIsZero(B, UpperBits) &&
16472 DAG.MaskedValueIsZero(A, UpperBits);
16473 } else {
16474 unsigned NeededBits = SrcBits - TruncBits;
16475 CanFold = DAG.ComputeNumSignBits(B) > NeededBits &&
16476 DAG.ComputeNumSignBits(A) > NeededBits;
16477 }
16478
16479 if (CanFold) {
16480 SDValue NewA = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, A);
16481 SDValue NewB = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, B);
16482 return DAG.getNode(N0.getOpcode(), DL, TruncVT, NewA, NewB);
16483 }
16484 }
16485 break;
16486 }
16487
16488 return SDValue();
16489}
16490
16491static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
16492 SDValue Elt = N->getOperand(i);
16493 if (Elt.getOpcode() != ISD::MERGE_VALUES)
16494 return Elt.getNode();
16495 return Elt.getOperand(Elt.getResNo()).getNode();
16496}
16497
16498/// build_pair (load, load) -> load
16499/// if load locations are consecutive.
16500SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
16501 assert(N->getOpcode() == ISD::BUILD_PAIR);
16502
16503 auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
16504 auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
16505
16506 // A BUILD_PAIR always has the least significant part in elt 0 and the
16507 // most significant part in elt 1. So when combining into one large load, we
16508 // need to consider the endianness.
16509 if (DAG.getDataLayout().isBigEndian())
16510 std::swap(LD1, LD2);
16511
16512 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
16513 !LD1->hasOneUse() || !LD2->hasOneUse() ||
16514 LD1->getAddressSpace() != LD2->getAddressSpace())
16515 return SDValue();
16516
16517 unsigned LD1Fast = 0;
16518 EVT LD1VT = LD1->getValueType(0);
16519 unsigned LD1Bytes = LD1VT.getStoreSize();
16520 if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
16521 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
16522 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
16523 *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
16524 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
16525 LD1->getPointerInfo(), LD1->getAlign());
16526
16527 return SDValue();
16528}
16529
16530static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
16531 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
16532 // and Lo parts; on big-endian machines it doesn't.
16533 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
16534}
16535
16536SDValue DAGCombiner::foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
16537 const TargetLowering &TLI) {
16538 // If this is not a bitcast to an FP type or if the target doesn't have
16539 // IEEE754-compliant FP logic, we're done.
16540 EVT VT = N->getValueType(0);
16541 SDValue N0 = N->getOperand(0);
16542 EVT SourceVT = N0.getValueType();
16543
16544 if (!VT.isFloatingPoint())
16545 return SDValue();
16546
16547 // TODO: Handle cases where the integer constant is a different scalar
16548 // bitwidth to the FP.
16549 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
16550 return SDValue();
16551
16552 unsigned FPOpcode;
16553 APInt SignMask;
16554 switch (N0.getOpcode()) {
16555 case ISD::AND:
16556 FPOpcode = ISD::FABS;
16557 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
16558 break;
16559 case ISD::XOR:
16560 FPOpcode = ISD::FNEG;
16561 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
16562 break;
16563 case ISD::OR:
16564 FPOpcode = ISD::FABS;
16565 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
16566 break;
16567 default:
16568 return SDValue();
16569 }
16570
16571 if (LegalOperations && !TLI.isOperationLegal(FPOpcode, VT))
16572 return SDValue();
16573
16574 // This needs to be the inverse of logic in foldSignChangeInBitcast.
16575 // FIXME: I don't think looking for bitcast intrinsically makes sense, but
16576 // removing this would require more changes.
16577 auto IsBitCastOrFree = [&TLI, FPOpcode](SDValue Op, EVT VT) {
16578 if (sd_match(Op, m_BitCast(m_SpecificVT(VT))))
16579 return true;
16580
16581 return FPOpcode == ISD::FABS ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
16582 };
16583
16584 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
16585 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
16586 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
16587 // fneg (fabs X)
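// E.g. for f32 (illustrative): AND with 0x7fffffff clears the sign bit (fabs),
// XOR with 0x80000000 flips it (fneg), and OR with 0x80000000 sets it
// (fneg (fabs X)).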
16588 SDValue LogicOp0 = N0.getOperand(0);
16589 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
16590 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
16591 IsBitCastOrFree(LogicOp0, VT)) {
16592 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, LogicOp0);
16593 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, CastOp0);
16594 NumFPLogicOpsConv++;
16595 if (N0.getOpcode() == ISD::OR)
16596 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
16597 return FPOp;
16598 }
16599
16600 return SDValue();
16601}
16602
16603SDValue DAGCombiner::visitBITCAST(SDNode *N) {
16604 SDValue N0 = N->getOperand(0);
16605 EVT VT = N->getValueType(0);
16606
16607 if (N0.isUndef())
16608 return DAG.getUNDEF(VT);
16609
16610 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
16611 // Only do this before legalize types, unless both types are integer and the
16612 // scalar type is legal. Only do this before legalize ops, since the target
16613 // may be depending on the bitcast.
16614 // First check to see if this is all constant.
16615 // TODO: Support FP bitcasts after legalize types.
16616 if (VT.isVector() &&
16617 (!LegalTypes ||
16618 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
16619 TLI.isTypeLegal(VT.getVectorElementType()))) &&
16620 N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
16621 cast<BuildVectorSDNode>(N0)->isConstant())
16622 return DAG.FoldConstantBuildVector(cast<BuildVectorSDNode>(N0), SDLoc(N),
16623 VT.getVectorElementType());
16624
16625 // If the input is a constant, let getNode fold it.
16626 if (isIntOrFPConstant(N0)) {
16627 // If we can't allow illegal operations, we need to check that this is just
16628 // an fp -> int or int -> fp conversion and that the resulting operation will
16629 // be legal.
16630 if (!LegalOperations ||
16631 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
16632 TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
16633 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
16634 TLI.isOperationLegal(ISD::Constant, VT))) {
16635 SDValue C = DAG.getBitcast(VT, N0);
16636 if (C.getNode() != N)
16637 return C;
16638 }
16639 }
16640
16641 // (conv (conv x, t1), t2) -> (conv x, t2)
16642 if (N0.getOpcode() == ISD::BITCAST)
16643 return DAG.getBitcast(VT, N0.getOperand(0));
16644
16645 // fold (conv (logicop (conv x), (c))) -> (logicop x, (conv c))
16646 // iff the current bitwise logicop type isn't legal
16647 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && VT.isInteger() &&
16648 !TLI.isTypeLegal(N0.getOperand(0).getValueType())) {
16649 auto IsFreeBitcast = [VT](SDValue V) {
16650 return (V.getOpcode() == ISD::BITCAST &&
16651 V.getOperand(0).getValueType() == VT) ||
16652 (ISD::isBuildVectorOfConstantSDNodes(V.getNode()) &&
16653 V->hasOneUse());
16654 };
16655 if (IsFreeBitcast(N0.getOperand(0)) && IsFreeBitcast(N0.getOperand(1)))
16656 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
16657 DAG.getBitcast(VT, N0.getOperand(0)),
16658 DAG.getBitcast(VT, N0.getOperand(1)));
16659 }
16660
16661 // fold (conv (load x)) -> (load (conv*)x)
16662 // If the resultant load doesn't need a higher alignment than the original!
16663 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
16664 // Do not remove the cast if the types differ in endian layout.
16665 TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
16666 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
16667 // If the load is volatile, we only want to change the load type if the
16668 // resulting load is legal. Otherwise we might increase the number of
16669 // memory accesses. We don't care if the original type was legal or not
16670 // as we assume software couldn't rely on the number of accesses of an
16671 // illegal type.
16672 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
16673 TLI.isOperationLegal(ISD::LOAD, VT))) {
16674 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
16675
16676 if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
16677 *LN0->getMemOperand())) {
16678 // If the range metadata type does not match the new memory
16679 // operation type, remove the range metadata.
16680 if (const MDNode *MD = LN0->getRanges()) {
16681 ConstantInt *Lower = mdconst::extract<ConstantInt>(MD->getOperand(0));
16682 if (Lower->getBitWidth() != VT.getScalarSizeInBits() ||
16683 !VT.isInteger()) {
16684 LN0->getMemOperand()->clearRanges();
16685 }
16686 }
16687 SDValue Load =
16688 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
16689 LN0->getMemOperand());
16690 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
16691 return Load;
16692 }
16693 }
16694
16695 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
16696 return V;
16697
16698 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
16699 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
16700 //
16701 // For ppc_fp128:
16702 // fold (bitcast (fneg x)) ->
16703 // flipbit = signbit
16704 // (xor (bitcast x) (build_pair flipbit, flipbit))
16705 //
16706 // fold (bitcast (fabs x)) ->
16707 // flipbit = (and (extract_element (bitcast x), 0), signbit)
16708 // (xor (bitcast x) (build_pair flipbit, flipbit))
16709 // This often reduces constant pool loads.
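// Illustrative example: for f64, (bitcast (fneg x) to i64) becomes
// (xor (bitcast x to i64), 0x8000000000000000), so no FP constant or
// constant-pool load is needed for the sign mask.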
16710 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
16711 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
16712 N0->hasOneUse() && VT.isInteger() && !VT.isVector() &&
16713 !N0.getValueType().isVector()) {
16714 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
16715 AddToWorklist(NewConv.getNode());
16716
16717 SDLoc DL(N);
16718 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
16719 assert(VT.getSizeInBits() == 128);
16720 SDValue SignBit = DAG.getConstant(
16721 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
16722 SDValue FlipBit;
16723 if (N0.getOpcode() == ISD::FNEG) {
16724 FlipBit = SignBit;
16725 AddToWorklist(FlipBit.getNode());
16726 } else {
16727 assert(N0.getOpcode() == ISD::FABS);
16728 SDValue Hi =
16729 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
16730 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
16731 SDLoc(NewConv)));
16732 AddToWorklist(Hi.getNode());
16733 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
16734 AddToWorklist(FlipBit.getNode());
16735 }
16736 SDValue FlipBits =
16737 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
16738 AddToWorklist(FlipBits.getNode());
16739 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
16740 }
16741 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
16742 if (N0.getOpcode() == ISD::FNEG)
16743 return DAG.getNode(ISD::XOR, DL, VT,
16744 NewConv, DAG.getConstant(SignBit, DL, VT));
16745 assert(N0.getOpcode() == ISD::FABS);
16746 return DAG.getNode(ISD::AND, DL, VT,
16747 NewConv, DAG.getConstant(~SignBit, DL, VT));
16748 }
16749
16750 // fold (bitconvert (fcopysign cst, x)) ->
16751 // (or (and (bitconvert x), sign), (and cst, (not sign)))
16752 // Note that we don't handle (copysign x, cst) because this can always be
16753 // folded to an fneg or fabs.
16754 //
16755 // For ppc_fp128:
16756 // fold (bitcast (fcopysign cst, x)) ->
16757 // flipbit = (and (extract_element
16758 // (xor (bitcast cst), (bitcast x)), 0),
16759 // signbit)
16760 // (xor (bitcast cst) (build_pair flipbit, flipbit))
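// Illustrative f32 example: bitcast (fcopysign 1.0, x) to i32 becomes
// (or (and (bitcast x), 0x80000000), 0x3f800000), taking the sign from x and
// everything else from the constant.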
16761 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
16762 isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() &&
16763 !VT.isVector()) {
16764 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
16765 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
16766 if (isTypeLegal(IntXVT)) {
16767 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
16768 AddToWorklist(X.getNode());
16769
16770 // If X has a different width than the result/lhs, sext it or truncate it.
16771 unsigned VTWidth = VT.getSizeInBits();
16772 if (OrigXWidth < VTWidth) {
16773 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
16774 AddToWorklist(X.getNode());
16775 } else if (OrigXWidth > VTWidth) {
16776 // To get the sign bit in the right place, we have to shift it right
16777 // before truncating.
16778 SDLoc DL(X);
16779 X = DAG.getNode(ISD::SRL, DL,
16780 X.getValueType(), X,
16781 DAG.getConstant(OrigXWidth-VTWidth, DL,
16782 X.getValueType()));
16783 AddToWorklist(X.getNode());
16784 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
16785 AddToWorklist(X.getNode());
16786 }
16787
16788 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
16789 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
16790 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
16791 AddToWorklist(Cst.getNode());
16792 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
16793 AddToWorklist(X.getNode());
16794 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
16795 AddToWorklist(XorResult.getNode());
16796 SDValue XorResult64 = DAG.getNode(
16797 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
16798 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
16799 SDLoc(XorResult)));
16800 AddToWorklist(XorResult64.getNode());
16801 SDValue FlipBit =
16802 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
16803 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
16804 AddToWorklist(FlipBit.getNode());
16805 SDValue FlipBits =
16806 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
16807 AddToWorklist(FlipBits.getNode());
16808 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
16809 }
16810 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
16811 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
16812 X, DAG.getConstant(SignBit, SDLoc(X), VT));
16813 AddToWorklist(X.getNode());
16814
16815 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
16816 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
16817 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
16818 AddToWorklist(Cst.getNode());
16819
16820 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
16821 }
16822 }
16823
16824 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
16825 if (N0.getOpcode() == ISD::BUILD_PAIR)
16826 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
16827 return CombineLD;
16828
16829 // int_vt (bitcast (vec_vt (scalar_to_vector elt_vt:x)))
16830 // => int_vt (any_extend elt_vt:x)
16831 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && VT.isScalarInteger()) {
16832 SDValue SrcScalar = N0.getOperand(0);
16833 if (SrcScalar.getValueType().isScalarInteger())
16834 return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SrcScalar);
16835 }
16836
16837 // Remove double bitcasts from shuffles - this is often a legacy of
16838 // XformToShuffleWithZero being used to combine bitmaskings (of
16839 // float vectors bitcast to integer vectors) into shuffles.
16840 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
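// Illustrative example: a v2i64 shuffle with mask <1,0> seen through a bitcast
// to v4i32 becomes a v4i32 shuffle with mask <2,3,0,1>; each mask element is
// expanded by MaskScale below.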
16841 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
16842 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
16843 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
16844 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
16845 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
16846
16847 // If operands are a bitcast, peek through if it casts the original VT.
16848 // If operands are a constant, just bitcast back to original VT.
16849 auto PeekThroughBitcast = [&](SDValue Op) {
16850 if (Op.getOpcode() == ISD::BITCAST &&
16851 Op.getOperand(0).getValueType() == VT)
16852 return SDValue(Op.getOperand(0));
16853 if (Op.isUndef() || isAnyConstantBuildVector(Op))
16854 return DAG.getBitcast(VT, Op);
16855 return SDValue();
16856 };
16857
16858 // FIXME: If either input vector is bitcast, try to convert the shuffle to
16859 // the result type of this bitcast. This would eliminate at least one
16860 // bitcast. See the transform in InstCombine.
16861 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
16862 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
16863 if (!(SV0 && SV1))
16864 return SDValue();
16865
16866 int MaskScale =
16867 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
16868 SmallVector<int, 8> NewMask;
16869 for (int M : SVN->getMask())
16870 for (int i = 0; i != MaskScale; ++i)
16871 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
16872
16873 SDValue LegalShuffle =
16874 TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
16875 if (LegalShuffle)
16876 return LegalShuffle;
16877 }
16878
16879 return SDValue();
16880}
16881
16882SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
16883 EVT VT = N->getValueType(0);
16884 return CombineConsecutiveLoads(N, VT);
16885}
16886
16887SDValue DAGCombiner::visitFREEZE(SDNode *N) {
16888 SDValue N0 = N->getOperand(0);
16889
16890 if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
16891 return N0;
16892
16893 // If we have frozen and unfrozen users of N0, update so everything uses N.
16894 if (!N0.isUndef() && !N0.hasOneUse()) {
16895 SDValue FrozenN0(N, 0);
16896 // Unfreeze all uses of N to avoid double deleting N from the CSE map.
16897 DAG.ReplaceAllUsesOfValueWith(FrozenN0, N0);
16898 DAG.ReplaceAllUsesOfValueWith(N0, FrozenN0);
16899 // ReplaceAllUsesOfValueWith will have also updated the use in N, thus
16900 // creating a cycle in a DAG. Let's undo that by mutating the freeze.
16901 assert(N->getOperand(0) == FrozenN0 && "Expected cycle in DAG");
16902 DAG.UpdateNodeOperands(N, N0);
16903 return FrozenN0;
16904 }
16905
16906 // We currently avoid folding freeze over SRA/SRL, due to the problems seen
16907 // with (freeze (assert ext)) blocking simplifications of SRA/SRL. See for
16908 // example https://reviews.llvm.org/D136529#4120959.
16909 if (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)
16910 return SDValue();
16911
16912 // Fold freeze(op(x, ...)) -> op(freeze(x), ...).
16913 // Try to push freeze through instructions that propagate but don't produce
16914 // poison as far as possible. If an operand of the freeze satisfies three
16915 // conditions: 1) one-use, 2) does not produce poison, and 3) has all but one
16916 // guaranteed-non-poison operands (or is a BUILD_VECTOR or similar) then push
16917 // the freeze through to the operands that are not guaranteed non-poison.
16918 // NOTE: we will strip poison-generating flags, so ignore them here.
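// Illustrative example: freeze (add nuw x, y) with y known non-poison becomes
// (add (freeze x), y) with the nuw flag dropped, since the flag itself is what
// could have made the add produce poison.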
16919 if (DAG.canCreateUndefOrPoison(N0, /*PoisonOnly*/ false,
16920 /*ConsiderFlags*/ false) ||
16921 N0->getNumValues() != 1 || !N0->hasOneUse())
16922 return SDValue();
16923
16924 // TODO: we should always allow multiple operands; however, this increases the
16925 // likelihood of infinite loops because the ReplaceAllUsesOfValueWith call
16926 // below causes later nodes that share frozen operands to fold again while no
16927 // longer being able to confirm other operands are not poison due to recursion
16928 // depth limits on isGuaranteedNotToBeUndefOrPoison.
16929 bool AllowMultipleMaybePoisonOperands =
16930 N0.getOpcode() == ISD::SELECT_CC || N0.getOpcode() == ISD::SETCC ||
16931 N0.getOpcode() == ISD::BUILD_VECTOR ||
16933 N0.getOpcode() == ISD::BUILD_PAIR ||
16936
16937 // Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all
16938 // ones" or "constant" into something that depends on FrozenUndef. We can
16939 // instead pick undef values to keep those properties, while at the same time
16940 // folding away the freeze.
16941 // If we implement a more general solution for folding away freeze(undef) in
16942 // the future, then this special handling can be removed.
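// Illustrative example: freeze (build_vector 1, undef, 3, undef) becomes
// (build_vector 1, 0, 3, 0), which stays a constant vector rather than
// depending on a frozen undef element.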
16943 if (N0.getOpcode() == ISD::BUILD_VECTOR) {
16944 SDLoc DL(N0);
16945 EVT VT = N0.getValueType();
16946 if (ISD::isBuildVectorAllOnes(N0.getNode()))
16947 return DAG.getAllOnesConstant(DL, VT);
16948 if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
16949 SmallVector<SDValue, 8> NewVecC;
16950 for (const SDValue &Op : N0->op_values())
16951 NewVecC.push_back(
16952 Op.isUndef() ? DAG.getConstant(0, DL, Op.getValueType()) : Op);
16953 return DAG.getBuildVector(VT, DL, NewVecC);
16954 }
16955 }
16956
16957 SmallSet<SDValue, 8> MaybePoisonOperands;
16958 SmallVector<unsigned, 8> MaybePoisonOperandNumbers;
16959 for (auto [OpNo, Op] : enumerate(N0->ops())) {
16960 if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly=*/false))
16961 continue;
16962 bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
16963 bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op).second;
16964 if (IsNewMaybePoisonOperand)
16965 MaybePoisonOperandNumbers.push_back(OpNo);
16966 if (!HadMaybePoisonOperands)
16967 continue;
16968 if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) {
16969 // Multiple maybe-poison ops when not allowed - bail out.
16970 return SDValue();
16971 }
16972 }
16973 // NOTE: the whole op may not be guaranteed not to be undef or poison because
16974 // it could create undef or poison due to its poison-generating flags.
16975 // So not finding any maybe-poison operands is fine.
16976
16977 for (unsigned OpNo : MaybePoisonOperandNumbers) {
16978 // N0 can mutate during iteration, so make sure to refetch the maybe poison
16979 // operands via the operand numbers. The typical scenario is that we have
16980 // something like this
16981 // t262: i32 = freeze t181
16982 // t150: i32 = ctlz_zero_undef t262
16983 // t184: i32 = ctlz_zero_undef t181
16984 // t268: i32 = select_cc t181, Constant:i32<0>, t184, t186, setne:ch
16985 // When freezing the t181 operand we get t262 back, and then the
16986 // ReplaceAllUsesOfValueWith call will not only replace t181 by t262, but
16987 // also recursively replace t184 by t150.
16988 SDValue MaybePoisonOperand = N->getOperand(0).getOperand(OpNo);
16989 // Don't replace every single UNDEF everywhere with frozen UNDEF, though.
16990 if (MaybePoisonOperand.isUndef())
16991 continue;
16992 // First, freeze each offending operand.
16993 SDValue FrozenMaybePoisonOperand = DAG.getFreeze(MaybePoisonOperand);
16994 // Then, change all other uses of unfrozen operand to use frozen operand.
16995 DAG.ReplaceAllUsesOfValueWith(MaybePoisonOperand, FrozenMaybePoisonOperand);
16996 if (FrozenMaybePoisonOperand.getOpcode() == ISD::FREEZE &&
16997 FrozenMaybePoisonOperand.getOperand(0) == FrozenMaybePoisonOperand) {
16998 // But, that also updated the use in the freeze we just created, thus
16999 // creating a cycle in a DAG. Let's undo that by mutating the freeze.
17000 DAG.UpdateNodeOperands(FrozenMaybePoisonOperand.getNode(),
17001 MaybePoisonOperand);
17002 }
17003
17004 // This node has been merged with another.
17005 if (N->getOpcode() == ISD::DELETED_NODE)
17006 return SDValue(N, 0);
17007 }
17008
17009 assert(N->getOpcode() != ISD::DELETED_NODE && "Node was deleted!");
17010
17011 // The whole node may have been updated, so the value we were holding
17012 // may no longer be valid. Re-fetch the operand we're `freeze`ing.
17013 N0 = N->getOperand(0);
17014
17015 // Finally, recreate the node; its operands were updated to use
17016 // frozen operands, so we just need to use its "original" operands.
17017 SmallVector<SDValue> Ops(N0->ops());
17018 // TODO: ISD::UNDEF and ISD::POISON should get separate handling, but best
17019 // leave for a future patch.
17020 for (SDValue &Op : Ops) {
17021 if (Op.isUndef())
17022 Op = DAG.getFreeze(Op);
17023 }
17024
17025 SDLoc DL(N0);
17026
17027 // Special case handling for ShuffleVectorSDNode nodes.
17028 if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N0))
17029 return DAG.getVectorShuffle(N0.getValueType(), DL, Ops[0], Ops[1],
17030 SVN->getMask());
17031
17032 // NOTE: this strips poison generating flags.
17033 // Folding freeze(op(x, ...)) -> op(freeze(x), ...) does not require nnan,
17034 // ninf, nsz, or fast.
17035 // However, contract, reassoc, afn, and arcp should be preserved,
17036 // as these fast-math flags do not introduce poison values.
17037 SDNodeFlags SrcFlags = N0->getFlags();
17038 SDNodeFlags SafeFlags;
17039 SafeFlags.setAllowContract(SrcFlags.hasAllowContract());
17040 SafeFlags.setAllowReassociation(SrcFlags.hasAllowReassociation());
17041 SafeFlags.setApproximateFuncs(SrcFlags.hasApproximateFuncs());
17042 SafeFlags.setAllowReciprocal(SrcFlags.hasAllowReciprocal());
17043 return DAG.getNode(N0.getOpcode(), DL, N0->getVTList(), Ops, SafeFlags);
17044}
17045
17046// Returns true if floating point contraction is allowed on the FMUL-SDValue
17047// `N`
17048 static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
17049 assert(N.getOpcode() == ISD::FMUL);
17050
17051 return Options.AllowFPOpFusion == FPOpFusion::Fast ||
17052 N->getFlags().hasAllowContract();
17053}
17054
17055// Returns true if `N` can assume no infinities involved in its computation.
17056 static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
17057 return Options.NoInfsFPMath || N->getFlags().hasNoInfs();
17058}
17059
17060/// Try to perform FMA combining on a given FADD node.
17061template <class MatchContextClass>
17062SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
17063 SDValue N0 = N->getOperand(0);
17064 SDValue N1 = N->getOperand(1);
17065 EVT VT = N->getValueType(0);
17066 SDLoc SL(N);
17067 MatchContextClass matcher(DAG, TLI, N);
17068 const TargetOptions &Options = DAG.getTarget().Options;
17069
17070 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
17071
17072 // Floating-point multiply-add with intermediate rounding.
17073 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
17074 // FIXME: Add VP_FMAD opcode.
17075 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
17076
17077 // Floating-point multiply-add without intermediate rounding.
17078 bool HasFMA =
17079 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT)) &&
17080 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT);
17081
17082 // No valid opcode, do not combine.
17083 if (!HasFMAD && !HasFMA)
17084 return SDValue();
17085
17086 bool AllowFusionGlobally =
17087 Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD;
17088 // If the addition is not contractable, do not combine.
17089 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
17090 return SDValue();
17091
17092 // Folding fadd (fmul x, y), (fmul x, y) -> fma x, y, (fmul x, y) is never
17093 // beneficial. It does not reduce latency. It increases register pressure. It
17094 // replaces an fadd with an fma which is a more complex instruction, so is
17095 // likely to have a larger encoding, use more functional units, etc.
17096 if (N0 == N1)
17097 return SDValue();
17098
17099 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
17100 return SDValue();
17101
17102 // Always prefer FMAD to FMA for precision.
17103 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
17104 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
17105
17106 auto isFusedOp = [&](SDValue N) {
17107 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
17108 };
17109
17110 // Is the node an FMUL and contractable either due to global flags or
17111 // SDNodeFlags.
17112 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
17113 if (!matcher.match(N, ISD::FMUL))
17114 return false;
17115 return AllowFusionGlobally || N->getFlags().hasAllowContract();
17116 };
17117 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
17118 // prefer to fold the multiply with fewer uses.
17119 if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
17120 if (N0->use_size() > N1->use_size())
17121 std::swap(N0, N1);
17122 }
17123
17124 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
17125 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
17126 return matcher.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
17127 N0.getOperand(1), N1);
17128 }
17129
17130 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
17131 // Note: Commutes FADD operands.
17132 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
17133 return matcher.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
17134 N1.getOperand(1), N0);
17135 }
17136
17137 // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
17138 // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
17139 // This also works with nested fma instructions:
17140 // fadd (fma A, B, (fma (C, D, (fmul (E, F))))), G -->
17141 // fma A, B, (fma C, D, fma (E, F, G))
17142 // fadd (G, (fma A, B, (fma (C, D, (fmul (E, F)))))) -->
17143 // fma A, B, (fma C, D, fma (E, F, G)).
17144 // This requires reassociation because it changes the order of operations.
17145 bool CanReassociate = N->getFlags().hasAllowReassociation();
17146 if (CanReassociate) {
17147 SDValue FMA, E;
17148 if (isFusedOp(N0) && N0.hasOneUse()) {
17149 FMA = N0;
17150 E = N1;
17151 } else if (isFusedOp(N1) && N1.hasOneUse()) {
17152 FMA = N1;
17153 E = N0;
17154 }
17155
17156 SDValue TmpFMA = FMA;
17157 while (E && isFusedOp(TmpFMA) && TmpFMA.hasOneUse()) {
17158 SDValue FMul = TmpFMA->getOperand(2);
17159 if (matcher.match(FMul, ISD::FMUL) && FMul.hasOneUse()) {
17160 SDValue C = FMul.getOperand(0);
17161 SDValue D = FMul.getOperand(1);
17162 SDValue CDE = matcher.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
17163 DAG.ReplaceAllUsesOfValueWith(FMul, CDE);
17164 // Replacing the inner FMul could cause the outer FMA to be simplified
17165 // away.
17166 return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue(N, 0) : FMA;
17167 }
17168
17169 TmpFMA = TmpFMA->getOperand(2);
17170 }
17171 }
17172
17173 // Look through FP_EXTEND nodes to do more combining.
17174
17175 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
17176 if (matcher.match(N0, ISD::FP_EXTEND)) {
17177 SDValue N00 = N0.getOperand(0);
17178 if (isContractableFMUL(N00) &&
17179 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17180 N00.getValueType())) {
17181 return matcher.getNode(
17182 PreferredFusedOpcode, SL, VT,
17183 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
17184 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), N1);
17185 }
17186 }
17187
17188 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
17189 // Note: Commutes FADD operands.
17190 if (matcher.match(N1, ISD::FP_EXTEND)) {
17191 SDValue N10 = N1.getOperand(0);
17192 if (isContractableFMUL(N10) &&
17193 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17194 N10.getValueType())) {
17195 return matcher.getNode(
17196 PreferredFusedOpcode, SL, VT,
17197 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
17198 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
17199 }
17200 }
17201
17202 // More folding opportunities when target permits.
17203 if (Aggressive) {
17204 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
17205 // -> (fma x, y, (fma (fpext u), (fpext v), z))
17206 auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
17207 SDValue Z) {
17208 return matcher.getNode(
17209 PreferredFusedOpcode, SL, VT, X, Y,
17210 matcher.getNode(PreferredFusedOpcode, SL, VT,
17211 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
17212 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
17213 };
17214 if (isFusedOp(N0)) {
17215 SDValue N02 = N0.getOperand(2);
17216 if (matcher.match(N02, ISD::FP_EXTEND)) {
17217 SDValue N020 = N02.getOperand(0);
17218 if (isContractableFMUL(N020) &&
17219 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17220 N020.getValueType())) {
17221 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
17222 N020.getOperand(0), N020.getOperand(1),
17223 N1);
17224 }
17225 }
17226 }
17227
17228 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
17229 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
17230 // FIXME: This turns two single-precision and one double-precision
17231 // operation into two double-precision operations, which might not be
17232 // interesting for all targets, especially GPUs.
17233 auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
17234 SDValue Z) {
17235 return matcher.getNode(
17236 PreferredFusedOpcode, SL, VT,
17237 matcher.getNode(ISD::FP_EXTEND, SL, VT, X),
17238 matcher.getNode(ISD::FP_EXTEND, SL, VT, Y),
17239 matcher.getNode(PreferredFusedOpcode, SL, VT,
17240 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
17241 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
17242 };
17243 if (N0.getOpcode() == ISD::FP_EXTEND) {
17244 SDValue N00 = N0.getOperand(0);
17245 if (isFusedOp(N00)) {
17246 SDValue N002 = N00.getOperand(2);
17247 if (isContractableFMUL(N002) &&
17248 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17249 N00.getValueType())) {
17250 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
17251 N002.getOperand(0), N002.getOperand(1),
17252 N1);
17253 }
17254 }
17255 }
17256
17257 // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
17258 // -> (fma y, z, (fma (fpext u), (fpext v), x))
17259 if (isFusedOp(N1)) {
17260 SDValue N12 = N1.getOperand(2);
17261 if (N12.getOpcode() == ISD::FP_EXTEND) {
17262 SDValue N120 = N12.getOperand(0);
17263 if (isContractableFMUL(N120) &&
17264 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17265 N120.getValueType())) {
17266 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
17267 N120.getOperand(0), N120.getOperand(1),
17268 N0);
17269 }
17270 }
17271 }
17272
17273 // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
17274 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
17275 // FIXME: This turns two single-precision and one double-precision
17276 // operation into two double-precision operations, which might not be
17277 // interesting for all targets, especially GPUs.
17278 if (N1.getOpcode() == ISD::FP_EXTEND) {
17279 SDValue N10 = N1.getOperand(0);
17280 if (isFusedOp(N10)) {
17281 SDValue N102 = N10.getOperand(2);
17282 if (isContractableFMUL(N102) &&
17283 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17284 N10.getValueType())) {
17285 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
17286 N102.getOperand(0), N102.getOperand(1),
17287 N0);
17288 }
17289 }
17290 }
17291 }
17292
17293 return SDValue();
17294}
17295
17296/// Try to perform FMA combining on a given FSUB node.
17297template <class MatchContextClass>
17298SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
17299 SDValue N0 = N->getOperand(0);
17300 SDValue N1 = N->getOperand(1);
17301 EVT VT = N->getValueType(0);
17302 SDLoc SL(N);
17303 MatchContextClass matcher(DAG, TLI, N);
17304 const TargetOptions &Options = DAG.getTarget().Options;
17305
17306 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
17307
17308 // Floating-point multiply-add with intermediate rounding.
17309 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
17310 // FIXME: Add VP_FMAD opcode.
17311 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
17312
17313 // Floating-point multiply-add without intermediate rounding.
17314 bool HasFMA =
17315 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT)) &&
17317
17318 // No valid opcode, do not combine.
17319 if (!HasFMAD && !HasFMA)
17320 return SDValue();
17321
17322 const SDNodeFlags Flags = N->getFlags();
17323 bool AllowFusionGlobally =
17324 (Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD);
17325
17326 // If the subtraction is not contractable, do not combine.
17327 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
17328 return SDValue();
17329
17330 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
17331 return SDValue();
17332
17333 // Always prefer FMAD to FMA for precision.
17334 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
17335 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
17336 bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
17337
17338 // Is the node an FMUL and contractable either due to global flags or
17339 // SDNodeFlags.
17340 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
17341 if (!matcher.match(N, ISD::FMUL))
17342 return false;
17343 return AllowFusionGlobally || N->getFlags().hasAllowContract();
17344 };
17345
17346 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
17347 auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
17348 if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
17349 return matcher.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
17350 XY.getOperand(1),
17351 matcher.getNode(ISD::FNEG, SL, VT, Z));
17352 }
17353 return SDValue();
17354 };
17355
17356 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
17357 // Note: Commutes FSUB operands.
17358 auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
17359 if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
17360 return matcher.getNode(
17361 PreferredFusedOpcode, SL, VT,
17362 matcher.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
17363 YZ.getOperand(1), X);
17364 }
17365 return SDValue();
17366 };
17367
17368 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
17369 // prefer to fold the multiply with fewer uses.
17370 if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
17371 (N0->use_size() > N1->use_size())) {
17372 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
17373 if (SDValue V = tryToFoldXSubYZ(N0, N1))
17374 return V;
17375 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
17376 if (SDValue V = tryToFoldXYSubZ(N0, N1))
17377 return V;
17378 } else {
17379 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
17380 if (SDValue V = tryToFoldXYSubZ(N0, N1))
17381 return V;
17382 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
17383 if (SDValue V = tryToFoldXSubYZ(N0, N1))
17384 return V;
17385 }
17386
17387 // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
17388 if (matcher.match(N0, ISD::FNEG) && isContractableFMUL(N0.getOperand(0)) &&
17389 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
17390 SDValue N00 = N0.getOperand(0).getOperand(0);
17391 SDValue N01 = N0.getOperand(0).getOperand(1);
17392 return matcher.getNode(PreferredFusedOpcode, SL, VT,
17393 matcher.getNode(ISD::FNEG, SL, VT, N00), N01,
17394 matcher.getNode(ISD::FNEG, SL, VT, N1));
17395 }
17396
17397 // Look through FP_EXTEND nodes to do more combining.
17398
17399 // fold (fsub (fpext (fmul x, y)), z)
17400 // -> (fma (fpext x), (fpext y), (fneg z))
17401 if (matcher.match(N0, ISD::FP_EXTEND)) {
17402 SDValue N00 = N0.getOperand(0);
17403 if (isContractableFMUL(N00) &&
17404 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17405 N00.getValueType())) {
17406 return matcher.getNode(
17407 PreferredFusedOpcode, SL, VT,
17408 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
17409 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
17410 matcher.getNode(ISD::FNEG, SL, VT, N1));
17411 }
17412 }
17413
17414 // fold (fsub x, (fpext (fmul y, z)))
17415 // -> (fma (fneg (fpext y)), (fpext z), x)
17416 // Note: Commutes FSUB operands.
17417 if (matcher.match(N1, ISD::FP_EXTEND)) {
17418 SDValue N10 = N1.getOperand(0);
17419 if (isContractableFMUL(N10) &&
17420 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17421 N10.getValueType())) {
17422 return matcher.getNode(
17423 PreferredFusedOpcode, SL, VT,
17424 matcher.getNode(
17425 ISD::FNEG, SL, VT,
17426 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
17427 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
17428 }
17429 }
17430
17431 // fold (fsub (fpext (fneg (fmul, x, y))), z)
17432 // -> (fneg (fma (fpext x), (fpext y), z))
17433 // Note: This could be removed with appropriate canonicalization of the
17434 // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
17435 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
17436 // us from implementing the canonicalization in visitFSUB.
17437 if (matcher.match(N0, ISD::FP_EXTEND)) {
17438 SDValue N00 = N0.getOperand(0);
17439 if (matcher.match(N00, ISD::FNEG)) {
17440 SDValue N000 = N00.getOperand(0);
17441 if (isContractableFMUL(N000) &&
17442 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17443 N00.getValueType())) {
17444 return matcher.getNode(
17445 ISD::FNEG, SL, VT,
17446 matcher.getNode(
17447 PreferredFusedOpcode, SL, VT,
17448 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
17449 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
17450 N1));
17451 }
17452 }
17453 }
17454
17455 // fold (fsub (fneg (fpext (fmul, x, y))), z)
17456 // -> (fneg (fma (fpext x)), (fpext y), z)
17457 // Note: This could be removed with appropriate canonicalization of the
17458 // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
17459 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
17460 // us from implementing the canonicalization in visitFSUB.
17461 if (matcher.match(N0, ISD::FNEG)) {
17462 SDValue N00 = N0.getOperand(0);
17463 if (matcher.match(N00, ISD::FP_EXTEND)) {
17464 SDValue N000 = N00.getOperand(0);
17465 if (isContractableFMUL(N000) &&
17466 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17467 N000.getValueType())) {
17468 return matcher.getNode(
17469 ISD::FNEG, SL, VT,
17470 matcher.getNode(
17471 PreferredFusedOpcode, SL, VT,
17472 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
17473 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
17474 N1));
17475 }
17476 }
17477 }
17478
17479 auto isContractableAndReassociableFMUL = [&isContractableFMUL](SDValue N) {
17480 return isContractableFMUL(N) && N->getFlags().hasAllowReassociation();
17481 };
17482
17483 auto isFusedOp = [&](SDValue N) {
17484 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
17485 };
17486
17487 // More folding opportunities when target permits.
17488 if (Aggressive && N->getFlags().hasAllowReassociation()) {
17489 bool CanFuse = N->getFlags().hasAllowContract();
17490 // fold (fsub (fma x, y, (fmul u, v)), z)
17491 // -> (fma x, y (fma u, v, (fneg z)))
17492 if (CanFuse && isFusedOp(N0) &&
17493 isContractableAndReassociableFMUL(N0.getOperand(2)) &&
17494 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
17495 return matcher.getNode(
17496 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
17497 matcher.getNode(PreferredFusedOpcode, SL, VT,
17498 N0.getOperand(2).getOperand(0),
17499 N0.getOperand(2).getOperand(1),
17500 matcher.getNode(ISD::FNEG, SL, VT, N1)));
17501 }
17502
17503 // fold (fsub x, (fma y, z, (fmul u, v)))
17504 // -> (fma (fneg y), z, (fma (fneg u), v, x))
17505 if (CanFuse && isFusedOp(N1) &&
17506 isContractableAndReassociableFMUL(N1.getOperand(2)) &&
17507 N1->hasOneUse() && NoSignedZero) {
17508 SDValue N20 = N1.getOperand(2).getOperand(0);
17509 SDValue N21 = N1.getOperand(2).getOperand(1);
17510 return matcher.getNode(
17511 PreferredFusedOpcode, SL, VT,
17512 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
17513 N1.getOperand(1),
17514 matcher.getNode(PreferredFusedOpcode, SL, VT,
17515 matcher.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
17516 }
17517
17518 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
17519 // -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
17520 if (isFusedOp(N0) && N0->hasOneUse()) {
17521 SDValue N02 = N0.getOperand(2);
17522 if (matcher.match(N02, ISD::FP_EXTEND)) {
17523 SDValue N020 = N02.getOperand(0);
17524 if (isContractableAndReassociableFMUL(N020) &&
17525 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17526 N020.getValueType())) {
17527 return matcher.getNode(
17528 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
17529 matcher.getNode(
17530 PreferredFusedOpcode, SL, VT,
17531 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
17532 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
17533 matcher.getNode(ISD::FNEG, SL, VT, N1)));
17534 }
17535 }
17536 }
17537
17538 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
17539 // -> (fma (fpext x), (fpext y),
17540 // (fma (fpext u), (fpext v), (fneg z)))
17541 // FIXME: This turns two single-precision and one double-precision
17542 // operation into two double-precision operations, which might not be
17543 // interesting for all targets, especially GPUs.
17544 if (matcher.match(N0, ISD::FP_EXTEND)) {
17545 SDValue N00 = N0.getOperand(0);
17546 if (isFusedOp(N00)) {
17547 SDValue N002 = N00.getOperand(2);
17548 if (isContractableAndReassociableFMUL(N002) &&
17549 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17550 N00.getValueType())) {
17551 return matcher.getNode(
17552 PreferredFusedOpcode, SL, VT,
17553 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
17554 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
17555 matcher.getNode(
17556 PreferredFusedOpcode, SL, VT,
17557 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
17558 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
17559 matcher.getNode(ISD::FNEG, SL, VT, N1)));
17560 }
17561 }
17562 }
17563
17564 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
17565 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
17566 if (isFusedOp(N1) && matcher.match(N1.getOperand(2), ISD::FP_EXTEND) &&
17567 N1->hasOneUse()) {
17568 SDValue N120 = N1.getOperand(2).getOperand(0);
17569 if (isContractableAndReassociableFMUL(N120) &&
17570 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17571 N120.getValueType())) {
17572 SDValue N1200 = N120.getOperand(0);
17573 SDValue N1201 = N120.getOperand(1);
17574 return matcher.getNode(
17575 PreferredFusedOpcode, SL, VT,
17576 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
17577 N1.getOperand(1),
17578 matcher.getNode(
17579 PreferredFusedOpcode, SL, VT,
17580 matcher.getNode(ISD::FNEG, SL, VT,
17581 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
17582 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
17583 }
17584 }
17585
17586 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
17587 // -> (fma (fneg (fpext y)), (fpext z),
17588 // (fma (fneg (fpext u)), (fpext v), x))
17589 // FIXME: This turns two single-precision and one double-precision
17590 // operation into two double-precision operations, which might not be
17591 // interesting for all targets, especially GPUs.
17592 if (matcher.match(N1, ISD::FP_EXTEND) && isFusedOp(N1.getOperand(0))) {
17593 SDValue CvtSrc = N1.getOperand(0);
17594 SDValue N100 = CvtSrc.getOperand(0);
17595 SDValue N101 = CvtSrc.getOperand(1);
17596 SDValue N102 = CvtSrc.getOperand(2);
17597 if (isContractableAndReassociableFMUL(N102) &&
17598 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17599 CvtSrc.getValueType())) {
17600 SDValue N1020 = N102.getOperand(0);
17601 SDValue N1021 = N102.getOperand(1);
17602 return matcher.getNode(
17603 PreferredFusedOpcode, SL, VT,
17604 matcher.getNode(ISD::FNEG, SL, VT,
17605 matcher.getNode(ISD::FP_EXTEND, SL, VT, N100)),
17606 matcher.getNode(ISD::FP_EXTEND, SL, VT, N101),
17607 matcher.getNode(
17608 PreferredFusedOpcode, SL, VT,
17609 matcher.getNode(ISD::FNEG, SL, VT,
17610 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
17611 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
17612 }
17613 }
17614 }
17615
17616 return SDValue();
17617}
17618
17619/// Try to perform FMA combining on a given FMUL node based on the distributive
17620/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
17621/// subtraction instead of addition).
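/// Illustrative examples: (fmul (fadd x, 1.0), y) -> (fma x, y, y) and
/// (fmul (fsub x, 1.0), y) -> (fma x, y, (fneg y)).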
17622SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
17623 SDValue N0 = N->getOperand(0);
17624 SDValue N1 = N->getOperand(1);
17625 EVT VT = N->getValueType(0);
17626 SDLoc SL(N);
17627
17628 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
17629
17630 const TargetOptions &Options = DAG.getTarget().Options;
17631
17632 // The transforms below are incorrect when x == 0 and y == inf, because the
17633 // intermediate multiplication produces a nan.
17634 SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
17635 if (!hasNoInfs(Options, FAdd))
17636 return SDValue();
17637
17638 // Floating-point multiply-add without intermediate rounding.
17639 bool HasFMA =
17640 isContractableFMUL(Options, SDValue(N, 0)) &&
17641 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&
17642 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT);
17643
17644 // Floating-point multiply-add with intermediate rounding. This can result
17645 // in a less precise result due to the changed rounding order.
17646 bool HasFMAD = LegalOperations && TLI.isFMADLegal(DAG, N);
17647
17648 // No valid opcode, do not combine.
17649 if (!HasFMAD && !HasFMA)
17650 return SDValue();
17651
17652 // Always prefer FMAD to FMA for precision.
17653 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
17654 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
17655
17656 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
17657 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
17658 auto FuseFADD = [&](SDValue X, SDValue Y) {
17659 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
17660 if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
17661 if (C->isExactlyValue(+1.0))
17662 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
17663 Y);
17664 if (C->isExactlyValue(-1.0))
17665 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
17666 DAG.getNode(ISD::FNEG, SL, VT, Y));
17667 }
17668 }
17669 return SDValue();
17670 };
17671
17672 if (SDValue FMA = FuseFADD(N0, N1))
17673 return FMA;
17674 if (SDValue FMA = FuseFADD(N1, N0))
17675 return FMA;
17676
17677 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
17678 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
17679 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
17680 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
17681 auto FuseFSUB = [&](SDValue X, SDValue Y) {
17682 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
17683 if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
17684 if (C0->isExactlyValue(+1.0))
17685 return DAG.getNode(PreferredFusedOpcode, SL, VT,
17686 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
17687 Y);
17688 if (C0->isExactlyValue(-1.0))
17689 return DAG.getNode(PreferredFusedOpcode, SL, VT,
17690 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
17691 DAG.getNode(ISD::FNEG, SL, VT, Y));
17692 }
17693 if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
17694 if (C1->isExactlyValue(+1.0))
17695 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
17696 DAG.getNode(ISD::FNEG, SL, VT, Y));
17697 if (C1->isExactlyValue(-1.0))
17698 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
17699 Y);
17700 }
17701 }
17702 return SDValue();
17703 };
17704
17705 if (SDValue FMA = FuseFSUB(N0, N1))
17706 return FMA;
17707 if (SDValue FMA = FuseFSUB(N1, N0))
17708 return FMA;
17709
17710 return SDValue();
17711}
17712
17713SDValue DAGCombiner::visitVP_FADD(SDNode *N) {
17714 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17715
17716 // FADD -> FMA combines:
17717 if (SDValue Fused = visitFADDForFMACombine<VPMatchContext>(N)) {
17718 if (Fused.getOpcode() != ISD::DELETED_NODE)
17719 AddToWorklist(Fused.getNode());
17720 return Fused;
17721 }
17722 return SDValue();
17723}
17724
17725SDValue DAGCombiner::visitFADD(SDNode *N) {
17726 SDValue N0 = N->getOperand(0);
17727 SDValue N1 = N->getOperand(1);
17728 bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
17729 bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
17730 EVT VT = N->getValueType(0);
17731 SDLoc DL(N);
17732 const TargetOptions &Options = DAG.getTarget().Options;
17733 SDNodeFlags Flags = N->getFlags();
17734 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17735
17736 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17737 return R;
17738
17739 // fold (fadd c1, c2) -> c1 + c2
17740 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
17741 return C;
17742
17743 // canonicalize constant to RHS
17744 if (N0CFP && !N1CFP)
17745 return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
17746
17747 // fold vector ops
17748 if (VT.isVector())
17749 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
17750 return FoldedVOp;
17751
17752 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
17753 ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
17754 if (N1C && N1C->isZero())
17755 if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
17756 return N0;
17757
17758 if (SDValue NewSel = foldBinOpIntoSelect(N))
17759 return NewSel;
17760
17761 // fold (fadd A, (fneg B)) -> (fsub A, B)
17762 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
17763 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
17764 N1, DAG, LegalOperations, ForCodeSize))
17765 return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
17766
17767 // fold (fadd (fneg A), B) -> (fsub B, A)
17768 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
17769 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
17770 N0, DAG, LegalOperations, ForCodeSize))
17771 return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
17772
17773 auto isFMulNegTwo = [](SDValue FMul) {
17774 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
17775 return false;
17776 auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
17777 return C && C->isExactlyValue(-2.0);
17778 };
17779
17780 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
17781 if (isFMulNegTwo(N0)) {
17782 SDValue B = N0.getOperand(0);
17783 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
17784 return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
17785 }
17786 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
17787 if (isFMulNegTwo(N1)) {
17788 SDValue B = N1.getOperand(0);
17789 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
17790 return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
17791 }
17792
17793 // No FP constant should be created after legalization as the Instruction
17794 // Selection pass has a hard time dealing with FP constants.
17795 bool AllowNewConst = (Level < AfterLegalizeDAG);
17796
17797 // If nnan is enabled, fold lots of things.
17798 if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
17799 // If allowed, fold (fadd (fneg x), x) -> 0.0
17800 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
17801 return DAG.getConstantFP(0.0, DL, VT);
17802
17803 // If allowed, fold (fadd x, (fneg x)) -> 0.0
17804 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
17805 return DAG.getConstantFP(0.0, DL, VT);
17806 }
17807
17808 // If 'unsafe math' or reassoc and nsz, fold lots of things.
17809 // TODO: break out portions of the transformations below for which Unsafe is
17810 // considered and which do not require both nsz and reassoc
17811 if ((Options.NoSignedZerosFPMath ||
17812 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
17813 AllowNewConst) {
17814 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
17815 if (N1CFP && N0.getOpcode() == ISD::FADD &&
17816 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
17817 SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
17818 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
17819 }
17820
17821 // We can fold chains of FADD's of the same value into multiplications.
17822 // This transform is not safe in general because we are reducing the number
17823 // of rounding steps.
17824 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
17825 if (N0.getOpcode() == ISD::FMUL) {
17826 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
17827 bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
17828
17829 // (fadd (fmul x, c), x) -> (fmul x, c+1)
17830 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
17831 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
17832 DAG.getConstantFP(1.0, DL, VT));
17833 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
17834 }
17835
17836 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
17837 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
17838 N1.getOperand(0) == N1.getOperand(1) &&
17839 N0.getOperand(0) == N1.getOperand(0)) {
17840 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
17841 DAG.getConstantFP(2.0, DL, VT));
17842 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
17843 }
17844 }
17845
17846 if (N1.getOpcode() == ISD::FMUL) {
17847 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
17848 bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
17849
17850 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
17851 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
17852 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
17853 DAG.getConstantFP(1.0, DL, VT));
17854 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
17855 }
17856
17857 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
17858 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
17859 N0.getOperand(0) == N0.getOperand(1) &&
17860 N1.getOperand(0) == N0.getOperand(0)) {
17861 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
17862 DAG.getConstantFP(2.0, DL, VT));
17863 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
17864 }
17865 }
17866
17867 if (N0.getOpcode() == ISD::FADD) {
17868 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
17869 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
17870 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
17871 (N0.getOperand(0) == N1)) {
17872 return DAG.getNode(ISD::FMUL, DL, VT, N1,
17873 DAG.getConstantFP(3.0, DL, VT));
17874 }
17875 }
17876
17877 if (N1.getOpcode() == ISD::FADD) {
17878 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
17879 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
17880 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
17881 N1.getOperand(0) == N0) {
17882 return DAG.getNode(ISD::FMUL, DL, VT, N0,
17883 DAG.getConstantFP(3.0, DL, VT));
17884 }
17885 }
17886
17887 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
17888 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
17889 N0.getOperand(0) == N0.getOperand(1) &&
17890 N1.getOperand(0) == N1.getOperand(1) &&
17891 N0.getOperand(0) == N1.getOperand(0)) {
17892 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
17893 DAG.getConstantFP(4.0, DL, VT));
17894 }
17895 }
17896 } // enable-unsafe-fp-math && AllowNewConst
17897
17898 if ((Options.NoSignedZerosFPMath ||
17899 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))) {
17900 // Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
17901 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL,
17902 VT, N0, N1, Flags))
17903 return SD;
17904 }
17905
17906 // FADD -> FMA combines:
17907 if (SDValue Fused = visitFADDForFMACombine<EmptyMatchContext>(N)) {
17908 if (Fused.getOpcode() != ISD::DELETED_NODE)
17909 AddToWorklist(Fused.getNode());
17910 return Fused;
17911 }
17912 return SDValue();
17913}
17914
17915SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
17916 SDValue Chain = N->getOperand(0);
17917 SDValue N0 = N->getOperand(1);
17918 SDValue N1 = N->getOperand(2);
17919 EVT VT = N->getValueType(0);
17920 EVT ChainVT = N->getValueType(1);
17921 SDLoc DL(N);
17922 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17923
17924 // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
17925 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
17926 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
17927 N1, DAG, LegalOperations, ForCodeSize)) {
17928 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
17929 {Chain, N0, NegN1});
17930 }
17931
17932 // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
17933 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
17934 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
17935 N0, DAG, LegalOperations, ForCodeSize)) {
17936 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
17937 {Chain, N1, NegN0});
17938 }
17939 return SDValue();
17940}
17941
17942SDValue DAGCombiner::visitFSUB(SDNode *N) {
17943 SDValue N0 = N->getOperand(0);
17944 SDValue N1 = N->getOperand(1);
17945 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
17946 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
17947 EVT VT = N->getValueType(0);
17948 SDLoc DL(N);
17949 const TargetOptions &Options = DAG.getTarget().Options;
17950 const SDNodeFlags Flags = N->getFlags();
17951 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17952
17953 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17954 return R;
17955
17956 // fold (fsub c1, c2) -> c1-c2
17957 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
17958 return C;
17959
17960 // fold vector ops
17961 if (VT.isVector())
17962 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
17963 return FoldedVOp;
17964
17965 if (SDValue NewSel = foldBinOpIntoSelect(N))
17966 return NewSel;
17967
17968 // (fsub A, 0) -> A
17969 if (N1CFP && N1CFP->isZero()) {
17970 if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
17971 Flags.hasNoSignedZeros()) {
17972 return N0;
17973 }
17974 }
17975
17976 if (N0 == N1) {
17977 // (fsub x, x) -> 0.0
17978 if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
17979 return DAG.getConstantFP(0.0f, DL, VT);
17980 }
17981
17982 // (fsub -0.0, N1) -> -N1
17983 if (N0CFP && N0CFP->isZero()) {
17984 if (N0CFP->isNegative() ||
17985 (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
17986 // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
17987 // flushed to zero, unless all users treat denorms as zero (DAZ).
17988 // FIXME: This transform will change the sign of a NaN and the behavior
17989 // of a signaling NaN. It is only valid when a NoNaN flag is present.
17990 DenormalMode DenormMode = DAG.getDenormalMode(VT);
17991 if (DenormMode == DenormalMode::getIEEE()) {
17992 if (SDValue NegN1 =
17993 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
17994 return NegN1;
17995 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
17996 return DAG.getNode(ISD::FNEG, DL, VT, N1);
17997 }
17998 }
17999 }
18000
18001 if ((Options.NoSignedZerosFPMath ||
18002 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
18003 N1.getOpcode() == ISD::FADD) {
18004 // X - (X + Y) -> -Y
18005 if (N0 == N1->getOperand(0))
18006 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
18007 // X - (Y + X) -> -Y
18008 if (N0 == N1->getOperand(1))
18009 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
18010 }
18011
18012 // fold (fsub A, (fneg B)) -> (fadd A, B)
18013 if (SDValue NegN1 =
18014 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
18015 return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
18016
18017 // FSUB -> FMA combines:
18018 if (SDValue Fused = visitFSUBForFMACombine<EmptyMatchContext>(N)) {
18019 AddToWorklist(Fused.getNode());
18020 return Fused;
18021 }
18022
18023 return SDValue();
18024}
18025
18026// Transform IEEE Floats:
18027// (fmul C, (uitofp Pow2))
18028// -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa))
18029// (fdiv C, (uitofp Pow2))
18030// -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa))
18031//
18032 // The rationale is that fmul/fdiv by a power of 2 just changes the exponent, so
18033// there is no need for more than an add/sub.
18034//
18035// This is valid under the following circumstances:
18036// 1) We are dealing with IEEE floats
18037// 2) C is normal
18038// 3) The fmul/fdiv add/sub will not go outside of min/max exponent bounds.
18039 // TODO: Much of this could also be used for generating `ldexp` on targets that
18040// prefer it.
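//
// As an illustration of the exponent trick described above (IEEE double, 52
// explicit mantissa bits): bits(3.0) = 0x4008000000000000; adding
// Log2(8.0) << 52, i.e. 0x0030000000000000, yields 0x4038000000000000, which
// is bits(24.0) = bits(3.0 * 8.0).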
18041SDValue DAGCombiner::combineFMulOrFDivWithIntPow2(SDNode *N) {
18042 EVT VT = N->getValueType(0);
18044 return SDValue();
18045
18046 SDValue ConstOp, Pow2Op;
18047
18048 std::optional<int> Mantissa;
18049 auto GetConstAndPow2Ops = [&](unsigned ConstOpIdx) {
18050 if (ConstOpIdx == 1 && N->getOpcode() == ISD::FDIV)
18051 return false;
18052
18053 ConstOp = peekThroughBitcasts(N->getOperand(ConstOpIdx));
18054 Pow2Op = N->getOperand(1 - ConstOpIdx);
18055 if (Pow2Op.getOpcode() != ISD::UINT_TO_FP &&
18056 (Pow2Op.getOpcode() != ISD::SINT_TO_FP ||
18057 !DAG.computeKnownBits(Pow2Op).isNonNegative()))
18058 return false;
18059
18060 Pow2Op = Pow2Op.getOperand(0);
18061
18062 // `Log2(Pow2Op) < Pow2Op.getScalarSizeInBits()`.
18063 // TODO: We could use knownbits to make this bound more precise.
18064 int MaxExpChange = Pow2Op.getValueType().getScalarSizeInBits();
18065
18066 auto IsFPConstValid = [N, MaxExpChange, &Mantissa](ConstantFPSDNode *CFP) {
18067 if (CFP == nullptr)
18068 return false;
18069
18070 const APFloat &APF = CFP->getValueAPF();
18071
18072       // Make sure we have a normal constant.
18073 if (!APF.isNormal())
18074 return false;
18075
18076       // Make sure the float's exponent is within the bounds for which this
18077       // transform produces a bitwise-equal value.
18078 int CurExp = ilogb(APF);
18079 // FMul by pow2 will only increase exponent.
18080 int MinExp =
18081 N->getOpcode() == ISD::FMUL ? CurExp : (CurExp - MaxExpChange);
18082 // FDiv by pow2 will only decrease exponent.
18083 int MaxExp =
18084 N->getOpcode() == ISD::FDIV ? CurExp : (CurExp + MaxExpChange);
18085 if (MinExp <= APFloat::semanticsMinExponent(APF.getSemantics()) ||
18086             MaxExp >= APFloat::semanticsMaxExponent(APF.getSemantics()))
18087           return false;
18088
18089 // Finally make sure we actually know the mantissa for the float type.
18090 int ThisMantissa = APFloat::semanticsPrecision(APF.getSemantics()) - 1;
18091 if (!Mantissa)
18092 Mantissa = ThisMantissa;
18093
18094 return *Mantissa == ThisMantissa && ThisMantissa > 0;
18095 };
18096
18097 // TODO: We may be able to include undefs.
18098 return ISD::matchUnaryFpPredicate(ConstOp, IsFPConstValid);
18099 };
18100
18101 if (!GetConstAndPow2Ops(0) && !GetConstAndPow2Ops(1))
18102 return SDValue();
18103
18104 if (!TLI.optimizeFMulOrFDivAsShiftAddBitcast(N, ConstOp, Pow2Op))
18105 return SDValue();
18106
18107 // Get log2 after all other checks have taken place. This is because
18108 // BuildLogBase2 may create a new node.
18109 SDLoc DL(N);
18110 // Get Log2 type with same bitwidth as the float type (VT).
18111 EVT NewIntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits());
18112 if (VT.isVector())
18113 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewIntVT,
18114                                 VT.getVectorElementCount());
18115
18116 SDValue Log2 = BuildLogBase2(Pow2Op, DL, DAG.isKnownNeverZero(Pow2Op),
18117 /*InexpensiveOnly*/ true, NewIntVT);
18118 if (!Log2)
18119 return SDValue();
18120
18121 // Perform actual transform.
18122 SDValue MantissaShiftCnt =
18123 DAG.getShiftAmountConstant(*Mantissa, NewIntVT, DL);
18124 // TODO: Sometimes Log2 is of form `(X + C)`. `(X + C) << C1` should fold to
18125 // `(X << C1) + (C << C1)`, but that isn't always the case because of the
18126   // cast. We could implement that by handling the casts here.
18127 SDValue Shift = DAG.getNode(ISD::SHL, DL, NewIntVT, Log2, MantissaShiftCnt);
18128 SDValue ResAsInt =
18129 DAG.getNode(N->getOpcode() == ISD::FMUL ? ISD::ADD : ISD::SUB, DL,
18130 NewIntVT, DAG.getBitcast(NewIntVT, ConstOp), Shift);
18131 SDValue ResAsFP = DAG.getBitcast(VT, ResAsInt);
18132 return ResAsFP;
18133}
18134
18135SDValue DAGCombiner::visitFMUL(SDNode *N) {
18136 SDValue N0 = N->getOperand(0);
18137 SDValue N1 = N->getOperand(1);
18138 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
18139 EVT VT = N->getValueType(0);
18140 SDLoc DL(N);
18141 const SDNodeFlags Flags = N->getFlags();
18142 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18143
18144 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
18145 return R;
18146
18147 // fold (fmul c1, c2) -> c1*c2
18148 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
18149 return C;
18150
18151 // canonicalize constant to RHS
18152   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
18153       !DAG.isConstantFPBuildVectorOrConstantFP(N1))
18154     return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
18155
18156 // fold vector ops
18157 if (VT.isVector())
18158 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
18159 return FoldedVOp;
18160
18161 if (SDValue NewSel = foldBinOpIntoSelect(N))
18162 return NewSel;
18163
18164 if (Flags.hasAllowReassociation()) {
18165 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
18166     if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
18167         N0.getOpcode() == ISD::FMUL) {
18168 SDValue N00 = N0.getOperand(0);
18169 SDValue N01 = N0.getOperand(1);
18170 // Avoid an infinite loop by making sure that N00 is not a constant
18171 // (the inner multiply has not been constant folded yet).
18172       if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
18173           !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
18174         SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
18175 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
18176 }
18177 }
18178
18179 // Match a special-case: we convert X * 2.0 into fadd.
18180 // fmul (fadd X, X), C -> fmul X, 2.0 * C
18181 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
18182 N0.getOperand(0) == N0.getOperand(1)) {
18183 const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
18184 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
18185 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
18186 }
18187
18188 // Fold fmul(vecreduce(x), vecreduce(y)) -> vecreduce(fmul(x, y))
18189 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FMUL, ISD::FMUL, DL,
18190 VT, N0, N1, Flags))
18191 return SD;
18192 }
18193
18194 // fold (fmul X, 2.0) -> (fadd X, X)
18195 if (N1CFP && N1CFP->isExactlyValue(+2.0))
18196 return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
18197
18198 // fold (fmul X, -1.0) -> (fsub -0.0, X)
18199 if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
18200 if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
18201 return DAG.getNode(ISD::FSUB, DL, VT,
18202 DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
18203 }
18204 }
18205
18206 // -N0 * -N1 --> N0 * N1
18207   TargetLowering::NegatibleCost CostN0 =
18208       TargetLowering::NegatibleCost::Expensive;
18209   TargetLowering::NegatibleCost CostN1 =
18210       TargetLowering::NegatibleCost::Expensive;
18211   SDValue NegN0 =
18212 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
18213 if (NegN0) {
18214 HandleSDNode NegN0Handle(NegN0);
18215 SDValue NegN1 =
18216 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
18217 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
18218                 CostN1 == TargetLowering::NegatibleCost::Cheaper))
18219     return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
18220 }
18221
18222 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
18223 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
18224 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
18225 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
18226 TLI.isOperationLegal(ISD::FABS, VT)) {
18227 SDValue Select = N0, X = N1;
18228 if (Select.getOpcode() != ISD::SELECT)
18229 std::swap(Select, X);
18230
18231 SDValue Cond = Select.getOperand(0);
18232 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
18233 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
18234
18235 if (TrueOpnd && FalseOpnd &&
18236 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
18237 isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
18238 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
18239 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
18240 switch (CC) {
18241 default: break;
18242 case ISD::SETOLT:
18243 case ISD::SETULT:
18244 case ISD::SETOLE:
18245 case ISD::SETULE:
18246 case ISD::SETLT:
18247 case ISD::SETLE:
18248 std::swap(TrueOpnd, FalseOpnd);
18249 [[fallthrough]];
18250 case ISD::SETOGT:
18251 case ISD::SETUGT:
18252 case ISD::SETOGE:
18253 case ISD::SETUGE:
18254 case ISD::SETGT:
18255 case ISD::SETGE:
18256 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
18257 TLI.isOperationLegal(ISD::FNEG, VT))
18258 return DAG.getNode(ISD::FNEG, DL, VT,
18259 DAG.getNode(ISD::FABS, DL, VT, X));
18260 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
18261 return DAG.getNode(ISD::FABS, DL, VT, X);
18262
18263 break;
18264 }
18265 }
18266 }
18267
18268 // FMUL -> FMA combines:
18269 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
18270 AddToWorklist(Fused.getNode());
18271 return Fused;
18272 }
18273
18274 // Don't do `combineFMulOrFDivWithIntPow2` until after FMUL -> FMA has been
18275 // able to run.
18276 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
18277 return R;
18278
18279 return SDValue();
18280}
18281
18282template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
18283 SDValue N0 = N->getOperand(0);
18284 SDValue N1 = N->getOperand(1);
18285 SDValue N2 = N->getOperand(2);
18286 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
18287 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
18288 ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
18289 EVT VT = N->getValueType(0);
18290 SDLoc DL(N);
18291 const TargetOptions &Options = DAG.getTarget().Options;
18292 // FMA nodes have flags that propagate to the created nodes.
18293 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18294 MatchContextClass matcher(DAG, TLI, N);
18295
18296 // Constant fold FMA.
18297 if (SDValue C =
18298 DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1, N2}))
18299 return C;
18300
18301 // (-N0 * -N1) + N2 --> (N0 * N1) + N2
18302   TargetLowering::NegatibleCost CostN0 =
18303       TargetLowering::NegatibleCost::Expensive;
18304   TargetLowering::NegatibleCost CostN1 =
18305       TargetLowering::NegatibleCost::Expensive;
18306   SDValue NegN0 =
18307 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
18308 if (NegN0) {
18309 HandleSDNode NegN0Handle(NegN0);
18310 SDValue NegN1 =
18311 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
18312 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
18313                 CostN1 == TargetLowering::NegatibleCost::Cheaper))
18314     return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
18315 }
18316
18317 // FIXME: use fast math flags instead of Options.UnsafeFPMath
18318 // TODO: Finally migrate away from global TargetOptions.
18319 if ((Options.NoNaNsFPMath && Options.NoInfsFPMath) ||
18320 (N->getFlags().hasNoNaNs() && N->getFlags().hasNoInfs())) {
18321 if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros() ||
18322 (N2CFP && !N2CFP->isExactlyValue(-0.0))) {
18323 if (N0CFP && N0CFP->isZero())
18324 return N2;
18325 if (N1CFP && N1CFP->isZero())
18326 return N2;
18327 }
18328 }
18329
18330 // FIXME: Support splat of constant.
18331 if (N0CFP && N0CFP->isExactlyValue(1.0))
18332 return matcher.getNode(ISD::FADD, DL, VT, N1, N2);
18333 if (N1CFP && N1CFP->isExactlyValue(1.0))
18334 return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
18335
18336 // Canonicalize (fma c, x, y) -> (fma x, c, y)
18337   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
18338       !DAG.isConstantFPBuildVectorOrConstantFP(N1))
18339     return matcher.getNode(ISD::FMA, DL, VT, N1, N0, N2);
18340
18341 bool CanReassociate = N->getFlags().hasAllowReassociation();
18342 if (CanReassociate) {
18343 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
18344 if (matcher.match(N2, ISD::FMUL) && N0 == N2.getOperand(0) &&
18345         DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
18346         DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
18347       return matcher.getNode(
18348 ISD::FMUL, DL, VT, N0,
18349 matcher.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
18350 }
18351
18352 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
18353 if (matcher.match(N0, ISD::FMUL) &&
18354         DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
18355         DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
18356       return matcher.getNode(
18357 ISD::FMA, DL, VT, N0.getOperand(0),
18358 matcher.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)), N2);
18359 }
18360 }
18361
18362 // (fma x, -1, y) -> (fadd (fneg x), y)
18363 // FIXME: Support splat of constant.
18364 if (N1CFP) {
18365 if (N1CFP->isExactlyValue(1.0))
18366 return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
18367
18368 if (N1CFP->isExactlyValue(-1.0) &&
18369 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
18370 SDValue RHSNeg = matcher.getNode(ISD::FNEG, DL, VT, N0);
18371 AddToWorklist(RHSNeg.getNode());
18372 return matcher.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
18373 }
18374
18375 // fma (fneg x), K, y -> fma x -K, y
18376 if (matcher.match(N0, ISD::FNEG) &&
18377         (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
18378          (N1.hasOneUse() &&
18379 !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT, ForCodeSize)))) {
18380 return matcher.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
18381 matcher.getNode(ISD::FNEG, DL, VT, N1), N2);
18382 }
18383 }
18384
18385 // FIXME: Support splat of constant.
18386 if (CanReassociate) {
18387 // (fma x, c, x) -> (fmul x, (c+1))
18388 if (N1CFP && N0 == N2) {
18389 return matcher.getNode(ISD::FMUL, DL, VT, N0,
18390 matcher.getNode(ISD::FADD, DL, VT, N1,
18391 DAG.getConstantFP(1.0, DL, VT)));
18392 }
18393
18394 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
18395 if (N1CFP && matcher.match(N2, ISD::FNEG) && N2.getOperand(0) == N0) {
18396 return matcher.getNode(ISD::FMUL, DL, VT, N0,
18397 matcher.getNode(ISD::FADD, DL, VT, N1,
18398 DAG.getConstantFP(-1.0, DL, VT)));
18399 }
18400 }
18401
18402 // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
18403 // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
18404 if (!TLI.isFNegFree(VT))
18405     if (SDValue Neg = TLI.getCheaperNegatedExpression(
18406             SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
18407 return matcher.getNode(ISD::FNEG, DL, VT, Neg);
18408 return SDValue();
18409}
18410
18411SDValue DAGCombiner::visitFMAD(SDNode *N) {
18412 SDValue N0 = N->getOperand(0);
18413 SDValue N1 = N->getOperand(1);
18414 SDValue N2 = N->getOperand(2);
18415 EVT VT = N->getValueType(0);
18416 SDLoc DL(N);
18417
18418 // Constant fold FMAD.
18419 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMAD, DL, VT, {N0, N1, N2}))
18420 return C;
18421
18422 return SDValue();
18423}
18424
18425// Combine multiple FDIVs with the same divisor into multiple FMULs by the
18426// reciprocal.
18427// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
18428// Notice that this is not always beneficial. One reason is different targets
18429// may have different costs for FDIV and FMUL, so sometimes the cost of two
18430// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
18431 // is that the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
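//
// For example, once the use threshold is met, four divisions a/D, b/D, c/D,
// d/D become one FDIV (recip = 1.0/D) plus four FMULs: three FDIVs are removed
// at the cost of one extra instruction and a slightly longer dependence chain.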
18432SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
18433 // TODO: Limit this transform based on optsize/minsize - it always creates at
18434 // least 1 extra instruction. But the perf win may be substantial enough
18435 // that only minsize should restrict this.
18436 const SDNodeFlags Flags = N->getFlags();
18437 if (LegalDAG || !Flags.hasAllowReciprocal())
18438 return SDValue();
18439
18440 // Skip if current node is a reciprocal/fneg-reciprocal.
18441 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
18442 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
18443 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
18444 return SDValue();
18445
18446 // Exit early if the target does not want this transform or if there can't
18447 // possibly be enough uses of the divisor to make the transform worthwhile.
18448 unsigned MinUses = TLI.combineRepeatedFPDivisors();
18449
18450 // For splat vectors, scale the number of uses by the splat factor. If we can
18451 // convert the division into a scalar op, that will likely be much faster.
18452 unsigned NumElts = 1;
18453 EVT VT = N->getValueType(0);
18454 if (VT.isVector() && DAG.isSplatValue(N1))
18455 NumElts = VT.getVectorMinNumElements();
18456
18457 if (!MinUses || (N1->use_size() * NumElts) < MinUses)
18458 return SDValue();
18459
18460 // Find all FDIV users of the same divisor.
18461 // Use a set because duplicates may be present in the user list.
18462 SetVector<SDNode *> Users;
18463 for (auto *U : N1->users()) {
18464 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
18465 // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
18466 if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
18467 U->getOperand(0) == U->getOperand(1).getOperand(0) &&
18468 U->getFlags().hasAllowReassociation() &&
18469 U->getFlags().hasNoSignedZeros())
18470 continue;
18471
18472       // This division is eligible for the transform only if it allows
18473       // reciprocal formation (i.e. it carries the 'arcp' fast-math flag).
18474 if (U->getFlags().hasAllowReciprocal())
18475 Users.insert(U);
18476 }
18477 }
18478
18479 // Now that we have the actual number of divisor uses, make sure it meets
18480 // the minimum threshold specified by the target.
18481 if ((Users.size() * NumElts) < MinUses)
18482 return SDValue();
18483
18484 SDLoc DL(N);
18485 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
18486 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
18487
18488 // Dividend / Divisor -> Dividend * Reciprocal
18489 for (auto *U : Users) {
18490 SDValue Dividend = U->getOperand(0);
18491 if (Dividend != FPOne) {
18492 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
18493 Reciprocal, Flags);
18494 CombineTo(U, NewNode);
18495 } else if (U != Reciprocal.getNode()) {
18496 // In the absence of fast-math-flags, this user node is always the
18497 // same node as Reciprocal, but with FMF they may be different nodes.
18498 CombineTo(U, Reciprocal);
18499 }
18500 }
18501 return SDValue(N, 0); // N was replaced.
18502}
18503
18504SDValue DAGCombiner::visitFDIV(SDNode *N) {
18505 SDValue N0 = N->getOperand(0);
18506 SDValue N1 = N->getOperand(1);
18507 EVT VT = N->getValueType(0);
18508 SDLoc DL(N);
18509 const TargetOptions &Options = DAG.getTarget().Options;
18510 SDNodeFlags Flags = N->getFlags();
18511 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18512
18513 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
18514 return R;
18515
18516 // fold (fdiv c1, c2) -> c1/c2
18517 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
18518 return C;
18519
18520 // fold vector ops
18521 if (VT.isVector())
18522 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
18523 return FoldedVOp;
18524
18525 if (SDValue NewSel = foldBinOpIntoSelect(N))
18526 return NewSel;
18528   if (SDValue V = combineRepeatedFPDivisors(N))
18529 return V;
18530
18531 // fold (fdiv X, c2) -> (fmul X, 1/c2) if there is no loss in precision, or
18532 // the loss is acceptable with AllowReciprocal.
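  // For instance, X / 4.0 can always become X * 0.25 because 1.0/4.0 is exact
  // (opOK below), whereas X / 3.0 only becomes X * (1.0/3.0) when the 'arcp'
  // flag accepts the inexact reciprocal (opInexact below).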
18533 if (auto *N1CFP = isConstOrConstSplatFP(N1, true)) {
18534 // Compute the reciprocal 1.0 / c2.
18535 const APFloat &N1APF = N1CFP->getValueAPF();
18536 APFloat Recip = APFloat::getOne(N1APF.getSemantics());
18537     APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
18538     // Only do the transform if the reciprocal is a legal fp immediate that
18539 // isn't too nasty (eg NaN, denormal, ...).
18540 if (((st == APFloat::opOK && !Recip.isDenormal()) ||
18541 (st == APFloat::opInexact && Flags.hasAllowReciprocal())) &&
18542 (!LegalOperations ||
18543 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
18544 // backend)... we should handle this gracefully after Legalize.
18545 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
18546          TLI.isOperationLegal(ISD::ConstantFP, VT) ||
18547          TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
18548 return DAG.getNode(ISD::FMUL, DL, VT, N0,
18549 DAG.getConstantFP(Recip, DL, VT));
18550 }
18551
18552 if (Flags.hasAllowReciprocal()) {
18553 // If this FDIV is part of a reciprocal square root, it may be folded
18554 // into a target-specific square root estimate instruction.
18555 if (N1.getOpcode() == ISD::FSQRT) {
18556 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
18557 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
18558 } else if (N1.getOpcode() == ISD::FP_EXTEND &&
18559 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
18560 if (SDValue RV =
18561 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
18562 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
18563 AddToWorklist(RV.getNode());
18564 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
18565 }
18566 } else if (N1.getOpcode() == ISD::FP_ROUND &&
18567 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
18568 if (SDValue RV =
18569 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
18570 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
18571 AddToWorklist(RV.getNode());
18572 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
18573 }
18574 } else if (N1.getOpcode() == ISD::FMUL) {
18575 // Look through an FMUL. Even though this won't remove the FDIV directly,
18576 // it's still worthwhile to get rid of the FSQRT if possible.
18577 SDValue Sqrt, Y;
18578 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
18579 Sqrt = N1.getOperand(0);
18580 Y = N1.getOperand(1);
18581 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
18582 Sqrt = N1.getOperand(1);
18583 Y = N1.getOperand(0);
18584 }
18585 if (Sqrt.getNode()) {
18586 // If the other multiply operand is known positive, pull it into the
18587 // sqrt. That will eliminate the division if we convert to an estimate.
18588 if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
18589 N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
18590 SDValue A;
18591 if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
18592 A = Y.getOperand(0);
18593 else if (Y == Sqrt.getOperand(0))
18594 A = Y;
18595 if (A) {
18596 // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
18597 // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
18598 SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
18599 SDValue AAZ =
18600 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
18601 if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
18602 return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
18603
18604 // Estimate creation failed. Clean up speculatively created nodes.
18605 recursivelyDeleteUnusedNodes(AAZ.getNode());
18606 }
18607 }
18608
18609 // We found a FSQRT, so try to make this fold:
18610 // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
18611 if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
18612 SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
18613 AddToWorklist(Div.getNode());
18614 return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
18615 }
18616 }
18617 }
18618
18619 // Fold into a reciprocal estimate and multiply instead of a real divide.
18620 if (Options.NoInfsFPMath || Flags.hasNoInfs())
18621 if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
18622 return RV;
18623 }
18624
18625 // Fold X/Sqrt(X) -> Sqrt(X)
18626 if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
18627 Flags.hasAllowReassociation())
18628 if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
18629 return N1;
18630
18631 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
18632   TargetLowering::NegatibleCost CostN0 =
18633       TargetLowering::NegatibleCost::Expensive;
18634   TargetLowering::NegatibleCost CostN1 =
18635       TargetLowering::NegatibleCost::Expensive;
18636   SDValue NegN0 =
18637 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
18638 if (NegN0) {
18639 HandleSDNode NegN0Handle(NegN0);
18640 SDValue NegN1 =
18641 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
18642 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
18643                 CostN1 == TargetLowering::NegatibleCost::Cheaper))
18644     return DAG.getNode(ISD::FDIV, DL, VT, NegN0, NegN1);
18645 }
18646
18647 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
18648 return R;
18649
18650 return SDValue();
18651}
18652
18653SDValue DAGCombiner::visitFREM(SDNode *N) {
18654 SDValue N0 = N->getOperand(0);
18655 SDValue N1 = N->getOperand(1);
18656 EVT VT = N->getValueType(0);
18657 SDNodeFlags Flags = N->getFlags();
18658 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18659 SDLoc DL(N);
18660
18661 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
18662 return R;
18663
18664 // fold (frem c1, c2) -> fmod(c1,c2)
18665 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, DL, VT, {N0, N1}))
18666 return C;
18667
18668 if (SDValue NewSel = foldBinOpIntoSelect(N))
18669 return NewSel;
18670
18671 // Lower frem N0, N1 => x - trunc(N0 / N1) * N1, providing N1 is an integer
18672 // power of 2.
18673 if (!TLI.isOperationLegal(ISD::FREM, VT) &&
18674       Flags.hasApproximateFuncs() &&
18675       TLI.isOperationLegalOrCustom(ISD::FDIV, VT) &&
18676       TLI.isOperationLegalOrCustom(ISD::FTRUNC, VT) &&
18677 DAG.isKnownToBeAPowerOfTwoFP(N1)) {
18678 bool NeedsCopySign =
18679 !Flags.hasNoSignedZeros() && !DAG.cannotBeOrderedNegativeFP(N0);
18680 SDValue Div = DAG.getNode(ISD::FDIV, DL, VT, N0, N1);
18681 SDValue Rnd = DAG.getNode(ISD::FTRUNC, DL, VT, Div);
18682 SDValue MLA;
18683     if (TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
18684       MLA = DAG.getNode(ISD::FMA, DL, VT, DAG.getNode(ISD::FNEG, DL, VT, Rnd),
18685 N1, N0);
18686 } else {
18687 SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, Rnd, N1);
18688 MLA = DAG.getNode(ISD::FSUB, DL, VT, N0, Mul);
18689 }
18690 return NeedsCopySign ? DAG.getNode(ISD::FCOPYSIGN, DL, VT, MLA, N0) : MLA;
18691 }
18692
18693 return SDValue();
18694}
18695
18696SDValue DAGCombiner::visitFSQRT(SDNode *N) {
18697 SDNodeFlags Flags = N->getFlags();
18698 const TargetOptions &Options = DAG.getTarget().Options;
18699
18700 // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
18701 // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
18702 if (!Flags.hasApproximateFuncs() ||
18703 (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
18704 return SDValue();
18705
18706 SDValue N0 = N->getOperand(0);
18707 if (TLI.isFsqrtCheap(N0, DAG))
18708 return SDValue();
18709
18710 // FSQRT nodes have flags that propagate to the created nodes.
18711 // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
18712 // transform the fdiv, we may produce a sub-optimal estimate sequence
18713 // because the reciprocal calculation may not have to filter out a
18714 // 0.0 input.
18715 return buildSqrtEstimate(N0, Flags);
18716}
18717
18718/// copysign(x, fp_extend(y)) -> copysign(x, y)
18719/// copysign(x, fp_round(y)) -> copysign(x, y)
18720/// Operands to the functions are the type of X and Y respectively.
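/// For example, copysign(double X, fp_extend(float Y)) only needs the sign bit
/// of Y, and fp_extend/fp_round preserve the sign, so the cast can be looked
/// through.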
18721static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) {
18722 // Always fold no-op FP casts.
18723 if (XTy == YTy)
18724 return true;
18725
18726 // Do not optimize out type conversion of f128 type yet.
18727 // For some targets like x86_64, configuration is changed to keep one f128
18728 // value in one SSE register, but instruction selection cannot handle
18729 // FCOPYSIGN on SSE registers yet.
18730 if (YTy == MVT::f128)
18731 return false;
18732
18733 // Avoid mismatched vector operand types, for better instruction selection.
18734 return !YTy.isVector();
18735}
18736
18737 static bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
18738   SDValue N1 = N->getOperand(1);
18739 if (N1.getOpcode() != ISD::FP_EXTEND &&
18740 N1.getOpcode() != ISD::FP_ROUND)
18741 return false;
18742 EVT N1VT = N1->getValueType(0);
18743 EVT N1Op0VT = N1->getOperand(0).getValueType();
18744 return CanCombineFCOPYSIGN_EXTEND_ROUND(N1VT, N1Op0VT);
18745}
18746
18747SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
18748 SDValue N0 = N->getOperand(0);
18749 SDValue N1 = N->getOperand(1);
18750 EVT VT = N->getValueType(0);
18751 SDLoc DL(N);
18752
18753 // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
18754 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, DL, VT, {N0, N1}))
18755 return C;
18756
18757 // copysign(x, fp_extend(y)) -> copysign(x, y)
18758 // copysign(x, fp_round(y)) -> copysign(x, y)
18759   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
18760     return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N0, N1.getOperand(0));
18761
18762   if (SimplifyDemandedBits(SDValue(N, 0)))
18763     return SDValue(N, 0);
18764
18765 return SDValue();
18766}
18767
18768SDValue DAGCombiner::visitFPOW(SDNode *N) {
18769 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
18770 if (!ExponentC)
18771 return SDValue();
18772 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18773
18774 // Try to convert x ** (1/3) into cube root.
18775 // TODO: Handle the various flavors of long double.
18776 // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
18777 // Some range near 1/3 should be fine.
18778 EVT VT = N->getValueType(0);
18779 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
18780 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
18781 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
18782 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
18783     // pow(-val, 1/3) = nan; cbrt(-val) = -|val|^(1/3), a negative number.
18784 // For regular numbers, rounding may cause the results to differ.
18785 // Therefore, we require { nsz ninf nnan afn } for this transform.
18786 // TODO: We could select out the special cases if we don't have nsz/ninf.
18787 SDNodeFlags Flags = N->getFlags();
18788 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
18789 !Flags.hasApproximateFuncs())
18790 return SDValue();
18791
18792 // Do not create a cbrt() libcall if the target does not have it, and do not
18793 // turn a pow that has lowering support into a cbrt() libcall.
18794 if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
18795 (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
18796 DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
18797 return SDValue();
18798
18799 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
18800 }
18801
18802 // Try to convert x ** (1/4) and x ** (3/4) into square roots.
18803 // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
18804 // TODO: This could be extended (using a target hook) to handle smaller
18805 // power-of-2 fractional exponents.
18806 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
18807 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
18808 if (ExponentIs025 || ExponentIs075) {
18809 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
18810 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
18811 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
18812 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
18813 // For regular numbers, rounding may cause the results to differ.
18814 // Therefore, we require { nsz ninf afn } for this transform.
18815 // TODO: We could select out the special cases if we don't have nsz/ninf.
18816 SDNodeFlags Flags = N->getFlags();
18817
18818 // We only need no signed zeros for the 0.25 case.
18819 if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
18820 !Flags.hasApproximateFuncs())
18821 return SDValue();
18822
18823 // Don't double the number of libcalls. We are trying to inline fast code.
18824 if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
18825 return SDValue();
18826
18827 // Assume that libcalls are the smallest code.
18828 // TODO: This restriction should probably be lifted for vectors.
18829 if (ForCodeSize)
18830 return SDValue();
18831
18832 // pow(X, 0.25) --> sqrt(sqrt(X))
18833 SDLoc DL(N);
18834 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
18835 SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
18836 if (ExponentIs025)
18837 return SqrtSqrt;
18838 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
18839 return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
18840 }
18841
18842 return SDValue();
18843}
18844
18845 static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
18846                                const TargetLowering &TLI) {
18847 // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
18848 // replacing casts with a libcall. We also must be allowed to ignore -0.0
18849 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
18850 // conversions would return +0.0.
18851 // FIXME: We should be able to use node-level FMF here.
18852 // TODO: If strict math, should we use FABS (+ range check for signed cast)?
18853 EVT VT = N->getValueType(0);
18854 if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
18856 return SDValue();
18857
18858 // fptosi/fptoui round towards zero, so converting from FP to integer and
18859 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
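  // For example, sitofp(fptosi(2.7)) evaluates to 2.0, which is exactly
  // ftrunc(2.7); likewise -2.7 -> -2 -> -2.0 == ftrunc(-2.7).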
18860 SDValue N0 = N->getOperand(0);
18861 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
18862 N0.getOperand(0).getValueType() == VT)
18863 return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0));
18864
18865 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
18866 N0.getOperand(0).getValueType() == VT)
18867 return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0));
18868
18869 return SDValue();
18870}
18871
18872SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
18873 SDValue N0 = N->getOperand(0);
18874 EVT VT = N->getValueType(0);
18875 EVT OpVT = N0.getValueType();
18876 SDLoc DL(N);
18877
18878 // [us]itofp(undef) = 0, because the result value is bounded.
18879 if (N0.isUndef())
18880 return DAG.getConstantFP(0.0, DL, VT);
18881
18882 // fold (sint_to_fp c1) -> c1fp
18883 // ...but only if the target supports immediate floating-point values
18884 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18885 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SINT_TO_FP, DL, VT, {N0}))
18886 return C;
18887
18888 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
18889 // but UINT_TO_FP is legal on this target, try to convert.
18890 if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
18891 hasOperation(ISD::UINT_TO_FP, OpVT)) {
18892 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
18893 if (DAG.SignBitIsZero(N0))
18894 return DAG.getNode(ISD::UINT_TO_FP, DL, VT, N0);
18895 }
18896
18897 // The next optimizations are desirable only if SELECT_CC can be lowered.
18898 // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
18899 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
18900 !VT.isVector() &&
18901 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18902 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
18903 DAG.getConstantFP(0.0, DL, VT));
18904
18905 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
18906 // (select (setcc x, y, cc), 1.0, 0.0)
18907 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
18908 N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
18909 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18910 return DAG.getSelect(DL, VT, N0.getOperand(0),
18911 DAG.getConstantFP(1.0, DL, VT),
18912 DAG.getConstantFP(0.0, DL, VT));
18913
18914 if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
18915 return FTrunc;
18916
18917 // fold (sint_to_fp (trunc nsw x)) -> (sint_to_fp x)
18918 if (N0.getOpcode() == ISD::TRUNCATE && N0->getFlags().hasNoSignedWrap() &&
18920 N0.getOperand(0).getValueType()))
18921 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, N0.getOperand(0));
18922
18923 return SDValue();
18924}
18925
18926SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
18927 SDValue N0 = N->getOperand(0);
18928 EVT VT = N->getValueType(0);
18929 EVT OpVT = N0.getValueType();
18930 SDLoc DL(N);
18931
18932 // [us]itofp(undef) = 0, because the result value is bounded.
18933 if (N0.isUndef())
18934 return DAG.getConstantFP(0.0, DL, VT);
18935
18936 // fold (uint_to_fp c1) -> c1fp
18937 // ...but only if the target supports immediate floating-point values
18938 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18939 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UINT_TO_FP, DL, VT, {N0}))
18940 return C;
18941
18942 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
18943 // but SINT_TO_FP is legal on this target, try to convert.
18944 if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
18945 hasOperation(ISD::SINT_TO_FP, OpVT)) {
18946 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
18947 if (DAG.SignBitIsZero(N0))
18948 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, N0);
18949 }
18950
18951 // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
18952 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
18953 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18954 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
18955 DAG.getConstantFP(0.0, DL, VT));
18956
18957 if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
18958 return FTrunc;
18959
18960 // fold (uint_to_fp (trunc nuw x)) -> (uint_to_fp x)
18961 if (N0.getOpcode() == ISD::TRUNCATE && N0->getFlags().hasNoUnsignedWrap() &&
18963 N0.getOperand(0).getValueType()))
18964 return DAG.getNode(ISD::UINT_TO_FP, DL, VT, N0.getOperand(0));
18965
18966 return SDValue();
18967}
18968
18969 // Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
18970 static SDValue FoldIntToFPToInt(SDNode *N, const SDLoc &DL, SelectionDAG &DAG) {
18971   SDValue N0 = N->getOperand(0);
18972 EVT VT = N->getValueType(0);
18973
18974 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
18975 return SDValue();
18976
18977 SDValue Src = N0.getOperand(0);
18978 EVT SrcVT = Src.getValueType();
18979 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
18980 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
18981
18982 // We can safely assume the conversion won't overflow the output range,
18983 // because (for example) (uint8_t)18293.f is undefined behavior.
18984
18985 // Since we can assume the conversion won't overflow, our decision as to
18986 // whether the input will fit in the float should depend on the minimum
18987 // of the input range and output range.
18988
18989 // This means this is also safe for a signed input and unsigned output, since
18990 // a negative input would lead to undefined behavior.
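  // Worked example: for fp_to_sint(sint_to_fp(i16 x)) with an i32 result,
  // InputSize = 16 - 1 = 15, OutputSize = 32, so ActualSize = 15. An f32 in
  // between carries 24 bits of precision >= 15, so the round trip is exact and
  // the whole expression folds to a sign_extend of x.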
18991 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
18992 unsigned OutputSize = (int)VT.getScalarSizeInBits();
18993 unsigned ActualSize = std::min(InputSize, OutputSize);
18994 const fltSemantics &Sem = N0.getValueType().getFltSemantics();
18995
18996 // We can only fold away the float conversion if the input range can be
18997 // represented exactly in the float range.
18998 if (APFloat::semanticsPrecision(Sem) >= ActualSize) {
18999 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
19000 unsigned ExtOp =
19001 IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
19002 return DAG.getNode(ExtOp, DL, VT, Src);
19003 }
19004 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
19005 return DAG.getNode(ISD::TRUNCATE, DL, VT, Src);
19006 return DAG.getBitcast(VT, Src);
19007 }
19008 return SDValue();
19009}
19010
19011SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
19012 SDValue N0 = N->getOperand(0);
19013 EVT VT = N->getValueType(0);
19014 SDLoc DL(N);
19015
19016 // fold (fp_to_sint undef) -> undef
19017 if (N0.isUndef())
19018 return DAG.getUNDEF(VT);
19019
19020 // fold (fp_to_sint c1fp) -> c1
19021 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_TO_SINT, DL, VT, {N0}))
19022 return C;
19023
19024 return FoldIntToFPToInt(N, DL, DAG);
19025}
19026
19027SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
19028 SDValue N0 = N->getOperand(0);
19029 EVT VT = N->getValueType(0);
19030 SDLoc DL(N);
19031
19032 // fold (fp_to_uint undef) -> undef
19033 if (N0.isUndef())
19034 return DAG.getUNDEF(VT);
19035
19036 // fold (fp_to_uint c1fp) -> c1
19037 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_TO_UINT, DL, VT, {N0}))
19038 return C;
19039
19040 return FoldIntToFPToInt(N, DL, DAG);
19041}
19042
19043SDValue DAGCombiner::visitXROUND(SDNode *N) {
19044 SDValue N0 = N->getOperand(0);
19045 EVT VT = N->getValueType(0);
19046
19047 // fold (lrint|llrint undef) -> undef
19048 // fold (lround|llround undef) -> undef
19049 if (N0.isUndef())
19050 return DAG.getUNDEF(VT);
19051
19052 // fold (lrint|llrint c1fp) -> c1
19053 // fold (lround|llround c1fp) -> c1
19054 if (SDValue C =
19055 DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0}))
19056 return C;
19057
19058 return SDValue();
19059}
19060
19061SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
19062 SDValue N0 = N->getOperand(0);
19063 SDValue N1 = N->getOperand(1);
19064 EVT VT = N->getValueType(0);
19065 SDLoc DL(N);
19066
19067 // fold (fp_round c1fp) -> c1fp
19068 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_ROUND, DL, VT, {N0, N1}))
19069 return C;
19070
19071 // fold (fp_round (fp_extend x)) -> x
19072 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
19073 return N0.getOperand(0);
19074
19075 // fold (fp_round (fp_round x)) -> (fp_round x)
19076 if (N0.getOpcode() == ISD::FP_ROUND) {
19077 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
19078 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
19079
19080 // Avoid folding legal fp_rounds into non-legal ones.
19081 if (!hasOperation(ISD::FP_ROUND, VT))
19082 return SDValue();
19083
19084 // Skip this folding if it results in an fp_round from f80 to f16.
19085 //
19086 // f80 to f16 always generates an expensive (and as yet, unimplemented)
19087 // libcall to __truncxfhf2 instead of selecting native f16 conversion
19088 // instructions from f32 or f64. Moreover, the first (value-preserving)
19089 // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
19090 // x86.
19091 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
19092 return SDValue();
19093
19094 // If the first fp_round isn't a value preserving truncation, it might
19095 // introduce a tie in the second fp_round, that wouldn't occur in the
19096 // single-step fp_round we want to fold to.
19097 // In other words, double rounding isn't the same as rounding.
19098 // Also, this is a value preserving truncation iff both fp_round's are.
19099 if ((N->getFlags().hasAllowContract() &&
19100 N0->getFlags().hasAllowContract()) ||
19101 N0IsTrunc)
19102 return DAG.getNode(
19103 ISD::FP_ROUND, DL, VT, N0.getOperand(0),
19104 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
19105 }
19106
19107 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
19108 // Note: From a legality perspective, this is a two step transform. First,
19109 // we duplicate the fp_round to the arguments of the copysign, then we
19110 // eliminate the fp_round on Y. The second step requires an additional
19111 // predicate to match the implementation above.
19112 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
19113       CanCombineFCOPYSIGN_EXTEND_ROUND(VT,
19114                                        N0.getValueType())) {
19115 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
19116 N0.getOperand(0), N1);
19117 AddToWorklist(Tmp.getNode());
19118 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, Tmp, N0.getOperand(1));
19119 }
19120
19121 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
19122 return NewVSel;
19123
19124 return SDValue();
19125}
19126
19127// Eliminate a floating-point widening of a narrowed value if the fast math
19128// flags allow it.
19130 SDValue N0 = N->getOperand(0);
19131 EVT VT = N->getValueType(0);
19132
19133 unsigned NarrowingOp;
19134 switch (N->getOpcode()) {
19135 case ISD::FP16_TO_FP:
19136 NarrowingOp = ISD::FP_TO_FP16;
19137 break;
19138 case ISD::BF16_TO_FP:
19139 NarrowingOp = ISD::FP_TO_BF16;
19140 break;
19141 case ISD::FP_EXTEND:
19142 NarrowingOp = ISD::FP_ROUND;
19143 break;
19144 default:
19145 llvm_unreachable("Expected widening FP cast");
19146 }
19147
19148 if (N0.getOpcode() == NarrowingOp && N0.getOperand(0).getValueType() == VT) {
19149 const SDNodeFlags NarrowFlags = N0->getFlags();
19150 const SDNodeFlags WidenFlags = N->getFlags();
19151 // Narrowing can introduce inf and change the encoding of a nan, so the
19152 // widen must have the nnan and ninf flags to indicate that we don't need to
19153 // care about that. We are also removing a rounding step, and that requires
19154 // both the narrow and widen to allow contraction.
19155 if (WidenFlags.hasNoNaNs() && WidenFlags.hasNoInfs() &&
19156 NarrowFlags.hasAllowContract() && WidenFlags.hasAllowContract()) {
19157 return N0.getOperand(0);
19158 }
19159 }
19160
19161 return SDValue();
19162}
19163
19164SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
19165 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
19166 SDValue N0 = N->getOperand(0);
19167 EVT VT = N->getValueType(0);
19168 SDLoc DL(N);
19169
19170 if (VT.isVector())
19171 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
19172 return FoldedVOp;
19173
19174 // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
19175 if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::FP_ROUND)
19176 return SDValue();
19177
19178 // fold (fp_extend c1fp) -> c1fp
19179 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_EXTEND, DL, VT, {N0}))
19180 return C;
19181
19182 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
19183 if (N0.getOpcode() == ISD::FP16_TO_FP &&
19184 TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
19185 return DAG.getNode(ISD::FP16_TO_FP, DL, VT, N0.getOperand(0));
19186
19187 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
19188 // value of X.
19189 if (N0.getOpcode() == ISD::FP_ROUND && N0.getConstantOperandVal(1) == 1) {
19190 SDValue In = N0.getOperand(0);
19191 if (In.getValueType() == VT) return In;
19192 if (VT.bitsLT(In.getValueType()))
19193 return DAG.getNode(ISD::FP_ROUND, DL, VT, In, N0.getOperand(1));
19194 return DAG.getNode(ISD::FP_EXTEND, DL, VT, In);
19195 }
19196
19197 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
19198 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
19199       TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) {
19200     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
19201 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT,
19202 LN0->getChain(),
19203 LN0->getBasePtr(), N0.getValueType(),
19204 LN0->getMemOperand());
19205 CombineTo(N, ExtLoad);
19206 CombineTo(
19207 N0.getNode(),
19208 DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad,
19209 DAG.getIntPtrConstant(1, SDLoc(N0), /*isTarget=*/true)),
19210 ExtLoad.getValue(1));
19211 return SDValue(N, 0); // Return N so it doesn't get rechecked!
19212 }
19213
19214 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
19215 return NewVSel;
19216
19217 if (SDValue CastEliminated = eliminateFPCastPair(N))
19218 return CastEliminated;
19219
19220 return SDValue();
19221}
19222
19223SDValue DAGCombiner::visitFCEIL(SDNode *N) {
19224 SDValue N0 = N->getOperand(0);
19225 EVT VT = N->getValueType(0);
19226
19227 // fold (fceil c1) -> fceil(c1)
19228 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FCEIL, SDLoc(N), VT, {N0}))
19229 return C;
19230
19231 return SDValue();
19232}
19233
19234SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
19235 SDValue N0 = N->getOperand(0);
19236 EVT VT = N->getValueType(0);
19237
19238 // fold (ftrunc c1) -> ftrunc(c1)
19239 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FTRUNC, SDLoc(N), VT, {N0}))
19240 return C;
19241
19242 // fold ftrunc (known rounded int x) -> x
19243 // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
19244 // likely to be generated to extract integer from a rounded floating value.
19245 switch (N0.getOpcode()) {
19246 default: break;
19247 case ISD::FRINT:
19248 case ISD::FTRUNC:
19249 case ISD::FNEARBYINT:
19250 case ISD::FROUNDEVEN:
19251 case ISD::FFLOOR:
19252 case ISD::FCEIL:
19253 return N0;
19254 }
19255
19256 return SDValue();
19257}
19258
19259SDValue DAGCombiner::visitFFREXP(SDNode *N) {
19260 SDValue N0 = N->getOperand(0);
19261
19262 // fold (ffrexp c1) -> ffrexp(c1)
19263   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
19264     return DAG.getNode(ISD::FFREXP, SDLoc(N), N->getVTList(), N0);
19265 return SDValue();
19266}
19267
19268SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
19269 SDValue N0 = N->getOperand(0);
19270 EVT VT = N->getValueType(0);
19271
19272 // fold (ffloor c1) -> ffloor(c1)
19273 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FFLOOR, SDLoc(N), VT, {N0}))
19274 return C;
19275
19276 return SDValue();
19277}
19278
19279SDValue DAGCombiner::visitFNEG(SDNode *N) {
19280 SDValue N0 = N->getOperand(0);
19281 EVT VT = N->getValueType(0);
19282 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
19283
19284 // Constant fold FNEG.
19285 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FNEG, SDLoc(N), VT, {N0}))
19286 return C;
19287
19288 if (SDValue NegN0 =
19289 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
19290 return NegN0;
19291
19292 // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
19293 // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
19294 // know it was called from a context with a nsz flag if the input fsub does
19295 // not.
19296 if (N0.getOpcode() == ISD::FSUB &&
19297       (DAG.getTarget().Options.NoSignedZerosFPMath ||
19298        N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
19299 return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
19300 N0.getOperand(0));
19301 }
19302
19303   if (SimplifyDemandedBits(SDValue(N, 0)))
19304     return SDValue(N, 0);
19305
19306 if (SDValue Cast = foldSignChangeInBitcast(N))
19307 return Cast;
19308
19309 return SDValue();
19310}
19311
19312SDValue DAGCombiner::visitFMinMax(SDNode *N) {
19313 SDValue N0 = N->getOperand(0);
19314 SDValue N1 = N->getOperand(1);
19315 EVT VT = N->getValueType(0);
19316 const SDNodeFlags Flags = N->getFlags();
19317 unsigned Opc = N->getOpcode();
19318 bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
19319 bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
19320 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
19321
19322 // Constant fold.
19323 if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
19324 return C;
19325
19326 // Canonicalize to constant on RHS.
19327   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
19328       !DAG.isConstantFPBuildVectorOrConstantFP(N1))
19329     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
19330
19331 if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
19332 const APFloat &AF = N1CFP->getValueAPF();
19333
19334 // minnum(X, nan) -> X
19335 // maxnum(X, nan) -> X
19336 // minimum(X, nan) -> nan
19337 // maximum(X, nan) -> nan
19338 if (AF.isNaN())
19339 return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
19340
19341 // In the following folds, inf can be replaced with the largest finite
19342 // float, if the ninf flag is set.
19343 if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
19344 // minnum(X, -inf) -> -inf
19345 // maxnum(X, +inf) -> +inf
19346 // minimum(X, -inf) -> -inf if nnan
19347 // maximum(X, +inf) -> +inf if nnan
19348 if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
19349 return N->getOperand(1);
19350
19351 // minnum(X, +inf) -> X if nnan
19352 // maxnum(X, -inf) -> X if nnan
19353 // minimum(X, +inf) -> X
19354 // maximum(X, -inf) -> X
19355 if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
19356 return N->getOperand(0);
19357 }
19358 }
19359
19360 if (SDValue SD = reassociateReduction(
19361 PropagatesNaN
19362 ? (IsMin ? ISD::VECREDUCE_FMINIMUM : ISD::VECREDUCE_FMAXIMUM)
19363 : (IsMin ? ISD::VECREDUCE_FMIN : ISD::VECREDUCE_FMAX),
19364 Opc, SDLoc(N), VT, N0, N1, Flags))
19365 return SD;
19366
19367 return SDValue();
19368}
19369
19370SDValue DAGCombiner::visitFABS(SDNode *N) {
19371 SDValue N0 = N->getOperand(0);
19372 EVT VT = N->getValueType(0);
19373 SDLoc DL(N);
19374
19375 // fold (fabs c1) -> fabs(c1)
19376 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FABS, DL, VT, {N0}))
19377 return C;
19378
19379   if (SimplifyDemandedBits(SDValue(N, 0)))
19380     return SDValue(N, 0);
19381
19382 if (SDValue Cast = foldSignChangeInBitcast(N))
19383 return Cast;
19384
19385 return SDValue();
19386}
19387
19388SDValue DAGCombiner::visitBRCOND(SDNode *N) {
19389 SDValue Chain = N->getOperand(0);
19390 SDValue N1 = N->getOperand(1);
19391 SDValue N2 = N->getOperand(2);
19392
19393 // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
19394 // nondeterministic jumps).
19395 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
19396 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
19397 N1->getOperand(0), N2, N->getFlags());
19398 }
19399
19400 // Variant of the previous fold where there is a SETCC in between:
19401 // BRCOND(SETCC(FREEZE(X), CONST, Cond))
19402 // =>
19403 // BRCOND(FREEZE(SETCC(X, CONST, Cond)))
19404 // =>
19405 // BRCOND(SETCC(X, CONST, Cond))
19406 // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
19407 // isn't equivalent to true or false.
19408 // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
19409 // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
19410 if (N1->getOpcode() == ISD::SETCC && N1.hasOneUse()) {
19411 SDValue S0 = N1->getOperand(0), S1 = N1->getOperand(1);
19412     ISD::CondCode Cond = cast<CondCodeSDNode>(N1.getOperand(2))->get();
19413     ConstantSDNode *S0C = dyn_cast<ConstantSDNode>(S0);
19414 ConstantSDNode *S1C = dyn_cast<ConstantSDNode>(S1);
19415 bool Updated = false;
19416
19417 // Is 'X Cond C' always true or false?
19418 auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
19419 bool False = (Cond == ISD::SETULT && C->isZero()) ||
19420 (Cond == ISD::SETLT && C->isMinSignedValue()) ||
19421 (Cond == ISD::SETUGT && C->isAllOnes()) ||
19422 (Cond == ISD::SETGT && C->isMaxSignedValue());
19423 bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
19424 (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
19425 (Cond == ISD::SETUGE && C->isZero()) ||
19426 (Cond == ISD::SETGE && C->isMinSignedValue());
19427 return True || False;
19428 };
19429
19430 if (S0->getOpcode() == ISD::FREEZE && S0.hasOneUse() && S1C) {
19431 if (!IsAlwaysTrueOrFalse(Cond, S1C)) {
19432 S0 = S0->getOperand(0);
19433 Updated = true;
19434 }
19435 }
19436 if (S1->getOpcode() == ISD::FREEZE && S1.hasOneUse() && S0C) {
19437 if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond), S0C)) {
19438 S1 = S1->getOperand(0);
19439 Updated = true;
19440 }
19441 }
19442
19443 if (Updated)
19444 return DAG.getNode(
19445 ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
19446 DAG.getSetCC(SDLoc(N1), N1->getValueType(0), S0, S1, Cond), N2,
19447 N->getFlags());
19448 }
19449
19450 // If N is a constant we could fold this into a fallthrough or unconditional
19451 // branch. However that doesn't happen very often in normal code, because
19452 // Instcombine/SimplifyCFG should have handled the available opportunities.
19453 // If we did this folding here, it would be necessary to update the
19454 // MachineBasicBlock CFG, which is awkward.
19455
19456 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
19457 // on the target, also copy fast math flags.
19458 if (N1.getOpcode() == ISD::SETCC &&
19459 TLI.isOperationLegalOrCustom(ISD::BR_CC,
19460 N1.getOperand(0).getValueType())) {
19461 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, Chain,
19462 N1.getOperand(2), N1.getOperand(0), N1.getOperand(1), N2,
19463 N1->getFlags());
19464 }
19465
19466 if (N1.hasOneUse()) {
19467 // rebuildSetCC calls visitXor which may change the Chain when there is a
19468 // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
19469 HandleSDNode ChainHandle(Chain);
19470 if (SDValue NewN1 = rebuildSetCC(N1))
19471 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
19472 ChainHandle.getValue(), NewN1, N2, N->getFlags());
19473 }
19474
19475 return SDValue();
19476}
19477
19478SDValue DAGCombiner::rebuildSetCC(SDValue N) {
19479 if (N.getOpcode() == ISD::SRL ||
19480 (N.getOpcode() == ISD::TRUNCATE &&
19481 (N.getOperand(0).hasOneUse() &&
19482 N.getOperand(0).getOpcode() == ISD::SRL))) {
19483 // Look past the truncate.
19484 if (N.getOpcode() == ISD::TRUNCATE)
19485 N = N.getOperand(0);
19486
19487 // Match this pattern so that we can generate simpler code:
19488 //
19489 // %a = ...
19490 // %b = and i32 %a, 2
19491 // %c = srl i32 %b, 1
19492 // brcond i32 %c ...
19493 //
19494 // into
19495 //
19496 // %a = ...
19497 // %b = and i32 %a, 2
19498 // %c = setcc eq %b, 0
19499 // brcond %c ...
19500 //
19501 // This applies only when the AND constant value has one bit set and the
19502 // SRL constant is equal to the log2 of the AND constant. The back-end is
19503 // smart enough to convert the result into a TEST/JMP sequence.
19504 SDValue Op0 = N.getOperand(0);
19505 SDValue Op1 = N.getOperand(1);
19506
19507 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
19508 SDValue AndOp1 = Op0.getOperand(1);
19509
19510 if (AndOp1.getOpcode() == ISD::Constant) {
19511 const APInt &AndConst = AndOp1->getAsAPIntVal();
19512
19513 if (AndConst.isPowerOf2() &&
19514 Op1->getAsAPIntVal() == AndConst.logBase2()) {
19515 SDLoc DL(N);
19516 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
19517 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
19518 ISD::SETNE);
19519 }
19520 }
19521 }
19522 }
19523
19524 // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
19525 // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
19526 if (N.getOpcode() == ISD::XOR) {
19527 // Because we may call this on a speculatively constructed
19528 // SimplifiedSetCC Node, we need to simplify this node first.
19529 // Ideally this should be folded into SimplifySetCC and not
19530 // here. For now, grab a handle to N so we don't lose it from
19531 // replacements internal to the visit.
19532 HandleSDNode XORHandle(N);
19533 while (N.getOpcode() == ISD::XOR) {
19534 SDValue Tmp = visitXOR(N.getNode());
19535 // No simplification done.
19536 if (!Tmp.getNode())
19537 break;
19538 // Returning N is a form of in-visit replacement that may have
19539 // invalidated N. Grab the value from the handle.
19540 if (Tmp.getNode() == N.getNode())
19541 N = XORHandle.getValue();
19542 else // Node simplified. Try simplifying again.
19543 N = Tmp;
19544 }
19545
19546 if (N.getOpcode() != ISD::XOR)
19547 return N;
19548
19549 SDValue Op0 = N->getOperand(0);
19550 SDValue Op1 = N->getOperand(1);
19551
19552 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
19553 bool Equal = false;
19554 // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
19555 if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
19556 Op0.getValueType() == MVT::i1) {
19557 N = Op0;
19558 Op0 = N->getOperand(0);
19559 Op1 = N->getOperand(1);
19560 Equal = true;
19561 }
19562
19563 EVT SetCCVT = N.getValueType();
19564 if (LegalTypes)
19565 SetCCVT = getSetCCResultType(SetCCVT);
19566 // Replace the uses of XOR with SETCC. Note, avoid this transformation if
19567 // it would introduce illegal operations post-legalization as this can
19568 // result in infinite looping between converting xor->setcc here, and
19569 // expanding setcc->xor in LegalizeSetCCCondCode if requested.
19570 ISD::CondCode CC = Equal ? ISD::SETEQ : ISD::SETNE;
19571 if (!LegalOperations || TLI.isCondCodeLegal(CC, Op0.getSimpleValueType()))
19572 return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1, CC);
19573 }
19574 }
19575
19576 return SDValue();
19577}
19578
19579// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
19580//
19581SDValue DAGCombiner::visitBR_CC(SDNode *N) {
19582 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
19583 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
19584
19585 // If N is a constant we could fold this into a fallthrough or unconditional
19586 // branch. However that doesn't happen very often in normal code, because
19587 // Instcombine/SimplifyCFG should have handled the available opportunities.
19588 // If we did this folding here, it would be necessary to update the
19589 // MachineBasicBlock CFG, which is awkward.
19590
19591 // Use SimplifySetCC to simplify SETCC's.
19592 SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
19593 CondLHS, CondRHS, CC->get(), SDLoc(N),
19594 false);
19595 if (Simp.getNode()) AddToWorklist(Simp.getNode());
19596
19597 // fold to a simpler setcc
19598 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
19599 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
19600 N->getOperand(0), Simp.getOperand(2),
19601 Simp.getOperand(0), Simp.getOperand(1),
19602 N->getOperand(4));
19603
19604 return SDValue();
19605}
19606
19607static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
19608 bool &IsLoad, bool &IsMasked, SDValue &Ptr,
19609 const TargetLowering &TLI) {
19610 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
19611 if (LD->isIndexed())
19612 return false;
19613 EVT VT = LD->getMemoryVT();
19614 if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
19615 return false;
19616 Ptr = LD->getBasePtr();
19617 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
19618 if (ST->isIndexed())
19619 return false;
19620 EVT VT = ST->getMemoryVT();
19621 if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
19622 return false;
19623 Ptr = ST->getBasePtr();
19624 IsLoad = false;
19625 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
19626 if (LD->isIndexed())
19627 return false;
19628 EVT VT = LD->getMemoryVT();
19629 if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
19630 !TLI.isIndexedMaskedLoadLegal(Dec, VT))
19631 return false;
19632 Ptr = LD->getBasePtr();
19633 IsMasked = true;
19634 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
19635 if (ST->isIndexed())
19636 return false;
19637 EVT VT = ST->getMemoryVT();
19638 if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
19639 !TLI.isIndexedMaskedStoreLegal(Dec, VT))
19640 return false;
19641 Ptr = ST->getBasePtr();
19642 IsLoad = false;
19643 IsMasked = true;
19644 } else {
19645 return false;
19646 }
19647 return true;
19648}
19649
19650/// Try turning a load/store into a pre-indexed load/store when the base
19651/// pointer is an add or subtract and it has other uses besides the load/store.
19652/// After the transformation, the new indexed load/store has effectively folded
19653/// the add/subtract in and all of its other uses are redirected to the
19654/// new load/store.
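/// For example, on targets with pre-indexed stores, a (store x, (add p, 4))
/// whose (add p, 4) has other users can become a pre-indexed store that also
/// produces the incremented pointer p + 4 for those users.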
19655bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
19656 if (Level < AfterLegalizeDAG)
19657 return false;
19658
19659 bool IsLoad = true;
19660 bool IsMasked = false;
19661 SDValue Ptr;
19662 if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
19663 Ptr, TLI))
19664 return false;
19665
19666 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
19667 // out. There is no reason to make this a preinc/predec.
19668 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
19669 Ptr->hasOneUse())
19670 return false;
19671
19672 // Ask the target to do addressing mode selection.
19674 SDValue BasePtr, Offset;
19675 ISD::MemIndexedMode AM = ISD::UNINDEXED;
19676 if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
19677 return false;
19678
19679 // Backends without true r+i pre-indexed forms may need to pass a
19680 // constant base with a variable offset so that constant coercion
19681 // will work with the patterns in canonical form.
19682 bool Swapped = false;
19683 if (isa<ConstantSDNode>(BasePtr)) {
19684 std::swap(BasePtr, Offset);
19685 Swapped = true;
19686 }
19687
19688 // Don't create an indexed load / store with zero offset.
19689 if (isNullConstant(Offset))
19690 return false;
19691
19692 // Try turning it into a pre-indexed load / store except when:
19693 // 1) The new base ptr is a frame index.
19694 // 2) If N is a store and the new base ptr is either the same as or is a
19695 // predecessor of the value being stored.
19696 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
19697 // that would create a cycle.
19698 // 4) All uses are load / store ops that use it as old base ptr.
19699
19700 // Check #1. Preinc'ing a frame index would require copying the stack pointer
19701 // (plus the implicit offset) to a register to preinc anyway.
19702 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
19703 return false;
19704
19705 // Check #2.
19706 if (!IsLoad) {
19707 SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
19708 : cast<StoreSDNode>(N)->getValue();
19709
19710 // Would require a copy.
19711 if (Val == BasePtr)
19712 return false;
19713
19714 // Would create a cycle.
19715 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
19716 return false;
19717 }
19718
19719 // Caches for hasPredecessorHelper.
19720 SmallPtrSet<const SDNode *, 32> Visited;
19721 SmallVector<const SDNode *, 16> Worklist;
19722 Worklist.push_back(N);
19723
19724 // If the offset is a constant, there may be other adds of constants that
19725 // can be folded with this one. We should do this to avoid having to keep
19726 // a copy of the original base pointer.
19727 SmallVector<SDNode *, 16> OtherUses;
19728 constexpr unsigned int MaxSteps = 8192;
19729 if (isa<ConstantSDNode>(Offset))
19730 for (SDUse &Use : BasePtr->uses()) {
19731 // Skip the use that is Ptr and uses of other results from BasePtr's
19732 // node (important for nodes that return multiple results).
19733 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
19734 continue;
19735
19736 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist,
19737 MaxSteps))
19738 continue;
19739
19740 if (Use.getUser()->getOpcode() != ISD::ADD &&
19741 Use.getUser()->getOpcode() != ISD::SUB) {
19742 OtherUses.clear();
19743 break;
19744 }
19745
19746 SDValue Op1 = Use.getUser()->getOperand((Use.getOperandNo() + 1) & 1);
19747 if (!isa<ConstantSDNode>(Op1)) {
19748 OtherUses.clear();
19749 break;
19750 }
19751
19752 // FIXME: In some cases, we can be smarter about this.
19753 if (Op1.getValueType() != Offset.getValueType()) {
19754 OtherUses.clear();
19755 break;
19756 }
19757
19758 OtherUses.push_back(Use.getUser());
19759 }
19760
19761 if (Swapped)
19762 std::swap(BasePtr, Offset);
19763
19764 // Now check for #3 and #4.
19765 bool RealUse = false;
19766
19767 for (SDNode *User : Ptr->users()) {
19768 if (User == N)
19769 continue;
19770 if (SDNode::hasPredecessorHelper(User, Visited, Worklist, MaxSteps))
19771 return false;
19772
19773 // If Ptr may be folded in addressing mode of other use, then it's
19774 // not profitable to do this transformation.
19775 if (!canFoldInAddressingMode(Ptr.getNode(), User, DAG, TLI))
19776 RealUse = true;
19777 }
19778
19779 if (!RealUse)
19780 return false;
19781
19782 SDValue Result;
19783 if (!IsMasked) {
19784 if (IsLoad)
19785 Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
19786 else
19787 Result =
19788 DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
19789 } else {
19790 if (IsLoad)
19791 Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
19792 Offset, AM);
19793 else
19794 Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
19795 Offset, AM);
19796 }
19797 ++PreIndexedNodes;
19798 ++NodesCombined;
19799 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
19800 Result.dump(&DAG); dbgs() << '\n');
19801 WorklistRemover DeadNodes(*this);
19802 if (IsLoad) {
19803 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
19804 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
19805 } else {
19806 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
19807 }
19808
19809 // Finally, since the node is now dead, remove it from the graph.
19810 deleteAndRecombine(N);
19811
19812 if (Swapped)
19813 std::swap(BasePtr, Offset);
19814
19815 // Replace other uses of BasePtr that can be updated to use Ptr
19816 for (SDNode *OtherUse : OtherUses) {
19817 unsigned OffsetIdx = 1;
19818 if (OtherUse->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
19819 OffsetIdx = 0;
19820 assert(OtherUse->getOperand(!OffsetIdx).getNode() == BasePtr.getNode() &&
19821 "Expected BasePtr operand");
19822
19823 // We need to replace ptr0 in the following expression:
19824 // x0 * offset0 + y0 * ptr0 = t0
19825 // knowing that
19826 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
19827 //
19828 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
19829 // indexed load/store and the expression that needs to be re-written.
19830 //
19831 // Therefore, we have:
19832 // t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
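// For example, with a pre-increment (x1 = y1 = 1, AM == ISD::PRE_INC) and an
// ADD user t0 = ptr0 + offset0 (x0 = y0 = 1), this simplifies to
//   t0 = (offset0 - offset1) + t1.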
19833
19834 auto *CN = cast<ConstantSDNode>(OtherUse->getOperand(OffsetIdx));
19835 const APInt &Offset0 = CN->getAPIntValue();
19836 const APInt &Offset1 = Offset->getAsAPIntVal();
19837 int X0 = (OtherUse->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
19838 int Y0 = (OtherUse->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
19839 int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
19840 int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
19841
19842 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
19843
19844 APInt CNV = Offset0;
19845 if (X0 < 0) CNV = -CNV;
19846 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
19847 else CNV = CNV - Offset1;
19848
19849 SDLoc DL(OtherUse);
19850
19851 // We can now generate the new expression.
19852 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
19853 SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
19854
19855 SDValue NewUse =
19856 DAG.getNode(Opcode, DL, OtherUse->getValueType(0), NewOp1, NewOp2);
19857 DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUse, 0), NewUse);
19858 deleteAndRecombine(OtherUse);
19859 }
19860
19861 // Replace the uses of Ptr with uses of the updated base value.
19862 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
19863 deleteAndRecombine(Ptr.getNode());
19864 AddToWorklist(Result.getNode());
19865
19866 return true;
19867}
19868
19869 static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
19870 SDValue &BasePtr, SDValue &Offset,
19871 ISD::MemIndexedMode &AM,
19872 SelectionDAG &DAG,
19873 const TargetLowering &TLI) {
19874 if (PtrUse == N ||
19875 (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
19876 return false;
19877
19878 if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
19879 return false;
19880
19881 // Don't create an indexed load / store with zero offset.
19882 if (isNullConstant(Offset))
19883 return false;
19884
19885 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
19886 return false;
19887
19888 SmallPtrSet<const SDNode *, 32> Visited;
19889 constexpr unsigned int MaxSteps = 8192;
19890 for (SDNode *User : BasePtr->users()) {
19891 if (User == Ptr.getNode())
19892 continue;
19893
19894 // Don't combine if there's a later user which could perform the indexing instead.
19895 if (isa<MemSDNode>(User)) {
19896 bool IsLoad = true;
19897 bool IsMasked = false;
19898 SDValue OtherPtr;
19899 if (getCombineLoadStoreParts(User, ISD::POST_INC, ISD::POST_DEC, IsLoad,
19900 IsMasked, OtherPtr, TLI)) {
19901 SmallVector<const SDNode *, 2> Worklist;
19902 Worklist.push_back(User);
19903 if (SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps))
19904 return false;
19905 }
19906 }
19907
19908 // If all the uses are load / store addresses, then don't do the
19909 // transformation.
19910 if (User->getOpcode() == ISD::ADD || User->getOpcode() == ISD::SUB) {
19911 for (SDNode *UserUser : User->users())
19912 if (canFoldInAddressingMode(User, UserUser, DAG, TLI))
19913 return false;
19914 }
19915 }
19916 return true;
19917}
19918
19919 static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
19920 bool &IsMasked, SDValue &Ptr,
19921 SDValue &BasePtr, SDValue &Offset,
19922 ISD::MemIndexedMode &AM,
19923 SelectionDAG &DAG,
19924 const TargetLowering &TLI) {
19925 if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
19926 IsMasked, Ptr, TLI) ||
19927 Ptr->hasOneUse())
19928 return nullptr;
19929
19930 // Try turning it into a post-indexed load / store except when
19931 // 1) All uses are load / store ops that use it as base ptr (and
19932 // it may be folded as addressing mode).
19933 // 2) Op must be independent of N, i.e. Op is neither a predecessor
19934 // nor a successor of N. Otherwise, if Op is folded that would
19935 // create a cycle.
19936 constexpr unsigned int MaxSteps = 8192;
19937 for (SDNode *Op : Ptr->users()) {
19938 // Check for #1.
19939 if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
19940 continue;
19941
19942 // Check for #2.
19943 SmallPtrSet<const SDNode *, 32> Visited;
19944 SmallVector<const SDNode *, 8> Worklist;
19945 // Ptr is predecessor to both N and Op.
19946 Visited.insert(Ptr.getNode());
19947 Worklist.push_back(N);
19948 Worklist.push_back(Op);
19949 if (!SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) &&
19950 !SDNode::hasPredecessorHelper(Op, Visited, Worklist, MaxSteps))
19951 return Op;
19952 }
19953 return nullptr;
19954}
19955
19956 /// Try to combine a load/store with an add/sub of the base pointer node into a
19957 /// post-indexed load/store. The transformation effectively folds the add/subtract
19958 /// into the new indexed load/store, and all of its uses are redirected to the
19959 /// new load/store.
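/// For example, (store x, p) followed by a separate (add p, 4) can become a
/// post-indexed store that writes to p and also produces p + 4.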
19960bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
19961 if (Level < AfterLegalizeDAG)
19962 return false;
19963
19964 bool IsLoad = true;
19965 bool IsMasked = false;
19966 SDValue Ptr;
19967 SDValue BasePtr;
19968 SDValue Offset;
19969 ISD::MemIndexedMode AM = ISD::UNINDEXED;
19970 SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
19971 Offset, AM, DAG, TLI);
19972 if (!Op)
19973 return false;
19974
19975 SDValue Result;
19976 if (!IsMasked)
19977 Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
19978 Offset, AM)
19979 : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
19980 BasePtr, Offset, AM);
19981 else
19982 Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
19983 BasePtr, Offset, AM)
19984 : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
19985 BasePtr, Offset, AM);
19986 ++PostIndexedNodes;
19987 ++NodesCombined;
19988 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: ";
19989 Result.dump(&DAG); dbgs() << '\n');
19990 WorklistRemover DeadNodes(*this);
19991 if (IsLoad) {
19992 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
19993 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
19994 } else {
19995 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
19996 }
19997
19998 // Finally, since the node is now dead, remove it from the graph.
19999 deleteAndRecombine(N);
20000
20001 // Replace the uses of Op with uses of the updated base value.
20002 DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
20003 Result.getValue(IsLoad ? 1 : 0));
20004 deleteAndRecombine(Op);
20005 return true;
20006}
20007
20008/// Return the base-pointer arithmetic from an indexed \p LD.
20009SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
20010 ISD::MemIndexedMode AM = LD->getAddressingMode();
20011 assert(AM != ISD::UNINDEXED);
20012 SDValue BP = LD->getOperand(1);
20013 SDValue Inc = LD->getOperand(2);
20014
20015 // Some backends use TargetConstants for load offsets, but don't expect
20016 // TargetConstants in general ADD nodes. We can convert these constants into
20017 // regular Constants (if the constant is not opaque).
20018 assert((Inc.getOpcode() != ISD::TargetConstant ||
20019 !cast<ConstantSDNode>(Inc)->isOpaque()) &&
20020 "Cannot split out indexing using opaque target constants");
20021 if (Inc.getOpcode() == ISD::TargetConstant) {
20022 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
20023 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
20024 ConstInc->getValueType(0));
20025 }
20026
20027 unsigned Opc =
20028 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
20029 return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
20030}
20031
20032 static ElementCount numVectorEltsOrZero(EVT T) {
20033 return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
20034}
20035
20036bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
20037 EVT STType = Val.getValueType();
20038 EVT STMemType = ST->getMemoryVT();
20039 if (STType == STMemType)
20040 return true;
20041 if (isTypeLegal(STMemType))
20042 return false; // fail.
20043 if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
20044 TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
20045 Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
20046 return true;
20047 }
20048 if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
20049 STType.isInteger() && STMemType.isInteger()) {
20050 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
20051 return true;
20052 }
20053 if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
20054 Val = DAG.getBitcast(STMemType, Val);
20055 return true;
20056 }
20057 return false; // fail.
20058}
20059
20060bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
20061 EVT LDMemType = LD->getMemoryVT();
20062 EVT LDType = LD->getValueType(0);
20063 assert(Val.getValueType() == LDMemType &&
20064 "Attempting to extend value of non-matching type");
20065 if (LDType == LDMemType)
20066 return true;
20067 if (LDMemType.isInteger() && LDType.isInteger()) {
20068 switch (LD->getExtensionType()) {
20069 case ISD::NON_EXTLOAD:
20070 Val = DAG.getBitcast(LDType, Val);
20071 return true;
20072 case ISD::EXTLOAD:
20073 Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
20074 return true;
20075 case ISD::SEXTLOAD:
20076 Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
20077 return true;
20078 case ISD::ZEXTLOAD:
20079 Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
20080 return true;
20081 }
20082 }
20083 return false;
20084}
20085
20086StoreSDNode *DAGCombiner::getUniqueStoreFeeding(LoadSDNode *LD,
20087 int64_t &Offset) {
20088 SDValue Chain = LD->getOperand(0);
20089
20090 // Look through CALLSEQ_START.
20091 if (Chain.getOpcode() == ISD::CALLSEQ_START)
20092 Chain = Chain->getOperand(0);
20093
20094 StoreSDNode *ST = nullptr;
20095 SmallVector<SDValue, 8> Aliases;
20096 if (Chain.getOpcode() == ISD::TokenFactor) {
20097 // Look for unique store within the TokenFactor.
20098 for (SDValue Op : Chain->ops()) {
20099 StoreSDNode *Store = dyn_cast<StoreSDNode>(Op.getNode());
20100 if (!Store)
20101 continue;
20102 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
20103 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
20104 if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
20105 continue;
20106 // Make sure the store is not aliased with any nodes in TokenFactor.
20107 GatherAllAliases(Store, Chain, Aliases);
20108 if (Aliases.empty() ||
20109 (Aliases.size() == 1 && Aliases.front().getNode() == Store))
20110 ST = Store;
20111 break;
20112 }
20113 } else {
20114 StoreSDNode *Store = dyn_cast<StoreSDNode>(Chain.getNode());
20115 if (Store) {
20116 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
20117 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
20118 if (BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
20119 ST = Store;
20120 }
20121 }
20122
20123 return ST;
20124}
20125
20126SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
20127 if (OptLevel == CodeGenOptLevel::None || !LD->isSimple())
20128 return SDValue();
20129 SDValue Chain = LD->getOperand(0);
20130 int64_t Offset;
20131
20132 StoreSDNode *ST = getUniqueStoreFeeding(LD, Offset);
20133 // TODO: Relax this restriction for unordered atomics (see D66309)
20134 if (!ST || !ST->isSimple() || ST->getAddressSpace() != LD->getAddressSpace())
20135 return SDValue();
20136
20137 EVT LDType = LD->getValueType(0);
20138 EVT LDMemType = LD->getMemoryVT();
20139 EVT STMemType = ST->getMemoryVT();
20140 EVT STType = ST->getValue().getValueType();
20141
20142 // There are two cases to consider here:
20143 // 1. The store is fixed width and the load is scalable. In this case we
20144 // don't know at compile time if the store completely envelops the load
20145 // so we abandon the optimisation.
20146 // 2. The store is scalable and the load is fixed width. We could
20147 // potentially support a limited number of cases here, but there has been
20148 // no cost-benefit analysis to prove it's worth it.
20149 bool LdStScalable = LDMemType.isScalableVT();
20150 if (LdStScalable != STMemType.isScalableVT())
20151 return SDValue();
20152
20153 // If we are dealing with scalable vectors on a big endian platform the
20154 // calculation of offsets below becomes trickier, since we do not know at
20155 // compile time the absolute size of the vector. Until we've done more
20156 // analysis on big-endian platforms it seems better to bail out for now.
20157 if (LdStScalable && DAG.getDataLayout().isBigEndian())
20158 return SDValue();
20159
20160 // Normalize for endianness. After this, Offset=0 will denote that the least
20161 // significant bit in the loaded value maps to the least significant bit in
20162 // the stored value. With Offset=n (for n > 0) the loaded value starts at the
20163 // n:th least significant byte of the stored value.
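// For example, an i8 load of the same address as an i32 store has a
// normalized Offset of 0 on little-endian (it reads the least significant
// byte), but 3 on big-endian (it reads the most significant byte).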
20164 int64_t OrigOffset = Offset;
20165 if (DAG.getDataLayout().isBigEndian())
20166 Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedValue() -
20167 (int64_t)LDMemType.getStoreSizeInBits().getFixedValue()) /
20168 8 -
20169 Offset;
20170
20171 // Check that the stored value covers all bits that are loaded.
20172 bool STCoversLD;
20173
20174 TypeSize LdMemSize = LDMemType.getSizeInBits();
20175 TypeSize StMemSize = STMemType.getSizeInBits();
20176 if (LdStScalable)
20177 STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
20178 else
20179 STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedValue() <=
20180 StMemSize.getFixedValue());
20181
20182 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
20183 if (LD->isIndexed()) {
20184 // Cannot handle opaque target constants and we must respect the user's
20185 // request not to split indexes from loads.
20186 if (!canSplitIdx(LD))
20187 return SDValue();
20188 SDValue Idx = SplitIndexingFromLoad(LD);
20189 SDValue Ops[] = {Val, Idx, Chain};
20190 return CombineTo(LD, Ops, 3);
20191 }
20192 return CombineTo(LD, Val, Chain);
20193 };
20194
20195 if (!STCoversLD)
20196 return SDValue();
20197
20198 // Memory as copy space (potentially masked).
20199 if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
20200 // Simple case: Direct non-truncating forwarding
20201 if (LDType.getSizeInBits() == LdMemSize)
20202 return ReplaceLd(LD, ST->getValue(), Chain);
20203 // Can we model the truncate and extension with an and mask?
20204 if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
20205 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
20206 // Mask to size of LDMemType
20207 auto Mask =
20208 DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
20209 StMemSize.getFixedValue()),
20210 SDLoc(ST), STType);
20211 auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
20212 return ReplaceLd(LD, Val, Chain);
20213 }
20214 }
20215
20216 // Handle some big-endian cases that would have Offset 0 on, and be handled
20217 // for, little-endian targets.
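// For example, an i8 load at the base address of an i32 store on big-endian
// reads the most significant byte (normalized Offset=3); it can be modelled
// as an SRL of the stored value by 24 bits, after which Offset becomes 0.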
20218 SDValue Val = ST->getValue();
20219 if (DAG.getDataLayout().isBigEndian() && Offset > 0 && OrigOffset == 0) {
20220 if (STType.isInteger() && !STType.isVector() && LDType.isInteger() &&
20221 !LDType.isVector() && isTypeLegal(STType) &&
20222 TLI.isOperationLegal(ISD::SRL, STType)) {
20223 Val = DAG.getNode(ISD::SRL, SDLoc(LD), STType, Val,
20224 DAG.getConstant(Offset * 8, SDLoc(LD), STType));
20225 Offset = 0;
20226 }
20227 }
20228
20229 // TODO: Deal with nonzero offset.
20230 if (LD->getBasePtr().isUndef() || Offset != 0)
20231 return SDValue();
20232 // Model necessary truncations / extensions.
20233 // Truncate Value To Stored Memory Size.
20234 do {
20235 if (!getTruncatedStoreValue(ST, Val))
20236 break;
20237 if (!isTypeLegal(LDMemType))
20238 break;
20239 if (STMemType != LDMemType) {
20240 // TODO: Support vectors? This requires extract_subvector/bitcast.
20241 if (!STMemType.isVector() && !LDMemType.isVector() &&
20242 STMemType.isInteger() && LDMemType.isInteger())
20243 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
20244 else
20245 break;
20246 }
20247 if (!extendLoadedValueToExtension(LD, Val))
20248 break;
20249 return ReplaceLd(LD, Val, Chain);
20250 } while (false);
20251
20252 // On failure, cleanup dead nodes we may have created.
20253 if (Val->use_empty())
20254 deleteAndRecombine(Val.getNode());
20255 return SDValue();
20256}
20257
20258SDValue DAGCombiner::visitLOAD(SDNode *N) {
20259 LoadSDNode *LD = cast<LoadSDNode>(N);
20260 SDValue Chain = LD->getChain();
20261 SDValue Ptr = LD->getBasePtr();
20262
20263 // If load is not volatile and there are no uses of the loaded value (and
20264 // the updated indexed value in case of indexed loads), change uses of the
20265 // chain value into uses of the chain input (i.e. delete the dead load).
20266 // TODO: Allow this for unordered atomics (see D66309)
20267 if (LD->isSimple()) {
20268 if (N->getValueType(1) == MVT::Other) {
20269 // Unindexed loads.
20270 if (!N->hasAnyUseOfValue(0)) {
20271 // It's not safe to use the two value CombineTo variant here. e.g.
20272 // v1, chain2 = load chain1, loc
20273 // v2, chain3 = load chain2, loc
20274 // v3 = add v2, c
20275 // Now we replace use of chain2 with chain1. This makes the second load
20276 // isomorphic to the one we are deleting, and thus makes this load live.
20277 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
20278 dbgs() << "\nWith chain: "; Chain.dump(&DAG);
20279 dbgs() << "\n");
20280 WorklistRemover DeadNodes(*this);
20281 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
20282 AddUsersToWorklist(Chain.getNode());
20283 if (N->use_empty())
20284 deleteAndRecombine(N);
20285
20286 return SDValue(N, 0); // Return N so it doesn't get rechecked!
20287 }
20288 } else {
20289 // Indexed loads.
20290 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
20291
20292 // If this load has an opaque TargetConstant offset, then we cannot split
20293 // the indexing into an add/sub directly (that TargetConstant may not be
20294 // valid for a different type of node, and we cannot convert an opaque
20295 // target constant into a regular constant).
20296 bool CanSplitIdx = canSplitIdx(LD);
20297
20298 if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
20299 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
20300 SDValue Index;
20301 if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
20302 Index = SplitIndexingFromLoad(LD);
20303 // Try to fold the base pointer arithmetic into subsequent loads and
20304 // stores.
20305 AddUsersToWorklist(N);
20306 } else
20307 Index = DAG.getUNDEF(N->getValueType(1));
20308 LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
20309 dbgs() << "\nWith: "; Undef.dump(&DAG);
20310 dbgs() << " and 2 other values\n");
20311 WorklistRemover DeadNodes(*this);
20312 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
20313 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
20314 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
20315 deleteAndRecombine(N);
20316 return SDValue(N, 0); // Return N so it doesn't get rechecked!
20317 }
20318 }
20319 }
20320
20321 // If this load is directly stored, replace the load value with the stored
20322 // value.
20323 if (auto V = ForwardStoreValueToDirectLoad(LD))
20324 return V;
20325
20326 // Try to infer better alignment information than the load already has.
20327 if (OptLevel != CodeGenOptLevel::None && LD->isUnindexed() &&
20328 !LD->isAtomic()) {
20329 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
20330 if (*Alignment > LD->getAlign() &&
20331 isAligned(*Alignment, LD->getSrcValueOffset())) {
20332 SDValue NewLoad = DAG.getExtLoad(
20333 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
20334 LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
20335 LD->getMemOperand()->getFlags(), LD->getAAInfo());
20336 // NewLoad will always be N as we are only refining the alignment
20337 assert(NewLoad.getNode() == N);
20338 (void)NewLoad;
20339 }
20340 }
20341 }
20342
20343 if (LD->isUnindexed()) {
20344 // Walk up chain skipping non-aliasing memory nodes.
20345 SDValue BetterChain = FindBetterChain(LD, Chain);
20346
20347 // If there is a better chain.
20348 if (Chain != BetterChain) {
20349 SDValue ReplLoad;
20350
20351 // Replace the chain to avoid a dependency.
20352 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
20353 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
20354 BetterChain, Ptr, LD->getMemOperand());
20355 } else {
20356 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
20357 LD->getValueType(0),
20358 BetterChain, Ptr, LD->getMemoryVT(),
20359 LD->getMemOperand());
20360 }
20361
20362 // Create token factor to keep old chain connected.
20363 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
20364 MVT::Other, Chain, ReplLoad.getValue(1));
20365
20366 // Replace uses with load result and token factor
20367 return CombineTo(N, ReplLoad.getValue(0), Token);
20368 }
20369 }
20370
20371 // Try transforming N to an indexed load.
20372 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
20373 return SDValue(N, 0);
20374
20375 // Try to slice up N to more direct loads if the slices are mapped to
20376 // different register banks or pairing can take place.
20377 if (SliceUpLoad(N))
20378 return SDValue(N, 0);
20379
20380 return SDValue();
20381}
20382
20383namespace {
20384
20385/// Helper structure used to slice a load in smaller loads.
20386/// Basically a slice is obtained from the following sequence:
20387/// Origin = load Ty1, Base
20388/// Shift = srl Ty1 Origin, CstTy Amount
20389/// Inst = trunc Shift to Ty2
20390///
20391/// Then, it will be rewritten into:
20392/// Slice = load SliceTy, Base + SliceOffset
20393/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
20394///
20395/// SliceTy is deduced from the number of bits that are actually used to
20396/// build Inst.
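/// For example, on a little-endian target:
///   Origin = load i32, Base
///   Shift  = srl i32 Origin, 16
///   Inst   = trunc to i16
/// becomes a single i16 load from Base + 2.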
20397struct LoadedSlice {
20398 /// Helper structure used to compute the cost of a slice.
20399 struct Cost {
20400 /// Are we optimizing for code size.
20401 bool ForCodeSize = false;
20402
20403 /// Various cost.
20404 unsigned Loads = 0;
20405 unsigned Truncates = 0;
20406 unsigned CrossRegisterBanksCopies = 0;
20407 unsigned ZExts = 0;
20408 unsigned Shift = 0;
20409
20410 explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
20411
20412 /// Get the cost of one isolated slice.
20413 Cost(const LoadedSlice &LS, bool ForCodeSize)
20414 : ForCodeSize(ForCodeSize), Loads(1) {
20415 EVT TruncType = LS.Inst->getValueType(0);
20416 EVT LoadedType = LS.getLoadedType();
20417 if (TruncType != LoadedType &&
20418 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
20419 ZExts = 1;
20420 }
20421
20422 /// Account for slicing gain in the current cost.
20423 /// Slicing provides a few gains like removing a shift or a
20424 /// truncate. This method allows growing the cost of the original
20425 /// load with the gain from this slice.
20426 void addSliceGain(const LoadedSlice &LS) {
20427 // Each slice saves a truncate.
20428 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
20429 if (!TLI.isTruncateFree(LS.Inst->getOperand(0), LS.Inst->getValueType(0)))
20430 ++Truncates;
20431 // If there is a shift amount, this slice gets rid of it.
20432 if (LS.Shift)
20433 ++Shift;
20434 // If this slice can merge a cross register bank copy, account for it.
20435 if (LS.canMergeExpensiveCrossRegisterBankCopy())
20436 ++CrossRegisterBanksCopies;
20437 }
20438
20439 Cost &operator+=(const Cost &RHS) {
20440 Loads += RHS.Loads;
20441 Truncates += RHS.Truncates;
20442 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
20443 ZExts += RHS.ZExts;
20444 Shift += RHS.Shift;
20445 return *this;
20446 }
20447
20448 bool operator==(const Cost &RHS) const {
20449 return Loads == RHS.Loads && Truncates == RHS.Truncates &&
20450 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
20451 ZExts == RHS.ZExts && Shift == RHS.Shift;
20452 }
20453
20454 bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
20455
20456 bool operator<(const Cost &RHS) const {
20457 // Assume cross register banks copies are as expensive as loads.
20458 // FIXME: Do we want some more target hooks?
20459 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
20460 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
20461 // Unless we are optimizing for code size, consider the
20462 // expensive operation first.
20463 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
20464 return ExpensiveOpsLHS < ExpensiveOpsRHS;
20465 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
20466 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
20467 }
20468
20469 bool operator>(const Cost &RHS) const { return RHS < *this; }
20470
20471 bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
20472
20473 bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
20474 };
20475
20476 // The last instruction that represent the slice. This should be a
20477 // truncate instruction.
20478 SDNode *Inst;
20479
20480 // The original load instruction.
20481 LoadSDNode *Origin;
20482
20483 // The right shift amount in bits from the original load.
20484 unsigned Shift;
20485
20486 // The DAG from which Origin came from.
20487 // This is used to get some contextual information about legal types, etc.
20488 SelectionDAG *DAG;
20489
20490 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
20491 unsigned Shift = 0, SelectionDAG *DAG = nullptr)
20492 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
20493
20494 /// Get the bits used in a chunk of bits \p BitWidth large.
20495 /// \return Result is \p BitWidth and has used bits set to 1 and
20496 /// not used bits set to 0.
20497 APInt getUsedBits() const {
20498 // Reproduce the trunc(lshr) sequence:
20499 // - Start from the truncated value.
20500 // - Zero extend to the desired bit width.
20501 // - Shift left.
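// For example, an i8 truncate of an i32 load with Shift == 16 gives
// UsedBits == 0x00ff0000.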
20502 assert(Origin && "No original load to compare against.");
20503 unsigned BitWidth = Origin->getValueSizeInBits(0);
20504 assert(Inst && "This slice is not bound to an instruction");
20505 assert(Inst->getValueSizeInBits(0) <= BitWidth &&
20506 "Extracted slice is bigger than the whole type!");
20507 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
20508 UsedBits.setAllBits();
20509 UsedBits = UsedBits.zext(BitWidth);
20510 UsedBits <<= Shift;
20511 return UsedBits;
20512 }
20513
20514 /// Get the size of the slice to be loaded in bytes.
20515 unsigned getLoadedSize() const {
20516 unsigned SliceSize = getUsedBits().popcount();
20517 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
20518 return SliceSize / 8;
20519 }
20520
20521 /// Get the type that will be loaded for this slice.
20522 /// Note: This may not be the final type for the slice.
20523 EVT getLoadedType() const {
20524 assert(DAG && "Missing context");
20525 LLVMContext &Ctxt = *DAG->getContext();
20526 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
20527 }
20528
20529 /// Get the alignment of the load used for this slice.
20530 Align getAlign() const {
20531 Align Alignment = Origin->getAlign();
20532 uint64_t Offset = getOffsetFromBase();
20533 if (Offset != 0)
20534 Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
20535 return Alignment;
20536 }
20537
20538 /// Check if this slice can be rewritten with legal operations.
20539 bool isLegal() const {
20540 // An invalid slice is not legal.
20541 if (!Origin || !Inst || !DAG)
20542 return false;
20543
20544 // Offsets are for indexed load only, we do not handle that.
20545 if (!Origin->getOffset().isUndef())
20546 return false;
20547
20548 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
20549
20550 // Check that the type is legal.
20551 EVT SliceType = getLoadedType();
20552 if (!TLI.isTypeLegal(SliceType))
20553 return false;
20554
20555 // Check that the load is legal for this type.
20556 if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
20557 return false;
20558
20559 // Check that the offset can be computed.
20560 // 1. Check its type.
20561 EVT PtrType = Origin->getBasePtr().getValueType();
20562 if (PtrType == MVT::Untyped || PtrType.isExtended())
20563 return false;
20564
20565 // 2. Check that it fits in the immediate.
20566 if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
20567 return false;
20568
20569 // 3. Check that the computation is legal.
20570 if (!TLI.isOperationLegal(ISD::ADD, PtrType))
20571 return false;
20572
20573 // Check that the zext is legal if it needs one.
20574 EVT TruncateType = Inst->getValueType(0);
20575 if (TruncateType != SliceType &&
20576 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
20577 return false;
20578
20579 return true;
20580 }
20581
20582 /// Get the offset in bytes of this slice in the original chunk of
20583 /// bits.
20584 /// \pre DAG != nullptr.
20585 uint64_t getOffsetFromBase() const {
20586 assert(DAG && "Missing context.");
20587 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
20588 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
20589 uint64_t Offset = Shift / 8;
20590 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
20591 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
20592 "The size of the original loaded type is not a multiple of a"
20593 " byte.");
20594 // If Offset is bigger than TySizeInBytes, it means we are loading all
20595 // zeros. This should have been optimized before in the process.
20596 assert(TySizeInBytes > Offset &&
20597 "Invalid shift amount for given loaded size");
20598 if (IsBigEndian)
20599 Offset = TySizeInBytes - Offset - getLoadedSize();
20600 return Offset;
20601 }
20602
20603 /// Generate the sequence of instructions to load the slice
20604 /// represented by this object and redirect the uses of this slice to
20605 /// this new sequence of instructions.
20606 /// \pre this->Inst && this->Origin are valid Instructions and this
20607 /// object passed the legal check: LoadedSlice::isLegal returned true.
20608 /// \return The last instruction of the sequence used to load the slice.
20609 SDValue loadSlice() const {
20610 assert(Inst && Origin && "Unable to replace a non-existing slice.");
20611 const SDValue &OldBaseAddr = Origin->getBasePtr();
20612 SDValue BaseAddr = OldBaseAddr;
20613 // Get the offset in that chunk of bytes w.r.t. the endianness.
20614 int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
20615 assert(Offset >= 0 && "Offset too big to fit in int64_t!");
20616 if (Offset) {
20617 // BaseAddr = BaseAddr + Offset.
20618 EVT ArithType = BaseAddr.getValueType();
20619 SDLoc DL(Origin);
20620 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
20621 DAG->getConstant(Offset, DL, ArithType));
20622 }
20623
20624 // Create the type of the loaded slice according to its size.
20625 EVT SliceType = getLoadedType();
20626
20627 // Create the load for the slice.
20628 SDValue LastInst =
20629 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
20630 Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
20631 Origin->getMemOperand()->getFlags());
20632 // If the final type is not the same as the loaded type, this means that
20633 // we have to pad with zero. Create a zero extend for that.
20634 EVT FinalType = Inst->getValueType(0);
20635 if (SliceType != FinalType)
20636 LastInst =
20637 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
20638 return LastInst;
20639 }
20640
20641 /// Check if this slice can be merged with an expensive cross register
20642 /// bank copy. E.g.,
20643 /// i = load i32
20644 /// f = bitcast i32 i to float
20645 bool canMergeExpensiveCrossRegisterBankCopy() const {
20646 if (!Inst || !Inst->hasOneUse())
20647 return false;
20648 SDNode *User = *Inst->user_begin();
20649 if (User->getOpcode() != ISD::BITCAST)
20650 return false;
20651 assert(DAG && "Missing context");
20652 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
20653 EVT ResVT = User->getValueType(0);
20654 const TargetRegisterClass *ResRC =
20655 TLI.getRegClassFor(ResVT.getSimpleVT(), User->isDivergent());
20656 const TargetRegisterClass *ArgRC =
20657 TLI.getRegClassFor(User->getOperand(0).getValueType().getSimpleVT(),
20658 User->getOperand(0)->isDivergent());
20659 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
20660 return false;
20661
20662 // At this point, we know that we perform a cross-register-bank copy.
20663 // Check if it is expensive.
20664 const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
20665 // Assume bitcasts are cheap, unless both register classes do not
20666 // explicitly share a common sub class.
20667 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
20668 return false;
20669
20670 // Check if it will be merged with the load.
20671 // 1. Check the alignment / fast memory access constraint.
20672 unsigned IsFast = 0;
20673 if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
20674 Origin->getAddressSpace(), getAlign(),
20675 Origin->getMemOperand()->getFlags(), &IsFast) ||
20676 !IsFast)
20677 return false;
20678
20679 // 2. Check that the load is a legal operation for that type.
20680 if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
20681 return false;
20682
20683 // 3. Check that we do not have a zext in the way.
20684 if (Inst->getValueType(0) != getLoadedType())
20685 return false;
20686
20687 return true;
20688 }
20689};
20690
20691} // end anonymous namespace
20692
20693/// Check that all bits set in \p UsedBits form a dense region, i.e.,
20694/// \p UsedBits looks like 0..0 1..1 0..0.
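/// For example, 0b00111100 is dense while 0b00101100 is not.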
20695static bool areUsedBitsDense(const APInt &UsedBits) {
20696 // If all the bits are one, this is dense!
20697 if (UsedBits.isAllOnes())
20698 return true;
20699
20700 // Get rid of the unused bits on the right.
20701 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countr_zero());
20702 // Get rid of the unused bits on the left.
20703 if (NarrowedUsedBits.countl_zero())
20704 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
20705 // Check that the chunk of bits is completely used.
20706 return NarrowedUsedBits.isAllOnes();
20707}
20708
20709/// Check whether or not \p First and \p Second are next to each other
20710/// in memory. This means that there is no hole between the bits loaded
20711/// by \p First and the bits loaded by \p Second.
20712static bool areSlicesNextToEachOther(const LoadedSlice &First,
20713 const LoadedSlice &Second) {
20714 assert(First.Origin == Second.Origin && First.Origin &&
20715 "Unable to match different memory origins.");
20716 APInt UsedBits = First.getUsedBits();
20717 assert((UsedBits & Second.getUsedBits()) == 0 &&
20718 "Slices are not supposed to overlap.");
20719 UsedBits |= Second.getUsedBits();
20720 return areUsedBitsDense(UsedBits);
20721}
20722
20723/// Adjust the \p GlobalLSCost according to the target
20724 /// pairing capabilities and the layout of the slices.
20725/// \pre \p GlobalLSCost should account for at least as many loads as
20726 /// there are in the slices in \p LoadedSlices.
20727 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
20728 LoadedSlice::Cost &GlobalLSCost) {
20729 unsigned NumberOfSlices = LoadedSlices.size();
20730 // If there are fewer than 2 elements, no pairing is possible.
20731 if (NumberOfSlices < 2)
20732 return;
20733
20734 // Sort the slices so that elements that are likely to be next to each
20735 // other in memory are next to each other in the list.
20736 llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
20737 assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
20738 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
20739 });
20740 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
20741 // First (resp. Second) is the first (resp. second) potential candidate
20742 // to be placed in a paired load.
20743 const LoadedSlice *First = nullptr;
20744 const LoadedSlice *Second = nullptr;
20745 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
20746 // Set the beginning of the pair.
20747 First = Second) {
20748 Second = &LoadedSlices[CurrSlice];
20749
20750 // If First is NULL, it means we start a new pair.
20751 // Get to the next slice.
20752 if (!First)
20753 continue;
20754
20755 EVT LoadedType = First->getLoadedType();
20756
20757 // If the types of the slices are different, we cannot pair them.
20758 if (LoadedType != Second->getLoadedType())
20759 continue;
20760
20761 // Check if the target supplies paired loads for this type.
20762 Align RequiredAlignment;
20763 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
20764 // move to the next pair, this type is hopeless.
20765 Second = nullptr;
20766 continue;
20767 }
20768 // Check if we meet the alignment requirement.
20769 if (First->getAlign() < RequiredAlignment)
20770 continue;
20771
20772 // Check that both loads are next to each other in memory.
20773 if (!areSlicesNextToEachOther(*First, *Second))
20774 continue;
20775
20776 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
20777 --GlobalLSCost.Loads;
20778 // Move to the next pair.
20779 Second = nullptr;
20780 }
20781}
20782
20783/// Check the profitability of all involved LoadedSlice.
20784 /// Currently, it is considered profitable if there are exactly two
20785/// involved slices (1) which are (2) next to each other in memory, and
20786/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
20787///
20788/// Note: The order of the elements in \p LoadedSlices may be modified, but not
20789/// the elements themselves.
20790///
20791/// FIXME: When the cost model will be mature enough, we can relax
20792/// constraints (1) and (2).
20793 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
20794 const APInt &UsedBits, bool ForCodeSize) {
20795 unsigned NumberOfSlices = LoadedSlices.size();
20796 if (StressLoadSlicing)
20797 return NumberOfSlices > 1;
20798
20799 // Check (1).
20800 if (NumberOfSlices != 2)
20801 return false;
20802
20803 // Check (2).
20804 if (!areUsedBitsDense(UsedBits))
20805 return false;
20806
20807 // Check (3).
20808 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
20809 // The original code has one big load.
20810 OrigCost.Loads = 1;
20811 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
20812 const LoadedSlice &LS = LoadedSlices[CurrSlice];
20813 // Accumulate the cost of all the slices.
20814 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
20815 GlobalSlicingCost += SliceCost;
20816
20817 // Account as cost in the original configuration the gain obtained
20818 // with the current slices.
20819 OrigCost.addSliceGain(LS);
20820 }
20821
20822 // If the target supports paired load, adjust the cost accordingly.
20823 adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
20824 return OrigCost > GlobalSlicingCost;
20825}
20826
20827/// If the given load, \p LI, is used only by trunc or trunc(lshr)
20828 /// operations, split it into the various pieces being extracted.
20829///
20830/// This sort of thing is introduced by SROA.
20831/// This slicing takes care not to insert overlapping loads.
20832/// \pre LI is a simple load (i.e., not an atomic or volatile load).
20833bool DAGCombiner::SliceUpLoad(SDNode *N) {
20834 if (Level < AfterLegalizeDAG)
20835 return false;
20836
20837 LoadSDNode *LD = cast<LoadSDNode>(N);
20838 if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
20839 !LD->getValueType(0).isInteger())
20840 return false;
20841
20842 // The algorithm to split up a load of a scalable vector into individual
20843 // elements currently requires knowing the length of the loaded type,
20844 // so will need adjusting to work on scalable vectors.
20845 if (LD->getValueType(0).isScalableVector())
20846 return false;
20847
20848 // Keep track of already used bits to detect overlapping values.
20849 // In that case, we will just abort the transformation.
20850 APInt UsedBits(LD->getValueSizeInBits(0), 0);
20851
20852 SmallVector<LoadedSlice, 4> LoadedSlices;
20853
20854 // Check if this load is used as several smaller chunks of bits.
20855 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
20856 // of computation for each trunc.
20857 for (SDUse &U : LD->uses()) {
20858 // Skip the uses of the chain.
20859 if (U.getResNo() != 0)
20860 continue;
20861
20862 SDNode *User = U.getUser();
20863 unsigned Shift = 0;
20864
20865 // Check if this is a trunc(lshr).
20866 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
20867 isa<ConstantSDNode>(User->getOperand(1))) {
20868 Shift = User->getConstantOperandVal(1);
20869 User = *User->user_begin();
20870 }
20871
20872 // At this point, User is a TRUNCATE, iff we encountered trunc or
20873 // trunc(lshr).
20874 if (User->getOpcode() != ISD::TRUNCATE)
20875 return false;
20876
20877 // The width of the type must be a power of 2 and at least 8 bits.
20878 // Otherwise the load cannot be represented in LLVM IR.
20879 // Moreover, if we shifted with a non-8-bits multiple, the slice
20880 // will be across several bytes. We do not support that.
20881 unsigned Width = User->getValueSizeInBits(0);
20882 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
20883 return false;
20884
20885 // Build the slice for this chain of computations.
20886 LoadedSlice LS(User, LD, Shift, &DAG);
20887 APInt CurrentUsedBits = LS.getUsedBits();
20888
20889 // Check if this slice overlaps with another.
20890 if ((CurrentUsedBits & UsedBits) != 0)
20891 return false;
20892 // Update the bits used globally.
20893 UsedBits |= CurrentUsedBits;
20894
20895 // Check if the new slice would be legal.
20896 if (!LS.isLegal())
20897 return false;
20898
20899 // Record the slice.
20900 LoadedSlices.push_back(LS);
20901 }
20902
20903 // Abort slicing if it does not seem to be profitable.
20904 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
20905 return false;
20906
20907 ++SlicedLoads;
20908
20909 // Rewrite each chain to use an independent load.
20910 // By construction, each chain can be represented by a unique load.
20911
20912 // Prepare the argument for the new token factor for all the slices.
20913 SmallVector<SDValue, 8> ArgChains;
20914 for (const LoadedSlice &LS : LoadedSlices) {
20915 SDValue SliceInst = LS.loadSlice();
20916 CombineTo(LS.Inst, SliceInst, true);
20917 if (SliceInst.getOpcode() != ISD::LOAD)
20918 SliceInst = SliceInst.getOperand(0);
20919 assert(SliceInst->getOpcode() == ISD::LOAD &&
20920 "It takes more than a zext to get to the loaded slice!!");
20921 ArgChains.push_back(SliceInst.getValue(1));
20922 }
20923
20924 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
20925 ArgChains);
20926 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
20927 AddToWorklist(Chain.getNode());
20928 return true;
20929}
20930
20931 /// Check to see if V is (and load (ptr), imm), where the load has
20932/// specific bytes cleared out. If so, return the byte size being masked out
20933/// and the shift amount.
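/// For example, (and (load i32 p), 0xffff00ff) clears byte 1 of the loaded
/// value, so (assuming the chain check passes) this returns {1, 1}: one byte
/// masked out, at a one-byte shift.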
20934static std::pair<unsigned, unsigned>
20935 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
20936 std::pair<unsigned, unsigned> Result(0, 0);
20937
20938 // Check for the structure we're looking for.
20939 if (V->getOpcode() != ISD::AND ||
20940 !isa<ConstantSDNode>(V->getOperand(1)) ||
20941 !ISD::isNormalLoad(V->getOperand(0).getNode()))
20942 return Result;
20943
20944 // Check the chain and pointer.
20945 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
20946 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
20947
20948 // This only handles simple types.
20949 if (V.getValueType() != MVT::i16 &&
20950 V.getValueType() != MVT::i32 &&
20951 V.getValueType() != MVT::i64)
20952 return Result;
20953
20954 // Check the constant mask. Invert it so that the bits being masked out are
20955 // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
20956 // follow the sign bit for uniformity.
20957 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
20958 unsigned NotMaskLZ = llvm::countl_zero(NotMask);
20959 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
20960 unsigned NotMaskTZ = llvm::countr_zero(NotMask);
20961 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
20962 if (NotMaskLZ == 64) return Result; // All zero mask.
20963
20964 // See if we have a continuous run of bits. If so, we have 0*1+0*
20965 if (llvm::countr_one(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
20966 return Result;
20967
20968 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
20969 if (V.getValueType() != MVT::i64 && NotMaskLZ)
20970 NotMaskLZ -= 64-V.getValueSizeInBits();
20971
20972 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
20973 switch (MaskedBytes) {
20974 case 1:
20975 case 2:
20976 case 4: break;
20977 default: return Result; // All one mask, or 5-byte mask.
20978 }
20979
20980  // Verify that the masked region starts at a byte offset that is a multiple
20981  // of the access width, so the narrowed access is aligned the same as its width.
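  // E.g. a 2-byte masked region starting at byte offset 1 would yield a
  // misaligned i16 access, so we bail out in that case.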
20982 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
20983
20984  // For narrowing to be valid, it must be the case that the load is the
20985  // immediately preceding memory operation before the store.
20986 if (LD == Chain.getNode())
20987 ; // ok.
20988 else if (Chain->getOpcode() == ISD::TokenFactor &&
20989 SDValue(LD, 1).hasOneUse()) {
20990  // LD has only 1 chain use, so there are no indirect dependencies.
20991 if (!LD->isOperandOf(Chain.getNode()))
20992 return Result;
20993 } else
20994 return Result; // Fail.
20995
20996 Result.first = MaskedBytes;
20997 Result.second = NotMaskTZ/8;
20998 return Result;
20999}
21000
21001/// Check to see if IVal is something that provides a value as specified by
21002/// MaskInfo. If so, replace the specified store with a narrower store of
21003/// truncated IVal.
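/// For example, with MaskInfo = {1, 1} (one byte masked out at a one-byte
/// shift) and an i32 IVal known to be zero outside bits [8, 16), the wide
/// store is replaced by a one-byte store of (IVal >> 8) at the original
/// address plus 1 on little-endian targets.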
21004static SDValue
21005ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
21006 SDValue IVal, StoreSDNode *St,
21007 DAGCombiner *DC) {
21008 unsigned NumBytes = MaskInfo.first;
21009 unsigned ByteShift = MaskInfo.second;
21010 SelectionDAG &DAG = DC->getDAG();
21011
21012 // Check to see if IVal is all zeros in the part being masked in by the 'or'
21013 // that uses this. If not, this is not a replacement.
21014 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
21015 ByteShift*8, (ByteShift+NumBytes)*8);
21016 if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
21017
21018 // Check that it is legal on the target to do this. It is legal if the new
21019 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
21020 // legalization. If the source type is legal, but the store type isn't, see
21021 // if we can use a truncating store.
21022 MVT VT = MVT::getIntegerVT(NumBytes * 8);
21023 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
21024 bool UseTruncStore;
21025 if (DC->isTypeLegal(VT))
21026 UseTruncStore = false;
21027 else if (TLI.isTypeLegal(IVal.getValueType()) &&
21028 TLI.isTruncStoreLegal(IVal.getValueType(), VT))
21029 UseTruncStore = true;
21030 else
21031 return SDValue();
21032
21033 // Can't do this for indexed stores.
21034 if (St->isIndexed())
21035 return SDValue();
21036
21037 // Check that the target doesn't think this is a bad idea.
21038 if (St->getMemOperand() &&
21039 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
21040 *St->getMemOperand()))
21041 return SDValue();
21042
21043 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
21044 // shifted by ByteShift and truncated down to NumBytes.
21045 if (ByteShift) {
21046 SDLoc DL(IVal);
21047 IVal = DAG.getNode(
21048 ISD::SRL, DL, IVal.getValueType(), IVal,
21049 DAG.getShiftAmountConstant(ByteShift * 8, IVal.getValueType(), DL));
21050 }
21051
21052 // Figure out the offset for the store and the alignment of the access.
21053 unsigned StOffset;
21054 if (DAG.getDataLayout().isLittleEndian())
21055 StOffset = ByteShift;
21056 else
21057 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
21058
21059 SDValue Ptr = St->getBasePtr();
21060 if (StOffset) {
21061 SDLoc DL(IVal);
21062  Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(StOffset), DL);
21063  }
21064
21065 ++OpsNarrowed;
21066 if (UseTruncStore)
21067 return DAG.getTruncStore(St->getChain(), SDLoc(St), IVal, Ptr,
21068 St->getPointerInfo().getWithOffset(StOffset), VT,
21069 St->getBaseAlign());
21070
21071 // Truncate down to the new size.
21072 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
21073
21074 return DAG.getStore(St->getChain(), SDLoc(St), IVal, Ptr,
21075 St->getPointerInfo().getWithOffset(StOffset),
21076 St->getBaseAlign());
21077}
21078
21079/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
21080/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
21081/// narrowing the load and store if it would end up being a win for performance
21082/// or code size.
21083SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
21084 StoreSDNode *ST = cast<StoreSDNode>(N);
21085 if (!ST->isSimple())
21086 return SDValue();
21087
21088 SDValue Chain = ST->getChain();
21089 SDValue Value = ST->getValue();
21090 SDValue Ptr = ST->getBasePtr();
21091 EVT VT = Value.getValueType();
21092
21093 if (ST->isTruncatingStore() || VT.isVector())
21094 return SDValue();
21095
21096 unsigned Opc = Value.getOpcode();
21097
21098 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
21099 !Value.hasOneUse())
21100 return SDValue();
21101
21102 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
21103 // is a byte mask indicating a consecutive number of bytes, check to see if
21104 // Y is known to provide just those bytes. If so, we try to replace the
21105 // load + replace + store sequence with a single (narrower) store, which makes
21106 // the load dead.
21107  if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
21108  std::pair<unsigned, unsigned> MaskedLoad;
21109 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
21110 if (MaskedLoad.first)
21111 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
21112 Value.getOperand(1), ST,this))
21113 return NewST;
21114
21115 // Or is commutative, so try swapping X and Y.
21116 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
21117 if (MaskedLoad.first)
21118 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
21119 Value.getOperand(0), ST,this))
21120 return NewST;
21121 }
21122
21123  if (!EnableReduceLoadOpStoreWidth)
21124  return SDValue();
21125
21126 if (Value.getOperand(1).getOpcode() != ISD::Constant)
21127 return SDValue();
21128
21129 SDValue N0 = Value.getOperand(0);
21130 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
21131 Chain == SDValue(N0.getNode(), 1)) {
21132 LoadSDNode *LD = cast<LoadSDNode>(N0);
21133 if (LD->getBasePtr() != Ptr ||
21134 LD->getPointerInfo().getAddrSpace() !=
21135 ST->getPointerInfo().getAddrSpace())
21136 return SDValue();
21137
21138 // Find the type NewVT to narrow the load / op / store to.
21139 SDValue N1 = Value.getOperand(1);
21140 unsigned BitWidth = N1.getValueSizeInBits();
21141 APInt Imm = N1->getAsAPIntVal();
21142 if (Opc == ISD::AND)
21143 Imm.flipAllBits();
21144 if (Imm == 0 || Imm.isAllOnes())
21145 return SDValue();
21146  // Find the least/most significant bits that need to be part of the narrowed
21147  // operation. We assume the target will need to address/access full bytes, so
21148 // we make sure to align LSB and MSB at byte boundaries.
21149 unsigned BitsPerByteMask = 7u;
21150 unsigned LSB = Imm.countr_zero() & ~BitsPerByteMask;
21151 unsigned MSB = (Imm.getActiveBits() - 1) | BitsPerByteMask;
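    // For example, Imm = 0x00ff0000 on an i32 value gives LSB = 16 and MSB = 23,
    // so NewBW below starts out as 8 (only a single byte needs to be rewritten).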
21152 unsigned NewBW = NextPowerOf2(MSB - LSB);
21153 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
21154 // The narrowing should be profitable, the load/store operation should be
21155 // legal (or custom) and the store size should be equal to the NewVT width.
21156 while (NewBW < BitWidth &&
21157 (NewVT.getStoreSizeInBits() != NewBW ||
21158 !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
21160 !TLI.isNarrowingProfitable(N, VT, NewVT)))) {
21161 NewBW = NextPowerOf2(NewBW);
21162 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
21163 }
21164 if (NewBW >= BitWidth)
21165 return SDValue();
21166
21167  // If we come this far, NewVT/NewBW reflect a power-of-2 sized type that is
21168  // large enough to cover all bits that should be modified. This type might
21169  // however be larger than really needed (such as i32 while we actually only
21170  // need to modify one byte). Now we need to find out how to align the memory
21171  // accesses to satisfy preferred alignments as well as to avoid accessing
21172  // memory outside the store size of the original access.
21173
21174 unsigned VTStoreSize = VT.getStoreSizeInBits().getFixedValue();
21175
21176  // Let ShAmt denote the number of bits to skip, counted from the least
21177  // significant bits of Imm. And let PtrOff denote how much the pointer needs
21178  // to be offset (in bytes) for the new access.
21179 unsigned ShAmt = 0;
21180 uint64_t PtrOff = 0;
21181 for (; ShAmt + NewBW <= VTStoreSize; ShAmt += 8) {
21182  // Make sure the range [ShAmt, ShAmt+NewBW) covers both LSB and MSB.
21183 if (ShAmt > LSB)
21184 return SDValue();
21185 if (ShAmt + NewBW < MSB)
21186 continue;
21187
21188 // Calculate PtrOff.
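      // On big-endian targets bit 0 of the value lives in the last byte of the
      // stored object, so the byte offset is counted from the most significant end.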
21189 unsigned PtrAdjustmentInBits = DAG.getDataLayout().isBigEndian()
21190 ? VTStoreSize - NewBW - ShAmt
21191 : ShAmt;
21192 PtrOff = PtrAdjustmentInBits / 8;
21193
21194 // Now check if narrow access is allowed and fast, considering alignments.
21195 unsigned IsFast = 0;
21196 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
21197 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
21198 LD->getAddressSpace(), NewAlign,
21199 LD->getMemOperand()->getFlags(), &IsFast) &&
21200 IsFast)
21201 break;
21202 }
21203  // If the loop above did not find any accepted ShAmt, we need to exit here.
21204 if (ShAmt + NewBW > VTStoreSize)
21205 return SDValue();
21206
21207 APInt NewImm = Imm.lshr(ShAmt).trunc(NewBW);
21208 if (Opc == ISD::AND)
21209 NewImm.flipAllBits();
21210 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
21211 SDValue NewPtr =
21212 DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(PtrOff), SDLoc(LD));
21213 SDValue NewLD =
21214 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
21215 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
21216 LD->getMemOperand()->getFlags(), LD->getAAInfo());
21217 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
21218 DAG.getConstant(NewImm, SDLoc(Value), NewVT));
21219 SDValue NewST =
21220 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
21221 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
21222
21223 AddToWorklist(NewPtr.getNode());
21224 AddToWorklist(NewLD.getNode());
21225 AddToWorklist(NewVal.getNode());
21226 WorklistRemover DeadNodes(*this);
21227 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
21228 ++OpsNarrowed;
21229 return NewST;
21230 }
21231
21232 return SDValue();
21233}
21234
21235/// For a given floating point load / store pair, if the load value isn't used
21236/// by any other operations, then consider transforming the pair to integer
21237/// load / store operations if the target deems the transformation profitable.
21238SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
21239 StoreSDNode *ST = cast<StoreSDNode>(N);
21240 SDValue Value = ST->getValue();
21241 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
21242 Value.hasOneUse()) {
21243 LoadSDNode *LD = cast<LoadSDNode>(Value);
21244 EVT VT = LD->getMemoryVT();
21245 if (!VT.isSimple() || !VT.isFloatingPoint() || VT != ST->getMemoryVT() ||
21246 LD->isNonTemporal() || ST->isNonTemporal() ||
21247 LD->getPointerInfo().getAddrSpace() != 0 ||
21248 ST->getPointerInfo().getAddrSpace() != 0)
21249 return SDValue();
21250
21251 TypeSize VTSize = VT.getSizeInBits();
21252
21253 // We don't know the size of scalable types at compile time so we cannot
21254 // create an integer of the equivalent size.
21255 if (VTSize.isScalable())
21256 return SDValue();
21257
21258 unsigned FastLD = 0, FastST = 0;
21259 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedValue());
21260 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
21261 !TLI.isOperationLegal(ISD::STORE, IntVT) ||
21262 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
21263 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
21264 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
21265 *LD->getMemOperand(), &FastLD) ||
21266 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
21267 *ST->getMemOperand(), &FastST) ||
21268 !FastLD || !FastST)
21269 return SDValue();
21270
21271 SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(),
21272 LD->getBasePtr(), LD->getMemOperand());
21273
21274 SDValue NewST = DAG.getStore(ST->getChain(), SDLoc(N), NewLD,
21275 ST->getBasePtr(), ST->getMemOperand());
21276
21277 AddToWorklist(NewLD.getNode());
21278 AddToWorklist(NewST.getNode());
21279 WorklistRemover DeadNodes(*this);
21280 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
21281 ++LdStFP2Int;
21282 return NewST;
21283 }
21284
21285 return SDValue();
21286}
21287
21288// This is a helper function for visitMUL to check the profitability
21289// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
21290// MulNode is the original multiply, AddNode is (add x, c1),
21291// and ConstNode is c2.
21292//
21293// If the (add x, c1) has multiple uses, we could increase
21294// the number of adds if we make this transformation.
21295// It would only be worth doing this if we can remove a
21296// multiply in the process. Check for that here.
21297// To illustrate:
21298// (A + c1) * c3
21299// (A + c2) * c3
21300// We're checking for cases where we have common "c3 * A" expressions.
21301bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
21302 SDValue ConstNode) {
21303 // If the add only has one use, and the target thinks the folding is
21304 // profitable or does not lead to worse code, this would be OK to do.
21305 if (AddNode->hasOneUse() &&
21306 TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
21307 return true;
21308
21309 // Walk all the users of the constant with which we're multiplying.
21310 for (SDNode *User : ConstNode->users()) {
21311 if (User == MulNode) // This use is the one we're on right now. Skip it.
21312 continue;
21313
21314 if (User->getOpcode() == ISD::MUL) { // We have another multiply use.
21315 SDNode *OtherOp;
21316 SDNode *MulVar = AddNode.getOperand(0).getNode();
21317
21318 // OtherOp is what we're multiplying against the constant.
21319 if (User->getOperand(0) == ConstNode)
21320 OtherOp = User->getOperand(1).getNode();
21321 else
21322 OtherOp = User->getOperand(0).getNode();
21323
21324 // Check to see if multiply is with the same operand of our "add".
21325 //
21326 // ConstNode = CONST
21327 // User = ConstNode * A <-- visiting User. OtherOp is A.
21328 // ...
21329 // AddNode = (A + c1) <-- MulVar is A.
21330 // = AddNode * ConstNode <-- current visiting instruction.
21331 //
21332 // If we make this transformation, we will have a common
21333 // multiply (ConstNode * A) that we can save.
21334 if (OtherOp == MulVar)
21335 return true;
21336
21337 // Now check to see if a future expansion will give us a common
21338 // multiply.
21339 //
21340 // ConstNode = CONST
21341 // AddNode = (A + c1)
21342 // ... = AddNode * ConstNode <-- current visiting instruction.
21343 // ...
21344 // OtherOp = (A + c2)
21345 // User = OtherOp * ConstNode <-- visiting User.
21346 //
21347 // If we make this transformation, we will have a common
21348 // multiply (CONST * A) after we also do the same transformation
21349  // to the 'User' instruction.
21350 if (OtherOp->getOpcode() == ISD::ADD &&
21351  DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
21352  OtherOp->getOperand(0).getNode() == MulVar)
21353 return true;
21354 }
21355 }
21356
21357 // Didn't find a case where this would be profitable.
21358 return false;
21359}
21360
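// Build a TokenFactor over the distinct incoming chains of the stores being
// merged, skipping chains that are themselves one of the merged stores.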
21361SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
21362 unsigned NumStores) {
21363  SmallVector<SDValue, 8> Chains;
21364  SmallPtrSet<const SDNode *, 8> Visited;
21365 SDLoc StoreDL(StoreNodes[0].MemNode);
21366
21367 for (unsigned i = 0; i < NumStores; ++i) {
21368 Visited.insert(StoreNodes[i].MemNode);
21369 }
21370
21371 // don't include nodes that are children or repeated nodes.
21372 for (unsigned i = 0; i < NumStores; ++i) {
21373 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
21374 Chains.push_back(StoreNodes[i].MemNode->getChain());
21375 }
21376
21377 assert(!Chains.empty() && "Chain should have generated a chain");
21378 return DAG.getTokenFactor(StoreDL, Chains);
21379}
21380
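// Return true if every store in StoreNodes is known, via the IR value on its
// MachineMemOperand, to access the same underlying object.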
21381bool DAGCombiner::hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes) {
21382 const Value *UnderlyingObj = nullptr;
21383 for (const auto &MemOp : StoreNodes) {
21384 const MachineMemOperand *MMO = MemOp.MemNode->getMemOperand();
21385  // A pseudo value like a stack frame has its own frame index and size; we
21386  // should not use the first store's frame index for other frames.
21387 if (MMO->getPseudoValue())
21388 return false;
21389
21390 if (!MMO->getValue())
21391 return false;
21392
21393 const Value *Obj = getUnderlyingObject(MMO->getValue());
21394
21395 if (UnderlyingObj && UnderlyingObj != Obj)
21396 return false;
21397
21398 if (!UnderlyingObj)
21399 UnderlyingObj = Obj;
21400 }
21401
21402 return true;
21403}
21404
21405bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
21406 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
21407 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
21408 // Make sure we have something to merge.
21409 if (NumStores < 2)
21410 return false;
21411
21412 assert((!UseTrunc || !UseVector) &&
21413 "This optimization cannot emit a vector truncating store");
21414
21415 // The latest Node in the DAG.
21416 SDLoc DL(StoreNodes[0].MemNode);
21417
21418 TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
21419 unsigned SizeInBits = NumStores * ElementSizeBits;
21420 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
21421
21422 std::optional<MachineMemOperand::Flags> Flags;
21423 AAMDNodes AAInfo;
21424 for (unsigned I = 0; I != NumStores; ++I) {
21425 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
21426 if (!Flags) {
21427 Flags = St->getMemOperand()->getFlags();
21428 AAInfo = St->getAAInfo();
21429 continue;
21430 }
21431 // Skip merging if there's an inconsistent flag.
21432 if (Flags != St->getMemOperand()->getFlags())
21433 return false;
21434 // Concatenate AA metadata.
21435 AAInfo = AAInfo.concat(St->getAAInfo());
21436 }
21437
21438 EVT StoreTy;
21439 if (UseVector) {
21440 unsigned Elts = NumStores * NumMemElts;
21441 // Get the type for the merged vector store.
21442 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
21443 } else
21444 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
21445
21446 SDValue StoredVal;
21447 if (UseVector) {
21448 if (IsConstantSrc) {
21449 SmallVector<SDValue, 8> BuildVector;
21450 for (unsigned I = 0; I != NumStores; ++I) {
21451 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
21452 SDValue Val = St->getValue();
21453 // If constant is of the wrong type, convert it now. This comes up
21454 // when one of our stores was truncating.
21455 if (MemVT != Val.getValueType()) {
21456 Val = peekThroughBitcasts(Val);
21457 // Deal with constants of wrong size.
21458 if (ElementSizeBits != Val.getValueSizeInBits()) {
21459 auto *C = dyn_cast<ConstantSDNode>(Val);
21460 if (!C)
21461 // Not clear how to truncate FP values.
21462 // TODO: Handle truncation of build_vector constants
21463 return false;
21464
21465 EVT IntMemVT =
21466  EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
21467  Val = DAG.getConstant(C->getAPIntValue()
21468 .zextOrTrunc(Val.getValueSizeInBits())
21469 .zextOrTrunc(ElementSizeBits),
21470 SDLoc(C), IntMemVT);
21471 }
21472  // Bitcast so the value, now correctly sized, also has the correct type.
21473 Val = DAG.getBitcast(MemVT, Val);
21474 }
21475 BuildVector.push_back(Val);
21476 }
21477 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
21478  : ISD::BUILD_VECTOR,
21479  DL, StoreTy, BuildVector);
21480 } else {
21481  SmallVector<SDValue, 8> Ops;
21482  for (unsigned i = 0; i < NumStores; ++i) {
21483 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
21484  SDValue Val = peekThroughBitcasts(St->getValue());
21485  // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
21486 // type MemVT. If the underlying value is not the correct
21487 // type, but it is an extraction of an appropriate vector we
21488 // can recast Val to be of the correct type. This may require
21489 // converting between EXTRACT_VECTOR_ELT and
21490 // EXTRACT_SUBVECTOR.
21491 if ((MemVT != Val.getValueType()) &&
21492  (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
21493  Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
21494  EVT MemVTScalarTy = MemVT.getScalarType();
21495 // We may need to add a bitcast here to get types to line up.
21496 if (MemVTScalarTy != Val.getValueType().getScalarType()) {
21497 Val = DAG.getBitcast(MemVT, Val);
21498 } else if (MemVT.isVector() &&
21499  Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
21500  Val = DAG.getNode(ISD::BUILD_VECTOR, DL, MemVT, Val);
21501 } else {
21502 unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
21503  : ISD::EXTRACT_VECTOR_ELT;
21504  SDValue Vec = Val.getOperand(0);
21505 SDValue Idx = Val.getOperand(1);
21506 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
21507 }
21508 }
21509 Ops.push_back(Val);
21510 }
21511
21512 // Build the extracted vector elements back into a vector.
21513 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
21514  : ISD::BUILD_VECTOR,
21515  DL, StoreTy, Ops);
21516 }
21517 } else {
21518 // We should always use a vector store when merging extracted vector
21519 // elements, so this path implies a store of constants.
21520 assert(IsConstantSrc && "Merged vector elements should use vector store");
21521
21522 APInt StoreInt(SizeInBits, 0);
21523
21524 // Construct a single integer constant which is made of the smaller
21525 // constant inputs.
21526 bool IsLE = DAG.getDataLayout().isLittleEndian();
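    // On little-endian targets visit the stores from highest address to lowest,
    // so the lowest-address store ends up in the least significant bits of the
    // merged constant after the shifts below.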
21527 for (unsigned i = 0; i < NumStores; ++i) {
21528 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
21529 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
21530
21531 SDValue Val = St->getValue();
21532 Val = peekThroughBitcasts(Val);
21533 StoreInt <<= ElementSizeBits;
21534 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
21535 StoreInt |= C->getAPIntValue()
21536 .zextOrTrunc(ElementSizeBits)
21537 .zextOrTrunc(SizeInBits);
21538 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
21539 StoreInt |= C->getValueAPF()
21540 .bitcastToAPInt()
21541 .zextOrTrunc(ElementSizeBits)
21542 .zextOrTrunc(SizeInBits);
21543 // If fp truncation is necessary give up for now.
21544 if (MemVT.getSizeInBits() != ElementSizeBits)
21545 return false;
21546 } else if (ISD::isBuildVectorOfConstantSDNodes(Val.getNode()) ||
21547  ISD::isBuildVectorOfConstantFPSDNodes(Val.getNode())) {
21548  // Not yet handled
21549 return false;
21550 } else {
21551 llvm_unreachable("Invalid constant element type");
21552 }
21553 }
21554
21555 // Create the new Load and Store operations.
21556 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
21557 }
21558
21559 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
21560 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
21561 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
21562
21563  // Make sure we use a truncating store if that's necessary for it to be legal.
21564  // When generating the new widened store, if the first store's pointer info
21565  // cannot be reused, discard the pointer info except for the address space,
21566  // because the widened store can no longer be represented by the original
21567  // pointer info, which describes the narrower memory object.
21568 SDValue NewStore;
21569 if (!UseTrunc) {
21570 NewStore = DAG.getStore(
21571 NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
21572 CanReusePtrInfo
21573 ? FirstInChain->getPointerInfo()
21574 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
21575 FirstInChain->getAlign(), *Flags, AAInfo);
21576 } else { // Must be realized as a trunc store
21577 EVT LegalizedStoredValTy =
21578 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
21579 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
21580 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
21581 SDValue ExtendedStoreVal =
21582 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
21583 LegalizedStoredValTy);
21584 NewStore = DAG.getTruncStore(
21585 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
21586 CanReusePtrInfo
21587 ? FirstInChain->getPointerInfo()
21588 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
21589 StoredVal.getValueType() /*TVT*/, FirstInChain->getAlign(), *Flags,
21590 AAInfo);
21591 }
21592
21593 // Replace all merged stores with the new store.
21594 for (unsigned i = 0; i < NumStores; ++i)
21595 CombineTo(StoreNodes[i].MemNode, NewStore);
21596
21597 AddToWorklist(NewChain.getNode());
21598 return true;
21599}
21600
21601SDNode *
21602DAGCombiner::getStoreMergeCandidates(StoreSDNode *St,
21603 SmallVectorImpl<MemOpLink> &StoreNodes) {
21604 // This holds the base pointer, index, and the offset in bytes from the base
21605 // pointer. We must have a base and an offset. Do not handle stores to undef
21606 // base pointers.
21607 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
21608 if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
21609 return nullptr;
21610
21611  SDValue Val = peekThroughBitcasts(St->getValue());
21612  StoreSource StoreSrc = getStoreSource(Val);
21613 assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
21614
21615 // Match on loadbaseptr if relevant.
21616 EVT MemVT = St->getMemoryVT();
21617 BaseIndexOffset LBasePtr;
21618 EVT LoadVT;
21619 if (StoreSrc == StoreSource::Load) {
21620 auto *Ld = cast<LoadSDNode>(Val);
21621 LBasePtr = BaseIndexOffset::match(Ld, DAG);
21622 LoadVT = Ld->getMemoryVT();
21623 // Load and store should be the same type.
21624 if (MemVT != LoadVT)
21625 return nullptr;
21626 // Loads must only have one use.
21627 if (!Ld->hasNUsesOfValue(1, 0))
21628 return nullptr;
21629 // The memory operands must not be volatile/indexed/atomic.
21630 // TODO: May be able to relax for unordered atomics (see D66309)
21631 if (!Ld->isSimple() || Ld->isIndexed())
21632 return nullptr;
21633 }
21634 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
21635 int64_t &Offset) -> bool {
21636 // The memory operands must not be volatile/indexed/atomic.
21637 // TODO: May be able to relax for unordered atomics (see D66309)
21638 if (!Other->isSimple() || Other->isIndexed())
21639 return false;
21640 // Don't mix temporal stores with non-temporal stores.
21641 if (St->isNonTemporal() != Other->isNonTemporal())
21642 return false;
21643  if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*St, *Other))
21644  return false;
21645 SDValue OtherBC = peekThroughBitcasts(Other->getValue());
21646 // Allow merging constants of different types as integers.
21647 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
21648 : Other->getMemoryVT() != MemVT;
21649 switch (StoreSrc) {
21650 case StoreSource::Load: {
21651 if (NoTypeMatch)
21652 return false;
21653 // The Load's Base Ptr must also match.
21654 auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
21655 if (!OtherLd)
21656 return false;
21657 BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
21658 if (LoadVT != OtherLd->getMemoryVT())
21659 return false;
21660 // Loads must only have one use.
21661 if (!OtherLd->hasNUsesOfValue(1, 0))
21662 return false;
21663 // The memory operands must not be volatile/indexed/atomic.
21664 // TODO: May be able to relax for unordered atomics (see D66309)
21665 if (!OtherLd->isSimple() || OtherLd->isIndexed())
21666 return false;
21667 // Don't mix temporal loads with non-temporal loads.
21668 if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
21669 return false;
21670  if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*cast<LoadSDNode>(Val),
21671  *OtherLd))
21672 return false;
21673 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
21674 return false;
21675 break;
21676 }
21677 case StoreSource::Constant:
21678 if (NoTypeMatch)
21679 return false;
21680 if (getStoreSource(OtherBC) != StoreSource::Constant)
21681 return false;
21682 break;
21683 case StoreSource::Extract:
21684 // Do not merge truncated stores here.
21685 if (Other->isTruncatingStore())
21686 return false;
21687 if (!MemVT.bitsEq(OtherBC.getValueType()))
21688 return false;
21689 if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
21690 OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
21691 return false;
21692 break;
21693 default:
21694 llvm_unreachable("Unhandled store source for merging");
21695 }
21696  Ptr = BaseIndexOffset::match(Other, DAG);
21697  return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
21698 };
21699
21700  // We are looking for a root node which is an ancestor to all mergeable
21701  // stores. We search up through a load, to our root and then down
21702  // through all children. For instance we will find Store{1,2,3} if
21703  // St is Store1, Store2, or Store3 where the root is not a load,
21704  // which is always true for nonvolatile ops. TODO: Expand
21705 // the search to find all valid candidates through multiple layers of loads.
21706 //
21707 // Root
21708 // |-------|-------|
21709 // Load Load Store3
21710 // | |
21711 // Store1 Store2
21712 //
21713 // FIXME: We should be able to climb and
21714 // descend TokenFactors to find candidates as well.
21715
21716 SDNode *RootNode = St->getChain().getNode();
21717 // Bail out if we already analyzed this root node and found nothing.
21718 if (ChainsWithoutMergeableStores.contains(RootNode))
21719 return nullptr;
21720
21721  // Check if the pair of StoreNode and RootNode has already bailed out, more
21722  // times than the limit, in the dependence check.
21723 auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
21724 SDNode *RootNode) -> bool {
21725 auto RootCount = StoreRootCountMap.find(StoreNode);
21726 return RootCount != StoreRootCountMap.end() &&
21727 RootCount->second.first == RootNode &&
21728 RootCount->second.second > StoreMergeDependenceLimit;
21729 };
21730
21731 auto TryToAddCandidate = [&](SDUse &Use) {
21732 // This must be a chain use.
21733 if (Use.getOperandNo() != 0)
21734 return;
21735 if (auto *OtherStore = dyn_cast<StoreSDNode>(Use.getUser())) {
21736 BaseIndexOffset Ptr;
21737 int64_t PtrDiff;
21738 if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
21739 !OverLimitInDependenceCheck(OtherStore, RootNode))
21740 StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
21741 }
21742 };
21743
21744 unsigned NumNodesExplored = 0;
21745 const unsigned MaxSearchNodes = 1024;
21746 if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
21747 RootNode = Ldn->getChain().getNode();
21748 // Bail out if we already analyzed this root node and found nothing.
21749 if (ChainsWithoutMergeableStores.contains(RootNode))
21750 return nullptr;
21751 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
21752 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
21753 SDNode *User = I->getUser();
21754 if (I->getOperandNo() == 0 && isa<LoadSDNode>(User)) { // walk down chain
21755 for (SDUse &U2 : User->uses())
21756 TryToAddCandidate(U2);
21757 }
21758 // Check stores that depend on the root (e.g. Store 3 in the chart above).
21759 if (I->getOperandNo() == 0 && isa<StoreSDNode>(User)) {
21760 TryToAddCandidate(*I);
21761 }
21762 }
21763 } else {
21764 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
21765 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
21766 TryToAddCandidate(*I);
21767 }
21768
21769 return RootNode;
21770}
21771
21772// We need to check that merging these stores does not cause a loop in the
21773// DAG. Any store candidate may depend on another candidate indirectly through
21774// its operands. Check in parallel by searching up from operands of candidates.
21775bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
21776 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
21777 SDNode *RootNode) {
21778  // FIXME: We should be able to truncate a full search of
21779  // predecessors by doing a BFS and keeping tabs on the originating
21780  // stores from which worklist nodes come, in a similar way to
21781  // TokenFactor simplification.
21782
21783 SmallPtrSet<const SDNode *, 32> Visited;
21784  SmallVector<const SDNode *, 8> Worklist;
21785
21786 // RootNode is a predecessor to all candidates so we need not search
21787 // past it. Add RootNode (peeking through TokenFactors). Do not count
21788 // these towards size check.
21789
21790 Worklist.push_back(RootNode);
21791 while (!Worklist.empty()) {
21792 auto N = Worklist.pop_back_val();
21793 if (!Visited.insert(N).second)
21794 continue; // Already present in Visited.
21795 if (N->getOpcode() == ISD::TokenFactor) {
21796 for (SDValue Op : N->ops())
21797 Worklist.push_back(Op.getNode());
21798 }
21799 }
21800
21801 // Don't count pruning nodes towards max.
21802 unsigned int Max = 1024 + Visited.size();
21803 // Search Ops of store candidates.
21804 for (unsigned i = 0; i < NumStores; ++i) {
21805 SDNode *N = StoreNodes[i].MemNode;
21806 // Of the 4 Store Operands:
21807 // * Chain (Op 0) -> We have already considered these
21808 // in candidate selection, but only by following the
21809 // chain dependencies. We could still have a chain
21810 // dependency to a load, that has a non-chain dep to
21811 // another load, that depends on a store, etc. So it is
21812 // possible to have dependencies that consist of a mix
21813 // of chain and non-chain deps, and we need to include
21814  // chain operands in the analysis here.
21815 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
21816 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
21817  // but aren't necessarily from the same base node, so
21818  // cycles are possible (e.g. via an indexed store).
21819 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
21820 // non-indexed stores). Not constant on all targets (e.g. ARM)
21821 // and so can participate in a cycle.
21822 for (const SDValue &Op : N->op_values())
21823 Worklist.push_back(Op.getNode());
21824 }
21825 // Search through DAG. We can stop early if we find a store node.
21826 for (unsigned i = 0; i < NumStores; ++i)
21827 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
21828 Max)) {
21829  // If the search bails out, record the StoreNode and RootNode in the
21830  // StoreRootCountMap. If we have seen the pair more times than a limit,
21831  // we won't add the StoreNode into the StoreNodes set again.
21832 if (Visited.size() >= Max) {
21833 auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
21834 if (RootCount.first == RootNode)
21835 RootCount.second++;
21836 else
21837 RootCount = {RootNode, 1};
21838 }
21839 return false;
21840 }
21841 return true;
21842}
21843
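// Walk the chain upwards from St and return true if the load Ld is reached
// through a CALLSEQ_END, i.e. a call sits on the chain between the load and
// the store.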
21844bool DAGCombiner::hasCallInLdStChain(StoreSDNode *St, LoadSDNode *Ld) {
21845 SmallPtrSet<const SDNode *, 32> Visited;
21846  SmallVector<std::pair<const SDNode *, bool>, 8> Worklist;
21847  Worklist.emplace_back(St->getChain().getNode(), false);
21848
21849 while (!Worklist.empty()) {
21850 auto [Node, FoundCall] = Worklist.pop_back_val();
21851 if (!Visited.insert(Node).second || Node->getNumOperands() == 0)
21852 continue;
21853
21854 switch (Node->getOpcode()) {
21855 case ISD::CALLSEQ_END:
21856 Worklist.emplace_back(Node->getOperand(0).getNode(), true);
21857 break;
21858 case ISD::TokenFactor:
21859 for (SDValue Op : Node->ops())
21860 Worklist.emplace_back(Op.getNode(), FoundCall);
21861 break;
21862 case ISD::LOAD:
21863 if (Node == Ld)
21864 return FoundCall;
21865 [[fallthrough]];
21866 default:
21867 assert(Node->getOperand(0).getValueType() == MVT::Other &&
21868 "Invalid chain type");
21869 Worklist.emplace_back(Node->getOperand(0).getNode(), FoundCall);
21870 break;
21871 }
21872 }
21873 return false;
21874}
21875
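// Return the number of leading stores in StoreNodes whose offsets increase by
// exactly ElementSizeBytes from one store to the next, erasing leading entries
// that cannot start such a run. Returns 0 if no run of at least two
// consecutive stores remains.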
21876unsigned
21877DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
21878 int64_t ElementSizeBytes) const {
21879 while (true) {
21880 // Find a store past the width of the first store.
21881 size_t StartIdx = 0;
21882 while ((StartIdx + 1 < StoreNodes.size()) &&
21883 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
21884 StoreNodes[StartIdx + 1].OffsetFromBase)
21885 ++StartIdx;
21886
21887 // Bail if we don't have enough candidates to merge.
21888 if (StartIdx + 1 >= StoreNodes.size())
21889 return 0;
21890
21891 // Trim stores that overlapped with the first store.
21892 if (StartIdx)
21893 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
21894
21895 // Scan the memory operations on the chain and find the first
21896 // non-consecutive store memory address.
21897 unsigned NumConsecutiveStores = 1;
21898 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
21899 // Check that the addresses are consecutive starting from the second
21900 // element in the list of stores.
21901 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
21902 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
21903 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
21904 break;
21905 NumConsecutiveStores = i + 1;
21906 }
21907 if (NumConsecutiveStores > 1)
21908 return NumConsecutiveStores;
21909
21910 // There are no consecutive stores at the start of the list.
21911 // Remove the first store and try again.
21912 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
21913 }
21914}
21915
21916bool DAGCombiner::tryStoreMergeOfConstants(
21917 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
21918 EVT MemVT, SDNode *RootNode, bool AllowVectors) {
21919 LLVMContext &Context = *DAG.getContext();
21920 const DataLayout &DL = DAG.getDataLayout();
21921 int64_t ElementSizeBytes = MemVT.getStoreSize();
21922 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
21923 bool MadeChange = false;
21924
21925 // Store the constants into memory as one consecutive store.
21926 while (NumConsecutiveStores >= 2) {
21927 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
21928 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
21929 Align FirstStoreAlign = FirstInChain->getAlign();
21930 unsigned LastLegalType = 1;
21931 unsigned LastLegalVectorType = 1;
21932 bool LastIntegerTrunc = false;
21933 bool NonZero = false;
21934 unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
21935 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
21936 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
21937 SDValue StoredVal = ST->getValue();
21938 bool IsElementZero = false;
21939 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
21940 IsElementZero = C->isZero();
21941 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
21942 IsElementZero = C->getConstantFPValue()->isNullValue();
21943 else if (ISD::isBuildVectorAllZeros(StoredVal.getNode()))
21944 IsElementZero = true;
21945 if (IsElementZero) {
21946 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
21947 FirstZeroAfterNonZero = i;
21948 }
21949 NonZero |= !IsElementZero;
21950
21951 // Find a legal type for the constant store.
21952 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
21953 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
21954 unsigned IsFast = 0;
21955
21956 // Break early when size is too large to be legal.
21957 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
21958 break;
21959
21960 if (TLI.isTypeLegal(StoreTy) &&
21961 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
21962 DAG.getMachineFunction()) &&
21963 TLI.allowsMemoryAccess(Context, DL, StoreTy,
21964 *FirstInChain->getMemOperand(), &IsFast) &&
21965 IsFast) {
21966 LastIntegerTrunc = false;
21967 LastLegalType = i + 1;
21968 // Or check whether a truncstore is legal.
21969 } else if (TLI.getTypeAction(Context, StoreTy) ==
21970  TargetLowering::TypePromoteInteger) {
21971  EVT LegalizedStoredValTy =
21972 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
21973 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
21974 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
21975 DAG.getMachineFunction()) &&
21976 TLI.allowsMemoryAccess(Context, DL, StoreTy,
21977 *FirstInChain->getMemOperand(), &IsFast) &&
21978 IsFast) {
21979 LastIntegerTrunc = true;
21980 LastLegalType = i + 1;
21981 }
21982 }
21983
21984 // We only use vectors if the target allows it and the function is not
21985 // marked with the noimplicitfloat attribute.
21986 if (TLI.storeOfVectorConstantIsCheap(!NonZero, MemVT, i + 1, FirstStoreAS) &&
21987 AllowVectors) {
21988 // Find a legal type for the vector store.
21989 unsigned Elts = (i + 1) * NumMemElts;
21990 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
21991 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
21992 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
21993 TLI.allowsMemoryAccess(Context, DL, Ty,
21994 *FirstInChain->getMemOperand(), &IsFast) &&
21995 IsFast)
21996 LastLegalVectorType = i + 1;
21997 }
21998 }
21999
22000 bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
22001 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
22002 bool UseTrunc = LastIntegerTrunc && !UseVector;
22003
22004 // Check if we found a legal integer type that creates a meaningful
22005 // merge.
22006 if (NumElem < 2) {
22007 // We know that candidate stores are in order and of correct
22008 // shape. While there is no mergeable sequence from the
22009 // beginning one may start later in the sequence. The only
22010 // reason a merge of size N could have failed where another of
22011 // the same size would not have, is if the alignment has
22012 // improved or we've dropped a non-zero value. Drop as many
22013 // candidates as we can here.
22014 unsigned NumSkip = 1;
22015 while ((NumSkip < NumConsecutiveStores) &&
22016 (NumSkip < FirstZeroAfterNonZero) &&
22017 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
22018 NumSkip++;
22019
22020 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
22021 NumConsecutiveStores -= NumSkip;
22022 continue;
22023 }
22024
22025 // Check that we can merge these candidates without causing a cycle.
22026 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
22027 RootNode)) {
22028 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
22029 NumConsecutiveStores -= NumElem;
22030 continue;
22031 }
22032
22033 MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
22034 /*IsConstantSrc*/ true,
22035 UseVector, UseTrunc);
22036
22037 // Remove merged stores for next iteration.
22038 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
22039 NumConsecutiveStores -= NumElem;
22040 }
22041 return MadeChange;
22042}
22043
22044bool DAGCombiner::tryStoreMergeOfExtracts(
22045 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
22046 EVT MemVT, SDNode *RootNode) {
22047 LLVMContext &Context = *DAG.getContext();
22048 const DataLayout &DL = DAG.getDataLayout();
22049 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
22050 bool MadeChange = false;
22051
22052 // Loop on Consecutive Stores on success.
22053 while (NumConsecutiveStores >= 2) {
22054 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
22055 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
22056 Align FirstStoreAlign = FirstInChain->getAlign();
22057 unsigned NumStoresToMerge = 1;
22058 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
22059 // Find a legal type for the vector store.
22060 unsigned Elts = (i + 1) * NumMemElts;
22061 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
22062 unsigned IsFast = 0;
22063
22064 // Break early when size is too large to be legal.
22065 if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
22066 break;
22067
22068 if (TLI.isTypeLegal(Ty) &&
22069 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
22070 TLI.allowsMemoryAccess(Context, DL, Ty,
22071 *FirstInChain->getMemOperand(), &IsFast) &&
22072 IsFast)
22073 NumStoresToMerge = i + 1;
22074 }
22075
22076  // Check if we found a legal vector type creating a meaningful
22077  // merge.
22078 if (NumStoresToMerge < 2) {
22079 // We know that candidate stores are in order and of correct
22080 // shape. While there is no mergeable sequence from the
22081 // beginning one may start later in the sequence. The only
22082 // reason a merge of size N could have failed where another of
22083 // the same size would not have, is if the alignment has
22084 // improved. Drop as many candidates as we can here.
22085 unsigned NumSkip = 1;
22086 while ((NumSkip < NumConsecutiveStores) &&
22087 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
22088 NumSkip++;
22089
22090 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
22091 NumConsecutiveStores -= NumSkip;
22092 continue;
22093 }
22094
22095 // Check that we can merge these candidates without causing a cycle.
22096 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
22097 RootNode)) {
22098 StoreNodes.erase(StoreNodes.begin(),
22099 StoreNodes.begin() + NumStoresToMerge);
22100 NumConsecutiveStores -= NumStoresToMerge;
22101 continue;
22102 }
22103
22104 MadeChange |= mergeStoresOfConstantsOrVecElts(
22105 StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
22106 /*UseVector*/ true, /*UseTrunc*/ false);
22107
22108 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
22109 NumConsecutiveStores -= NumStoresToMerge;
22110 }
22111 return MadeChange;
22112}
22113
22114bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
22115 unsigned NumConsecutiveStores, EVT MemVT,
22116 SDNode *RootNode, bool AllowVectors,
22117 bool IsNonTemporalStore,
22118 bool IsNonTemporalLoad) {
22119 LLVMContext &Context = *DAG.getContext();
22120 const DataLayout &DL = DAG.getDataLayout();
22121 int64_t ElementSizeBytes = MemVT.getStoreSize();
22122 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
22123 bool MadeChange = false;
22124
22125 // Look for load nodes which are used by the stored values.
22126 SmallVector<MemOpLink, 8> LoadNodes;
22127
22128 // Find acceptable loads. Loads need to have the same chain (token factor),
22129 // must not be zext, volatile, indexed, and they must be consecutive.
22130 BaseIndexOffset LdBasePtr;
22131
22132 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
22133 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
22134  SDValue Val = peekThroughBitcasts(St->getValue());
22135  LoadSDNode *Ld = cast<LoadSDNode>(Val);
22136
22137 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
22138 // If this is not the first ptr that we check.
22139 int64_t LdOffset = 0;
22140 if (LdBasePtr.getBase().getNode()) {
22141 // The base ptr must be the same.
22142 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
22143 break;
22144 } else {
22145 // Check that all other base pointers are the same as this one.
22146 LdBasePtr = LdPtr;
22147 }
22148
22149 // We found a potential memory operand to merge.
22150 LoadNodes.push_back(MemOpLink(Ld, LdOffset));
22151 }
22152
22153 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
22154 Align RequiredAlignment;
22155 bool NeedRotate = false;
22156 if (LoadNodes.size() == 2) {
22157 // If we have load/store pair instructions and we only have two values,
22158 // don't bother merging.
22159 if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
22160 StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
22161 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
22162 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
22163 break;
22164 }
22165 // If the loads are reversed, see if we can rotate the halves into place.
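      // E.g. if the first store is fed by a load from p+4 and the second by a
      // load from p, we can still use one wide load from p and rotate it by
      // half its width before the merged store.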
22166 int64_t Offset0 = LoadNodes[0].OffsetFromBase;
22167 int64_t Offset1 = LoadNodes[1].OffsetFromBase;
22168 EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
22169 if (Offset0 - Offset1 == ElementSizeBytes &&
22170 (hasOperation(ISD::ROTL, PairVT) ||
22171 hasOperation(ISD::ROTR, PairVT))) {
22172 std::swap(LoadNodes[0], LoadNodes[1]);
22173 NeedRotate = true;
22174 }
22175 }
22176 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
22177 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
22178 Align FirstStoreAlign = FirstInChain->getAlign();
22179 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
22180
22181 // Scan the memory operations on the chain and find the first
22182 // non-consecutive load memory address. These variables hold the index in
22183 // the store node array.
22184
22185 unsigned LastConsecutiveLoad = 1;
22186
22187 // This variable refers to the size and not index in the array.
22188 unsigned LastLegalVectorType = 1;
22189 unsigned LastLegalIntegerType = 1;
22190 bool isDereferenceable = true;
22191 bool DoIntegerTruncate = false;
22192 int64_t StartAddress = LoadNodes[0].OffsetFromBase;
22193 SDValue LoadChain = FirstLoad->getChain();
22194 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
22195 // All loads must share the same chain.
22196 if (LoadNodes[i].MemNode->getChain() != LoadChain)
22197 break;
22198
22199 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
22200 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
22201 break;
22202 LastConsecutiveLoad = i;
22203
22204 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
22205 isDereferenceable = false;
22206
22207 // Find a legal type for the vector store.
22208 unsigned Elts = (i + 1) * NumMemElts;
22209 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
22210
22211 // Break early when size is too large to be legal.
22212 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
22213 break;
22214
22215 unsigned IsFastSt = 0;
22216 unsigned IsFastLd = 0;
22217 // Don't try vector types if we need a rotate. We may still fail the
22218 // legality checks for the integer type, but we can't handle the rotate
22219 // case with vectors.
22220 // FIXME: We could use a shuffle in place of the rotate.
22221 if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
22222 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
22223 DAG.getMachineFunction()) &&
22224 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22225 *FirstInChain->getMemOperand(), &IsFastSt) &&
22226 IsFastSt &&
22227 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22228 *FirstLoad->getMemOperand(), &IsFastLd) &&
22229 IsFastLd) {
22230 LastLegalVectorType = i + 1;
22231 }
22232
22233 // Find a legal type for the integer store.
22234 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
22235 StoreTy = EVT::getIntegerVT(Context, SizeInBits);
22236 if (TLI.isTypeLegal(StoreTy) &&
22237 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
22238 DAG.getMachineFunction()) &&
22239 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22240 *FirstInChain->getMemOperand(), &IsFastSt) &&
22241 IsFastSt &&
22242 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22243 *FirstLoad->getMemOperand(), &IsFastLd) &&
22244 IsFastLd) {
22245 LastLegalIntegerType = i + 1;
22246 DoIntegerTruncate = false;
22247 // Or check whether a truncstore and extload is legal.
22248 } else if (TLI.getTypeAction(Context, StoreTy) ==
22249  TargetLowering::TypePromoteInteger) {
22250  EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
22251 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
22252 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
22253 DAG.getMachineFunction()) &&
22254 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
22255 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
22256 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
22257 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22258 *FirstInChain->getMemOperand(), &IsFastSt) &&
22259 IsFastSt &&
22260 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22261 *FirstLoad->getMemOperand(), &IsFastLd) &&
22262 IsFastLd) {
22263 LastLegalIntegerType = i + 1;
22264 DoIntegerTruncate = true;
22265 }
22266 }
22267 }
22268
22269 // Only use vector types if the vector type is larger than the integer
22270 // type. If they are the same, use integers.
22271 bool UseVectorTy =
22272 LastLegalVectorType > LastLegalIntegerType && AllowVectors;
22273 unsigned LastLegalType =
22274 std::max(LastLegalVectorType, LastLegalIntegerType);
22275
22276  // We add +1 here because the LastXXX variables refer to an index (location)
22277  // while NumElem refers to a count (array size).
22278 unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
22279 NumElem = std::min(LastLegalType, NumElem);
22280 Align FirstLoadAlign = FirstLoad->getAlign();
22281
22282 if (NumElem < 2) {
22283 // We know that candidate stores are in order and of correct
22284 // shape. While there is no mergeable sequence from the
22285 // beginning one may start later in the sequence. The only
22286 // reason a merge of size N could have failed where another of
22287 // the same size would not have is if the alignment or either
22288 // the load or store has improved. Drop as many candidates as we
22289 // can here.
22290 unsigned NumSkip = 1;
22291 while ((NumSkip < LoadNodes.size()) &&
22292 (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
22293 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
22294 NumSkip++;
22295 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
22296 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
22297 NumConsecutiveStores -= NumSkip;
22298 continue;
22299 }
22300
22301 // Check that we can merge these candidates without causing a cycle.
22302 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
22303 RootNode)) {
22304 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
22305 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
22306 NumConsecutiveStores -= NumElem;
22307 continue;
22308 }
22309
22310 // Find if it is better to use vectors or integers to load and store
22311 // to memory.
22312 EVT JointMemOpVT;
22313 if (UseVectorTy) {
22314 // Find a legal type for the vector store.
22315 unsigned Elts = NumElem * NumMemElts;
22316 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
22317 } else {
22318 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
22319 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
22320 }
22321
22322 // Check if there is a call in the load/store chain.
22323 if (!TLI.shouldMergeStoreOfLoadsOverCall(MemVT, JointMemOpVT) &&
22324 hasCallInLdStChain(cast<StoreSDNode>(StoreNodes[0].MemNode),
22325 cast<LoadSDNode>(LoadNodes[0].MemNode))) {
22326 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
22327 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
22328 NumConsecutiveStores -= NumElem;
22329 continue;
22330 }
22331
22332 SDLoc LoadDL(LoadNodes[0].MemNode);
22333 SDLoc StoreDL(StoreNodes[0].MemNode);
22334
22335 // The merged loads are required to have the same incoming chain, so
22336 // using the first's chain is acceptable.
22337
22338 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
22339 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
22340 AddToWorklist(NewStoreChain.getNode());
22341
22342 MachineMemOperand::Flags LdMMOFlags =
22343  isDereferenceable ? MachineMemOperand::MODereferenceable
22344  : MachineMemOperand::MONone;
22345  if (IsNonTemporalLoad)
22346  LdMMOFlags |= MachineMemOperand::MONonTemporal;
22347
22348 LdMMOFlags |= TLI.getTargetMMOFlags(*FirstLoad);
22349
22350  MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
22351  ? MachineMemOperand::MONonTemporal
22352  : MachineMemOperand::MONone;
22353
22354 StMMOFlags |= TLI.getTargetMMOFlags(*StoreNodes[0].MemNode);
22355
22356 SDValue NewLoad, NewStore;
22357 if (UseVectorTy || !DoIntegerTruncate) {
22358 NewLoad = DAG.getLoad(
22359 JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
22360 FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
22361 SDValue StoreOp = NewLoad;
22362 if (NeedRotate) {
22363 unsigned LoadWidth = ElementSizeBytes * 8 * 2;
22364 assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
22365 "Unexpected type for rotate-able load pair");
22366 SDValue RotAmt =
22367 DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
22368 // Target can convert to the identical ROTR if it does not have ROTL.
22369 StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
22370 }
22371 NewStore = DAG.getStore(
22372 NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
22373 CanReusePtrInfo ? FirstInChain->getPointerInfo()
22374 : MachinePointerInfo(FirstStoreAS),
22375 FirstStoreAlign, StMMOFlags);
22376 } else { // This must be the truncstore/extload case
22377 EVT ExtendedTy =
22378 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
22379 NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
22380 FirstLoad->getChain(), FirstLoad->getBasePtr(),
22381 FirstLoad->getPointerInfo(), JointMemOpVT,
22382 FirstLoadAlign, LdMMOFlags);
22383 NewStore = DAG.getTruncStore(
22384 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
22385 CanReusePtrInfo ? FirstInChain->getPointerInfo()
22386 : MachinePointerInfo(FirstStoreAS),
22387 JointMemOpVT, FirstInChain->getAlign(),
22388 FirstInChain->getMemOperand()->getFlags());
22389 }
22390
22391 // Transfer chain users from old loads to the new load.
22392 for (unsigned i = 0; i < NumElem; ++i) {
22393 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
22394  DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
22395  SDValue(NewLoad.getNode(), 1));
22396 }
22397
22398 // Replace all stores with the new store. Recursively remove corresponding
22399 // values if they are no longer used.
22400 for (unsigned i = 0; i < NumElem; ++i) {
22401 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
22402 CombineTo(StoreNodes[i].MemNode, NewStore);
22403 if (Val->use_empty())
22404 recursivelyDeleteUnusedNodes(Val.getNode());
22405 }
22406
22407 MadeChange = true;
22408 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
22409 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
22410 NumConsecutiveStores -= NumElem;
22411 }
22412 return MadeChange;
22413}
22414
22415bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
22416 if (OptLevel == CodeGenOptLevel::None || !EnableStoreMerging)
22417 return false;
22418
22419 // TODO: Extend this function to merge stores of scalable vectors.
22420 // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
22421 // store since we know <vscale x 16 x i8> is exactly twice as large as
22422 // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
22423 EVT MemVT = St->getMemoryVT();
22424 if (MemVT.isScalableVT())
22425 return false;
22426 if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
22427 return false;
22428
22429 // This function cannot currently deal with non-byte-sized memory sizes.
22430 int64_t ElementSizeBytes = MemVT.getStoreSize();
22431 if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
22432 return false;
22433
22434 // Do not bother looking at stored values that are not constants, loads, or
22435 // extracted vector elements.
22436 SDValue StoredVal = peekThroughBitcasts(St->getValue());
22437 const StoreSource StoreSrc = getStoreSource(StoredVal);
22438 if (StoreSrc == StoreSource::Unknown)
22439 return false;
22440
22441 SmallVector<MemOpLink, 8> StoreNodes;
22442 // Find potential store merge candidates by searching through chain sub-DAG
22443 SDNode *RootNode = getStoreMergeCandidates(St, StoreNodes);
22444
22445 // Check if there is anything to merge.
22446 if (StoreNodes.size() < 2)
22447 return false;
22448
22449 // Sort the memory operands according to their distance from the
22450 // base pointer.
22451 llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
22452 return LHS.OffsetFromBase < RHS.OffsetFromBase;
22453 });
22454
22455 bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
22456 Attribute::NoImplicitFloat);
22457 bool IsNonTemporalStore = St->isNonTemporal();
22458 bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
22459 cast<LoadSDNode>(StoredVal)->isNonTemporal();
22460
22461 // Store merging attempts to merge the lowest-addressed stores first. This
22462 // generally works out, because after a successful merge the remaining
22463 // stores are checked again. However, in the
22464 // case that a non-mergeable store is found first, e.g., {p[-2],
22465 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
22466 // mergeable cases. To prevent this, we prune such stores from the
22467 // front of StoreNodes here.
22468 bool MadeChange = false;
22469 while (StoreNodes.size() > 1) {
22470 unsigned NumConsecutiveStores =
22471 getConsecutiveStores(StoreNodes, ElementSizeBytes);
22472 // There are no more stores in the list to examine.
22473 if (NumConsecutiveStores == 0)
22474 return MadeChange;
22475
22476 // We have at least 2 consecutive stores. Try to merge them.
22477 assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
22478 switch (StoreSrc) {
22479 case StoreSource::Constant:
22480 MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
22481 MemVT, RootNode, AllowVectors);
22482 break;
22483
22484 case StoreSource::Extract:
22485 MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
22486 MemVT, RootNode);
22487 break;
22488
22489 case StoreSource::Load:
22490 MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
22491 MemVT, RootNode, AllowVectors,
22492 IsNonTemporalStore, IsNonTemporalLoad);
22493 break;
22494
22495 default:
22496 llvm_unreachable("Unhandled store source type");
22497 }
22498 }
22499
22500 // Remember if we failed to optimize, to save compile time.
22501 if (!MadeChange)
22502 ChainsWithoutMergeableStores.insert(RootNode);
22503
22504 return MadeChange;
22505}
22506
22507SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
22508 SDLoc SL(ST);
22509 SDValue ReplStore;
22510
22511 // Replace the chain to avoid dependency.
22512 if (ST->isTruncatingStore()) {
22513 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
22514 ST->getBasePtr(), ST->getMemoryVT(),
22515 ST->getMemOperand());
22516 } else {
22517 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
22518 ST->getMemOperand());
22519 }
22520
22521 // Create token to keep both nodes around.
22522 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
22523 MVT::Other, ST->getChain(), ReplStore);
22524
22525 // Make sure the new and old chains are cleaned up.
22526 AddToWorklist(Token.getNode());
22527
22528 // Don't add users to work list.
22529 return CombineTo(ST, Token, false);
22530}
22531
22532SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
22533 SDValue Value = ST->getValue();
22534 if (Value.getOpcode() == ISD::TargetConstantFP)
22535 return SDValue();
22536
22537 if (!ISD::isNormalStore(ST))
22538 return SDValue();
22539
22540 SDLoc DL(ST);
22541
22542 SDValue Chain = ST->getChain();
22543 SDValue Ptr = ST->getBasePtr();
22544
22545 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
22546
22547 // NOTE: If the original store is volatile, this transform must not increase
22548 // the number of stores. For example, on x86-32 an f64 can be stored in one
22549 // processor operation but an i64 (which is not legal) requires two. So the
22550 // transform should not be done in this case.
22551
22552 SDValue Tmp;
22553 switch (CFP->getSimpleValueType(0).SimpleTy) {
22554 default:
22555 llvm_unreachable("Unknown FP type");
22556 case MVT::f16: // We don't do this for these yet.
22557 case MVT::bf16:
22558 case MVT::f80:
22559 case MVT::f128:
22560 case MVT::ppcf128:
22561 return SDValue();
22562 case MVT::f32:
22563 if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
22564 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
22565 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
22566 bitcastToAPInt().getZExtValue(), SDLoc(CFP),
22567 MVT::i32);
22568 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
22569 }
22570
22571 return SDValue();
22572 case MVT::f64:
22573 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
22574 ST->isSimple()) ||
22575 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
22576 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
22577 getZExtValue(), SDLoc(CFP), MVT::i64);
22578 return DAG.getStore(Chain, DL, Tmp,
22579 Ptr, ST->getMemOperand());
22580 }
22581
22582 if (ST->isSimple() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32) &&
22583 !TLI.isFPImmLegal(CFP->getValueAPF(), MVT::f64)) {
22584 // Many FP stores are not made apparent until after legalize, e.g. for
22585 // argument passing. Since this is so common, custom legalize the
22586 // 64-bit integer store into two 32-bit stores.
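// For example, on a little-endian target (store f64 1.0, Ptr) becomes
// (store i32 0x00000000, Ptr) and (store i32 0x3FF00000, Ptr+4), since
// bitcast(f64 1.0) == 0x3FF0000000000000.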
22587 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
22588 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
22589 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
22590 if (DAG.getDataLayout().isBigEndian())
22591 std::swap(Lo, Hi);
22592
22593 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
22594 AAMDNodes AAInfo = ST->getAAInfo();
22595
22596 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
22597 ST->getBaseAlign(), MMOFlags, AAInfo);
22598 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(4), DL);
22599 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
22600 ST->getPointerInfo().getWithOffset(4),
22601 ST->getBaseAlign(), MMOFlags, AAInfo);
22602 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
22603 St0, St1);
22604 }
22605
22606 return SDValue();
22607 }
22608}
22609
22610// (store (insert_vector_elt (load p), x, i), p) -> (store x, p+offset)
22611//
22612// If a store of a load with an element inserted into it has no other
22613 // uses on the chain in between, then we can consider the vector store
22614// dead and replace it with just the single scalar element store.
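// For example, if only element 2 of a v4i32 loaded from p is replaced before
// being stored back to p, the vector store becomes a single i32 store to p+8.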
22615SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) {
22616 SDLoc DL(ST);
22617 SDValue Value = ST->getValue();
22618 SDValue Ptr = ST->getBasePtr();
22619 SDValue Chain = ST->getChain();
22620 if (Value.getOpcode() != ISD::INSERT_VECTOR_ELT || !Value.hasOneUse())
22621 return SDValue();
22622
22623 SDValue Elt = Value.getOperand(1);
22624 SDValue Idx = Value.getOperand(2);
22625
22626 // If the element isn't byte sized or is implicitly truncated then we can't
22627 // compute an offset.
22628 EVT EltVT = Elt.getValueType();
22629 if (!EltVT.isByteSized() ||
22630 EltVT != Value.getOperand(0).getValueType().getVectorElementType())
22631 return SDValue();
22632
22633 auto *Ld = dyn_cast<LoadSDNode>(Value.getOperand(0));
22634 if (!Ld || Ld->getBasePtr() != Ptr ||
22635 ST->getMemoryVT() != Ld->getMemoryVT() || !ST->isSimple() ||
22636 !ISD::isNormalStore(ST) ||
22637 Ld->getAddressSpace() != ST->getAddressSpace() ||
22638 !Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1)))
22639 return SDValue();
22640
22641 unsigned IsFast;
22642 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
22643 Elt.getValueType(), ST->getAddressSpace(),
22644 ST->getAlign(), ST->getMemOperand()->getFlags(),
22645 &IsFast) ||
22646 !IsFast)
22647 return SDValue();
22648
22649 MachinePointerInfo PointerInfo(ST->getAddressSpace());
22650
22651 // If the offset is a known constant then try to recover the pointer
22652 // info
22653 SDValue NewPtr;
22654 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
22655 unsigned COffset = CIdx->getSExtValue() * EltVT.getSizeInBits() / 8;
22656 NewPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(COffset), DL);
22657 PointerInfo = ST->getPointerInfo().getWithOffset(COffset);
22658 } else {
22659 NewPtr = TLI.getVectorElementPointer(DAG, Ptr, Value.getValueType(), Idx);
22660 }
22661
22662 return DAG.getStore(Chain, DL, Elt, NewPtr, PointerInfo, ST->getAlign(),
22663 ST->getMemOperand()->getFlags());
22664}
22665
22666SDValue DAGCombiner::visitATOMIC_STORE(SDNode *N) {
22667 AtomicSDNode *ST = cast<AtomicSDNode>(N);
22668 SDValue Val = ST->getVal();
22669 EVT VT = Val.getValueType();
22670 EVT MemVT = ST->getMemoryVT();
22671
22672 if (MemVT.bitsLT(VT)) { // Is truncating store
22673 APInt TruncDemandedBits = APInt::getLowBitsSet(VT.getScalarSizeInBits(),
22674 MemVT.getScalarSizeInBits());
22675 // See if we can simplify the operation with SimplifyDemandedBits, which
22676 // only works if the value has a single use.
22677 if (SimplifyDemandedBits(Val, TruncDemandedBits))
22678 return SDValue(N, 0);
22679 }
22680
22681 return SDValue();
22682}
22683
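// Fold a store of a vector select, where the not-selected operand is a load
// from the same address, into a masked store:
//   (store (vselect Mask, X, (load Ptr)), Ptr) --> (masked_store X, Ptr, Mask)
// The commuted form is handled by inverting the mask. This requires a legal
// or custom MSTORE for the type and no side effects on the chain between the
// load and the store.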
22684 static SDValue foldToMaskedStore(StoreSDNode *Store, SelectionDAG &DAG,
22685 const SDLoc &Dl) {
22686 if (!Store->isSimple() || !ISD::isNormalStore(Store))
22687 return SDValue();
22688
22689 SDValue StoredVal = Store->getValue();
22690 SDValue StorePtr = Store->getBasePtr();
22691 SDValue StoreOffset = Store->getOffset();
22692 EVT VT = Store->getMemoryVT();
22693 unsigned AddrSpace = Store->getAddressSpace();
22694 Align Alignment = Store->getAlign();
22695 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22696
22697 if (!TLI.isOperationLegalOrCustom(ISD::MSTORE, VT) ||
22698 !TLI.allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment))
22699 return SDValue();
22700
22701 SDValue Mask, OtherVec, LoadCh;
22702 unsigned LoadPos;
22703 if (sd_match(StoredVal,
22704 m_VSelect(m_Value(Mask), m_Value(OtherVec),
22705 m_Load(m_Value(LoadCh), m_Specific(StorePtr),
22706 m_Specific(StoreOffset))))) {
22707 LoadPos = 2;
22708 } else if (sd_match(StoredVal,
22709 m_VSelect(m_Value(Mask),
22710 m_Load(m_Value(LoadCh), m_Specific(StorePtr),
22711 m_Specific(StoreOffset)),
22712 m_Value(OtherVec)))) {
22713 LoadPos = 1;
22714 } else {
22715 return SDValue();
22716 }
22717
22718 auto *Load = cast<LoadSDNode>(StoredVal.getOperand(LoadPos));
22719 if (!Load->isSimple() || !ISD::isNormalLoad(Load) ||
22720 Load->getAddressSpace() != AddrSpace)
22721 return SDValue();
22722
22723 if (!Store->getChain().reachesChainWithoutSideEffects(LoadCh))
22724 return SDValue();
22725
22726 if (LoadPos == 1)
22727 Mask = DAG.getNOT(Dl, Mask, Mask.getValueType());
22728
22729 return DAG.getMaskedStore(Store->getChain(), Dl, OtherVec, StorePtr,
22730 StoreOffset, Mask, VT, Store->getMemOperand(),
22731 Store->getAddressingMode());
22732}
22733
22734SDValue DAGCombiner::visitSTORE(SDNode *N) {
22735 StoreSDNode *ST = cast<StoreSDNode>(N);
22736 SDValue Chain = ST->getChain();
22737 SDValue Value = ST->getValue();
22738 SDValue Ptr = ST->getBasePtr();
22739
22740 // If this is a store of a bit convert, store the input value if the
22741 // resultant store does not need a higher alignment than the original.
22742 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
22743 ST->isUnindexed()) {
22744 EVT SVT = Value.getOperand(0).getValueType();
22745 // If the store is volatile, we only want to change the store type if the
22746 // resulting store is legal. Otherwise we might increase the number of
22747 // memory accesses. We don't care if the original type was legal or not
22748 // as we assume software couldn't rely on the number of accesses of an
22749 // illegal type.
22750 // TODO: May be able to relax for unordered atomics (see D66309)
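// For example, (store (i32 (bitcast f32 X)), Ptr) --> (store f32 X, Ptr)
// when the target reports the bitcast-free store as beneficial and it is
// legal (or we are before legalization).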
22751 if (((!LegalOperations && ST->isSimple()) ||
22752 TLI.isOperationLegal(ISD::STORE, SVT)) &&
22753 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
22754 DAG, *ST->getMemOperand())) {
22755 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
22756 ST->getMemOperand());
22757 }
22758 }
22759
22760 // Turn 'store undef, Ptr' -> nothing.
22761 if (Value.isUndef() && ST->isUnindexed() && !ST->isVolatile())
22762 return Chain;
22763
22764 // Try to infer better alignment information than the store already has.
22765 if (OptLevel != CodeGenOptLevel::None && ST->isUnindexed() &&
22766 !ST->isAtomic()) {
22767 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
22768 if (*Alignment > ST->getAlign() &&
22769 isAligned(*Alignment, ST->getSrcValueOffset())) {
22770 SDValue NewStore =
22771 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
22772 ST->getMemoryVT(), *Alignment,
22773 ST->getMemOperand()->getFlags(), ST->getAAInfo());
22774 // NewStore will always be N as we are only refining the alignment
22775 assert(NewStore.getNode() == N);
22776 (void)NewStore;
22777 }
22778 }
22779 }
22780
22781 // Try transforming a pair floating point load / store ops to integer
22782 // load / store ops.
22783 if (SDValue NewST = TransformFPLoadStorePair(N))
22784 return NewST;
22785
22786 // Try transforming several stores into STORE (BSWAP).
22787 if (SDValue Store = mergeTruncStores(ST))
22788 return Store;
22789
22790 if (ST->isUnindexed()) {
22791 // Walk up chain skipping non-aliasing memory nodes, on this store and any
22792 // adjacent stores.
22793 if (findBetterNeighborChains(ST)) {
22794 // replaceStoreChain uses CombineTo, which handled all of the worklist
22795 // manipulation. Return the original node to not do anything else.
22796 return SDValue(ST, 0);
22797 }
22798 Chain = ST->getChain();
22799 }
22800
22801 // FIXME: is there such a thing as a truncating indexed store?
22802 if (ST->isTruncatingStore() && ST->isUnindexed() &&
22803 Value.getValueType().isInteger() &&
22804 (!isa<ConstantSDNode>(Value) ||
22805 !cast<ConstantSDNode>(Value)->isOpaque())) {
22806 // Convert a truncating store of an extension into a standard store.
22807 if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
22808 Value.getOpcode() == ISD::SIGN_EXTEND ||
22809 Value.getOpcode() == ISD::ANY_EXTEND) &&
22810 Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
22811 TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
22812 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
22813 ST->getMemOperand());
22814
22815 APInt TruncDemandedBits =
22816 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
22817 ST->getMemoryVT().getScalarSizeInBits());
22818
22819 // See if we can simplify the operation with SimplifyDemandedBits, which
22820 // only works if the value has a single use.
22821 AddToWorklist(Value.getNode());
22822 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
22823 // Re-visit the store if anything changed and the store hasn't been merged
22824 // with another node (N is deleted). SimplifyDemandedBits will add Value's
22825 // node back to the worklist if necessary, but we also need to re-visit
22826 // the Store node itself.
22827 if (N->getOpcode() != ISD::DELETED_NODE)
22828 AddToWorklist(N);
22829 return SDValue(N, 0);
22830 }
22831
22832 // Otherwise, see if we can simplify the input to this truncstore with
22833 // knowledge that only the low bits are being used. For example:
22834 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
22835 if (SDValue Shorter =
22836 TLI.SimplifyMultipleUseDemandedBits(Value, TruncDemandedBits, DAG))
22837 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
22838 ST->getMemOperand());
22839
22840 // If we're storing a truncated constant, see if we can simplify it.
22841 // TODO: Move this to targetShrinkDemandedConstant?
22842 if (auto *Cst = dyn_cast<ConstantSDNode>(Value))
22843 if (!Cst->isOpaque()) {
22844 const APInt &CValue = Cst->getAPIntValue();
22845 APInt NewVal = CValue & TruncDemandedBits;
22846 if (NewVal != CValue) {
22847 SDValue Shorter =
22848 DAG.getConstant(NewVal, SDLoc(N), Value.getValueType());
22849 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr,
22850 ST->getMemoryVT(), ST->getMemOperand());
22851 }
22852 }
22853 }
22854
22855 // If this is a load followed by a store to the same location, then the store
22856 // is dead/noop. Peek through any truncates if canCombineTruncStore failed.
22857 // TODO: Add big-endian truncate support with test coverage.
22858 // TODO: Can relax for unordered atomics (see D66309)
22859 SDValue TruncVal = DAG.getDataLayout().isLittleEndian()
22860 ? peekThroughTruncates(Value)
22861 : Value;
22862 if (auto *Ld = dyn_cast<LoadSDNode>(TruncVal)) {
22863 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
22864 ST->isUnindexed() && ST->isSimple() &&
22865 Ld->getAddressSpace() == ST->getAddressSpace() &&
22866 // There can't be any side effects between the load and store, such as
22867 // a call or store.
22868 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
22869 // The store is dead, remove it.
22870 return Chain;
22871 }
22872 }
22873
22874 // Try scalarizing vector stores of loads where we only change one element
22875 if (SDValue NewST = replaceStoreOfInsertLoad(ST))
22876 return NewST;
22877
22878 // TODO: Can relax for unordered atomics (see D66309)
22879 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
22880 if (ST->isUnindexed() && ST->isSimple() &&
22881 ST1->isUnindexed() && ST1->isSimple()) {
22882 if (OptLevel != CodeGenOptLevel::None && ST1->getBasePtr() == Ptr &&
22883 ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
22884 ST->getAddressSpace() == ST1->getAddressSpace()) {
22885 // If this is a store followed by a store with the same value to the
22886 // same location, then the store is dead/noop.
22887 return Chain;
22888 }
22889
22890 if (OptLevel != CodeGenOptLevel::None && ST1->hasOneUse() &&
22891 !ST1->getBasePtr().isUndef() &&
22892 ST->getAddressSpace() == ST1->getAddressSpace()) {
22893 // If one of the two stores has a scalable vector type and the other
22894 // is a bigger store with a fixed type, then we cannot allow removing
22895 // the scalable store, because we don't know its final size
22896 // until runtime.
22897 if (ST->getMemoryVT().isScalableVector() ||
22898 ST1->getMemoryVT().isScalableVector()) {
22899 if (ST1->getBasePtr() == Ptr &&
22900 TypeSize::isKnownLE(ST1->getMemoryVT().getStoreSize(),
22901 ST->getMemoryVT().getStoreSize())) {
22902 CombineTo(ST1, ST1->getChain());
22903 return SDValue(N, 0);
22904 }
22905 } else {
22906 const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
22907 const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
22908 // If the preceding store writes to a subset of the current store's
22909 // location and no other node is chained to that store, we can
22910 // effectively drop the preceding store. Do not remove stores to undef
22911 // as they may be used as data sinks.
22912 if (STBase.contains(DAG, ST->getMemoryVT().getFixedSizeInBits(),
22913 ChainBase,
22914 ST1->getMemoryVT().getFixedSizeInBits())) {
22915 CombineTo(ST1, ST1->getChain());
22916 return SDValue(N, 0);
22917 }
22918 }
22919 }
22920 }
22921 }
22922
22923 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
22924 // truncating store. We can do this even if this is already a truncstore.
22925 if ((Value.getOpcode() == ISD::FP_ROUND ||
22926 Value.getOpcode() == ISD::TRUNCATE) &&
22927 Value->hasOneUse() && ST->isUnindexed() &&
22928 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
22929 ST->getMemoryVT(), LegalOperations)) {
22930 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
22931 Ptr, ST->getMemoryVT(), ST->getMemOperand());
22932 }
22933
22934 // Always perform this optimization before types are legal. If the target
22935 // prefers, also try this after legalization to catch stores that were created
22936 // by intrinsics or other nodes.
22937 if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
22938 while (true) {
22939 // There can be multiple store sequences on the same chain.
22940 // Keep trying to merge store sequences until we are unable to do so
22941 // or until we merge the last store on the chain.
22942 bool Changed = mergeConsecutiveStores(ST);
22943 if (!Changed) break;
22944 // Return N as merge only uses CombineTo and no worklist clean
22945 // up is necessary.
22946 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
22947 return SDValue(N, 0);
22948 }
22949 }
22950
22951 // Try transforming N to an indexed store.
22952 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
22953 return SDValue(N, 0);
22954
22955 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
22956 //
22957 // Make sure to do this only after attempting to merge stores in order to
22958 // avoid changing the types of some subset of stores due to visit order,
22959 // preventing their merging.
22960 if (isa<ConstantFPSDNode>(ST->getValue())) {
22961 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
22962 return NewSt;
22963 }
22964
22965 if (SDValue NewSt = splitMergedValStore(ST))
22966 return NewSt;
22967
22968 if (SDValue MaskedStore = foldToMaskedStore(ST, DAG, SDLoc(N)))
22969 return MaskedStore;
22970
22971 return ReduceLoadOpStoreWidth(N);
22972}
22973
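// Walk up the chain from a LIFETIME_END node and remove simple, fixed-size
// stores that lie entirely within the object whose lifetime is ending, since
// their values can never be observed afterwards.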
22974SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
22975 const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
22976 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(), 0, false);
22977
22978 // We walk up the chains to find stores.
22979 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
22980 while (!Chains.empty()) {
22981 SDValue Chain = Chains.pop_back_val();
22982 if (!Chain.hasOneUse())
22983 continue;
22984 switch (Chain.getOpcode()) {
22985 case ISD::TokenFactor:
22986 for (unsigned Nops = Chain.getNumOperands(); Nops;)
22987 Chains.push_back(Chain.getOperand(--Nops));
22988 break;
22989 case ISD::LIFETIME_START:
22990 case ISD::LIFETIME_END:
22991 // We can forward past any lifetime start/end that can be proven not to
22992 // alias the node.
22993 if (!mayAlias(Chain.getNode(), N))
22994 Chains.push_back(Chain.getOperand(0));
22995 break;
22996 case ISD::STORE: {
22997 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
22998 // TODO: Can relax for unordered atomics (see D66309)
22999 if (!ST->isSimple() || ST->isIndexed())
23000 continue;
23001 const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
23002 // The bounds of a scalable store are not known until runtime, so this
23003 // store cannot be elided.
23004 if (StoreSize.isScalable())
23005 continue;
23006 const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
23007 // If we store purely within object bounds just before its lifetime ends,
23008 // we can remove the store.
23009 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
23010 if (LifetimeEndBase.contains(
23011 DAG, MFI.getObjectSize(LifetimeEnd->getFrameIndex()) * 8,
23012 StoreBase, StoreSize.getFixedValue() * 8)) {
23013 LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
23014 dbgs() << "\nwithin LIFETIME_END of : ";
23015 LifetimeEndBase.dump(); dbgs() << "\n");
23016 CombineTo(ST, ST->getChain());
23017 return SDValue(N, 0);
23018 }
23019 }
23020 }
23021 }
23022 return SDValue();
23023}
23024
23025/// For the instruction sequence of store below, F and I values
23026/// are bundled together as an i64 value before being stored into memory.
23027 /// Sometimes it is more efficient to generate separate stores for F and I,
23028/// which can remove the bitwise instructions or sink them to colder places.
23029///
23030/// (store (or (zext (bitcast F to i32) to i64),
23031/// (shl (zext I to i64), 32)), addr) -->
23032/// (store F, addr) and (store I, addr+4)
23033///
23034/// Similarly, splitting for other merged store can also be beneficial, like:
23035/// For pair of {i32, i32}, i64 store --> two i32 stores.
23036/// For pair of {i32, i16}, i64 store --> two i32 stores.
23037/// For pair of {i16, i16}, i32 store --> two i16 stores.
23038/// For pair of {i16, i8}, i32 store --> two i16 stores.
23039/// For pair of {i8, i8}, i16 store --> two i8 stores.
23040///
23041/// We allow each target to determine specifically which kind of splitting is
23042/// supported.
23043///
23044/// The store patterns are commonly seen from the simple code snippet below
23045 /// if only std::make_pair(...) is SROA-transformed before being inlined into hoo().
23046/// void goo(const std::pair<int, float> &);
23047/// hoo() {
23048/// ...
23049/// goo(std::make_pair(tmp, ftmp));
23050/// ...
23051/// }
23052///
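/// As a concrete {i16, i16} instance of the above:
/// (store (or (zext A to i32), (shl (zext B to i32), 16)), addr) -->
/// (store i16 A, addr) and (store i16 B, addr+2) on a little-endian target.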
23053SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
23054 if (OptLevel == CodeGenOptLevel::None)
23055 return SDValue();
23056
23057 // Can't change the number of memory accesses for a volatile store or break
23058 // atomicity for an atomic one.
23059 if (!ST->isSimple())
23060 return SDValue();
23061
23062 SDValue Val = ST->getValue();
23063 SDLoc DL(ST);
23064
23065 // Match OR operand.
23066 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
23067 return SDValue();
23068
23069 // Match SHL operand and get Lower and Higher parts of Val.
23070 SDValue Op1 = Val.getOperand(0);
23071 SDValue Op2 = Val.getOperand(1);
23072 SDValue Lo, Hi;
23073 if (Op1.getOpcode() != ISD::SHL) {
23074 std::swap(Op1, Op2);
23075 if (Op1.getOpcode() != ISD::SHL)
23076 return SDValue();
23077 }
23078 Lo = Op2;
23079 Hi = Op1.getOperand(0);
23080 if (!Op1.hasOneUse())
23081 return SDValue();
23082
23083 // Match shift amount to HalfValBitSize.
23084 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
23085 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
23086 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
23087 return SDValue();
23088
23089 // Lo and Hi are zero-extended from int with size less equal than 32
23090 // to i64.
23091 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
23092 !Lo.getOperand(0).getValueType().isScalarInteger() ||
23093 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
23094 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
23095 !Hi.getOperand(0).getValueType().isScalarInteger() ||
23096 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
23097 return SDValue();
23098
23099 // Use the EVT of low and high parts before bitcast as the input
23100 // of target query.
23101 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
23102 ? Lo.getOperand(0).getValueType()
23103 : Lo.getValueType();
23104 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
23105 ? Hi.getOperand(0).getValueType()
23106 : Hi.getValueType();
23107 if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
23108 return SDValue();
23109
23110 // Start to split store.
23111 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
23112 AAMDNodes AAInfo = ST->getAAInfo();
23113
23114 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
23115 EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
23116 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
23117 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
23118
23119 SDValue Chain = ST->getChain();
23120 SDValue Ptr = ST->getBasePtr();
23121 // Lower value store.
23122 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
23123 ST->getBaseAlign(), MMOFlags, AAInfo);
23124 Ptr =
23125 DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(HalfValBitSize / 8), DL);
23126 // Higher value store.
23127 SDValue St1 = DAG.getStore(
23128 St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
23129 ST->getBaseAlign(), MMOFlags, AAInfo);
23130 return St1;
23131}
23132
23133// Merge an insertion into an existing shuffle:
23134// (insert_vector_elt (vector_shuffle X, Y, Mask),
23135 // (extract_vector_elt X, N), InsIndex)
23136// --> (vector_shuffle X, Y, NewMask)
23137// and variations where shuffle operands may be CONCAT_VECTORS.
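// For example, with v4i32 operands:
//   (insert_vector_elt (vector_shuffle X, Y, <0,4,2,6>),
//                      (extract_vector_elt X, 1), 3)
//   --> (vector_shuffle X, Y, <0,4,2,1>)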
23138 static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef<int> Mask,
23139 SmallVectorImpl<int> &NewMask, SDValue Elt,
23140 unsigned InsIndex) {
23141 if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
23142 !isa<ConstantSDNode>(Elt.getOperand(1)))
23143 return false;
23144
23145 // Vec's operand 0 is using indices from 0 to N-1 and
23146 // operand 1 from N to 2N - 1, where N is the number of
23147 // elements in the vectors.
23148 SDValue InsertVal0 = Elt.getOperand(0);
23149 int ElementOffset = -1;
23150
23151 // We explore the inputs of the shuffle in order to see if we find the
23152 // source of the extract_vector_elt. If so, we can use it to modify the
23153 // shuffle rather than perform an insert_vector_elt.
23154 SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
23155 ArgWorkList.emplace_back(Mask.size(), Y);
23156 ArgWorkList.emplace_back(0, X);
23157
23158 while (!ArgWorkList.empty()) {
23159 int ArgOffset;
23160 SDValue ArgVal;
23161 std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
23162
23163 if (ArgVal == InsertVal0) {
23164 ElementOffset = ArgOffset;
23165 break;
23166 }
23167
23168 // Peek through concat_vector.
23169 if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
23170 int CurrentArgOffset =
23171 ArgOffset + ArgVal.getValueType().getVectorNumElements();
23172 int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
23173 for (SDValue Op : reverse(ArgVal->ops())) {
23174 CurrentArgOffset -= Step;
23175 ArgWorkList.emplace_back(CurrentArgOffset, Op);
23176 }
23177
23178 // Make sure we went through all the elements and did not screw up index
23179 // computation.
23180 assert(CurrentArgOffset == ArgOffset);
23181 }
23182 }
23183
23184 // If we failed to find a match, see if we can replace an UNDEF shuffle
23185 // operand.
23186 if (ElementOffset == -1) {
23187 if (!Y.isUndef() || InsertVal0.getValueType() != Y.getValueType())
23188 return false;
23189 ElementOffset = Mask.size();
23190 Y = InsertVal0;
23191 }
23192
23193 NewMask.assign(Mask.begin(), Mask.end());
23194 NewMask[InsIndex] = ElementOffset + Elt.getConstantOperandVal(1);
23195 assert(NewMask[InsIndex] < (int)(2 * Mask.size()) && NewMask[InsIndex] >= 0 &&
23196 "NewMask[InsIndex] is out of bound");
23197 return true;
23198}
23199
23200// Merge an insertion into an existing shuffle:
23201// (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
23202// InsIndex)
23203// --> (vector_shuffle X, Y) and variations where shuffle operands may be
23204// CONCAT_VECTORS.
23205SDValue DAGCombiner::mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex) {
23206 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
23207 "Expected insert_vector_elt");
23208 SDValue InsertVal = N->getOperand(1);
23209 SDValue Vec = N->getOperand(0);
23210
23211 auto *SVN = dyn_cast<ShuffleVectorSDNode>(Vec);
23212 if (!SVN || !Vec.hasOneUse())
23213 return SDValue();
23214
23215 ArrayRef<int> Mask = SVN->getMask();
23216 SDValue X = Vec.getOperand(0);
23217 SDValue Y = Vec.getOperand(1);
23218
23219 SmallVector<int, 16> NewMask(Mask);
23220 if (mergeEltWithShuffle(X, Y, Mask, NewMask, InsertVal, InsIndex)) {
23221 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
23222 Vec.getValueType(), SDLoc(N), X, Y, NewMask, DAG);
23223 if (LegalShuffle)
23224 return LegalShuffle;
23225 }
23226
23227 return SDValue();
23228}
23229
23230// Convert a disguised subvector insertion into a shuffle:
23231// insert_vector_elt V, (bitcast X from vector type), IdxC -->
23232// bitcast(shuffle (bitcast V), (extended X), Mask)
23233// Note: We do not use an insert_subvector node because that requires a
23234// legal subvector type.
23235SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
23236 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
23237 "Expected insert_vector_elt");
23238 SDValue InsertVal = N->getOperand(1);
23239
23240 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
23241 !InsertVal.getOperand(0).getValueType().isVector())
23242 return SDValue();
23243
23244 SDValue SubVec = InsertVal.getOperand(0);
23245 SDValue DestVec = N->getOperand(0);
23246 EVT SubVecVT = SubVec.getValueType();
23247 EVT VT = DestVec.getValueType();
23248 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
23249 // If the source only has a single vector element, the cost of creating a
23250 // shuffle to add it to a vector is likely to exceed the cost of an insert_vector_elt.
23251 if (NumSrcElts == 1)
23252 return SDValue();
23253 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
23254 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
23255
23256 // Step 1: Create a shuffle mask that implements this insert operation. The
23257 // vector that we are inserting into will be operand 0 of the shuffle, so
23258 // those elements are just 'i'. The inserted subvector is in the first
23259 // positions of operand 1 of the shuffle. Example:
23260 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
23261 SmallVector<int, 16> Mask(NumMaskVals);
23262 for (unsigned i = 0; i != NumMaskVals; ++i) {
23263 if (i / NumSrcElts == InsIndex)
23264 Mask[i] = (i % NumSrcElts) + NumMaskVals;
23265 else
23266 Mask[i] = i;
23267 }
23268
23269 // Bail out if the target can not handle the shuffle we want to create.
23270 EVT SubVecEltVT = SubVecVT.getVectorElementType();
23271 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
23272 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
23273 return SDValue();
23274
23275 // Step 2: Create a wide vector from the inserted source vector by appending
23276 // undefined elements. This is the same size as our destination vector.
23277 SDLoc DL(N);
23278 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
23279 ConcatOps[0] = SubVec;
23280 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
23281
23282 // Step 3: Shuffle in the padded subvector.
23283 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
23284 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
23285 AddToWorklist(PaddedSubV.getNode());
23286 AddToWorklist(DestVecBC.getNode());
23287 AddToWorklist(Shuf.getNode());
23288 return DAG.getBitcast(VT, Shuf);
23289}
23290
23291// Combine insert(shuffle(load, <u,0,1,2>), load, 0) into a single load if
23292 // possible and the new load will be fast. We use more loads but fewer shuffles
23293// and inserts.
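// For example, with a v4i32 load at Ptr and an i32 load at Ptr-4:
//   insert(shuffle(load v4i32 Ptr, <u,0,1,2>), load i32 Ptr-4, 0)
//   --> load v4i32 Ptr-4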
23294SDValue DAGCombiner::combineInsertEltToLoad(SDNode *N, unsigned InsIndex) {
23295 EVT VT = N->getValueType(0);
23296
23297 // InsIndex is expected to be the first or last lane.
23298 if (!VT.isFixedLengthVector() ||
23299 (InsIndex != 0 && InsIndex != VT.getVectorNumElements() - 1))
23300 return SDValue();
23301
23302 // Look for a shuffle with the mask u,0,1,2,3,4,5,6 or 1,2,3,4,5,6,7,u
23303 // depending on the InsIndex.
23304 auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0));
23305 SDValue Scalar = N->getOperand(1);
23306 if (!Shuffle || !all_of(enumerate(Shuffle->getMask()), [&](auto P) {
23307 return InsIndex == P.index() || P.value() < 0 ||
23308 (InsIndex == 0 && P.value() == (int)P.index() - 1) ||
23309 (InsIndex == VT.getVectorNumElements() - 1 &&
23310 P.value() == (int)P.index() + 1);
23311 }))
23312 return SDValue();
23313
23314 // We optionally skip over an extend so long as both loads are extended in the
23315 // same way from the same type.
23316 unsigned Extend = 0;
23317 if (Scalar.getOpcode() == ISD::ZERO_EXTEND ||
23318 Scalar.getOpcode() == ISD::SIGN_EXTEND ||
23319 Scalar.getOpcode() == ISD::ANY_EXTEND) {
23320 Extend = Scalar.getOpcode();
23321 Scalar = Scalar.getOperand(0);
23322 }
23323
23324 auto *ScalarLoad = dyn_cast<LoadSDNode>(Scalar);
23325 if (!ScalarLoad)
23326 return SDValue();
23327
23328 SDValue Vec = Shuffle->getOperand(0);
23329 if (Extend) {
23330 if (Vec.getOpcode() != Extend)
23331 return SDValue();
23332 Vec = Vec.getOperand(0);
23333 }
23334 auto *VecLoad = dyn_cast<LoadSDNode>(Vec);
23335 if (!VecLoad || Vec.getValueType().getScalarType() != Scalar.getValueType())
23336 return SDValue();
23337
23338 int EltSize = ScalarLoad->getValueType(0).getScalarSizeInBits();
23339 if (EltSize == 0 || EltSize % 8 != 0 || !ScalarLoad->isSimple() ||
23340 !VecLoad->isSimple() || VecLoad->getExtensionType() != ISD::NON_EXTLOAD ||
23341 ScalarLoad->getExtensionType() != ISD::NON_EXTLOAD ||
23342 ScalarLoad->getAddressSpace() != VecLoad->getAddressSpace())
23343 return SDValue();
23344
23345 // Check that the offset between the pointers produces a single contiguous
23346 // load.
23347 if (InsIndex == 0) {
23348 if (!DAG.areNonVolatileConsecutiveLoads(ScalarLoad, VecLoad, EltSize / 8,
23349 -1))
23350 return SDValue();
23351 } else {
23352 if (!DAG.areNonVolatileConsecutiveLoads(
23353 VecLoad, ScalarLoad, VT.getVectorNumElements() * EltSize / 8, -1))
23354 return SDValue();
23355 }
23356
23357 // And that the new unaligned load will be fast.
23358 unsigned IsFast = 0;
23359 Align NewAlign = commonAlignment(VecLoad->getAlign(), EltSize / 8);
23360 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
23361 Vec.getValueType(), VecLoad->getAddressSpace(),
23362 NewAlign, VecLoad->getMemOperand()->getFlags(),
23363 &IsFast) ||
23364 !IsFast)
23365 return SDValue();
23366
23367 // Calculate the new Ptr and create the new load.
23368 SDLoc DL(N);
23369 SDValue Ptr = ScalarLoad->getBasePtr();
23370 if (InsIndex != 0)
23371 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), VecLoad->getBasePtr(),
23372 DAG.getConstant(EltSize / 8, DL, Ptr.getValueType()));
23373 MachinePointerInfo PtrInfo =
23374 InsIndex == 0 ? ScalarLoad->getPointerInfo()
23375 : VecLoad->getPointerInfo().getWithOffset(EltSize / 8);
23376
23377 SDValue Load = DAG.getLoad(VecLoad->getValueType(0), DL,
23378 ScalarLoad->getChain(), Ptr, PtrInfo, NewAlign);
23379 DAG.makeEquivalentMemoryOrdering(ScalarLoad, Load.getValue(1));
23380 DAG.makeEquivalentMemoryOrdering(VecLoad, Load.getValue(1));
23381 return Extend ? DAG.getNode(Extend, DL, VT, Load) : Load;
23382}
23383
23384SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
23385 SDValue InVec = N->getOperand(0);
23386 SDValue InVal = N->getOperand(1);
23387 SDValue EltNo = N->getOperand(2);
23388 SDLoc DL(N);
23389
23390 EVT VT = InVec.getValueType();
23391 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
23392
23393 // Insert into out-of-bounds element is undefined.
23394 if (IndexC && VT.isFixedLengthVector() &&
23395 IndexC->getZExtValue() >= VT.getVectorNumElements())
23396 return DAG.getUNDEF(VT);
23397
23398 // Remove redundant insertions:
23399 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
23400 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
23401 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
23402 return InVec;
23403
23404 // Remove insert of UNDEF/POISON elements.
23405 if (InVal.isUndef()) {
23406 if (InVal.getOpcode() == ISD::POISON || InVec.getOpcode() == ISD::UNDEF)
23407 return InVec;
23408 return DAG.getFreeze(InVec);
23409 }
23410
23411 if (!IndexC) {
23412 // If this is variable insert to undef vector, it might be better to splat:
23413 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
23414 if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
23415 return DAG.getSplat(VT, DL, InVal);
23416 return SDValue();
23417 }
23418
23419 if (VT.isScalableVector())
23420 return SDValue();
23421
23422 unsigned NumElts = VT.getVectorNumElements();
23423
23424 // We must know which element is being inserted for folds below here.
23425 unsigned Elt = IndexC->getZExtValue();
23426
23427 // Handle <1 x ???> vector insertion special cases.
23428 if (NumElts == 1) {
23429 // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y
23430 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
23431 InVal.getOperand(0).getValueType() == VT &&
23432 isNullConstant(InVal.getOperand(1)))
23433 return InVal.getOperand(0);
23434 }
23435
23436 // Canonicalize insert_vector_elt dag nodes.
23437 // Example:
23438 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
23439 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
23440 //
23441 // Do this only if the child insert_vector node has one use; also
23442 // do this only if indices are both constants and Idx1 < Idx0.
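// e.g. (insert_vector_elt (insert_vector_elt A, x, 3), y, 1)
//      --> (insert_vector_elt (insert_vector_elt A, y, 1), x, 3)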
23443 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
23444 && isa<ConstantSDNode>(InVec.getOperand(2))) {
23445 unsigned OtherElt = InVec.getConstantOperandVal(2);
23446 if (Elt < OtherElt) {
23447 // Swap nodes.
23448 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
23449 InVec.getOperand(0), InVal, EltNo);
23450 AddToWorklist(NewOp.getNode());
23451 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
23452 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
23453 }
23454 }
23455
23456 if (SDValue Shuf = mergeInsertEltWithShuffle(N, Elt))
23457 return Shuf;
23458
23459 if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
23460 return Shuf;
23461
23462 if (SDValue Shuf = combineInsertEltToLoad(N, Elt))
23463 return Shuf;
23464
23465 // Attempt to convert an insert_vector_elt chain into a legal build_vector.
23466 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
23467 // vXi1 vector - we don't need to recurse.
23468 if (NumElts == 1)
23469 return DAG.getBuildVector(VT, DL, {InVal});
23470
23471 // If we haven't already collected the element, insert into the op list.
23472 EVT MaxEltVT = InVal.getValueType();
23473 auto AddBuildVectorOp = [&](SmallVectorImpl<SDValue> &Ops, SDValue Elt,
23474 unsigned Idx) {
23475 if (!Ops[Idx]) {
23476 Ops[Idx] = Elt;
23477 if (VT.isInteger()) {
23478 EVT EltVT = Elt.getValueType();
23479 MaxEltVT = MaxEltVT.bitsGE(EltVT) ? MaxEltVT : EltVT;
23480 }
23481 }
23482 };
23483
23484 // Ensure all the operands are the same value type, fill any missing
23485 // operands with UNDEF and create the BUILD_VECTOR.
23486 auto CanonicalizeBuildVector = [&](SmallVectorImpl<SDValue> &Ops,
23487 bool FreezeUndef = false) {
23488 assert(Ops.size() == NumElts && "Unexpected vector size");
23489 SDValue UndefOp = FreezeUndef ? DAG.getFreeze(DAG.getUNDEF(MaxEltVT))
23490 : DAG.getUNDEF(MaxEltVT);
23491 for (SDValue &Op : Ops) {
23492 if (Op)
23493 Op = VT.isInteger() ? DAG.getAnyExtOrTrunc(Op, DL, MaxEltVT) : Op;
23494 else
23495 Op = UndefOp;
23496 }
23497 return DAG.getBuildVector(VT, DL, Ops);
23498 };
23499
23500 SmallVector<SDValue, 8> Ops(NumElts, SDValue());
23501 Ops[Elt] = InVal;
23502
23503 // Recurse up a INSERT_VECTOR_ELT chain to build a BUILD_VECTOR.
23504 for (SDValue CurVec = InVec; CurVec;) {
23505 // UNDEF - build new BUILD_VECTOR from already inserted operands.
23506 if (CurVec.isUndef())
23507 return CanonicalizeBuildVector(Ops);
23508
23509 // FREEZE(UNDEF) - build new BUILD_VECTOR from already inserted operands.
23510 if (ISD::isFreezeUndef(CurVec.getNode()) && CurVec.hasOneUse())
23511 return CanonicalizeBuildVector(Ops, /*FreezeUndef=*/true);
23512
23513 // BUILD_VECTOR - insert unused operands and build new BUILD_VECTOR.
23514 if (CurVec.getOpcode() == ISD::BUILD_VECTOR && CurVec.hasOneUse()) {
23515 for (unsigned I = 0; I != NumElts; ++I)
23516 AddBuildVectorOp(Ops, CurVec.getOperand(I), I);
23517 return CanonicalizeBuildVector(Ops);
23518 }
23519
23520 // SCALAR_TO_VECTOR - insert unused scalar and build new BUILD_VECTOR.
23521 if (CurVec.getOpcode() == ISD::SCALAR_TO_VECTOR && CurVec.hasOneUse()) {
23522 AddBuildVectorOp(Ops, CurVec.getOperand(0), 0);
23523 return CanonicalizeBuildVector(Ops);
23524 }
23525
23526 // INSERT_VECTOR_ELT - insert operand and continue up the chain.
23527 if (CurVec.getOpcode() == ISD::INSERT_VECTOR_ELT && CurVec.hasOneUse())
23528 if (auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2)))
23529 if (CurIdx->getAPIntValue().ult(NumElts)) {
23530 unsigned Idx = CurIdx->getZExtValue();
23531 AddBuildVectorOp(Ops, CurVec.getOperand(1), Idx);
23532
23533 // Found entire BUILD_VECTOR.
23534 if (all_of(Ops, [](SDValue Op) { return !!Op; }))
23535 return CanonicalizeBuildVector(Ops);
23536
23537 CurVec = CurVec->getOperand(0);
23538 continue;
23539 }
23540
23541 // VECTOR_SHUFFLE - if all the operands match the shuffle's sources,
23542 // update the shuffle mask (and second operand if we started with unary
23543 // shuffle) and create a new legal shuffle.
23544 if (CurVec.getOpcode() == ISD::VECTOR_SHUFFLE && CurVec.hasOneUse()) {
23545 auto *SVN = cast<ShuffleVectorSDNode>(CurVec);
23546 SDValue LHS = SVN->getOperand(0);
23547 SDValue RHS = SVN->getOperand(1);
23548 SmallVector<int, 16> Mask(SVN->getMask());
23549 bool Merged = true;
23550 for (auto I : enumerate(Ops)) {
23551 SDValue &Op = I.value();
23552 if (Op) {
23553 SmallVector<int, 16> NewMask;
23554 if (!mergeEltWithShuffle(LHS, RHS, Mask, NewMask, Op, I.index())) {
23555 Merged = false;
23556 break;
23557 }
23558 Mask = std::move(NewMask);
23559 }
23560 }
23561 if (Merged)
23562 if (SDValue NewShuffle =
23563 TLI.buildLegalVectorShuffle(VT, DL, LHS, RHS, Mask, DAG))
23564 return NewShuffle;
23565 }
23566
23567 if (!LegalOperations) {
23568 bool IsNull = llvm::isNullConstant(InVal);
23569 // We can convert to AND/OR mask if all insertions are zero or -1
23570 // respectively.
23571 if ((IsNull || llvm::isAllOnesConstant(InVal)) &&
23572 all_of(Ops, [InVal](SDValue Op) { return !Op || Op == InVal; }) &&
23573 count_if(Ops, [InVal](SDValue Op) { return Op == InVal; }) >= 2) {
23574 SDValue Zero = DAG.getConstant(0, DL, MaxEltVT);
23575 SDValue AllOnes = DAG.getAllOnesConstant(DL, MaxEltVT);
23576 SmallVector<SDValue, 8> Mask(NumElts);
23577
23578 // Build the mask and return the corresponding DAG node.
23579 auto BuildMaskAndNode = [&](SDValue TrueVal, SDValue FalseVal,
23580 unsigned MaskOpcode) {
23581 for (unsigned I = 0; I != NumElts; ++I)
23582 Mask[I] = Ops[I] ? TrueVal : FalseVal;
23583 return DAG.getNode(MaskOpcode, DL, VT, CurVec,
23584 DAG.getBuildVector(VT, DL, Mask));
23585 };
23586
23587 // If all elements are zero, we can use AND with all ones.
23588 if (IsNull)
23589 return BuildMaskAndNode(Zero, AllOnes, ISD::AND);
23590
23591 // If all elements are -1, we can use OR with zero.
23592 return BuildMaskAndNode(AllOnes, Zero, ISD::OR);
23593 }
23594 }
23595
23596 // Failed to find a match in the chain - bail.
23597 break;
23598 }
23599
23600 // See if we can fill in the missing constant elements as zeros.
23601 // TODO: Should we do this for any constant?
23602 APInt DemandedZeroElts = APInt::getZero(NumElts);
23603 for (unsigned I = 0; I != NumElts; ++I)
23604 if (!Ops[I])
23605 DemandedZeroElts.setBit(I);
23606
23607 if (DAG.MaskedVectorIsZero(InVec, DemandedZeroElts)) {
23608 SDValue Zero = VT.isInteger() ? DAG.getConstant(0, DL, MaxEltVT)
23609 : DAG.getConstantFP(0, DL, MaxEltVT);
23610 for (unsigned I = 0; I != NumElts; ++I)
23611 if (!Ops[I])
23612 Ops[I] = Zero;
23613
23614 return CanonicalizeBuildVector(Ops);
23615 }
23616 }
23617
23618 return SDValue();
23619}
23620
23621/// Transform a vector binary operation into a scalar binary operation by moving
23622/// the math/logic after an extract element of a vector.
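/// For example:
///   extractelt (add X, <1,2,3,4>), 2 --> add (extractelt X, 2), 3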
23623 static SDValue scalarizeExtractedBinOp(SDNode *ExtElt, SelectionDAG &DAG,
23624 const SDLoc &DL, bool LegalTypes) {
23625 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23626 SDValue Vec = ExtElt->getOperand(0);
23627 SDValue Index = ExtElt->getOperand(1);
23628 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
23629 unsigned Opc = Vec.getOpcode();
23630 if (!IndexC || !Vec.hasOneUse() || (!TLI.isBinOp(Opc) && Opc != ISD::SETCC) ||
23631 Vec->getNumValues() != 1)
23632 return SDValue();
23633
23634 // Targets may want to avoid this to prevent an expensive register transfer.
23635 if (!TLI.shouldScalarizeBinop(Vec))
23636 return SDValue();
23637
23638 EVT ResVT = ExtElt->getValueType(0);
23639 if (Opc == ISD::SETCC &&
23640 (ResVT != Vec.getValueType().getVectorElementType() || LegalTypes))
23641 return SDValue();
23642
23643 // Extracting an element of a vector constant is constant-folded, so this
23644 // transform is just replacing a vector op with a scalar op while moving the
23645 // extract.
23646 SDValue Op0 = Vec.getOperand(0);
23647 SDValue Op1 = Vec.getOperand(1);
23648 APInt SplatVal;
23649 if (!isAnyConstantBuildVector(Op0, true) &&
23650 !ISD::isConstantSplatVector(Op0.getNode(), SplatVal) &&
23651 !isAnyConstantBuildVector(Op1, true) &&
23652 !ISD::isConstantSplatVector(Op1.getNode(), SplatVal))
23653 return SDValue();
23654
23655 // extractelt (op X, C), IndexC --> op (extractelt X, IndexC), C'
23656 // extractelt (op C, X), IndexC --> op C', (extractelt X, IndexC)
23657 if (Opc == ISD::SETCC) {
23658 EVT OpVT = Op0.getValueType().getVectorElementType();
23659 Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op0, Index);
23660 Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op1, Index);
23661 SDValue NewVal = DAG.getSetCC(
23662 DL, ResVT, Op0, Op1, cast<CondCodeSDNode>(Vec->getOperand(2))->get());
23663 // We may need to sign- or zero-extend the result to match the same
23664 // behaviour as the vector version of SETCC.
23665 unsigned VecBoolContents = TLI.getBooleanContents(Vec.getValueType());
23666 if (ResVT != MVT::i1 &&
23667 VecBoolContents != TargetLowering::UndefinedBooleanContent &&
23668 VecBoolContents != TLI.getBooleanContents(ResVT)) {
23669 if (VecBoolContents == TargetLowering::ZeroOrNegativeOneBooleanContent)
23670 NewVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ResVT, NewVal,
23671 DAG.getValueType(MVT::i1));
23672 else
23673 NewVal = DAG.getZeroExtendInReg(NewVal, DL, MVT::i1);
23674 }
23675 return NewVal;
23676 }
23677 Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op0, Index);
23678 Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op1, Index);
23679 return DAG.getNode(Opc, DL, ResVT, Op0, Op1);
23680}
23681
23682 // Given an ISD::EXTRACT_VECTOR_ELT, which is a glorified bit sequence extract,
23683 // recursively analyse all of its users and try to model them as
23684// bit sequence extractions. If all of them agree on the new, narrower element
23685// type, and all of them can be modelled as ISD::EXTRACT_VECTOR_ELT's of that
23686// new element type, do so now.
23687// This is mainly useful to recover from legalization that scalarized
23688// the vector as wide elements, but tries to rebuild it with narrower elements.
23689//
23690// Some more nodes could be modelled if that helps cover interesting patterns.
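// For example (little-endian, with every unmodelled user being a
// BUILD_VECTOR):
//   t = extract_vector_elt v2i64 X, 0
//   a = truncate t to i32
//   b = truncate (srl t, 32) to i32
// becomes
//   a = extract_vector_elt (bitcast X to v4i32), 0
//   b = extract_vector_elt (bitcast X to v4i32), 1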
23691bool DAGCombiner::refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(
23692 SDNode *N) {
23693 // We perform this optimization post type-legalization because
23694 // the type-legalizer often scalarizes integer-promoted vectors.
23695 // Performing this optimization before may cause legalization cycles.
23696 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
23697 return false;
23698
23699 // TODO: Add support for big-endian.
23700 if (DAG.getDataLayout().isBigEndian())
23701 return false;
23702
23703 SDValue VecOp = N->getOperand(0);
23704 EVT VecVT = VecOp.getValueType();
23705 assert(!VecVT.isScalableVector() && "Only for fixed vectors.");
23706
23707 // We must start with a constant extraction index.
23708 auto *IndexC = dyn_cast<ConstantSDNode>(N->getOperand(1));
23709 if (!IndexC)
23710 return false;
23711
23712 assert(IndexC->getZExtValue() < VecVT.getVectorNumElements() &&
23713 "Original ISD::EXTRACT_VECTOR_ELT is undefined?");
23714
23715 // TODO: deal with the case of implicit anyext of the extraction.
23716 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
23717 EVT ScalarVT = N->getValueType(0);
23718 if (VecVT.getScalarType() != ScalarVT)
23719 return false;
23720
23721 // TODO: deal with the cases other than everything being integer-typed.
23722 if (!ScalarVT.isScalarInteger())
23723 return false;
23724
23725 struct Entry {
23726 SDNode *Producer;
23727
23728 // Which bits of VecOp does it contain?
23729 unsigned BitPos;
23730 int NumBits;
23731 // NOTE: the actual width of \p Producer may be wider than NumBits!
23732
23733 Entry(Entry &&) = default;
23734 Entry(SDNode *Producer_, unsigned BitPos_, int NumBits_)
23735 : Producer(Producer_), BitPos(BitPos_), NumBits(NumBits_) {}
23736
23737 Entry() = delete;
23738 Entry(const Entry &) = delete;
23739 Entry &operator=(const Entry &) = delete;
23740 Entry &operator=(Entry &&) = delete;
23741 };
23742 SmallVector<Entry, 32> Worklist;
23743 SmallVector<Entry, 32> Leafs;
23744
23745 // We start at the "root" ISD::EXTRACT_VECTOR_ELT.
23746 Worklist.emplace_back(N, /*BitPos=*/VecEltBitWidth * IndexC->getZExtValue(),
23747 /*NumBits=*/VecEltBitWidth);
23748
23749 while (!Worklist.empty()) {
23750 Entry E = Worklist.pop_back_val();
23751 // Does the node not even use any of the VecOp bits?
23752 if (!(E.NumBits > 0 && E.BitPos < VecVT.getSizeInBits() &&
23753 E.BitPos + E.NumBits <= VecVT.getSizeInBits()))
23754 return false; // Let's allow the other combines clean this up first.
23755 // Did we fail to model any of the users of the Producer?
23756 bool ProducerIsLeaf = false;
23757 // Look at each user of this Producer.
23758 for (SDNode *User : E.Producer->users()) {
23759 switch (User->getOpcode()) {
23760 // TODO: support ISD::BITCAST
23761 // TODO: support ISD::ANY_EXTEND
23762 // TODO: support ISD::ZERO_EXTEND
23763 // TODO: support ISD::SIGN_EXTEND
23764 case ISD::TRUNCATE:
23765 // Truncation simply means we keep position, but extract less bits.
23766 Worklist.emplace_back(User, E.BitPos,
23767 /*NumBits=*/User->getValueSizeInBits(0));
23768 break;
23769 // TODO: support ISD::SRA
23770 // TODO: support ISD::SHL
23771 case ISD::SRL:
23772 // We should be shifting the Producer by a constant amount.
23773 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(User->getOperand(1));
23774 User->getOperand(0).getNode() == E.Producer && ShAmtC) {
23775 // Logical right-shift means that we start extraction later,
23776 // but stop it at the same position we did previously.
23777 unsigned ShAmt = ShAmtC->getZExtValue();
23778 Worklist.emplace_back(User, E.BitPos + ShAmt, E.NumBits - ShAmt);
23779 break;
23780 }
23781 [[fallthrough]];
23782 default:
23783 // We can not model this user of the Producer.
23784 // Which means the current Producer will be a ISD::EXTRACT_VECTOR_ELT.
23785 ProducerIsLeaf = true;
23786 // Profitability check: all users that we can not model
23787 // must be ISD::BUILD_VECTOR's.
23788 if (User->getOpcode() != ISD::BUILD_VECTOR)
23789 return false;
23790 break;
23791 }
23792 }
23793 if (ProducerIsLeaf)
23794 Leafs.emplace_back(std::move(E));
23795 }
23796
23797 unsigned NewVecEltBitWidth = Leafs.front().NumBits;
23798
23799 // If we are still at the same element granularity, give up.
23800 if (NewVecEltBitWidth == VecEltBitWidth)
23801 return false;
23802
23803 // The vector width must be a multiple of the new element width.
23804 if (VecVT.getSizeInBits() % NewVecEltBitWidth != 0)
23805 return false;
23806
23807 // All leafs must agree on the new element width.
23808 // All leafs must not expect any "padding" bits on top of that width.
23809 // All leafs must start extraction from multiple of that width.
23810 if (!all_of(Leafs, [NewVecEltBitWidth](const Entry &E) {
23811 return (unsigned)E.NumBits == NewVecEltBitWidth &&
23812 E.Producer->getValueSizeInBits(0) == NewVecEltBitWidth &&
23813 E.BitPos % NewVecEltBitWidth == 0;
23814 }))
23815 return false;
23816
23817 EVT NewScalarVT = EVT::getIntegerVT(*DAG.getContext(), NewVecEltBitWidth);
23818 EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewScalarVT,
23819 VecVT.getSizeInBits() / NewVecEltBitWidth);
23820
23821 if (LegalTypes &&
23822 !(TLI.isTypeLegal(NewScalarVT) && TLI.isTypeLegal(NewVecVT)))
23823 return false;
23824
23825 if (LegalOperations &&
23826 !(TLI.isOperationLegalOrCustom(ISD::BITCAST, NewVecVT) &&
23827 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, NewVecVT)))
23828 return false;
23829
23830 SDValue NewVecOp = DAG.getBitcast(NewVecVT, VecOp);
23831 for (const Entry &E : Leafs) {
23832 SDLoc DL(E.Producer);
23833 unsigned NewIndex = E.BitPos / NewVecEltBitWidth;
23834 assert(NewIndex < NewVecVT.getVectorNumElements() &&
23835 "Creating out-of-bounds ISD::EXTRACT_VECTOR_ELT?");
23836 SDValue V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, NewScalarVT, NewVecOp,
23837 DAG.getVectorIdxConstant(NewIndex, DL));
23838 CombineTo(E.Producer, V);
23839 }
23840
23841 return true;
23842}
23843
23844SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
23845 SDValue VecOp = N->getOperand(0);
23846 SDValue Index = N->getOperand(1);
23847 EVT ScalarVT = N->getValueType(0);
23848 EVT VecVT = VecOp.getValueType();
23849 if (VecOp.isUndef())
23850 return DAG.getUNDEF(ScalarVT);
23851
23852  // (extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
23853 //
23854 // This only really matters if the index is non-constant since other combines
23855 // on the constant elements already work.
23856 SDLoc DL(N);
23857 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
23858 Index == VecOp.getOperand(2)) {
23859 SDValue Elt = VecOp.getOperand(1);
23860 AddUsersToWorklist(VecOp.getNode());
23861 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
23862 }
23863
23864  // (vextract (scalar_to_vector val), 0) -> val
23865 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
23866 // Only 0'th element of SCALAR_TO_VECTOR is defined.
23867 if (DAG.isKnownNeverZero(Index))
23868 return DAG.getUNDEF(ScalarVT);
23869
23870    // Check if the result type doesn't match the inserted element type.
23871    // The inserted element and extracted element may have mismatched bitwidths.
23872    // As a result, EXTRACT_VECTOR_ELT may extend or truncate the extracted value.
23873 SDValue InOp = VecOp.getOperand(0);
23874 if (InOp.getValueType() != ScalarVT) {
23875 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
23876 if (InOp.getValueType().bitsGT(ScalarVT))
23877 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp);
23878 return DAG.getNode(ISD::ANY_EXTEND, DL, ScalarVT, InOp);
23879 }
23880 return InOp;
23881 }
23882
23883 // extract_vector_elt of out-of-bounds element -> UNDEF
23884 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
23885 if (IndexC && VecVT.isFixedLengthVector() &&
23886 IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
23887 return DAG.getUNDEF(ScalarVT);
23888
23889 // extract_vector_elt (build_vector x, y), 1 -> y
23890 if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
23891 VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
23892 TLI.isTypeLegal(VecVT)) {
23893 assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
23894 VecVT.isFixedLengthVector()) &&
23895 "BUILD_VECTOR used for scalable vectors");
23896 unsigned IndexVal =
23897 VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
23898 SDValue Elt = VecOp.getOperand(IndexVal);
23899 EVT InEltVT = Elt.getValueType();
23900
23901 if (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT) ||
23902 isNullConstant(Elt)) {
23903 // Sometimes build_vector's scalar input types do not match result type.
23904 if (ScalarVT == InEltVT)
23905 return Elt;
23906
23907 // TODO: It may be useful to truncate if free if the build_vector
23908 // implicitly converts.
23909 }
23910 }
23911
23912 if (SDValue BO = scalarizeExtractedBinOp(N, DAG, DL, LegalTypes))
23913 return BO;
23914
23915 if (VecVT.isScalableVector())
23916 return SDValue();
23917
23918 // All the code from this point onwards assumes fixed width vectors, but it's
23919 // possible that some of the combinations could be made to work for scalable
23920 // vectors too.
23921 unsigned NumElts = VecVT.getVectorNumElements();
23922 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
23923
23924  // See if the extracted element is constant, in which case fold it if it's
23925  // a legal fp immediate.
23926 if (IndexC && ScalarVT.isFloatingPoint()) {
23927 APInt EltMask = APInt::getOneBitSet(NumElts, IndexC->getZExtValue());
23928 KnownBits KnownElt = DAG.computeKnownBits(VecOp, EltMask);
23929 if (KnownElt.isConstant()) {
23930 APFloat CstFP =
23931 APFloat(ScalarVT.getFltSemantics(), KnownElt.getConstant());
23932 if (TLI.isFPImmLegal(CstFP, ScalarVT))
23933 return DAG.getConstantFP(CstFP, DL, ScalarVT);
23934 }
23935 }
23936
23937 // TODO: These transforms should not require the 'hasOneUse' restriction, but
23938 // there are regressions on multiple targets without it. We can end up with a
23939 // mess of scalar and vector code if we reduce only part of the DAG to scalar.
23940 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
23941 VecOp.hasOneUse()) {
23942    // The vector index of the LSBs of the source depends on the endianness.
23943 bool IsLE = DAG.getDataLayout().isLittleEndian();
23944 unsigned ExtractIndex = IndexC->getZExtValue();
23945 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
23946 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
23947 SDValue BCSrc = VecOp.getOperand(0);
23948 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
23949 return DAG.getAnyExtOrTrunc(BCSrc, DL, ScalarVT);
23950
23951 // TODO: Add support for SCALAR_TO_VECTOR implicit truncation.
23952 if (LegalTypes && BCSrc.getValueType().isInteger() &&
23953 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR &&
23954 BCSrc.getScalarValueSizeInBits() ==
23955            BCSrc.getOperand(0).getValueSizeInBits()) {
23956      // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
23957 // trunc i64 X to i32
23958 SDValue X = BCSrc.getOperand(0);
23959 EVT XVT = X.getValueType();
23960 assert(XVT.isScalarInteger() && ScalarVT.isScalarInteger() &&
23961 "Extract element and scalar to vector can't change element type "
23962 "from FP to integer.");
23963 unsigned XBitWidth = X.getValueSizeInBits();
23964 unsigned Scale = XBitWidth / VecEltBitWidth;
23965 BCTruncElt = IsLE ? 0 : Scale - 1;
23966
23967 // An extract element return value type can be wider than its vector
23968 // operand element type. In that case, the high bits are undefined, so
23969 // it's possible that we may need to extend rather than truncate.
23970 if (ExtractIndex < Scale && XBitWidth > VecEltBitWidth) {
23971 assert(XBitWidth % VecEltBitWidth == 0 &&
23972 "Scalar bitwidth must be a multiple of vector element bitwidth");
23973
23974 if (ExtractIndex != BCTruncElt) {
23975 unsigned ShiftIndex =
23976 IsLE ? ExtractIndex : (Scale - 1) - ExtractIndex;
23977 X = DAG.getNode(
23978 ISD::SRL, DL, XVT, X,
23979 DAG.getShiftAmountConstant(ShiftIndex * VecEltBitWidth, XVT, DL));
23980 }
23981
23982 return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
23983 }
23984 }
23985 }
23986
23987 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
23988 // We only perform this optimization before the op legalization phase because
23989 // we may introduce new vector instructions which are not backed by TD
23990  // patterns; for example, on AVX, extracting elements from a wide vector
23991 // without using extract_subvector. However, if we can find an underlying
23992 // scalar value, then we can always use that.
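  // (Illustrative example: extract_vector_elt (vector_shuffle<2,u,u,u> X, Y), 0
  // is rewritten below to extract_vector_elt X, 2.)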
23993 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
23994 auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
23995 // Find the new index to extract from.
23996 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
23997
23998 // Extracting an undef index is undef.
23999 if (OrigElt == -1)
24000 return DAG.getUNDEF(ScalarVT);
24001
24002 // Select the right vector half to extract from.
24003 SDValue SVInVec;
24004 if (OrigElt < (int)NumElts) {
24005 SVInVec = VecOp.getOperand(0);
24006 } else {
24007 SVInVec = VecOp.getOperand(1);
24008 OrigElt -= NumElts;
24009 }
24010
24011 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
24012 // TODO: Check if shuffle mask is legal?
24013 if (LegalOperations && TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VecVT) &&
24014 !VecOp.hasOneUse())
24015 return SDValue();
24016
24017 SDValue InOp = SVInVec.getOperand(OrigElt);
24018 if (InOp.getValueType() != ScalarVT) {
24019 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
24020 InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
24021 }
24022
24023 return InOp;
24024 }
24025
24026 // FIXME: We should handle recursing on other vector shuffles and
24027 // scalar_to_vector here as well.
24028
24029 if (!LegalOperations ||
24030 // FIXME: Should really be just isOperationLegalOrCustom.
24031        TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
24032        TLI.isOperationCustom(ISD::EXTRACT_VECTOR_ELT, VecVT)) {
24033      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
24034 DAG.getVectorIdxConstant(OrigElt, DL));
24035 }
24036 }
24037
24038 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
24039 // simplify it based on the (valid) extraction indices.
24040 if (llvm::all_of(VecOp->users(), [&](SDNode *Use) {
24041 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
24042 Use->getOperand(0) == VecOp &&
24043 isa<ConstantSDNode>(Use->getOperand(1));
24044 })) {
24045 APInt DemandedElts = APInt::getZero(NumElts);
24046 for (SDNode *User : VecOp->users()) {
24047 auto *CstElt = cast<ConstantSDNode>(User->getOperand(1));
24048 if (CstElt->getAPIntValue().ult(NumElts))
24049 DemandedElts.setBit(CstElt->getZExtValue());
24050 }
24051 if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
24052 // We simplified the vector operand of this extract element. If this
24053 // extract is not dead, visit it again so it is folded properly.
24054 if (N->getOpcode() != ISD::DELETED_NODE)
24055 AddToWorklist(N);
24056 return SDValue(N, 0);
24057 }
24058 APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
24059 if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
24060 // We simplified the vector operand of this extract element. If this
24061 // extract is not dead, visit it again so it is folded properly.
24062 if (N->getOpcode() != ISD::DELETED_NODE)
24063 AddToWorklist(N);
24064 return SDValue(N, 0);
24065 }
24066 }
24067
24068 if (refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(N))
24069 return SDValue(N, 0);
24070
24071 // Everything under here is trying to match an extract of a loaded value.
24072 // If the result of load has to be truncated, then it's not necessarily
24073 // profitable.
24074 bool BCNumEltsChanged = false;
24075 EVT ExtVT = VecVT.getVectorElementType();
24076 EVT LVT = ExtVT;
24077 if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
24078 return SDValue();
24079
24080 if (VecOp.getOpcode() == ISD::BITCAST) {
24081 // Don't duplicate a load with other uses.
24082 if (!VecOp.hasOneUse())
24083 return SDValue();
24084
24085 EVT BCVT = VecOp.getOperand(0).getValueType();
24086 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
24087 return SDValue();
24088 if (NumElts != BCVT.getVectorNumElements())
24089 BCNumEltsChanged = true;
24090 VecOp = VecOp.getOperand(0);
24091 ExtVT = BCVT.getVectorElementType();
24092 }
24093
24094 // extract (vector load $addr), i --> load $addr + i * size
24095 if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
24096 ISD::isNormalLoad(VecOp.getNode()) &&
24097 !Index->hasPredecessor(VecOp.getNode())) {
24098 auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
24099 if (VecLoad && VecLoad->isSimple()) {
24100 if (SDValue Scalarized = TLI.scalarizeExtractedVectorLoad(
24101 ScalarVT, SDLoc(N), VecVT, Index, VecLoad, DAG)) {
24102 ++OpsNarrowed;
24103 return Scalarized;
24104 }
24105 }
24106 }
24107
24108 // Perform only after legalization to ensure build_vector / vector_shuffle
24109 // optimizations have already been done.
24110 if (!LegalOperations || !IndexC)
24111 return SDValue();
24112
24113 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
24114 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
24115 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
24116 int Elt = IndexC->getZExtValue();
24117 LoadSDNode *LN0 = nullptr;
24118 if (ISD::isNormalLoad(VecOp.getNode())) {
24119 LN0 = cast<LoadSDNode>(VecOp);
24120 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
24121 VecOp.getOperand(0).getValueType() == ExtVT &&
24122 ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
24123 // Don't duplicate a load with other uses.
24124 if (!VecOp.hasOneUse())
24125 return SDValue();
24126
24127 LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
24128 }
24129 if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
24130 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
24131 // =>
24132 // (load $addr+1*size)
24133
24134 // Don't duplicate a load with other uses.
24135 if (!VecOp.hasOneUse())
24136 return SDValue();
24137
24138 // If the bit convert changed the number of elements, it is unsafe
24139 // to examine the mask.
24140 if (BCNumEltsChanged)
24141 return SDValue();
24142
24143 // Select the input vector, guarding against out of range extract vector.
24144 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
24145 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
24146
24147 if (VecOp.getOpcode() == ISD::BITCAST) {
24148 // Don't duplicate a load with other uses.
24149 if (!VecOp.hasOneUse())
24150 return SDValue();
24151
24152 VecOp = VecOp.getOperand(0);
24153 }
24154 if (ISD::isNormalLoad(VecOp.getNode())) {
24155 LN0 = cast<LoadSDNode>(VecOp);
24156 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
24157 Index = DAG.getConstant(Elt, DL, Index.getValueType());
24158 }
24159 } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
24160 VecVT.getVectorElementType() == ScalarVT &&
24161 (!LegalTypes ||
24162 TLI.isTypeLegal(
24163                  VecOp.getOperand(0).getValueType().getVectorElementType()))) {
24164    // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
24165 // -> extract_vector_elt a, 0
24166 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
24167 // -> extract_vector_elt a, 1
24168 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
24169 // -> extract_vector_elt b, 0
24170 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
24171 // -> extract_vector_elt b, 1
24172 EVT ConcatVT = VecOp.getOperand(0).getValueType();
24173 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
24174 SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, DL,
24175 Index.getValueType());
24176
24177 SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
24178    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
24179                              ConcatVT.getVectorElementType(),
24180 ConcatOp, NewIdx);
24181 return DAG.getNode(ISD::BITCAST, DL, ScalarVT, Elt);
24182 }
24183
24184 // Make sure we found a non-volatile load and the extractelement is
24185 // the only use.
24186 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
24187 return SDValue();
24188
24189 // If Idx was -1 above, Elt is going to be -1, so just return undef.
24190 if (Elt == -1)
24191 return DAG.getUNDEF(LVT);
24192
24193 if (SDValue Scalarized =
24194 TLI.scalarizeExtractedVectorLoad(LVT, DL, VecVT, Index, LN0, DAG)) {
24195 ++OpsNarrowed;
24196 return Scalarized;
24197 }
24198
24199 return SDValue();
24200}
24201
24202// Simplify (build_vec (ext )) to (bitcast (build_vec ))
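// For example, on a little-endian target:
//   (v4i32 build_vector (zext i16 a), (zext i16 b), (zext i16 c), (zext i16 d))
//     -> (v4i32 bitcast (v8i16 build_vector a, 0, b, 0, c, 0, d, 0))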
24203SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
24204 // We perform this optimization post type-legalization because
24205 // the type-legalizer often scalarizes integer-promoted vectors.
24206 // Performing this optimization before may create bit-casts which
24207 // will be type-legalized to complex code sequences.
24208 // We perform this optimization only before the operation legalizer because we
24209 // may introduce illegal operations.
24210 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
24211 return SDValue();
24212
24213 unsigned NumInScalars = N->getNumOperands();
24214 SDLoc DL(N);
24215 EVT VT = N->getValueType(0);
24216
24217 // Check to see if this is a BUILD_VECTOR of a bunch of values
24218 // which come from any_extend or zero_extend nodes. If so, we can create
24219 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
24220 // optimizations. We do not handle sign-extend because we can't fill the sign
24221 // using shuffles.
24222 EVT SourceType = MVT::Other;
24223 bool AllAnyExt = true;
24224
24225 for (unsigned i = 0; i != NumInScalars; ++i) {
24226 SDValue In = N->getOperand(i);
24227 // Ignore undef inputs.
24228 if (In.isUndef()) continue;
24229
24230 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
24231 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
24232
24233 // Abort if the element is not an extension.
24234 if (!ZeroExt && !AnyExt) {
24235 SourceType = MVT::Other;
24236 break;
24237 }
24238
24239 // The input is a ZeroExt or AnyExt. Check the original type.
24240 EVT InTy = In.getOperand(0).getValueType();
24241
24242 // Check that all of the widened source types are the same.
24243 if (SourceType == MVT::Other)
24244 // First time.
24245 SourceType = InTy;
24246 else if (InTy != SourceType) {
24247      // Multiple incoming types. Abort.
24248 SourceType = MVT::Other;
24249 break;
24250 }
24251
24252 // Check if all of the extends are ANY_EXTENDs.
24253 AllAnyExt &= AnyExt;
24254 }
24255
24256 // In order to have valid types, all of the inputs must be extended from the
24257 // same source type and all of the inputs must be any or zero extend.
24258 // Scalar sizes must be a power of two.
24259 EVT OutScalarTy = VT.getScalarType();
24260 bool ValidTypes =
24261 SourceType != MVT::Other &&
24262      llvm::has_single_bit<uint32_t>(OutScalarTy.getSizeInBits()) &&
24263      llvm::has_single_bit<uint32_t>(SourceType.getSizeInBits());
24264
24265 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
24266 // turn into a single shuffle instruction.
24267 if (!ValidTypes)
24268 return SDValue();
24269
24270 // If we already have a splat buildvector, then don't fold it if it means
24271 // introducing zeros.
24272 if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
24273 return SDValue();
24274
24275 bool isLE = DAG.getDataLayout().isLittleEndian();
24276 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
24277 assert(ElemRatio > 1 && "Invalid element size ratio");
24278 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
24279 DAG.getConstant(0, DL, SourceType);
24280
24281 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
24282 SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
24283
24284 // Populate the new build_vector
24285 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
24286 SDValue Cast = N->getOperand(i);
24287 assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
24288 Cast.getOpcode() == ISD::ZERO_EXTEND ||
24289 Cast.isUndef()) && "Invalid cast opcode");
24290 SDValue In;
24291 if (Cast.isUndef())
24292 In = DAG.getUNDEF(SourceType);
24293 else
24294 In = Cast->getOperand(0);
24295 unsigned Index = isLE ? (i * ElemRatio) :
24296 (i * ElemRatio + (ElemRatio - 1));
24297
24298 assert(Index < Ops.size() && "Invalid index");
24299 Ops[Index] = In;
24300 }
24301
24302 // The type of the new BUILD_VECTOR node.
24303 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
24304 assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
24305 "Invalid vector size");
24306 // Check if the new vector type is legal.
24307 if (!isTypeLegal(VecVT) ||
24308 (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
24309       TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
24310    return SDValue();
24311
24312 // Make the new BUILD_VECTOR.
24313 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
24314
24315 // The new BUILD_VECTOR node has the potential to be further optimized.
24316 AddToWorklist(BV.getNode());
24317 // Bitcast to the desired type.
24318 return DAG.getBitcast(VT, BV);
24319}
24320
24321// Simplify (build_vec (trunc $1)
24322// (trunc (srl $1 half-width))
24323// (trunc (srl $1 (2 * half-width))))
24324// to (bitcast $1)
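// For example, on a little-endian target with i64 $1:
//   (v4i16 build_vector (trunc $1), (trunc (srl $1, 16)),
//                       (trunc (srl $1, 32)), (trunc (srl $1, 48)))
//     -> (v4i16 bitcast $1)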
24325SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
24326 assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
24327
24328 EVT VT = N->getValueType(0);
24329
24330 // Don't run this before LegalizeTypes if VT is legal.
24331 // Targets may have other preferences.
24332 if (Level < AfterLegalizeTypes && TLI.isTypeLegal(VT))
24333 return SDValue();
24334
24335 // Only for little endian
24336 if (!DAG.getDataLayout().isLittleEndian())
24337 return SDValue();
24338
24339 EVT OutScalarTy = VT.getScalarType();
24340 uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
24341
24342  // Only handle power-of-two types, to be sure that the bitcast works well.
24343 if (!isPowerOf2_64(ScalarTypeBitsize))
24344 return SDValue();
24345
24346 unsigned NumInScalars = N->getNumOperands();
24347
24348 // Look through bitcasts
24349 auto PeekThroughBitcast = [](SDValue Op) {
24350 if (Op.getOpcode() == ISD::BITCAST)
24351 return Op.getOperand(0);
24352 return Op;
24353 };
24354
24355 // The source value where all the parts are extracted.
24356 SDValue Src;
24357 for (unsigned i = 0; i != NumInScalars; ++i) {
24358 SDValue In = PeekThroughBitcast(N->getOperand(i));
24359 // Ignore undef inputs.
24360 if (In.isUndef()) continue;
24361
24362 if (In.getOpcode() != ISD::TRUNCATE)
24363 return SDValue();
24364
24365 In = PeekThroughBitcast(In.getOperand(0));
24366
24367 if (In.getOpcode() != ISD::SRL) {
24368      // For now, only handle build_vec without shuffling; handle shifts here
24369      // in the future.
24370 if (i != 0)
24371 return SDValue();
24372
24373 Src = In;
24374 } else {
24375 // In is SRL
24376 SDValue part = PeekThroughBitcast(In.getOperand(0));
24377
24378 if (!Src) {
24379 Src = part;
24380 } else if (Src != part) {
24381 // Vector parts do not stem from the same variable
24382 return SDValue();
24383 }
24384
24385 SDValue ShiftAmtVal = In.getOperand(1);
24386 if (!isa<ConstantSDNode>(ShiftAmtVal))
24387 return SDValue();
24388
24389 uint64_t ShiftAmt = In.getConstantOperandVal(1);
24390
24391 // The extracted value is not extracted at the right position
24392 if (ShiftAmt != i * ScalarTypeBitsize)
24393 return SDValue();
24394 }
24395 }
24396
24397 // Only cast if the size is the same
24398 if (!Src || Src.getValueType().getSizeInBits() != VT.getSizeInBits())
24399 return SDValue();
24400
24401 return DAG.getBitcast(VT, Src);
24402}
24403
24404SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
24405 ArrayRef<int> VectorMask,
24406 SDValue VecIn1, SDValue VecIn2,
24407 unsigned LeftIdx, bool DidSplitVec) {
24408 EVT VT = N->getValueType(0);
24409 EVT InVT1 = VecIn1.getValueType();
24410 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
24411
24412 unsigned NumElems = VT.getVectorNumElements();
24413 unsigned ShuffleNumElems = NumElems;
24414
24415 // If we artificially split a vector in two already, then the offsets in the
24416 // operands will all be based off of VecIn1, even those in VecIn2.
24417 unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
24418
24419 uint64_t VTSize = VT.getFixedSizeInBits();
24420 uint64_t InVT1Size = InVT1.getFixedSizeInBits();
24421 uint64_t InVT2Size = InVT2.getFixedSizeInBits();
24422
24423 assert(InVT2Size <= InVT1Size &&
24424 "Inputs must be sorted to be in non-increasing vector size order.");
24425
24426 // We can't generate a shuffle node with mismatched input and output types.
24427 // Try to make the types match the type of the output.
24428 if (InVT1 != VT || InVT2 != VT) {
24429 if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
24430 // If the output vector length is a multiple of both input lengths,
24431 // we can concatenate them and pad the rest with undefs.
24432 unsigned NumConcats = VTSize / InVT1Size;
24433 assert(NumConcats >= 2 && "Concat needs at least two inputs!");
24434 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
24435 ConcatOps[0] = VecIn1;
24436 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
24437 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
24438 VecIn2 = SDValue();
24439 } else if (InVT1Size == VTSize * 2) {
24440 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
24441 return SDValue();
24442
24443 if (!VecIn2.getNode()) {
24444 // If we only have one input vector, and it's twice the size of the
24445 // output, split it in two.
24446 VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
24447 DAG.getVectorIdxConstant(NumElems, DL));
24448 VecIn1 = DAG.getExtractSubvector(DL, VT, VecIn1, 0);
24449 // Since we now have shorter input vectors, adjust the offset of the
24450 // second vector's start.
24451 Vec2Offset = NumElems;
24452 } else {
24453 assert(InVT2Size <= InVT1Size &&
24454 "Second input is not going to be larger than the first one.");
24455
24456 // VecIn1 is wider than the output, and we have another, possibly
24457 // smaller input. Pad the smaller input with undefs, shuffle at the
24458 // input vector width, and extract the output.
24459 // The shuffle type is different than VT, so check legality again.
24460 if (LegalOperations &&
24461            !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
24462          return SDValue();
24463
24464 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
24465 // lower it back into a BUILD_VECTOR. So if the inserted type is
24466 // illegal, don't even try.
24467 if (InVT1 != InVT2) {
24468 if (!TLI.isTypeLegal(InVT2))
24469 return SDValue();
24470 VecIn2 = DAG.getInsertSubvector(DL, DAG.getUNDEF(InVT1), VecIn2, 0);
24471 }
24472 ShuffleNumElems = NumElems * 2;
24473 }
24474 } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
24475 SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
24476 ConcatOps[0] = VecIn2;
24477 VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
24478 } else if (InVT1Size / VTSize > 1 && InVT1Size % VTSize == 0) {
24479 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems) ||
24480 !TLI.isTypeLegal(InVT1) || !TLI.isTypeLegal(InVT2))
24481 return SDValue();
24482      // If the dest vector has fewer than two elements, then using a shuffle and
24483      // extracting from larger registers will cost even more.
24484 if (VT.getVectorNumElements() <= 2 || !VecIn2.getNode())
24485 return SDValue();
24486 assert(InVT2Size <= InVT1Size &&
24487 "Second input is not going to be larger than the first one.");
24488
24489 // VecIn1 is wider than the output, and we have another, possibly
24490 // smaller input. Pad the smaller input with undefs, shuffle at the
24491 // input vector width, and extract the output.
24492 // The shuffle type is different than VT, so check legality again.
24493 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
24494 return SDValue();
24495
24496 if (InVT1 != InVT2) {
24497 VecIn2 = DAG.getInsertSubvector(DL, DAG.getUNDEF(InVT1), VecIn2, 0);
24498 }
24499 ShuffleNumElems = InVT1Size / VTSize * NumElems;
24500 } else {
24501 // TODO: Support cases where the length mismatch isn't exactly by a
24502 // factor of 2.
24503 // TODO: Move this check upwards, so that if we have bad type
24504 // mismatches, we don't create any DAG nodes.
24505 return SDValue();
24506 }
24507 }
24508
24509 // Initialize mask to undef.
24510 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
24511
24512 // Only need to run up to the number of elements actually used, not the
24513 // total number of elements in the shuffle - if we are shuffling a wider
24514 // vector, the high lanes should be set to undef.
24515 for (unsigned i = 0; i != NumElems; ++i) {
24516 if (VectorMask[i] <= 0)
24517 continue;
24518
24519 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
24520 if (VectorMask[i] == (int)LeftIdx) {
24521 Mask[i] = ExtIndex;
24522 } else if (VectorMask[i] == (int)LeftIdx + 1) {
24523 Mask[i] = Vec2Offset + ExtIndex;
24524 }
24525 }
24526
24527 // The type the input vectors may have changed above.
24528 InVT1 = VecIn1.getValueType();
24529
24530 // If we already have a VecIn2, it should have the same type as VecIn1.
24531 // If we don't, get an undef/zero vector of the appropriate type.
24532 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
24533 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
24534
24535 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
24536 if (ShuffleNumElems > NumElems)
24537 Shuffle = DAG.getExtractSubvector(DL, VT, Shuffle, 0);
24538
24539 return Shuffle;
24540}
24541
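// Match a BUILD_VECTOR in which every element is undef except for a single
// zero-extended scalar extracted from a vector at a constant index, and try
// to rewrite it as a shuffle of that source vector with a zero vector (the
// exact preconditions are checked below).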
24542static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
24543  assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
24544
24545 // First, determine where the build vector is not undef.
24546 // TODO: We could extend this to handle zero elements as well as undefs.
24547 int NumBVOps = BV->getNumOperands();
24548 int ZextElt = -1;
24549 for (int i = 0; i != NumBVOps; ++i) {
24550 SDValue Op = BV->getOperand(i);
24551 if (Op.isUndef())
24552 continue;
24553 if (ZextElt == -1)
24554 ZextElt = i;
24555 else
24556 return SDValue();
24557 }
24558 // Bail out if there's no non-undef element.
24559 if (ZextElt == -1)
24560 return SDValue();
24561
24562 // The build vector contains some number of undef elements and exactly
24563 // one other element. That other element must be a zero-extended scalar
24564 // extracted from a vector at a constant index to turn this into a shuffle.
24565 // Also, require that the build vector does not implicitly truncate/extend
24566 // its elements.
24567 // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
24568 EVT VT = BV->getValueType(0);
24569 SDValue Zext = BV->getOperand(ZextElt);
24570 if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
24571      Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
24572      !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
24573      Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
24574    return SDValue();
24575
24576 // The zero-extend must be a multiple of the source size, and we must be
24577 // building a vector of the same size as the source of the extract element.
24578 SDValue Extract = Zext.getOperand(0);
24579 unsigned DestSize = Zext.getValueSizeInBits();
24580 unsigned SrcSize = Extract.getValueSizeInBits();
24581 if (DestSize % SrcSize != 0 ||
24582 Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
24583 return SDValue();
24584
24585 // Create a shuffle mask that will combine the extracted element with zeros
24586 // and undefs.
24587 int ZextRatio = DestSize / SrcSize;
24588 int NumMaskElts = NumBVOps * ZextRatio;
24589 SmallVector<int, 32> ShufMask(NumMaskElts, -1);
24590 for (int i = 0; i != NumMaskElts; ++i) {
24591 if (i / ZextRatio == ZextElt) {
24592 // The low bits of the (potentially translated) extracted element map to
24593 // the source vector. The high bits map to zero. We will use a zero vector
24594 // as the 2nd source operand of the shuffle, so use the 1st element of
24595 // that vector (mask value is number-of-elements) for the high bits.
24596 int Low = DAG.getDataLayout().isBigEndian() ? (ZextRatio - 1) : 0;
24597 ShufMask[i] = (i % ZextRatio == Low) ? Extract.getConstantOperandVal(1)
24598 : NumMaskElts;
24599 }
24600
24601 // Undef elements of the build vector remain undef because we initialize
24602 // the shuffle mask with -1.
24603 }
24604
24605 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
24606 // bitcast (shuffle V, ZeroVec, VectorMask)
24607 SDLoc DL(BV);
24608 EVT VecVT = Extract.getOperand(0).getValueType();
24609 SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
24610 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24611 SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
24612 ZeroVec, ShufMask, DAG);
24613 if (!Shuf)
24614 return SDValue();
24615 return DAG.getBitcast(VT, Shuf);
24616}
24617
24618// FIXME: promote to STLExtras.
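// Returns the index of the first occurrence of Val in Range, or -1 if Val is
// not present.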
24619template <typename R, typename T>
24620static auto getFirstIndexOf(R &&Range, const T &Val) {
24621 auto I = find(Range, Val);
24622 if (I == Range.end())
24623 return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
24624 return std::distance(Range.begin(), I);
24625}
24626
24627// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
24628// operations. If the types of the vectors we're extracting from allow it,
24629// turn this into a vector_shuffle node.
24630SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
24631 SDLoc DL(N);
24632 EVT VT = N->getValueType(0);
24633
24634 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
24635 if (!isTypeLegal(VT))
24636 return SDValue();
24637
24638  if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
24639    return V;
24640
24641 // May only combine to shuffle after legalize if shuffle is legal.
24642 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
24643 return SDValue();
24644
24645 bool UsesZeroVector = false;
24646 unsigned NumElems = N->getNumOperands();
24647
24648 // Record, for each element of the newly built vector, which input vector
24649 // that element comes from. -1 stands for undef, 0 for the zero vector,
24650 // and positive values for the input vectors.
24651 // VectorMask maps each element to its vector number, and VecIn maps vector
24652 // numbers to their initial SDValues.
24653
24654 SmallVector<int, 8> VectorMask(NumElems, -1);
24655  SmallVector<SDValue, 8> VecIn;
24656  VecIn.push_back(SDValue());
24657
24658 // If we have a single extract_element with a constant index, track the index
24659 // value.
24660 unsigned OneConstExtractIndex = ~0u;
24661
24662  // Count the number of extract_vector_elt sources (i.e. operands that are neither zero constants nor undef).
24663 unsigned NumExtracts = 0;
24664
24665 for (unsigned i = 0; i != NumElems; ++i) {
24666 SDValue Op = N->getOperand(i);
24667
24668 if (Op.isUndef())
24669 continue;
24670
24671 // See if we can use a blend with a zero vector.
24672 // TODO: Should we generalize this to a blend with an arbitrary constant
24673 // vector?
24674    if (isNullConstant(Op) || isNullFPConstant(Op)) {
24675      UsesZeroVector = true;
24676 VectorMask[i] = 0;
24677 continue;
24678 }
24679
24680 // Not an undef or zero. If the input is something other than an
24681 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
24682 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
24683 return SDValue();
24684
24685 SDValue ExtractedFromVec = Op.getOperand(0);
24686 if (ExtractedFromVec.getValueType().isScalableVector())
24687 return SDValue();
24688 auto *ExtractIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
24689 if (!ExtractIdx)
24690 return SDValue();
24691
24692 if (ExtractIdx->getAsAPIntVal().uge(
24693 ExtractedFromVec.getValueType().getVectorNumElements()))
24694 return SDValue();
24695
24696 // All inputs must have the same element type as the output.
24697 if (VT.getVectorElementType() !=
24698 ExtractedFromVec.getValueType().getVectorElementType())
24699 return SDValue();
24700
24701 OneConstExtractIndex = ExtractIdx->getZExtValue();
24702 ++NumExtracts;
24703
24704 // Have we seen this input vector before?
24705 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
24706 // a map back from SDValues to numbers isn't worth it.
24707 int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
24708 if (Idx == -1) { // A new source vector?
24709 Idx = VecIn.size();
24710 VecIn.push_back(ExtractedFromVec);
24711 }
24712
24713 VectorMask[i] = Idx;
24714 }
24715
24716 // If we didn't find at least one input vector, bail out.
24717 if (VecIn.size() < 2)
24718 return SDValue();
24719
24720  // If all the operands of the BUILD_VECTOR extract from the same
24721 // vector, then split the vector efficiently based on the maximum
24722 // vector access index and adjust the VectorMask and
24723 // VecIn accordingly.
24724 bool DidSplitVec = false;
24725 if (VecIn.size() == 2) {
24726 // If we only found a single constant indexed extract_vector_elt feeding the
24727 // build_vector, do not produce a more complicated shuffle if the extract is
24728 // cheap with other constant/undef elements. Skip broadcast patterns with
24729 // multiple uses in the build_vector.
24730
24731 // TODO: This should be more aggressive about skipping the shuffle
24732 // formation, particularly if VecIn[1].hasOneUse(), and regardless of the
24733 // index.
24734 if (NumExtracts == 1 &&
24735        TLI.getTypeAction(*DAG.getContext(), VT) == TargetLowering::TypeLegal &&
24736        TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, VT) &&
24737        TLI.isExtractVecEltCheap(VT, OneConstExtractIndex))
24738 return SDValue();
24739
24740 unsigned MaxIndex = 0;
24741 unsigned NearestPow2 = 0;
24742 SDValue Vec = VecIn.back();
24743 EVT InVT = Vec.getValueType();
24744 SmallVector<unsigned, 8> IndexVec(NumElems, 0);
24745
24746 for (unsigned i = 0; i < NumElems; i++) {
24747 if (VectorMask[i] <= 0)
24748 continue;
24749 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
24750 IndexVec[i] = Index;
24751 MaxIndex = std::max(MaxIndex, Index);
24752 }
24753
24754 NearestPow2 = PowerOf2Ceil(MaxIndex);
24755 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
24756 NumElems * 2 < NearestPow2) {
24757 unsigned SplitSize = NearestPow2 / 2;
24758 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
24759 InVT.getVectorElementType(), SplitSize);
24760 if (TLI.isTypeLegal(SplitVT) &&
24761 SplitSize + SplitVT.getVectorNumElements() <=
24762 InVT.getVectorNumElements()) {
24763 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
24764 DAG.getVectorIdxConstant(SplitSize, DL));
24765 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
24766 DAG.getVectorIdxConstant(0, DL));
24767 VecIn.pop_back();
24768 VecIn.push_back(VecIn1);
24769 VecIn.push_back(VecIn2);
24770 DidSplitVec = true;
24771
24772 for (unsigned i = 0; i < NumElems; i++) {
24773 if (VectorMask[i] <= 0)
24774 continue;
24775 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
24776 }
24777 }
24778 }
24779 }
24780
24781 // Sort input vectors by decreasing vector element count,
24782 // while preserving the relative order of equally-sized vectors.
24783  // Note that we keep the first "implicit" zero vector as-is.
24784 SmallVector<SDValue, 8> SortedVecIn(VecIn);
24785 llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
24786 [](const SDValue &a, const SDValue &b) {
24787 return a.getValueType().getVectorNumElements() >
24788 b.getValueType().getVectorNumElements();
24789 });
24790
24791 // We now also need to rebuild the VectorMask, because it referenced element
24792 // order in VecIn, and we just sorted them.
24793 for (int &SourceVectorIndex : VectorMask) {
24794 if (SourceVectorIndex <= 0)
24795 continue;
24796 unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
24797 assert(Idx > 0 && Idx < SortedVecIn.size() &&
24798 VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
24799 SourceVectorIndex = Idx;
24800 }
24801
24802 VecIn = std::move(SortedVecIn);
24803
24804  // TODO: Should this fire if some of the input vectors have an illegal type (like
24805 // it does now), or should we let legalization run its course first?
24806
24807 // Shuffle phase:
24808 // Take pairs of vectors, and shuffle them so that the result has elements
24809 // from these vectors in the correct places.
24810 // For example, given:
24811 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
24812 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
24813 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
24814 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
24815 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
24816 // We will generate:
24817 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
24818 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
24819 SmallVector<SDValue, 4> Shuffles;
24820 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
24821 unsigned LeftIdx = 2 * In + 1;
24822 SDValue VecLeft = VecIn[LeftIdx];
24823 SDValue VecRight =
24824 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
24825
24826 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
24827 VecRight, LeftIdx, DidSplitVec))
24828 Shuffles.push_back(Shuffle);
24829 else
24830 return SDValue();
24831 }
24832
24833 // If we need the zero vector as an "ingredient" in the blend tree, add it
24834 // to the list of shuffles.
24835 if (UsesZeroVector)
24836 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
24837 : DAG.getConstantFP(0.0, DL, VT));
24838
24839 // If we only have one shuffle, we're done.
24840 if (Shuffles.size() == 1)
24841 return Shuffles[0];
24842
24843 // Update the vector mask to point to the post-shuffle vectors.
24844 for (int &Vec : VectorMask)
24845 if (Vec == 0)
24846 Vec = Shuffles.size() - 1;
24847 else
24848 Vec = (Vec - 1) / 2;
24849
24850 // More than one shuffle. Generate a binary tree of blends, e.g. if from
24851 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
24852 // generate:
24853 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
24854 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
24855 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
24856 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
24857 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
24858 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
24859 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
24860
24861 // Make sure the initial size of the shuffle list is even.
24862 if (Shuffles.size() % 2)
24863 Shuffles.push_back(DAG.getUNDEF(VT));
24864
24865 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
24866 if (CurSize % 2) {
24867 Shuffles[CurSize] = DAG.getUNDEF(VT);
24868 CurSize++;
24869 }
24870 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
24871 int Left = 2 * In;
24872 int Right = 2 * In + 1;
24873 SmallVector<int, 8> Mask(NumElems, -1);
24874 SDValue L = Shuffles[Left];
24875 ArrayRef<int> LMask;
24876 bool IsLeftShuffle = L.getOpcode() == ISD::VECTOR_SHUFFLE &&
24877 L.use_empty() && L.getOperand(1).isUndef() &&
24878 L.getOperand(0).getValueType() == L.getValueType();
24879 if (IsLeftShuffle) {
24880 LMask = cast<ShuffleVectorSDNode>(L.getNode())->getMask();
24881 L = L.getOperand(0);
24882 }
24883 SDValue R = Shuffles[Right];
24884 ArrayRef<int> RMask;
24885 bool IsRightShuffle = R.getOpcode() == ISD::VECTOR_SHUFFLE &&
24886 R.use_empty() && R.getOperand(1).isUndef() &&
24887 R.getOperand(0).getValueType() == R.getValueType();
24888 if (IsRightShuffle) {
24889 RMask = cast<ShuffleVectorSDNode>(R.getNode())->getMask();
24890 R = R.getOperand(0);
24891 }
24892 for (unsigned I = 0; I != NumElems; ++I) {
24893 if (VectorMask[I] == Left) {
24894 Mask[I] = I;
24895 if (IsLeftShuffle)
24896 Mask[I] = LMask[I];
24897 VectorMask[I] = In;
24898 } else if (VectorMask[I] == Right) {
24899 Mask[I] = I + NumElems;
24900 if (IsRightShuffle)
24901 Mask[I] = RMask[I] + NumElems;
24902 VectorMask[I] = In;
24903 }
24904 }
24905
24906 Shuffles[In] = DAG.getVectorShuffle(VT, DL, L, R, Mask);
24907 }
24908 }
24909 return Shuffles[0];
24910}
24911
24912// Try to turn a build vector of zero extends of extract vector elts into a
24913// vector zero extend and possibly an extract subvector.
24914// TODO: Support sign extend?
24915// TODO: Allow undef elements?
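// For example (illustrative):
//   (v2i32 build_vector (zext (extract_vector_elt v8i16:X, 4)),
//                       (zext (extract_vector_elt v8i16:X, 5)))
//     -> (v2i32 zero_extend (v2i16 extract_subvector X, 4))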
24916SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
24917 if (LegalOperations)
24918 return SDValue();
24919
24920 EVT VT = N->getValueType(0);
24921
24922 bool FoundZeroExtend = false;
24923 SDValue Op0 = N->getOperand(0);
24924 auto checkElem = [&](SDValue Op) -> int64_t {
24925 unsigned Opc = Op.getOpcode();
24926 FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
24927 if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
24928 Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
24929 Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
24930 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
24931 return C->getZExtValue();
24932 return -1;
24933 };
24934
24935 // Make sure the first element matches
24936 // (zext (extract_vector_elt X, C))
24937 // Offset must be a constant multiple of the
24938 // known-minimum vector length of the result type.
24939 int64_t Offset = checkElem(Op0);
24940 if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
24941 return SDValue();
24942
24943 unsigned NumElems = N->getNumOperands();
24944 SDValue In = Op0.getOperand(0).getOperand(0);
24945 EVT InSVT = In.getValueType().getScalarType();
24946 EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
24947
24948 // Don't create an illegal input type after type legalization.
24949 if (LegalTypes && !TLI.isTypeLegal(InVT))
24950 return SDValue();
24951
24952 // Ensure all the elements come from the same vector and are adjacent.
24953 for (unsigned i = 1; i != NumElems; ++i) {
24954 if ((Offset + i) != checkElem(N->getOperand(i)))
24955 return SDValue();
24956 }
24957
24958 SDLoc DL(N);
24959 In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
24960 Op0.getOperand(0).getOperand(1));
24961 return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
24962 VT, In);
24963}
24964
24965// If this is a very simple BUILD_VECTOR with its first element being a ZERO_EXTEND,
24966// and all other elements being constant zeros, granularize the BUILD_VECTOR's
24967// element width, absorbing the ZERO_EXTEND, turning it into a constant zero op.
24968// This pattern can appear during legalization.
24969//
24970// NOTE: This can be generalized to allow more than a single
24971// non-constant-zero op, UNDEFs, and to be KnownBits-based.
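// For example, on a little-endian target where the narrower types are legal
// (illustrative):
//   (v2i64 build_vector (zero_extend i16 %a to i64), (i64 0))
//     -> (v2i64 bitcast (v8i16 build_vector %a, 0, 0, 0, 0, 0, 0, 0))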
24972SDValue DAGCombiner::convertBuildVecZextToBuildVecWithZeros(SDNode *N) {
24973 // Don't run this after legalization. Targets may have other preferences.
24974 if (Level >= AfterLegalizeDAG)
24975 return SDValue();
24976
24977 // FIXME: support big-endian.
24978 if (DAG.getDataLayout().isBigEndian())
24979 return SDValue();
24980
24981 EVT VT = N->getValueType(0);
24982 EVT OpVT = N->getOperand(0).getValueType();
24983 assert(!VT.isScalableVector() && "Encountered scalable BUILD_VECTOR?");
24984
24985 EVT OpIntVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
24986
24987 if (!TLI.isTypeLegal(OpIntVT) ||
24988 (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::BITCAST, OpIntVT)))
24989 return SDValue();
24990
24991 unsigned EltBitwidth = VT.getScalarSizeInBits();
24992 // NOTE: the actual width of operands may be wider than that!
24993
24994 // Analyze all operands of this BUILD_VECTOR. What is the largest number of
24995 // active bits they all have? We'll want to truncate them all to that width.
24996 unsigned ActiveBits = 0;
24997 APInt KnownZeroOps(VT.getVectorNumElements(), 0);
24998 for (auto I : enumerate(N->ops())) {
24999 SDValue Op = I.value();
25000 // FIXME: support UNDEF elements?
25001 if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
25002 unsigned OpActiveBits =
25003 Cst->getAPIntValue().trunc(EltBitwidth).getActiveBits();
25004 if (OpActiveBits == 0) {
25005 KnownZeroOps.setBit(I.index());
25006 continue;
25007 }
25008 // Profitability check: don't allow non-zero constant operands.
25009 return SDValue();
25010 }
25011 // Profitability check: there must only be a single non-zero operand,
25012 // and it must be the first operand of the BUILD_VECTOR.
25013 if (I.index() != 0)
25014 return SDValue();
25015 // The operand must be a zero-extension itself.
25016 // FIXME: this could be generalized to known leading zeros check.
25017 if (Op.getOpcode() != ISD::ZERO_EXTEND)
25018 return SDValue();
25019 unsigned CurrActiveBits =
25020 Op.getOperand(0).getValueSizeInBits().getFixedValue();
25021 assert(!ActiveBits && "Already encountered non-constant-zero operand?");
25022 ActiveBits = CurrActiveBits;
25023 // We want to at least halve the element size.
25024 if (2 * ActiveBits > EltBitwidth)
25025 return SDValue();
25026 }
25027
25028 // This BUILD_VECTOR must have at least one non-constant-zero operand.
25029 if (ActiveBits == 0)
25030 return SDValue();
25031
25032  // We have EltBitwidth bits; the *minimal* chunk size is ActiveBits. Into
25033  // how many chunks can we split our element width?
25034 EVT NewScalarIntVT, NewIntVT;
25035 std::optional<unsigned> Factor;
25036 // We can split the element into at least two chunks, but not into more
25037  // than |_ EltBitwidth / ActiveBits _| chunks. Find the largest split factor
25038  // such that the element width is a multiple of it,
25039 // and the resulting types/operations on that chunk width are legal.
25040 assert(2 * ActiveBits <= EltBitwidth &&
25041 "We know that half or less bits of the element are active.");
25042 for (unsigned Scale = EltBitwidth / ActiveBits; Scale >= 2; --Scale) {
25043 if (EltBitwidth % Scale != 0)
25044 continue;
25045 unsigned ChunkBitwidth = EltBitwidth / Scale;
25046 assert(ChunkBitwidth >= ActiveBits && "As per starting point.");
25047 NewScalarIntVT = EVT::getIntegerVT(*DAG.getContext(), ChunkBitwidth);
25048 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewScalarIntVT,
25049 Scale * N->getNumOperands());
25050 if (!TLI.isTypeLegal(NewScalarIntVT) || !TLI.isTypeLegal(NewIntVT) ||
25051 (LegalOperations &&
25052 !(TLI.isOperationLegalOrCustom(ISD::TRUNCATE, NewScalarIntVT) &&
25053           TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, NewIntVT))))
25054      continue;
25055 Factor = Scale;
25056 break;
25057 }
25058 if (!Factor)
25059 return SDValue();
25060
25061 SDLoc DL(N);
25062 SDValue ZeroOp = DAG.getConstant(0, DL, NewScalarIntVT);
25063
25064 // Recreate the BUILD_VECTOR, with elements now being Factor times smaller.
25065  SmallVector<SDValue, 16> NewOps;
25066  NewOps.reserve(NewIntVT.getVectorNumElements());
25067 for (auto I : enumerate(N->ops())) {
25068 SDValue Op = I.value();
25069 assert(!Op.isUndef() && "FIXME: after allowing UNDEF's, handle them here.");
25070 unsigned SrcOpIdx = I.index();
25071 if (KnownZeroOps[SrcOpIdx]) {
25072 NewOps.append(*Factor, ZeroOp);
25073 continue;
25074 }
25075 Op = DAG.getBitcast(OpIntVT, Op);
25076 Op = DAG.getNode(ISD::TRUNCATE, DL, NewScalarIntVT, Op);
25077 NewOps.emplace_back(Op);
25078 NewOps.append(*Factor - 1, ZeroOp);
25079 }
25080 assert(NewOps.size() == NewIntVT.getVectorNumElements());
25081 SDValue NewBV = DAG.getBuildVector(NewIntVT, DL, NewOps);
25082 NewBV = DAG.getBitcast(VT, NewBV);
25083 return NewBV;
25084}
25085
25086SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
25087 EVT VT = N->getValueType(0);
25088
25089 // A vector built entirely of undefs is undef.
25090  if (ISD::allOperandsUndef(N))
25091    return DAG.getUNDEF(VT);
25092
25093 // If this is a splat of a bitcast from another vector, change to a
25094 // concat_vector.
25095 // For example:
25096 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
25097 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
25098 //
25099 // If X is a build_vector itself, the concat can become a larger build_vector.
25100 // TODO: Maybe this is useful for non-splat too?
25101 if (!LegalOperations) {
25102 SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue();
25103 // Only change build_vector to a concat_vector if the splat value type is
25104    // the same as the vector element type.
25105 if (Splat && Splat.getValueType() == VT.getVectorElementType()) {
25106      Splat = peekThroughBitcasts(Splat);
25107      EVT SrcVT = Splat.getValueType();
25108 if (SrcVT.isVector()) {
25109 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
25110 EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
25111 SrcVT.getVectorElementType(), NumElts);
25112 if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
25113 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
25114 SDValue Concat =
25115 DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), NewVT, Ops);
25116 return DAG.getBitcast(VT, Concat);
25117 }
25118 }
25119 }
25120 }
25121
25122 // Check if we can express BUILD VECTOR via subvector extract.
25123 if (!LegalTypes && (N->getNumOperands() > 1)) {
25124 SDValue Op0 = N->getOperand(0);
25125 auto checkElem = [&](SDValue Op) -> uint64_t {
25126 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
25127 (Op0.getOperand(0) == Op.getOperand(0)))
25128 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
25129 return CNode->getZExtValue();
25130 return -1;
25131 };
25132
25133 int Offset = checkElem(Op0);
25134 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
25135 if (Offset + i != checkElem(N->getOperand(i))) {
25136 Offset = -1;
25137 break;
25138 }
25139 }
25140
25141 if ((Offset == 0) &&
25142 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
25143 return Op0.getOperand(0);
25144 if ((Offset != -1) &&
25145 ((Offset % N->getValueType(0).getVectorNumElements()) ==
25146 0)) // IDX must be multiple of output size.
25147 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
25148 Op0.getOperand(0), Op0.getOperand(1));
25149 }
25150
25151 if (SDValue V = convertBuildVecZextToZext(N))
25152 return V;
25153
25154 if (SDValue V = convertBuildVecZextToBuildVecWithZeros(N))
25155 return V;
25156
25157 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
25158 return V;
25159
25160 if (SDValue V = reduceBuildVecTruncToBitCast(N))
25161 return V;
25162
25163 if (SDValue V = reduceBuildVecToShuffle(N))
25164 return V;
25165
25166 // A splat of a single element is a SPLAT_VECTOR if supported on the target.
25167 // Do this late as some of the above may replace the splat.
25168  if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
25169    if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
25170      assert(!V.isUndef() && "Splat of undef should have been handled earlier");
25171 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
25172 }
25173
25174 return SDValue();
25175}
25176
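// Combine a CONCAT_VECTORS whose operands are all bitcasts from scalars (or
// undef) into a single BUILD_VECTOR of those scalars, bitcast to the result
// type (a summary of the code below).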
25177static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
25178  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25179 EVT OpVT = N->getOperand(0).getValueType();
25180
25181 // If the operands are legal vectors, leave them alone.
25182 if (TLI.isTypeLegal(OpVT) || OpVT.isScalableVector())
25183 return SDValue();
25184
25185 SDLoc DL(N);
25186 EVT VT = N->getValueType(0);
25187  SmallVector<SDValue, 8> Ops;
25188  EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
25189
25190 // Keep track of what we encounter.
25191 EVT AnyFPVT;
25192
25193 for (const SDValue &Op : N->ops()) {
25194 if (ISD::BITCAST == Op.getOpcode() &&
25195 !Op.getOperand(0).getValueType().isVector())
25196 Ops.push_back(Op.getOperand(0));
25197 else if (Op.isUndef())
25198 Ops.push_back(DAG.getNode(ISD::UNDEF, DL, SVT));
25199 else
25200 return SDValue();
25201
25202 // Note whether we encounter an integer or floating point scalar.
25203    // If it's neither, bail out; it could be something weird like x86mmx.
25204 EVT LastOpVT = Ops.back().getValueType();
25205 if (LastOpVT.isFloatingPoint())
25206 AnyFPVT = LastOpVT;
25207 else if (!LastOpVT.isInteger())
25208 return SDValue();
25209 }
25210
25211 // If any of the operands is a floating point scalar bitcast to a vector,
25212 // use floating point types throughout, and bitcast everything.
25213 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
25214 if (AnyFPVT != EVT()) {
25215 SVT = AnyFPVT;
25216 for (SDValue &Op : Ops) {
25217 if (Op.getValueType() == SVT)
25218 continue;
25219 if (Op.isUndef())
25220 Op = DAG.getNode(ISD::UNDEF, DL, SVT);
25221 else
25222 Op = DAG.getBitcast(SVT, Op);
25223 }
25224 }
25225
25226 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
25227 VT.getSizeInBits() / SVT.getSizeInBits());
25228 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
25229}
25230
25231// Attempt to merge nested concat_vectors/undefs.
25232// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
25233// --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
25234static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
25235                                                  SelectionDAG &DAG) {
25236 EVT VT = N->getValueType(0);
25237
25238 // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
25239 EVT SubVT;
25240 SDValue FirstConcat;
25241 for (const SDValue &Op : N->ops()) {
25242 if (Op.isUndef())
25243 continue;
25244 if (Op.getOpcode() != ISD::CONCAT_VECTORS)
25245 return SDValue();
25246 if (!FirstConcat) {
25247 SubVT = Op.getOperand(0).getValueType();
25248 if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
25249 return SDValue();
25250 FirstConcat = Op;
25251 continue;
25252 }
25253 if (SubVT != Op.getOperand(0).getValueType())
25254 return SDValue();
25255 }
25256 assert(FirstConcat && "Concat of all-undefs found");
25257
25258 SmallVector<SDValue> ConcatOps;
25259 for (const SDValue &Op : N->ops()) {
25260 if (Op.isUndef()) {
25261 ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
25262 continue;
25263 }
25264 ConcatOps.append(Op->op_begin(), Op->op_end());
25265 }
25266 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
25267}
25268
25269// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
25270// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
25271// most two distinct vectors the same size as the result, attempt to turn this
25272// into a legal shuffle.
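// For example:
//   concat_vectors (extract_subvector v8i32:A, 4), (extract_subvector v8i32:B, 0)
//     -> vector_shuffle<4,5,6,7,8,9,10,11> A, B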
25273static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
25274  EVT VT = N->getValueType(0);
25275 EVT OpVT = N->getOperand(0).getValueType();
25276
25277 // We currently can't generate an appropriate shuffle for a scalable vector.
25278 if (VT.isScalableVector())
25279 return SDValue();
25280
25281 int NumElts = VT.getVectorNumElements();
25282 int NumOpElts = OpVT.getVectorNumElements();
25283
25284 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
25285  SmallVector<int, 8> Mask;
25286
25287 for (SDValue Op : N->ops()) {
25288    Op = peekThroughBitcasts(Op);
25289
25290 // UNDEF nodes convert to UNDEF shuffle mask values.
25291 if (Op.isUndef()) {
25292 Mask.append((unsigned)NumOpElts, -1);
25293 continue;
25294 }
25295
25296 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
25297 return SDValue();
25298
25299 // What vector are we extracting the subvector from and at what index?
25300 SDValue ExtVec = Op.getOperand(0);
25301 int ExtIdx = Op.getConstantOperandVal(1);
25302
25303 // We want the EVT of the original extraction to correctly scale the
25304 // extraction index.
25305 EVT ExtVT = ExtVec.getValueType();
25306 ExtVec = peekThroughBitcasts(ExtVec);
25307
25308 // UNDEF nodes convert to UNDEF shuffle mask values.
25309 if (ExtVec.isUndef()) {
25310 Mask.append((unsigned)NumOpElts, -1);
25311 continue;
25312 }
25313
25314 // Ensure that we are extracting a subvector from a vector the same
25315 // size as the result.
25316 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
25317 return SDValue();
25318
25319 // Scale the subvector index to account for any bitcast.
25320 int NumExtElts = ExtVT.getVectorNumElements();
25321 if (0 == (NumExtElts % NumElts))
25322 ExtIdx /= (NumExtElts / NumElts);
25323 else if (0 == (NumElts % NumExtElts))
25324 ExtIdx *= (NumElts / NumExtElts);
25325 else
25326 return SDValue();
25327
25328 // At most we can reference 2 inputs in the final shuffle.
25329 if (SV0.isUndef() || SV0 == ExtVec) {
25330 SV0 = ExtVec;
25331 for (int i = 0; i != NumOpElts; ++i)
25332 Mask.push_back(i + ExtIdx);
25333 } else if (SV1.isUndef() || SV1 == ExtVec) {
25334 SV1 = ExtVec;
25335 for (int i = 0; i != NumOpElts; ++i)
25336 Mask.push_back(i + ExtIdx + NumElts);
25337 } else {
25338 return SDValue();
25339 }
25340 }
25341
25342 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25343 return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
25344 DAG.getBitcast(VT, SV1), Mask, DAG);
25345}
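// Illustrative example (types for exposition only): concatenating two extracts
// of the low and high halves of the same source,
//   (v4i32 concat_vectors (v2i32 extract_subvector %a, 0),
//                         (v2i32 extract_subvector %a, 2))
// produces the single-input mask <0,1,2,3> on %a, which
// buildLegalVectorShuffle can then emit as (or simplify away from) a legal
// shuffle.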
25346
25347static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
25348 unsigned CastOpcode = N->getOperand(0).getOpcode();
25349 switch (CastOpcode) {
25350 case ISD::SINT_TO_FP:
25351 case ISD::UINT_TO_FP:
25352 case ISD::FP_TO_SINT:
25353 case ISD::FP_TO_UINT:
25354 // TODO: Allow more opcodes?
25355 // case ISD::BITCAST:
25356 // case ISD::TRUNCATE:
25357 // case ISD::ZERO_EXTEND:
25358 // case ISD::SIGN_EXTEND:
25359 // case ISD::FP_EXTEND:
25360 break;
25361 default:
25362 return SDValue();
25363 }
25364
25365 EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
25366 if (!SrcVT.isVector())
25367 return SDValue();
25368
25369 // All operands of the concat must be the same kind of cast from the same
25370 // source type.
25371 SmallVector<SDValue, 4> SrcOps;
25372 for (SDValue Op : N->ops()) {
25373 if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
25374 Op.getOperand(0).getValueType() != SrcVT)
25375 return SDValue();
25376 SrcOps.push_back(Op.getOperand(0));
25377 }
25378
25379 // The wider cast must be supported by the target. This is unusual because
25380 // the type used to check operation support depends on the opcode. In addition,
25381 // check the other type in the cast to make sure this is really legal.
25382 EVT VT = N->getValueType(0);
25383 EVT SrcEltVT = SrcVT.getVectorElementType();
25384 ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
25385 EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
25386 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25387 switch (CastOpcode) {
25388 case ISD::SINT_TO_FP:
25389 case ISD::UINT_TO_FP:
25390 if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
25391 !TLI.isTypeLegal(VT))
25392 return SDValue();
25393 break;
25394 case ISD::FP_TO_SINT:
25395 case ISD::FP_TO_UINT:
25396 if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
25397 !TLI.isTypeLegal(ConcatSrcVT))
25398 return SDValue();
25399 break;
25400 default:
25401 llvm_unreachable("Unexpected cast opcode");
25402 }
25403
25404 // concat (cast X), (cast Y)... -> cast (concat X, Y...)
25405 SDLoc DL(N);
25406 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
25407 return DAG.getNode(CastOpcode, DL, VT, NewConcat);
25408}
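// Illustrative example (assuming v4i32 sint_to_fp is legal or custom and
// v4f32 is a legal type for the target):
//   (v4f32 concat_vectors (v2f32 sint_to_fp v2i32:%a), (v2f32 sint_to_fp v2i32:%b))
// --> (v4f32 sint_to_fp (v4i32 concat_vectors %a, %b))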
25409
25410// See if this is a simple CONCAT_VECTORS with no UNDEF operands, and if one of
25411// the operands is a SHUFFLE_VECTOR, and all other operands are also operands
25412// to that SHUFFLE_VECTOR, create wider SHUFFLE_VECTOR.
25413static SDValue combineConcatVectorOfShuffleAndItsOperands(
25414 SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
25415 bool LegalOperations) {
25416 EVT VT = N->getValueType(0);
25417 EVT OpVT = N->getOperand(0).getValueType();
25418 if (VT.isScalableVector())
25419 return SDValue();
25420
25421 // For now, only allow simple 2-operand concatenations.
25422 if (N->getNumOperands() != 2)
25423 return SDValue();
25424
25425 // Don't create illegal types/shuffles when not allowed to.
25426 if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
25427 (LegalOperations &&
25428 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT)))
25429 return SDValue();
25430
25431 // Analyze all of the operands of the CONCAT_VECTORS. Out of all of them,
25432 // we want to find one that is: (1) a SHUFFLE_VECTOR (2) only used by us,
25433 // and (3) all operands of CONCAT_VECTORS must be either that SHUFFLE_VECTOR,
25434 // or one of the operands of that SHUFFLE_VECTOR (but not UNDEF!).
25435 // (4) and for now, the SHUFFLE_VECTOR must be unary.
25436 ShuffleVectorSDNode *SVN = nullptr;
25437 for (SDValue Op : N->ops()) {
25438 if (auto *CurSVN = dyn_cast<ShuffleVectorSDNode>(Op);
25439 CurSVN && CurSVN->getOperand(1).isUndef() && N->isOnlyUserOf(CurSVN) &&
25440 all_of(N->ops(), [CurSVN](SDValue Op) {
25441 // FIXME: can we allow UNDEF operands?
25442 return !Op.isUndef() &&
25443 (Op.getNode() == CurSVN || is_contained(CurSVN->ops(), Op));
25444 })) {
25445 SVN = CurSVN;
25446 break;
25447 }
25448 }
25449 if (!SVN)
25450 return SDValue();
25451
25452 // We are going to pad the shuffle operands, so any index that was picking
25453 // from the second operand must be adjusted.
25454 SmallVector<int, 16> AdjustedMask(SVN->getMask());
25455 assert(SVN->getOperand(1).isUndef() && "Expected unary shuffle!");
25456
25457 // Identity masks for the operands of the (padded) shuffle.
25458 SmallVector<int, 32> IdentityMask(2 * OpVT.getVectorNumElements());
25459 MutableArrayRef<int> FirstShufOpIdentityMask =
25460 MutableArrayRef<int>(IdentityMask)
25461 .take_front(OpVT.getVectorNumElements());
25462 MutableArrayRef<int> SecondShufOpIdentityMask =
25463 MutableArrayRef<int>(IdentityMask).take_back(OpVT.getVectorNumElements());
25464 std::iota(FirstShufOpIdentityMask.begin(), FirstShufOpIdentityMask.end(), 0);
25465 std::iota(SecondShufOpIdentityMask.begin(), SecondShufOpIdentityMask.end(),
25466 OpVT.getVectorNumElements());
25467
25468 // New combined shuffle mask.
25469 SmallVector<int, 32> Mask;
25470 Mask.reserve(VT.getVectorNumElements());
25471 for (SDValue Op : N->ops()) {
25472 assert(!Op.isUndef() && "Not expecting to concatenate UNDEF.");
25473 if (Op.getNode() == SVN) {
25474 append_range(Mask, AdjustedMask);
25475 continue;
25476 }
25477 if (Op == SVN->getOperand(0)) {
25478 append_range(Mask, FirstShufOpIdentityMask);
25479 continue;
25480 }
25481 if (Op == SVN->getOperand(1)) {
25482 append_range(Mask, SecondShufOpIdentityMask);
25483 continue;
25484 }
25485 llvm_unreachable("Unexpected operand!");
25486 }
25487
25488 // Don't create illegal shuffle masks.
25489 if (!TLI.isShuffleMaskLegal(Mask, VT))
25490 return SDValue();
25491
25492 // Pad the shuffle operands with UNDEF.
25493 SDLoc dl(N);
25494 std::array<SDValue, 2> ShufOps;
25495 for (auto I : zip(SVN->ops(), ShufOps)) {
25496 SDValue ShufOp = std::get<0>(I);
25497 SDValue &NewShufOp = std::get<1>(I);
25498 if (ShufOp.isUndef())
25499 NewShufOp = DAG.getUNDEF(VT);
25500 else {
25501 SmallVector<SDValue, 2> ShufOpParts(N->getNumOperands(),
25502 DAG.getUNDEF(OpVT));
25503 ShufOpParts[0] = ShufOp;
25504 NewShufOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, ShufOpParts);
25505 }
25506 }
25507 // Finally, create the new wide shuffle.
25508 return DAG.getVectorShuffle(VT, dl, ShufOps[0], ShufOps[1], Mask);
25509}
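// Illustrative example (types for exposition only): concatenating a unary
// shuffle with its own first operand,
//   (v8i32 concat_vectors (v4i32 vector_shuffle<3,2,1,0> %x, undef), %x)
// becomes one wide shuffle of the undef-padded operand:
//   (v8i32 vector_shuffle<3,2,1,0,0,1,2,3> (concat_vectors %x, undef), undef)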
25510
25511static SDValue combineConcatVectorOfSplats(SDNode *N, SelectionDAG &DAG,
25512 const TargetLowering &TLI,
25513 bool LegalTypes,
25514 bool LegalOperations) {
25515 EVT VT = N->getValueType(0);
25516
25517 // Post-legalization we can only create wider SPLAT_VECTOR operations if both
25518 // the type and operation is legal. The Hexagon target has custom
25519 // legalization for SPLAT_VECTOR that splits the operation into two parts and
25520 // concatenates them. Therefore, custom lowering must also be rejected in
25521 // order to avoid an infinite loop.
25522 if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
25523 (LegalOperations && !TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT)))
25524 return SDValue();
25525
25526 SDValue Op0 = N->getOperand(0);
25527 if (!llvm::all_equal(N->op_values()) || Op0.getOpcode() != ISD::SPLAT_VECTOR)
25528 return SDValue();
25529
25530 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, Op0.getOperand(0));
25531}
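// Illustrative example (scalable types for exposition only, assuming the
// wider SPLAT_VECTOR is legal where required):
//   (nxv4i32 concat_vectors (nxv2i32 splat_vector %x), (nxv2i32 splat_vector %x))
// --> (nxv4i32 splat_vector %x)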
25532
25533SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
25534 // If we only have one input vector, we don't need to do any concatenation.
25535 if (N->getNumOperands() == 1)
25536 return N->getOperand(0);
25537
25538 // Check if all of the operands are undefs.
25539 EVT VT = N->getValueType(0);
25540 if (ISD::allOperandsUndef(N))
25541 return DAG.getUNDEF(VT);
25542
25543 // Optimize concat_vectors where all but the first of the vectors are undef.
25544 if (all_of(drop_begin(N->ops()),
25545 [](const SDValue &Op) { return Op.isUndef(); })) {
25546 SDValue In = N->getOperand(0);
25547 assert(In.getValueType().isVector() && "Must concat vectors");
25548
25549 // If the input is a concat_vectors, just make a larger concat by padding
25550 // with smaller undefs.
25551 //
25552 // Legalizing in AArch64TargetLowering::LowerCONCAT_VECTORS() and combining
25553 // here could cause an infinite loop. That legalizing happens when LegalDAG
25554 // is true and input of AArch64TargetLowering::LowerCONCAT_VECTORS() is
25555 // scalable.
25556 if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse() &&
25557 !(LegalDAG && In.getValueType().isScalableVector())) {
25558 unsigned NumOps = N->getNumOperands() * In.getNumOperands();
25559 SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
25560 Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
25561 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
25562 }
25563
25564 SDValue Scalar = peekThroughOneUseBitcasts(In);
25565
25566 // concat_vectors(scalar_to_vector(scalar), undef) ->
25567 // scalar_to_vector(scalar)
25568 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
25569 Scalar.hasOneUse()) {
25570 EVT SVT = Scalar.getValueType().getVectorElementType();
25571 if (SVT == Scalar.getOperand(0).getValueType())
25572 Scalar = Scalar.getOperand(0);
25573 }
25574
25575 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
25576 if (!Scalar.getValueType().isVector() && In.hasOneUse()) {
25577 // If the bitcast type isn't legal, it might be a trunc of a legal type;
25578 // look through the trunc so we can still do the transform:
25579 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
25580 if (Scalar->getOpcode() == ISD::TRUNCATE &&
25581 !TLI.isTypeLegal(Scalar.getValueType()) &&
25582 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
25583 Scalar = Scalar->getOperand(0);
25584
25585 EVT SclTy = Scalar.getValueType();
25586
25587 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
25588 return SDValue();
25589
25590 // Bail out if the vector size is not a multiple of the scalar size.
25591 if (VT.getSizeInBits() % SclTy.getSizeInBits())
25592 return SDValue();
25593
25594 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
25595 if (VNTNumElms < 2)
25596 return SDValue();
25597
25598 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
25599 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
25600 return SDValue();
25601
25602 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
25603 return DAG.getBitcast(VT, Res);
25604 }
25605 }
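// Illustrative example of the scalar handling above (types for exposition
// only, assuming v2f64 and f64 are legal types): with one meaningful operand
// that wraps a scalar,
//   (v2i64 concat_vectors (v1i64 (bitcast f64:%x)), undef)
// is rebuilt as (v2f64 scalar_to_vector f64:%x) bitcast back to v2i64.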
25606
25607 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
25608 // We have already tested above for an UNDEF only concatenation.
25609 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
25610 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
25611 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
25612 return Op.isUndef() || ISD::BUILD_VECTOR == Op.getOpcode();
25613 };
25614 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
25615 SmallVector<SDValue, 8> Opnds;
25616 EVT SVT = VT.getScalarType();
25617
25618 EVT MinVT = SVT;
25619 if (!SVT.isFloatingPoint()) {
25620 // If the BUILD_VECTORs are built from integers, they may have different
25621 // operand types. Get the smallest type and truncate all operands to it.
25622 bool FoundMinVT = false;
25623 for (const SDValue &Op : N->ops())
25624 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
25625 EVT OpSVT = Op.getOperand(0).getValueType();
25626 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
25627 FoundMinVT = true;
25628 }
25629 assert(FoundMinVT && "Concat vector type mismatch");
25630 }
25631
25632 for (const SDValue &Op : N->ops()) {
25633 EVT OpVT = Op.getValueType();
25634 unsigned NumElts = OpVT.getVectorNumElements();
25635
25636 if (Op.isUndef())
25637 Opnds.append(NumElts, DAG.getUNDEF(MinVT));
25638
25639 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
25640 if (SVT.isFloatingPoint()) {
25641 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
25642 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
25643 } else {
25644 for (unsigned i = 0; i != NumElts; ++i)
25645 Opnds.push_back(
25646 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
25647 }
25648 }
25649 }
25650
25651 assert(VT.getVectorNumElements() == Opnds.size() &&
25652 "Concat vector type mismatch");
25653 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
25654 }
25655
25656 if (SDValue V =
25657 combineConcatVectorOfSplats(N, DAG, TLI, LegalTypes, LegalOperations))
25658 return V;
25659
25660 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
25661 // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
25662 if (SDValue V = combineConcatVectorOfScalars(N, DAG))
25663 return V;
25664
25665 if (Level <= AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
25666 // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
25667 if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
25668 return V;
25669
25670 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
25671 if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
25672 return V;
25673 }
25674
25675 if (SDValue V = combineConcatVectorOfCasts(N, DAG))
25676 return V;
25677
25678 if (SDValue V = combineConcatVectorOfShuffleAndItsOperands(
25679 N, DAG, TLI, LegalTypes, LegalOperations))
25680 return V;
25681
25682 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
25683 // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
25684 // operands and look for CONCAT operations that place the incoming vectors
25685 // at the exact same location.
25686 //
25687 // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
25688 SDValue SingleSource = SDValue();
25689 unsigned PartNumElem =
25690 N->getOperand(0).getValueType().getVectorMinNumElements();
25691
25692 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
25693 SDValue Op = N->getOperand(i);
25694
25695 if (Op.isUndef())
25696 continue;
25697
25698 // Check if this is the identity extract:
25699 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
25700 return SDValue();
25701
25702 // Find the single incoming vector for the extract_subvector.
25703 if (SingleSource.getNode()) {
25704 if (Op.getOperand(0) != SingleSource)
25705 return SDValue();
25706 } else {
25707 SingleSource = Op.getOperand(0);
25708
25709 // Check the source type is the same as the type of the result.
25710 // If not, this concat may extend the vector, so we can not
25711 // optimize it away.
25712 if (SingleSource.getValueType() != N->getValueType(0))
25713 return SDValue();
25714 }
25715
25716 // Check that we are reading from the identity index.
25717 unsigned IdentityIndex = i * PartNumElem;
25718 if (Op.getConstantOperandAPInt(1) != IdentityIndex)
25719 return SDValue();
25720 }
25721
25722 if (SingleSource.getNode())
25723 return SingleSource;
25724
25725 return SDValue();
25726}
25727
25728SDValue DAGCombiner::visitVECTOR_INTERLEAVE(SDNode *N) {
25729 // Check to see if all operands are identical.
25730 if (!llvm::all_equal(N->op_values()))
25731 return SDValue();
25732
25733 // Check to see if the identical operand is a splat.
25734 if (!DAG.isSplatValue(N->getOperand(0)))
25735 return SDValue();
25736
25737 // interleave splat(X), splat(X).... --> splat(X), splat(X)....
25738 SmallVector<SDValue, 8> Ops;
25739 Ops.append(N->op_values().begin(), N->op_values().end());
25740 return CombineTo(N, &Ops);
25741}
25742
25743// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
25744// if the subvector can be sourced for free.
25745static SDValue getSubVectorSrc(SDValue V, unsigned Index, EVT SubVT) {
25746 if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
25747 V.getOperand(1).getValueType() == SubVT &&
25748 V.getConstantOperandAPInt(2) == Index) {
25749 return V.getOperand(1);
25750 }
25751 if (V.getOpcode() == ISD::CONCAT_VECTORS &&
25752 V.getOperand(0).getValueType() == SubVT &&
25753 (Index % SubVT.getVectorMinNumElements()) == 0) {
25754 uint64_t SubIdx = Index / SubVT.getVectorMinNumElements();
25755 return V.getOperand(SubIdx);
25756 }
25757 return SDValue();
25758}
25759
25760static SDValue narrowInsertExtractVectorBinOp(EVT SubVT, SDValue BinOp,
25761 unsigned Index, const SDLoc &DL,
25762 SelectionDAG &DAG,
25763 bool LegalOperations) {
25764 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25765 unsigned BinOpcode = BinOp.getOpcode();
25766 if (!TLI.isBinOp(BinOpcode) || BinOp->getNumValues() != 1)
25767 return SDValue();
25768
25769 EVT VecVT = BinOp.getValueType();
25770 SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
25771 if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
25772 return SDValue();
25773 if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
25774 return SDValue();
25775
25776 SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
25777 SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
25778
25779 // TODO: We could handle the case where only 1 operand is being inserted by
25780 // creating an extract of the other operand, but that requires checking
25781 // number of uses and/or costs.
25782 if (!Sub0 || !Sub1)
25783 return SDValue();
25784
25785 // We are inserting both operands of the wide binop only to extract back
25786 // to the narrow vector size. Eliminate all of the insert/extract:
25787 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
25788 return DAG.getNode(BinOpcode, DL, SubVT, Sub0, Sub1, BinOp->getFlags());
25789}
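// Illustrative example (types for exposition only, assuming the narrow add is
// legal or custom): extracting the upper half of a wide add whose operands
// were both just inserted at that same index,
//   (v2i64 extract_subvector
//       (v4i64 add (insert_subvector undef, v2i64:%x, 2),
//                  (insert_subvector undef, v2i64:%y, 2)), 2)
// collapses to (v2i64 add %x, %y), with no inserts or extracts left behind.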
25790
25791/// If we are extracting a subvector produced by a wide binary operator try
25792/// to use a narrow binary operator and/or avoid concatenation and extraction.
25793static SDValue narrowExtractedVectorBinOp(EVT VT, SDValue Src, unsigned Index,
25794 const SDLoc &DL, SelectionDAG &DAG,
25795 bool LegalOperations) {
25796 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
25797 // some of these bailouts with other transforms.
25798
25799 if (SDValue V = narrowInsertExtractVectorBinOp(VT, Src, Index, DL, DAG,
25800 LegalOperations))
25801 return V;
25802
25803 // We are looking for an optionally bitcasted wide vector binary operator
25804 // feeding an extract subvector.
25805 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25806 SDValue BinOp = peekThroughBitcasts(Src);
25807 unsigned BOpcode = BinOp.getOpcode();
25808 if (!TLI.isBinOp(BOpcode) || BinOp->getNumValues() != 1)
25809 return SDValue();
25810
25811 // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
25812 // reduced to the unary fneg when it is visited, and we probably want to deal
25813 // with fneg in a target-specific way.
25814 if (BOpcode == ISD::FSUB) {
25815 auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
25816 if (C && C->getValueAPF().isNegZero())
25817 return SDValue();
25818 }
25819
25820 // The binop must be a vector type, so we can extract some fraction of it.
25821 EVT WideBVT = BinOp.getValueType();
25822 // The optimisations below currently assume we are dealing with fixed length
25823 // vectors. It is possible to add support for scalable vectors, but at the
25824 // moment we've done no analysis to prove whether they are profitable or not.
25825 if (!WideBVT.isFixedLengthVector())
25826 return SDValue();
25827
25828 assert((Index % VT.getVectorNumElements()) == 0 &&
25829 "Extract index is not a multiple of the vector length.");
25830
25831 // Bail out if this is not a proper multiple width extraction.
25832 unsigned WideWidth = WideBVT.getSizeInBits();
25833 unsigned NarrowWidth = VT.getSizeInBits();
25834 if (WideWidth % NarrowWidth != 0)
25835 return SDValue();
25836
25837 // Bail out if we are extracting a fraction of a single operation. This can
25838 // occur because we potentially looked through a bitcast of the binop.
25839 unsigned NarrowingRatio = WideWidth / NarrowWidth;
25840 unsigned WideNumElts = WideBVT.getVectorNumElements();
25841 if (WideNumElts % NarrowingRatio != 0)
25842 return SDValue();
25843
25844 // Bail out if the target does not support a narrower version of the binop.
25845 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
25846 WideNumElts / NarrowingRatio);
25847 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT,
25848 LegalOperations))
25849 return SDValue();
25850
25851 // If extraction is cheap, we don't need to look at the binop operands
25852 // for concat ops. The narrow binop alone makes this transform profitable.
25853 // We can't just reuse the original extract index operand because we may have
25854 // bitcasted.
25855 unsigned ConcatOpNum = Index / VT.getVectorNumElements();
25856 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
25857 if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
25858 BinOp.hasOneUse() && Src->hasOneUse()) {
25859 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
25860 SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
25861 SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
25862 BinOp.getOperand(0), NewExtIndex);
25863 SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
25864 BinOp.getOperand(1), NewExtIndex);
25865 SDValue NarrowBinOp =
25866 DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags());
25867 return DAG.getBitcast(VT, NarrowBinOp);
25868 }
25869
25870 // Only handle the case where we are doubling and then halving. A larger ratio
25871 // may require more than two narrow binops to replace the wide binop.
25872 if (NarrowingRatio != 2)
25873 return SDValue();
25874
25875 // TODO: The motivating case for this transform is an x86 AVX1 target. That
25876 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
25877 // flavors, but no other 256-bit integer support. This could be extended to
25878 // handle any binop, but that may require fixing/adding other folds to avoid
25879 // codegen regressions.
25880 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
25881 return SDValue();
25882
25883 // We need at least one concatenation operation of a binop operand to make
25884 // this transform worthwhile. The concat must double the input vector sizes.
25885 auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
25886 if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
25887 return V.getOperand(ConcatOpNum);
25888 return SDValue();
25889 };
25890 SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
25891 SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
25892
25893 if (SubVecL || SubVecR) {
25894 // If a binop operand was not the result of a concat, we must extract a
25895 // half-sized operand for our new narrow binop:
25896 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
25897 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
25898 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
25899 SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
25900 SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
25901 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
25902 BinOp.getOperand(0), IndexC);
25903
25904 SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
25905 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
25906 BinOp.getOperand(1), IndexC);
25907
25908 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
25909 return DAG.getBitcast(VT, NarrowBinOp);
25910 }
25911
25912 return SDValue();
25913}
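// Illustrative example (AVX1-like types, for exposition only): extracting the
// upper 128 bits of a 256-bit AND whose operands are two-part concats,
//   (v2i64 extract_subvector
//       (v4i64 and (concat_vectors %a, %b), (concat_vectors %c, %d)), 2)
// becomes the narrow (v2i64 and %b, %d), skipping both concatenations.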
25914
25915/// If we are extracting a subvector from a wide vector load, convert to a
25916/// narrow load to eliminate the extraction:
25917/// (extract_subvector (load wide vector)) --> (load narrow vector)
25918static SDValue narrowExtractedVectorLoad(EVT VT, SDValue Src, unsigned Index,
25919 const SDLoc &DL, SelectionDAG &DAG) {
25920 // TODO: Add support for big-endian. The offset calculation must be adjusted.
25921 if (DAG.getDataLayout().isBigEndian())
25922 return SDValue();
25923
25924 auto *Ld = dyn_cast<LoadSDNode>(Src);
25925 if (!Ld || !ISD::isNormalLoad(Ld) || !Ld->isSimple())
25926 return SDValue();
25927
25928 // We can only create byte sized loads.
25929 if (!VT.isByteSized())
25930 return SDValue();
25931
25932 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25933 if (!TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, VT))
25934 return SDValue();
25935
25936 unsigned NumElts = VT.getVectorMinNumElements();
25937 // A fixed length vector being extracted from a scalable vector
25938 // may not be any *smaller* than the scalable one.
25939 if (Index == 0 && NumElts >= Ld->getValueType(0).getVectorMinNumElements())
25940 return SDValue();
25941
25942 // The definition of EXTRACT_SUBVECTOR states that the index must be a
25943 // multiple of the minimum number of elements in the result type.
25944 assert(Index % NumElts == 0 && "The extract subvector index is not a "
25945 "multiple of the result's element count");
25946
25947 // It's fine to use TypeSize here as we know the offset will not be negative.
25948 TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
25949 std::optional<unsigned> ByteOffset;
25950 if (Offset.isFixed())
25951 ByteOffset = Offset.getFixedValue();
25952
25953 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT, ByteOffset))
25954 return SDValue();
25955
25956 // The narrow load will be offset from the base address of the old load if
25957 // we are extracting from something besides index 0 (little-endian).
25958 // TODO: Use "BaseIndexOffset" to make this more effective.
25959 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
25960
25961 MachineFunction &MF = DAG.getMachineFunction();
25962 MachineMemOperand *MMO;
25963 if (Offset.isScalable()) {
25964 MachinePointerInfo MPI =
25965 MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
25966 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, VT.getStoreSize());
25967 } else
25968 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedValue(),
25969 VT.getStoreSize());
25970
25971 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
25972 DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
25973 return NewLd;
25974}
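// Illustrative example (little-endian, types for exposition only, assuming the
// target allows reducing the load width): extracting the upper half of a
// loaded v4i32,
//   (v2i32 extract_subvector (v4i32 load %p), 2)
// becomes a v2i32 load from %p plus 8 bytes, with the chain updated via
// makeEquivalentMemoryOrdering so later memory operations stay ordered.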
25975
25976/// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)),
25977/// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?),
25978/// EXTRACT_SUBVECTOR(Op?, ?),
25979/// Mask'))
25980/// iff it is legal and profitable to do so. Notably, the trimmed mask
25981/// (containing only the elements that are extracted)
25982/// must reference at most two subvectors.
25983static SDValue foldExtractSubvectorFromShuffleVector(EVT NarrowVT, SDValue Src,
25984 unsigned Index,
25985 const SDLoc &DL,
25986 SelectionDAG &DAG,
25987 bool LegalOperations) {
25988 // Only deal with non-scalable vectors.
25989 EVT WideVT = Src.getValueType();
25990 if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
25991 return SDValue();
25992
25993 // The operand must be a shufflevector.
25994 auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(Src);
25995 if (!WideShuffleVector)
25996 return SDValue();
25997
25998 // The old shuffle needs to go away.
25999 if (!WideShuffleVector->hasOneUse())
26000 return SDValue();
26001
26002 // And the narrow shufflevector that we'll form must be legal.
26003 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26004 if (LegalOperations &&
26005 !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, NarrowVT))
26006 return SDValue();
26007
26008 int NumEltsExtracted = NarrowVT.getVectorNumElements();
26009 assert((Index % NumEltsExtracted) == 0 &&
26010 "Extract index is not a multiple of the output vector length.");
26011
26012 int WideNumElts = WideVT.getVectorNumElements();
26013
26014 SmallVector<int, 16> NewMask;
26015 NewMask.reserve(NumEltsExtracted);
26016 SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2>
26017 DemandedSubvectors;
26018
26019 // Try to decode the wide mask into narrow mask from at most two subvectors.
26020 for (int M : WideShuffleVector->getMask().slice(Index, NumEltsExtracted)) {
26021 assert((M >= -1) && (M < (2 * WideNumElts)) &&
26022 "Out-of-bounds shuffle mask?");
26023
26024 if (M < 0) {
26025 // Does not depend on operands, does not require adjustment.
26026 NewMask.emplace_back(M);
26027 continue;
26028 }
26029
26030 // From which operand of the shuffle does this shuffle mask element pick?
26031 int WideShufOpIdx = M / WideNumElts;
26032 // Which element of that operand is picked?
26033 int OpEltIdx = M % WideNumElts;
26034
26035 assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M &&
26036 "Shuffle mask vector decomposition failure.");
26037
26038 // And which NumEltsExtracted-sized subvector of that operand is that?
26039 int OpSubvecIdx = OpEltIdx / NumEltsExtracted;
26040 // And which element within that subvector of that operand is that?
26041 int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted;
26042
26043 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx &&
26044 "Shuffle mask subvector decomposition failure.");
26045
26046 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted +
26047 WideShufOpIdx * WideNumElts) == M &&
26048 "Shuffle mask full decomposition failure.");
26049
26050 SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
26051
26052 if (Op.isUndef()) {
26053 // Picking from an undef operand. Let's adjust mask instead.
26054 NewMask.emplace_back(-1);
26055 continue;
26056 }
26057
26058 const std::pair<SDValue, int> DemandedSubvector =
26059 std::make_pair(Op, OpSubvecIdx);
26060
26061 if (DemandedSubvectors.insert(DemandedSubvector)) {
26062 if (DemandedSubvectors.size() > 2)
26063 return SDValue(); // We can't handle more than two subvectors.
26064 // How many elements into the WideVT does this subvector start?
26065 int Index = NumEltsExtracted * OpSubvecIdx;
26066 // Bail out if the extraction isn't going to be cheap.
26067 if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index))
26068 return SDValue();
26069 }
26070
26071 // Ok, but from which operand of the new shuffle will this element pick?
26072 int NewOpIdx =
26073 getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector);
26074 assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index.");
26075
26076 int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted;
26077 NewMask.emplace_back(AdjM);
26078 }
26079 assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask.");
26080 assert(DemandedSubvectors.size() <= 2 &&
26081 "Should have ended up demanding at most two subvectors.");
26082
26083 // Did we discover that the shuffle does not actually depend on operands?
26084 if (DemandedSubvectors.empty())
26085 return DAG.getUNDEF(NarrowVT);
26086
26087 // Profitability check: only deal with extractions from the first subvector
26088 // unless the mask becomes an identity mask.
26089 if (!ShuffleVectorInst::isIdentityMask(NewMask, NewMask.size()) ||
26090 any_of(NewMask, [](int M) { return M < 0; }))
26091 for (auto &DemandedSubvector : DemandedSubvectors)
26092 if (DemandedSubvector.second != 0)
26093 return SDValue();
26094
26095 // We still perform the exact same EXTRACT_SUBVECTOR, just on different
26096 // operand[s]/index[es], so there is no point in checking for its legality.
26097
26098 // Do not turn a legal shuffle into an illegal one.
26099 if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
26100 !TLI.isShuffleMaskLegal(NewMask, NarrowVT))
26101 return SDValue();
26102
26104 for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
26105 &DemandedSubvector : DemandedSubvectors) {
26106 // How many elements into the WideVT does this subvector start?
26107 int Index = NumEltsExtracted * DemandedSubvector.second;
26108 SDValue IndexC = DAG.getVectorIdxConstant(Index, DL);
26109 NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT,
26110 DemandedSubvector.first, IndexC));
26111 }
26112 assert((NewOps.size() == 1 || NewOps.size() == 2) &&
26113 "Should end up with either one or two ops");
26114
26115 // If we ended up with only one operand, pad with an undef.
26116 if (NewOps.size() == 1)
26117 NewOps.emplace_back(DAG.getUNDEF(NarrowVT));
26118
26119 return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask);
26120}
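// Illustrative example (types for exposition only): extracting the low half of
// an interleaving shuffle,
//   (v2i32 extract_subvector (v4i32 vector_shuffle<0,4,1,5> %a, %b), 0)
// only needs the low halves of %a and %b, so it can be rewritten as
//   (v2i32 vector_shuffle<0,2> (extract_subvector %a, 0),
//                              (extract_subvector %b, 0))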
26121
26122SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
26123 EVT NVT = N->getValueType(0);
26124 SDValue V = N->getOperand(0);
26125 uint64_t ExtIdx = N->getConstantOperandVal(1);
26126 SDLoc DL(N);
26127
26128 // Extract from UNDEF is UNDEF.
26129 if (V.isUndef())
26130 return DAG.getUNDEF(NVT);
26131
26132 if (SDValue NarrowLoad = narrowExtractedVectorLoad(NVT, V, ExtIdx, DL, DAG))
26133 return NarrowLoad;
26134
26135 // Combine an extract of an extract into a single extract_subvector.
26136 // ext (ext X, C), 0 --> ext X, C
26137 if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
26138 // The index has to be a multiple of the new result type's known minimum
26139 // vector length.
26140 if (V.getConstantOperandVal(1) % NVT.getVectorMinNumElements() == 0 &&
26141 TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
26142 V.getConstantOperandVal(1)) &&
26144 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, V.getOperand(0),
26145 V.getOperand(1));
26146 }
26147 }
26148
26149 // ty1 extract_vector(ty2 splat(V))) -> ty1 splat(V)
26150 if (V.getOpcode() == ISD::SPLAT_VECTOR)
26151 if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse())
26152 if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT))
26153 return DAG.getSplatVector(NVT, DL, V.getOperand(0));
26154
26155 // extract_subvector(insert_subvector(x,y,c1),c2)
26156 // --> extract_subvector(y,c2-c1)
26157 // iff we're just extracting from the inserted subvector.
26158 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
26159 SDValue InsSub = V.getOperand(1);
26160 EVT InsSubVT = InsSub.getValueType();
26161 unsigned NumInsElts = InsSubVT.getVectorMinNumElements();
26162 unsigned InsIdx = V.getConstantOperandVal(2);
26163 unsigned NumSubElts = NVT.getVectorMinNumElements();
26164 if (InsIdx <= ExtIdx && (ExtIdx + NumSubElts) <= (InsIdx + NumInsElts) &&
26165 TLI.isExtractSubvectorCheap(NVT, InsSubVT, ExtIdx - InsIdx) &&
26166 InsSubVT.isFixedLengthVector() && NVT.isFixedLengthVector() &&
26167 V.getValueType().isFixedLengthVector())
26168 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, InsSub,
26169 DAG.getVectorIdxConstant(ExtIdx - InsIdx, DL));
26170 }
26171
26172 // Try to move vector bitcast after extract_subv by scaling extraction index:
26173 // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
26174 if (V.getOpcode() == ISD::BITCAST &&
26175 V.getOperand(0).getValueType().isVector() &&
26176 (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
26177 SDValue SrcOp = V.getOperand(0);
26178 EVT SrcVT = SrcOp.getValueType();
26179 unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
26180 unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
26181 if ((SrcNumElts % DestNumElts) == 0) {
26182 unsigned SrcDestRatio = SrcNumElts / DestNumElts;
26183 ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
26184 EVT NewExtVT =
26185 EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), NewExtEC);
26186 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
26187 SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
26188 SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
26189 V.getOperand(0), NewIndex);
26190 return DAG.getBitcast(NVT, NewExtract);
26191 }
26192 }
26193 if ((DestNumElts % SrcNumElts) == 0) {
26194 unsigned DestSrcRatio = DestNumElts / SrcNumElts;
26195 if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
26196 ElementCount NewExtEC =
26197 NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
26198 EVT ScalarVT = SrcVT.getScalarType();
26199 if ((ExtIdx % DestSrcRatio) == 0) {
26200 unsigned IndexValScaled = ExtIdx / DestSrcRatio;
26201 EVT NewExtVT =
26202 EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
26203 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
26204 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
26205 SDValue NewExtract =
26206 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
26207 V.getOperand(0), NewIndex);
26208 return DAG.getBitcast(NVT, NewExtract);
26209 }
26210 if (NewExtEC.isScalar() &&
26211 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
26212 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
26213 SDValue NewExtract =
26214 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
26215 V.getOperand(0), NewIndex);
26216 return DAG.getBitcast(NVT, NewExtract);
26217 }
26218 }
26219 }
26220 }
26221 }
26222
26223 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
26224 unsigned ExtNumElts = NVT.getVectorMinNumElements();
26225 EVT ConcatSrcVT = V.getOperand(0).getValueType();
26226 assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
26227 "Concat and extract subvector do not change element type");
26228
26229 unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
26230 unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
26231
26232 // If the concatenated source types match this extract, it's a direct
26233 // simplification:
26234 // extract_subvec (concat V1, V2, ...), i --> Vi
26235 if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
26236 return V.getOperand(ConcatOpIdx);
26237
26238 // If the length of each concatenated source vector is a multiple of the
26239 // length of this extract, then extract a fraction of one source vector from a
26240 // concat operand. Example:
26241 // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
26242 // v2i8 extract_subvec v8i8 Y, 6
26243 if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
26244 ConcatSrcNumElts % ExtNumElts == 0) {
26245 unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
26246 assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
26247 "Trying to extract from >1 concat operand?");
26248 assert(NewExtIdx % ExtNumElts == 0 &&
26249 "Extract index is not a multiple of the input vector length.");
26250 SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
26251 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
26252 V.getOperand(ConcatOpIdx), NewIndexC);
26253 }
26254 }
26255
26256 if (SDValue Shuffle = foldExtractSubvectorFromShuffleVector(
26257 NVT, V, ExtIdx, DL, DAG, LegalOperations))
26258 return Shuffle;
26259
26260 if (SDValue NarrowBOp =
26261 narrowExtractedVectorBinOp(NVT, V, ExtIdx, DL, DAG, LegalOperations))
26262 return NarrowBOp;
26263
26265
26266 // If the input is a build vector, try to make a smaller build vector.
26267 if (V.getOpcode() == ISD::BUILD_VECTOR) {
26268 EVT InVT = V.getValueType();
26269 unsigned ExtractSize = NVT.getSizeInBits();
26270 unsigned EltSize = InVT.getScalarSizeInBits();
26271 // Only do this if we won't split any elements.
26272 if (ExtractSize % EltSize == 0) {
26273 unsigned NumElems = ExtractSize / EltSize;
26274 EVT EltVT = InVT.getVectorElementType();
26275 EVT ExtractVT =
26276 NumElems == 1 ? EltVT
26277 : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
26278 if ((Level < AfterLegalizeDAG ||
26279 (NumElems == 1 ||
26280 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
26281 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
26282 unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
26283
26284 if (NumElems == 1) {
26285 SDValue Src = V->getOperand(IdxVal);
26286 if (EltVT != Src.getValueType())
26287 Src = DAG.getNode(ISD::TRUNCATE, DL, EltVT, Src);
26288 return DAG.getBitcast(NVT, Src);
26289 }
26290
26291 // Extract the pieces from the original build_vector.
26292 SDValue BuildVec =
26293 DAG.getBuildVector(ExtractVT, DL, V->ops().slice(IdxVal, NumElems));
26294 return DAG.getBitcast(NVT, BuildVec);
26295 }
26296 }
26297 }
26298
26299 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
26300 // Handle only simple case where vector being inserted and vector
26301 // being extracted are of same size.
26302 EVT SmallVT = V.getOperand(1).getValueType();
26303 if (NVT.bitsEq(SmallVT)) {
26304 // Combine:
26305 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
26306 // Into:
26307 // indices are equal or bit offsets are equal => V1
26308 // otherwise => (extract_subvec V1, ExtIdx)
26309 uint64_t InsIdx = V.getConstantOperandVal(2);
26310 if (InsIdx * SmallVT.getScalarSizeInBits() ==
26311 ExtIdx * NVT.getScalarSizeInBits()) {
26312 if (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))
26313 return DAG.getBitcast(NVT, V.getOperand(1));
26314 } else {
26315 return DAG.getNode(
26316 ISD::EXTRACT_SUBVECTOR, DL, NVT,
26317 DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
26318 N->getOperand(1));
26319 }
26320 }
26321 }
26322
26323 // If only EXTRACT_SUBVECTOR nodes use the source vector we can
26324 // simplify it based on the (valid) extractions.
26325 if (!V.getValueType().isScalableVector() &&
26326 llvm::all_of(V->users(), [&](SDNode *Use) {
26327 return Use->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
26328 Use->getOperand(0) == V;
26329 })) {
26330 unsigned NumElts = V.getValueType().getVectorNumElements();
26331 APInt DemandedElts = APInt::getZero(NumElts);
26332 for (SDNode *User : V->users()) {
26333 unsigned ExtIdx = User->getConstantOperandVal(1);
26334 unsigned NumSubElts = User->getValueType(0).getVectorNumElements();
26335 DemandedElts.setBits(ExtIdx, ExtIdx + NumSubElts);
26336 }
26337 if (SimplifyDemandedVectorElts(V, DemandedElts, /*AssumeSingleUse=*/true)) {
26338 // We simplified the vector operand of this extract subvector. If this
26339 // extract is not dead, visit it again so it is folded properly.
26340 if (N->getOpcode() != ISD::DELETED_NODE)
26341 AddToWorklist(N);
26342 return SDValue(N, 0);
26343 }
26344 } else {
26345 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
26346 return SDValue(N, 0);
26347 }
26348
26349 return SDValue();
26350}
26351
26352/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
26353/// followed by concatenation. Narrow vector ops may have better performance
26354/// than wide ops, and this can unlock further narrowing of other vector ops.
26355/// Targets can invert this transform later if it is not profitable.
26356static SDValue combineShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
26357 SelectionDAG &DAG) {
26358 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
26359 if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
26360 N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
26361 !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
26362 return SDValue();
26363
26364 // Split the wide shuffle mask into halves. Any mask element that is accessing
26365 // operand 1 is offset down to account for narrowing of the vectors.
26366 ArrayRef<int> Mask = Shuf->getMask();
26367 EVT VT = Shuf->getValueType(0);
26368 unsigned NumElts = VT.getVectorNumElements();
26369 unsigned HalfNumElts = NumElts / 2;
26370 SmallVector<int, 16> Mask0(HalfNumElts, -1);
26371 SmallVector<int, 16> Mask1(HalfNumElts, -1);
26372 for (unsigned i = 0; i != NumElts; ++i) {
26373 if (Mask[i] == -1)
26374 continue;
26375 // If we reference the upper (undef) subvector then the element is undef.
26376 if ((Mask[i] % NumElts) >= HalfNumElts)
26377 continue;
26378 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
26379 if (i < HalfNumElts)
26380 Mask0[i] = M;
26381 else
26382 Mask1[i - HalfNumElts] = M;
26383 }
26384
26385 // Ask the target if this is a valid transform.
26386 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26387 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
26388 HalfNumElts);
26389 if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
26390 !TLI.isShuffleMaskLegal(Mask1, HalfVT))
26391 return SDValue();
26392
26393 // shuffle (concat X, undef), (concat Y, undef), Mask -->
26394 // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
26395 SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
26396 SDLoc DL(Shuf);
26397 SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
26398 SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
26399 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
26400}
26401
26402// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
26403// or turn a shuffle of a single concat into simpler shuffle then concat.
26404static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
26405 EVT VT = N->getValueType(0);
26406 unsigned NumElts = VT.getVectorNumElements();
26407
26408 SDValue N0 = N->getOperand(0);
26409 SDValue N1 = N->getOperand(1);
26410 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
26411 ArrayRef<int> Mask = SVN->getMask();
26412
26414 EVT ConcatVT = N0.getOperand(0).getValueType();
26415 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
26416 unsigned NumConcats = NumElts / NumElemsPerConcat;
26417
26418 auto IsUndefMaskElt = [](int i) { return i == -1; };
26419
26420 // Special case: shuffle(concat(A,B)) can be more efficiently represented
26421 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
26422 // half vector elements.
26423 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
26424 llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
26425 IsUndefMaskElt)) {
26426 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
26427 N0.getOperand(1),
26428 Mask.slice(0, NumElemsPerConcat));
26429 N1 = DAG.getUNDEF(ConcatVT);
26430 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
26431 }
26432
26433 // Look at every vector that's inserted. We're looking for exact
26434 // subvector-sized copies from a concatenated vector
26435 for (unsigned I = 0; I != NumConcats; ++I) {
26436 unsigned Begin = I * NumElemsPerConcat;
26437 ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
26438
26439 // Make sure we're dealing with a copy.
26440 if (llvm::all_of(SubMask, IsUndefMaskElt)) {
26441 Ops.push_back(DAG.getUNDEF(ConcatVT));
26442 continue;
26443 }
26444
26445 int OpIdx = -1;
26446 for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
26447 if (IsUndefMaskElt(SubMask[i]))
26448 continue;
26449 if ((SubMask[i] % (int)NumElemsPerConcat) != i)
26450 return SDValue();
26451 int EltOpIdx = SubMask[i] / NumElemsPerConcat;
26452 if (0 <= OpIdx && EltOpIdx != OpIdx)
26453 return SDValue();
26454 OpIdx = EltOpIdx;
26455 }
26456 assert(0 <= OpIdx && "Unknown concat_vectors op");
26457
26458 if (OpIdx < (int)N0.getNumOperands())
26459 Ops.push_back(N0.getOperand(OpIdx));
26460 else
26461 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
26462 }
26463
26464 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
26465}
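// Illustrative example (types for exposition only): when every lane of the
// shuffle copies a whole concat operand in place,
//   (v4i32 vector_shuffle<2,3,4,5> (concat_vectors %a, %b),
//                                  (concat_vectors %c, %d))
// is rebuilt directly as (v4i32 concat_vectors %b, %c).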
26466
26467// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
26468// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
26469//
26470// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
26471// a simplification in some sense, but it isn't appropriate in general: some
26472// BUILD_VECTORs are substantially cheaper than others. The general case
26473// of a BUILD_VECTOR requires inserting each element individually (or
26474// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
26475// all constants is a single constant pool load. A BUILD_VECTOR where each
26476// element is identical is a splat. A BUILD_VECTOR where most of the operands
26477// are undef lowers to a small number of element insertions.
26478//
26479// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
26480// We don't fold shuffles where one side is a non-zero constant, and we don't
26481// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
26482// non-constant operands. This seems to work out reasonably well in practice.
26483static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
26484 SelectionDAG &DAG,
26485 const TargetLowering &TLI) {
26486 EVT VT = SVN->getValueType(0);
26487 unsigned NumElts = VT.getVectorNumElements();
26488 SDValue N0 = SVN->getOperand(0);
26489 SDValue N1 = SVN->getOperand(1);
26490
26491 if (!N0->hasOneUse())
26492 return SDValue();
26493
26494 // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
26495 // discussed above.
26496 if (!N1.isUndef()) {
26497 if (!N1->hasOneUse())
26498 return SDValue();
26499
26500 bool N0AnyConst = isAnyConstantBuildVector(N0);
26501 bool N1AnyConst = isAnyConstantBuildVector(N1);
26502 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
26503 return SDValue();
26504 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
26505 return SDValue();
26506 }
26507
26508 // If both inputs are splats of the same value then we can safely merge this
26509 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
26510 bool IsSplat = false;
26511 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
26512 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
26513 if (BV0 && BV1)
26514 if (SDValue Splat0 = BV0->getSplatValue())
26515 IsSplat = (Splat0 == BV1->getSplatValue());
26516
26517 SmallVector<SDValue, 16> Ops;
26518 SmallSet<SDValue, 16> DuplicateOps;
26519 for (int M : SVN->getMask()) {
26520 SDValue Op = DAG.getUNDEF(VT.getScalarType());
26521 if (M >= 0) {
26522 int Idx = M < (int)NumElts ? M : M - NumElts;
26523 SDValue &S = (M < (int)NumElts ? N0 : N1);
26524 if (S.getOpcode() == ISD::BUILD_VECTOR) {
26525 Op = S.getOperand(Idx);
26526 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
26527 SDValue Op0 = S.getOperand(0);
26528 Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
26529 } else {
26530 // Operand can't be combined - bail out.
26531 return SDValue();
26532 }
26533 }
26534
26535 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
26536 // generating a splat; semantically, this is fine, but it's likely to
26537 // generate low-quality code if the target can't reconstruct an appropriate
26538 // shuffle.
26539 if (!Op.isUndef() && !isIntOrFPConstant(Op))
26540 if (!IsSplat && !DuplicateOps.insert(Op).second)
26541 return SDValue();
26542
26543 Ops.push_back(Op);
26544 }
26545
26546 // BUILD_VECTOR requires all inputs to be of the same type, find the
26547 // maximum type and extend them all.
26548 EVT SVT = VT.getScalarType();
26549 if (SVT.isInteger())
26550 for (SDValue &Op : Ops)
26551 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
26552 if (SVT != VT.getScalarType())
26553 for (SDValue &Op : Ops)
26554 Op = Op.isUndef() ? DAG.getUNDEF(SVT)
26555 : (TLI.isZExtFree(Op.getValueType(), SVT)
26556 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
26557 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT));
26558 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
26559}
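// Illustrative example (types for exposition only, assuming the scalar inputs
// are non-constant values of the result element type): both shuffle inputs are
// built from scalars, so the shuffle folds into one BUILD_VECTOR:
//   (v2i32 vector_shuffle<0,2> (build_vector %a, %b), (scalar_to_vector %c))
// --> (v2i32 build_vector %a, %c)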
26560
26561// Match shuffles that can be converted to *_vector_extend_in_reg.
26562// This is often generated during legalization.
26563// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)),
26564// and returns the EVT to which the extension should be performed.
26565// NOTE: this assumes that the src is the first operand of the shuffle.
26566static std::optional<EVT> canCombineShuffleToExtendVectorInreg(
26567 unsigned Opcode, EVT VT, std::function<bool(unsigned)> Match,
26568 SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
26569 bool LegalOperations) {
26570 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
26571
26572 // TODO Add support for big-endian when we have a test case.
26573 if (!VT.isInteger() || IsBigEndian)
26574 return std::nullopt;
26575
26576 unsigned NumElts = VT.getVectorNumElements();
26577 unsigned EltSizeInBits = VT.getScalarSizeInBits();
26578
26579 // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
26580 // power-of-2 extensions as they are the most likely.
26581 // FIXME: should try Scale == NumElts case too,
26582 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
26583 // The vector width must be a multiple of Scale.
26584 if (NumElts % Scale != 0)
26585 continue;
26586
26587 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
26588 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
26589
26590 if ((LegalTypes && !TLI.isTypeLegal(OutVT)) ||
26591 (LegalOperations && !TLI.isOperationLegalOrCustom(Opcode, OutVT)))
26592 continue;
26593
26594 if (Match(Scale))
26595 return OutVT;
26596 }
26597
26598 return std::nullopt;
26599}
26600
26601// Match shuffles that can be converted to any_vector_extend_in_reg.
26602// This is often generated during legalization.
26603// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
26604static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN,
26605 SelectionDAG &DAG,
26606 const TargetLowering &TLI,
26607 bool LegalOperations) {
26608 EVT VT = SVN->getValueType(0);
26609 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
26610
26611 // TODO Add support for big-endian when we have a test case.
26612 if (!VT.isInteger() || IsBigEndian)
26613 return SDValue();
26614
26615 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
26616 auto isAnyExtend = [NumElts = VT.getVectorNumElements(),
26617 Mask = SVN->getMask()](unsigned Scale) {
26618 for (unsigned i = 0; i != NumElts; ++i) {
26619 if (Mask[i] < 0)
26620 continue;
26621 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
26622 continue;
26623 return false;
26624 }
26625 return true;
26626 };
26627
26628 unsigned Opcode = ISD::ANY_EXTEND_VECTOR_INREG;
26629 SDValue N0 = SVN->getOperand(0);
26630 // Never create an illegal type. Only create unsupported operations if we
26631 // are pre-legalization.
26632 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
26633 Opcode, VT, isAnyExtend, DAG, TLI, /*LegalTypes=*/true, LegalOperations);
26634 if (!OutVT)
26635 return SDValue();
26636 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT, N0));
26637}
26638
26639// Match shuffles that can be converted to zero_extend_vector_inreg.
26640// This is often generated during legalization.
26641// e.g. v4i32 <0,z,1,u> -> (v2i64 zero_extend_vector_inreg(v4i32 src))
26642static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN,
26643 SelectionDAG &DAG,
26644 const TargetLowering &TLI,
26645 bool LegalOperations) {
26646 bool LegalTypes = true;
26647 EVT VT = SVN->getValueType(0);
26648 assert(!VT.isScalableVector() && "Encountered scalable shuffle?");
26649 unsigned NumElts = VT.getVectorNumElements();
26650 unsigned EltSizeInBits = VT.getScalarSizeInBits();
26651
26652 // TODO: add support for big-endian when we have a test case.
26653 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
26654 if (!VT.isInteger() || IsBigEndian)
26655 return SDValue();
26656
26657 SmallVector<int, 16> Mask(SVN->getMask());
26658 auto ForEachDecomposedIndice = [NumElts, &Mask](auto Fn) {
26659 for (int &Indice : Mask) {
26660 if (Indice < 0)
26661 continue;
26662 int OpIdx = (unsigned)Indice < NumElts ? 0 : 1;
26663 int OpEltIdx = (unsigned)Indice < NumElts ? Indice : Indice - NumElts;
26664 Fn(Indice, OpIdx, OpEltIdx);
26665 }
26666 };
26667
26668 // Which elements of which operand does this shuffle demand?
26669 std::array<APInt, 2> OpsDemandedElts;
26670 for (APInt &OpDemandedElts : OpsDemandedElts)
26671 OpDemandedElts = APInt::getZero(NumElts);
26672 ForEachDecomposedIndice(
26673 [&OpsDemandedElts](int &Indice, int OpIdx, int OpEltIdx) {
26674 OpsDemandedElts[OpIdx].setBit(OpEltIdx);
26675 });
26676
26677 // Element-wise(!), which of these demanded elements are known to be zero?
26678 std::array<APInt, 2> OpsKnownZeroElts;
26679 for (auto I : zip(SVN->ops(), OpsDemandedElts, OpsKnownZeroElts))
26680 std::get<2>(I) =
26681 DAG.computeVectorKnownZeroElements(std::get<0>(I), std::get<1>(I));
26682
26683 // Manifest zeroable element knowledge in the shuffle mask.
26684 // NOTE: we don't have 'zeroable' sentinel value in generic DAG,
26685 // this is a local invention, but it won't leak into DAG.
26686 // FIXME: should we not manifest them, but just check when matching?
26687 bool HadZeroableElts = false;
26688 ForEachDecomposedIndice([&OpsKnownZeroElts, &HadZeroableElts](
26689 int &Indice, int OpIdx, int OpEltIdx) {
26690 if (OpsKnownZeroElts[OpIdx][OpEltIdx]) {
26691 Indice = -2; // Zeroable element.
26692 HadZeroableElts = true;
26693 }
26694 });
26695
26696 // Don't proceed unless we've refined at least one zeroable mask index.
26697 // If we didn't, then we are still trying to match the same shuffle mask
26698 // we previously tried to match as ISD::ANY_EXTEND_VECTOR_INREG,
26699 // and evidently failed. Proceeding will lead to endless combine loops.
26700 if (!HadZeroableElts)
26701 return SDValue();
26702
26703 // The shuffle may be more fine-grained than we want. Widen elements first.
26704 // FIXME: should we do this before manifesting zeroable shuffle mask indices?
26705 SmallVector<int, 16> ScaledMask;
26706 getShuffleMaskWithWidestElts(Mask, ScaledMask);
26707 assert(Mask.size() >= ScaledMask.size() &&
26708 Mask.size() % ScaledMask.size() == 0 && "Unexpected mask widening.");
26709 int Prescale = Mask.size() / ScaledMask.size();
26710
26711 NumElts = ScaledMask.size();
26712 EltSizeInBits *= Prescale;
26713
26714 EVT PrescaledVT = EVT::getVectorVT(
26715 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits),
26716 NumElts);
26717
26718 if (LegalTypes && !TLI.isTypeLegal(PrescaledVT) && TLI.isTypeLegal(VT))
26719 return SDValue();
26720
26721 // For example,
26722 // shuffle<0,z,1,-1> == (v2i64 zero_extend_vector_inreg(v4i32))
26723 // But not shuffle<z,z,1,-1> and not shuffle<0,z,z,-1> ! (for same types)
26724 auto isZeroExtend = [NumElts, &ScaledMask](unsigned Scale) {
26725 assert(Scale >= 2 && Scale <= NumElts && NumElts % Scale == 0 &&
26726 "Unexpected mask scaling factor.");
26727 ArrayRef<int> Mask = ScaledMask;
26728 for (unsigned SrcElt = 0, NumSrcElts = NumElts / Scale;
26729 SrcElt != NumSrcElts; ++SrcElt) {
26730 // Analyze the shuffle mask in Scale-sized chunks.
26731 ArrayRef<int> MaskChunk = Mask.take_front(Scale);
26732 assert(MaskChunk.size() == Scale && "Unexpected mask size.");
26733 Mask = Mask.drop_front(MaskChunk.size());
26734 // The first index in this chunk must be SrcElt, but not zero!
26735 // FIXME: undef should be fine, but that produces a more-defined result.
26736 if (int FirstIndice = MaskChunk[0]; (unsigned)FirstIndice != SrcElt)
26737 return false;
26738 // The rest of the indices in this chunk must be zeros.
26739 // FIXME: undef should be fine, but that produces a more-defined result.
26740 if (!all_of(MaskChunk.drop_front(1),
26741 [](int Indice) { return Indice == -2; }))
26742 return false;
26743 }
26744 assert(Mask.empty() && "Did not process the whole mask?");
26745 return true;
26746 };
26747
26748 unsigned Opcode = ISD::ZERO_EXTEND_VECTOR_INREG;
26749 for (bool Commuted : {false, true}) {
26750 SDValue Op = SVN->getOperand(!Commuted ? 0 : 1);
26751 if (Commuted)
26752 ShuffleVectorSDNode::commuteMask(ScaledMask);
26753 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
26754 Opcode, PrescaledVT, isZeroExtend, DAG, TLI, LegalTypes,
26755 LegalOperations);
26756 if (OutVT)
26757 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT,
26758 DAG.getBitcast(PrescaledVT, Op)));
26759 }
26760 return SDValue();
26761}
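// Worked example for the combine above (illustrative, not part of the
// source): a v4i32 shuffle with mask <0,7,1,7> whose second operand is known
// to be zero in lane 3 has both '7' entries rewritten to the local zeroable
// sentinel -2, giving <0,-2,1,-2>. isZeroExtend(2) then accepts the chunks
// [0,-2] and [1,-2], so the node is rebuilt as a bitcast of
// (v2i64 zero_extend_vector_inreg (v4i32 op0)).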
26762
26763// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
26764// each source element of a large type into the lowest elements of a smaller
26765// destination type. This is often generated during legalization.
26766// If the source node itself was a '*_extend_vector_inreg' node then we should
26767// then be able to remove it.
26768 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
26769 SelectionDAG &DAG) {
26770 EVT VT = SVN->getValueType(0);
26771 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
26772
26773 // TODO Add support for big-endian when we have a test case.
26774 if (!VT.isInteger() || IsBigEndian)
26775 return SDValue();
26776
26777 SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
26778
26779 unsigned Opcode = N0.getOpcode();
26780 if (!ISD::isExtVecInRegOpcode(Opcode))
26781 return SDValue();
26782
26783 SDValue N00 = N0.getOperand(0);
26784 ArrayRef<int> Mask = SVN->getMask();
26785 unsigned NumElts = VT.getVectorNumElements();
26786 unsigned EltSizeInBits = VT.getScalarSizeInBits();
26787 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
26788 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
26789
26790 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
26791 return SDValue();
26792 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
26793
26794 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
26795 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
26796 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
26797 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
26798 for (unsigned i = 0; i != NumElts; ++i) {
26799 if (Mask[i] < 0)
26800 continue;
26801 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
26802 continue;
26803 return false;
26804 }
26805 return true;
26806 };
26807
26808 // At the moment we just handle the case where we've truncated back to the
26809 // same size as before the extension.
26810 // TODO: handle more extension/truncation cases as cases arise.
26811 if (EltSizeInBits != ExtSrcSizeInBits)
26812 return SDValue();
26813
26814 // We can remove *extend_vector_inreg only if the truncation happens at
26815 // the same scale as the extension.
26816 if (isTruncate(ExtScale))
26817 return DAG.getBitcast(VT, N00);
26818
26819 return SDValue();
26820}
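// Worked example for the combine above (illustrative): if the shuffle
// operand is a v8i16 view of (v4i32 zero_extend_vector_inreg (v8i16 X)), the
// mask <0,2,4,6,-1,-1,-1,-1> selects the low half of every extended element,
// so ExtScale == 2, isTruncate(2) holds, and the shuffle plus extension fold
// away to a bitcast of X.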
26821
26822// Combine shuffles of splat-shuffles of the form:
26823// shuffle (shuffle V, undef, splat-mask), undef, M
26824// If splat-mask contains undef elements, we need to be careful about
26825 // introducing undefs in the folded mask that are not the result of composing
26826// the masks of the shuffles.
26827 static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
26828 SelectionDAG &DAG) {
26829 EVT VT = Shuf->getValueType(0);
26830 unsigned NumElts = VT.getVectorNumElements();
26831
26832 if (!Shuf->getOperand(1).isUndef())
26833 return SDValue();
26834
26835 // See if this unary non-splat shuffle actually *is* a splat shuffle,
26836 // in disguise, with all demanded elements being identical.
26837 // FIXME: this can be done per-operand.
26838 if (!Shuf->isSplat()) {
26839 APInt DemandedElts(NumElts, 0);
26840 for (int Idx : Shuf->getMask()) {
26841 if (Idx < 0)
26842 continue; // Ignore sentinel indices.
26843 assert((unsigned)Idx < NumElts && "Out-of-bounds shuffle indice?");
26844 DemandedElts.setBit(Idx);
26845 }
26846 assert(DemandedElts.popcount() > 1 && "Is a splat shuffle already?");
26847 APInt UndefElts;
26848 if (DAG.isSplatValue(Shuf->getOperand(0), DemandedElts, UndefElts)) {
26849 // Even if all demanded elements are splat, some of them could be undef.
26850 // Which lowest demanded element is *not* known-undef?
26851 std::optional<unsigned> MinNonUndefIdx;
26852 for (int Idx : Shuf->getMask()) {
26853 if (Idx < 0 || UndefElts[Idx])
26854 continue; // Ignore sentinel indices, and undef elements.
26855 MinNonUndefIdx = std::min<unsigned>(Idx, MinNonUndefIdx.value_or(~0U));
26856 }
26857 if (!MinNonUndefIdx)
26858 return DAG.getUNDEF(VT); // All undef - result is undef.
26859 assert(*MinNonUndefIdx < NumElts && "Expected valid element index.");
26860 SmallVector<int, 8> SplatMask(Shuf->getMask());
26861 for (int &Idx : SplatMask) {
26862 if (Idx < 0)
26863 continue; // Passthrough sentinel indices.
26864 // Otherwise, just pick the lowest demanded non-undef element.
26865 // Or sentinel undef, if we know we'd pick a known-undef element.
26866 Idx = UndefElts[Idx] ? -1 : *MinNonUndefIdx;
26867 }
26868 assert(SplatMask != Shuf->getMask() && "Expected mask to change!");
26869 return DAG.getVectorShuffle(VT, SDLoc(Shuf), Shuf->getOperand(0),
26870 Shuf->getOperand(1), SplatMask);
26871 }
26872 }
26873
26874 // If the inner operand is a known splat with no undefs, just return that directly.
26875 // TODO: Create DemandedElts mask from Shuf's mask.
26876 // TODO: Allow undef elements and merge with the shuffle code below.
26877 if (DAG.isSplatValue(Shuf->getOperand(0), /*AllowUndefs*/ false))
26878 return Shuf->getOperand(0);
26879
26880 auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
26881 if (!Splat || !Splat->isSplat())
26882 return SDValue();
26883
26884 ArrayRef<int> ShufMask = Shuf->getMask();
26885 ArrayRef<int> SplatMask = Splat->getMask();
26886 assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
26887
26888 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
26889 // every undef mask element in the splat-shuffle has a corresponding undef
26890 // element in the user-shuffle's mask or if the composition of mask elements
26891 // would result in undef.
26892 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
26893 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
26894 // In this case it is not legal to simplify to the splat-shuffle because we
26895 // may be exposing to the users of the shuffle an undef element at index 1
26896 // which was not there before the combine.
26897 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
26898 // In this case the composition of masks yields SplatMask, so it's ok to
26899 // simplify to the splat-shuffle.
26900 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
26901 // In this case the composed mask includes all undef elements of SplatMask
26902 // and in addition sets element zero to undef. It is safe to simplify to
26903 // the splat-shuffle.
26904 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
26905 ArrayRef<int> SplatMask) {
26906 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
26907 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
26908 SplatMask[UserMask[i]] != -1)
26909 return false;
26910 return true;
26911 };
26912 if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
26913 return Shuf->getOperand(0);
26914
26915 // Create a new shuffle with a mask that is composed of the two shuffles'
26916 // masks.
26917 SmallVector<int, 32> NewMask;
26918 for (int Idx : ShufMask)
26919 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
26920
26921 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
26922 Splat->getOperand(0), Splat->getOperand(1),
26923 NewMask);
26924}
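// Worked example for the combine above (illustrative): a unary shuffle with
// mask <1,3,1,3> demands elements 1 and 3 of its operand. If the operand is
// a splat over those demanded lanes but lane 1 is known undef, then
// MinNonUndefIdx is 3 and the mask is rewritten to <-1,3,-1,3>, i.e. an
// explicit splat of lane 3 that later combines can recognize.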
26925
26926// Combine shuffles of bitcasts into a shuffle of the bitcast type, providing
26927// the mask can be treated as a larger type.
26928 static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN,
26929 SelectionDAG &DAG,
26930 const TargetLowering &TLI,
26931 bool LegalOperations) {
26932 SDValue Op0 = SVN->getOperand(0);
26933 SDValue Op1 = SVN->getOperand(1);
26934 EVT VT = SVN->getValueType(0);
26935 if (Op0.getOpcode() != ISD::BITCAST)
26936 return SDValue();
26937 EVT InVT = Op0.getOperand(0).getValueType();
26938 if (!InVT.isVector() ||
26939 (!Op1.isUndef() && (Op1.getOpcode() != ISD::BITCAST ||
26940 Op1.getOperand(0).getValueType() != InVT)))
26941 return SDValue();
26942 if (isAnyConstantBuildVector(Op0.getOperand(0)) &&
26943 (Op1.isUndef() || isAnyConstantBuildVector(Op1.getOperand(0))))
26944 return SDValue();
26945
26946 int VTLanes = VT.getVectorNumElements();
26947 int InLanes = InVT.getVectorNumElements();
26948 if (VTLanes <= InLanes || VTLanes % InLanes != 0 ||
26949 (LegalOperations &&
26950 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, InVT)))
26951 return SDValue();
26952 int Factor = VTLanes / InLanes;
26953
26954 // Check that each group of lanes in the mask is either undef or makes a valid
26955 // mask for the wider lane type.
26956 ArrayRef<int> Mask = SVN->getMask();
26957 SmallVector<int> NewMask;
26958 if (!widenShuffleMaskElts(Factor, Mask, NewMask))
26959 return SDValue();
26960
26961 if (!TLI.isShuffleMaskLegal(NewMask, InVT))
26962 return SDValue();
26963
26964 // Create the new shuffle with the new mask and bitcast it back to the
26965 // original type.
26966 SDLoc DL(SVN);
26967 Op0 = Op0.getOperand(0);
26968 Op1 = Op1.isUndef() ? DAG.getUNDEF(InVT) : Op1.getOperand(0);
26969 SDValue NewShuf = DAG.getVectorShuffle(InVT, DL, Op0, Op1, NewMask);
26970 return DAG.getBitcast(VT, NewShuf);
26971}
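// Worked example for the combine above (illustrative): a v4i32 shuffle of
// two v2i64->v4i32 bitcasts with mask <2,3,0,1> widens by Factor == 2 to the
// v2i64 mask <1,0>, so (assuming that mask is legal for the target) it
// becomes a bitcast of a v2i64 shuffle that swaps the two 64-bit halves.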
26972
26973/// Combine shuffle of shuffle of the form:
26974/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
26975 static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
26976 SelectionDAG &DAG) {
26977 if (!OuterShuf->getOperand(1).isUndef())
26978 return SDValue();
26979 auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
26980 if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
26981 return SDValue();
26982
26983 ArrayRef<int> OuterMask = OuterShuf->getMask();
26984 ArrayRef<int> InnerMask = InnerShuf->getMask();
26985 unsigned NumElts = OuterMask.size();
26986 assert(NumElts == InnerMask.size() && "Mask length mismatch");
26987 SmallVector<int, 32> CombinedMask(NumElts, -1);
26988 int SplatIndex = -1;
26989 for (unsigned i = 0; i != NumElts; ++i) {
26990 // Undef lanes remain undef.
26991 int OuterMaskElt = OuterMask[i];
26992 if (OuterMaskElt == -1)
26993 continue;
26994
26995 // Peek through the shuffle masks to get the underlying source element.
26996 int InnerMaskElt = InnerMask[OuterMaskElt];
26997 if (InnerMaskElt == -1)
26998 continue;
26999
27000 // Initialize the splatted element.
27001 if (SplatIndex == -1)
27002 SplatIndex = InnerMaskElt;
27003
27004 // Non-matching index - this is not a splat.
27005 if (SplatIndex != InnerMaskElt)
27006 return SDValue();
27007
27008 CombinedMask[i] = InnerMaskElt;
27009 }
27010 assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
27011 getSplatIndex(CombinedMask) != -1) &&
27012 "Expected a splat mask");
27013
27014 // TODO: The transform may be a win even if the mask is not legal.
27015 EVT VT = OuterShuf->getValueType(0);
27016 assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
27017 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
27018 return SDValue();
27019
27020 return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
27021 InnerShuf->getOperand(1), CombinedMask);
27022}
27023
27024/// If the shuffle mask is taking exactly one element from the first vector
27025/// operand and passing through all other elements from the second vector
27026/// operand, return the index of the mask element that is choosing an element
27027/// from the first operand. Otherwise, return -1.
27028 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
27029 int MaskSize = Mask.size();
27030 int EltFromOp0 = -1;
27031 // TODO: This does not match if there are undef elements in the shuffle mask.
27032 // Should we ignore undefs in the shuffle mask instead? The trade-off is
27033 // removing an instruction (a shuffle), but losing the knowledge that some
27034 // vector lanes are not needed.
27035 for (int i = 0; i != MaskSize; ++i) {
27036 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
27037 // We're looking for a shuffle of exactly one element from operand 0.
27038 if (EltFromOp0 != -1)
27039 return -1;
27040 EltFromOp0 = i;
27041 } else if (Mask[i] != i + MaskSize) {
27042 // Nothing from operand 1 can change lanes.
27043 return -1;
27044 }
27045 }
27046 return EltFromOp0;
27047}
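// Worked example (illustrative): with 4 elements, the mask <4,5,1,7> passes
// lanes 0, 1 and 3 straight through from operand 1 and takes exactly one
// element (lane 1 of operand 0) into position 2, so the function returns 2,
// which is the position in the mask rather than the source lane.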
27048
27049/// If a shuffle inserts exactly one element from a source vector operand into
27050/// another vector operand and we can access the specified element as a scalar,
27051/// then we can eliminate the shuffle.
27052SDValue DAGCombiner::replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf) {
27053 // First, check if we are taking one element of a vector and shuffling that
27054 // element into another vector.
27055 ArrayRef<int> Mask = Shuf->getMask();
27056 SmallVector<int, 16> CommutedMask(Mask);
27057 SDValue Op0 = Shuf->getOperand(0);
27058 SDValue Op1 = Shuf->getOperand(1);
27059 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
27060 if (ShufOp0Index == -1) {
27061 // Commute mask and check again.
27062 ShuffleVectorSDNode::commuteMask(CommutedMask);
27063 ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
27064 if (ShufOp0Index == -1)
27065 return SDValue();
27066 // Commute operands to match the commuted shuffle mask.
27067 std::swap(Op0, Op1);
27068 Mask = CommutedMask;
27069 }
27070
27071 // The shuffle inserts exactly one element from operand 0 into operand 1.
27072 // Now see if we can access that element as a scalar via a real insert element
27073 // instruction.
27074 // TODO: We can try harder to locate the element as a scalar. Examples: it
27075 // could be an operand of BUILD_VECTOR, or a constant.
27076 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
27077 "Shuffle mask value must be from operand 0");
27078
27079 SDValue Elt;
27080 if (sd_match(Op0, m_InsertElt(m_Value(), m_Value(Elt),
27081 m_SpecificInt(Mask[ShufOp0Index])))) {
27082 // There's an existing insertelement with constant insertion index, so we
27083 // don't need to check the legality/profitability of a replacement operation
27084 // that differs at most in the constant value. The target should be able to
27085 // lower any of those in a similar way. If not, legalization will expand
27086 // this to a scalar-to-vector plus shuffle.
27087 //
27088 // Note that the shuffle may move the scalar from the position that the
27089 // insert element used. Therefore, our new insert element occurs at the
27090 // shuffle's mask index value, not the insert's index value.
27091 //
27092 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
27093 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
27094 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
27095 Op1, Elt, NewInsIndex);
27096 }
27097
27098 if (!hasOperation(ISD::INSERT_VECTOR_ELT, Op0.getValueType()))
27099 return SDValue();
27100
27101 if (sd_match(Op0, m_ScalarToVector(m_Value(Elt))) &&
27102 Mask[ShufOp0Index] == 0) {
27103 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
27104 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
27105 Op1, Elt, NewInsIndex);
27106 }
27107
27108 return SDValue();
27109}
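// Worked example for the fold above (illustrative):
// shuffle (insertelt V1, X, 0), V2, <4,5,0,7>
// inserts exactly one element of operand 0 (lane 0, which is where X lives)
// into position 2, so the whole node becomes insertelt V2, X, 2. Note that
// the new insertion index is the shuffle position, not the original index.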
27110
27111/// If we have a unary shuffle of a shuffle, see if it can be folded away
27112/// completely. This has the potential to lose undef knowledge because the first
27113/// shuffle may not have an undef mask element where the second one does. So
27114/// only call this after doing simplifications based on demanded elements.
27115 static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
27116 // shuf (shuf0 X, Y, Mask0), undef, Mask
27117 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
27118 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
27119 return SDValue();
27120
27121 ArrayRef<int> Mask = Shuf->getMask();
27122 ArrayRef<int> Mask0 = Shuf0->getMask();
27123 for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
27124 // Ignore undef elements.
27125 if (Mask[i] == -1)
27126 continue;
27127 assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
27128
27129 // Is the element of the shuffle operand chosen by this shuffle the same as
27130 // the element chosen by the shuffle operand itself?
27131 if (Mask0[Mask[i]] != Mask0[i])
27132 return SDValue();
27133 }
27134 // Every element of this shuffle is identical to the result of the previous
27135 // shuffle, so we can replace this value.
27136 return Shuf->getOperand(0);
27137}
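// Worked example for the fold above (illustrative): with inner mask
// <0,0,2,2> and outer mask <1,0,3,2>, every outer element re-selects an
// inner element that reads the same source lane (Mask0[Mask[i]] == Mask0[i]
// for all i), so the outer shuffle is redundant and is replaced by the inner
// shuffle's result.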
27138
27139SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
27140 EVT VT = N->getValueType(0);
27141 unsigned NumElts = VT.getVectorNumElements();
27142
27143 SDValue N0 = N->getOperand(0);
27144 SDValue N1 = N->getOperand(1);
27145
27146 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
27147
27148 // Canonicalize shuffle undef, undef -> undef
27149 if (N0.isUndef() && N1.isUndef())
27150 return DAG.getUNDEF(VT);
27151
27152 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
27153
27154 // Canonicalize shuffle v, v -> v, undef
27155 if (N0 == N1)
27156 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
27157 createUnaryMask(SVN->getMask(), NumElts));
27158
27159 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
27160 if (N0.isUndef())
27161 return DAG.getCommutedVectorShuffle(*SVN);
27162
27163 // Remove references to rhs if it is undef
27164 if (N1.isUndef()) {
27165 bool Changed = false;
27166 SmallVector<int, 8> NewMask;
27167 for (unsigned i = 0; i != NumElts; ++i) {
27168 int Idx = SVN->getMaskElt(i);
27169 if (Idx >= (int)NumElts) {
27170 Idx = -1;
27171 Changed = true;
27172 }
27173 NewMask.push_back(Idx);
27174 }
27175 if (Changed)
27176 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
27177 }
27178
27179 if (SDValue InsElt = replaceShuffleOfInsert(SVN))
27180 return InsElt;
27181
27182 // A shuffle of a single vector that is a splatted value can always be folded.
27183 if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
27184 return V;
27185
27186 if (SDValue V = formSplatFromShuffles(SVN, DAG))
27187 return V;
27188
27189 // If it is a splat, check if the argument vector is another splat or a
27190 // build_vector.
27191 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
27192 int SplatIndex = SVN->getSplatIndex();
27193 if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
27194 TLI.isBinOp(N0.getOpcode()) && N0->getNumValues() == 1) {
27195 // splat (vector_bo L, R), Index -->
27196 // splat (scalar_bo (extelt L, Index), (extelt R, Index))
27197 SDValue L = N0.getOperand(0), R = N0.getOperand(1);
27198 SDLoc DL(N);
27199 EVT EltVT = VT.getScalarType();
27200 SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
27201 SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
27202 SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
27203 SDValue NewBO =
27204 DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, N0->getFlags());
27205 SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
27206 SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
27207 return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
27208 }
27209
27210 // splat(scalar_to_vector(x), 0) -> build_vector(x,...,x)
27211 // splat(insert_vector_elt(v, x, c), c) -> build_vector(x,...,x)
27212 if ((!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) &&
27213 N0.hasOneUse()) {
27214 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && SplatIndex == 0)
27215 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(0));
27216
27217 if (N0.getOpcode() == ISD::INSERT_VECTOR_ELT)
27218 if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2)))
27219 if (Idx->getAPIntValue() == SplatIndex)
27220 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1));
27221
27222 // Look through a bitcast if LE and splatting lane 0, through to a
27223 // scalar_to_vector or a build_vector.
27224 if (N0.getOpcode() == ISD::BITCAST && N0.getOperand(0).hasOneUse() &&
27225 SplatIndex == 0 && DAG.getDataLayout().isLittleEndian() &&
27226 (N0.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR ||
27227 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR)) {
27228 EVT N00VT = N0.getOperand(0).getValueType();
27229 if (VT.getScalarSizeInBits() <= N00VT.getScalarSizeInBits() &&
27230 VT.isInteger() && N00VT.isInteger()) {
27231 EVT InVT =
27232 TLI.getTypeToTransformTo(*DAG.getContext(), VT.getScalarType());
27233 SDValue Op = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0),
27234 SDLoc(N), InVT);
27235 return DAG.getSplatBuildVector(VT, SDLoc(N), Op);
27236 }
27237 }
27238 }
27239
27240 // If this is a bit convert that changes the element type of the vector but
27241 // not the number of vector elements, look through it. Be careful not to
27242 // look though conversions that change things like v4f32 to v2f64.
27243 SDNode *V = N0.getNode();
27244 if (V->getOpcode() == ISD::BITCAST) {
27245 SDValue ConvInput = V->getOperand(0);
27246 if (ConvInput.getValueType().isVector() &&
27247 ConvInput.getValueType().getVectorNumElements() == NumElts)
27248 V = ConvInput.getNode();
27249 }
27250
27251 if (V->getOpcode() == ISD::BUILD_VECTOR) {
27252 assert(V->getNumOperands() == NumElts &&
27253 "BUILD_VECTOR has wrong number of operands");
27254 SDValue Base;
27255 bool AllSame = true;
27256 for (unsigned i = 0; i != NumElts; ++i) {
27257 if (!V->getOperand(i).isUndef()) {
27258 Base = V->getOperand(i);
27259 break;
27260 }
27261 }
27262 // Splat of <u, u, u, u>, return <u, u, u, u>
27263 if (!Base.getNode())
27264 return N0;
27265 for (unsigned i = 0; i != NumElts; ++i) {
27266 if (V->getOperand(i) != Base) {
27267 AllSame = false;
27268 break;
27269 }
27270 }
27271 // Splat of <x, x, x, x>, return <x, x, x, x>
27272 if (AllSame)
27273 return N0;
27274
27275 // Canonicalize any other splat as a build_vector, but avoid defining any
27276 // undefined elements in the mask.
27277 SDValue Splatted = V->getOperand(SplatIndex);
27278 SmallVector<SDValue, 8> Ops(NumElts, Splatted);
27279 EVT EltVT = Splatted.getValueType();
27280
27281 for (unsigned i = 0; i != NumElts; ++i) {
27282 if (SVN->getMaskElt(i) < 0)
27283 Ops[i] = DAG.getUNDEF(EltVT);
27284 }
27285
27286 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
27287
27288 // We may have jumped through bitcasts, so the type of the
27289 // BUILD_VECTOR may not match the type of the shuffle.
27290 if (V->getValueType(0) != VT)
27291 NewBV = DAG.getBitcast(VT, NewBV);
27292 return NewBV;
27293 }
27294 }
27295
27296 // Simplify source operands based on shuffle mask.
27297 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
27298 return SDValue(N, 0);
27299
27300 // This is intentionally placed after demanded elements simplification because
27301 // it could eliminate knowledge of undef elements created by this shuffle.
27302 if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
27303 return ShufOp;
27304
27305 // Match shuffles that can be converted to any_vector_extend_in_reg.
27306 if (SDValue V =
27307 combineShuffleToAnyExtendVectorInreg(SVN, DAG, TLI, LegalOperations))
27308 return V;
27309
27310 // Combine "truncate_vector_in_reg" style shuffles.
27311 if (SDValue V = combineTruncationShuffle(SVN, DAG))
27312 return V;
27313
27314 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
27315 Level < AfterLegalizeVectorOps &&
27316 (N1.isUndef() ||
27317 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
27318 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
27319 if (SDValue V = partitionShuffleOfConcats(N, DAG))
27320 return V;
27321 }
27322
27323 // A shuffle of a concat of the same narrow vector can be reduced to use
27324 // only low-half elements of a concat with undef:
27325 // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
27326 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
27327 N0.getNumOperands() == 2 &&
27328 N0.getOperand(0) == N0.getOperand(1)) {
27329 int HalfNumElts = (int)NumElts / 2;
27330 SmallVector<int, 8> NewMask;
27331 for (unsigned i = 0; i != NumElts; ++i) {
27332 int Idx = SVN->getMaskElt(i);
27333 if (Idx >= HalfNumElts) {
27334 assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
27335 Idx -= HalfNumElts;
27336 }
27337 NewMask.push_back(Idx);
27338 }
27339 if (TLI.isShuffleMaskLegal(NewMask, VT)) {
27340 SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
27341 SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
27342 N0.getOperand(0), UndefVec);
27343 return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
27344 }
27345 }
27346
27347 // See if we can replace a shuffle with an insert_subvector.
27348 // e.g. v2i32 into v8i32:
27349 // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
27350 // --> insert_subvector(lhs,rhs1,4).
27351 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
27352 TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
27353 auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
27354 // Ensure RHS subvectors are legal.
27355 assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
27356 EVT SubVT = RHS.getOperand(0).getValueType();
27357 int NumSubVecs = RHS.getNumOperands();
27358 int NumSubElts = SubVT.getVectorNumElements();
27359 assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
27360 if (!TLI.isTypeLegal(SubVT))
27361 return SDValue();
27362
27363 // Don't bother if we have a unary shuffle (matches undef + LHS elts).
27364 if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
27365 return SDValue();
27366
27367 // Search [NumSubElts] spans for RHS sequence.
27368 // TODO: Can we avoid nested loops to increase performance?
27369 SmallVector<int> InsertionMask(NumElts);
27370 for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
27371 for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
27372 // Reset mask to identity.
27373 std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
27374
27375 // Add subvector insertion.
27376 std::iota(InsertionMask.begin() + SubIdx,
27377 InsertionMask.begin() + SubIdx + NumSubElts,
27378 NumElts + (SubVec * NumSubElts));
27379
27380 // See if the shuffle mask matches the reference insertion mask.
27381 bool MatchingShuffle = true;
27382 for (int i = 0; i != (int)NumElts; ++i) {
27383 int ExpectIdx = InsertionMask[i];
27384 int ActualIdx = Mask[i];
27385 if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
27386 MatchingShuffle = false;
27387 break;
27388 }
27389 }
27390
27391 if (MatchingShuffle)
27392 return DAG.getInsertSubvector(SDLoc(N), LHS, RHS.getOperand(SubVec),
27393 SubIdx);
27394 }
27395 }
27396 return SDValue();
27397 };
27398 ArrayRef<int> Mask = SVN->getMask();
27399 if (N1.getOpcode() == ISD::CONCAT_VECTORS)
27400 if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
27401 return InsertN1;
27402 if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
27403 SmallVector<int> CommuteMask(Mask);
27404 ShuffleVectorSDNode::commuteMask(CommuteMask);
27405 if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
27406 return InsertN0;
27407 }
27408 }
27409
27410 // If we're not performing a select/blend shuffle, see if we can convert the
27411 // shuffle into an AND node, where all the out-of-lane elements are known zero.
27412 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
27413 bool IsInLaneMask = true;
27414 ArrayRef<int> Mask = SVN->getMask();
27415 SmallVector<int, 16> ClearMask(NumElts, -1);
27416 APInt DemandedLHS = APInt::getZero(NumElts);
27417 APInt DemandedRHS = APInt::getZero(NumElts);
27418 for (int I = 0; I != (int)NumElts; ++I) {
27419 int M = Mask[I];
27420 if (M < 0)
27421 continue;
27422 ClearMask[I] = M == I ? I : (I + NumElts);
27423 IsInLaneMask &= (M == I) || (M == (int)(I + NumElts));
27424 if (M != I) {
27425 APInt &Demanded = M < (int)NumElts ? DemandedLHS : DemandedRHS;
27426 Demanded.setBit(M % NumElts);
27427 }
27428 }
27429 // TODO: Should we try to mask with N1 as well?
27430 if (!IsInLaneMask && (!DemandedLHS.isZero() || !DemandedRHS.isZero()) &&
27431 (DemandedLHS.isZero() || DAG.MaskedVectorIsZero(N0, DemandedLHS)) &&
27432 (DemandedRHS.isZero() || DAG.MaskedVectorIsZero(N1, DemandedRHS))) {
27433 SDLoc DL(N);
27434 EVT IntVT = VT.changeVectorElementTypeToInteger();
27435 EVT IntSVT = VT.getVectorElementType().changeTypeToInteger();
27436 // Transform the type to a legal type so that the buildvector constant
27437 // elements are not illegal. Make sure that the result is larger than the
27438 // original type, in case the value is split into two (e.g. i64->i32).
27439 if (!TLI.isTypeLegal(IntSVT) && LegalTypes)
27440 IntSVT = TLI.getTypeToTransformTo(*DAG.getContext(), IntSVT);
27441 if (IntSVT.getSizeInBits() >= IntVT.getScalarSizeInBits()) {
27442 SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT);
27443 SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT);
27444 SmallVector<SDValue, 16> AndMask(NumElts, DAG.getUNDEF(IntSVT));
27445 for (int I = 0; I != (int)NumElts; ++I)
27446 if (0 <= Mask[I])
27447 AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt;
27448
27449 // See if a clear mask is legal instead of going via
27450 // XformToShuffleWithZero which loses UNDEF mask elements.
27451 if (TLI.isVectorClearMaskLegal(ClearMask, IntVT))
27452 return DAG.getBitcast(
27453 VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0),
27454 DAG.getConstant(0, DL, IntVT), ClearMask));
27455
27456 if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
27457 return DAG.getBitcast(
27458 VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0),
27459 DAG.getBuildVector(IntVT, DL, AndMask)));
27460 }
27461 }
27462 }
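// Worked example for the block above (illustrative): a v4i32 shuffle with
// mask <0,7,2,7> where lane 3 of N1 is known zero yields ClearMask <0,5,2,7>
// and AndMask <-1,0,-1,0>, so (subject to the legality checks above) the
// shuffle can be lowered either as a clear-mask shuffle against zero or as
// (and N0, <-1,0,-1,0>) on the integer type.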
27463
27464 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
27465 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
27466 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
27467 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
27468 return Res;
27469
27470 // If this shuffle only has a single input that is a bitcasted shuffle,
27471 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
27472 // back to their original types.
27473 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
27474 N1.isUndef() && Level < AfterLegalizeVectorOps &&
27475 TLI.isTypeLegal(VT)) {
27476
27477 SDValue BC0 = peekThroughOneUseBitcasts(N0);
27478 if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
27479 EVT SVT = VT.getScalarType();
27480 EVT InnerVT = BC0->getValueType(0);
27481 EVT InnerSVT = InnerVT.getScalarType();
27482
27483 // Determine which shuffle works with the smaller scalar type.
27484 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
27485 EVT ScaleSVT = ScaleVT.getScalarType();
27486
27487 if (TLI.isTypeLegal(ScaleVT) &&
27488 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
27489 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
27490 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
27491 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
27492
27493 // Scale the shuffle masks to the smaller scalar type.
27494 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
27495 SmallVector<int, 8> InnerMask;
27496 SmallVector<int, 8> OuterMask;
27497 narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
27498 narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
27499
27500 // Merge the shuffle masks.
27501 SmallVector<int, 8> NewMask;
27502 for (int M : OuterMask)
27503 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
27504
27505 // Test for shuffle mask legality over both commutations.
27506 SDValue SV0 = BC0->getOperand(0);
27507 SDValue SV1 = BC0->getOperand(1);
27508 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
27509 if (!LegalMask) {
27510 std::swap(SV0, SV1);
27511 ShuffleVectorSDNode::commuteMask(NewMask);
27512 LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
27513 }
27514
27515 if (LegalMask) {
27516 SV0 = DAG.getBitcast(ScaleVT, SV0);
27517 SV1 = DAG.getBitcast(ScaleVT, SV1);
27518 return DAG.getBitcast(
27519 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
27520 }
27521 }
27522 }
27523 }
27524
27525 // Match shuffles of bitcasts, so long as the mask can be treated as the
27526 // larger type.
27527 if (SDValue V = combineShuffleOfBitcast(SVN, DAG, TLI, LegalOperations))
27528 return V;
27529
27530 // Compute the combined shuffle mask for a shuffle with SV0 as the first
27531 // operand, and SV1 as the second operand.
27532 // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
27533 // Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
27534 auto MergeInnerShuffle =
27535 [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
27536 ShuffleVectorSDNode *OtherSVN, SDValue N1,
27537 const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
27538 SmallVectorImpl<int> &Mask) -> bool {
27539 // Don't try to fold splats; they're likely to simplify somehow, or they
27540 // might be free.
27541 if (OtherSVN->isSplat())
27542 return false;
27543
27544 SV0 = SV1 = SDValue();
27545 Mask.clear();
27546
27547 for (unsigned i = 0; i != NumElts; ++i) {
27548 int Idx = SVN->getMaskElt(i);
27549 if (Idx < 0) {
27550 // Propagate Undef.
27551 Mask.push_back(Idx);
27552 continue;
27553 }
27554
27555 if (Commute)
27556 Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
27557
27558 SDValue CurrentVec;
27559 if (Idx < (int)NumElts) {
27560 // This shuffle index refers to the inner shuffle N0. Lookup the inner
27561 // shuffle mask to identify which vector is actually referenced.
27562 Idx = OtherSVN->getMaskElt(Idx);
27563 if (Idx < 0) {
27564 // Propagate Undef.
27565 Mask.push_back(Idx);
27566 continue;
27567 }
27568 CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
27569 : OtherSVN->getOperand(1);
27570 } else {
27571 // This shuffle index references an element within N1.
27572 CurrentVec = N1;
27573 }
27574
27575 // Simple case where 'CurrentVec' is UNDEF.
27576 if (CurrentVec.isUndef()) {
27577 Mask.push_back(-1);
27578 continue;
27579 }
27580
27581 // Canonicalize the shuffle index. We don't know yet if CurrentVec
27582 // will be the first or second operand of the combined shuffle.
27583 Idx = Idx % NumElts;
27584 if (!SV0.getNode() || SV0 == CurrentVec) {
27585 // Ok. CurrentVec is the left hand side.
27586 // Update the mask accordingly.
27587 SV0 = CurrentVec;
27588 Mask.push_back(Idx);
27589 continue;
27590 }
27591 if (!SV1.getNode() || SV1 == CurrentVec) {
27592 // Ok. CurrentVec is the right hand side.
27593 // Update the mask accordingly.
27594 SV1 = CurrentVec;
27595 Mask.push_back(Idx + NumElts);
27596 continue;
27597 }
27598
27599 // Last chance - see if the vector is another shuffle and if it
27600 // uses one of the existing candidate shuffle ops.
27601 if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
27602 int InnerIdx = CurrentSVN->getMaskElt(Idx);
27603 if (InnerIdx < 0) {
27604 Mask.push_back(-1);
27605 continue;
27606 }
27607 SDValue InnerVec = (InnerIdx < (int)NumElts)
27608 ? CurrentSVN->getOperand(0)
27609 : CurrentSVN->getOperand(1);
27610 if (InnerVec.isUndef()) {
27611 Mask.push_back(-1);
27612 continue;
27613 }
27614 InnerIdx %= NumElts;
27615 if (InnerVec == SV0) {
27616 Mask.push_back(InnerIdx);
27617 continue;
27618 }
27619 if (InnerVec == SV1) {
27620 Mask.push_back(InnerIdx + NumElts);
27621 continue;
27622 }
27623 }
27624
27625 // Bail out if we cannot convert the shuffle pair into a single shuffle.
27626 return false;
27627 }
27628
27629 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
27630 return true;
27631
27632 // Avoid introducing shuffles with illegal mask.
27633 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
27634 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
27635 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
27636 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
27637 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
27638 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
27639 if (TLI.isShuffleMaskLegal(Mask, VT))
27640 return true;
27641
27642 std::swap(SV0, SV1);
27643 ShuffleVectorSDNode::commuteMask(Mask);
27644 return TLI.isShuffleMaskLegal(Mask, VT);
27645 };
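// Worked example for MergeInnerShuffle (illustrative): merging
// shuffle(shuffle(A, B, <0,4,1,5>), C, <0,2,4,6>) walks the outer mask
// through the inner one and selects A[0], A[1], C[0], C[2], so SV0 = A,
// SV1 = C and the combined mask is <0,1,4,6>, i.e. one shuffle instead of two.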
27646
27647 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
27648 // Canonicalize shuffles according to rules:
27649 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
27650 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
27651 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
27652 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
27653 N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
27654 // The incoming shuffle must be of the same type as the result of the
27655 // current shuffle.
27656 assert(N1->getOperand(0).getValueType() == VT &&
27657 "Shuffle types don't match");
27658
27659 SDValue SV0 = N1->getOperand(0);
27660 SDValue SV1 = N1->getOperand(1);
27661 bool HasSameOp0 = N0 == SV0;
27662 bool IsSV1Undef = SV1.isUndef();
27663 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
27664 // Commute the operands of this shuffle so merging below will trigger.
27665 return DAG.getCommutedVectorShuffle(*SVN);
27666 }
27667
27668 // Canonicalize splat shuffles to the RHS to improve merging below.
27669 // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
27670 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
27671 N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
27672 cast<ShuffleVectorSDNode>(N0)->isSplat() &&
27673 !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
27674 return DAG.getCommutedVectorShuffle(*SVN);
27675 }
27676
27677 // Try to fold according to rules:
27678 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
27679 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
27680 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
27681 // Don't try to fold shuffles with illegal type.
27682 // Only fold if this shuffle is the only user of the other shuffle.
27683 // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
27684 for (int i = 0; i != 2; ++i) {
27685 if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
27686 N->isOnlyUserOf(N->getOperand(i).getNode())) {
27687 // The incoming shuffle must be of the same type as the result of the
27688 // current shuffle.
27689 auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
27690 assert(OtherSV->getOperand(0).getValueType() == VT &&
27691 "Shuffle types don't match");
27692
27693 SDValue SV0, SV1;
27694 SmallVector<int, 4> Mask;
27695 if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
27696 SV0, SV1, Mask)) {
27697 // Check if all indices in Mask are Undef. In case, propagate Undef.
27698 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
27699 return DAG.getUNDEF(VT);
27700
27701 return DAG.getVectorShuffle(VT, SDLoc(N),
27702 SV0 ? SV0 : DAG.getUNDEF(VT),
27703 SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
27704 }
27705 }
27706 }
27707
27708 // Merge shuffles through binops if we are able to merge them with at least
27709 // one other shuffle.
27710 // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
27711 // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
27712 unsigned SrcOpcode = N0.getOpcode();
27713 if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
27714 (N1.isUndef() ||
27715 (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
27716 // Get binop source ops, or just pass on the undef.
27717 SDValue Op00 = N0.getOperand(0);
27718 SDValue Op01 = N0.getOperand(1);
27719 SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
27720 SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
27721 // TODO: We might be able to relax the VT check but we don't currently
27722 // have any isBinOp() that has different result/ops VTs so play safe until
27723 // we have test coverage.
27724 if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
27725 Op01.getValueType() == VT && Op11.getValueType() == VT &&
27726 (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
27727 Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
27728 Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
27729 Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
27730 auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
27731 SmallVectorImpl<int> &Mask, bool LeftOp,
27732 bool Commute) {
27733 SDValue InnerN = Commute ? N1 : N0;
27734 SDValue Op0 = LeftOp ? Op00 : Op01;
27735 SDValue Op1 = LeftOp ? Op10 : Op11;
27736 if (Commute)
27737 std::swap(Op0, Op1);
27738 // Only accept the merged shuffle if we don't introduce undef elements,
27739 // or the inner shuffle already contained undef elements.
27740 auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
27741 return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
27742 MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
27743 Mask) &&
27744 (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
27745 llvm::none_of(Mask, [](int M) { return M < 0; }));
27746 };
27747
27748 // Ensure we don't increase the number of shuffles - we must merge a
27749 // shuffle from at least one of the LHS and RHS ops.
27750 bool MergedLeft = false;
27751 SDValue LeftSV0, LeftSV1;
27752 SmallVector<int, 4> LeftMask;
27753 if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
27754 CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
27755 MergedLeft = true;
27756 } else {
27757 LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
27758 LeftSV0 = Op00, LeftSV1 = Op10;
27759 }
27760
27761 bool MergedRight = false;
27762 SDValue RightSV0, RightSV1;
27763 SmallVector<int, 4> RightMask;
27764 if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
27765 CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
27766 MergedRight = true;
27767 } else {
27768 RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
27769 RightSV0 = Op01, RightSV1 = Op11;
27770 }
27771
27772 if (MergedLeft || MergedRight) {
27773 SDLoc DL(N);
27774 SDValue LHS = DAG.getVectorShuffle(
27775 VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
27776 LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
27777 SDValue RHS = DAG.getVectorShuffle(
27778 VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
27779 RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
27780 return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
27781 }
27782 }
27783 }
27784 }
27785
27786 if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
27787 return V;
27788
27789 // Match shuffles that can be converted to ISD::ZERO_EXTEND_VECTOR_INREG.
27790 // Perform this really late, because it could eliminate knowledge
27791 // of undef elements created by this shuffle.
27792 if (Level < AfterLegalizeTypes)
27793 if (SDValue V = combineShuffleToZeroExtendVectorInReg(SVN, DAG, TLI,
27794 LegalOperations))
27795 return V;
27796
27797 return SDValue();
27798}
27799
27800SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
27801 EVT VT = N->getValueType(0);
27802 if (!VT.isFixedLengthVector())
27803 return SDValue();
27804
27805 // Try to convert a scalar binop with an extracted vector element to a vector
27806 // binop. This is intended to reduce potentially expensive register moves.
27807 // TODO: Check if both operands are extracted.
27808 // TODO: How to prefer scalar/vector ops with multiple uses of the extract?
27809 // TODO: Generalize this, so it can be called from visitINSERT_VECTOR_ELT().
27810 SDValue Scalar = N->getOperand(0);
27811 unsigned Opcode = Scalar.getOpcode();
27812 EVT VecEltVT = VT.getScalarType();
27813 if (Scalar.hasOneUse() && Scalar->getNumValues() == 1 &&
27814 TLI.isBinOp(Opcode) && Scalar.getValueType() == VecEltVT &&
27815 Scalar.getOperand(0).getValueType() == VecEltVT &&
27816 Scalar.getOperand(1).getValueType() == VecEltVT &&
27817 Scalar->isOnlyUserOf(Scalar.getOperand(0).getNode()) &&
27818 Scalar->isOnlyUserOf(Scalar.getOperand(1).getNode()) &&
27819 DAG.isSafeToSpeculativelyExecute(Opcode) && hasOperation(Opcode, VT)) {
27820 // Match an extract element and get a shuffle mask equivalent.
27821 SmallVector<int, 8> ShufMask(VT.getVectorNumElements(), -1);
27822
27823 for (int i : {0, 1}) {
27824 // s2v (bo (extelt V, Idx), C) --> shuffle (bo V, C'), {Idx, -1, -1...}
27825 // s2v (bo C, (extelt V, Idx)) --> shuffle (bo C', V), {Idx, -1, -1...}
27826 SDValue EE = Scalar.getOperand(i);
27827 auto *C = dyn_cast<ConstantSDNode>(Scalar.getOperand(i ? 0 : 1));
27828 if (C && EE.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
27829 EE.getOperand(0).getValueType() == VT &&
27830 isa<ConstantSDNode>(EE.getOperand(1))) {
27831 // Mask = {ExtractIndex, undef, undef....}
27832 ShufMask[0] = EE.getConstantOperandVal(1);
27833 // Make sure the shuffle is legal if we are crossing lanes.
27834 if (TLI.isShuffleMaskLegal(ShufMask, VT)) {
27835 SDLoc DL(N);
27836 SDValue V[] = {EE.getOperand(0),
27837 DAG.getConstant(C->getAPIntValue(), DL, VT)};
27838 SDValue VecBO = DAG.getNode(Opcode, DL, VT, V[i], V[1 - i]);
27839 return DAG.getVectorShuffle(VT, DL, VecBO, DAG.getUNDEF(VT),
27840 ShufMask);
27841 }
27842 }
27843 }
27844 }
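// Worked example for the block above (illustrative):
// scalar_to_vector (add (extractelt V:v4i32, 2), 7)
// is rebuilt as shuffle (add V, <7,7,7,7>), undef, <2,-1,-1,-1> when the
// vector add and the mask are supported, keeping the computation in vector
// registers and avoiding a scalar round-trip.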
27845
27846 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
27847 // with a VECTOR_SHUFFLE and possible truncate.
27848 if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
27849 !Scalar.getOperand(0).getValueType().isFixedLengthVector())
27850 return SDValue();
27851
27852 // If we have an implicit truncate, truncate here if it is legal.
27853 if (VecEltVT != Scalar.getValueType() &&
27854 Scalar.getValueType().isScalarInteger() && isTypeLegal(VecEltVT)) {
27855 SDValue Val = DAG.getNode(ISD::TRUNCATE, SDLoc(Scalar), VecEltVT, Scalar);
27856 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
27857 }
27858
27859 auto *ExtIndexC = dyn_cast<ConstantSDNode>(Scalar.getOperand(1));
27860 if (!ExtIndexC)
27861 return SDValue();
27862
27863 SDValue SrcVec = Scalar.getOperand(0);
27864 EVT SrcVT = SrcVec.getValueType();
27865 unsigned SrcNumElts = SrcVT.getVectorNumElements();
27866 unsigned VTNumElts = VT.getVectorNumElements();
27867 if (VecEltVT == SrcVT.getScalarType() && VTNumElts <= SrcNumElts) {
27868 // Create a shuffle equivalent for scalar-to-vector: {ExtIndex, -1, -1, ...}
27869 SmallVector<int, 8> Mask(SrcNumElts, -1);
27870 Mask[0] = ExtIndexC->getZExtValue();
27871 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
27872 SrcVT, SDLoc(N), SrcVec, DAG.getUNDEF(SrcVT), Mask, DAG);
27873 if (!LegalShuffle)
27874 return SDValue();
27875
27876 // If the initial vector is the same size, the shuffle is the result.
27877 if (VT == SrcVT)
27878 return LegalShuffle;
27879
27880 // If not, shorten the shuffled vector.
27881 if (VTNumElts != SrcNumElts) {
27882 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
27883 EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
27884 SrcVT.getVectorElementType(), VTNumElts);
27885 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, LegalShuffle,
27886 ZeroIdx);
27887 }
27888 }
27889
27890 return SDValue();
27891}
27892
27893SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
27894 EVT VT = N->getValueType(0);
27895 SDValue N0 = N->getOperand(0);
27896 SDValue N1 = N->getOperand(1);
27897 SDValue N2 = N->getOperand(2);
27898 uint64_t InsIdx = N->getConstantOperandVal(2);
27899
27900 // Remove insert of UNDEF/POISON.
27901 if (N1.isUndef()) {
27902 if (N1.getOpcode() == ISD::POISON || N0.getOpcode() == ISD::UNDEF)
27903 return N0;
27904 return DAG.getFreeze(N0);
27905 }
27906
27907 // If this is an insert of an extracted vector into an undef/poison vector, we
27908 // can just use the input to the extract if the types match, and can simplify
27909 // in some cases even if they don't.
27910 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
27911 N1.getOperand(1) == N2) {
27912 EVT N1VT = N1.getValueType();
27913 EVT SrcVT = N1.getOperand(0).getValueType();
27914 if (SrcVT == VT) {
27915 // Need to ensure that the result isn't more poisonous if we skip both the
27916 // extract and the insert.
27917 if (N0.getOpcode() == ISD::POISON)
27918 return N1.getOperand(0);
27919 if (VT.isFixedLengthVector() && N1VT.isFixedLengthVector()) {
27920 unsigned SubVecNumElts = N1VT.getVectorNumElements();
27921 APInt EltMask = APInt::getBitsSet(VT.getVectorNumElements(), InsIdx,
27922 InsIdx + SubVecNumElts);
27923 if (DAG.isGuaranteedNotToBePoison(N1.getOperand(0), ~EltMask))
27924 return N1.getOperand(0);
27925 } else if (DAG.isGuaranteedNotToBePoison(N1.getOperand(0)))
27926 return N1.getOperand(0);
27927 }
27928 // TODO: To remove the zero check, need to adjust the offset to
27929 // a multiple of the new src type.
27930 if (isNullConstant(N2)) {
27931 if (VT.knownBitsGE(SrcVT) &&
27932 !(VT.isFixedLengthVector() && SrcVT.isScalableVector()))
27933 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
27934 VT, N0, N1.getOperand(0), N2);
27935 else if (VT.knownBitsLE(SrcVT) &&
27936 !(VT.isScalableVector() && SrcVT.isFixedLengthVector()))
27937 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
27938 VT, N1.getOperand(0), N2);
27939 }
27940 }
27941
27942 // Handle case where we've ended up inserting back into the source vector
27943 // we extracted the subvector from.
27944 // insert_subvector(N0, extract_subvector(N0, N2), N2) --> N0
27945 if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && N1.getOperand(0) == N0 &&
27946 N1.getOperand(1) == N2)
27947 return N0;
27948
27949 // Simplify scalar inserts into an undef vector:
27950 // insert_subvector undef, (splat X), N2 -> splat X
27951 if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
27952 if (DAG.isConstantValueOfAnyType(N1.getOperand(0)) || N1.hasOneUse())
27953 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
27954
27955 // insert_subvector (splat X), (splat X), N2 -> splat X
27956 if (N0.getOpcode() == ISD::SPLAT_VECTOR && N0.getOpcode() == N1.getOpcode() &&
27957 N0.getOperand(0) == N1.getOperand(0))
27958 return N0;
27959
27960 // If we are inserting a bitcast value into an undef, with the same
27961 // number of elements, just use the bitcast input of the extract.
27962 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
27963 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
27964 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
27965 N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
27966 N1.getOperand(0).getOperand(1) == N2 &&
27967 N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
27968 VT.getVectorElementCount() &&
27969 N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
27970 VT.getSizeInBits()) {
27971 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
27972 }
27973
27974 // If both N0 and N1 are bitcast values on which insert_subvector
27975 // would make sense, pull the bitcast through.
27976 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
27977 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
27978 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
27979 SDValue CN0 = N0.getOperand(0);
27980 SDValue CN1 = N1.getOperand(0);
27981 EVT CN0VT = CN0.getValueType();
27982 EVT CN1VT = CN1.getValueType();
27983 if (CN0VT.isVector() && CN1VT.isVector() &&
27984 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
27985 CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
27986 SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
27987 CN0.getValueType(), CN0, CN1, N2);
27988 return DAG.getBitcast(VT, NewINSERT);
27989 }
27990 }
27991
27992 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
27993 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
27994 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
27995 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
27996 N0.getOperand(1).getValueType() == N1.getValueType() &&
27997 N0.getOperand(2) == N2)
27998 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
27999 N1, N2);
28000
28001 // Eliminate an intermediate insert into an undef vector:
28002 // insert_subvector undef, (insert_subvector undef, X, 0), 0 -->
28003 // insert_subvector undef, X, 0
28004 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
28005 N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)) &&
28006 isNullConstant(N2))
28007 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
28008 N1.getOperand(1), N2);
28009
28010 // Push subvector bitcasts to the output, adjusting the index as we go.
28011 // insert_subvector(bitcast(v), bitcast(s), c1)
28012 // -> bitcast(insert_subvector(v, s, c2))
28013 if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
28014 N1.getOpcode() == ISD::BITCAST) {
28015 SDValue N0Src = peekThroughBitcasts(N0);
28016 SDValue N1Src = peekThroughBitcasts(N1);
28017 EVT N0SrcSVT = N0Src.getValueType().getScalarType();
28018 EVT N1SrcSVT = N1Src.getValueType().getScalarType();
28019 if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
28020 N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
28021 EVT NewVT;
28022 SDLoc DL(N);
28023 SDValue NewIdx;
28024 LLVMContext &Ctx = *DAG.getContext();
28025 ElementCount NumElts = VT.getVectorElementCount();
28026 unsigned EltSizeInBits = VT.getScalarSizeInBits();
28027 if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
28028 unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
28029 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
28030 NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
28031 } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
28032 unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
28033 if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
28034 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
28035 NumElts.divideCoefficientBy(Scale));
28036 NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
28037 }
28038 }
28039 if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
28040 SDValue Res = DAG.getBitcast(NewVT, N0Src);
28041 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
28042 return DAG.getBitcast(VT, Res);
28043 }
28044 }
28045 }
28046
28047 // Canonicalize insert_subvector dag nodes.
28048 // Example:
28049 // (insert_subvector (insert_subvector A, Idx0), Idx1)
28050 // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
28051 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
28052 N1.getValueType() == N0.getOperand(1).getValueType()) {
28053 unsigned OtherIdx = N0.getConstantOperandVal(2);
28054 if (InsIdx < OtherIdx) {
28055 // Swap nodes.
28056 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
28057 N0.getOperand(0), N1, N2);
28058 AddToWorklist(NewOp.getNode());
28059 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
28060 VT, NewOp, N0.getOperand(1), N0.getOperand(2));
28061 }
28062 }
28063
28064 // If the input vector is a concatenation, and the insert replaces
28065 // one of the pieces, we can optimize into a single concat_vectors.
28066 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
28067 N0.getOperand(0).getValueType() == N1.getValueType() &&
28070 unsigned Factor = N1.getValueType().getVectorMinNumElements();
28071 SmallVector<SDValue, 8> Ops(N0->ops());
28072 Ops[InsIdx / Factor] = N1;
28073 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
28074 }
28075
28076 // Simplify source operands based on insertion.
28077 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
28078 return SDValue(N, 0);
28079
28080 return SDValue();
28081}
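// Worked example for the subvector bitcast push above (illustrative):
// insert_subvector (v8i32 bitcast V:v4i64), (v2i32 bitcast S:v1i64), 2
// has a 64-bit source scalar type, so Scale == 2 and the insert is redone as
// insert_subvector v4i64 V, S, 1, with the result bitcast back to v8i32.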
28082
28083SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
28084 SDValue N0 = N->getOperand(0);
28085
28086 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
28087 if (N0->getOpcode() == ISD::FP16_TO_FP)
28088 return N0->getOperand(0);
28089
28090 return SDValue();
28091}
28092
28093SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
28094 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
28095 auto Op = N->getOpcode();
28096 assert((Op == ISD::FP16_TO_FP || Op == ISD::BF16_TO_FP) &&
28097 "opcode should be FP16_TO_FP or BF16_TO_FP.");
28098 SDValue N0 = N->getOperand(0);
28099
28100 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) or
28101 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
28102 if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
28103 ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
28104 if (AndConst && AndConst->getAPIntValue() == 0xffff) {
28105 return DAG.getNode(Op, SDLoc(N), N->getValueType(0), N0.getOperand(0));
28106 }
28107 }
28108
28109 if (SDValue CastEliminated = eliminateFPCastPair(N))
28110 return CastEliminated;
28111
28112 // Sometimes constants manage to survive very late in the pipeline, e.g.,
28113 // because they are wrapped inside the <1 x f16> type. Try one last time to
28114 // get rid of them.
28115 SDValue Folded = DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N),
28116 N->getValueType(0), {N0});
28117 return Folded;
28118}
28119
28120SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) {
28121 SDValue N0 = N->getOperand(0);
28122
28123 // fold (fp_to_bf16 (bf16_to_fp op)) -> op
28124 if (N0->getOpcode() == ISD::BF16_TO_FP)
28125 return N0->getOperand(0);
28126
28127 return SDValue();
28128}
28129
28130SDValue DAGCombiner::visitBF16_TO_FP(SDNode *N) {
28131 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
28132 return visitFP16_TO_FP(N);
28133}
28134
28135SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
28136 SDValue N0 = N->getOperand(0);
28137 EVT VT = N0.getValueType();
28138 unsigned Opcode = N->getOpcode();
28139
28140 // VECREDUCE over 1-element vector is just an extract.
28141 if (VT.getVectorElementCount().isScalar()) {
28142 SDLoc dl(N);
28143 SDValue Res =
28144 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
28145 DAG.getVectorIdxConstant(0, dl));
28146 if (Res.getValueType() != N->getValueType(0))
28147 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
28148 return Res;
28149 }
28150
28151 // On a boolean vector, an and/or reduction is the same as a umin/umax
28152 // reduction. Convert them if the latter is legal while the former isn't.
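// For example (illustrative): on a vXi1 vector, the AND reduction is 1 iff
// every lane is 1, which is exactly the unsigned minimum of the lanes, and the
// OR reduction is 1 iff any lane is 1, which is the unsigned maximum.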
28153 if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
28154 unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
28155 ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
28156 if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
28157 TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
28158 DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
28159 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
28160 }
28161
28162 // vecreduce_or(insert_subvector(zero or undef, val)) -> vecreduce_or(val)
28163 // vecreduce_and(insert_subvector(ones or undef, val)) -> vecreduce_and(val)
28164 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
28165 TLI.isTypeLegal(N0.getOperand(1).getValueType())) {
28166 SDValue Vec = N0.getOperand(0);
28167 SDValue Subvec = N0.getOperand(1);
28168 if ((Opcode == ISD::VECREDUCE_OR &&
28169 (N0.getOperand(0).isUndef() || isNullOrNullSplat(Vec))) ||
28170 (Opcode == ISD::VECREDUCE_AND &&
28171 (N0.getOperand(0).isUndef() || isAllOnesOrAllOnesSplat(Vec))))
28172 return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), Subvec);
28173 }
28174
28175 // vecreduce_or(sext(x)) -> sext(vecreduce_or(x))
28176 // Same for zext and anyext, and for and/or/xor reductions.
28177 if ((Opcode == ISD::VECREDUCE_OR || Opcode == ISD::VECREDUCE_AND ||
28178 Opcode == ISD::VECREDUCE_XOR) &&
28179 (N0.getOpcode() == ISD::SIGN_EXTEND ||
28180 N0.getOpcode() == ISD::ZERO_EXTEND ||
28181 N0.getOpcode() == ISD::ANY_EXTEND) &&
28182 TLI.isOperationLegalOrCustom(Opcode, N0.getOperand(0).getValueType())) {
28183 SDValue Red = DAG.getNode(Opcode, SDLoc(N),
28184 N0.getOperand(0).getValueType().getVectorElementType(),
28185 N0.getOperand(0));
28186 return DAG.getNode(N0.getOpcode(), SDLoc(N), N->getValueType(0), Red);
28187 }
28188 return SDValue();
28189}
28190
28191SDValue DAGCombiner::visitVP_FSUB(SDNode *N) {
28192 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
28193
28194 // FSUB -> FMA combines:
28195 if (SDValue Fused = visitFSUBForFMACombine<VPMatchContext>(N)) {
28196 AddToWorklist(Fused.getNode());
28197 return Fused;
28198 }
28199 return SDValue();
28200}
28201
28202SDValue DAGCombiner::visitVPOp(SDNode *N) {
28203
28204 if (N->getOpcode() == ISD::VP_GATHER)
28205 if (SDValue SD = visitVPGATHER(N))
28206 return SD;
28207
28208 if (N->getOpcode() == ISD::VP_SCATTER)
28209 if (SDValue SD = visitVPSCATTER(N))
28210 return SD;
28211
28212 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD)
28213 if (SDValue SD = visitVP_STRIDED_LOAD(N))
28214 return SD;
28215
28216 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE)
28217 if (SDValue SD = visitVP_STRIDED_STORE(N))
28218 return SD;
28219
28220 // VP operations in which all vector elements are disabled - either by
28221 // determining that the mask is all false or that the EVL is 0 - can be
28222 // eliminated.
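// For instance (illustrative): a vp.add whose mask is all-false, or whose EVL
// is 0, computes no lanes; it can be replaced by undef (binary ops), by its
// chain (stores), or by its start operand (reductions), as handled below.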
28223 bool AreAllEltsDisabled = false;
28224 if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
28225 AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
28226 if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
28227 AreAllEltsDisabled |=
28228 ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
28229
28230 // This is the only generic VP combine we support for now.
28231 if (!AreAllEltsDisabled) {
28232 switch (N->getOpcode()) {
28233 case ISD::VP_FADD:
28234 return visitVP_FADD(N);
28235 case ISD::VP_FSUB:
28236 return visitVP_FSUB(N);
28237 case ISD::VP_FMA:
28238 return visitFMA<VPMatchContext>(N);
28239 case ISD::VP_SELECT:
28240 return visitVP_SELECT(N);
28241 case ISD::VP_MUL:
28242 return visitMUL<VPMatchContext>(N);
28243 case ISD::VP_SUB:
28244 return foldSubCtlzNot<VPMatchContext>(N, DAG);
28245 default:
28246 break;
28247 }
28248 return SDValue();
28249 }
28250
28251 // Binary operations can be replaced by UNDEF.
28252 if (ISD::isVPBinaryOp(N->getOpcode()))
28253 return DAG.getUNDEF(N->getValueType(0));
28254
28255 // VP Memory operations can be replaced by either the chain (stores) or the
28256 // chain + undef (loads).
28257 if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
28258 if (MemSD->writeMem())
28259 return MemSD->getChain();
28260 return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
28261 }
28262
28263 // Reduction operations return the start operand when no elements are active.
28264 if (ISD::isVPReduction(N->getOpcode()))
28265 return N->getOperand(0);
28266
28267 return SDValue();
28268}
28269
28270SDValue DAGCombiner::visitGET_FPENV_MEM(SDNode *N) {
28271 SDValue Chain = N->getOperand(0);
28272 SDValue Ptr = N->getOperand(1);
28273 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
28274
28276 // Check if the memory where the FP state is written is used only in a single
28277 // load operation.
28277 LoadSDNode *LdNode = nullptr;
28278 for (auto *U : Ptr->users()) {
28279 if (U == N)
28280 continue;
28281 if (auto *Ld = dyn_cast<LoadSDNode>(U)) {
28282 if (LdNode && LdNode != Ld)
28283 return SDValue();
28284 LdNode = Ld;
28285 continue;
28286 }
28287 return SDValue();
28288 }
28289 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
28290 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
28291 !LdNode->getChain().reachesChainWithoutSideEffects(SDValue(N, 0)))
28292 return SDValue();
28293
28294 // Check if the loaded value is used only in a store operation.
28295 StoreSDNode *StNode = nullptr;
28296 for (SDUse &U : LdNode->uses()) {
28297 if (U.getResNo() == 0) {
28298 if (auto *St = dyn_cast<StoreSDNode>(U.getUser())) {
28299 if (StNode)
28300 return SDValue();
28301 StNode = St;
28302 } else {
28303 return SDValue();
28304 }
28305 }
28306 }
28307 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
28308 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
28309 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
28310 return SDValue();
28311
28312 // Create new node GET_FPENV_MEM, which uses the store address to write FP
28313 // environment.
28314 SDValue Res = DAG.getGetFPEnv(Chain, SDLoc(N), StNode->getBasePtr(), MemVT,
28315 StNode->getMemOperand());
28316 CombineTo(StNode, Res, false);
28317 return Res;
28318}
28319
28320SDValue DAGCombiner::visitSET_FPENV_MEM(SDNode *N) {
28321 SDValue Chain = N->getOperand(0);
28322 SDValue Ptr = N->getOperand(1);
28323 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
28324
28325 // Check if the address of the FP state is also used only in a store operation.
28326 StoreSDNode *StNode = nullptr;
28327 for (auto *U : Ptr->users()) {
28328 if (U == N)
28329 continue;
28330 if (auto *St = dyn_cast<StoreSDNode>(U)) {
28331 if (StNode && StNode != St)
28332 return SDValue();
28333 StNode = St;
28334 continue;
28335 }
28336 return SDValue();
28337 }
28338 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
28339 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
28340 !Chain.reachesChainWithoutSideEffects(SDValue(StNode, 0)))
28341 return SDValue();
28342
28343 // Check if the stored value is loaded from some location and the loaded
28344 // value is used only in the store operation.
28345 SDValue StValue = StNode->getValue();
28346 auto *LdNode = dyn_cast<LoadSDNode>(StValue);
28347 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
28348 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
28349 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
28350 return SDValue();
28351
28352 // Create new node SET_FPENV_MEM, which uses the load address to read FP
28353 // environment.
28354 SDValue Res =
28355 DAG.getSetFPEnv(LdNode->getChain(), SDLoc(N), LdNode->getBasePtr(), MemVT,
28356 LdNode->getMemOperand());
28357 return Res;
28358}
28359
28360 /// Returns a vector_shuffle if it is able to transform an AND into a vector_shuffle
28361/// with the destination vector and a zero vector.
28362/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
28363/// vector_shuffle V, Zero, <0, 4, 2, 4>
28364SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
28365 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
28366
28367 EVT VT = N->getValueType(0);
28368 SDValue LHS = N->getOperand(0);
28369 SDValue RHS = peekThroughBitcasts(N->getOperand(1));
28370 SDLoc DL(N);
28371
28372 // Make sure we're not running after operation legalization where it
28373 // may have custom lowered the vector shuffles.
28374 if (LegalOperations)
28375 return SDValue();
28376
28377 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
28378 return SDValue();
28379
28380 EVT RVT = RHS.getValueType();
28381 unsigned NumElts = RHS.getNumOperands();
28382
28383 // Attempt to create a valid clear mask by splitting the mask into
28384 // sub-elements and checking whether each one is all zeros or all ones -
28385 // suitable for shuffle masking.
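// For example (illustrative, little-endian): and v2i32 V, <0x0000FFFF, 0> with
// Split == 2 becomes a v4i16 shuffle of (bitcast V) and zero with mask
// <0, 5, 6, 7>, where indices >= 4 select lanes from the zero vector.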
28386 auto BuildClearMask = [&](int Split) {
28387 int NumSubElts = NumElts * Split;
28388 int NumSubBits = RVT.getScalarSizeInBits() / Split;
28389
28390 SmallVector<int, 8> Indices;
28391 for (int i = 0; i != NumSubElts; ++i) {
28392 int EltIdx = i / Split;
28393 int SubIdx = i % Split;
28394 SDValue Elt = RHS.getOperand(EltIdx);
28395 // X & undef --> 0 (not undef). So this lane must be converted to choose
28396 // from the zero constant vector (same as if the element had all 0-bits).
28397 if (Elt.isUndef()) {
28398 Indices.push_back(i + NumSubElts);
28399 continue;
28400 }
28401
28402 std::optional<APInt> Bits = Elt->bitcastToAPInt();
28403 if (!Bits)
28404 return SDValue();
28405
28406 // Extract the sub element from the constant bit mask.
28407 if (DAG.getDataLayout().isBigEndian())
28408 *Bits =
28409 Bits->extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
28410 else
28411 *Bits = Bits->extractBits(NumSubBits, SubIdx * NumSubBits);
28412
28413 if (Bits->isAllOnes())
28414 Indices.push_back(i);
28415 else if (*Bits == 0)
28416 Indices.push_back(i + NumSubElts);
28417 else
28418 return SDValue();
28419 }
28420
28421 // Let's see if the target supports this vector_shuffle.
28422 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
28423 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
28424 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
28425 return SDValue();
28426
28427 SDValue Zero = DAG.getConstant(0, DL, ClearVT);
28428 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
28429 DAG.getBitcast(ClearVT, LHS),
28430 Zero, Indices));
28431 };
28432
28433 // Determine maximum split level (byte level masking).
28434 int MaxSplit = 1;
28435 if (RVT.getScalarSizeInBits() % 8 == 0)
28436 MaxSplit = RVT.getScalarSizeInBits() / 8;
28437
28438 for (int Split = 1; Split <= MaxSplit; ++Split)
28439 if (RVT.getScalarSizeInBits() % Split == 0)
28440 if (SDValue S = BuildClearMask(Split))
28441 return S;
28442
28443 return SDValue();
28444}
28445
28446/// If a vector binop is performed on splat values, it may be profitable to
28447/// extract, scalarize, and insert/splat.
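/// For example (illustrative): add (splat X), (splat Y) --> splat (add X, Y),
/// trading a full-width vector add for a scalar add plus a splat.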
28448 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
28449 const SDLoc &DL, bool LegalTypes) {
28450 SDValue N0 = N->getOperand(0);
28451 SDValue N1 = N->getOperand(1);
28452 unsigned Opcode = N->getOpcode();
28453 EVT VT = N->getValueType(0);
28454 EVT EltVT = VT.getVectorElementType();
28455 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
28456
28457 // TODO: Remove/replace the extract cost check? If the elements are available
28458 // as scalars, then there may be no extract cost. Should we ask if
28459 // inserting a scalar back into a vector is cheap instead?
28460 int Index0, Index1;
28461 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
28462 SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
28463 // Extract element from splat_vector should be free.
28464 // TODO: use DAG.isSplatValue instead?
28465 bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
28466 N1.getOpcode() == ISD::SPLAT_VECTOR;
28467 if (!Src0 || !Src1 || Index0 != Index1 ||
28468 Src0.getValueType().getVectorElementType() != EltVT ||
28469 Src1.getValueType().getVectorElementType() != EltVT ||
28470 !(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
28471 // If before type legalization, allow scalar types that will eventually be
28472 // made legal.
28473 !TLI.isOperationLegalOrCustom(
28474 Opcode, LegalTypes
28475 ? EltVT
28476 : TLI.getTypeToTransformTo(*DAG.getContext(), EltVT)))
28477 return SDValue();
28478
28479 // FIXME: Type legalization can't handle illegal MULHS/MULHU.
28480 if ((Opcode == ISD::MULHS || Opcode == ISD::MULHU) && !TLI.isTypeLegal(EltVT))
28481 return SDValue();
28482
28483 if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode()) {
28484 // All but one element should have an undef input, which will fold to a
28485 // constant or undef. Avoid splatting which would over-define potentially
28486 // undefined elements.
28487
28488 // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
28489 // build_vec ..undef, (bo X, Y), undef...
28490 SmallVector<SDValue, 16> EltsX, EltsY, EltsResult;
28491 DAG.ExtractVectorElements(Src0, EltsX);
28492 DAG.ExtractVectorElements(Src1, EltsY);
28493
28494 for (auto [X, Y] : zip(EltsX, EltsY))
28495 EltsResult.push_back(DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags()));
28496 return DAG.getBuildVector(VT, DL, EltsResult);
28497 }
28498
28499 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
28500 SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
28501 SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
28502 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
28503
28504 // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
28505 return DAG.getSplat(VT, DL, ScalarBO);
28506}
28507
28508/// Visit a vector cast operation, like FP_EXTEND.
28509SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) {
28510 EVT VT = N->getValueType(0);
28511 assert(VT.isVector() && "SimplifyVCastOp only works on vectors!");
28512 EVT EltVT = VT.getVectorElementType();
28513 unsigned Opcode = N->getOpcode();
28514
28515 SDValue N0 = N->getOperand(0);
28516 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
28517
28518 // TODO: promote operation might be also good here?
28519 int Index0;
28520 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
28521 if (Src0 &&
28522 (N0.getOpcode() == ISD::SPLAT_VECTOR ||
28523 TLI.isExtractVecEltCheap(VT, Index0)) &&
28524 TLI.isOperationLegalOrCustom(Opcode, EltVT) &&
28525 TLI.preferScalarizeSplat(N)) {
28526 EVT SrcVT = N0.getValueType();
28527 EVT SrcEltVT = SrcVT.getVectorElementType();
28528 if (!LegalTypes || TLI.isTypeLegal(SrcEltVT)) {
28529 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
28530 SDValue Elt =
28531 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcEltVT, Src0, IndexC);
28532 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, Elt, N->getFlags());
28533 if (VT.isScalableVector())
28534 return DAG.getSplatVector(VT, DL, ScalarBO);
28535 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
28536 return DAG.getBuildVector(VT, DL, Ops);
28537 }
28538 }
28539
28540 return SDValue();
28541}
28542
28543/// Visit a binary vector operation, like ADD.
28544SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
28545 EVT VT = N->getValueType(0);
28546 assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
28547
28548 SDValue LHS = N->getOperand(0);
28549 SDValue RHS = N->getOperand(1);
28550 unsigned Opcode = N->getOpcode();
28551 SDNodeFlags Flags = N->getFlags();
28552
28553 // Move unary shuffles with identical masks after a vector binop:
28554 // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
28555 // --> shuffle (VBinOp A, B), Undef, Mask
28556 // This does not require type legality checks because we are creating the
28557 // same types of operations that are in the original sequence. We do have to
28558 // restrict ops like integer div that have immediate UB (eg, div-by-zero)
28559 // though. This code is adapted from the identical transform in instcombine.
28560 if (DAG.isSafeToSpeculativelyExecute(Opcode)) {
28561 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
28562 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
28563 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
28564 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
28565 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
28566 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
28567 RHS.getOperand(0), Flags);
28568 SDValue UndefV = LHS.getOperand(1);
28569 return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
28570 }
28571
28572 // Try to sink a splat shuffle after a binop with a uniform constant.
28573 // This is limited to cases where neither the shuffle nor the constant have
28574 // undefined elements because that could be poison-unsafe or inhibit
28575 // demanded elements analysis. It is further limited to not change a splat
28576 // of an inserted scalar because that may be optimized better by
28577 // load-folding or other target-specific behaviors.
28578 if (isConstOrConstSplat(RHS) && Shuf0 && all_equal(Shuf0->getMask()) &&
28579 Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
28580 Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
28581 // binop (splat X), (splat C) --> splat (binop X, C)
28582 SDValue X = Shuf0->getOperand(0);
28583 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
28584 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
28585 Shuf0->getMask());
28586 }
28587 if (isConstOrConstSplat(LHS) && Shuf1 && all_equal(Shuf1->getMask()) &&
28588 Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
28589 Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
28590 // binop (splat C), (splat X) --> splat (binop C, X)
28591 SDValue X = Shuf1->getOperand(0);
28592 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
28593 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
28594 Shuf1->getMask());
28595 }
28596 }
28597
28598 // The following pattern is likely to emerge with vector reduction ops. Moving
28599 // the binary operation ahead of insertion may allow using a narrower vector
28600 // instruction that has better performance than the wide version of the op:
28601 // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
28602 if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
28603 RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
28604 LHS.getOperand(2) == RHS.getOperand(2) &&
28605 (LHS.hasOneUse() || RHS.hasOneUse())) {
28606 SDValue X = LHS.getOperand(1);
28607 SDValue Y = RHS.getOperand(1);
28608 SDValue Z = LHS.getOperand(2);
28609 EVT NarrowVT = X.getValueType();
28610 if (NarrowVT == Y.getValueType() &&
28611 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
28612 LegalOperations)) {
28613 // (binop undef, undef) may not return undef, so compute that result.
28614 SDValue VecC =
28615 DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
28616 SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
28617 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
28618 }
28619 }
28620
28621 // Make sure all but the first op are undef or constant.
28622 auto ConcatWithConstantOrUndef = [](SDValue Concat) {
28623 return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
28624 all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
28625 return Op.isUndef() ||
28626 ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
28627 });
28628 };
28629
28630 // The following pattern is likely to emerge with vector reduction ops. Moving
28631 // the binary operation ahead of the concat may allow using a narrower vector
28632 // instruction that has better performance than the wide version of the op:
28633 // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
28634 // concat (VBinOp X, Y), VecC
28635 if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
28636 (LHS.hasOneUse() || RHS.hasOneUse())) {
28637 EVT NarrowVT = LHS.getOperand(0).getValueType();
28638 if (NarrowVT == RHS.getOperand(0).getValueType() &&
28639 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
28640 unsigned NumOperands = LHS.getNumOperands();
28641 SmallVector<SDValue, 4> ConcatOps;
28642 for (unsigned i = 0; i != NumOperands; ++i) {
28643 // This constant-folds operands 1 and up.
28644 ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
28645 RHS.getOperand(i)));
28646 }
28647
28648 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
28649 }
28650 }
28651
28652 if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL, LegalTypes))
28653 return V;
28654
28655 return SDValue();
28656}
28657
28658SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
28659 SDValue N2) {
28660 assert(N0.getOpcode() == ISD::SETCC &&
28661 "First argument must be a SetCC node!");
28662
28663 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
28664 cast<CondCodeSDNode>(N0.getOperand(2))->get());
28665
28666 // If we got a simplified select_cc node back from SimplifySelectCC, then
28667 // break it down into a new SETCC node, and a new SELECT node, and then return
28668 // the SELECT node, since we were called with a SELECT node.
28669 if (SCC.getNode()) {
28670 // Check to see if we got a select_cc back (to turn into setcc/select).
28671 // Otherwise, just return whatever node we got back, like fabs.
28672 if (SCC.getOpcode() == ISD::SELECT_CC) {
28673 const SDNodeFlags Flags = N0->getFlags();
28674 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
28675 N0.getValueType(),
28676 SCC.getOperand(0), SCC.getOperand(1),
28677 SCC.getOperand(4), Flags);
28678 AddToWorklist(SETCC.getNode());
28679 return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
28680 SCC.getOperand(2), SCC.getOperand(3), Flags);
28681 }
28682
28683 return SCC;
28684 }
28685 return SDValue();
28686}
28687
28688/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
28689/// being selected between, see if we can simplify the select. Callers of this
28690/// should assume that TheSelect is deleted if this returns true. As such, they
28691/// should return the appropriate thing (e.g. the node) back to the top-level of
28692/// the DAG combiner loop to avoid it being looked at.
28693bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
28694 SDValue RHS) {
28695 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
28696 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
28697 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
28698 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
28699 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
28700 SDValue Sqrt = RHS;
28701 ISD::CondCode CC;
28702 SDValue CmpLHS;
28703 const ConstantFPSDNode *Zero = nullptr;
28704
28705 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
28706 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
28707 CmpLHS = TheSelect->getOperand(0);
28708 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
28709 } else {
28710 // SELECT or VSELECT
28711 SDValue Cmp = TheSelect->getOperand(0);
28712 if (Cmp.getOpcode() == ISD::SETCC) {
28713 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
28714 CmpLHS = Cmp.getOperand(0);
28715 Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
28716 }
28717 }
28718 if (Zero && Zero->isZero() &&
28719 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
28720 CC == ISD::SETULT || CC == ISD::SETLT)) {
28721 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
28722 CombineTo(TheSelect, Sqrt);
28723 return true;
28724 }
28725 }
28726 }
28727 // Cannot simplify select with vector condition
28728 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
28729
28730 // If this is a select from two identical things, try to pull the operation
28731 // through the select.
28732 if (LHS.getOpcode() != RHS.getOpcode() ||
28733 !LHS.hasOneUse() || !RHS.hasOneUse())
28734 return false;
28735
28736 // If this is a load and the token chain is identical, replace the select
28737 // of two loads with a load through a select of the address to load from.
28738 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
28739 // constants have been dropped into the constant pool.
28740 if (LHS.getOpcode() == ISD::LOAD) {
28741 LoadSDNode *LLD = cast<LoadSDNode>(LHS);
28742 LoadSDNode *RLD = cast<LoadSDNode>(RHS);
28743
28744 // Token chains must be identical.
28745 if (LHS.getOperand(0) != RHS.getOperand(0) ||
28746 // Do not let this transformation reduce the number of volatile loads.
28747 // Be conservative for atomics for the moment
28748 // TODO: This does appear to be legal for unordered atomics (see D66309)
28749 !LLD->isSimple() || !RLD->isSimple() ||
28750 // FIXME: If either is a pre/post inc/dec load,
28751 // we'd need to split out the address adjustment.
28752 LLD->isIndexed() || RLD->isIndexed() ||
28753 // If this is an EXTLOAD, the VT's must match.
28754 LLD->getMemoryVT() != RLD->getMemoryVT() ||
28755 // If this is an EXTLOAD, the kind of extension must match.
28756 (LLD->getExtensionType() != RLD->getExtensionType() &&
28757 // The only exception is if one of the extensions is anyext.
28758 LLD->getExtensionType() != ISD::EXTLOAD &&
28759 RLD->getExtensionType() != ISD::EXTLOAD) ||
28760 // FIXME: this discards src value information. This is
28761 // over-conservative. It would be beneficial to be able to remember
28762 // both potential memory locations. Since we are discarding
28763 // src value info, don't do the transformation if the memory
28764 // locations are not in the default address space.
28765 LLD->getPointerInfo().getAddrSpace() != 0 ||
28766 RLD->getPointerInfo().getAddrSpace() != 0 ||
28767 // We can't produce a CMOV of a TargetFrameIndex since we won't
28768 // generate the address generation required.
28769 LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
28770 RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
28771 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
28772 LLD->getBasePtr().getValueType()))
28773 return false;
28774
28775 // The loads must not depend on one another.
28776 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
28777 return false;
28778
28779 // Check that the select condition doesn't reach either load. If so,
28780 // folding this will induce a cycle into the DAG. If not, this is safe to
28781 // xform, so create a select of the addresses.
28782
28783 SmallPtrSet<const SDNode *, 32> Visited;
28784 SmallVector<const SDNode *, 16> Worklist;
28785
28786 // Always fail if LLD and RLD are not independent. TheSelect is a
28787 // predecessor to all Nodes in question so we need not search past it.
28788
28789 Visited.insert(TheSelect);
28790 Worklist.push_back(LLD);
28791 Worklist.push_back(RLD);
28792
28793 if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
28794 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
28795 return false;
28796
28797 SDValue Addr;
28798 if (TheSelect->getOpcode() == ISD::SELECT) {
28799 // We cannot do this optimization if any pair of {RLD, LLD} is a
28800 // predecessor to {RLD, LLD, CondNode}. As we've already compared the
28801 // Loads, we only need to check if CondNode is a successor to one of the
28802 // loads. We can further avoid this if there's no use of their chain
28803 // value.
28804 SDNode *CondNode = TheSelect->getOperand(0).getNode();
28805 Worklist.push_back(CondNode);
28806
28807 if ((LLD->hasAnyUseOfValue(1) &&
28808 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
28809 (RLD->hasAnyUseOfValue(1) &&
28810 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
28811 return false;
28812
28813 Addr = DAG.getSelect(SDLoc(TheSelect),
28814 LLD->getBasePtr().getValueType(),
28815 TheSelect->getOperand(0), LLD->getBasePtr(),
28816 RLD->getBasePtr());
28817 } else { // Otherwise SELECT_CC
28818 // We cannot do this optimization if any pair of {RLD, LLD} is a
28819 // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
28820 // the Loads, we only need to check if CondLHS/CondRHS is a successor to
28821 // one of the loads. We can further avoid this if there's no use of their
28822 // chain value.
28823
28824 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
28825 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
28826 Worklist.push_back(CondLHS);
28827 Worklist.push_back(CondRHS);
28828
28829 if ((LLD->hasAnyUseOfValue(1) &&
28830 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
28831 (RLD->hasAnyUseOfValue(1) &&
28832 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
28833 return false;
28834
28835 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
28836 LLD->getBasePtr().getValueType(),
28837 TheSelect->getOperand(0),
28838 TheSelect->getOperand(1),
28839 LLD->getBasePtr(), RLD->getBasePtr(),
28840 TheSelect->getOperand(4));
28841 }
28842
28843 SDValue Load;
28844 // It is safe to replace the two loads if they have different alignments,
28845 // but the new load must be the minimum (most restrictive) alignment of the
28846 // inputs.
28847 Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
28848 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
28849 if (!RLD->isInvariant())
28850 MMOFlags &= ~MachineMemOperand::MOInvariant;
28851 if (!RLD->isDereferenceable())
28852 MMOFlags &= ~MachineMemOperand::MODereferenceable;
28853 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
28854 // FIXME: Discards pointer and AA info.
28855 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
28856 LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
28857 MMOFlags);
28858 } else {
28859 // FIXME: Discards pointer and AA info.
28860 Load = DAG.getExtLoad(
28861 LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
28862 : LLD->getExtensionType(),
28863 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
28864 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
28865 }
28866
28867 // Users of the select now use the result of the load.
28868 CombineTo(TheSelect, Load);
28869
28870 // Users of the old loads now use the new load's chain. We know the
28871 // old-load value is dead now.
28872 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
28873 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
28874 return true;
28875 }
28876
28877 return false;
28878}
28879
28880/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
28881/// bitwise 'and'.
28882SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
28883 SDValue N1, SDValue N2, SDValue N3,
28884 ISD::CondCode CC) {
28885 // If this is a select where the false operand is zero and the compare is a
28886 // check of the sign bit, see if we can perform the "gzip trick":
28887 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
28888 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
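// For example (illustrative), with i32 X: (X < 0) ? A : 0 becomes
// and (sra X, 31), A, since the arithmetic shift produces all-ones exactly
// when X is negative and zero otherwise.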
28889 EVT XType = N0.getValueType();
28890 EVT AType = N2.getValueType();
28891 if (!isNullConstant(N3) || !XType.bitsGE(AType))
28892 return SDValue();
28893
28894 // If the comparison is testing for a positive value, we have to invert
28895 // the sign bit mask, so only do that transform if the target has a bitwise
28896 // 'and not' instruction (the invert is free).
28897 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
28898 // (X > -1) ? A : 0
28899 // (X > 0) ? X : 0 <-- This is canonical signed max.
28900 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
28901 return SDValue();
28902 } else if (CC == ISD::SETLT) {
28903 // (X < 0) ? A : 0
28904 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
28905 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
28906 return SDValue();
28907 } else {
28908 return SDValue();
28909 }
28910
28911 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
28912 // constant.
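// For example (illustrative): with i32 X and A == 8 (bit 3), ShCt is
// 32 - 3 - 1 == 28, so (X < 0) ? 8 : 0 becomes and (srl X, 28), 8 - the sign
// bit is shifted directly into bit 3.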
28913 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
28914 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
28915 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
28916 if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
28917 SDValue ShiftAmt = DAG.getShiftAmountConstant(ShCt, XType, DL);
28918 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
28919 AddToWorklist(Shift.getNode());
28920
28921 if (XType.bitsGT(AType)) {
28922 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
28923 AddToWorklist(Shift.getNode());
28924 }
28925
28926 if (CC == ISD::SETGT)
28927 Shift = DAG.getNOT(DL, Shift, AType);
28928
28929 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
28930 }
28931 }
28932
28933 unsigned ShCt = XType.getSizeInBits() - 1;
28934 if (TLI.shouldAvoidTransformToShift(XType, ShCt))
28935 return SDValue();
28936
28937 SDValue ShiftAmt = DAG.getShiftAmountConstant(ShCt, XType, DL);
28938 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
28939 AddToWorklist(Shift.getNode());
28940
28941 if (XType.bitsGT(AType)) {
28942 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
28943 AddToWorklist(Shift.getNode());
28944 }
28945
28946 if (CC == ISD::SETGT)
28947 Shift = DAG.getNOT(DL, Shift, AType);
28948
28949 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
28950}
28951
28952// Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
28953SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
28954 SDValue N0 = N->getOperand(0);
28955 SDValue N1 = N->getOperand(1);
28956 SDValue N2 = N->getOperand(2);
28957 SDLoc DL(N);
28958
28959 unsigned BinOpc = N1.getOpcode();
28960 if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc) ||
28961 (N1.getResNo() != N2.getResNo()))
28962 return SDValue();
28963
28964 // The use checks are intentionally on SDNode because we may be dealing
28965 // with opcodes that produce more than one SDValue.
28966 // TODO: Do we really need to check N0 (the condition operand of the select)?
28967 // But removing that clause could cause an infinite loop...
28968 if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
28969 return SDValue();
28970
28971 // Binops may include opcodes that return multiple values, so all values
28972 // must be created/propagated from the newly created binops below.
28973 SDVTList OpVTs = N1->getVTList();
28974
28975 // Fold select(cond, binop(x, y), binop(z, y))
28976 // --> binop(select(cond, x, z), y)
28977 if (N1.getOperand(1) == N2.getOperand(1)) {
28978 SDValue N10 = N1.getOperand(0);
28979 SDValue N20 = N2.getOperand(0);
28980 SDValue NewSel = DAG.getSelect(DL, N10.getValueType(), N0, N10, N20);
28981 SDNodeFlags Flags = N1->getFlags() & N2->getFlags();
28982 SDValue NewBinOp =
28983 DAG.getNode(BinOpc, DL, OpVTs, {NewSel, N1.getOperand(1)}, Flags);
28984 return SDValue(NewBinOp.getNode(), N1.getResNo());
28985 }
28986
28987 // Fold select(cond, binop(x, y), binop(x, z))
28988 // --> binop(x, select(cond, y, z))
28989 if (N1.getOperand(0) == N2.getOperand(0)) {
28990 SDValue N11 = N1.getOperand(1);
28991 SDValue N21 = N2.getOperand(1);
28992 // Second op VT might be different (e.g. shift amount type)
28993 if (N11.getValueType() == N21.getValueType()) {
28994 SDValue NewSel = DAG.getSelect(DL, N11.getValueType(), N0, N11, N21);
28995 SDNodeFlags Flags = N1->getFlags() & N2->getFlags();
28996 SDValue NewBinOp =
28997 DAG.getNode(BinOpc, DL, OpVTs, {N1.getOperand(0), NewSel}, Flags);
28998 return SDValue(NewBinOp.getNode(), N1.getResNo());
28999 }
29000 }
29001
29002 // TODO: Handle isCommutativeBinOp patterns as well?
29003 return SDValue();
29004}
29005
29006// Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
29007SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
29008 SDValue N0 = N->getOperand(0);
29009 EVT VT = N->getValueType(0);
29010 bool IsFabs = N->getOpcode() == ISD::FABS;
29011 bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
29012
29013 if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
29014 return SDValue();
29015
29016 SDValue Int = N0.getOperand(0);
29017 EVT IntVT = Int.getValueType();
29018
29019 // The operand to cast should be integer.
29020 if (!IntVT.isInteger() || IntVT.isVector())
29021 return SDValue();
29022
29023 // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
29024 // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
29025 APInt SignMask;
29026 if (N0.getValueType().isVector()) {
29027 // For vector, create a sign mask (0x80...) or its inverse (for fabs,
29028 // 0x7f...) per element and splat it.
29029 SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
29030 if (IsFabs)
29031 SignMask = ~SignMask;
29032 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
29033 } else {
29034 // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
29035 SignMask = APInt::getSignMask(IntVT.getSizeInBits());
29036 if (IsFabs)
29037 SignMask = ~SignMask;
29038 }
29039 SDLoc DL(N0);
29040 Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
29041 DAG.getConstant(SignMask, DL, IntVT));
29042 AddToWorklist(Int.getNode());
29043 return DAG.getBitcast(VT, Int);
29044}
29045
29046 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
29047/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
29048/// in it. This may be a win when the constant is not otherwise available
29049/// because it replaces two constant pool loads with one.
29050SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
29051 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
29052 ISD::CondCode CC) {
29053 if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
29054 return SDValue();
29055
29056 // If we are before legalize types, we want the other legalization to happen
29057 // first (for example, to avoid messing with soft float).
29058 auto *TV = dyn_cast<ConstantFPSDNode>(N2);
29059 auto *FV = dyn_cast<ConstantFPSDNode>(N3);
29060 EVT VT = N2.getValueType();
29061 if (!TV || !FV || !TLI.isTypeLegal(VT))
29062 return SDValue();
29063
29064 // If a constant can be materialized without loads, this does not make sense.
29065 if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
29066 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
29067 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
29068 return SDValue();
29069
29070 // If both constants have multiple uses, then we won't need to do an extra
29071 // load. The values are likely around in registers for other users.
29072 if (!TV->hasOneUse() && !FV->hasOneUse())
29073 return SDValue();
29074
29075 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
29076 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
29077 Type *FPTy = Elts[0]->getType();
29078 const DataLayout &TD = DAG.getDataLayout();
29079
29080 // Create a ConstantArray of the two constants.
29081 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
29082 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
29083 TD.getPrefTypeAlign(FPTy));
29084 Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
29085
29086 // Get offsets to the 0 and 1 elements of the array, so we can select between
29087 // them.
29088 SDValue Zero = DAG.getIntPtrConstant(0, DL);
29089 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
29090 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
29091 SDValue Cond =
29092 DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
29093 AddToWorklist(Cond.getNode());
29094 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
29095 AddToWorklist(CstOffset.getNode());
29096 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
29097 AddToWorklist(CPIdx.getNode());
29098 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
29099 MachinePointerInfo::getConstantPool(
29100 DAG.getMachineFunction()), Alignment);
29101}
29102
29103/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
29104/// where 'cond' is the comparison specified by CC.
29105SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
29106 SDValue N2, SDValue N3, ISD::CondCode CC,
29107 bool NotExtCompare) {
29108 // (x ? y : y) -> y.
29109 if (N2 == N3) return N2;
29110
29111 EVT CmpOpVT = N0.getValueType();
29112 EVT CmpResVT = getSetCCResultType(CmpOpVT);
29113 EVT VT = N2.getValueType();
29114 auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
29115 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
29116 auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
29117
29118 // Determine if the condition we're dealing with is constant.
29119 if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
29120 AddToWorklist(SCC.getNode());
29121 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
29122 // fold select_cc true, x, y -> x
29123 // fold select_cc false, x, y -> y
29124 return !(SCCC->isZero()) ? N2 : N3;
29125 }
29126 }
29127
29128 if (SDValue V =
29129 convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
29130 return V;
29131
29132 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
29133 return V;
29134
29135 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) A)
29136 // where y has a single bit set.
29137 // A plaintext description would be: we can turn the SELECT_CC into an AND
29138 // when the condition can be materialized as an all-ones register. Any
29139 // single bit-test can be materialized as an all-ones register with
29140 // shift-left and shift-right-arith.
29141 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
29142 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
29143 SDValue AndLHS = N0->getOperand(0);
29144 auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
29145 if (ConstAndRHS && ConstAndRHS->getAPIntValue().popcount() == 1) {
29146 // Shift the tested bit over the sign bit.
29147 const APInt &AndMask = ConstAndRHS->getAPIntValue();
29148 if (TLI.shouldFoldSelectWithSingleBitTest(VT, AndMask)) {
29149 unsigned ShCt = AndMask.getBitWidth() - 1;
29150 SDValue ShlAmt = DAG.getShiftAmountConstant(AndMask.countl_zero(), VT,
29151 SDLoc(AndLHS));
29152 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
29153
29154 // Now arithmetic right shift it all the way over, so the result is
29155 // either all-ones, or zero.
29156 SDValue ShrAmt = DAG.getShiftAmountConstant(ShCt, VT, SDLoc(Shl));
29157 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
29158
29159 return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
29160 }
29161 }
29162 }
29163
29164 // fold select C, 16, 0 -> shl C, 4
29165 bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
29166 bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
29167
29168 if ((Fold || Swap) &&
29169 TLI.getBooleanContents(CmpOpVT) ==
29170 TargetLowering::ZeroOrOneBooleanContent &&
29171 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT)) &&
29173
29174 if (Swap) {
29175 CC = ISD::getSetCCInverse(CC, CmpOpVT);
29176 std::swap(N2C, N3C);
29177 }
29178
29179 // If the caller doesn't want us to simplify this into a zext of a compare,
29180 // don't do it.
29181 if (NotExtCompare && N2C->isOne())
29182 return SDValue();
29183
29184 SDValue Temp, SCC;
29185 // zext (setcc n0, n1)
29186 if (LegalTypes) {
29187 SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
29188 Temp = DAG.getZExtOrTrunc(SCC, SDLoc(N2), VT);
29189 } else {
29190 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
29191 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
29192 }
29193
29194 AddToWorklist(SCC.getNode());
29195 AddToWorklist(Temp.getNode());
29196
29197 if (N2C->isOne())
29198 return Temp;
29199
29200 unsigned ShCt = N2C->getAPIntValue().logBase2();
29201 if (TLI.shouldAvoidTransformToShift(VT, ShCt))
29202 return SDValue();
29203
29204 // shl setcc result by log2 n2c
29205 return DAG.getNode(
29206 ISD::SHL, DL, N2.getValueType(), Temp,
29207 DAG.getShiftAmountConstant(ShCt, N2.getValueType(), SDLoc(Temp)));
29208 }
29209
29210 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
29211 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
29212 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
29213 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
29214 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
29215 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
29216 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
29217 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
29218 if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
29219 SDValue ValueOnZero = N2;
29220 SDValue Count = N3;
29221 // If the condition is NE instead of EQ, swap the operands.
29222 if (CC == ISD::SETNE)
29223 std::swap(ValueOnZero, Count);
29224 // Check if the value on zero is a constant equal to the bits in the type.
29225 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
29226 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
29227 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
29228 // legal, combine to just cttz.
29229 if ((Count.getOpcode() == ISD::CTTZ ||
29230 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
29231 N0 == Count.getOperand(0) &&
29232 (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
29233 return DAG.getNode(ISD::CTTZ, DL, VT, N0);
29234 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
29235 // legal, combine to just ctlz.
29236 if ((Count.getOpcode() == ISD::CTLZ ||
29237 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
29238 N0 == Count.getOperand(0) &&
29239 (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
29240 return DAG.getNode(ISD::CTLZ, DL, VT, N0);
29241 }
29242 }
29243 }
29244
29245 // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
29246 // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
29247 if (!NotExtCompare && N1C && N2C && N3C &&
29248 N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
29249 ((N1C->isAllOnes() && CC == ISD::SETGT) ||
29250 (N1C->isZero() && CC == ISD::SETLT)) &&
29251 !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
29252 SDValue ASHR =
29253 DAG.getNode(ISD::SRA, DL, CmpOpVT, N0,
29254 DAG.getShiftAmountConstant(
29255 CmpOpVT.getScalarSizeInBits() - 1, CmpOpVT, DL));
29256 return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASHR, DL, VT),
29257 DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
29258 }
29259
29260 // Fold sign pattern select_cc setgt X, -1, 1, -1 -> or (ashr X, BW-1), 1
29261 if (CC == ISD::SETGT && N1C && N2C && N3C && N1C->isAllOnes() &&
29262 N2C->isOne() && N3C->isAllOnes() &&
29263 !TLI.shouldAvoidTransformToShift(CmpOpVT,
29264 CmpOpVT.getScalarSizeInBits() - 1)) {
29265 SDValue ASHR =
29266 DAG.getNode(ISD::SRA, DL, CmpOpVT, N0,
29267 DAG.getShiftAmountConstant(
29268 CmpOpVT.getScalarSizeInBits() - 1, CmpOpVT, DL));
29269 return DAG.getNode(ISD::OR, DL, VT, DAG.getSExtOrTrunc(ASHR, DL, VT),
29270 DAG.getConstant(1, DL, VT));
29271 }
29272
29273 if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
29274 return S;
29275 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
29276 return S;
29277 if (SDValue ABD = foldSelectToABD(N0, N1, N2, N3, CC, DL))
29278 return ABD;
29279
29280 return SDValue();
29281}
29282
29283 static SDValue matchMergedBFX(SDValue Root, SelectionDAG &DAG,
29284 const TargetLowering &TLI) {
29285 // Match a pattern such as:
29286 // (X | (X >> C0) | (X >> C1) | ...) & Mask
29287 // This extracts contiguous parts of X and ORs them together before comparing.
29288 // We can optimize this so that we directly check (X & SomeMask) instead,
29289 // eliminating the shifts.
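// For example (illustrative): when comparing against zero,
//   ((X | (X >> 8)) & 0xFF) == 0   is equivalent to   (X & 0xFFFF) == 0,
// because each shifted operand (X >> Ci) & Mask tests the bits (Mask << Ci) of X.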
29290
29291 EVT VT = Root.getValueType();
29292
29293 // TODO: Support vectors?
29294 if (!VT.isScalarInteger() || Root.getOpcode() != ISD::AND)
29295 return SDValue();
29296
29297 SDValue N0 = Root.getOperand(0);
29298 SDValue N1 = Root.getOperand(1);
29299
29300 if (N0.getOpcode() != ISD::OR || !isa<ConstantSDNode>(N1))
29301 return SDValue();
29302
29303 APInt RootMask = cast<ConstantSDNode>(N1)->getAsAPIntVal();
29304
29305 SDValue Src;
29306 const auto IsSrc = [&](SDValue V) {
29307 if (!Src) {
29308 Src = V;
29309 return true;
29310 }
29311
29312 return Src == V;
29313 };
29314
29315 SmallVector<SDValue> Worklist = {N0};
29316 APInt PartsMask(VT.getSizeInBits(), 0);
29317 while (!Worklist.empty()) {
29318 SDValue V = Worklist.pop_back_val();
29319 if (!V.hasOneUse() && (Src && Src != V))
29320 return SDValue();
29321
29322 if (V.getOpcode() == ISD::OR) {
29323 Worklist.push_back(V.getOperand(0));
29324 Worklist.push_back(V.getOperand(1));
29325 continue;
29326 }
29327
29328 if (V.getOpcode() == ISD::SRL) {
29329 SDValue ShiftSrc = V.getOperand(0);
29330 SDValue ShiftAmt = V.getOperand(1);
29331
29332 if (!IsSrc(ShiftSrc) || !isa<ConstantSDNode>(ShiftAmt))
29333 return SDValue();
29334
29335 auto ShiftAmtVal = cast<ConstantSDNode>(ShiftAmt)->getAsZExtVal();
29336 if (ShiftAmtVal > RootMask.getBitWidth())
29337 return SDValue();
29338
29339 PartsMask |= (RootMask << ShiftAmtVal);
29340 continue;
29341 }
29342
29343 if (IsSrc(V)) {
29344 PartsMask |= RootMask;
29345 continue;
29346 }
29347
29348 return SDValue();
29349 }
29350
29351 if (!Src)
29352 return SDValue();
29353
29354 SDLoc DL(Root);
29355 return DAG.getNode(ISD::AND, DL, VT,
29356 {Src, DAG.getConstant(PartsMask, DL, VT)});
29357}
29358
29359/// This is a stub for TargetLowering::SimplifySetCC.
29360SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
29361 ISD::CondCode Cond, const SDLoc &DL,
29362 bool foldBooleans) {
29363 TargetLowering::DAGCombinerInfo
29364 DagCombineInfo(DAG, Level, false, this);
29365 if (SDValue C =
29366 TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL))
29367 return C;
29368
29369 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
29370 isNullConstant(N1)) {
29371
29372 if (SDValue Res = matchMergedBFX(N0, DAG, TLI))
29373 return DAG.getSetCC(DL, VT, Res, N1, Cond);
29374 }
29375
29376 return SDValue();
29377}
29378
29379/// Given an ISD::SDIV node expressing a divide by constant, return
29380/// a DAG expression to select that will generate the same value by multiplying
29381/// by a magic number.
29382/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
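/// For example (illustrative, i32): sdiv X, 3 can be selected as roughly
/// q = mulhs(X, 0x55555556); q + (q >>u 31) - the magic multiply replaces the
/// divide and the final add corrects the rounding for negative X.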
29383SDValue DAGCombiner::BuildSDIV(SDNode *N) {
29384 // when optimising for minimum size, we don't want to expand a div to a mul
29385 // and a shift.
29386 if (DAG.getMachineFunction().getFunction().hasMinSize())
29387 return SDValue();
29388
29389 SmallVector<SDNode *, 8> Built;
29390 if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, LegalTypes, Built)) {
29391 for (SDNode *N : Built)
29392 AddToWorklist(N);
29393 return S;
29394 }
29395
29396 return SDValue();
29397}
29398
29399/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
29400/// DAG expression that will generate the same value by right shifting.
29401SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
29402 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
29403 if (!C)
29404 return SDValue();
29405
29406 // Avoid division by zero.
29407 if (C->isZero())
29408 return SDValue();
29409
29410 SmallVector<SDNode *, 8> Built;
29411 if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
29412 for (SDNode *N : Built)
29413 AddToWorklist(N);
29414 return S;
29415 }
29416
29417 return SDValue();
29418}
29419
29420/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
29421/// expression that will generate the same value by multiplying by a magic
29422/// number.
29423/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
29424SDValue DAGCombiner::BuildUDIV(SDNode *N) {
29425 // when optimising for minimum size, we don't want to expand a div to a mul
29426 // and a shift.
29427 if (DAG.getMachineFunction().getFunction().hasMinSize())
29428 return SDValue();
29429
29430 SmallVector<SDNode *, 8> Built;
29431 if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, LegalTypes, Built)) {
29432 for (SDNode *N : Built)
29433 AddToWorklist(N);
29434 return S;
29435 }
29436
29437 return SDValue();
29438}
29439
29440/// Given an ISD::SREM node expressing a remainder by constant power of 2,
29441/// return a DAG expression that will generate the same value.
29442SDValue DAGCombiner::BuildSREMPow2(SDNode *N) {
29443 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
29444 if (!C)
29445 return SDValue();
29446
29447 // Avoid division by zero.
29448 if (C->isZero())
29449 return SDValue();
29450
29451 SmallVector<SDNode *, 8> Built;
29452 if (SDValue S = TLI.BuildSREMPow2(N, C->getAPIntValue(), DAG, Built)) {
29453 for (SDNode *N : Built)
29454 AddToWorklist(N);
29455 return S;
29456 }
29457
29458 return SDValue();
29459}
29460
29461// This is basically just a port of takeLog2 from InstCombineMulDivRem.cpp
29462//
29463// Returns the node that represents `Log2(Op)`. This may create a new node. If
29464 // we are unable to compute `Log2(Op)`, it returns `SDValue()`.
29465//
29466// All nodes will be created at `DL` and the output will be of type `VT`.
29467//
29468// This will only return `Log2(Op)` if we can prove `Op` is non-zero. Set
29469 // `AssumeNonZero` if this function should simply assume (rather than prove)
29470 // that `Op` is non-zero.
29471 static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
29472 SDValue Op, unsigned Depth,
29473 bool AssumeNonZero) {
29474 assert(VT.isInteger() && "Only integer types are supported!");
29475
29476 auto PeekThroughCastsAndTrunc = [](SDValue V) {
29477 while (true) {
29478 switch (V.getOpcode()) {
29479 case ISD::TRUNCATE:
29480 case ISD::ZERO_EXTEND:
29481 V = V.getOperand(0);
29482 break;
29483 default:
29484 return V;
29485 }
29486 }
29487 };
29488
29489 if (VT.isScalableVector())
29490 return SDValue();
29491
29492 Op = PeekThroughCastsAndTrunc(Op);
29493
29494 // Helper for determining whether a value is a power-2 constant scalar or a
29495 // vector of such elements.
29496 SmallVector<APInt> Pow2Constants;
29497 auto IsPowerOfTwo = [&Pow2Constants](ConstantSDNode *C) {
29498 if (C->isZero() || C->isOpaque())
29499 return false;
29500 // TODO: We may also be able to support negative powers of 2 here.
29501 if (C->getAPIntValue().isPowerOf2()) {
29502 Pow2Constants.emplace_back(C->getAPIntValue());
29503 return true;
29504 }
29505 return false;
29506 };
29507
29508 if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo)) {
29509 if (!VT.isVector())
29510 return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT);
29511 // We need to create a build vector
29512 if (Op.getOpcode() == ISD::SPLAT_VECTOR)
29513 return DAG.getSplat(VT, DL,
29514 DAG.getConstant(Pow2Constants.back().logBase2(), DL,
29515 VT.getScalarType()));
29516 SmallVector<SDValue> Log2Ops;
29517 for (const APInt &Pow2 : Pow2Constants)
29518 Log2Ops.emplace_back(
29519 DAG.getConstant(Pow2.logBase2(), DL, VT.getScalarType()));
29520 return DAG.getBuildVector(VT, DL, Log2Ops);
29521 }
29522
29523 if (Depth >= DAG.MaxRecursionDepth)
29524 return SDValue();
29525
29526 auto CastToVT = [&](EVT NewVT, SDValue ToCast) {
29527 // Peek through zero extend. We can't peek through truncates since this
29528 // function is called on a shift amount. We must ensure that all of the bits
29529 // above the original shift amount are zeroed by this function.
29530 while (ToCast.getOpcode() == ISD::ZERO_EXTEND)
29531 ToCast = ToCast.getOperand(0);
29532 EVT CurVT = ToCast.getValueType();
29533 if (NewVT == CurVT)
29534 return ToCast;
29535
29536 if (NewVT.getSizeInBits() == CurVT.getSizeInBits())
29537 return DAG.getBitcast(NewVT, ToCast);
29538
29539 return DAG.getZExtOrTrunc(ToCast, DL, NewVT);
29540 };
29541
29542 // log2(X << Y) -> log2(X) + Y
29543 if (Op.getOpcode() == ISD::SHL) {
29544 // 1 << Y and X nuw/nsw << Y are both non-zero.
29545 if (AssumeNonZero || Op->getFlags().hasNoUnsignedWrap() ||
29546 Op->getFlags().hasNoSignedWrap() || isOneConstant(Op.getOperand(0)))
29547 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0),
29548 Depth + 1, AssumeNonZero))
29549 return DAG.getNode(ISD::ADD, DL, VT, LogX,
29550 CastToVT(VT, Op.getOperand(1)));
29551 }
29552
29553 // c ? X : Y -> c ? Log2(X) : Log2(Y)
29554 if ((Op.getOpcode() == ISD::SELECT || Op.getOpcode() == ISD::VSELECT) &&
29555 Op.hasOneUse()) {
29556 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1),
29557 Depth + 1, AssumeNonZero))
29558 if (SDValue LogY = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(2),
29559 Depth + 1, AssumeNonZero))
29560 return DAG.getSelect(DL, VT, Op.getOperand(0), LogX, LogY);
29561 }
29562
29563 // log2(umin(X, Y)) -> umin(log2(X), log2(Y))
29564 // log2(umax(X, Y)) -> umax(log2(X), log2(Y))
29565 if ((Op.getOpcode() == ISD::UMIN || Op.getOpcode() == ISD::UMAX) &&
29566 Op.hasOneUse()) {
29567    // Pass AssumeNonZero as false here. Otherwise we can hit a case where
29568    // log2(umax(X, Y)) != umax(log2(X), log2(Y)) because of overflow.
29569 if (SDValue LogX =
29570 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0), Depth + 1,
29571 /*AssumeNonZero*/ false))
29572 if (SDValue LogY =
29573 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1), Depth + 1,
29574 /*AssumeNonZero*/ false))
29575 return DAG.getNode(Op.getOpcode(), DL, VT, LogX, LogY);
29576 }
29577
29578 return SDValue();
29579}
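
// A minimal scalar sketch of the shift fold above (illustrative only, not part
// of the combiner): when X is a known power of two and X << Y cannot wrap,
// log2(X << Y) decomposes into log2(X) + Y, which is exactly what the ISD::SHL
// case builds as an ISD::ADD of the recursive result and the shift amount.
static unsigned log2OfNonWrappingShlSketch(unsigned LogX, unsigned Y) {
  // log2((1u << LogX) << Y) == LogX + Y when the shift does not overflow.
  return LogX + Y;
}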
29580
29581/// Determines the LogBase2 value for a non-null input value using the
29582/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
29583SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL,
29584 bool KnownNonZero, bool InexpensiveOnly,
29585 std::optional<EVT> OutVT) {
29586 EVT VT = OutVT ? *OutVT : V.getValueType();
29587 SDValue InexpensiveLogBase2 =
29588 takeInexpensiveLog2(DAG, DL, VT, V, /*Depth*/ 0, KnownNonZero);
29589 if (InexpensiveLogBase2 || InexpensiveOnly || !DAG.isKnownToBeAPowerOfTwo(V))
29590 return InexpensiveLogBase2;
29591
29592 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
29593 SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
29594 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
29595 return LogBase2;
29596}
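
// A concrete instance of the ctlz expansion above (illustrative only, assuming
// a 32-bit scalar type): for V == 16, ctlz(16) == 27, so the emitted
// SUB(EltBits - 1, ctlz(V)) evaluates to 31 - 27 == 4 == log2(16).
static unsigned logBase2ViaCtlzSketch(unsigned EltBits, unsigned CtlzOfV) {
  return (EltBits - 1) - CtlzOfV;
}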
29597
29598/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
29599/// For the reciprocal, we need to find the zero of the function:
29600/// F(X) = 1/X - A [which has a zero at X = 1/A]
29601/// =>
29602/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
29603/// does not require additional intermediate precision]
29604/// For the last iteration, put numerator N into it to gain more precision:
29605/// Result = N X_i + X_i (N - N A X_i)
29606SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
29607 SDNodeFlags Flags) {
29608 if (LegalDAG)
29609 return SDValue();
29610
29611 // TODO: Handle extended types?
29612 EVT VT = Op.getValueType();
29613 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
29614 VT.getScalarType() != MVT::f64)
29615 return SDValue();
29616
29617 // If estimates are explicitly disabled for this function, we're done.
29618 MachineFunction &MF = DAG.getMachineFunction();
29619 int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
29620 if (Enabled == TLI.ReciprocalEstimate::Disabled)
29621 return SDValue();
29622
29623 // Estimates may be explicitly enabled for this type with a custom number of
29624 // refinement steps.
29625 int Iterations = TLI.getDivRefinementSteps(VT, MF);
29626 if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
29627 AddToWorklist(Est.getNode());
29628
29629 SDLoc DL(Op);
29630 if (Iterations) {
29631 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
29632
29633 // Newton iterations: Est = Est + Est (N - Arg * Est)
29634 // If this is the last iteration, also multiply by the numerator.
29635 for (int i = 0; i < Iterations; ++i) {
29636 SDValue MulEst = Est;
29637
29638 if (i == Iterations - 1) {
29639 MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
29640 AddToWorklist(MulEst.getNode());
29641 }
29642
29643 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
29644 AddToWorklist(NewEst.getNode());
29645
29646 NewEst = DAG.getNode(ISD::FSUB, DL, VT,
29647 (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
29648 AddToWorklist(NewEst.getNode());
29649
29650 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
29651 AddToWorklist(NewEst.getNode());
29652
29653 Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
29654 AddToWorklist(Est.getNode());
29655 }
29656 } else {
29657 // If no iterations are available, multiply with N.
29658 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
29659 AddToWorklist(Est.getNode());
29660 }
29661
29662 return Est;
29663 }
29664
29665 return SDValue();
29666}
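
// A scalar sketch of the refinement above (illustrative only): each step uses
// the second form X + X*(1 - A*X), and the last step folds in the numerator N
// so the final value approximates N/A rather than 1/A.
static double newtonRecipStepSketch(double A, double X) {
  return X + X * (1.0 - A * X);
}
static double newtonRecipLastStepSketch(double N, double A, double X) {
  double MulEst = N * X;                // multiply by the numerator first
  return MulEst + X * (N - A * MulEst); // N*X + X*(N - N*A*X)
}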
29667
29668/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
29669/// For the reciprocal sqrt, we need to find the zero of the function:
29670/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
29671/// =>
29672/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
29673/// As a result, we precompute A/2 prior to the iteration loop.
29674SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
29675 unsigned Iterations,
29676 SDNodeFlags Flags, bool Reciprocal) {
29677 EVT VT = Arg.getValueType();
29678 SDLoc DL(Arg);
29679 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
29680
29681 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
29682 // this entire sequence requires only one FP constant.
29683 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
29684 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
29685
29686 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
29687 for (unsigned i = 0; i < Iterations; ++i) {
29688 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
29689 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
29690 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
29691 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
29692 }
29693
29694 // If non-reciprocal square root is requested, multiply the result by Arg.
29695 if (!Reciprocal)
29696 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
29697
29698 return Est;
29699}
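
// A scalar sketch of the one-constant iteration above (illustrative only):
// with HalfArg == A/2 (materialized as 1.5*A - A), each step computes
// Est * (1.5 - HalfArg * Est * Est) and converges toward 1/sqrt(A).
static double rsqrtOneConstStepSketch(double HalfArg, double Est) {
  return Est * (1.5 - HalfArg * Est * Est);
}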
29700
29701/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
29702/// For the reciprocal sqrt, we need to find the zero of the function:
29703/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
29704/// =>
29705/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
29706SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
29707 unsigned Iterations,
29708 SDNodeFlags Flags, bool Reciprocal) {
29709 EVT VT = Arg.getValueType();
29710 SDLoc DL(Arg);
29711 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
29712 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
29713
29714 // This routine must enter the loop below to work correctly
29715 // when (Reciprocal == false).
29716 assert(Iterations > 0);
29717
29718 // Newton iterations for reciprocal square root:
29719 // E = (E * -0.5) * ((A * E) * E + -3.0)
29720 for (unsigned i = 0; i < Iterations; ++i) {
29721 SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
29722 SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
29723 SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
29724
29725    // When calculating a square root, at the last iteration build:
29726 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
29727 // (notice a common subexpression)
29728 SDValue LHS;
29729 if (Reciprocal || (i + 1) < Iterations) {
29730 // RSQRT: LHS = (E * -0.5)
29731 LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
29732 } else {
29733 // SQRT: LHS = (A * E) * -0.5
29734 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
29735 }
29736
29737 Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
29738 }
29739
29740 return Est;
29741}
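
// A scalar sketch of the two-constant iteration above (illustrative only): the
// reciprocal form scales by (E * -0.5), while the final square-root step
// reuses the A*E product so the result approximates sqrt(A) instead of
// 1/sqrt(A).
static double rsqrtTwoConstStepSketch(double A, double E) {
  return (E * -0.5) * ((A * E) * E + -3.0);
}
static double sqrtTwoConstLastStepSketch(double A, double E) {
  double AE = A * E;
  return (AE * -0.5) * (AE * E + -3.0);
}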
29742
29743/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
29744/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
29745/// Op can be zero.
29746SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
29747 bool Reciprocal) {
29748 if (LegalDAG)
29749 return SDValue();
29750
29751 // TODO: Handle extended types?
29752 EVT VT = Op.getValueType();
29753 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
29754 VT.getScalarType() != MVT::f64)
29755 return SDValue();
29756
29757 // If estimates are explicitly disabled for this function, we're done.
29758 MachineFunction &MF = DAG.getMachineFunction();
29759 int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
29760 if (Enabled == TLI.ReciprocalEstimate::Disabled)
29761 return SDValue();
29762
29763 // Estimates may be explicitly enabled for this type with a custom number of
29764 // refinement steps.
29765 int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
29766
29767 bool UseOneConstNR = false;
29768 if (SDValue Est =
29769 TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
29770 Reciprocal)) {
29771 AddToWorklist(Est.getNode());
29772
29773 if (Iterations > 0)
29774 Est = UseOneConstNR
29775 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
29776 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
29777 if (!Reciprocal) {
29778 SDLoc DL(Op);
29779 // Try the target specific test first.
29780 SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
29781
29782 // The estimate is now completely wrong if the input was exactly 0.0 or
29783      // possibly a denormal. Force the answer to 0.0 or the value provided by
29784      // the target for those cases.
29785 Est = DAG.getSelect(DL, VT, Test,
29786 TLI.getSqrtResultForDenormInput(Op, DAG), Est);
29787 }
29788 return Est;
29789 }
29790
29791 return SDValue();
29792}
29793
29794SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
29795 return buildSqrtEstimateImpl(Op, Flags, true);
29796}
29797
29798SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
29799 return buildSqrtEstimateImpl(Op, Flags, false);
29800}
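
// The non-reciprocal path above forms sqrt(Op) as Op * rsqrt(Op), which breaks
// down when Op is 0.0 or a denormal, hence the select on the target's input
// test. A scalar sketch of that guard (illustrative only, using 0.0 in place
// of the target-provided denormal result):
static double guardedSqrtFromRsqrtSketch(double Op, double RsqrtEst,
                                         bool InputIsZeroOrDenormal) {
  return InputIsZeroOrDenormal ? 0.0 : Op * RsqrtEst;
}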
29801
29802/// Return true if there is any possibility that the two addresses overlap.
29803bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
29804
29805 struct MemUseCharacteristics {
29806 bool IsVolatile;
29807 bool IsAtomic;
29808    SDValue BasePtr;
29809    int64_t Offset;
29810 LocationSize NumBytes;
29811 MachineMemOperand *MMO;
29812 };
29813
29814 auto getCharacteristics = [this](SDNode *N) -> MemUseCharacteristics {
29815 if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
29816 int64_t Offset = 0;
29817 if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
29818 Offset = (LSN->getAddressingMode() == ISD::PRE_INC) ? C->getSExtValue()
29819 : (LSN->getAddressingMode() == ISD::PRE_DEC)
29820 ? -1 * C->getSExtValue()
29821 : 0;
29822 TypeSize Size = LSN->getMemoryVT().getStoreSize();
29823 return {LSN->isVolatile(), LSN->isAtomic(),
29824 LSN->getBasePtr(), Offset /*base offset*/,
29825 LocationSize::precise(Size), LSN->getMemOperand()};
29826 }
29827 if (const auto *LN = cast<LifetimeSDNode>(N)) {
29828 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
29829 return {false /*isVolatile*/,
29830 /*isAtomic*/ false,
29831 LN->getOperand(1),
29832 0,
29833 LocationSize::precise(MFI.getObjectSize(LN->getFrameIndex())),
29834 (MachineMemOperand *)nullptr};
29835 }
29836 // Default.
29837 return {false /*isvolatile*/,
29838 /*isAtomic*/ false,
29839 SDValue(),
29840 (int64_t)0 /*offset*/,
29841            LocationSize::beforeOrAfterPointer() /*size*/,
29842            (MachineMemOperand *)nullptr};
29843 };
29844
29845 MemUseCharacteristics MUC0 = getCharacteristics(Op0),
29846 MUC1 = getCharacteristics(Op1);
29847
29848 // If they are to the same address, then they must be aliases.
29849 if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
29850 MUC0.Offset == MUC1.Offset)
29851 return true;
29852
29853 // If they are both volatile then they cannot be reordered.
29854 if (MUC0.IsVolatile && MUC1.IsVolatile)
29855 return true;
29856
29857 // Be conservative about atomics for the moment
29858 // TODO: This is way overconservative for unordered atomics (see D66309)
29859 if (MUC0.IsAtomic && MUC1.IsAtomic)
29860 return true;
29861
29862 if (MUC0.MMO && MUC1.MMO) {
29863 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
29864 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
29865 return false;
29866 }
29867
29868  // If NumBytes is scalable and the offset is not 0, conservatively return
29869  // that they may alias.
29870 if ((MUC0.NumBytes.hasValue() && MUC0.NumBytes.isScalable() &&
29871 MUC0.Offset != 0) ||
29872 (MUC1.NumBytes.hasValue() && MUC1.NumBytes.isScalable() &&
29873 MUC1.Offset != 0))
29874 return true;
29875 // Try to prove that there is aliasing, or that there is no aliasing. Either
29876 // way, we can return now. If nothing can be proved, proceed with more tests.
29877 bool IsAlias;
29878 if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
29879 DAG, IsAlias))
29880 return IsAlias;
29881
29882 // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
29883  // either is not known.
29884 if (!MUC0.MMO || !MUC1.MMO)
29885 return true;
29886
29887  // If one operation reads from invariant memory and the other may store, they
29888  // cannot alias. These should really be checking the equivalent of mayWrite,
29889  // but that only matters for memory nodes other than load/store.
29890 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
29891 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
29892 return false;
29893
29894  // If we know required SrcValue1 and SrcValue2 have relatively large
29895  // alignment compared to the size and offset of the access, we may be able to
29896  // prove they do not alias. This check is conservative for now to catch cases
29897  // created by splitting vector types; it only works when the offsets are
29898  // multiples of the size of the data.
29899 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
29900 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
29901 Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
29902 Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
29903 LocationSize Size0 = MUC0.NumBytes;
29904 LocationSize Size1 = MUC1.NumBytes;
29905
29906 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
29907 Size0.hasValue() && Size1.hasValue() && !Size0.isScalable() &&
29908 !Size1.isScalable() && Size0 == Size1 &&
29909 OrigAlignment0 > Size0.getValue().getKnownMinValue() &&
29910 SrcValOffset0 % Size0.getValue().getKnownMinValue() == 0 &&
29911 SrcValOffset1 % Size1.getValue().getKnownMinValue() == 0) {
29912 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
29913 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
29914
29915 // There is no overlap between these relatively aligned accesses of
29916 // similar size. Return no alias.
29917 if ((OffAlign0 + static_cast<int64_t>(
29918 Size0.getValue().getKnownMinValue())) <= OffAlign1 ||
29919 (OffAlign1 + static_cast<int64_t>(
29920 Size1.getValue().getKnownMinValue())) <= OffAlign0)
29921 return false;
29922 }
29923
29924  bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
29925                   ? CombinerGlobalAA
29926                   : DAG.getSubtarget().useAA();
29927#ifndef NDEBUG
29928 if (CombinerAAOnlyFunc.getNumOccurrences() &&
29929      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
29930    UseAA = false;
29931#endif
29932
29933 if (UseAA && BatchAA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
29934 Size0.hasValue() && Size1.hasValue() &&
29935 // Can't represent a scalable size + fixed offset in LocationSize
29936 (!Size0.isScalable() || SrcValOffset0 == 0) &&
29937 (!Size1.isScalable() || SrcValOffset1 == 0)) {
29938 // Use alias analysis information.
29939 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
29940 int64_t Overlap0 =
29941 Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset;
29942 int64_t Overlap1 =
29943 Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset;
29944 LocationSize Loc0 =
29945 Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0);
29946 LocationSize Loc1 =
29947 Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1);
29948 if (BatchAA->isNoAlias(
29949 MemoryLocation(MUC0.MMO->getValue(), Loc0,
29950 UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
29951 MemoryLocation(MUC1.MMO->getValue(), Loc1,
29952 UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
29953 return false;
29954 }
29955
29956 // Otherwise we have to assume they alias.
29957 return true;
29958}
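
// A worked instance of the alignment reasoning above (illustrative only): with
// a common base alignment of 16, two 4-byte accesses at offsets 4 and 8 give
// OffAlign0 == 4 and OffAlign1 == 8; since 4 + 4 <= 8, the accesses cannot
// overlap inside any aligned 16-byte block and mayAlias can return false.
static bool alignedAccessesMayOverlapSketch(int64_t Off0, int64_t Off1,
                                            int64_t Size, int64_t BaseAlign) {
  int64_t OffAlign0 = Off0 % BaseAlign;
  int64_t OffAlign1 = Off1 % BaseAlign;
  return !(OffAlign0 + Size <= OffAlign1 || OffAlign1 + Size <= OffAlign0);
}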
29959
29960/// Walk up chain skipping non-aliasing memory nodes,
29961/// looking for aliasing nodes and adding them to the Aliases vector.
29962void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
29963 SmallVectorImpl<SDValue> &Aliases) {
29964 SmallVector<SDValue, 8> Chains; // List of chains to visit.
29965 SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
29966
29967 // Get alias information for node.
29968 // TODO: relax aliasing for unordered atomics (see D66309)
29969 const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
29970
29971 // Starting off.
29972 Chains.push_back(OriginalChain);
29973 unsigned Depth = 0;
29974
29975 // Attempt to improve chain by a single step
29976 auto ImproveChain = [&](SDValue &C) -> bool {
29977 switch (C.getOpcode()) {
29978 case ISD::EntryToken:
29979 // No need to mark EntryToken.
29980 C = SDValue();
29981 return true;
29982 case ISD::LOAD:
29983 case ISD::STORE: {
29984 // Get alias information for C.
29985 // TODO: Relax aliasing for unordered atomics (see D66309)
29986 bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
29987 cast<LSBaseSDNode>(C.getNode())->isSimple();
29988 if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
29989 // Look further up the chain.
29990 C = C.getOperand(0);
29991 return true;
29992 }
29993 // Alias, so stop here.
29994 return false;
29995 }
29996
29997 case ISD::CopyFromReg:
29998 // Always forward past CopyFromReg.
29999 C = C.getOperand(0);
30000 return true;
30001
30002 case ISD::LIFETIME_START:
30003 case ISD::LIFETIME_END: {
30004 // We can forward past any lifetime start/end that can be proven not to
30005 // alias the memory access.
30006 if (!mayAlias(N, C.getNode())) {
30007 // Look further up the chain.
30008 C = C.getOperand(0);
30009 return true;
30010 }
30011 return false;
30012 }
30013 default:
30014 return false;
30015 }
30016 };
30017
30018 // Look at each chain and determine if it is an alias. If so, add it to the
30019 // aliases list. If not, then continue up the chain looking for the next
30020 // candidate.
30021 while (!Chains.empty()) {
30022 SDValue Chain = Chains.pop_back_val();
30023
30024 // Don't bother if we've seen Chain before.
30025 if (!Visited.insert(Chain.getNode()).second)
30026 continue;
30027
30028 // For TokenFactor nodes, look at each operand and only continue up the
30029 // chain until we reach the depth limit.
30030 //
30031 // FIXME: The depth check could be made to return the last non-aliasing
30032 // chain we found before we hit a tokenfactor rather than the original
30033 // chain.
30034 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
30035 Aliases.clear();
30036 Aliases.push_back(OriginalChain);
30037 return;
30038 }
30039
30040 if (Chain.getOpcode() == ISD::TokenFactor) {
30041 // We have to check each of the operands of the token factor for "small"
30042 // token factors, so we queue them up. Adding the operands to the queue
30043 // (stack) in reverse order maintains the original order and increases the
30044 // likelihood that getNode will find a matching token factor (CSE.)
30045 if (Chain.getNumOperands() > 16) {
30046 Aliases.push_back(Chain);
30047 continue;
30048 }
30049 for (unsigned n = Chain.getNumOperands(); n;)
30050 Chains.push_back(Chain.getOperand(--n));
30051 ++Depth;
30052 continue;
30053 }
30054 // Everything else
30055 if (ImproveChain(Chain)) {
30056 // Updated Chain Found, Consider new chain if one exists.
30057 if (Chain.getNode())
30058 Chains.push_back(Chain);
30059 ++Depth;
30060 continue;
30061 }
30062 // No Improved Chain Possible, treat as Alias.
30063 Aliases.push_back(Chain);
30064 }
30065}
30066
30067/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
30068/// (aliasing node.)
30069SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
30070 if (OptLevel == CodeGenOptLevel::None)
30071 return OldChain;
30072
30073 // Ops for replacing token factor.
30074  SmallVector<SDValue, 8> Aliases;
30075
30076 // Accumulate all the aliases to this node.
30077 GatherAllAliases(N, OldChain, Aliases);
30078
30079 // If no operands then chain to entry token.
30080 if (Aliases.empty())
30081 return DAG.getEntryNode();
30082
30083 // If a single operand then chain to it. We don't need to revisit it.
30084 if (Aliases.size() == 1)
30085 return Aliases[0];
30086
30087 // Construct a custom tailored token factor.
30088 return DAG.getTokenFactor(SDLoc(N), Aliases);
30089}
30090
30091// This function tries to collect a bunch of potentially interesting
30092// nodes to improve the chains of, all at once. This might seem
30093// redundant, as this function gets called when visiting every store
30094// node, so why not let the work be done on each store as it's visited?
30095//
30096// I believe this is mainly important because mergeConsecutiveStores
30097// is unable to deal with merging stores of different sizes, so unless
30098// we improve the chains of all the potential candidates up-front
30099// before running mergeConsecutiveStores, it might only see some of
30100// the nodes that will eventually be candidates, and then not be able
30101// to go from a partially-merged state to the desired final
30102// fully-merged state.
30103
30104bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
30105 SmallVector<StoreSDNode *, 8> ChainedStores;
30106 StoreSDNode *STChain = St;
30107  // Intervals records which offsets from BaseIndex have been covered. In the
30108  // common case, every store writes to the address range immediately before
30109  // the previous one and is thus merged with that interval at insertion time.
30110
30111 using IMap = llvm::IntervalMap<int64_t, std::monostate, 8,
30112 IntervalMapHalfOpenInfo<int64_t>>;
30113 IMap::Allocator A;
30114 IMap Intervals(A);
30115
30116 // This holds the base pointer, index, and the offset in bytes from the base
30117 // pointer.
30118 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
30119
30120 // We must have a base and an offset.
30121 if (!BasePtr.getBase().getNode())
30122 return false;
30123
30124 // Do not handle stores to undef base pointers.
30125 if (BasePtr.getBase().isUndef())
30126 return false;
30127
30128 // Do not handle stores to opaque types
30129 if (St->getMemoryVT().isZeroSized())
30130 return false;
30131
30132 // BaseIndexOffset assumes that offsets are fixed-size, which
30133 // is not valid for scalable vectors where the offsets are
30134 // scaled by `vscale`, so bail out early.
30135 if (St->getMemoryVT().isScalableVT())
30136 return false;
30137
30138 // Add ST's interval.
30139 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8,
30140 std::monostate{});
30141
30142 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
30143 if (Chain->getMemoryVT().isScalableVector())
30144 return false;
30145
30146 // If the chain has more than one use, then we can't reorder the mem ops.
30147 if (!SDValue(Chain, 0)->hasOneUse())
30148 break;
30149 // TODO: Relax for unordered atomics (see D66309)
30150 if (!Chain->isSimple() || Chain->isIndexed())
30151 break;
30152
30153 // Find the base pointer and offset for this memory node.
30154 const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
30155 // Check that the base pointer is the same as the original one.
30156 int64_t Offset;
30157 if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
30158 break;
30159 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
30160 // Make sure we don't overlap with other intervals by checking the ones to
30161 // the left or right before inserting.
30162 auto I = Intervals.find(Offset);
30163 // If there's a next interval, we should end before it.
30164 if (I != Intervals.end() && I.start() < (Offset + Length))
30165 break;
30166 // If there's a previous interval, we should start after it.
30167 if (I != Intervals.begin() && (--I).stop() <= Offset)
30168 break;
30169 Intervals.insert(Offset, Offset + Length, std::monostate{});
30170
30171 ChainedStores.push_back(Chain);
30172 STChain = Chain;
30173 }
30174
30175 // If we didn't find a chained store, exit.
30176 if (ChainedStores.empty())
30177 return false;
30178
30179 // Improve all chained stores (St and ChainedStores members) starting from
30180 // where the store chain ended and return single TokenFactor.
30181 SDValue NewChain = STChain->getChain();
30182  SmallVector<SDValue, 8> TFOps;
30183  for (unsigned I = ChainedStores.size(); I;) {
30184 StoreSDNode *S = ChainedStores[--I];
30185 SDValue BetterChain = FindBetterChain(S, NewChain);
30186    S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
30187        S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
30188 TFOps.push_back(SDValue(S, 0));
30189 ChainedStores[I] = S;
30190 }
30191
30192 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
30193 SDValue BetterChain = FindBetterChain(St, NewChain);
30194 SDValue NewST;
30195 if (St->isTruncatingStore())
30196 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
30197 St->getBasePtr(), St->getMemoryVT(),
30198 St->getMemOperand());
30199 else
30200 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
30201 St->getBasePtr(), St->getMemOperand());
30202
30203 TFOps.push_back(NewST);
30204
30205 // If we improved every element of TFOps, then we've lost the dependence on
30206 // NewChain to successors of St and we need to add it back to TFOps. Do so at
30207 // the beginning to keep relative order consistent with FindBetterChains.
30208 auto hasImprovedChain = [&](SDValue ST) -> bool {
30209 return ST->getOperand(0) != NewChain;
30210 };
30211 bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
30212 if (AddNewChain)
30213 TFOps.insert(TFOps.begin(), NewChain);
30214
30215 SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
30216 CombineTo(St, TF);
30217
30218 // Add TF and its operands to the worklist.
30219 AddToWorklist(TF.getNode());
30220 for (const SDValue &Op : TF->ops())
30221 AddToWorklist(Op.getNode());
30222 AddToWorklist(STChain);
30223 return true;
30224}
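
// A simplified sketch of the overlap bookkeeping above (illustrative only,
// using a plain list instead of llvm::IntervalMap): each chained store's byte
// range [Offset, Offset + Length) is recorded, and the walk up the chain stops
// once a store would overlap a range that is already covered.
static bool overlapsCoveredRangeSketch(
    const SmallVectorImpl<std::pair<int64_t, int64_t>> &Covered, int64_t Offset,
    int64_t Length) {
  for (const auto &R : Covered)
    if (Offset < R.second && R.first < Offset + Length)
      return true;
  return false;
}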
30225
30226bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
30227 if (OptLevel == CodeGenOptLevel::None)
30228 return false;
30229
30230 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
30231
30232 // We must have a base and an offset.
30233 if (!BasePtr.getBase().getNode())
30234 return false;
30235
30236 // Do not handle stores to undef base pointers.
30237 if (BasePtr.getBase().isUndef())
30238 return false;
30239
30240 // Directly improve a chain of disjoint stores starting at St.
30241 if (parallelizeChainedStores(St))
30242 return true;
30243
30244  // Improve St's chain.
30245 SDValue BetterChain = FindBetterChain(St, St->getChain());
30246 if (St->getChain() != BetterChain) {
30247 replaceStoreChain(St, BetterChain);
30248 return true;
30249 }
30250 return false;
30251}
30252
30253/// This is the entry point for the file.
30254void SelectionDAG::Combine(CombineLevel Level, BatchAAResults *BatchAA,
30255                           CodeGenOptLevel OptLevel) {
30256 /// This is the main entry point to this class.
30257 DAGCombiner(*this, BatchAA, OptLevel).Run(Level);
30258}
return SDValue()
static bool mayAlias(MachineInstr &MIa, SmallVectorImpl< MachineInstr * > &MemInsns, AliasAnalysis *AA)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static cl::opt< bool > UseAA("aarch64-use-aa", cl::init(true), cl::desc("Enable the use of AA during codegen."))
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
constexpr LLT S1
AMDGPU Register Bank Select
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may be fold it's address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques=false)
static cl::opt< bool > EnableShrinkLoadReplaceStoreWithStore("combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable load/<replace bytes>/store with " "a narrower store"))
static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0, unsigned ExtOpc, SmallVectorImpl< SDNode * > &ExtendNodes, const TargetLowering &TLI)
static cl::opt< unsigned > TokenFactorInlineLimit("combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048), cl::desc("Limit the number of operands to inline for Token Factors"))
static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc, bool NonNegZExt=false)
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG)
static SDNode * getBuildPairElt(SDNode *N, unsigned i)
static SDValue foldExtractSubvectorFromShuffleVector(EVT NarrowVT, SDValue Src, unsigned Index, const SDLoc &DL, SelectionDAG &DAG, bool LegalOperations)
Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)), try to produce VECTOR_SHUFFLE(EXTRACT_SUBVEC...
static SDValue foldToMaskedStore(StoreSDNode *Store, SelectionDAG &DAG, const SDLoc &Dl)
static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG)
static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalTypes)
Try to fold a sext/zext/aext dag node into a ConstantSDNode or a build_vector of constants.
static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, SDValue ExtractFrom, SDValue &Mask, const SDLoc &DL)
Helper function for visitOR to extract the needed side of a rotate idiom from a shl/srl/mul/udiv.
static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec, bool &IsLoad, bool &IsMasked, SDValue &Ptr, const TargetLowering &TLI)
bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled, SelectionDAG &DAG, const SDLoc &DL)
static SDValue narrowExtractedVectorLoad(EVT VT, SDValue Src, unsigned Index, const SDLoc &DL, SelectionDAG &DAG)
If we are extracting a subvector from a wide vector load, convert to a narrow load to eliminate the e...
static SDValue scalarizeExtractedBinOp(SDNode *ExtElt, SelectionDAG &DAG, const SDLoc &DL, bool LegalTypes)
Transform a vector binary operation into a scalar binary operation by moving the math/logic after an ...
static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI)
Return true if divmod libcall is available.
static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG)
static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1, SelectionDAG &DAG, const SDLoc &DL)
Given the operands of an add/sub operation, see if the 2nd operand is a masked 0/1 whose source opera...
static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef< int > Mask, SmallVectorImpl< int > &NewMask, SDValue Elt, unsigned InsIndex)
static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf)
If we have a unary shuffle of a shuffle, see if it can be folded away completely.
static bool canSplitIdx(LoadSDNode *LD)
static SDValue ShrinkLoadReplaceStoreWithStore(const std::pair< unsigned, unsigned > &MaskInfo, SDValue IVal, StoreSDNode *St, DAGCombiner *DC)
Check to see if IVal is something that provides a value as specified by MaskInfo.
static cl::opt< bool > StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, cl::desc("Bypass the profitability model of load slicing"), cl::init(false))
Hidden option to stress test load slicing, i.e., when this option is enabled, load slicing bypasses m...
static cl::opt< bool > UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), cl::desc("Enable DAG combiner's use of TBAA"))
static void adjustCostForPairing(SmallVectorImpl< LoadedSlice > &LoadedSlices, LoadedSlice::Cost &GlobalLSCost)
Adjust the GlobalLSCost according to the target paring capabilities and the layout of the slices.
static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI, SDValue N0, SDValue N1, SDNode *N)
static cl::opt< bool > DisableCombines("combiner-disabled", cl::Hidden, cl::init(false), cl::desc("Disable the DAG combiner"))
static SDValue foldExtendVectorInregToExtendOfSubvector(SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalOperations)
static SDValue narrowExtractedVectorBinOp(EVT VT, SDValue Src, unsigned Index, const SDLoc &DL, SelectionDAG &DAG, bool LegalOperations)
If we are extracting a subvector produced by a wide binary operator try to use a narrow binary operat...
static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode)
Check if N satisfies: N is used once.
static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG, const SDLoc &DL)
Given an extending node with a pop-count operand, if the target does not support a pop-count in the n...
static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand, SDValue RightHand, SelectionDAG &DAG)
Given a tree of logic operations with shape like (LOGIC (LOGIC (X, Y), LOGIC (Z, Y))) try to match an...
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG)
static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned Depth, bool AssumeNonZero)
static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F, const SDLoc &DL, SelectionDAG &DAG)
static bool areUsedBitsDense(const APInt &UsedBits)
Check that all bits set in UsedBits form a dense region, i.e., UsedBits looks like 0....
static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG, const TargetLowering &TLI, const SDLoc &DL)
Fold "masked merge" expressions like (m & x) | (~m & y) and its DeMorgan variant (~m | x) & (m | y) i...
static SDValue getInputChainForNode(SDNode *N)
Given a node, return its input chain if it has one, otherwise return a null sd operand.
static ElementCount numVectorEltsOrZero(EVT T)
static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG, bool ShouldCommuteOperands)
This inverts a canonicalization in IR that replaces a variable select arm with an identity constant.
static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG)
static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType)
static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL)
For targets that support usubsat, match a bit-hack form of that operation that ends in 'and' and conv...
static cl::opt< bool > CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, cl::desc("Enable DAG combiner's use of IR alias analysis"))
static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy)
static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf, SelectionDAG &DAG)
Combine shuffle of shuffle of the form: shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X...
static bool isDivisorPowerOfTwo(SDValue Divisor)
static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift, SDValue &Mask)
Match "(X shl/srl V1) & V2" where V2 may not be present.
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG)
static bool hasNoInfs(const TargetOptions &Options, SDValue N)
static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS, const SDNodeFlags Flags, const TargetLowering &TLI)
static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static std::optional< EVT > canCombineShuffleToExtendVectorInreg(unsigned Opcode, EVT VT, std::function< bool(unsigned)> Match, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
Try to fold a 'not' shifted sign-bit with add/sub with constant operand into a shift and add with a d...
static SDValue stripTruncAndExt(SDValue Value)
static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, SDValue X, SDValue Carry0, SDValue Carry1, SDNode *N)
If we are facing some sort of diamond carry propagation pattern try to break it up to generate someth...
static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles followed by concatenatio...
static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
static auto getFirstIndexOf(R &&Range, const T &Val)
static SDValue getSubVectorSrc(SDValue V, unsigned Index, EVT SubVT)
static std::pair< unsigned, unsigned > CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain)
Check to see if V is (and load (ptr), imm), where the load is having specific bytes cleared out.
static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef< int > Mask)
If the shuffle mask is taking exactly one element from the first vector operand and passing through a...
static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT, const TargetLowering &TLI)
static cl::opt< bool > EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable merging multiple stores " "into a wider store"))
static bool isContractableFMUL(const TargetOptions &Options, SDValue N)
static cl::opt< bool > MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), cl::desc("DAG combiner may split indexing from loads"))
static bool areSlicesNextToEachOther(const LoadedSlice &First, const LoadedSlice &Second)
Check whether or not First and Second are next to each other in memory.
static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op, SDValue &Mask)
static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG)
static bool isBSwapHWordPair(SDValue N, MutableArrayRef< SDNode * > Parts)
static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const TargetLowering &TLI)
static bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy)
copysign(x, fp_extend(y)) -> copysign(x, y) copysign(x, fp_round(y)) -> copysign(x,...
static cl::opt< bool > ReduceLoadOpStoreWidthForceNarrowingProfitable("combiner-reduce-load-op-store-width-force-narrowing-profitable", cl::Hidden, cl::init(false), cl::desc("DAG combiner force override the narrowing profitable check when " "reducing the width of load/op/store sequences"))
static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2, ISD::CondCode CC, unsigned OrAndOpcode, SelectionDAG &DAG, bool isFMAXNUMFMINNUM_IEEE, bool isFMAXNUMFMINNUM)
static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
static SDValue foldToSaturated(SDNode *N, EVT &VT, SDValue &Src, EVT &SrcVT, SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG)
static SDValue FoldIntToFPToInt(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue foldSubCtlzNot(SDNode *N, SelectionDAG &DAG)
static SDNode * getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad, bool &IsMasked, SDValue &Ptr, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG, const TargetLowering &TLI, bool Force)
Flips a boolean if it is cheaper to compute.
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, KnownBits &Known)
static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc)
static SDValue combineConcatVectorOfShuffleAndItsOperands(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT, SelectionDAG &DAG)
static SDValue foldRemainderIdiom(SDNode *N, SelectionDAG &DAG, const SDLoc &DL)
static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC, const TargetLowering &TLI, SelectionDAG &DAG)
static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG)
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG)
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset=0)
static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const TargetLowering &TLI)
static ConstantSDNode * getAsNonOpaqueConstant(SDValue N)
If N is a ConstantSDNode with isOpaque() == false return it casted to a ConstantSDNode pointer else n...
static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG)
static SDValue detectUSatUPattern(SDValue In, EVT VT)
Detect patterns of truncation with unsigned saturation:
static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static SDValue combineConcatVectorOfSplats(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N)
OR combines for which the commuted variant will be tried as well.
static SDValue detectSSatUPattern(SDValue In, EVT VT, SelectionDAG &DAG, const SDLoc &DL)
Detect patterns of truncation with unsigned saturation:
static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static cl::opt< bool > EnableReduceLoadOpStoreWidth("combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable reducing the width of load/op/store " "sequence"))
static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal, SDValue FVal, const TargetLowering &TLI, SelectionDAG &DAG, const SDLoc &DL)
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, SelectionDAG &DAG, bool IsRotate, bool FromAdd)
static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, bool LegalOperations)
static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG)
static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG)
Try to replace shift/logic that tests if a bit is clear with mask + setcc.
static bool areBitwiseNotOfEachother(SDValue Op0, SDValue Op1)
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG)
static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, bool LegalTypes)
If a vector binop is performed on splat values, it may be profitable to extract, scalarize,...
static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG)
static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue combineConcatVectorOfConcatVectors(SDNode *N, SelectionDAG &DAG)
static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, SDValue N0, ISD::LoadExtType ExtLoadType)
static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI, SelectionDAG &DAG, SDNode *N, SDValue N0, SDValue N1, EVT VT)
static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI, SelectionDAG &DAG, const SDLoc &DL, CombineLevel Level)
Fold (sext (select c, load x, load y)) -> (select c, sextload x, sextload y) (zext (select c,...
static SDValue getAsCarry(const TargetLowering &TLI, SDValue V, bool ForceCarryReconstruction=false)
static SDValue matchMergedBFX(SDValue Root, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
If a (v)select has a condition value that is a sign-bit test, try to smear the condition operand sign...
static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG)
static SDValue detectSSatSPattern(SDValue In, EVT VT)
Detect patterns of truncation with signed saturation: (truncate (smin (smax (x, signed_min_of_dest_ty...
static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG)
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT, SelectionDAG &DAG, bool LegalOperations)
static cl::opt< unsigned > StoreMergeDependenceLimit("combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10), cl::desc("Limit the number of times for the same StoreNode and RootNode " "to bail out in store merging dependence check"))
static SDValue eliminateFPCastPair(SDNode *N)
static cl::opt< std::string > CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, cl::desc("Only use DAG-combiner alias analysis in this" " function"))
static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp, SelectionDAG &DAG)
Given a bitwise logic operation N with a matching bitwise logic operand, fold a pattern where 2 of th...
ByteProvider< SDNode * > SDByteProvider
Recursively traverses the expression calculating the origin of the requested byte of the given value.
static bool isSlicingProfitable(SmallVectorImpl< LoadedSlice > &LoadedSlices, const APInt &UsedBits, bool ForCodeSize)
Check the profitability of all involved LoadedSlice.
static SDValue narrowInsertExtractVectorBinOp(EVT SubVT, SDValue BinOp, unsigned Index, const SDLoc &DL, SelectionDAG &DAG, bool LegalOperations)
static bool isBSwapHWordElement(SDValue N, MutableArrayRef< SDNode * > Parts)
Return true if the specified node is an element that makes up a 32-bit packed halfword byteswap.
static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, unsigned &BW, bool &Unsigned, SelectionDAG &DAG)
static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
static std::optional< SDByteProvider > calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, std::optional< uint64_t > VectorIndex, unsigned StartingIndex=0)
dxil translate DXIL Translate Metadata
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
This file defines the DenseMap class.
static bool isSigned(unsigned int Opcode)
static MaybeAlign getAlign(Value *Ptr)
iv Induction Variable Users
Definition IVUsers.cpp:48
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
static Value * simplifyDivRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse)
Check for common or similar folds of integer division or integer remainder.
This file implements a coalescing interval map for small objects.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
static bool isUndef(const MachineInstr &MI)
Register const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
This file contains the declarations for metadata subclasses.
#define T
#define T1
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define P(N)
if(PassOpts->AAPipeline)
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
static bool isSimple(Instruction *I)
void visit(MachineFunction &MF, MachineBasicBlock &Start, std::function< void(MachineBasicBlock *)> op)
This file contains some templates that are useful if you are working with the STL at all.
static cl::opt< bool > UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"))
static cl::opt< unsigned > MaxSteps("has-predecessor-max-steps", cl::Hidden, cl::init(8192), cl::desc("DAG combiner limit number of steps when searching DAG " "for predecessor nodes"))
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static unsigned getScalarSizeInBits(Type *Ty)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This file describes how to lower LLVM code to machine code.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
static constexpr int Concat[]
Value * RHS
Value * LHS
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
Definition APFloat.h:1120
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition APFloat.h:1208
bool isNegative() const
Definition APFloat.h:1449
bool isNormal() const
Definition APFloat.h:1453
bool isDenormal() const
Definition APFloat.h:1450
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition APFloat.h:1432
const fltSemantics & getSemantics() const
Definition APFloat.h:1457
bool isNaN() const
Definition APFloat.h:1447
static APFloat getOne(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative One.
Definition APFloat.h:1088
APInt bitcastToAPInt() const
Definition APFloat.h:1353
bool isLargest() const
Definition APFloat.h:1465
bool isInfinity() const
Definition APFloat.h:1446
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt umul_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1971
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
static LLVM_ABI void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition APInt.cpp:1758
LLVM_ABI APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition APInt.cpp:644
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:449
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:229
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1670
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1385
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1512
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:206
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1330
APInt abs() const
Get the absolute value.
Definition APInt.h:1795
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:371
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1182
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:258
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
bool isSignMask() const
Check if the APInt's value is returned by getSignMask.
Definition APInt.h:466
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1111
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:209
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:329
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition APInt.h:1249
int32_t exactLogBase2() const
Definition APInt.h:1783
LLVM_ABI APInt uadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1935
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1639
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1598
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:219
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1531
unsigned countLeadingZeros() const
Definition APInt.h:1606
void flipAllBits()
Toggle every bit to its opposite value.
Definition APInt.h:1452
unsigned logBase2() const
Definition APInt.h:1761
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition APInt.h:510
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:475
bool getBoolValue() const
Convert APInt to a boolean value.
Definition APInt.h:471
LLVM_ABI APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1960
bool isMask(unsigned numBits) const
Definition APInt.h:488
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition APInt.h:1150
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
void setBits(unsigned loBit, unsigned hiBit)
Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
Definition APInt.h:1367
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:306
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:296
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:482
bool isOne() const
Determine if this is a value of 1.
Definition APInt.h:389
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:286
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:239
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1562
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:858
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:851
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1656
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1221
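The APInt entries above are the bit-manipulation building blocks used throughout the combines in this file. The following is a minimal standalone sketch (apintSketch is a hypothetical name, not code from DAGCombiner.cpp) exercising a few of them:

#include "llvm/ADT/APInt.h"
#include <cassert>
using namespace llvm;

static void apintSketch() {
  // getBitsSet(32, 4, 12) builds 0x00000FF0: bits [4,12) are set.
  APInt M = APInt::getBitsSet(32, /*loBit=*/4, /*hiBit=*/12);
  assert(M.countr_zero() == 4 && M.isShiftedMask());
  M.lshrInPlace(4);                         // now 0x000000FF
  assert(M.isMask(8) && !M.isPowerOf2());
  // extractBits pulls out bits [0,8) as an 8-bit APInt, here all ones.
  APInt Low = M.extractBits(/*numBits=*/8, /*bitPosition=*/0);
  assert(Low.isAllOnes());
}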
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition ArrayRef.h:200
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
static LLVM_ABI BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
static LLVM_ABI bool computeAliasing(const SDNode *Op0, const LocationSize NumBytes0, const SDNode *Op1, const LocationSize NumBytes1, const SelectionDAG &DAG, bool &IsAlias)
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
LLVM_ABI bool isConstant() const
Represents the known origin of an individual byte in a combine pattern.
static ByteProvider getConstantZero()
static ByteProvider getSrc(std::optional< SDNode * > Val, int64_t ByteOffset, int64_t VectorOffset)
Combiner implementation.
Definition Combiner.h:34
ISD::CondCode get() const
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static ConstantAsMetadata * get(Constant *C)
Definition Metadata.h:535
const APFloat & getValueAPF() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
bool isNegative() const
Return true if the value is negative.
bool isZero() const
Return true if the value is positive or negative zero.
const APInt & getLower() const
Return the lower value for this range.
LLVM_ABI bool isFullSet() const
Return true if this set contains all of the elements possible for this data-type.
LLVM_ABI ConstantRange truncate(uint32_t BitWidth, unsigned NoWrapKind=0) const
Return a new range in the specified integer type, which must be strictly smaller than the current typ...
const APInt & getUpper() const
Return the upper value for this range.
uint32_t getBitWidth() const
Get the bit width of this ConstantRange.
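As a hedged illustration of the ConstantRange helpers above (rangeFitsAfterTrunc is a hypothetical helper, not an API from this file), a combine-style check that a known range survives narrowing might look like:

#include "llvm/IR/ConstantRange.h"
using namespace llvm;

static bool rangeFitsAfterTrunc(const ConstantRange &CR, uint32_t NarrowBits) {
  if (NarrowBits >= CR.getBitWidth())
    return true; // not actually a narrowing
  // Conservative check: every value in CR is representable as an unsigned
  // NarrowBits-bit integer, so truncating would lose nothing.
  APInt NarrowMax = APInt::getMaxValue(NarrowBits).zext(CR.getBitWidth());
  return CR.getUnsignedMax().ule(NarrowMax);
}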
const ConstantInt * getConstantIntValue() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition Constant.h:43
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:198
bool isBigEndian() const
Definition DataLayout.h:199
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
static bool shouldExecute(unsigned CounterName)
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:165
iterator end()
Definition DenseMap.h:81
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
constexpr bool isScalar() const
Exactly one element.
Definition TypeSize.h:320
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:727
const_iterator find(KeyT x) const
find - Return an iterator pointing to the first interval ending at or after x, or end().
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
static LocationSize precise(uint64_t Value)
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
bool isScalable() const
TypeSize getValue() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1561
Machine Value Type.
SimpleValueType SimpleTy
static auto all_valuetypes()
SimpleValueType Iteration.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getIntegerVT(unsigned BitWidth)
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
A description of a memory reference used in the backend.
const PseudoSourceValue * getPseudoValue() const
void clearRanges()
Unset the tracked range metadata.
Flags
Flags values. These may be or'd together.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MONonTemporal
The memory access is non-temporal.
Flags getFlags() const
Return the raw flags of the source value,.
const Value * getValue() const
Return the base address of the memory access.
const SDValue & getPassThru() const
ISD::LoadExtType getExtensionType() const
const SDValue & getBasePtr() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getInc() const
const SDValue & getScale() const
const SDValue & getMask() const
const SDValue & getIntID() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
ISD::MemIndexType getIndexType() const
This class is used to represent an MLOAD node.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
const SDValue & getMask() const
const SDValue & getPassThru() const
const SDValue & getOffset() const
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
This class is used to represent an MSTORE node.
bool isCompressingStore() const
Returns true if the op does a compression to the vector before storing.
const SDValue & getOffset() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Align getBaseAlign() const
Returns alignment and volatility of the memory access.
const MDNode * getRanges() const
Returns the Ranges that describes the dereference.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
bool isNonTemporal() const
bool isInvariant() const
bool isDereferenceable() const
EVT getMemoryVT() const
Return the type of the in-memory value.
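A small hypothetical helper (isPlainSimpleLoad is illustrative, not a function in this file) built from the load accessors above, showing the kind of guard combines typically apply before touching a memory node:

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// True if V is a plain load a combine can usually rewrite: non-atomic,
// non-volatile, unindexed, and not a value-extending load.
static bool isPlainSimpleLoad(SDValue V) {
  auto *LD = dyn_cast<LoadSDNode>(V);
  return LD && LD->isSimple() && LD->isUnindexed() &&
         LD->getExtensionType() == ISD::NON_EXTLOAD;
}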
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:303
MutableArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
Definition ArrayRef.h:424
iterator end() const
Definition ArrayRef.h:348
iterator begin() const
Definition ArrayRef.h:347
MutableArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition ArrayRef.h:417
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
LLVM_ABI void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
size_t use_size() const
Return the number of uses of this node.
TypeSize getValueSizeInBits(unsigned ResNo) const
Returns MVT::getSizeInBits(getValueType(ResNo)).
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
LLVM_ABI bool isOperandOf(const SDNode *N) const
Return true if this node is an operand of N.
const APInt & getConstantOperandAPInt(unsigned Num) const
Helper method returns the APInt of a ConstantSDNode operand.
std::optional< APInt > bitcastToAPInt() const
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
LLVM_ABI bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
LLVM_ABI bool reachesChainWithoutSideEffects(SDValue Dest, unsigned Depth=2) const
Return true if this operand (which must be a chain) reaches the specified operand without crossing an...
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isAnyAdd() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
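For illustration only (allUsersAreStores is a hypothetical helper, not part of this file), the SDNode/SDValue accessors above support use-list walks such as:

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// True if every user of the node behind V is a store. Combines use checks
// like this to confirm they can see all consumers of a value before
// rewriting it.
static bool allUsersAreStores(SDValue V) {
  if (V->use_empty())
    return false;
  for (SDNode *User : V->users())
    if (User->getOpcode() != ISD::STORE)
      return false;
  return true;
}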
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
LLVM_ABI SDValue getSplatSourceVector(SDValue V, int &SplatIndex)
If V is a splatted value, return the source vector and its splat index.
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
LLVM_ABI SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
bool isKnownNeverSNaN(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
LLVM_ABI std::optional< bool > isBoolConstant(SDValue N) const
Check if a value N is a constant using the target's BooleanContent for its type.
const TargetSubtargetInfo & getSubtarget() const
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
LLVM_ABI SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue getAtomicLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, MachineMemOperand *MMO)
LLVM_ABI SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
LLVM_ABI void Combine(CombineLevel Level, BatchAAResults *BatchAA, CodeGenOptLevel OptLevel)
This iterates over the nodes in the SelectionDAG, folding certain types of nodes together,...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
static LLVM_ABI unsigned getHasPredecessorMaxSteps()
LLVM_ABI bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI bool cannotBeOrderedNegativeFP(SDValue Op) const
Test whether the given float value is known to be positive.
LLVM_ABI SDValue getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
LLVM_ABI SDValue getAssertAlign(const SDLoc &DL, SDValue V, Align A)
Return an AssertAlignSDNode.
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
bool isGuaranteedNotToBePoison(SDValue Op, unsigned Depth=0) const
Return true if this function can prove that Op is never poison.
LLVM_ABI SDValue getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
LLVM_ABI APInt computeVectorKnownZeroElements(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
For each demanded element of a vector, see if it is known to be zero.
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
LLVM_ABI SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI void DeleteNode(SDNode *N)
Remove the specified node from the system.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI std::optional< unsigned > getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has a uniform shift amount that is less than the element bit-width of the shi...
LLVM_ABI SDValue simplifySelect(SDValue Cond, SDValue TVal, SDValue FVal)
Try to simplify a select/vselect into 1 of its operands or a constant.
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
LLVM_ABI bool isConstantFPBuildVectorOrConstantFP(SDValue N) const
Test whether the given value is a constant FP or similar node.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
LLVM_ABI bool LegalizeOp(SDNode *N, SmallSetVector< SDNode *, 16 > &UpdatedNodes)
Transforms a SelectionDAG node and any operands to it into a node that is compatible with the target ...
LLVM_ABI bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
LLVM_ABI bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
LLVM_ABI SDValue getMaskedHistogram(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
LLVM_ABI SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns the sum of the base pointer and offset.
bool willNotOverflowMul(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the mul of 2 nodes can never overflow.
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
LLVM_ABI MaybeAlign InferPtrAlign(SDValue Ptr) const
Infer alignment of a load / store address.
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
bool isConstantValueOfAnyType(SDValue N) const
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
LLVM_ABI bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
LLVM_ABI SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getSetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
const TargetMachine & getTarget() const
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
iterator_range< allnodes_iterator > allnodes()
LLVM_ABI SDValue getLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, const MDNode *Ranges=nullptr, bool IsExpanding=false)
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
LLVM_ABI SDValue FoldConstantBuildVector(BuildVectorSDNode *BV, const SDLoc &DL, EVT DstEltVT)
Fold BUILD_VECTOR of constants/undefs to the destination type BUILD_VECTOR of constants/undefs elemen...
LLVM_ABI SDValue getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
const TargetLibraryInfo & getLibInfo() const
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI bool MaskedVectorIsZero(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
Return true if 'Op' is known to be zero in DemandedElts.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
LLVM_ABI bool canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, bool PoisonOnly=false, bool ConsiderFlags=true, unsigned Depth=0) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
LLVM_ABI OverflowKind computeOverflowForUnsignedAdd(SDValue N0, SDValue N1) const
Determine if the result of the unsigned addition of 2 nodes can overflow.
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
bool isSafeToSpeculativelyExecuteNode(const SDNode *N) const
Check if the provided node is safe to speculatively execute given its current arguments.
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVM_ABI bool isKnownToBeAPowerOfTwoFP(SDValue Val, unsigned Depth=0) const
Test if the given fp value is known to be an integer power-of-2, either positive or negative.
LLVMContext * getContext() const
LLVM_ABI SDValue simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y, SDNodeFlags Flags)
Try to simplify a floating-point binary operation into 1 of its operands or a constant.
const SDValue & setRoot(SDValue N)
Set the current root tag of the SelectionDAG.
LLVM_ABI bool isUndef(unsigned Opcode, ArrayRef< SDValue > Ops)
Return true if the result of this operation is always undefined.
LLVM_ABI SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
LLVM_ABI SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags)
Get the specified node if it's already available, or else return NULL.
LLVM_ABI SDValue getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
DenormalMode getDenormalMode(EVT VT) const
Return the current function's default denormal handling kind for the given floating point type.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
static unsigned getOpcode_EXTEND(unsigned Opcode)
Convert *_EXTEND_VECTOR_INREG to *_EXTEND opcode.
LLVM_ABI bool isADDLike(SDValue Op, bool NoWrap=false) const
Return true if the specified operand is an ISD::OR or ISD::XOR node that can be treated as an ISD::AD...
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
LLVM_ABI SDValue simplifyShift(SDValue X, SDValue Y)
Try to simplify a shift into 1 of its operands or a constant.
LLVM_ABI void transferDbgValues(SDValue From, SDValue To, unsigned OffsetInBits=0, unsigned SizeInBits=0, bool InvalidateDbg=true)
Transfer debug values from one node to another, while optionally generating fragment expressions for ...
LLVM_ABI SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
LLVM_ABI SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
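A hedged sketch of the query-then-build shape that the SelectionDAG interface above supports (foldRedundantAnd is a hypothetical helper, not one of this file's combines):

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Fold (and X, Mask) -> X when the bits cleared by Mask are already known
// to be zero in X, according to the DAG's known-bits analysis.
static SDValue foldRedundantAnd(SelectionDAG &DAG, SDValue N) {
  if (N.getOpcode() != ISD::AND)
    return SDValue();
  SDValue X = N.getOperand(0);
  auto *MaskC = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!MaskC)
    return SDValue();
  if (DAG.MaskedValueIsZero(X, ~MaskC->getAPIntValue()))
    return X;
  return SDValue(); // no change
}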
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:99
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:168
value_type pop_back_val()
Definition SetVector.h:296
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
void push_back(bool Val)
void reserve(unsigned N)
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:356
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
bool empty() const
Definition SmallSet.h:168
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:181
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
iterator insert(iterator I, T &&Elt)
void resize(size_type N)
void push_back(const T &Elt)
pointer data()
Return a pointer to the vector's buffer, even if empty().
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
bool has(LibFunc F) const
Tests whether a library function is available.
virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT) const
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT, EVT ExtVT) const
virtual bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
virtual bool hasAndNot(SDValue X) const
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool enableAggressiveFMAFusion(EVT VT) const
Return true if target always benefits from combining into FMA for a given value type.
bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed store is legal on this target.
SDValue promoteTargetBoolean(SelectionDAG &DAG, SDValue Bool, EVT ValVT) const
Promote the given target boolean to a target boolean of the given type.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT, std::optional< unsigned > ByteOffset=std::nullopt) const
Return true if it is profitable to reduce a load to a smaller type.
virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT, bool LegalOnly) const
virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const
Use bitwise logic to make pairs of compares more efficient.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a square root of the given type based on the function's at...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
virtual MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, LLT DestTy, LLT SrcTy) const
Return true if an fpext operation input to an Opcode operation is free (for instance,...
virtual bool hasBitTest(SDValue X, SDValue Y) const
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const
Return true if the specified store with truncation is legal on this target.
virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: fold (conv (load x)) -> (load (conv*)x). On arch...
virtual bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
virtual bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst, SDValue IntPow2) const
virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, EVT VT) const
Returns true if we should normalize select(N0&N1, X, Y) => select(N0, select(N1, X,...
virtual bool preferScalarizeSplat(SDNode *N) const
bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked load is legal on this target.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const
Return true if it is profitable to convert a select of FP constants into a constant pool load whose a...
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
virtual bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
bool isPartialReduceMLALegalOrCustom(unsigned Opc, EVT AccVT, EVT InputVT) const
Return true if a PARTIAL_REDUCE_U/SMLA node with the specified types is legal or custom for this targ...
virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const
Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
int getDivRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a division of the given type based on the function's attributes.
virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to fold a pair of shifts into a mask.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
virtual bool shouldReassociateReduction(unsigned RedOpc, EVT VT) const
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a division of the given type based on the function's attri...
virtual bool preferIncOfAddToSubOfNot(EVT VT) const
These two forms are equivalent: sub y, (xor x, -1) and add (add x, 1), y. The variant with two add's is IR...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is a legal add immediate, that is, the target has add instruction...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual bool canTransformPtrArithOutOfBounds(const Function &F, EVT PtrVT) const
True if the target allows transformations of in-bounds pointer arithmetic that cause out-of-bounds in...
virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const
virtual bool isFNegFree(EVT VT) const
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
virtual bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X, SDValue Y) const
Return true if pulling a binary operation into a select with an identity constant is profitable.
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
bool isPredictableSelectExpensive() const
Return true if selects are only cheaper than branches if the branch is unlikely to be predicted right...
virtual bool mergeStoresAfterLegalization(EVT MemVT) const
Allow store merging for the specified type after legalization in addition to before legalization.
virtual bool shouldMergeStoreOfLoadsOverCall(EVT, EVT) const
Returns true if it's profitable to allow merging store of loads when there are functions calls betwee...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
unsigned getGatherAllAliasesMaxDepth() const
virtual bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem, unsigned AddrSpace) const
Return true if it is expected to be cheaper to do a store of vector constant with the given size and ...
virtual bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal or custom on this target.
bool isAtomicLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified atomic load with extension is legal on this target.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const
There are two ways to clear extreme bits (either low or high): Mask: x & (-1 << y) (the instcombine c...
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const
Returns if it's reasonable to merge stores to MemVT size.
virtual bool preferABDSToABSWithNSW(EVT VT) const
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
AndOrSETCCFoldKind
Enum of different potentially desirable ways to fold (and/or (setcc ...), (setcc ....
virtual bool shouldScalarizeBinop(SDValue VecOp) const
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: (store (y (conv x)), y*)) -> (store x,...
bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked store is legal on this target.
virtual bool isVectorClearMaskLegal(ArrayRef< int >, EVT) const
Similar to isShuffleMaskLegal.
bool hasTargetDAGCombine(ISD::NodeType NT) const
If true, the target has custom DAG combine transformations that it can perform for the specified node...
virtual bool shouldSplatInsEltVarIndex(EVT) const
Return true if inserting a scalar into a variable element of an undef vector is more efficiently hand...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a square root of the given type based on the function's attribut...
virtual unsigned preferedOpcodeForCmpEqPiecesOfOperand(EVT VT, unsigned ShiftOpc, bool MayTransformRotate, const APInt &ShiftOrRotateAmt, const std::optional< APInt > &AndMask) const
virtual bool isFMADLegal(const MachineInstr &MI, LLT Ty) const
Returns true if MI can be combined with another instruction to form TargetOpcode::G_FMAD.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const
virtual bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
virtual bool generateFMAsInMachineCombiner(EVT VT, CodeGenOptLevel OptLevel) const
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool hasPairedLoad(EVT, Align &) const
Return true if the target supplies and combines to a paired load two loaded values of type LoadedType...
virtual bool convertSelectOfConstantsToMath(EVT VT) const
Return true if a select of constants (select Cond, C1, C2) should be transformed into simple math ops...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const
Hooks for building estimates in place of slower divisions and square roots.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const
SDValue getCheaperOrNeutralNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, const NegatibleCost CostThreshold=NegatibleCost::Neutral, unsigned Depth=0) const
virtual bool isTargetCanonicalSelect(SDNode *N) const
Return true if the given select/vselect should be considered canonical and not be transformed.
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
virtual bool IsDesirableToPromoteOp(SDValue, EVT &) const
This method query the target whether it is beneficial for dag combiner to promote the specified node.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const
Return a reciprocal estimate value for the input operand.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
TargetLowering(const TargetLowering &)=delete
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool getPostIndexedAddressParts(SDNode *, SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to move this shift by a constant amount through its operand,...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual AndOrSETCCFoldKind isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const
virtual bool getPreIndexedAddressParts(SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
SDValue scalarizeExtractedVectorLoad(EVT ResultVT, const SDLoc &DL, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad, SelectionDAG &DAG) const
Replace an extraction of a load with a narrowed load.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
virtual bool isDesirableToTransformToIntegerOp(unsigned, EVT) const
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
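Finally, an illustrative sketch of the legality gating that the TargetLowering hooks above make possible (tryFormFMinNum is hypothetical and not a transform from this file):

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

// Only build an FMINNUM node when the target reports it as legal or custom
// for this value type; otherwise leave the DAG unchanged.
static SDValue tryFormFMinNum(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                              SDValue A, SDValue B) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT))
    return SDValue();
  return DAG.getNode(ISD::FMINNUM, DL, VT, A, B);
}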
TargetOptions Options
unsigned NoSignedZerosFPMath
NoSignedZerosFPMath - This flag is enabled when the -enable-no-signed-zeros-fp-math is specified on t...
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine,...
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:107
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:232
const SDValue & getScale() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getVectorLength() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
iterator_range< user_iterator > users()
Definition Value.h:426
int getNumOccurrences() const
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:181
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:230
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:169
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
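A minimal sketch of the TypeSize queries listed above (llvm/Support/TypeSize.h, illustrative only):
  TypeSize TS = TypeSize::getFixed(128);       // a fixed 128-bit quantity
  bool ByteMultiple = TS.isKnownMultipleOf(8); // true
  TypeSize Half = TS.divideCoefficientBy(2);   // fixed 64 bits
  bool Scalable = TS.isScalable();             // false for getFixed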
Changed
#define INT64_MAX
Definition DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition APInt.h:2248
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition APInt.h:2253
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition APInt.h:2258
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition APInt.h:2263
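A small sketch of the signed vs. unsigned APInt min/max helpers (llvm/ADT/APInt.h):
  APInt A(8, 200);                         // 0xC8: 200 unsigned, -56 signed
  APInt B(8, 100);
  const APInt &S = APIntOps::smax(A, B);   // B (100), since A is -56 as signed
  const APInt &U = APIntOps::umax(A, B);   // A (200)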
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Entry
Definition COFF.h:862
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical AND between different comparisons of identical values: ((X op1 Y) & (X...
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:801
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:256
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:774
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ POISON
POISON - A poison node.
Definition ISDOpcodes.h:231
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:587
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:765
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition ISDOpcodes.h:387
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:289
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition ISDOpcodes.h:515
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:393
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:835
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:862
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:571
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:738
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:892
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:275
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:826
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:706
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:656
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:773
@ TRUNCATE_SSAT_U
Definition ISDOpcodes.h:855
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition ISDOpcodes.h:809
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:347
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor to...
Definition ISDOpcodes.h:622
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition ISDOpcodes.h:682
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:528
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:778
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:242
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:663
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition ISDOpcodes.h:69
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:225
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:343
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:695
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:756
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:636
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:601
@ EntryToken
EntryToken - This is the marker used to indicate the start of a region.
Definition ISDOpcodes.h:48
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:563
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:219
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:832
@ TargetConstantFP
Definition ISDOpcodes.h:175
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:793
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition ISDOpcodes.h:379
@ SMULO
Same for multiplication.
Definition ISDOpcodes.h:351
@ TargetFrameIndex
Definition ISDOpcodes.h:182
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:881
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:870
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:718
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:787
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:323
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:908
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition ISDOpcodes.h:174
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:730
@ CARRY_FALSE
CARRY_FALSE - This node is used when folding other nodes, like ADDC/SUBC, which indicate the carry re...
Definition ISDOpcodes.h:280
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:701
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:299
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:420
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:236
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:552
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:941
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively place vector elements based on mask e....
Definition ISDOpcodes.h:690
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:903
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:927
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:838
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:521
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
Definition ISDOpcodes.h:853
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:713
@ TRUNCATE_USAT_U
Definition ISDOpcodes.h:857
@ SADDO_CARRY
Carry-using overflow-aware nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:333
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:543
bool isIndexTypeSigned(MemIndexType IndexType)
bool isExtVecInRegOpcode(unsigned Opcode)
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool matchUnaryFpPredicate(SDValue Op, std::function< bool(ConstantFPSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantFPSDNode predicate.
bool isFPEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with floati...
bool isExtOpcode(unsigned Opcode)
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI bool isVPBinaryOp(unsigned Opcode)
Whether this is a vector-predicated binary operation opcode.
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
LLVM_ABI std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
LLVM_ABI std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is an EXTLOAD.
LLVM_ABI bool allOperandsUndef(const SDNode *N)
Return true if the node has at least one operand and all operands of the specified node are ISD::UNDE...
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
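For illustration, a sketch of inverting and commuting a condition code with the helpers above:
  ISD::CondCode CC = ISD::SETLT;
  ISD::CondCode Inv = ISD::getSetCCInverse(CC, MVT::i32);   // SETGE: !(X < Y)
  ISD::CondCode Swap = ISD::getSetCCSwappedOperands(CC);    // SETGT: Y > X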
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
LLVM_ABI NodeType getInverseMinMaxOpcode(unsigned MinMaxOpc)
Given a MinMaxOpc of ISD::(U|S)MIN or ISD::(U|S)MAX, returns ISD::(U|S)MAX and ISD::(U|S)MIN,...
LLVM_ABI bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
LLVM_ABI bool isVPReduction(unsigned Opcode)
Whether this is a vector-predicated reduction opcode.
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTruncation=false)
Hook for matching ConstantSDNode predicate.
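A sketch of ISD::matchUnaryPredicate, assuming an SDValue ShAmt and an EVT VT are in scope; the lambda is applied to the scalar constant, or to every constant element of a splat/BUILD_VECTOR operand:
  auto InRange = [&](ConstantSDNode *C) {
    return C->getAPIntValue().ult(VT.getScalarSizeInBits());
  };
  if (ISD::matchUnaryPredicate(ShAmt, InRange)) {
    // every constant shift amount is smaller than the element width
  }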
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
LLVM_ABI bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
LLVM_ABI CondCode getSetCCOrOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical OR between different comparisons of identical values: ((X op1 Y) | (X ...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
m_Intrinsic_Ty< Opnd0 >::Ty m_BitReverse(const Opnd0 &Op0)
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
CastInst_match< OpTy, FPToUIInst > m_FPToUI(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinOpPred_match< LHS, RHS, is_bitwiselogic_op > m_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
@ Undef
Value of the register doesn't matter.
Opcode_match m_Opc(unsigned Opcode)
auto m_SelectCCLike(const LTy &L, const RTy &R, const TTy &T, const FTy &F, const CCTy &CC)
BinaryOpc_match< LHS, RHS > m_Srl(const LHS &L, const RHS &R)
auto m_SpecificVT(EVT RefVT, const Pattern &P)
Match a specific ValueType.
BinaryOpc_match< LHS, RHS > m_Sra(const LHS &L, const RHS &R)
auto m_UMinLike(const LHS &L, const RHS &R)
auto m_UMaxLike(const LHS &L, const RHS &R)
UnaryOpc_match< Opnd > m_Abs(const Opnd &Op)
Or< Preds... > m_AnyOf(const Preds &...preds)
And< Preds... > m_AllOf(const Preds &...preds)
TernaryOpc_match< T0_P, T1_P, T2_P > m_SetCC(const T0_P &LHS, const T1_P &RHS, const T2_P &CC)
UnaryOpc_match< Opnd > m_AnyExt(const Opnd &Op)
auto m_SMaxLike(const LHS &L, const RHS &R)
UnaryOpc_match< Opnd > m_Ctlz(const Opnd &Op)
TernaryOpc_match< T0_P, T1_P, T2_P > m_VSelect(const T0_P &Cond, const T1_P &T, const T2_P &F)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
UnaryOpc_match< Opnd > m_UnaryOp(unsigned Opc, const Opnd &Op)
auto m_SMinLike(const LHS &L, const RHS &R)
CondCode_match m_SpecificCondCode(ISD::CondCode CC)
Match a conditional code SDNode with a specific ISD::CondCode.
NUses_match< 1, Value_match > m_OneUse()
CondCode_match m_CondCode()
Match any conditional code SDNode.
Not(const Pred &P) -> Not< Pred >
TernaryOpc_match< T0_P, T1_P, T2_P, true, false > m_c_SetCC(const T0_P &LHS, const T1_P &RHS, const T2_P &CC)
bool sd_context_match(SDValue N, const MatchContext &Ctx, Pattern &&P)
ConstantInt_match m_ConstInt()
Match any integer constants or splat of an integer constant.
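A sketch of the SDPatternMatch combinators, assuming an SDNode *N and `using namespace llvm::SDPatternMatch;`:
  SDValue LHS, RHS;
  if (sd_match(N, m_SetCC(m_Value(LHS), m_Value(RHS),
                          m_SpecificCondCode(ISD::SETEQ)))) {
    // N is (setcc LHS, RHS, seteq); LHS and RHS are now bound.
  }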
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:666
constexpr double e
Definition MathExtras.h:47
@ User
could "use" a pointer
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:318
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:262
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition MathExtras.h:355
@ Offset
Definition DWP.cpp:477
@ Length
Definition DWP.cpp:477
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:831
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:362
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
void stable_sort(R &&Range)
Definition STLExtras.h:2038
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1731
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1705
InstructionCost Cost
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition Utils.cpp:1607
LLVM_ABI SDValue getBitwiseNotOperand(SDValue V, SDValue Mask, bool AllowUndefs)
If V is a bitwise not, returns the inverted operand.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2452
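A sketch of these range helpers applied to an SDNode's operands (assuming SDNode *N):
  bool AllConstant = all_of(N->op_values(),
                            [](SDValue Op) { return isa<ConstantSDNode>(Op); });
  unsigned NumUndef = count_if(N->op_values(),
                               [](SDValue Op) { return Op.isUndef(); });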
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:279
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:145
LLVM_ABI llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
bool isIntOrFPConstant(SDValue V)
Return true if V is either a integer or FP constant.
bool operator!=(uint64_t V1, const APInt &V2)
Definition APInt.h:2113
bool operator>=(int64_t V1, const APSInt &V2)
Definition APSInt.h:361
LLVM_ATTRIBUTE_ALWAYS_INLINE DynamicAPInt & operator+=(DynamicAPInt &A, int64_t B)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2116
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:293
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
int ilogb(const APFloat &Arg)
Returns the exponent of the internal representation of the APFloat.
Definition APFloat.h:1534
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1589
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:348
LLVM_ABI bool isMinSignedConstant(SDValue V)
Returns true if V is a constant min signed integer value.
LLVM_ABI ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
LLVM_ABI ConstantRange getConstantRangeFromMetadata(const MDNode &RangeMD)
Parse out a conservative ConstantRange from !range metadata.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:396
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:186
unsigned M1(unsigned Val)
Definition VE.h:377
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1712
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:342
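A quick sketch of the power-of-two and log2 helpers from MathExtras.h:
  unsigned FloorLog = Log2_32(64);        // 6
  unsigned CeilLog = Log2_32_Ceil(65);    // 7
  bool IsPow2 = isPowerOf2_64(4096);      // true
  uint64_t RoundedUp = PowerOf2Ceil(100); // 128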
LLVM_ABI bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition Utils.cpp:1545
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:222
bool operator>(int64_t V1, const APSInt &V2)
Definition APSInt.h:363
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1624
detail::ValueMatchesPoly< M > HasValue(M Matcher)
Definition Error.h:221
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI SDValue peekThroughTruncates(SDValue V)
Return the non-truncated source operand of V if it exists.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1719
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
LLVM_ABI SDValue peekThroughOneUseBitcasts(SDValue V)
Return the non-bitcasted and one-use source operand of V if it exists.
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Other
Any other memory.
Definition ModRef.h:68
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:71
CombineLevel
Definition DAGCombine.h:15
@ AfterLegalizeDAG
Definition DAGCombine.h:19
@ AfterLegalizeVectorOps
Definition DAGCombine.h:18
@ BeforeLegalizeTypes
Definition DAGCombine.h:16
@ AfterLegalizeTypes
Definition DAGCombine.h:17
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ FMul
Product of floats.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
@ FAdd
Sum of floats.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
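A sketch of the usual constant-splat idiom, assuming an SDValue V: peek through bitcasts, then test for a scalar or splatted constant.
  SDValue Src = peekThroughBitcasts(V);
  if (ConstantSDNode *C = isConstOrConstSplat(Src, /*AllowUndefs=*/true)) {
    if (C->getAPIntValue().isAllOnes()) {
      // V is (a bitcast of) -1 or a splat of -1, possibly with undef lanes.
    }
  }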
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1941
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
LLVM_ABI void getShuffleMaskWithWidestElts(ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Repetitively apply widenShuffleMaskElts() for as long as it succeeds, to get the shuffle mask with wi...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1877
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:212
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list.
Definition STLExtras.h:2088
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:208
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool operator<=(int64_t V1, const APSInt &V2)
Definition APSInt.h:360
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:384
LLVM_ABI int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
#define N
LLVM_ABI AAMDNodes concat(const AAMDNodes &Other) const
Determine the best AAMDNodes after concatenating two different locations together.
static LLVM_ABI ExponentType semanticsMinExponent(const fltSemantics &)
Definition APFloat.cpp:332
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:304
static LLVM_ABI ExponentType semanticsMaxExponent(const fltSemantics &)
Definition APFloat.cpp:328
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:324
static LLVM_ABI bool isIEEELikeFP(const fltSemantics &)
Definition APFloat.cpp:365
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:320
static LLVM_ABI unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition APFloat.cpp:338
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:85
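A small sketch of the Align helpers (llvm/Support/Alignment.h):
  Align A(16);
  bool Ok = isAligned(A, 48);                 // true: 48 is a multiple of 16
  unsigned ShiftAmt = Log2(A);                // 4
  Align AfterOffset = commonAlignment(A, 4);  // Align(4) for a 16-aligned base plus 4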
static constexpr DenormalMode getIEEE()
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
bool knownBitsLE(EVT VT) const
Return true if we know at compile time this has fewer than or the same bits as VT.
Definition ValueTypes.h:279
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition ValueTypes.h:121
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:300
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:243
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition ValueTypes.h:470
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition ValueTypes.h:412
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isScalableVT() const
Return true if the type is a scalable type.
Definition ValueTypes.h:187
bool isFixedLengthVector() const
Definition ValueTypes.h:181
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition ValueTypes.h:292
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Definition ValueTypes.h:256
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition ValueTypes.h:248
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:174
bool knownBitsGE(EVT VT) const
Return true if we know at compile time this has more than or the same bits as VT.
Definition ValueTypes.h:268
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition ValueTypes.h:142
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool isZeroSized() const
Test if the given EVT has zero size, this will fail if called on a scalable type.
Definition ValueTypes.h:132
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:308
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
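A sketch of common EVT queries, assuming an LLVMContext &Ctx:
  EVT VT = EVT::getVectorVT(Ctx, MVT::i16, 8);                   // v8i16
  unsigned NumElts = VT.getVectorNumElements();                  // 8
  uint64_t EltBits = VT.getScalarSizeInBits();                   // 16
  EVT WideInt = EVT::getIntegerVT(Ctx, VT.getFixedSizeInBits()); // i128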
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:108
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:242
bool isConstant() const
Returns true if we know the value of all bits.
Definition KnownBits.h:54
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition KnownBits.h:296
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:248
bool isAllOnes() const
Returns true if value is all one bits.
Definition KnownBits.h:83
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition KnownBits.h:60
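A sketch of querying KnownBits, assuming a SelectionDAG &DAG and an SDValue Op:
  KnownBits Known = DAG.computeKnownBits(Op);
  if (Known.isConstant()) {
    const APInt &C = Known.getConstant();     // every bit of Op is known
  }
  unsigned LeadingZeros = Known.countMinLeadingZeros();
  bool NonNegative = Known.isNonNegative();   // sign bit known to be zero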
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
These are IR-level optimization flags that may be propagated to SDNodes.
void setAllowContract(bool b)
bool hasNoUnsignedWrap() const
void setAllowReassociation(bool b)
void setAllowReciprocal(bool b)
bool hasAllowContract() const
bool hasApproximateFuncs() const
void setApproximateFuncs(bool b)
bool hasNoSignedWrap() const
bool hasAllowReciprocal() const
bool hasAllowReassociation() const
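A sketch of building SDNodeFlags to attach to a newly created node:
  SDNodeFlags Flags;
  Flags.setAllowContract(true);
  Flags.setAllowReassociation(true);
  // e.g. DAG.getNode(ISD::FADD, DL, VT, A, B, Flags) would carry these flags,
  // and a later combine can test Flags.hasAllowContract() before forming FMA.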
Clients of various APIs that cause global effects on the DAG can optionally implement this interface.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI bool recursivelyDeleteUnusedNodes(SDNode *N)
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
LLVM_ABI void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
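For context, a hedged sketch of how these pieces are commonly combined inside a target's PerformDAGCombine, assuming TargetLowering::DAGCombinerInfo &DCI, const TargetLowering &TLI, and SDValue Op are in scope; the demanded-bits mask here is purely illustrative:
  TargetLowering::TargetLoweringOpt TLO(DCI.DAG, !DCI.isBeforeLegalize(),
                                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;
  unsigned BitWidth = Op.getScalarValueSizeInBits();
  APInt DemandedBits = APInt::getLowBitsSet(BitWidth, 8); // only the low byte matters
  if (TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO))
    DCI.CommitTargetLoweringOpt(TLO); // commit the replacement of TLO.Old with TLO.New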