Codestin Search App

History

3270 lines (2849 loc) · 128 KB

Raw

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

411

412

413

414

415

416

417

418

419

420

421

422

423

424

425

426

427

428

429

430

431

432

433

434

435

436

437

438

439

440

441

442

443

444

445

446

447

448

449

450

451

452

453

454

455

456

457

458

459

460

461

462

463

464

465

466

467

468

469

470

471

472

473

474

475

476

477

478

479

480

481

482

483

484

485

486

487

488

489

490

491

492

493

494

495

496

497

498

499

500

501

502

503

504

505

506

507

508

509

510

511

512

513

514

515

516

517

518

519

520

521

522

523

524

525

526

527

528

529

530

531

532

533

534

535

536

537

538

539

540

541

542

543

544

545

546

547

548

549

550

551

552

553

554

555

556

557

558

559

560

561

562

563

564

565

566

567

568

569

570

571

572

573

574

575

576

577

578

579

580

581

582

583

584

585

586

587

588

589

590

591

592

593

594

595

596

597

598

599

600

601

602

603

604

605

606

607

608

609

610

611

612

613

614

615

616

617

618

619

620

621

622

623

624

625

626

627

628

629

630

631

632

633

634

635

636

637

638

639

640

641

642

643

644

645

646

647

648

649

650

651

652

653

654

655

656

657

658

659

660

661

662

663

664

665

666

667

668

669

670

671

672

673

674

675

676

677

678

679

680

681

682

683

684

685

686

687

688

689

690

691

692

693

694

695

696

697

698

699

700

701

702

703

704

705

706

707

708

709

710

711

712

713

714

715

716

717

718

719

720

721

722

723

724

725

726

727

728

729

730

731

732

733

734

735

736

737

738

739

740

741

742

743

744

745

746

747

748

749

750

751

752

753

754

755

756

757

758

759

760

761

762

763

764

765

766

767

768

769

770

771

772

773

774

775

776

777

778

779

780

781

782

783

784

785

786

787

788

789

790

791

792

793

794

795

796

797

798

799

800

801

802

803

804

805

806

807

808

809

810

811

812

813

814

815

816

817

818

819

820

821

822

823

824

825

826

827

828

829

830

831

832

833

834

835

836

837

838

839

840

841

842

843

844

845

846

847

848

849

850

851

852

853

854

855

856

857

858

859

860

861

862

863

864

865

866

867

868

869

870

871

872

873

874

875

876

877

878

879

880

881

882

883

884

885

886

887

888

889

890

891

892

893

894

895

896

897

898

899

900

901

902

903

904

905

906

907

908

909

910

911

912

913

914

915

916

917

918

919

920

921

922

923

924

925

926

927

928

929

930

931

932

933

934

935

936

937

938

939

940

941

942

943

944

945

946

947

948

949

950

951

952

953

954

955

956

957

958

959

960

961

962

963

964

965

966

967

968

969

970

971

972

973

974

975

976

977

978

979

980

981

982

983

984

985

986

987

988

989

990

991

992

993

994

995

996

997

998

999

1000

//===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------==//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//===----------------------------------------------------------------------===//

// The implementation for the loop memory dependence that was originally

// developed for the loop vectorizer.

//===----------------------------------------------------------------------===//

#include "llvm/Analysis/LoopAccessAnalysis.h"

#include "llvm/ADT/APInt.h"

#include "llvm/ADT/DenseMap.h"

#include "llvm/ADT/EquivalenceClasses.h"

#include "llvm/ADT/PointerIntPair.h"

#include "llvm/ADT/STLExtras.h"

#include "llvm/ADT/SetVector.h"

#include "llvm/ADT/SmallPtrSet.h"

#include "llvm/ADT/SmallSet.h"

#include "llvm/ADT/SmallVector.h"

#include "llvm/Analysis/AliasAnalysis.h"

#include "llvm/Analysis/AliasSetTracker.h"

#include "llvm/Analysis/AssumeBundleQueries.h"

#include "llvm/Analysis/AssumptionCache.h"

#include "llvm/Analysis/LoopAnalysisManager.h"

#include "llvm/Analysis/LoopInfo.h"

#include "llvm/Analysis/LoopIterator.h"

#include "llvm/Analysis/MemoryLocation.h"

#include "llvm/Analysis/OptimizationRemarkEmitter.h"

#include "llvm/Analysis/ScalarEvolution.h"

#include "llvm/Analysis/ScalarEvolutionExpressions.h"

#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"

#include "llvm/Analysis/TargetLibraryInfo.h"

#include "llvm/Analysis/TargetTransformInfo.h"

#include "llvm/Analysis/ValueTracking.h"

#include "llvm/Analysis/VectorUtils.h"

#include "llvm/IR/BasicBlock.h"

#include "llvm/IR/Constants.h"

#include "llvm/IR/DataLayout.h"

#include "llvm/IR/DebugLoc.h"

#include "llvm/IR/DerivedTypes.h"

#include "llvm/IR/DiagnosticInfo.h"

#include "llvm/IR/Dominators.h"

#include "llvm/IR/Function.h"

#include "llvm/IR/InstrTypes.h"

#include "llvm/IR/Instruction.h"

#include "llvm/IR/Instructions.h"

#include "llvm/IR/IntrinsicInst.h"

#include "llvm/IR/PassManager.h"

#include "llvm/IR/Type.h"

#include "llvm/IR/Value.h"

#include "llvm/IR/ValueHandle.h"

#include "llvm/Support/Casting.h"

#include "llvm/Support/CommandLine.h"

#include "llvm/Support/Debug.h"

#include "llvm/Support/ErrorHandling.h"

#include "llvm/Support/raw_ostream.h"

#include <algorithm>

#include <cassert>

#include <cstdint>

#include <iterator>

#include <utility>

#include <variant>

#include <vector>

using namespace llvm;

using namespace llvm::SCEVPatternMatch;

// Debug type name picked up by the LLVM_DEBUG statements in this file.
#define DEBUG_TYPE "loop-accesses"

// -force-vector-width: the parsed value is stored externally, in
// VectorizerParams::VectorizationFactor (see cl::location below).
static cl::opt<unsigned, true>
    VectorizationFactor("force-vector-width", cl::Hidden,
                        cl::desc("Sets the SIMD width. Zero is autoselect."),
                        cl::location(VectorizerParams::VectorizationFactor));
// Storage backing -force-vector-width.
unsigned VectorizerParams::VectorizationFactor;

// -force-vector-interleave: the parsed value is stored externally, in
// VectorizerParams::VectorizationInterleave.
static cl::opt<unsigned, true>
    VectorizationInterleave("force-vector-interleave", cl::Hidden,
                            cl::desc("Sets the vectorization interleave count. "
                                     "Zero is autoselect."),
                            cl::location(
                                VectorizerParams::VectorizationInterleave));
// Storage backing -force-vector-interleave.
unsigned VectorizerParams::VectorizationInterleave;

// -runtime-memory-check-threshold: upper bound on the number of runtime
// pointer comparisons; initialised to 8 and stored in
// VectorizerParams::RuntimeMemoryCheckThreshold.
static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(
    "runtime-memory-check-threshold", cl::Hidden,
    cl::desc("When performing memory disambiguation checks at runtime do not "
             "generate more than this number of comparisons (default = 8)."),
    cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));
// Storage backing -runtime-memory-check-threshold.
unsigned VectorizerParams::RuntimeMemoryCheckThreshold;

/// The maximum number of comparisons performed while trying to merge runtime
/// memory checks.
static cl::opt<unsigned> MemoryCheckMergeThreshold(
    "memory-check-merge-threshold", cl::Hidden,
    cl::desc("Maximum number of comparisons done when trying to merge "
             "runtime memory checks. (default = 100)"),
    cl::init(100));

/// Maximum SIMD width.
const unsigned VectorizerParams::MaxVectorWidth = 64;

/// We collect dependences up to this threshold.
static cl::opt<unsigned>
    MaxDependences("max-dependences", cl::Hidden,
                   cl::desc("Maximum number of dependences collected by "
                            "loop-access analysis (default = 100)"),
                   cl::init(100));

/// This enables versioning on the strides of symbolically striding memory
/// accesses in code like the following.
///   for (i = 0; i < N; ++i)
///     A[i * Stride1] += B[i * Stride2] ...
///
/// Will be roughly translated to
///    if (Stride1 == 1 && Stride2 == 1) {
///      for (i = 0; i < N; i+=4)
///       A[i:i+3] += ...
///    } else
///      ...
static cl::opt<bool> EnableMemAccessVersioning(
    "enable-mem-access-versioning", cl::init(true), cl::Hidden,
    cl::desc("Enable symbolic stride memory access versioning"));

/// Enable store-to-load forwarding conflict detection. This option can
/// be disabled for correctness testing.
static cl::opt<bool> EnableForwardingConflictDetection(
    "store-to-load-forwarding-conflict-detection", cl::Hidden,
    cl::desc("Enable conflict detection in loop-access analysis"),
    cl::init(true));

/// Recursion depth limit used when finding forked SCEVs; defaults to 5.
static cl::opt<unsigned> MaxForkedSCEVDepth(
    "max-forked-scev-depth", cl::Hidden,
    cl::desc("Maximum recursion depth when finding forked SCEVs (default = 5)"),
    cl::init(5));

/// Whether LAA speculates that non-constant strides are one; on by default.
static cl::opt<bool> SpeculateUnitStride(
    "laa-speculate-unit-stride", cl::Hidden,
    cl::desc("Speculate that non-constant strides are unit in LAA"),
    cl::init(true));

/// Whether inner-loop runtime memory checks are hoisted to the outer loop when
/// possible; on by default and stored in VectorizerParams::HoistRuntimeChecks.
static cl::opt<bool, true> HoistRuntimeChecks(
    "hoist-runtime-checks", cl::Hidden,
    cl::desc(
        "Hoist inner loop runtime memory checks to outer loop if possible"),
    cl::location(VectorizerParams::HoistRuntimeChecks), cl::init(true));
// Storage backing -hoist-runtime-checks.
bool VectorizerParams::HoistRuntimeChecks;

/// Returns true if -force-vector-interleave occurred at least once among the
/// parsed options, regardless of the value it was given.
bool VectorizerParams::isInterleaveForced() {
  // The leading '::' selects the file-local cl::opt rather than the static
  // member of VectorizerParams with the same name.
  const auto Occurrences = ::VectorizationInterleave.getNumOccurrences();
  return Occurrences > 0;
}

/// Return the SCEV of \p Ptr under the assumption that its symbolic stride, if
/// \p PtrToStride records one, is equal to one.
///
/// When \p Ptr has an entry in \p PtrToStride, a "stride == 1" predicate is
/// added to \p PSE and the SCEV of \p Ptr is queried again afterwards.
/// Otherwise the current SCEV of \p Ptr is returned unchanged.
const SCEV *llvm::replaceSymbolicStrideSCEV(
    PredicatedScalarEvolution &PSE,
    const DenseMap<Value *, const SCEV *> &PtrToStride, Value *Ptr) {
  const SCEV *Unversioned = PSE.getSCEV(Ptr);

  const SCEV *Stride = PtrToStride.lookup(Ptr);
  if (Stride == nullptr) {
    // No symbolic stride was recorded for this pointer; nothing to replace.
    return Unversioned;
  }

  // Note: This assert is both overly strong and overly weak. The actual
  // invariant here is that the stride should be loop invariant. The only such
  // invariant strides we happen to speculate right now are unknowns and thus
  // this is a reasonable proxy of the actual invariant.
  assert(isa<SCEVUnknown>(Stride) && "shouldn't be in map");

  // Record the assumption "Stride == 1", then ask for the pointer's SCEV again
  // now that the predicate is part of PSE.
  ScalarEvolution &SE = *PSE.getSE();
  const SCEV *One = SE.getOne(Stride->getType());
  PSE.addPredicate(*SE.getEqualPredicate(Stride, One));
  const SCEV *Versioned = PSE.getSCEV(Ptr);

  LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *Unversioned
                    << " by: " << *Versioned << "\n");
  return Versioned;
}

/// Create a checking group whose only member is the pointer at position
/// \p Index of \p RtCheck. The group's bounds, address space and freeze
/// requirement are copied from that pointer's entry.
RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
    unsigned Index, const RuntimePointerChecking &RtCheck)
    : High(RtCheck.Pointers[Index].End), Low(RtCheck.Pointers[Index].Start),
      AddressSpace(RtCheck.Pointers[Index]
                       .PointerValue->getType()
                       ->getPointerAddressSpace()),
      NeedsFreeze(RtCheck.Pointers[Index].NeedsFreeze) {
  // The seeding pointer is the first (and so far only) member.
  Members.emplace_back(Index);
}

/// Build the sum \p A + \p B, but only when \p SE can show that the unsigned
/// addition does not overflow; yields nullptr otherwise. Both operands must
/// have the same type.
static const SCEV *addSCEVNoOverflow(const SCEV *A, const SCEV *B,
                                     ScalarEvolution &SE) {
  const bool CannotWrap =
      SE.willNotOverflow(Instruction::Add, /*IsSigned=*/false, A, B);
  return CannotWrap ? SE.getAddExpr(A, B) : nullptr;
}

/// Build the product \p A * \p B, but only when \p SE can show that the
/// unsigned multiplication does not overflow; yields nullptr otherwise. Both
/// operands must have the same type.
static const SCEV *mulSCEVNoOverflow(const SCEV *A, const SCEV *B,
                                     ScalarEvolution &SE) {
  const bool CannotWrap =
      SE.willNotOverflow(Instruction::Mul, /*IsSigned=*/false, A, B);
  return CannotWrap ? SE.getMulExpr(A, B) : nullptr;
}

/// Return true, if evaluating \p AR at \p MaxBTC cannot wrap, because \p AR at
/// \p MaxBTC is guaranteed inbounds of the accessed object.
///
/// \p EltSize is the size of one access; it is added to the offset reached at
/// the last iteration before comparing against the dereferenceable size.
/// \p LoopGuards is an in/out cache: it is populated here on first use and
/// reused afterwards. Returns false whenever any required fact cannot be
/// established.
static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(
    const SCEVAddRecExpr *AR, const SCEV *MaxBTC, const SCEV *EltSize,
    ScalarEvolution &SE, const DataLayout &DL, DominatorTree *DT,
    AssumptionCache *AC,
    std::optional<ScalarEvolution::LoopGuards> &LoopGuards) {
  // Only pointers whose base is a plain SCEVUnknown are handled.
  auto *PointerBase = SE.getPointerBase(AR->getStart());
  auto *StartPtr = dyn_cast<SCEVUnknown>(PointerBase);
  if (!StartPtr)
    return false;
  const Loop *L = AR->getLoop();
  // Both flags are outputs of getPointerDereferenceableBytes below.
  bool CheckForNonNull, CheckForFreed;
  Value *StartPtrV = StartPtr->getValue();
  uint64_t DerefBytes = StartPtrV->getPointerDereferenceableBytes(
      DL, CheckForNonNull, CheckForFreed);

  // A non-zero size that comes with either extra condition set is not used.
  if (DerefBytes && (CheckForNonNull || CheckForFreed))
    return false;

  const SCEV *Step = AR->getStepRecurrence(SE);
  // All arithmetic below is done in the wider of the MaxBTC and Step types.
  Type *WiderTy = SE.getWiderType(MaxBTC->getType(), Step->getType());
  const SCEV *DerefBytesSCEV = SE.getConstant(WiderTy, DerefBytes);

  // Check if we have a suitable dereferenceable assumption we can use.
  // The context instruction is the first non-PHI instruction of the loop
  // header, or the terminator of the loop predecessor when that terminator is
  // a branch.
  Instruction *CtxI = &*L->getHeader()->getFirstNonPHIIt();
  if (BasicBlock *LoopPred = L->getLoopPredecessor()) {
    if (isa<UncondBrInst, CondBrInst>(LoopPred->getTerminator()))
      CtxI = LoopPred->getTerminator();
  }
  // Keep the maximum (per std::max) of all assumptions that are valid at CtxI
  // and, if the pointer can be freed, are not followed by a possible free
  // before CtxI.
  RetainedKnowledge DerefRK;
  getKnowledgeForValue(StartPtrV, {Attribute::Dereferenceable}, *AC,
                       [&](RetainedKnowledge RK, Instruction *Assume, auto) {
                         if (!isValidAssumeForContext(Assume, CtxI, DT))
                           return false;
                         if (StartPtrV->canBeFreed() &&
                             !willNotFreeBetween(Assume, CtxI))
                           return false;
                         DerefRK = std::max(DerefRK, RK);
                         return true;
                       });
  if (DerefRK) {
    // Combine the attribute-derived size with the assumed size by taking their
    // unsigned maximum in a common (zero-extended) type.
    const SCEV *DerefRKSCEV = SE.getSCEV(DerefRK.IRArgValue);
    Type *CommonTy =
        SE.getWiderType(DerefBytesSCEV->getType(), DerefRKSCEV->getType());
    DerefBytesSCEV = SE.getNoopOrZeroExtend(DerefBytesSCEV, CommonTy);
    DerefRKSCEV = SE.getNoopOrZeroExtend(DerefRKSCEV, CommonTy);
    DerefBytesSCEV = SE.getUMaxExpr(DerefBytesSCEV, DerefRKSCEV);
  }

  // Without any known dereferenceable bytes nothing can be proven.
  if (DerefBytesSCEV->isZero())
    return false;

  // The sign of the step must be known to pick the right check below.
  bool IsKnownNonNegative = SE.isKnownNonNegative(Step);
  if (!IsKnownNonNegative && !SE.isKnownNegative(Step))
    return false;

  Step = SE.getNoopOrSignExtend(Step, WiderTy);
  MaxBTC = SE.getNoopOrZeroExtend(MaxBTC, WiderTy);

  // For the computations below, make sure they don't unsigned wrap.
  if (!SE.isKnownPredicate(CmpInst::ICMP_UGE, AR->getStart(), StartPtr))
    return false;
  // Offset of the first access from the base pointer.
  const SCEV *StartOffset = SE.getNoopOrZeroExtend(
      SE.getMinusSCEV(AR->getStart(), StartPtr), WiderTy);

  // Collect the loop guards once and cache them in the caller-provided slot.
  if (!LoopGuards)
    LoopGuards.emplace(ScalarEvolution::LoopGuards::collect(AR->getLoop(), SE));
  MaxBTC = SE.applyLoopGuards(MaxBTC, *LoopGuards);

  // |Step| * MaxBTC: distance covered between the first and last iteration.
  const SCEV *OffsetAtLastIter =
      mulSCEVNoOverflow(MaxBTC, SE.getAbsExpr(Step, /*IsNSW=*/false), SE);
  if (!OffsetAtLastIter) {
    // Re-try with constant max backedge-taken count if using the symbolic one
    // failed.
    MaxBTC = SE.getConstantMaxBackedgeTakenCount(AR->getLoop());
    if (isa<SCEVCouldNotCompute>(MaxBTC))
      return false;
    MaxBTC = SE.getNoopOrZeroExtend(
        MaxBTC, WiderTy);
    OffsetAtLastIter =
        mulSCEVNoOverflow(MaxBTC, SE.getAbsExpr(Step, /*IsNSW=*/false), SE);
    if (!OffsetAtLastIter)
      return false;
  }

  // Add the access size so the bound covers the whole last access.
  const SCEV *OffsetEndBytes = addSCEVNoOverflow(
      OffsetAtLastIter, SE.getNoopOrZeroExtend(EltSize, WiderTy), SE);
  if (!OffsetEndBytes)
    return false;

  if (IsKnownNonNegative) {
    // For positive steps, check if
    //  (AR->getStart() - StartPtr) + (MaxBTC  * Step) + EltSize <= DerefBytes,
    // while making sure none of the computations unsigned wrap themselves.
    const SCEV *EndBytes = addSCEVNoOverflow(StartOffset, OffsetEndBytes, SE);
    if (!EndBytes)
      return false;

    DerefBytesSCEV = SE.applyLoopGuards(DerefBytesSCEV, *LoopGuards);
    return SE.isKnownPredicate(CmpInst::ICMP_ULE, EndBytes, DerefBytesSCEV);
  }

  // For negative steps check if
  //  * StartOffset >= (MaxBTC * Step + EltSize)
  //  * StartOffset <= DerefBytes.
  assert(SE.isKnownNegative(Step) && "must be known negative");
  return SE.isKnownPredicate(CmpInst::ICMP_SGE, StartOffset, OffsetEndBytes) &&
         SE.isKnownPredicate(CmpInst::ICMP_ULE, StartOffset, DerefBytesSCEV);
}

std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(

const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,

const SCEV *MaxBTC, ScalarEvolution *SE,

DenseMap<std::pair<const SCEV *, const SCEV *>,

std::pair<const SCEV *, const SCEV *>> *PointerBounds,

DominatorTree *DT, AssumptionCache *AC,

std::optional<ScalarEvolution::LoopGuards> &LoopGuards) {

auto &DL = Lp->getHeader()->getDataLayout();

Type *IdxTy = DL.getIndexType(PtrExpr->getType());

const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);

// Delegate to the SCEV-based overload, passing through the cache.

return getStartAndEndForAccess(Lp, PtrExpr, EltSizeSCEV, BTC, MaxBTC, SE,

PointerBounds, DT, AC, LoopGuards);

}

/// Compute the bounds of the memory range accessed through \p PtrExpr while
/// executing loop \p Lp, for accesses of size \p EltSizeSCEV.
///
/// Returns {Start, End}, where End already has \p EltSizeSCEV added to the
/// address of the last access. A pair of SCEVCouldNotCompute is returned if
/// \p PtrExpr is neither invariant in \p Lp nor an add recurrence. \p BTC is
/// used when it is computable; otherwise the end is derived from \p MaxBTC,
/// falling back to the largest unsigned address when evaluating at \p MaxBTC
/// is not known to be free of wrapping.
///
/// If \p PointerBounds is non-null it acts as a cache keyed on
/// {PtrExpr, EltSizeSCEV}; an entry that was not computable stays cached as a
/// pair of SCEVCouldNotCompute. \p DT, \p AC and \p LoopGuards are forwarded to
/// evaluatePtrAddRecAtMaxBTCWillNotWrap.
std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
    const Loop *Lp, const SCEV *PtrExpr, const SCEV *EltSizeSCEV,
    const SCEV *BTC, const SCEV *MaxBTC, ScalarEvolution *SE,
    DenseMap<std::pair<const SCEV *, const SCEV *>,
             std::pair<const SCEV *, const SCEV *>> *PointerBounds,
    DominatorTree *DT, AssumptionCache *AC,
    std::optional<ScalarEvolution::LoopGuards> &LoopGuards) {
  // Points at the cache slot to fill in; only set when a cache was provided.
  std::pair<const SCEV *, const SCEV *> *PtrBoundsPair = nullptr;
  if (PointerBounds) {
    // Insert a placeholder first; if the key is already present, its value is
    // the previously computed answer.
    auto [Iter, Ins] = PointerBounds->insert(
        {{PtrExpr, EltSizeSCEV},
         {SE->getCouldNotCompute(), SE->getCouldNotCompute()}});
    if (!Ins)
      return Iter->second;
    PtrBoundsPair = &Iter->second;
  }

  const SCEV *ScStart;
  const SCEV *ScEnd;

  auto &DL = Lp->getHeader()->getDataLayout();
  if (SE->isLoopInvariant(PtrExpr, Lp)) {
    ScStart = ScEnd = PtrExpr;
  } else if (auto *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr)) {
    ScStart = AR->getStart();
    if (!isa<SCEVCouldNotCompute>(BTC)) {
      // Evaluating AR at an exact BTC is safe: LAA separately checks that
      // accesses cannot wrap in the loop. If evaluating AR at BTC wraps, then
      // the loop either triggers UB when executing a memory access with a
      // poison pointer or the wrapping/poisoned pointer is not used.
      ScEnd = AR->evaluateAtIteration(BTC, *SE);
    } else {
      // Evaluating AR at MaxBTC may wrap and create an expression that is less
      // than the start of the AddRec due to wrapping (for example consider
      // MaxBTC = -2). If that's the case, set ScEnd to -(EltSize + 1). ScEnd
      // will get incremented by EltSize before returning, so this effectively
      // sets ScEnd to the maximum unsigned value for the type. Note that LAA
      // separately checks that accesses cannot wrap, so unsigned max
      // represents an upper bound.
      if (evaluatePtrAddRecAtMaxBTCWillNotWrap(AR, MaxBTC, EltSizeSCEV, *SE, DL,
                                               DT, AC, LoopGuards)) {
        ScEnd = AR->evaluateAtIteration(MaxBTC, *SE);
      } else {
        ScEnd = SE->getAddExpr(
            SE->getNegativeSCEV(EltSizeSCEV),
            SE->getSCEV(ConstantExpr::getIntToPtr(
                ConstantInt::getAllOnesValue(EltSizeSCEV->getType()),
                AR->getType())));
      }
    }
    const SCEV *Step = AR->getStepRecurrence(*SE);

    // For expressions with negative step, the upper bound is ScStart and the
    // lower bound is ScEnd.
    if (const auto *CStep = dyn_cast<SCEVConstant>(Step)) {
      if (CStep->getValue()->isNegative())
        std::swap(ScStart, ScEnd);
    } else {
      // Fallback case: the step is not constant, but we can still
      // get the upper and lower bounds of the interval by using min/max
      // expressions.
      ScStart = SE->getUMinExpr(ScStart, ScEnd);
      ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
    }
  } else
    return {SE->getCouldNotCompute(), SE->getCouldNotCompute()};

  assert(SE->isLoopInvariant(ScStart, Lp) && "ScStart needs to be invariant");
  assert(SE->isLoopInvariant(ScEnd, Lp) && "ScEnd needs to be invariant");

  // Add the size of the pointed element to ScEnd.
  ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);

  std::pair<const SCEV *, const SCEV *> Res = {ScStart, ScEnd};
  if (PointerBounds)
    *PtrBoundsPair = Res;

  return Res;
}

/// Append a new entry for \p Ptr to Pointers.
///
/// The start and end of the accessed range are obtained from
/// getStartAndEndForAccess, using the exact and the symbolic maximum
/// backedge-taken counts provided by \p PSE. Both bounds must be computable.
void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
                                    Type *AccessTy, bool WritePtr,
                                    unsigned DepSetId, unsigned ASId,
                                    PredicatedScalarEvolution &PSE,
                                    bool NeedsFreeze) {
  const SCEV *MaxTakenCount = PSE.getSymbolicMaxBackedgeTakenCount();
  const SCEV *ExactTakenCount = PSE.getBackedgeTakenCount();

  const std::pair<const SCEV *, const SCEV *> Bounds = getStartAndEndForAccess(
      Lp, PtrExpr, AccessTy, ExactTakenCount, MaxTakenCount, PSE.getSE(),
      &DC.getPointerBounds(), DC.getDT(), DC.getAC(), LoopGuards);
  const SCEV *RangeStart = Bounds.first;
  const SCEV *RangeEnd = Bounds.second;

  assert(!isa<SCEVCouldNotCompute>(RangeStart) &&
         !isa<SCEVCouldNotCompute>(RangeEnd) &&
         "must be able to compute both start and end expressions");

  Pointers.emplace_back(Ptr, RangeStart, RangeEnd, WritePtr, DepSetId, ASId,
                        PtrExpr, NeedsFreeze);
}

/// Try to express the check between groups \p CGI and \p CGJ as a single
/// difference check on the start addresses of their pointers.
///
/// On success a new entry is appended to DiffChecks and true is returned.
/// False is returned, and nothing is recorded, when a group has more than one
/// member, a pointer is both read and written or accessed more than once, the
/// two pointers are not affine add recurrences in the innermost loop with the
/// same constant step, the access types are scalable vectors, the step
/// magnitude differs from the larger alloc size, a start address cannot be
/// converted to an integer expression, or the check could not be hoisted out
/// of the outer loop.
bool RuntimePointerChecking::tryToCreateDiffCheck(
    const RuntimeCheckingPtrGroup &CGI, const RuntimeCheckingPtrGroup &CGJ) {
  // If either group contains multiple different pointers, bail out.
  // TODO: Support multiple pointers by using the minimum or maximum pointer,
  // depending on src & sink.
  if (CGI.Members.size() != 1 || CGJ.Members.size() != 1)
    return false;

  const PointerInfo *Src = &Pointers[CGI.Members[0]];
  const PointerInfo *Sink = &Pointers[CGJ.Members[0]];

  // If either pointer is read and written, multiple checks may be needed. Bail
  // out.
  if (!DC.getOrderForAccess(Src->PointerValue, !Src->IsWritePtr).empty() ||
      !DC.getOrderForAccess(Sink->PointerValue, !Sink->IsWritePtr).empty())
    return false;

  ArrayRef<unsigned> AccSrc =
      DC.getOrderForAccess(Src->PointerValue, Src->IsWritePtr);
  ArrayRef<unsigned> AccSink =
      DC.getOrderForAccess(Sink->PointerValue, Sink->IsWritePtr);
  // If either pointer is accessed multiple times, there may not be a clear
  // src/sink relation. Bail out for now.
  if (AccSrc.size() != 1 || AccSink.size() != 1)
    return false;

  // If the sink is accessed before src, swap src/sink.
  if (AccSink[0] < AccSrc[0])
    std::swap(Src, Sink);

  // Both expressions must be affine add recurrences of the innermost loop, and
  // the sink must use exactly the constant step matched for the source.
  const SCEVConstant *Step;
  const SCEV *SrcStart;
  const SCEV *SinkStart;
  const Loop *InnerLoop = DC.getInnermostLoop();
  if (!match(Src->Expr,
             m_scev_AffineAddRec(m_SCEV(SrcStart), m_SCEVConstant(Step),
                                 m_SpecificLoop(InnerLoop))) ||
      !match(Sink->Expr,
             m_scev_AffineAddRec(m_SCEV(SinkStart), m_scev_Specific(Step),
                                 m_SpecificLoop(InnerLoop))))
    return false;

  SmallVector<Instruction *, 4> SrcInsts =
      DC.getInstructionsForAccess(Src->PointerValue, Src->IsWritePtr);
  SmallVector<Instruction *, 4> SinkInsts =
      DC.getInstructionsForAccess(Sink->PointerValue, Sink->IsWritePtr);
  Type *SrcTy = getLoadStoreType(SrcInsts[0]);
  Type *DstTy = getLoadStoreType(SinkInsts[0]);
  if (isa<ScalableVectorType>(SrcTy) || isa<ScalableVectorType>(DstTy))
    return false;

  const DataLayout &DL = InnerLoop->getHeader()->getDataLayout();
  unsigned AllocSize =
      std::max(DL.getTypeAllocSize(SrcTy), DL.getTypeAllocSize(DstTy));

  // Only matching constant steps matching the AllocSize are supported at the
  // moment. This simplifies the difference computation. Can be extended in the
  // future.
  if (Step->getAPInt().abs() != AllocSize)
    return false;

  // When counting down, the dependence distance needs to be swapped.
  if (Step->getValue()->isNegative())
    std::swap(SinkStart, SrcStart);

  const SCEV *SinkStartInt = SE->getPtrToAddrExpr(SinkStart);
  const SCEV *SrcStartInt = SE->getPtrToAddrExpr(SrcStart);
  if (isa<SCEVCouldNotCompute>(SinkStartInt) ||
      isa<SCEVCouldNotCompute>(SrcStartInt))
    return false;

  // If the start values for both Src and Sink also vary according to an outer
  // loop, then it's probably better to avoid creating diff checks because
  // they may not be hoisted. We should instead let llvm::addRuntimeChecks
  // do the expanded full range overlap checks, which can be hoisted.
  if (HoistRuntimeChecks && InnerLoop->getParentLoop() &&
      isa<SCEVAddRecExpr>(SinkStartInt) && isa<SCEVAddRecExpr>(SrcStartInt)) {
    auto *SrcStartAR = cast<SCEVAddRecExpr>(SrcStartInt);
    auto *SinkStartAR = cast<SCEVAddRecExpr>(SinkStartInt);
    const Loop *StartARLoop = SrcStartAR->getLoop();
    if (StartARLoop == SinkStartAR->getLoop() &&
        StartARLoop == InnerLoop->getParentLoop() &&
        // If the diff check would already be loop invariant (due to the
        // recurrences being the same), then we prefer to keep the diff checks
        // because they are cheaper.
        SrcStartAR->getStepRecurrence(*SE) !=
            SinkStartAR->getStepRecurrence(*SE)) {
      LLVM_DEBUG(dbgs() << "LAA: Not creating diff runtime check, since these "
                           "cannot be hoisted out of the outer loop\n");
      return false;
    }
  }

  LLVM_DEBUG(dbgs() << "LAA: Creating diff runtime check for:\n"
                    << "SrcStart: " << *SrcStartInt << '\n'
                    << "SinkStartInt: " << *SinkStartInt << '\n');
  DiffChecks.emplace_back(SrcStartInt, SinkStartInt, AllocSize,
                          Src->NeedsFreeze || Sink->NeedsFreeze);
  return true;
}

/// Build the list of runtime checks between all pairs of checking groups.
///
/// Every unordered pair of distinct groups for which needsChecking() holds
/// contributes one entry. As long as CanUseDiffCheck is still set, each such
/// pair is also offered to tryToCreateDiffCheck(); the first failure clears
/// CanUseDiffCheck, after which no further diff checks are attempted.
SmallVector<RuntimePointerCheck, 4> RuntimePointerChecking::generateChecks() {
  SmallVector<RuntimePointerCheck, 4> Checks;

  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
    // Start at I + 1 so each pair is visited once and no group is paired with
    // itself.
    for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {
      const RuntimeCheckingPtrGroup &CGI = CheckingGroups[I];
      const RuntimeCheckingPtrGroup &CGJ = CheckingGroups[J];

      if (needsChecking(CGI, CGJ)) {
        CanUseDiffCheck = CanUseDiffCheck && tryToCreateDiffCheck(CGI, CGJ);
        Checks.emplace_back(&CGI, &CGJ);
      }
    }
  }
  return Checks;
}

/// Group the recorded pointers using \p DepCands and then populate Checks with
/// the checks needed between the resulting groups. Checks must be empty on
/// entry.
void RuntimePointerChecking::generateChecks(
    MemoryDepChecker::DepCandidates &DepCands) {
  assert(Checks.empty() && "Checks is not empty");

  // Grouping has to happen first: the pairwise checks are derived from
  // CheckingGroups.
  groupChecks(DepCands);
  SmallVector<RuntimePointerCheck, 4> GroupChecks = generateChecks();
  Checks = std::move(GroupChecks);
}

bool RuntimePointerChecking::needsChecking(

const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const {

for (const auto &I : M.Members)

for (const auto &J : N.Members)

if (needsChecking(I, J))

return true;

return false;

}

/// Pick the smaller of \p I and \p J, based on the constant difference J - I
/// as computed by \p SE: \p J is chosen when that difference is negative,
/// \p I otherwise. Yields nullptr when no constant difference is available.
static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
                                   ScalarEvolution *SE) {
  if (std::optional<APInt> Delta = SE->computeConstantDifference(J, I))
    return Delta->isNegative() ? J : I;
  return nullptr;
}

/// Try to add the pointer at position \p Index of \p RtCheck to this group.
///
/// Looks up the pointer's bounds, address space and freeze requirement in
/// \p RtCheck and forwards them to the overload taking explicit bounds.
bool RuntimeCheckingPtrGroup::addPointer(
    unsigned Index, const RuntimePointerChecking &RtCheck) {
  const auto &Candidate = RtCheck.Pointers[Index];
  const unsigned CandidateAS =
      Candidate.PointerValue->getType()->getPointerAddressSpace();
  return addPointer(Index, Candidate.Start, Candidate.End, CandidateAS,
                    Candidate.NeedsFreeze, *RtCheck.SE);
}

bool RuntimeCheckingPtrGroup::addPointer(unsigned Index, const SCEV *Start,

const SCEV *End, unsigned AS,

bool NeedsFreeze,

ScalarEvolution &SE) {

assert(AddressSpace == AS &&

"all pointers in a checking group must be in the same address space");

// Compare the starts and ends with the known minimum and maximum

// of this set. We need to know how we compare against the min/max

// of the set in order to be able to emit memchecks.

const SCEV *Min0 = getMinFromExprs(Start, Low, &SE);

if (!Min0)

return false;

const SCEV *Min1 = getMinFromExprs(End, High, &SE);

if (!Min1)

return false;

// Update the low bound expression if we've found a new min value.

if (Min0 == Start)

Low = Start;

// Update the high bound expression if we've found a new max value.

if (Min1 != End)

High = End;

Members.push_back(Index);

this->NeedsFreeze |= NeedsFreeze;

return true;

}

/// Partition the recorded pointers into checking groups and store the result
/// in CheckingGroups (which is cleared first).
///
/// Pointers are visited in the order in which they appear in Pointers. A
/// pointer whose access is not in \p DepCands gets a group of its own. The
/// remaining pointers are processed one equivalence class of \p DepCands at a
/// time and greedily merged into the first group of that class that accepts
/// them, subject to the comparison budget given by MemoryCheckMergeThreshold.
void RuntimePointerChecking::groupChecks(
    MemoryDepChecker::DepCandidates &DepCands) {
  // We build the groups from dependency candidates equivalence classes
  // because:
  //    - We know that pointers in the same equivalence class share
  //      the same underlying object and therefore there is a chance
  //      that we can compare pointers
  //    - We wouldn't be able to merge two pointers for which we need
  //      to emit a memcheck. The classes in DepCands are already
  //      conveniently built such that no two pointers in the same
  //      class need checking against each other.

  // We use the following (greedy) algorithm to construct the groups
  // For every pointer in the equivalence class:
  //   For each existing group:
  //   - if the difference between this pointer and the min/max bounds
  //     of the group is a constant, then make the pointer part of the
  //     group and update the min/max bounds of that group as required.

  CheckingGroups.clear();

  // If we need to check two pointers to the same underlying object
  // with a non-constant difference, we shouldn't perform any pointer
  // grouping with those pointers. This is because we can easily get
  // into cases where the resulting check would return false, even when
  // the accesses are safe.
  //
  // The following example shows this:
  // for (i = 0; i < 1000; ++i)
  //   a[5000 + i * m] = a[i] + a[i + 9000]
  //
  // Here grouping gives a check of (5000, 5000 + 1000 * m) against
  // (0, 10000) which is always false. However, if m is 1, there is no
  // dependence. Not grouping the checks for a[i] and a[i + 9000] allows
  // us to perform an accurate check in this case.
  //
  // In the above case, we have a non-constant distance and an Unknown
  // dependence between accesses to the same underlying object, and could retry
  // with runtime checks without dependency information being available. In this
  // case we will use the fallback path and create separate checking groups for
  // accesses not present in DepCands.

  unsigned TotalComparisons = 0;

  // Map every pointer value to all positions in Pointers that refer to it.
  DenseMap<Value *, SmallVector<unsigned>> PositionMap;
  for (unsigned Index = 0; Index < Pointers.size(); ++Index)
    PositionMap[Pointers[Index].PointerValue].push_back(Index);

  // We need to keep track of what pointers we've already seen so we
  // don't process them twice.
  SmallSet<unsigned, 2> Seen;

  // Go through all equivalence classes, get the "pointer check groups"
  // and add them to the overall solution. We use the order in which accesses
  // appear in 'Pointers' to enforce determinism.
  for (unsigned I = 0; I < Pointers.size(); ++I) {
    // We've seen this pointer before, and therefore already processed
    // its equivalence class.
    if (Seen.contains(I))
      continue;

    MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue,
                                           Pointers[I].IsWritePtr);

    // If there is no entry in the dependency partition, there are no potential
    // accesses to merge; simply add a new pointer checking group.
    if (!DepCands.contains(Access)) {
      CheckingGroups.push_back(RuntimeCheckingPtrGroup(I, *this));
      continue;
    }

    SmallVector<RuntimeCheckingPtrGroup, 2> Groups;

    // Because DepCands is constructed by visiting accesses in the order in
    // which they appear in alias sets (which is deterministic) and the
    // iteration order within an equivalence class member is only dependent on
    // the order in which unions and insertions are performed on the
    // equivalence class, the iteration order is deterministic.
    for (auto M : DepCands.members(Access)) {
      auto PointerI = PositionMap.find(M.getPointer());
      // If we can't find the pointer in PositionMap that means we can't
      // generate a memcheck for it.
      if (PointerI == PositionMap.end())
        continue;
      for (unsigned Pointer : PointerI->second) {
        bool Merged = false;
        // Mark this pointer as seen.
        Seen.insert(Pointer);

        // Go through all the existing sets and see if we can find one
        // which can include this pointer.
        for (RuntimeCheckingPtrGroup &Group : Groups) {
          // Don't perform more than a certain amount of comparisons.
          // This should limit the cost of grouping the pointers to something
          // reasonable.  If we do end up hitting this threshold, the algorithm
          // will create separate groups for all remaining pointers.
          if (TotalComparisons > MemoryCheckMergeThreshold)
            break;

          TotalComparisons++;

          if (Group.addPointer(Pointer, *this)) {
            Merged = true;
            break;
          }
        }

        if (!Merged)
          // We couldn't add this pointer to any existing set or the threshold
          // for the number of comparisons has been reached. Create a new group
          // to hold the current pointer.
          Groups.emplace_back(Pointer, *this);
      }
    }

    // We've computed the grouped checks for this partition.
    // Save the results and continue with the next one.
    llvm::append_range(CheckingGroups, Groups);
  }
}

bool RuntimePointerChecking::arePointersInSamePartition(

const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1,

unsigned PtrIdx2) {

return (PtrToPartition[PtrIdx1] != -1 &&

PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);

}

bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {

const PointerInfo &PointerI = Pointers[I];

const PointerInfo &PointerJ = Pointers[J];

// No need to check if two readonly pointers intersect.

if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr)

return false;

// Only need to check pointers between two different dependency sets.

if (PointerI.DependencySetId == PointerJ.DependencySetId)

return false;

// Only need to check pointers in the same alias set.

return PointerI.AliasSetId == PointerJ.AliasSetId;

}

/// Build a map from the address of every group in \p CheckingGroups to its
/// position in that array, so that groups can be printed with a stable index
/// (used for stable UTC output).
static DenseMap<const RuntimeCheckingPtrGroup *, unsigned>
getPtrToIdxMap(ArrayRef<RuntimeCheckingPtrGroup> CheckingGroups) {
  DenseMap<const RuntimeCheckingPtrGroup *, unsigned> GroupToIdx;
  unsigned NextIdx = 0;
  // Keys are the addresses of the elements inside CheckingGroups themselves.
  for (const RuntimeCheckingPtrGroup &Group : CheckingGroups)
    GroupToIdx[&Group] = NextIdx++;
  return GroupToIdx;
}

/// Print every check in \p Checks to \p OS.
///
/// Each check is a pair of pointer groups. For every pair this prints a
/// numbered "Check N:" header at indentation \p Depth, followed by the
/// "GRP<index>" label and the pointer values of both groups at indentation
/// \p Depth + 2. Group indices are the positions of the groups within
/// 'CheckingGroups'.
void RuntimePointerChecking::printChecks(
    raw_ostream &OS, const SmallVectorImpl<RuntimePointerCheck> &Checks,
    unsigned Depth) const {
  unsigned N = 0;
  auto PtrIndices = getPtrToIdxMap(CheckingGroups);
  for (const auto &[Check1, Check2] : Checks) {
    const auto &First = Check1->Members, &Second = Check2->Members;

    OS.indent(Depth) << "Check " << N++ << ":\n";

    OS.indent(Depth + 2) << "Comparing group GRP" << PtrIndices.at(Check1)
                         << ":\n";
    for (unsigned K : First)
      OS.indent(Depth + 2) << *Pointers[K].PointerValue << "\n";

    OS.indent(Depth + 2) << "Against group GRP" << PtrIndices.at(Check2)
                         << ":\n";
    for (unsigned K : Second)
      OS.indent(Depth + 2) << *Pointers[K].PointerValue << "\n";
  }
}

/// Print the run-time memory checks followed by the grouped accesses to
/// \p OS, starting at indentation \p Depth.
///
/// The "Grouped accesses" section lists each group in 'CheckingGroups' with
/// its "GRP<index>" label, its Low/High bounds and the expression of every
/// member pointer.
void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const {
  OS.indent(Depth) << "Run-time memory checks:\n";
  printChecks(OS, Checks, Depth);

  OS.indent(Depth) << "Grouped accesses:\n";
  auto PtrIndices = getPtrToIdxMap(CheckingGroups);
  for (const auto &CG : CheckingGroups) {
    OS.indent(Depth + 2) << "Group GRP" << PtrIndices.at(&CG) << ":\n";
    OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High
                         << ")\n";
    for (unsigned Member : CG.Members) {
      OS.indent(Depth + 6) << "Member: " << *Pointers[Member].Expr << "\n";
    }
  }
}

namespace {

/// Analyses memory accesses in a loop.
///
/// Determines whether run-time pointer checks are required and builds the
/// sets used for data dependence checking.
class AccessAnalysis {
public:
  /// A pointer paired with a one-bit flag that records whether the access is
  /// a write.
  using MemAccessInfo =
      PointerIntPair<Value * /* AccessPtr */, 1, bool /* IsWrite */>;

  AccessAnalysis(const Loop *TheLoop, AAResults *AA, const LoopInfo *LI,
                 DominatorTree &DT, MemoryDepChecker::DepCandidates &DA,
                 PredicatedScalarEvolution &PSE,
                 SmallPtrSetImpl<MDNode *> &LoopAliasScopes)
      : TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DT(DT), DepCands(DA),
        PSE(PSE), LoopAliasScopes(LoopAliasScopes) {
    // Dependences are analyzed across loop iterations, so the batched alias
    // analysis has to run in cross-iteration mode.
    BAA.enableCrossIterationMode();
  }

  /// Register a load from \p Loc accessed with type \p AccessTy.
  /// \p IsReadOnly tells whether the pointer is only read from.
  void addLoad(const MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly) {
    AST.add(adjustLoc(Loc));

    Value *LoadPtr = const_cast<Value *>(Loc.Ptr);
    const MemAccessInfo ReadAccess(LoadPtr, /*IsWrite=*/false);
    Accesses[ReadAccess].insert(AccessTy);
    if (IsReadOnly)
      ReadOnlyPtr.insert(LoadPtr);
  }

  /// Register a store to \p Loc accessed with type \p AccessTy.
  void addStore(const MemoryLocation &Loc, Type *AccessTy) {
    AST.add(adjustLoc(Loc));

    Value *StorePtr = const_cast<Value *>(Loc.Ptr);
    const MemAccessInfo WriteAccess(StorePtr, /*IsWrite=*/true);
    Accesses[WriteAccess].insert(AccessTy);
  }

  /// Check if we can emit a run-time no-alias check for \p Access.
  ///
  /// Returns true if such a check can be emitted. When the access can be
  /// checked, it is also added to a dependence set and a run-time check for
  /// it is added to \p RtCheck. If \p Assume is true, additional run-time
  /// checks may be used in order to get the bounds of the pointer.
  bool createCheckForAccess(RuntimePointerChecking &RtCheck,
                            MemAccessInfo Access, Type *AccessTy,
                            const DenseMap<Value *, const SCEV *> &Strides,
                            DenseMap<Value *, unsigned> &DepSetId,
                            Loop *TheLoop, unsigned &RunningDepId,
                            unsigned ASId, bool Assume);

  /// Check whether the pointers can be checked at run time for
  /// non-intersection.
  ///
  /// Returns true if no check is needed, or if checks are needed and can be
  /// generated (i.e. the pointers have computable bounds). A return value of
  /// false means that runtime checks could not be analyzed and generated for
  /// all pointers in the loop; if \p AllowPartial is set, checks will still
  /// exist for those pointers that could be analyzed. \p DepChecker is used
  /// to remove unknown dependences from DepCands.
  bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, Loop *TheLoop,
                       const DenseMap<Value *, const SCEV *> &Strides,
                       Value *&UncomputablePtr, bool AllowPartial,
                       const MemoryDepChecker &DepChecker);

  /// Goes over all memory accesses, checks whether a RT check is needed
  /// and builds sets of dependent accesses.
  void buildDependenceSets();

  /// True if the initial processing of memory accesses determined that
  /// dependency checking has to be performed.
  ///
  /// Note that this can later be cleared if memcheck analysis is retried
  /// without dependency checking (i.e. ShouldRetryWithRuntimeChecks).
  bool isDependencyCheckNeeded() const { return !CheckDeps.empty(); }

  /// It was decided that no dependence analysis will be used: drop the
  /// accesses queued for checking and clear the dependences recorded in
  /// \p DepChecker.
  void resetDepChecks(MemoryDepChecker &DepChecker) {
    CheckDeps.clear();
    DepChecker.clearDependences();
  }

  /// The accesses that need a further dependence check.
  ArrayRef<MemAccessInfo> getDependenciesToCheck() const { return CheckDeps; }

private:
  using PtrAccessMap = MapVector<MemAccessInfo, SmallSetVector<Type *, 1>>;

  /// Adjust the MemoryLocation so that it represents accesses to this
  /// location across all iterations, rather than a single one.
  MemoryLocation adjustLoc(MemoryLocation Loc) const {
    // The accessed location varies within the loop, but remains within the
    // underlying object.
    Loc.Size = LocationSize::beforeOrAfterPointer();

    // Both scope lists go through the same iteration-local filtering.
    Loc.AATags.Scope = adjustAliasScopeList(Loc.AATags.Scope);
    Loc.AATags.NoAlias = adjustAliasScopeList(Loc.AATags.NoAlias);
    return Loc;
  }

  /// Drop alias scopes that are only valid within a single loop iteration.
  ///
  /// Returns \p ScopeList unchanged when none of its scopes is contained in
  /// LoopAliasScopes, and nullptr otherwise (or when \p ScopeList is null).
  MDNode *adjustAliasScopeList(MDNode *ScopeList) const {
    if (!ScopeList)
      return nullptr;

    auto IsIterationLocal = [this](Metadata *Scope) {
      return LoopAliasScopes.contains(cast<MDNode>(Scope));
    };
    // For the sake of simplicity, the whole scope list is dropped as soon as
    // a single scope is iteration-local.
    return any_of(ScopeList->operands(), IsIterationLocal) ? nullptr
                                                           : ScopeList;
  }

  /// Map of all accesses. Values are the types used to access memory pointed
  /// to by the pointer.
  PtrAccessMap Accesses;

  /// The loop being checked.
  const Loop *TheLoop;

  /// List of accesses that need a further dependence check.
  SmallVector<MemAccessInfo, 8> CheckDeps;

  /// Set of pointers that are read only.
  SmallPtrSet<Value *, 16> ReadOnlyPtr;

  /// Batched alias analysis results.
  BatchAAResults BAA;

  /// An alias set tracker to partition the access set by underlying object
  /// and intrinsic property (such as TBAA metadata).
  AliasSetTracker AST;

  /// The LoopInfo of the loop being checked.
  const LoopInfo *LI;

  /// The dominator tree of the function.
  DominatorTree &DT;

  /// Sets of potentially dependent accesses - members of one set share an
  /// underlying pointer. The set "CheckDeps" identifies which sets really
  /// need a dependence check.
  MemoryDepChecker::DepCandidates &DepCands;

  /// Initial processing of memory accesses determined that we may need
  /// to add memchecks. Perform the analysis to determine the necessary
  /// checks.
  ///
  /// Note that this is different from isDependencyCheckNeeded. When memcheck
  /// analysis is retried without dependency checking
  /// (i.e. ShouldRetryWithRuntimeChecks), isDependencyCheckNeeded is cleared
  /// while this remains set if we have potentially dependent accesses.
  bool IsRTCheckAnalysisNeeded = false;

  /// The SCEV predicate containing all the SCEV-related assumptions.
  PredicatedScalarEvolution &PSE;

  /// Per-value lists of values.
  /// NOTE(review): presumably caches the underlying objects found for each
  /// pointer - confirm against the users of this member.
  DenseMap<Value *, SmallVector<const Value *, 16>> UnderlyingObjects;

  /// Alias scopes that are declared inside the loop, and as such not valid
  /// across iterations.
  SmallPtrSetImpl<MDNode *> &LoopAliasScopes;
};

} // end anonymous namespace

/// Try to compute a constant stride for \p AR. Used by getPtrStride and

/// isNoWrap.

static std::optional<int64_t>

getStrideFromAddRec(const SCEVAddRecExpr *AR, const Loop *Lp, Type *AccessTy,

Value *Ptr, PredicatedScalarEvolution &PSE) {

if (isa<ScalableVectorType>(AccessTy)) {

LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy

<< "\n");

return std::nullopt;

}

// The access function must stride over the innermost loop.

if (Lp != AR->getLoop()) {

LLVM_DEBUG({

dbgs() << "LAA: Bad stride - Not striding over innermost loop ";

if (Ptr)

dbgs() << *Ptr << " ";

dbgs() << "SCEV: " << *AR << "\n";

});

return std::nullopt;

}

// Check the step is constant.

const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());

// Calculate the pointer stride and check if it is constant.

const APInt *APStepVal;

View remainder of file in raw view

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

LoopAccessAnalysis.cpp

Latest commit

History

LoopAccessAnalysis.cpp

File metadata and controls