diff --git a/llvm/test/Analysis/CostModel/AMDGPU/load.ll b/llvm/test/Analysis/CostModel/AMDGPU/load.ll new file mode 100644 index 0000000000000..3f8016178e719 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AMDGPU/load.ll @@ -0,0 +1,409 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck %s --check-prefixes=GFX90A + + +define void @loads_i1(i32 %arg) { +; GFX90A-LABEL: 'loads_i1' +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i1, ptr poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = load <2 x i1>, ptr poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %3 = load <3 x i1>, ptr poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %4 = load <4 x i1>, ptr poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; + load i1, ptr poison + load <2 x i1>, ptr poison + load <3 x i1>, ptr poison + load <4 x i1>, ptr poison + + ret void +} + +define void @loads_i8(i32 %arg) { +; GFX90A-LABEL: 'loads_i8' +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, ptr poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = load <2 x i8>, ptr poison, align 2 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %3 = load <3 x i8>, ptr poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %4 = load <4 x i8>, ptr poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = load i8, ptr poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = load <2 x i8>, ptr poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: 
%7 = load <3 x i8>, ptr poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %8 = load <4 x i8>, ptr poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load i8, ptr poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = load <2 x i8>, ptr poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %11 = load <3 x i8>, ptr poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %12 = load <4 x i8>, ptr poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; + load i8, ptr poison + load <2 x i8>, ptr poison + load <3 x i8>, ptr poison + load <4 x i8>, ptr poison + + load i8, ptr poison, align 1 + load <2 x i8>, ptr poison, align 1 + load <3 x i8>, ptr poison, align 1 + load <4 x i8>, ptr poison, align 1 + + load i8, ptr poison, align 4 + load <2 x i8>, ptr poison, align 4 + load <3 x i8>, ptr poison, align 4 + load <4 x i8>, ptr poison, align 4 + + ret void +} + +define void @loads_i16(i32 %arg) { +; GFX90A-LABEL: 'loads_i16' +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i16, ptr poison, align 2 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load <2 x i16>, ptr poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %3 = load <3 x i16>, ptr poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load <4 x i16>, ptr poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = load i16, ptr poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load <2 x i16>, ptr poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %7 = load <3 x i16>, ptr poison, align 1 +; GFX90A-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: %8 = load <4 x i16>, ptr poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load i16, ptr poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load <2 x i16>, ptr poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %11 = load <3 x i16>, ptr poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <4 x i16>, ptr poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load i16, ptr poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <2 x i16>, ptr poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %15 = load <3 x i16>, ptr poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <4 x i16>, ptr poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; + load i16, ptr poison + load <2 x i16>, ptr poison + load <3 x i16>, ptr poison + load <4 x i16>, ptr poison + + load i16, ptr poison, align 1 + load <2 x i16>, ptr poison, align 1 + load <3 x i16>, ptr poison, align 1 + load <4 x i16>, ptr poison, align 1 + + load i16, ptr poison, align 4 + load <2 x i16>, ptr poison, align 4 + load <3 x i16>, ptr poison, align 4 + load <4 x i16>, ptr poison, align 4 + + load i16, ptr poison, align 8 + load <2 x i16>, ptr poison, align 8 + load <3 x i16>, ptr poison, align 8 + load <4 x i16>, ptr poison, align 8 + + ret void +} + +define void @loads_i32(i32 %arg) { +; GFX90A-LABEL: 'loads_i32' +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i32, ptr poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load <2 x i32>, ptr poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 
1 for instruction: %3 = load <3 x i32>, ptr poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load <4 x i32>, ptr poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = load i32, ptr poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load <2 x i32>, ptr poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = load <3 x i32>, ptr poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load <4 x i32>, ptr poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load i32, ptr poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load <2 x i32>, ptr poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <3 x i32>, ptr poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <4 x i32>, ptr poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load i32, ptr poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <2 x i32>, ptr poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <3 x i32>, ptr poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <4 x i32>, ptr poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = load i32, ptr poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <2 x i32>, ptr poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <3 x i32>, ptr poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <4 x i32>, ptr poison, 
align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; + load i32, ptr poison + load <2 x i32>, ptr poison + load <3 x i32>, ptr poison + load <4 x i32>, ptr poison + + load i32, ptr poison, align 1 + load <2 x i32>, ptr poison, align 1 + load <3 x i32>, ptr poison, align 1 + load <4 x i32>, ptr poison, align 1 + + load i32, ptr poison, align 4 + load <2 x i32>, ptr poison, align 4 + load <3 x i32>, ptr poison, align 4 + load <4 x i32>, ptr poison, align 4 + + load i32, ptr poison, align 8 + load <2 x i32>, ptr poison, align 8 + load <3 x i32>, ptr poison, align 8 + load <4 x i32>, ptr poison, align 8 + + load i32, ptr poison, align 16 + load <2 x i32>, ptr poison, align 16 + load <3 x i32>, ptr poison, align 16 + load <4 x i32>, ptr poison, align 16 + + ret void +} + +define void @loads_addrspace_1(i32 %arg) { +; GFX90A-LABEL: 'loads_addrspace_1' +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i1>, ptr addrspace(1) poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load <1 x i8>, ptr addrspace(1) poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load <1 x i16>, ptr addrspace(1) poison, align 2 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load <1 x i32>, ptr addrspace(1) poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = load <2 x i1>, ptr addrspace(1) poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = load <2 x i8>, ptr addrspace(1) poison, align 2 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = load <2 x i16>, ptr addrspace(1) poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load <2 x i32>, ptr addrspace(1) poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %9 = 
load <3 x i1>, ptr addrspace(1) poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = load <3 x i8>, ptr addrspace(1) poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %11 = load <3 x i16>, ptr addrspace(1) poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <3 x i32>, ptr addrspace(1) poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %13 = load <4 x i1>, ptr addrspace(1) poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %14 = load <4 x i8>, ptr addrspace(1) poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x i16>, ptr addrspace(1) poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <4 x i32>, ptr addrspace(1) poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %17 = load <8 x i1>, ptr addrspace(1) poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %18 = load <8 x i8>, ptr addrspace(1) poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <8 x i16>, ptr addrspace(1) poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <8 x i32>, ptr addrspace(1) poison, align 32 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %21 = load <16 x i1>, ptr addrspace(1) poison, align 2 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %22 = load <16 x i8>, ptr addrspace(1) poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <16 x i16>, ptr addrspace(1) poison, align 32 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <16 x i32>, ptr addrspace(1) poison, align 64 +; 
GFX90A-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %25 = load <32 x i1>, ptr addrspace(1) poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %26 = load <32 x i8>, ptr addrspace(1) poison, align 32 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = load <32 x i16>, ptr addrspace(1) poison, align 64 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = load <32 x i32>, ptr addrspace(1) poison, align 128 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %29 = load <64 x i1>, ptr addrspace(1) poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %30 = load <64 x i8>, ptr addrspace(1) poison, align 64 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %31 = load <64 x i16>, ptr addrspace(1) poison, align 128 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %32 = load <64 x i32>, ptr addrspace(1) poison, align 256 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %33 = load <128 x i1>, ptr addrspace(1) poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %34 = load <128 x i8>, ptr addrspace(1) poison, align 128 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %35 = load <128 x i16>, ptr addrspace(1) poison, align 256 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %36 = load <128 x i32>, ptr addrspace(1) poison, align 512 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; + load <1 x i1>, ptr addrspace(1) poison + load <1 x i8>, ptr addrspace(1) poison + load <1 x i16>, ptr addrspace(1) poison + load <1 x i32>, ptr addrspace(1) poison + + load <2 x i1>, ptr addrspace(1) poison + load <2 x i8>, ptr addrspace(1) poison + load <2 x i16>, ptr addrspace(1) poison + load <2 x i32>, ptr addrspace(1) poison 
+ + load <3 x i1>, ptr addrspace(1) poison + load <3 x i8>, ptr addrspace(1) poison + load <3 x i16>, ptr addrspace(1) poison + load <3 x i32>, ptr addrspace(1) poison + + load <4 x i1>, ptr addrspace(1) poison + load <4 x i8>, ptr addrspace(1) poison + load <4 x i16>, ptr addrspace(1) poison + load <4 x i32>, ptr addrspace(1) poison + + load <8 x i1>, ptr addrspace(1) poison + load <8 x i8>, ptr addrspace(1) poison + load <8 x i16>, ptr addrspace(1) poison + load <8 x i32>, ptr addrspace(1) poison + + load <16 x i1>, ptr addrspace(1) poison + load <16 x i8>, ptr addrspace(1) poison + load <16 x i16>, ptr addrspace(1) poison + load <16 x i32>, ptr addrspace(1) poison + + load <32 x i1>, ptr addrspace(1) poison + load <32 x i8>, ptr addrspace(1) poison + load <32 x i16>, ptr addrspace(1) poison + load <32 x i32>, ptr addrspace(1) poison + + load <64 x i1>, ptr addrspace(1) poison + load <64 x i8>, ptr addrspace(1) poison + load <64 x i16>, ptr addrspace(1) poison + load <64 x i32>, ptr addrspace(1) poison + + load <128 x i1>, ptr addrspace(1) poison + load <128 x i8>, ptr addrspace(1) poison + load <128 x i16>, ptr addrspace(1) poison + load <128 x i32>, ptr addrspace(1) poison + ret void +} + +define void @loads_addrspace_3(i32 %arg) { +; GFX90A-LABEL: 'loads_addrspace_3' +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i1>, ptr addrspace(3) poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load <1 x i8>, ptr addrspace(3) poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load <1 x i16>, ptr addrspace(3) poison, align 2 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load <1 x i32>, ptr addrspace(3) poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = load <2 x i1>, ptr addrspace(3) poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %6 = load <2 x i8>, ptr addrspace(3) poison, align 2 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = load <2 x i16>, ptr addrspace(3) poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load <2 x i32>, ptr addrspace(3) poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %9 = load <3 x i1>, ptr addrspace(3) poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = load <3 x i8>, ptr addrspace(3) poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %11 = load <3 x i16>, ptr addrspace(3) poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <3 x i32>, ptr addrspace(3) poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %13 = load <4 x i1>, ptr addrspace(3) poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %14 = load <4 x i8>, ptr addrspace(3) poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x i16>, ptr addrspace(3) poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <4 x i32>, ptr addrspace(3) poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %17 = load <8 x i1>, ptr addrspace(3) poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %18 = load <8 x i8>, ptr addrspace(3) poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <8 x i16>, ptr addrspace(3) poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <8 x i32>, ptr addrspace(3) poison, align 32 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %21 = load <16 x i1>, ptr addrspace(3) poison, align 2 +; 
GFX90A-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %22 = load <16 x i8>, ptr addrspace(3) poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <16 x i16>, ptr addrspace(3) poison, align 32 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <16 x i32>, ptr addrspace(3) poison, align 64 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %25 = load <32 x i1>, ptr addrspace(3) poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %26 = load <32 x i8>, ptr addrspace(3) poison, align 32 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = load <32 x i16>, ptr addrspace(3) poison, align 64 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = load <32 x i32>, ptr addrspace(3) poison, align 128 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %29 = load <64 x i1>, ptr addrspace(3) poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %30 = load <64 x i8>, ptr addrspace(3) poison, align 64 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %31 = load <64 x i16>, ptr addrspace(3) poison, align 128 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %32 = load <64 x i32>, ptr addrspace(3) poison, align 256 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %33 = load <128 x i1>, ptr addrspace(3) poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %34 = load <128 x i8>, ptr addrspace(3) poison, align 128 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %35 = load <128 x i16>, ptr addrspace(3) poison, align 256 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %36 = load <128 x i32>, ptr addrspace(3) poison, align 512 +; GFX90A-NEXT: Cost Model: 
Found an estimated cost of 10 for instruction: ret void +; + load <1 x i1>, ptr addrspace(3) poison + load <1 x i8>, ptr addrspace(3) poison + load <1 x i16>, ptr addrspace(3) poison + load <1 x i32>, ptr addrspace(3) poison + + load <2 x i1>, ptr addrspace(3) poison + load <2 x i8>, ptr addrspace(3) poison + load <2 x i16>, ptr addrspace(3) poison + load <2 x i32>, ptr addrspace(3) poison + + load <3 x i1>, ptr addrspace(3) poison + load <3 x i8>, ptr addrspace(3) poison + load <3 x i16>, ptr addrspace(3) poison + load <3 x i32>, ptr addrspace(3) poison + + load <4 x i1>, ptr addrspace(3) poison + load <4 x i8>, ptr addrspace(3) poison + load <4 x i16>, ptr addrspace(3) poison + load <4 x i32>, ptr addrspace(3) poison + + load <8 x i1>, ptr addrspace(3) poison + load <8 x i8>, ptr addrspace(3) poison + load <8 x i16>, ptr addrspace(3) poison + load <8 x i32>, ptr addrspace(3) poison + + load <16 x i1>, ptr addrspace(3) poison + load <16 x i8>, ptr addrspace(3) poison + load <16 x i16>, ptr addrspace(3) poison + load <16 x i32>, ptr addrspace(3) poison + + load <32 x i1>, ptr addrspace(3) poison + load <32 x i8>, ptr addrspace(3) poison + load <32 x i16>, ptr addrspace(3) poison + load <32 x i32>, ptr addrspace(3) poison + + load <64 x i1>, ptr addrspace(3) poison + load <64 x i8>, ptr addrspace(3) poison + load <64 x i16>, ptr addrspace(3) poison + load <64 x i32>, ptr addrspace(3) poison + + load <128 x i1>, ptr addrspace(3) poison + load <128 x i8>, ptr addrspace(3) poison + load <128 x i16>, ptr addrspace(3) poison + load <128 x i32>, ptr addrspace(3) poison + ret void +} + +define void @loads_addrspace_5(i32 %arg) { +; GFX90A-LABEL: 'loads_addrspace_5' +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i1>, ptr addrspace(5) poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load <1 x i8>, ptr addrspace(5) poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %3 = load <1 x i16>, ptr addrspace(5) poison, align 2 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load <1 x i32>, ptr addrspace(5) poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = load <2 x i1>, ptr addrspace(5) poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = load <2 x i8>, ptr addrspace(5) poison, align 2 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = load <2 x i16>, ptr addrspace(5) poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load <2 x i32>, ptr addrspace(5) poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %9 = load <3 x i1>, ptr addrspace(5) poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = load <3 x i8>, ptr addrspace(5) poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %11 = load <3 x i16>, ptr addrspace(5) poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <3 x i32>, ptr addrspace(5) poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %13 = load <4 x i1>, ptr addrspace(5) poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %14 = load <4 x i8>, ptr addrspace(5) poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x i16>, ptr addrspace(5) poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <4 x i32>, ptr addrspace(5) poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %17 = load <8 x i1>, ptr addrspace(5) poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %18 = load <8 x i8>, ptr addrspace(5) poison, align 8 +; 
GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <8 x i16>, ptr addrspace(5) poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <8 x i32>, ptr addrspace(5) poison, align 32 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %21 = load <16 x i1>, ptr addrspace(5) poison, align 2 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %22 = load <16 x i8>, ptr addrspace(5) poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <16 x i16>, ptr addrspace(5) poison, align 32 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <16 x i32>, ptr addrspace(5) poison, align 64 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %25 = load <32 x i1>, ptr addrspace(5) poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %26 = load <32 x i8>, ptr addrspace(5) poison, align 32 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = load <32 x i16>, ptr addrspace(5) poison, align 64 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = load <32 x i32>, ptr addrspace(5) poison, align 128 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %29 = load <64 x i1>, ptr addrspace(5) poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %30 = load <64 x i8>, ptr addrspace(5) poison, align 64 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %31 = load <64 x i16>, ptr addrspace(5) poison, align 128 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %32 = load <64 x i32>, ptr addrspace(5) poison, align 256 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %33 = load <128 x i1>, ptr addrspace(5) poison, align 16 +; GFX90A-NEXT: Cost Model: Found an 
estimated cost of 128 for instruction: %34 = load <128 x i8>, ptr addrspace(5) poison, align 128 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %35 = load <128 x i16>, ptr addrspace(5) poison, align 256 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %36 = load <128 x i32>, ptr addrspace(5) poison, align 512 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; + load <1 x i1>, ptr addrspace(5) poison + load <1 x i8>, ptr addrspace(5) poison + load <1 x i16>, ptr addrspace(5) poison + load <1 x i32>, ptr addrspace(5) poison + + load <2 x i1>, ptr addrspace(5) poison + load <2 x i8>, ptr addrspace(5) poison + load <2 x i16>, ptr addrspace(5) poison + load <2 x i32>, ptr addrspace(5) poison + + load <3 x i1>, ptr addrspace(5) poison + load <3 x i8>, ptr addrspace(5) poison + load <3 x i16>, ptr addrspace(5) poison + load <3 x i32>, ptr addrspace(5) poison + + load <4 x i1>, ptr addrspace(5) poison + load <4 x i8>, ptr addrspace(5) poison + load <4 x i16>, ptr addrspace(5) poison + load <4 x i32>, ptr addrspace(5) poison + + load <8 x i1>, ptr addrspace(5) poison + load <8 x i8>, ptr addrspace(5) poison + load <8 x i16>, ptr addrspace(5) poison + load <8 x i32>, ptr addrspace(5) poison + + load <16 x i1>, ptr addrspace(5) poison + load <16 x i8>, ptr addrspace(5) poison + load <16 x i16>, ptr addrspace(5) poison + load <16 x i32>, ptr addrspace(5) poison + + load <32 x i1>, ptr addrspace(5) poison + load <32 x i8>, ptr addrspace(5) poison + load <32 x i16>, ptr addrspace(5) poison + load <32 x i32>, ptr addrspace(5) poison + + load <64 x i1>, ptr addrspace(5) poison + load <64 x i8>, ptr addrspace(5) poison + load <64 x i16>, ptr addrspace(5) poison + load <64 x i32>, ptr addrspace(5) poison + + load <128 x i1>, ptr addrspace(5) poison + load <128 x i8>, ptr addrspace(5) poison + load <128 x i16>, ptr addrspace(5) poison + load <128 x i32>, ptr addrspace(5) poison + ret void +} diff 
--git a/llvm/test/Analysis/CostModel/AMDGPU/store.ll b/llvm/test/Analysis/CostModel/AMDGPU/store.ll new file mode 100644 index 0000000000000..9672c3256c751 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AMDGPU/store.ll @@ -0,0 +1,411 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck %s --check-prefixes=GFX90A + +define void @stores_i1(i32 %arg) { +; GFX90A-LABEL: 'stores_i1' +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i1 poison, ptr poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i1> poison, ptr poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i1> poison, ptr poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i1> poison, ptr poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; + store i1 poison, ptr poison + store <2 x i1> poison, ptr poison + store <3 x i1> poison, ptr poison + store <4 x i1> poison, ptr poison + + ret void +} + +define void @stores_i8(i32 %arg) { +; GFX90A-LABEL: 'stores_i8' +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 poison, ptr poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> poison, ptr poison, align 2 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i8> poison, ptr poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i8> poison, ptr poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 poison, ptr poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> poison, ptr poison, align 1 +; 
GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i8> poison, ptr poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i8> poison, ptr poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 poison, ptr poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> poison, ptr poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i8> poison, ptr poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i8> poison, ptr poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; + store i8 poison, ptr poison + store <2 x i8> poison, ptr poison + store <3 x i8> poison, ptr poison + store <4 x i8> poison, ptr poison + + store i8 poison, ptr poison, align 1 + store <2 x i8> poison, ptr poison, align 1 + store <3 x i8> poison, ptr poison, align 1 + store <4 x i8> poison, ptr poison, align 1 + + store i8 poison, ptr poison, align 4 + store <2 x i8> poison, ptr poison, align 4 + store <3 x i8> poison, ptr poison, align 4 + store <4 x i8> poison, ptr poison, align 4 + + ret void +} + +define void @stores_i16(i32 %arg) { +; GFX90A-LABEL: 'stores_i16' +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 poison, ptr poison, align 2 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> poison, ptr poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i16> poison, ptr poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> poison, ptr poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 poison, ptr poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: store <2 x i16> poison, ptr poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i16> poison, ptr poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> poison, ptr poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 poison, ptr poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> poison, ptr poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i16> poison, ptr poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> poison, ptr poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 poison, ptr poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> poison, ptr poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i16> poison, ptr poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> poison, ptr poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; + store i16 poison, ptr poison + store <2 x i16> poison, ptr poison + store <3 x i16> poison, ptr poison + store <4 x i16> poison, ptr poison + + store i16 poison, ptr poison, align 1 + store <2 x i16> poison, ptr poison, align 1 + store <3 x i16> poison, ptr poison, align 1 + store <4 x i16> poison, ptr poison, align 1 + + store i16 poison, ptr poison, align 4 + store <2 x i16> poison, ptr poison, align 4 + store <3 x i16> poison, ptr poison, align 4 + store <4 x i16> poison, ptr poison, align 4 + + store i16 poison, ptr poison, align 8 + store <2 x i16> poison, ptr poison, align 8 + store <3 x i16> poison, ptr poison, align 8 + store <4 x i16> poison, ptr poison, align 8 + + 
ret void +} + +define void @stores_i32(i32 %arg) { +; GFX90A-LABEL: 'stores_i32' +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 poison, ptr poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> poison, ptr poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i32> poison, ptr poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> poison, ptr poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 poison, ptr poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> poison, ptr poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i32> poison, ptr poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> poison, ptr poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 poison, ptr poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> poison, ptr poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i32> poison, ptr poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> poison, ptr poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 poison, ptr poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> poison, ptr poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i32> poison, ptr poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> poison, ptr poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: store i32 poison, ptr poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> poison, ptr poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i32> poison, ptr poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> poison, ptr poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; + store i32 poison, ptr poison + store <2 x i32> poison, ptr poison + store <3 x i32> poison, ptr poison + store <4 x i32> poison, ptr poison + + store i32 poison, ptr poison, align 1 + store <2 x i32> poison, ptr poison, align 1 + store <3 x i32> poison, ptr poison, align 1 + store <4 x i32> poison, ptr poison, align 1 + + store i32 poison, ptr poison, align 4 + store <2 x i32> poison, ptr poison, align 4 + store <3 x i32> poison, ptr poison, align 4 + store <4 x i32> poison, ptr poison, align 4 + + store i32 poison, ptr poison, align 8 + store <2 x i32> poison, ptr poison, align 8 + store <3 x i32> poison, ptr poison, align 8 + store <4 x i32> poison, ptr poison, align 8 + + store i32 poison, ptr poison, align 16 + store <2 x i32> poison, ptr poison, align 16 + store <3 x i32> poison, ptr poison, align 16 + store <4 x i32> poison, ptr poison, align 16 + + ret void +} + +define void @stores_addrspace_1(i32 %arg) { +; GFX90A-LABEL: 'stores_addrspace_1' +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> poison, ptr addrspace(1) poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> poison, ptr addrspace(1) poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> poison, ptr addrspace(1) poison, align 2 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> poison, ptr addrspace(1) poison, align 4 +; 
GFX90A-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i1> poison, ptr addrspace(1) poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> poison, ptr addrspace(1) poison, align 2 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> poison, ptr addrspace(1) poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> poison, ptr addrspace(1) poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i1> poison, ptr addrspace(1) poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i8> poison, ptr addrspace(1) poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i16> poison, ptr addrspace(1) poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i32> poison, ptr addrspace(1) poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i1> poison, ptr addrspace(1) poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i8> poison, ptr addrspace(1) poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> poison, ptr addrspace(1) poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> poison, ptr addrspace(1) poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i1> poison, ptr addrspace(1) poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i8> poison, ptr addrspace(1) poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> poison, ptr addrspace(1) poison, align 16 +; GFX90A-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: store <8 x i32> poison, ptr addrspace(1) poison, align 32 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i1> poison, ptr addrspace(1) poison, align 2 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> poison, ptr addrspace(1) poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i16> poison, ptr addrspace(1) poison, align 32 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i32> poison, ptr addrspace(1) poison, align 64 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 32 for instruction: store <32 x i1> poison, ptr addrspace(1) poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 32 for instruction: store <32 x i8> poison, ptr addrspace(1) poison, align 32 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i16> poison, ptr addrspace(1) poison, align 64 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i32> poison, ptr addrspace(1) poison, align 128 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 64 for instruction: store <64 x i1> poison, ptr addrspace(1) poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 64 for instruction: store <64 x i8> poison, ptr addrspace(1) poison, align 64 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <64 x i16> poison, ptr addrspace(1) poison, align 128 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <64 x i32> poison, ptr addrspace(1) poison, align 256 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 128 for instruction: store <128 x i1> poison, ptr addrspace(1) poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 128 for instruction: store <128 x i8> poison, ptr addrspace(1) poison, align 128 +; GFX90A-NEXT: Cost Model: Found an 
estimated cost of 4 for instruction: store <128 x i16> poison, ptr addrspace(1) poison, align 256 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <128 x i32> poison, ptr addrspace(1) poison, align 512 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; + store <1 x i1> poison, ptr addrspace(1) poison + store <1 x i8> poison, ptr addrspace(1) poison + store <1 x i16> poison, ptr addrspace(1) poison + store <1 x i32> poison, ptr addrspace(1) poison + + store <2 x i1> poison, ptr addrspace(1) poison + store <2 x i8> poison, ptr addrspace(1) poison + store <2 x i16> poison, ptr addrspace(1) poison + store <2 x i32> poison, ptr addrspace(1) poison + + store <3 x i1> poison, ptr addrspace(1) poison + store <3 x i8> poison, ptr addrspace(1) poison + store <3 x i16> poison, ptr addrspace(1) poison + store <3 x i32> poison, ptr addrspace(1) poison + + store <4 x i1> poison, ptr addrspace(1) poison + store <4 x i8> poison, ptr addrspace(1) poison + store <4 x i16> poison, ptr addrspace(1) poison + store <4 x i32> poison, ptr addrspace(1) poison + + store <8 x i1> poison, ptr addrspace(1) poison + store <8 x i8> poison, ptr addrspace(1) poison + store <8 x i16> poison, ptr addrspace(1) poison + store <8 x i32> poison, ptr addrspace(1) poison + + store <16 x i1> poison, ptr addrspace(1) poison + store <16 x i8> poison, ptr addrspace(1) poison + store <16 x i16> poison, ptr addrspace(1) poison + store <16 x i32> poison, ptr addrspace(1) poison + + store <32 x i1> poison, ptr addrspace(1) poison + store <32 x i8> poison, ptr addrspace(1) poison + store <32 x i16> poison, ptr addrspace(1) poison + store <32 x i32> poison, ptr addrspace(1) poison + + store <64 x i1> poison, ptr addrspace(1) poison + store <64 x i8> poison, ptr addrspace(1) poison + store <64 x i16> poison, ptr addrspace(1) poison + store <64 x i32> poison, ptr addrspace(1) poison + + store <128 x i1> poison, ptr addrspace(1) poison + store <128 x 
i8> poison, ptr addrspace(1) poison + store <128 x i16> poison, ptr addrspace(1) poison + store <128 x i32> poison, ptr addrspace(1) poison + + ret void +} + +define void @stores_addrspace_3(i32 %arg) { +; GFX90A-LABEL: 'stores_addrspace_3' +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> poison, ptr addrspace(3) poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> poison, ptr addrspace(3) poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> poison, ptr addrspace(3) poison, align 2 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> poison, ptr addrspace(3) poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i1> poison, ptr addrspace(3) poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> poison, ptr addrspace(3) poison, align 2 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> poison, ptr addrspace(3) poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> poison, ptr addrspace(3) poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i1> poison, ptr addrspace(3) poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i8> poison, ptr addrspace(3) poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i16> poison, ptr addrspace(3) poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i32> poison, ptr addrspace(3) poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i1> poison, ptr addrspace(3) poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: store <4 x i8> poison, ptr addrspace(3) poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> poison, ptr addrspace(3) poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> poison, ptr addrspace(3) poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i1> poison, ptr addrspace(3) poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i8> poison, ptr addrspace(3) poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> poison, ptr addrspace(3) poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i32> poison, ptr addrspace(3) poison, align 32 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i1> poison, ptr addrspace(3) poison, align 2 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> poison, ptr addrspace(3) poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i16> poison, ptr addrspace(3) poison, align 32 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i32> poison, ptr addrspace(3) poison, align 64 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 32 for instruction: store <32 x i1> poison, ptr addrspace(3) poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 32 for instruction: store <32 x i8> poison, ptr addrspace(3) poison, align 32 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i16> poison, ptr addrspace(3) poison, align 64 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i32> poison, ptr addrspace(3) poison, align 128 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 64 for instruction: store 
<64 x i1> poison, ptr addrspace(3) poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 64 for instruction: store <64 x i8> poison, ptr addrspace(3) poison, align 64 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <64 x i16> poison, ptr addrspace(3) poison, align 128 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <64 x i32> poison, ptr addrspace(3) poison, align 256 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 128 for instruction: store <128 x i1> poison, ptr addrspace(3) poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 128 for instruction: store <128 x i8> poison, ptr addrspace(3) poison, align 128 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <128 x i16> poison, ptr addrspace(3) poison, align 256 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <128 x i32> poison, ptr addrspace(3) poison, align 512 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; + store <1 x i1> poison, ptr addrspace(3) poison + store <1 x i8> poison, ptr addrspace(3) poison + store <1 x i16> poison, ptr addrspace(3) poison + store <1 x i32> poison, ptr addrspace(3) poison + + store <2 x i1> poison, ptr addrspace(3) poison + store <2 x i8> poison, ptr addrspace(3) poison + store <2 x i16> poison, ptr addrspace(3) poison + store <2 x i32> poison, ptr addrspace(3) poison + + store <3 x i1> poison, ptr addrspace(3) poison + store <3 x i8> poison, ptr addrspace(3) poison + store <3 x i16> poison, ptr addrspace(3) poison + store <3 x i32> poison, ptr addrspace(3) poison + + store <4 x i1> poison, ptr addrspace(3) poison + store <4 x i8> poison, ptr addrspace(3) poison + store <4 x i16> poison, ptr addrspace(3) poison + store <4 x i32> poison, ptr addrspace(3) poison + + store <8 x i1> poison, ptr addrspace(3) poison + store <8 x i8> poison, ptr addrspace(3) poison + store <8 x i16> 
poison, ptr addrspace(3) poison + store <8 x i32> poison, ptr addrspace(3) poison + + store <16 x i1> poison, ptr addrspace(3) poison + store <16 x i8> poison, ptr addrspace(3) poison + store <16 x i16> poison, ptr addrspace(3) poison + store <16 x i32> poison, ptr addrspace(3) poison + + store <32 x i1> poison, ptr addrspace(3) poison + store <32 x i8> poison, ptr addrspace(3) poison + store <32 x i16> poison, ptr addrspace(3) poison + store <32 x i32> poison, ptr addrspace(3) poison + + store <64 x i1> poison, ptr addrspace(3) poison + store <64 x i8> poison, ptr addrspace(3) poison + store <64 x i16> poison, ptr addrspace(3) poison + store <64 x i32> poison, ptr addrspace(3) poison + + store <128 x i1> poison, ptr addrspace(3) poison + store <128 x i8> poison, ptr addrspace(3) poison + store <128 x i16> poison, ptr addrspace(3) poison + store <128 x i32> poison, ptr addrspace(3) poison + + ret void +} + +define void @stores_addrspace_5(i32 %arg) { +; GFX90A-LABEL: 'stores_addrspace_5' +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> poison, ptr addrspace(5) poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> poison, ptr addrspace(5) poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> poison, ptr addrspace(5) poison, align 2 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> poison, ptr addrspace(5) poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i1> poison, ptr addrspace(5) poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> poison, ptr addrspace(5) poison, align 2 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> poison, ptr addrspace(5) poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 
x i32> poison, ptr addrspace(5) poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i1> poison, ptr addrspace(5) poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i8> poison, ptr addrspace(5) poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i16> poison, ptr addrspace(5) poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i32> poison, ptr addrspace(5) poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i1> poison, ptr addrspace(5) poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i8> poison, ptr addrspace(5) poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> poison, ptr addrspace(5) poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> poison, ptr addrspace(5) poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i1> poison, ptr addrspace(5) poison, align 1 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i8> poison, ptr addrspace(5) poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> poison, ptr addrspace(5) poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i32> poison, ptr addrspace(5) poison, align 32 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i1> poison, ptr addrspace(5) poison, align 2 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> poison, ptr addrspace(5) poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i16> poison, ptr addrspace(5) 
poison, align 32 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i32> poison, ptr addrspace(5) poison, align 64 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 32 for instruction: store <32 x i1> poison, ptr addrspace(5) poison, align 4 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 32 for instruction: store <32 x i8> poison, ptr addrspace(5) poison, align 32 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i16> poison, ptr addrspace(5) poison, align 64 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i32> poison, ptr addrspace(5) poison, align 128 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 64 for instruction: store <64 x i1> poison, ptr addrspace(5) poison, align 8 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 64 for instruction: store <64 x i8> poison, ptr addrspace(5) poison, align 64 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <64 x i16> poison, ptr addrspace(5) poison, align 128 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <64 x i32> poison, ptr addrspace(5) poison, align 256 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 128 for instruction: store <128 x i1> poison, ptr addrspace(5) poison, align 16 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 128 for instruction: store <128 x i8> poison, ptr addrspace(5) poison, align 128 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <128 x i16> poison, ptr addrspace(5) poison, align 256 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <128 x i32> poison, ptr addrspace(5) poison, align 512 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; + store <1 x i1> poison, ptr addrspace(5) poison + store <1 x i8> poison, ptr addrspace(5) poison + store <1 x i16> poison, ptr addrspace(5) poison + store 
<1 x i32> poison, ptr addrspace(5) poison + + store <2 x i1> poison, ptr addrspace(5) poison + store <2 x i8> poison, ptr addrspace(5) poison + store <2 x i16> poison, ptr addrspace(5) poison + store <2 x i32> poison, ptr addrspace(5) poison + + store <3 x i1> poison, ptr addrspace(5) poison + store <3 x i8> poison, ptr addrspace(5) poison + store <3 x i16> poison, ptr addrspace(5) poison + store <3 x i32> poison, ptr addrspace(5) poison + + store <4 x i1> poison, ptr addrspace(5) poison + store <4 x i8> poison, ptr addrspace(5) poison + store <4 x i16> poison, ptr addrspace(5) poison + store <4 x i32> poison, ptr addrspace(5) poison + + store <8 x i1> poison, ptr addrspace(5) poison + store <8 x i8> poison, ptr addrspace(5) poison + store <8 x i16> poison, ptr addrspace(5) poison + store <8 x i32> poison, ptr addrspace(5) poison + + store <16 x i1> poison, ptr addrspace(5) poison + store <16 x i8> poison, ptr addrspace(5) poison + store <16 x i16> poison, ptr addrspace(5) poison + store <16 x i32> poison, ptr addrspace(5) poison + + store <32 x i1> poison, ptr addrspace(5) poison + store <32 x i8> poison, ptr addrspace(5) poison + store <32 x i16> poison, ptr addrspace(5) poison + store <32 x i32> poison, ptr addrspace(5) poison + + store <64 x i1> poison, ptr addrspace(5) poison + store <64 x i8> poison, ptr addrspace(5) poison + store <64 x i16> poison, ptr addrspace(5) poison + store <64 x i32> poison, ptr addrspace(5) poison + + store <128 x i1> poison, ptr addrspace(5) poison + store <128 x i8> poison, ptr addrspace(5) poison + store <128 x i16> poison, ptr addrspace(5) poison + store <128 x i32> poison, ptr addrspace(5) poison + + ret void +} diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/vectorize-i8.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/vectorize-i8.ll new file mode 100644 index 0000000000000..b9b1bc1be681e --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/vectorize-i8.ll @@ -0,0 +1,1143 @@ +; NOTE: Assertions have been 
autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -passes=slp-vectorizer %s | FileCheck -check-prefix=GFX7 %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -passes=slp-vectorizer %s | FileCheck -check-prefix=GFX8 %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=slp-vectorizer %s | FileCheck -check-prefix=GFX9 %s + +define protected amdgpu_kernel void @arith_2(<16 x i8> %invec, ptr %out, i32 %flag) { +; GFX7-LABEL: define protected amdgpu_kernel void @arith_2( +; GFX7-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0:[0-9]+]] { +; GFX7-NEXT: [[ENTRY:.*:]] +; GFX7-NEXT: [[EL0:%.*]] = extractelement <16 x i8> [[INVEC]], i64 0 +; GFX7-NEXT: [[EL1:%.*]] = extractelement <16 x i8> [[INVEC]], i64 1 +; GFX7-NEXT: [[MUL0:%.*]] = mul i8 [[EL0]], 1 +; GFX7-NEXT: [[MUL1:%.*]] = mul i8 [[EL1]], 1 +; GFX7-NEXT: [[ADD0:%.*]] = add i8 [[MUL0]], 1 +; GFX7-NEXT: [[ADD1:%.*]] = add i8 [[MUL1]], 1 +; GFX7-NEXT: [[VECINS0:%.*]] = insertelement <16 x i8> poison, i8 [[ADD0]], i64 0 +; GFX7-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> [[VECINS0]], i8 [[ADD1]], i64 1 +; GFX7-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT]], align 16 +; GFX7-NEXT: ret void +; +; GFX8-LABEL: define protected amdgpu_kernel void @arith_2( +; GFX8-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0:[0-9]+]] { +; GFX8-NEXT: [[ENTRY:.*:]] +; GFX8-NEXT: [[EL0:%.*]] = extractelement <16 x i8> [[INVEC]], i64 0 +; GFX8-NEXT: [[EL1:%.*]] = extractelement <16 x i8> [[INVEC]], i64 1 +; GFX8-NEXT: [[MUL0:%.*]] = mul i8 [[EL0]], 1 +; GFX8-NEXT: [[MUL1:%.*]] = mul i8 [[EL1]], 1 +; GFX8-NEXT: [[ADD0:%.*]] = add i8 [[MUL0]], 1 +; GFX8-NEXT: [[ADD1:%.*]] = add i8 [[MUL1]], 1 +; GFX8-NEXT: [[VECINS0:%.*]] = insertelement <16 x i8> poison, i8 [[ADD0]], i64 0 +; GFX8-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> [[VECINS0]], i8 [[ADD1]], i64 1 +; GFX8-NEXT: store <16 x i8> [[TMP3]], ptr 
[[OUT]], align 16 +; GFX8-NEXT: ret void +; +; GFX9-LABEL: define protected amdgpu_kernel void @arith_2( +; GFX9-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0:[0-9]+]] { +; GFX9-NEXT: [[ENTRY:.*:]] +; GFX9-NEXT: [[EL0:%.*]] = extractelement <16 x i8> [[INVEC]], i64 0 +; GFX9-NEXT: [[EL1:%.*]] = extractelement <16 x i8> [[INVEC]], i64 1 +; GFX9-NEXT: [[MUL0:%.*]] = mul i8 [[EL0]], 1 +; GFX9-NEXT: [[MUL1:%.*]] = mul i8 [[EL1]], 1 +; GFX9-NEXT: [[ADD0:%.*]] = add i8 [[MUL0]], 1 +; GFX9-NEXT: [[ADD1:%.*]] = add i8 [[MUL1]], 1 +; GFX9-NEXT: [[VECINS0:%.*]] = insertelement <16 x i8> poison, i8 [[ADD0]], i64 0 +; GFX9-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> [[VECINS0]], i8 [[ADD1]], i64 1 +; GFX9-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT]], align 16 +; GFX9-NEXT: ret void +; +entry: + %el0 = extractelement <16 x i8> %invec, i64 0 + %el1 = extractelement <16 x i8> %invec, i64 1 + %mul0 = mul i8 %el0, 1 + %mul1 = mul i8 %el1, 1 + %add0 = add i8 %mul0, 1 + %add1 = add i8 %mul1, 1 + %vecins0 = insertelement <16 x i8> poison, i8 %add0, i64 0 + %vecins1 = insertelement <16 x i8> %vecins0, i8 %add1, i64 1 + store <16 x i8> %vecins1, ptr %out + ret void +} + +define protected amdgpu_kernel void @arith_3(<16 x i8> %invec, ptr %out, i32 %flag) { +; GFX7-LABEL: define protected amdgpu_kernel void @arith_3( +; GFX7-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] { +; GFX7-NEXT: [[ENTRY:.*:]] +; GFX7-NEXT: [[EL0:%.*]] = extractelement <16 x i8> [[INVEC]], i64 0 +; GFX7-NEXT: [[EL1:%.*]] = extractelement <16 x i8> [[INVEC]], i64 1 +; GFX7-NEXT: [[EL2:%.*]] = extractelement <16 x i8> [[INVEC]], i64 2 +; GFX7-NEXT: [[MUL2:%.*]] = mul i8 [[EL0]], 1 +; GFX7-NEXT: [[MUL1:%.*]] = mul i8 [[EL1]], 1 +; GFX7-NEXT: [[MUL3:%.*]] = mul i8 [[EL2]], 1 +; GFX7-NEXT: [[ADD2:%.*]] = add i8 [[MUL2]], 1 +; GFX7-NEXT: [[ADD1:%.*]] = add i8 [[MUL1]], 1 +; GFX7-NEXT: [[ADD3:%.*]] = add i8 [[MUL3]], 1 +; GFX7-NEXT: [[VECINS0:%.*]] = insertelement 
<16 x i8> poison, i8 [[ADD2]], i64 0 +; GFX7-NEXT: [[VECINS1:%.*]] = insertelement <16 x i8> [[VECINS0]], i8 [[ADD1]], i64 1 +; GFX7-NEXT: [[VECINS2:%.*]] = insertelement <16 x i8> [[VECINS1]], i8 [[ADD3]], i64 2 +; GFX7-NEXT: store <16 x i8> [[VECINS2]], ptr [[OUT]], align 16 +; GFX7-NEXT: ret void +; +; GFX8-LABEL: define protected amdgpu_kernel void @arith_3( +; GFX8-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] { +; GFX8-NEXT: [[ENTRY:.*:]] +; GFX8-NEXT: [[EL0:%.*]] = extractelement <16 x i8> [[INVEC]], i64 0 +; GFX8-NEXT: [[EL1:%.*]] = extractelement <16 x i8> [[INVEC]], i64 1 +; GFX8-NEXT: [[EL2:%.*]] = extractelement <16 x i8> [[INVEC]], i64 2 +; GFX8-NEXT: [[MUL2:%.*]] = mul i8 [[EL0]], 1 +; GFX8-NEXT: [[MUL1:%.*]] = mul i8 [[EL1]], 1 +; GFX8-NEXT: [[MUL3:%.*]] = mul i8 [[EL2]], 1 +; GFX8-NEXT: [[ADD2:%.*]] = add i8 [[MUL2]], 1 +; GFX8-NEXT: [[ADD1:%.*]] = add i8 [[MUL1]], 1 +; GFX8-NEXT: [[ADD3:%.*]] = add i8 [[MUL3]], 1 +; GFX8-NEXT: [[VECINS0:%.*]] = insertelement <16 x i8> poison, i8 [[ADD2]], i64 0 +; GFX8-NEXT: [[VECINS1:%.*]] = insertelement <16 x i8> [[VECINS0]], i8 [[ADD1]], i64 1 +; GFX8-NEXT: [[VECINS2:%.*]] = insertelement <16 x i8> [[VECINS1]], i8 [[ADD3]], i64 2 +; GFX8-NEXT: store <16 x i8> [[VECINS2]], ptr [[OUT]], align 16 +; GFX8-NEXT: ret void +; +; GFX9-LABEL: define protected amdgpu_kernel void @arith_3( +; GFX9-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] { +; GFX9-NEXT: [[ENTRY:.*:]] +; GFX9-NEXT: [[EL0:%.*]] = extractelement <16 x i8> [[INVEC]], i64 0 +; GFX9-NEXT: [[EL1:%.*]] = extractelement <16 x i8> [[INVEC]], i64 1 +; GFX9-NEXT: [[EL2:%.*]] = extractelement <16 x i8> [[INVEC]], i64 2 +; GFX9-NEXT: [[MUL2:%.*]] = mul i8 [[EL0]], 1 +; GFX9-NEXT: [[MUL1:%.*]] = mul i8 [[EL1]], 1 +; GFX9-NEXT: [[MUL3:%.*]] = mul i8 [[EL2]], 1 +; GFX9-NEXT: [[ADD2:%.*]] = add i8 [[MUL2]], 1 +; GFX9-NEXT: [[ADD1:%.*]] = add i8 [[MUL1]], 1 +; GFX9-NEXT: [[ADD3:%.*]] = add i8 [[MUL3]], 1 +; 
GFX9-NEXT: [[VECINS0:%.*]] = insertelement <16 x i8> poison, i8 [[ADD2]], i64 0 +; GFX9-NEXT: [[VECINS1:%.*]] = insertelement <16 x i8> [[VECINS0]], i8 [[ADD1]], i64 1 +; GFX9-NEXT: [[VECINS2:%.*]] = insertelement <16 x i8> [[VECINS1]], i8 [[ADD3]], i64 2 +; GFX9-NEXT: store <16 x i8> [[VECINS2]], ptr [[OUT]], align 16 +; GFX9-NEXT: ret void +; +entry: + %el0 = extractelement <16 x i8> %invec, i64 0 + %el1 = extractelement <16 x i8> %invec, i64 1 + %el2 = extractelement <16 x i8> %invec, i64 2 + %mul0 = mul i8 %el0, 1 + %mul1 = mul i8 %el1, 1 + %mul2 = mul i8 %el2, 1 + %add0 = add i8 %mul0, 1 + %add1 = add i8 %mul1, 1 + %add2 = add i8 %mul2, 1 + %vecins0 = insertelement <16 x i8> poison, i8 %add0, i64 0 + %vecins1 = insertelement <16 x i8> %vecins0, i8 %add1, i64 1 + %vecins2 = insertelement <16 x i8> %vecins1, i8 %add2, i64 2 + store <16 x i8> %vecins2, ptr %out + ret void +} + +define protected amdgpu_kernel void @arith_4(<16 x i8> %invec, ptr %out, i32 %flag) { +; GFX7-LABEL: define protected amdgpu_kernel void @arith_4( +; GFX7-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] { +; GFX7-NEXT: [[ENTRY:.*:]] +; GFX7-NEXT: [[EL0:%.*]] = extractelement <16 x i8> [[INVEC]], i64 0 +; GFX7-NEXT: [[EL1:%.*]] = extractelement <16 x i8> [[INVEC]], i64 1 +; GFX7-NEXT: [[EL2:%.*]] = extractelement <16 x i8> [[INVEC]], i64 2 +; GFX7-NEXT: [[EL3:%.*]] = extractelement <16 x i8> [[INVEC]], i64 3 +; GFX7-NEXT: [[MUL0:%.*]] = mul i8 [[EL0]], 1 +; GFX7-NEXT: [[MUL1:%.*]] = mul i8 [[EL1]], 1 +; GFX7-NEXT: [[MUL2:%.*]] = mul i8 [[EL2]], 1 +; GFX7-NEXT: [[MUL3:%.*]] = mul i8 [[EL3]], 1 +; GFX7-NEXT: [[ADD0:%.*]] = add i8 [[MUL0]], 1 +; GFX7-NEXT: [[ADD1:%.*]] = add i8 [[MUL1]], 1 +; GFX7-NEXT: [[ADD2:%.*]] = add i8 [[MUL2]], 1 +; GFX7-NEXT: [[ADD3:%.*]] = add i8 [[MUL3]], 1 +; GFX7-NEXT: [[VECINS0:%.*]] = insertelement <16 x i8> poison, i8 [[ADD0]], i64 0 +; GFX7-NEXT: [[VECINS1:%.*]] = insertelement <16 x i8> [[VECINS0]], i8 [[ADD1]], i64 1 +; GFX7-NEXT: 
[[VECINS2:%.*]] = insertelement <16 x i8> [[VECINS1]], i8 [[ADD2]], i64 2 +; GFX7-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> [[VECINS2]], i8 [[ADD3]], i64 3 +; GFX7-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT]], align 16 +; GFX7-NEXT: ret void +; +; GFX8-LABEL: define protected amdgpu_kernel void @arith_4( +; GFX8-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] { +; GFX8-NEXT: [[ENTRY:.*:]] +; GFX8-NEXT: [[EL0:%.*]] = extractelement <16 x i8> [[INVEC]], i64 0 +; GFX8-NEXT: [[EL1:%.*]] = extractelement <16 x i8> [[INVEC]], i64 1 +; GFX8-NEXT: [[EL2:%.*]] = extractelement <16 x i8> [[INVEC]], i64 2 +; GFX8-NEXT: [[EL3:%.*]] = extractelement <16 x i8> [[INVEC]], i64 3 +; GFX8-NEXT: [[MUL0:%.*]] = mul i8 [[EL0]], 1 +; GFX8-NEXT: [[MUL1:%.*]] = mul i8 [[EL1]], 1 +; GFX8-NEXT: [[MUL2:%.*]] = mul i8 [[EL2]], 1 +; GFX8-NEXT: [[MUL3:%.*]] = mul i8 [[EL3]], 1 +; GFX8-NEXT: [[ADD0:%.*]] = add i8 [[MUL0]], 1 +; GFX8-NEXT: [[ADD1:%.*]] = add i8 [[MUL1]], 1 +; GFX8-NEXT: [[ADD2:%.*]] = add i8 [[MUL2]], 1 +; GFX8-NEXT: [[ADD3:%.*]] = add i8 [[MUL3]], 1 +; GFX8-NEXT: [[VECINS0:%.*]] = insertelement <16 x i8> poison, i8 [[ADD0]], i64 0 +; GFX8-NEXT: [[VECINS1:%.*]] = insertelement <16 x i8> [[VECINS0]], i8 [[ADD1]], i64 1 +; GFX8-NEXT: [[VECINS2:%.*]] = insertelement <16 x i8> [[VECINS1]], i8 [[ADD2]], i64 2 +; GFX8-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> [[VECINS2]], i8 [[ADD3]], i64 3 +; GFX8-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT]], align 16 +; GFX8-NEXT: ret void +; +; GFX9-LABEL: define protected amdgpu_kernel void @arith_4( +; GFX9-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] { +; GFX9-NEXT: [[ENTRY:.*:]] +; GFX9-NEXT: [[EL0:%.*]] = extractelement <16 x i8> [[INVEC]], i64 0 +; GFX9-NEXT: [[EL1:%.*]] = extractelement <16 x i8> [[INVEC]], i64 1 +; GFX9-NEXT: [[EL2:%.*]] = extractelement <16 x i8> [[INVEC]], i64 2 +; GFX9-NEXT: [[EL3:%.*]] = extractelement <16 x i8> [[INVEC]], i64 3 +; GFX9-NEXT: [[MUL0:%.*]] = 
mul i8 [[EL0]], 1 +; GFX9-NEXT: [[MUL1:%.*]] = mul i8 [[EL1]], 1 +; GFX9-NEXT: [[MUL2:%.*]] = mul i8 [[EL2]], 1 +; GFX9-NEXT: [[MUL3:%.*]] = mul i8 [[EL3]], 1 +; GFX9-NEXT: [[ADD0:%.*]] = add i8 [[MUL0]], 1 +; GFX9-NEXT: [[ADD1:%.*]] = add i8 [[MUL1]], 1 +; GFX9-NEXT: [[ADD2:%.*]] = add i8 [[MUL2]], 1 +; GFX9-NEXT: [[ADD3:%.*]] = add i8 [[MUL3]], 1 +; GFX9-NEXT: [[VECINS0:%.*]] = insertelement <16 x i8> poison, i8 [[ADD0]], i64 0 +; GFX9-NEXT: [[VECINS1:%.*]] = insertelement <16 x i8> [[VECINS0]], i8 [[ADD1]], i64 1 +; GFX9-NEXT: [[VECINS2:%.*]] = insertelement <16 x i8> [[VECINS1]], i8 [[ADD2]], i64 2 +; GFX9-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> [[VECINS2]], i8 [[ADD3]], i64 3 +; GFX9-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT]], align 16 +; GFX9-NEXT: ret void +; +entry: + %el0 = extractelement <16 x i8> %invec, i64 0 + %el1 = extractelement <16 x i8> %invec, i64 1 + %el2 = extractelement <16 x i8> %invec, i64 2 + %el3 = extractelement <16 x i8> %invec, i64 3 + %mul0 = mul i8 %el0, 1 + %mul1 = mul i8 %el1, 1 + %mul2 = mul i8 %el2, 1 + %mul3 = mul i8 %el3, 1 + %add0 = add i8 %mul0, 1 + %add1 = add i8 %mul1, 1 + %add2 = add i8 %mul2, 1 + %add3 = add i8 %mul3, 1 + %vecins0 = insertelement <16 x i8> poison, i8 %add0, i64 0 + %vecins1 = insertelement <16 x i8> %vecins0, i8 %add1, i64 1 + %vecins2 = insertelement <16 x i8> %vecins1, i8 %add2, i64 2 + %vecins3 = insertelement <16 x i8> %vecins2, i8 %add3, i64 3 + store <16 x i8> %vecins3, ptr %out + ret void +} + +define protected amdgpu_kernel void @arith_16(<16 x i8> %invec, ptr %out, i32 %flag) { +; GFX7-LABEL: define protected amdgpu_kernel void @arith_16( +; GFX7-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] { +; GFX7-NEXT: [[ENTRY:.*:]] +; GFX7-NEXT: [[EL0:%.*]] = extractelement <16 x i8> [[INVEC]], i64 0 +; GFX7-NEXT: [[EL1:%.*]] = extractelement <16 x i8> [[INVEC]], i64 1 +; GFX7-NEXT: [[EL2:%.*]] = extractelement <16 x i8> [[INVEC]], i64 2 +; GFX7-NEXT: [[EL3:%.*]] = 
extractelement <16 x i8> [[INVEC]], i64 3 +; GFX7-NEXT: [[EL4:%.*]] = extractelement <16 x i8> [[INVEC]], i64 4 +; GFX7-NEXT: [[EL5:%.*]] = extractelement <16 x i8> [[INVEC]], i64 5 +; GFX7-NEXT: [[EL6:%.*]] = extractelement <16 x i8> [[INVEC]], i64 6 +; GFX7-NEXT: [[EL7:%.*]] = extractelement <16 x i8> [[INVEC]], i64 7 +; GFX7-NEXT: [[EL8:%.*]] = extractelement <16 x i8> [[INVEC]], i64 8 +; GFX7-NEXT: [[EL9:%.*]] = extractelement <16 x i8> [[INVEC]], i64 9 +; GFX7-NEXT: [[EL10:%.*]] = extractelement <16 x i8> [[INVEC]], i64 10 +; GFX7-NEXT: [[EL11:%.*]] = extractelement <16 x i8> [[INVEC]], i64 11 +; GFX7-NEXT: [[EL12:%.*]] = extractelement <16 x i8> [[INVEC]], i64 12 +; GFX7-NEXT: [[EL13:%.*]] = extractelement <16 x i8> [[INVEC]], i64 13 +; GFX7-NEXT: [[EL14:%.*]] = extractelement <16 x i8> [[INVEC]], i64 14 +; GFX7-NEXT: [[EL15:%.*]] = extractelement <16 x i8> [[INVEC]], i64 15 +; GFX7-NEXT: [[MUL0:%.*]] = mul i8 [[EL0]], 1 +; GFX7-NEXT: [[MUL1:%.*]] = mul i8 [[EL1]], 1 +; GFX7-NEXT: [[MUL2:%.*]] = mul i8 [[EL2]], 1 +; GFX7-NEXT: [[MUL3:%.*]] = mul i8 [[EL3]], 1 +; GFX7-NEXT: [[MUL4:%.*]] = mul i8 [[EL4]], 1 +; GFX7-NEXT: [[MUL5:%.*]] = mul i8 [[EL5]], 1 +; GFX7-NEXT: [[MUL6:%.*]] = mul i8 [[EL6]], 1 +; GFX7-NEXT: [[MUL7:%.*]] = mul i8 [[EL7]], 1 +; GFX7-NEXT: [[MUL8:%.*]] = mul i8 [[EL8]], 1 +; GFX7-NEXT: [[MUL9:%.*]] = mul i8 [[EL9]], 1 +; GFX7-NEXT: [[MUL10:%.*]] = mul i8 [[EL10]], 1 +; GFX7-NEXT: [[MUL11:%.*]] = mul i8 [[EL11]], 1 +; GFX7-NEXT: [[MUL12:%.*]] = mul i8 [[EL12]], 1 +; GFX7-NEXT: [[MUL13:%.*]] = mul i8 [[EL13]], 1 +; GFX7-NEXT: [[MUL14:%.*]] = mul i8 [[EL14]], 1 +; GFX7-NEXT: [[MUL15:%.*]] = mul i8 [[EL15]], 1 +; GFX7-NEXT: [[ADD0:%.*]] = add i8 [[MUL0]], 1 +; GFX7-NEXT: [[ADD1:%.*]] = add i8 [[MUL1]], 1 +; GFX7-NEXT: [[ADD2:%.*]] = add i8 [[MUL2]], 1 +; GFX7-NEXT: [[ADD3:%.*]] = add i8 [[MUL3]], 1 +; GFX7-NEXT: [[ADD4:%.*]] = add i8 [[MUL4]], 1 +; GFX7-NEXT: [[ADD5:%.*]] = add i8 [[MUL5]], 1 +; GFX7-NEXT: [[ADD6:%.*]] = add i8 [[MUL6]], 1 +; 
GFX7-NEXT: [[ADD7:%.*]] = add i8 [[MUL7]], 1 +; GFX7-NEXT: [[ADD8:%.*]] = add i8 [[MUL8]], 1 +; GFX7-NEXT: [[ADD9:%.*]] = add i8 [[MUL9]], 1 +; GFX7-NEXT: [[ADD10:%.*]] = add i8 [[MUL10]], 1 +; GFX7-NEXT: [[ADD11:%.*]] = add i8 [[MUL11]], 1 +; GFX7-NEXT: [[ADD12:%.*]] = add i8 [[MUL12]], 1 +; GFX7-NEXT: [[ADD13:%.*]] = add i8 [[MUL13]], 1 +; GFX7-NEXT: [[ADD14:%.*]] = add i8 [[MUL14]], 1 +; GFX7-NEXT: [[ADD15:%.*]] = add i8 [[MUL15]], 1 +; GFX7-NEXT: [[VECINS0:%.*]] = insertelement <16 x i8> poison, i8 [[ADD0]], i64 0 +; GFX7-NEXT: [[VECINS1:%.*]] = insertelement <16 x i8> [[VECINS0]], i8 [[ADD1]], i64 1 +; GFX7-NEXT: [[VECINS2:%.*]] = insertelement <16 x i8> [[VECINS1]], i8 [[ADD2]], i64 2 +; GFX7-NEXT: [[VECINS3:%.*]] = insertelement <16 x i8> [[VECINS2]], i8 [[ADD3]], i64 3 +; GFX7-NEXT: [[VECINS4:%.*]] = insertelement <16 x i8> [[VECINS3]], i8 [[ADD4]], i64 4 +; GFX7-NEXT: [[VECINS5:%.*]] = insertelement <16 x i8> [[VECINS4]], i8 [[ADD5]], i64 5 +; GFX7-NEXT: [[VECINS6:%.*]] = insertelement <16 x i8> [[VECINS5]], i8 [[ADD6]], i64 6 +; GFX7-NEXT: [[VECINS7:%.*]] = insertelement <16 x i8> [[VECINS6]], i8 [[ADD7]], i64 7 +; GFX7-NEXT: [[VECINS8:%.*]] = insertelement <16 x i8> [[VECINS7]], i8 [[ADD8]], i64 8 +; GFX7-NEXT: [[VECINS9:%.*]] = insertelement <16 x i8> [[VECINS8]], i8 [[ADD9]], i64 9 +; GFX7-NEXT: [[VECINS10:%.*]] = insertelement <16 x i8> [[VECINS9]], i8 [[ADD10]], i64 10 +; GFX7-NEXT: [[VECINS11:%.*]] = insertelement <16 x i8> [[VECINS10]], i8 [[ADD11]], i64 11 +; GFX7-NEXT: [[VECINS12:%.*]] = insertelement <16 x i8> [[VECINS11]], i8 [[ADD12]], i64 12 +; GFX7-NEXT: [[VECINS13:%.*]] = insertelement <16 x i8> [[VECINS12]], i8 [[ADD13]], i64 13 +; GFX7-NEXT: [[VECINS14:%.*]] = insertelement <16 x i8> [[VECINS13]], i8 [[ADD14]], i64 14 +; GFX7-NEXT: [[VECINS153:%.*]] = insertelement <16 x i8> [[VECINS14]], i8 [[ADD15]], i64 15 +; GFX7-NEXT: store <16 x i8> [[VECINS153]], ptr [[OUT]], align 16 +; GFX7-NEXT: ret void +; +; GFX8-LABEL: define protected 
amdgpu_kernel void @arith_16( +; GFX8-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] { +; GFX8-NEXT: [[ENTRY:.*:]] +; GFX8-NEXT: [[EL0:%.*]] = extractelement <16 x i8> [[INVEC]], i64 0 +; GFX8-NEXT: [[EL1:%.*]] = extractelement <16 x i8> [[INVEC]], i64 1 +; GFX8-NEXT: [[EL2:%.*]] = extractelement <16 x i8> [[INVEC]], i64 2 +; GFX8-NEXT: [[EL3:%.*]] = extractelement <16 x i8> [[INVEC]], i64 3 +; GFX8-NEXT: [[EL4:%.*]] = extractelement <16 x i8> [[INVEC]], i64 4 +; GFX8-NEXT: [[EL5:%.*]] = extractelement <16 x i8> [[INVEC]], i64 5 +; GFX8-NEXT: [[EL6:%.*]] = extractelement <16 x i8> [[INVEC]], i64 6 +; GFX8-NEXT: [[EL7:%.*]] = extractelement <16 x i8> [[INVEC]], i64 7 +; GFX8-NEXT: [[EL8:%.*]] = extractelement <16 x i8> [[INVEC]], i64 8 +; GFX8-NEXT: [[EL9:%.*]] = extractelement <16 x i8> [[INVEC]], i64 9 +; GFX8-NEXT: [[EL10:%.*]] = extractelement <16 x i8> [[INVEC]], i64 10 +; GFX8-NEXT: [[EL11:%.*]] = extractelement <16 x i8> [[INVEC]], i64 11 +; GFX8-NEXT: [[EL12:%.*]] = extractelement <16 x i8> [[INVEC]], i64 12 +; GFX8-NEXT: [[EL13:%.*]] = extractelement <16 x i8> [[INVEC]], i64 13 +; GFX8-NEXT: [[EL14:%.*]] = extractelement <16 x i8> [[INVEC]], i64 14 +; GFX8-NEXT: [[EL15:%.*]] = extractelement <16 x i8> [[INVEC]], i64 15 +; GFX8-NEXT: [[MUL0:%.*]] = mul i8 [[EL0]], 1 +; GFX8-NEXT: [[MUL1:%.*]] = mul i8 [[EL1]], 1 +; GFX8-NEXT: [[MUL2:%.*]] = mul i8 [[EL2]], 1 +; GFX8-NEXT: [[MUL3:%.*]] = mul i8 [[EL3]], 1 +; GFX8-NEXT: [[MUL4:%.*]] = mul i8 [[EL4]], 1 +; GFX8-NEXT: [[MUL5:%.*]] = mul i8 [[EL5]], 1 +; GFX8-NEXT: [[MUL6:%.*]] = mul i8 [[EL6]], 1 +; GFX8-NEXT: [[MUL7:%.*]] = mul i8 [[EL7]], 1 +; GFX8-NEXT: [[MUL8:%.*]] = mul i8 [[EL8]], 1 +; GFX8-NEXT: [[MUL9:%.*]] = mul i8 [[EL9]], 1 +; GFX8-NEXT: [[MUL10:%.*]] = mul i8 [[EL10]], 1 +; GFX8-NEXT: [[MUL11:%.*]] = mul i8 [[EL11]], 1 +; GFX8-NEXT: [[MUL12:%.*]] = mul i8 [[EL12]], 1 +; GFX8-NEXT: [[MUL13:%.*]] = mul i8 [[EL13]], 1 +; GFX8-NEXT: [[MUL14:%.*]] = mul i8 [[EL14]], 1 +; 
GFX8-NEXT: [[MUL15:%.*]] = mul i8 [[EL15]], 1 +; GFX8-NEXT: [[ADD0:%.*]] = add i8 [[MUL0]], 1 +; GFX8-NEXT: [[ADD1:%.*]] = add i8 [[MUL1]], 1 +; GFX8-NEXT: [[ADD2:%.*]] = add i8 [[MUL2]], 1 +; GFX8-NEXT: [[ADD3:%.*]] = add i8 [[MUL3]], 1 +; GFX8-NEXT: [[ADD4:%.*]] = add i8 [[MUL4]], 1 +; GFX8-NEXT: [[ADD5:%.*]] = add i8 [[MUL5]], 1 +; GFX8-NEXT: [[ADD6:%.*]] = add i8 [[MUL6]], 1 +; GFX8-NEXT: [[ADD7:%.*]] = add i8 [[MUL7]], 1 +; GFX8-NEXT: [[ADD8:%.*]] = add i8 [[MUL8]], 1 +; GFX8-NEXT: [[ADD9:%.*]] = add i8 [[MUL9]], 1 +; GFX8-NEXT: [[ADD10:%.*]] = add i8 [[MUL10]], 1 +; GFX8-NEXT: [[ADD11:%.*]] = add i8 [[MUL11]], 1 +; GFX8-NEXT: [[ADD12:%.*]] = add i8 [[MUL12]], 1 +; GFX8-NEXT: [[ADD13:%.*]] = add i8 [[MUL13]], 1 +; GFX8-NEXT: [[ADD14:%.*]] = add i8 [[MUL14]], 1 +; GFX8-NEXT: [[ADD15:%.*]] = add i8 [[MUL15]], 1 +; GFX8-NEXT: [[VECINS0:%.*]] = insertelement <16 x i8> poison, i8 [[ADD0]], i64 0 +; GFX8-NEXT: [[VECINS1:%.*]] = insertelement <16 x i8> [[VECINS0]], i8 [[ADD1]], i64 1 +; GFX8-NEXT: [[VECINS2:%.*]] = insertelement <16 x i8> [[VECINS1]], i8 [[ADD2]], i64 2 +; GFX8-NEXT: [[VECINS3:%.*]] = insertelement <16 x i8> [[VECINS2]], i8 [[ADD3]], i64 3 +; GFX8-NEXT: [[VECINS4:%.*]] = insertelement <16 x i8> [[VECINS3]], i8 [[ADD4]], i64 4 +; GFX8-NEXT: [[VECINS5:%.*]] = insertelement <16 x i8> [[VECINS4]], i8 [[ADD5]], i64 5 +; GFX8-NEXT: [[VECINS6:%.*]] = insertelement <16 x i8> [[VECINS5]], i8 [[ADD6]], i64 6 +; GFX8-NEXT: [[VECINS7:%.*]] = insertelement <16 x i8> [[VECINS6]], i8 [[ADD7]], i64 7 +; GFX8-NEXT: [[VECINS8:%.*]] = insertelement <16 x i8> [[VECINS7]], i8 [[ADD8]], i64 8 +; GFX8-NEXT: [[VECINS9:%.*]] = insertelement <16 x i8> [[VECINS8]], i8 [[ADD9]], i64 9 +; GFX8-NEXT: [[VECINS10:%.*]] = insertelement <16 x i8> [[VECINS9]], i8 [[ADD10]], i64 10 +; GFX8-NEXT: [[VECINS11:%.*]] = insertelement <16 x i8> [[VECINS10]], i8 [[ADD11]], i64 11 +; GFX8-NEXT: [[VECINS12:%.*]] = insertelement <16 x i8> [[VECINS11]], i8 [[ADD12]], i64 12 +; GFX8-NEXT: 
[[VECINS13:%.*]] = insertelement <16 x i8> [[VECINS12]], i8 [[ADD13]], i64 13 +; GFX8-NEXT: [[VECINS14:%.*]] = insertelement <16 x i8> [[VECINS13]], i8 [[ADD14]], i64 14 +; GFX8-NEXT: [[VECINS153:%.*]] = insertelement <16 x i8> [[VECINS14]], i8 [[ADD15]], i64 15 +; GFX8-NEXT: store <16 x i8> [[VECINS153]], ptr [[OUT]], align 16 +; GFX8-NEXT: ret void +; +; GFX9-LABEL: define protected amdgpu_kernel void @arith_16( +; GFX9-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] { +; GFX9-NEXT: [[ENTRY:.*:]] +; GFX9-NEXT: [[EL0:%.*]] = extractelement <16 x i8> [[INVEC]], i64 0 +; GFX9-NEXT: [[EL1:%.*]] = extractelement <16 x i8> [[INVEC]], i64 1 +; GFX9-NEXT: [[EL2:%.*]] = extractelement <16 x i8> [[INVEC]], i64 2 +; GFX9-NEXT: [[EL3:%.*]] = extractelement <16 x i8> [[INVEC]], i64 3 +; GFX9-NEXT: [[EL4:%.*]] = extractelement <16 x i8> [[INVEC]], i64 4 +; GFX9-NEXT: [[EL5:%.*]] = extractelement <16 x i8> [[INVEC]], i64 5 +; GFX9-NEXT: [[EL6:%.*]] = extractelement <16 x i8> [[INVEC]], i64 6 +; GFX9-NEXT: [[EL7:%.*]] = extractelement <16 x i8> [[INVEC]], i64 7 +; GFX9-NEXT: [[EL8:%.*]] = extractelement <16 x i8> [[INVEC]], i64 8 +; GFX9-NEXT: [[EL9:%.*]] = extractelement <16 x i8> [[INVEC]], i64 9 +; GFX9-NEXT: [[EL10:%.*]] = extractelement <16 x i8> [[INVEC]], i64 10 +; GFX9-NEXT: [[EL11:%.*]] = extractelement <16 x i8> [[INVEC]], i64 11 +; GFX9-NEXT: [[EL12:%.*]] = extractelement <16 x i8> [[INVEC]], i64 12 +; GFX9-NEXT: [[EL13:%.*]] = extractelement <16 x i8> [[INVEC]], i64 13 +; GFX9-NEXT: [[EL14:%.*]] = extractelement <16 x i8> [[INVEC]], i64 14 +; GFX9-NEXT: [[EL15:%.*]] = extractelement <16 x i8> [[INVEC]], i64 15 +; GFX9-NEXT: [[MUL0:%.*]] = mul i8 [[EL0]], 1 +; GFX9-NEXT: [[MUL1:%.*]] = mul i8 [[EL1]], 1 +; GFX9-NEXT: [[MUL2:%.*]] = mul i8 [[EL2]], 1 +; GFX9-NEXT: [[MUL3:%.*]] = mul i8 [[EL3]], 1 +; GFX9-NEXT: [[MUL4:%.*]] = mul i8 [[EL4]], 1 +; GFX9-NEXT: [[MUL5:%.*]] = mul i8 [[EL5]], 1 +; GFX9-NEXT: [[MUL6:%.*]] = mul i8 [[EL6]], 1 +; 
GFX9-NEXT: [[MUL7:%.*]] = mul i8 [[EL7]], 1 +; GFX9-NEXT: [[MUL8:%.*]] = mul i8 [[EL8]], 1 +; GFX9-NEXT: [[MUL9:%.*]] = mul i8 [[EL9]], 1 +; GFX9-NEXT: [[MUL10:%.*]] = mul i8 [[EL10]], 1 +; GFX9-NEXT: [[MUL11:%.*]] = mul i8 [[EL11]], 1 +; GFX9-NEXT: [[MUL12:%.*]] = mul i8 [[EL12]], 1 +; GFX9-NEXT: [[MUL13:%.*]] = mul i8 [[EL13]], 1 +; GFX9-NEXT: [[MUL14:%.*]] = mul i8 [[EL14]], 1 +; GFX9-NEXT: [[MUL15:%.*]] = mul i8 [[EL15]], 1 +; GFX9-NEXT: [[ADD0:%.*]] = add i8 [[MUL0]], 1 +; GFX9-NEXT: [[ADD1:%.*]] = add i8 [[MUL1]], 1 +; GFX9-NEXT: [[ADD2:%.*]] = add i8 [[MUL2]], 1 +; GFX9-NEXT: [[ADD3:%.*]] = add i8 [[MUL3]], 1 +; GFX9-NEXT: [[ADD4:%.*]] = add i8 [[MUL4]], 1 +; GFX9-NEXT: [[ADD5:%.*]] = add i8 [[MUL5]], 1 +; GFX9-NEXT: [[ADD6:%.*]] = add i8 [[MUL6]], 1 +; GFX9-NEXT: [[ADD7:%.*]] = add i8 [[MUL7]], 1 +; GFX9-NEXT: [[ADD8:%.*]] = add i8 [[MUL8]], 1 +; GFX9-NEXT: [[ADD9:%.*]] = add i8 [[MUL9]], 1 +; GFX9-NEXT: [[ADD10:%.*]] = add i8 [[MUL10]], 1 +; GFX9-NEXT: [[ADD11:%.*]] = add i8 [[MUL11]], 1 +; GFX9-NEXT: [[ADD12:%.*]] = add i8 [[MUL12]], 1 +; GFX9-NEXT: [[ADD13:%.*]] = add i8 [[MUL13]], 1 +; GFX9-NEXT: [[ADD14:%.*]] = add i8 [[MUL14]], 1 +; GFX9-NEXT: [[ADD15:%.*]] = add i8 [[MUL15]], 1 +; GFX9-NEXT: [[VECINS0:%.*]] = insertelement <16 x i8> poison, i8 [[ADD0]], i64 0 +; GFX9-NEXT: [[VECINS1:%.*]] = insertelement <16 x i8> [[VECINS0]], i8 [[ADD1]], i64 1 +; GFX9-NEXT: [[VECINS2:%.*]] = insertelement <16 x i8> [[VECINS1]], i8 [[ADD2]], i64 2 +; GFX9-NEXT: [[VECINS3:%.*]] = insertelement <16 x i8> [[VECINS2]], i8 [[ADD3]], i64 3 +; GFX9-NEXT: [[VECINS4:%.*]] = insertelement <16 x i8> [[VECINS3]], i8 [[ADD4]], i64 4 +; GFX9-NEXT: [[VECINS5:%.*]] = insertelement <16 x i8> [[VECINS4]], i8 [[ADD5]], i64 5 +; GFX9-NEXT: [[VECINS6:%.*]] = insertelement <16 x i8> [[VECINS5]], i8 [[ADD6]], i64 6 +; GFX9-NEXT: [[VECINS7:%.*]] = insertelement <16 x i8> [[VECINS6]], i8 [[ADD7]], i64 7 +; GFX9-NEXT: [[VECINS8:%.*]] = insertelement <16 x i8> [[VECINS7]], i8 [[ADD8]], i64 8 
+; GFX9-NEXT: [[VECINS9:%.*]] = insertelement <16 x i8> [[VECINS8]], i8 [[ADD9]], i64 9 +; GFX9-NEXT: [[VECINS10:%.*]] = insertelement <16 x i8> [[VECINS9]], i8 [[ADD10]], i64 10 +; GFX9-NEXT: [[VECINS11:%.*]] = insertelement <16 x i8> [[VECINS10]], i8 [[ADD11]], i64 11 +; GFX9-NEXT: [[VECINS12:%.*]] = insertelement <16 x i8> [[VECINS11]], i8 [[ADD12]], i64 12 +; GFX9-NEXT: [[VECINS13:%.*]] = insertelement <16 x i8> [[VECINS12]], i8 [[ADD13]], i64 13 +; GFX9-NEXT: [[VECINS14:%.*]] = insertelement <16 x i8> [[VECINS13]], i8 [[ADD14]], i64 14 +; GFX9-NEXT: [[VECINS153:%.*]] = insertelement <16 x i8> [[VECINS14]], i8 [[ADD15]], i64 15 +; GFX9-NEXT: store <16 x i8> [[VECINS153]], ptr [[OUT]], align 16 +; GFX9-NEXT: ret void +; +entry: + %el0 = extractelement <16 x i8> %invec, i64 0 + %el1 = extractelement <16 x i8> %invec, i64 1 + %el2 = extractelement <16 x i8> %invec, i64 2 + %el3 = extractelement <16 x i8> %invec, i64 3 + %el4 = extractelement <16 x i8> %invec, i64 4 + %el5 = extractelement <16 x i8> %invec, i64 5 + %el6 = extractelement <16 x i8> %invec, i64 6 + %el7 = extractelement <16 x i8> %invec, i64 7 + %el8 = extractelement <16 x i8> %invec, i64 8 + %el9 = extractelement <16 x i8> %invec, i64 9 + %el10 = extractelement <16 x i8> %invec, i64 10 + %el11 = extractelement <16 x i8> %invec, i64 11 + %el12 = extractelement <16 x i8> %invec, i64 12 + %el13 = extractelement <16 x i8> %invec, i64 13 + %el14 = extractelement <16 x i8> %invec, i64 14 + %el15 = extractelement <16 x i8> %invec, i64 15 + %mul0 = mul i8 %el0, 1 + %mul1 = mul i8 %el1, 1 + %mul2 = mul i8 %el2, 1 + %mul3 = mul i8 %el3, 1 + %mul4 = mul i8 %el4, 1 + %mul5 = mul i8 %el5, 1 + %mul6 = mul i8 %el6, 1 + %mul7 = mul i8 %el7, 1 + %mul8 = mul i8 %el8, 1 + %mul9 = mul i8 %el9, 1 + %mul10 = mul i8 %el10, 1 + %mul11 = mul i8 %el11, 1 + %mul12 = mul i8 %el12, 1 + %mul13 = mul i8 %el13, 1 + %mul14 = mul i8 %el14, 1 + %mul15 = mul i8 %el15, 1 + %add0 = add i8 %mul0, 1 + %add1 = add i8 %mul1, 1 + %add2 = add 
i8 %mul2, 1 + %add3 = add i8 %mul3, 1 + %add4 = add i8 %mul4, 1 + %add5 = add i8 %mul5, 1 + %add6 = add i8 %mul6, 1 + %add7 = add i8 %mul7, 1 + %add8 = add i8 %mul8, 1 + %add9 = add i8 %mul9, 1 + %add10 = add i8 %mul10, 1 + %add11 = add i8 %mul11, 1 + %add12 = add i8 %mul12, 1 + %add13 = add i8 %mul13, 1 + %add14 = add i8 %mul14, 1 + %add15 = add i8 %mul15, 1 + %vecins0 = insertelement <16 x i8> poison, i8 %add0, i64 0 + %vecins1 = insertelement <16 x i8> %vecins0, i8 %add1, i64 1 + %vecins2 = insertelement <16 x i8> %vecins1, i8 %add2, i64 2 + %vecins3 = insertelement <16 x i8> %vecins2, i8 %add3, i64 3 + %vecins4 = insertelement <16 x i8> %vecins3, i8 %add4, i64 4 + %vecins5 = insertelement <16 x i8> %vecins4, i8 %add5, i64 5 + %vecins6 = insertelement <16 x i8> %vecins5, i8 %add6, i64 6 + %vecins7 = insertelement <16 x i8> %vecins6, i8 %add7, i64 7 + %vecins8 = insertelement <16 x i8> %vecins7, i8 %add8, i64 8 + %vecins9 = insertelement <16 x i8> %vecins8, i8 %add9, i64 9 + %vecins10 = insertelement <16 x i8> %vecins9, i8 %add10, i64 10 + %vecins11 = insertelement <16 x i8> %vecins10, i8 %add11, i64 11 + %vecins12 = insertelement <16 x i8> %vecins11, i8 %add12, i64 12 + %vecins13 = insertelement <16 x i8> %vecins12, i8 %add13, i64 13 + %vecins14 = insertelement <16 x i8> %vecins13, i8 %add14, i64 14 + %vecins15 = insertelement <16 x i8> %vecins14, i8 %add15, i64 15 + store <16 x i8> %vecins15, ptr %out + ret void +} + +define protected amdgpu_kernel void @phi_2(ptr addrspace(3) %inptr0, ptr addrspace(3) %inptr1, ptr %out, ptr %out1, i32 %flag) { +; GFX7-LABEL: define protected amdgpu_kernel void @phi_2( +; GFX7-SAME: ptr addrspace(3) [[INPTR0:%.*]], ptr addrspace(3) [[INPTR1:%.*]], ptr [[OUT:%.*]], ptr [[OUT1:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] { +; GFX7-NEXT: [[ENTRY:.*]]: +; GFX7-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 0 +; GFX7-NEXT: [[ELE0:%.*]] = load i8, ptr addrspace(3) [[GEP0]], align 8 +; GFX7-NEXT: [[GEP1:%.*]] = 
getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 1 +; GFX7-NEXT: [[ELE1:%.*]] = load i8, ptr addrspace(3) [[GEP1]], align 1 +; GFX7-NEXT: br label %[[DO_BODY:.*]] +; GFX7: [[DO_BODY]]: +; GFX7-NEXT: [[PHI2:%.*]] = phi i8 [ [[ELE1]], %[[ENTRY]] ], [ [[OTHERELE1:%.*]], %[[DO_BODY]] ] +; GFX7-NEXT: [[PHI3:%.*]] = phi i8 [ [[ELE0]], %[[ENTRY]] ], [ [[OTHERELE0:%.*]], %[[DO_BODY]] ] +; GFX7-NEXT: [[OTHERELE0]] = load i8, ptr addrspace(3) [[GEP0]], align 8 +; GFX7-NEXT: [[OTHERELE1]] = load i8, ptr addrspace(3) [[GEP1]], align 1 +; GFX7-NEXT: [[VEC00:%.*]] = insertelement <16 x i8> poison, i8 [[OTHERELE0]], i64 8 +; GFX7-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> [[VEC00]], i8 [[OTHERELE1]], i64 9 +; GFX7-NEXT: [[VEC10:%.*]] = insertelement <16 x i8> poison, i8 [[PHI3]], i64 8 +; GFX7-NEXT: [[VEC111:%.*]] = insertelement <16 x i8> [[VEC10]], i8 [[PHI2]], i64 9 +; GFX7-NEXT: store <16 x i8> [[VEC111]], ptr addrspace(3) [[INPTR1]], align 2 +; GFX7-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG]], 0 +; GFX7-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[DO_BODY]] +; GFX7: [[EXIT]]: +; GFX7-NEXT: store <16 x i8> [[VEC111]], ptr [[OUT]], align 16 +; GFX7-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT1]], align 16 +; GFX7-NEXT: ret void +; +; GFX8-LABEL: define protected amdgpu_kernel void @phi_2( +; GFX8-SAME: ptr addrspace(3) [[INPTR0:%.*]], ptr addrspace(3) [[INPTR1:%.*]], ptr [[OUT:%.*]], ptr [[OUT1:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] { +; GFX8-NEXT: [[ENTRY:.*]]: +; GFX8-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 0 +; GFX8-NEXT: [[ELE0:%.*]] = load i8, ptr addrspace(3) [[GEP0]], align 8 +; GFX8-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 1 +; GFX8-NEXT: [[ELE1:%.*]] = load i8, ptr addrspace(3) [[GEP1]], align 1 +; GFX8-NEXT: br label %[[DO_BODY:.*]] +; GFX8: [[DO_BODY]]: +; GFX8-NEXT: [[PHI2:%.*]] = phi i8 [ [[ELE1]], %[[ENTRY]] ], [ [[OTHERELE1:%.*]], %[[DO_BODY]] ] +; GFX8-NEXT: [[PHI3:%.*]] = phi i8 [ [[ELE0]], 
%[[ENTRY]] ], [ [[OTHERELE0:%.*]], %[[DO_BODY]] ] +; GFX8-NEXT: [[OTHERELE0]] = load i8, ptr addrspace(3) [[GEP0]], align 8 +; GFX8-NEXT: [[OTHERELE1]] = load i8, ptr addrspace(3) [[GEP1]], align 1 +; GFX8-NEXT: [[VEC00:%.*]] = insertelement <16 x i8> poison, i8 [[OTHERELE0]], i64 8 +; GFX8-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> [[VEC00]], i8 [[OTHERELE1]], i64 9 +; GFX8-NEXT: [[VEC10:%.*]] = insertelement <16 x i8> poison, i8 [[PHI3]], i64 8 +; GFX8-NEXT: [[VEC111:%.*]] = insertelement <16 x i8> [[VEC10]], i8 [[PHI2]], i64 9 +; GFX8-NEXT: store <16 x i8> [[VEC111]], ptr addrspace(3) [[INPTR1]], align 2 +; GFX8-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG]], 0 +; GFX8-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[DO_BODY]] +; GFX8: [[EXIT]]: +; GFX8-NEXT: store <16 x i8> [[VEC111]], ptr [[OUT]], align 16 +; GFX8-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT1]], align 16 +; GFX8-NEXT: ret void +; +; GFX9-LABEL: define protected amdgpu_kernel void @phi_2( +; GFX9-SAME: ptr addrspace(3) [[INPTR0:%.*]], ptr addrspace(3) [[INPTR1:%.*]], ptr [[OUT:%.*]], ptr [[OUT1:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] { +; GFX9-NEXT: [[ENTRY:.*]]: +; GFX9-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 0 +; GFX9-NEXT: [[ELE0:%.*]] = load i8, ptr addrspace(3) [[GEP0]], align 8 +; GFX9-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 1 +; GFX9-NEXT: [[ELE1:%.*]] = load i8, ptr addrspace(3) [[GEP1]], align 1 +; GFX9-NEXT: br label %[[DO_BODY:.*]] +; GFX9: [[DO_BODY]]: +; GFX9-NEXT: [[PHI2:%.*]] = phi i8 [ [[ELE1]], %[[ENTRY]] ], [ [[OTHERELE1:%.*]], %[[DO_BODY]] ] +; GFX9-NEXT: [[PHI3:%.*]] = phi i8 [ [[ELE0]], %[[ENTRY]] ], [ [[OTHERELE0:%.*]], %[[DO_BODY]] ] +; GFX9-NEXT: [[OTHERELE0]] = load i8, ptr addrspace(3) [[GEP0]], align 8 +; GFX9-NEXT: [[OTHERELE1]] = load i8, ptr addrspace(3) [[GEP1]], align 1 +; GFX9-NEXT: [[VEC00:%.*]] = insertelement <16 x i8> poison, i8 [[OTHERELE0]], i64 8 +; GFX9-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> 
[[VEC00]], i8 [[OTHERELE1]], i64 9 +; GFX9-NEXT: [[VEC10:%.*]] = insertelement <16 x i8> poison, i8 [[PHI3]], i64 8 +; GFX9-NEXT: [[VEC111:%.*]] = insertelement <16 x i8> [[VEC10]], i8 [[PHI2]], i64 9 +; GFX9-NEXT: store <16 x i8> [[VEC111]], ptr addrspace(3) [[INPTR1]], align 2 +; GFX9-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG]], 0 +; GFX9-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[DO_BODY]] +; GFX9: [[EXIT]]: +; GFX9-NEXT: store <16 x i8> [[VEC111]], ptr [[OUT]], align 16 +; GFX9-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT1]], align 16 +; GFX9-NEXT: ret void +; +entry: + %gep0 = getelementptr i8, ptr addrspace(3) %inptr0, i32 0 + %ele0 = load i8, ptr addrspace(3) %gep0, align 8 + %gep1 = getelementptr i8, ptr addrspace(3) %inptr0, i32 1 + %ele1 = load i8, ptr addrspace(3) %gep1, align 1 + br label %do.body + +do.body: + %phi2 = phi i8 [ %ele1, %entry ], [ %otherele1, %do.body ] + %phi3 = phi i8 [ %ele0, %entry ], [ %otherele0, %do.body ] + %otherele0 = load i8, ptr addrspace(3) %gep0, align 8 + %otherele1 = load i8, ptr addrspace(3) %gep1, align 1 + %vec00 = insertelement <16 x i8> poison, i8 %otherele0, i64 8 + %vec01 = insertelement <16 x i8> %vec00, i8 %otherele1, i64 9 + %vec10 = insertelement <16 x i8> poison, i8 %phi3, i64 8 + %vec11 = insertelement <16 x i8> %vec10, i8 %phi2, i64 9 + store <16 x i8> %vec11, ptr addrspace(3) %inptr1, align 2 + %cmp = icmp eq i32 %flag, 0 + br i1 %cmp, label %exit, label %do.body + +exit: + store <16 x i8> %vec11, ptr %out + store <16 x i8> %vec01, ptr %out1 + ret void +} + +define protected amdgpu_kernel void @phi_3(ptr addrspace(3) %inptr0, ptr addrspace(3) %inptr1, ptr %out, ptr %out1, i32 %flag) { +; GFX7-LABEL: define protected amdgpu_kernel void @phi_3( +; GFX7-SAME: ptr addrspace(3) [[INPTR0:%.*]], ptr addrspace(3) [[INPTR1:%.*]], ptr [[OUT:%.*]], ptr [[OUT1:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] { +; GFX7-NEXT: [[ENTRY:.*]]: +; GFX7-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 0 +; 
GFX7-NEXT: [[ELE0:%.*]] = load i8, ptr addrspace(3) [[GEP0]], align 8 +; GFX7-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 1 +; GFX7-NEXT: [[ELE1:%.*]] = load i8, ptr addrspace(3) [[GEP1]], align 1 +; GFX7-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 2 +; GFX7-NEXT: [[ELE2:%.*]] = load i8, ptr addrspace(3) [[GEP2]], align 2 +; GFX7-NEXT: br label %[[DO_BODY:.*]] +; GFX7: [[DO_BODY]]: +; GFX7-NEXT: [[PHI1:%.*]] = phi i8 [ [[ELE2]], %[[ENTRY]] ], [ [[OTHERELE2:%.*]], %[[DO_BODY]] ] +; GFX7-NEXT: [[PHI2:%.*]] = phi i8 [ [[ELE1]], %[[ENTRY]] ], [ [[OTHERELE1:%.*]], %[[DO_BODY]] ] +; GFX7-NEXT: [[PHI3:%.*]] = phi i8 [ [[ELE0]], %[[ENTRY]] ], [ [[OTHERELE0:%.*]], %[[DO_BODY]] ] +; GFX7-NEXT: [[OTHERELE0]] = load i8, ptr addrspace(3) [[GEP0]], align 8 +; GFX7-NEXT: [[OTHERELE1]] = load i8, ptr addrspace(3) [[GEP1]], align 1 +; GFX7-NEXT: [[OTHERELE2]] = load i8, ptr addrspace(3) [[GEP2]], align 2 +; GFX7-NEXT: [[VEC00:%.*]] = insertelement <16 x i8> poison, i8 [[OTHERELE0]], i64 8 +; GFX7-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> [[VEC00]], i8 [[OTHERELE1]], i64 9 +; GFX7-NEXT: [[VEC02:%.*]] = insertelement <16 x i8> [[TMP3]], i8 [[OTHERELE2]], i64 10 +; GFX7-NEXT: [[VEC10:%.*]] = insertelement <16 x i8> poison, i8 [[PHI3]], i64 8 +; GFX7-NEXT: [[VEC111:%.*]] = insertelement <16 x i8> [[VEC10]], i8 [[PHI2]], i64 9 +; GFX7-NEXT: [[VEC12:%.*]] = insertelement <16 x i8> [[VEC111]], i8 [[PHI1]], i64 10 +; GFX7-NEXT: store <16 x i8> [[VEC12]], ptr addrspace(3) [[INPTR1]], align 2 +; GFX7-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG]], 0 +; GFX7-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[DO_BODY]] +; GFX7: [[EXIT]]: +; GFX7-NEXT: store <16 x i8> [[VEC12]], ptr [[OUT]], align 16 +; GFX7-NEXT: store <16 x i8> [[VEC02]], ptr [[OUT1]], align 16 +; GFX7-NEXT: ret void +; +; GFX8-LABEL: define protected amdgpu_kernel void @phi_3( +; GFX8-SAME: ptr addrspace(3) [[INPTR0:%.*]], ptr addrspace(3) [[INPTR1:%.*]], ptr [[OUT:%.*]], 
ptr [[OUT1:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] { +; GFX8-NEXT: [[ENTRY:.*]]: +; GFX8-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 0 +; GFX8-NEXT: [[ELE0:%.*]] = load i8, ptr addrspace(3) [[GEP0]], align 8 +; GFX8-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 1 +; GFX8-NEXT: [[ELE1:%.*]] = load i8, ptr addrspace(3) [[GEP1]], align 1 +; GFX8-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 2 +; GFX8-NEXT: [[ELE2:%.*]] = load i8, ptr addrspace(3) [[GEP2]], align 2 +; GFX8-NEXT: br label %[[DO_BODY:.*]] +; GFX8: [[DO_BODY]]: +; GFX8-NEXT: [[PHI1:%.*]] = phi i8 [ [[ELE2]], %[[ENTRY]] ], [ [[OTHERELE2:%.*]], %[[DO_BODY]] ] +; GFX8-NEXT: [[PHI2:%.*]] = phi i8 [ [[ELE1]], %[[ENTRY]] ], [ [[OTHERELE1:%.*]], %[[DO_BODY]] ] +; GFX8-NEXT: [[PHI3:%.*]] = phi i8 [ [[ELE0]], %[[ENTRY]] ], [ [[OTHERELE0:%.*]], %[[DO_BODY]] ] +; GFX8-NEXT: [[OTHERELE0]] = load i8, ptr addrspace(3) [[GEP0]], align 8 +; GFX8-NEXT: [[OTHERELE1]] = load i8, ptr addrspace(3) [[GEP1]], align 1 +; GFX8-NEXT: [[OTHERELE2]] = load i8, ptr addrspace(3) [[GEP2]], align 2 +; GFX8-NEXT: [[VEC00:%.*]] = insertelement <16 x i8> poison, i8 [[OTHERELE0]], i64 8 +; GFX8-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> [[VEC00]], i8 [[OTHERELE1]], i64 9 +; GFX8-NEXT: [[VEC02:%.*]] = insertelement <16 x i8> [[TMP3]], i8 [[OTHERELE2]], i64 10 +; GFX8-NEXT: [[VEC10:%.*]] = insertelement <16 x i8> poison, i8 [[PHI3]], i64 8 +; GFX8-NEXT: [[VEC111:%.*]] = insertelement <16 x i8> [[VEC10]], i8 [[PHI2]], i64 9 +; GFX8-NEXT: [[VEC12:%.*]] = insertelement <16 x i8> [[VEC111]], i8 [[PHI1]], i64 10 +; GFX8-NEXT: store <16 x i8> [[VEC12]], ptr addrspace(3) [[INPTR1]], align 2 +; GFX8-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG]], 0 +; GFX8-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[DO_BODY]] +; GFX8: [[EXIT]]: +; GFX8-NEXT: store <16 x i8> [[VEC12]], ptr [[OUT]], align 16 +; GFX8-NEXT: store <16 x i8> [[VEC02]], ptr [[OUT1]], align 16 +; GFX8-NEXT: ret 
void +; +; GFX9-LABEL: define protected amdgpu_kernel void @phi_3( +; GFX9-SAME: ptr addrspace(3) [[INPTR0:%.*]], ptr addrspace(3) [[INPTR1:%.*]], ptr [[OUT:%.*]], ptr [[OUT1:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] { +; GFX9-NEXT: [[ENTRY:.*]]: +; GFX9-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 0 +; GFX9-NEXT: [[ELE0:%.*]] = load i8, ptr addrspace(3) [[GEP0]], align 8 +; GFX9-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 1 +; GFX9-NEXT: [[ELE1:%.*]] = load i8, ptr addrspace(3) [[GEP1]], align 1 +; GFX9-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 2 +; GFX9-NEXT: [[ELE2:%.*]] = load i8, ptr addrspace(3) [[GEP2]], align 2 +; GFX9-NEXT: br label %[[DO_BODY:.*]] +; GFX9: [[DO_BODY]]: +; GFX9-NEXT: [[PHI1:%.*]] = phi i8 [ [[ELE2]], %[[ENTRY]] ], [ [[OTHERELE2:%.*]], %[[DO_BODY]] ] +; GFX9-NEXT: [[PHI2:%.*]] = phi i8 [ [[ELE1]], %[[ENTRY]] ], [ [[OTHERELE1:%.*]], %[[DO_BODY]] ] +; GFX9-NEXT: [[PHI3:%.*]] = phi i8 [ [[ELE0]], %[[ENTRY]] ], [ [[OTHERELE0:%.*]], %[[DO_BODY]] ] +; GFX9-NEXT: [[OTHERELE0]] = load i8, ptr addrspace(3) [[GEP0]], align 8 +; GFX9-NEXT: [[OTHERELE1]] = load i8, ptr addrspace(3) [[GEP1]], align 1 +; GFX9-NEXT: [[OTHERELE2]] = load i8, ptr addrspace(3) [[GEP2]], align 2 +; GFX9-NEXT: [[VEC00:%.*]] = insertelement <16 x i8> poison, i8 [[OTHERELE0]], i64 8 +; GFX9-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> [[VEC00]], i8 [[OTHERELE1]], i64 9 +; GFX9-NEXT: [[VEC02:%.*]] = insertelement <16 x i8> [[TMP3]], i8 [[OTHERELE2]], i64 10 +; GFX9-NEXT: [[VEC10:%.*]] = insertelement <16 x i8> poison, i8 [[PHI3]], i64 8 +; GFX9-NEXT: [[VEC111:%.*]] = insertelement <16 x i8> [[VEC10]], i8 [[PHI2]], i64 9 +; GFX9-NEXT: [[VEC12:%.*]] = insertelement <16 x i8> [[VEC111]], i8 [[PHI1]], i64 10 +; GFX9-NEXT: store <16 x i8> [[VEC12]], ptr addrspace(3) [[INPTR1]], align 2 +; GFX9-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG]], 0 +; GFX9-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[DO_BODY]] +; 
GFX9: [[EXIT]]: +; GFX9-NEXT: store <16 x i8> [[VEC12]], ptr [[OUT]], align 16 +; GFX9-NEXT: store <16 x i8> [[VEC02]], ptr [[OUT1]], align 16 +; GFX9-NEXT: ret void +; +entry: + %gep0 = getelementptr i8, ptr addrspace(3) %inptr0, i32 0 + %ele0 = load i8, ptr addrspace(3) %gep0, align 8 + %gep1 = getelementptr i8, ptr addrspace(3) %inptr0, i32 1 + %ele1 = load i8, ptr addrspace(3) %gep1, align 1 + %gep2 = getelementptr i8, ptr addrspace(3) %inptr0, i32 2 + %ele2 = load i8, ptr addrspace(3) %gep2, align 2 + br label %do.body + +do.body: + %phi1 = phi i8 [ %ele2, %entry ], [ %otherele2, %do.body ] + %phi2 = phi i8 [ %ele1, %entry ], [ %otherele1, %do.body ] + %phi3 = phi i8 [ %ele0, %entry ], [ %otherele0, %do.body ] + %otherele0 = load i8, ptr addrspace(3) %gep0, align 8 + %otherele1 = load i8, ptr addrspace(3) %gep1, align 1 + %otherele2 = load i8, ptr addrspace(3) %gep2, align 2 + %vec00 = insertelement <16 x i8> poison, i8 %otherele0, i64 8 + %vec01 = insertelement <16 x i8> %vec00, i8 %otherele1, i64 9 + %vec02 = insertelement <16 x i8> %vec01, i8 %otherele2, i64 10 + %vec10 = insertelement <16 x i8> poison, i8 %phi3, i64 8 + %vec11 = insertelement <16 x i8> %vec10, i8 %phi2, i64 9 + %vec12 = insertelement <16 x i8> %vec11, i8 %phi1, i64 10 + store <16 x i8> %vec12, ptr addrspace(3) %inptr1, align 2 + %cmp = icmp eq i32 %flag, 0 + br i1 %cmp, label %exit, label %do.body + +exit: + store <16 x i8> %vec12, ptr %out + store <16 x i8> %vec02, ptr %out1 + ret void +} + +define protected amdgpu_kernel void @phi_4(ptr addrspace(3) %inptr0, ptr addrspace(3) %inptr1, ptr %out, ptr %out1, i32 %flag) { +; GFX7-LABEL: define protected amdgpu_kernel void @phi_4( +; GFX7-SAME: ptr addrspace(3) [[INPTR0:%.*]], ptr addrspace(3) [[INPTR1:%.*]], ptr [[OUT:%.*]], ptr [[OUT1:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] { +; GFX7-NEXT: [[ENTRY:.*]]: +; GFX7-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 0 +; GFX7-NEXT: [[ELE0:%.*]] = load i8, ptr addrspace(3) 
[[GEP0]], align 8 +; GFX7-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 1 +; GFX7-NEXT: [[ELE1:%.*]] = load i8, ptr addrspace(3) [[GEP1]], align 1 +; GFX7-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 2 +; GFX7-NEXT: [[ELE2:%.*]] = load i8, ptr addrspace(3) [[GEP2]], align 2 +; GFX7-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 3 +; GFX7-NEXT: [[ELE3:%.*]] = load i8, ptr addrspace(3) [[GEP3]], align 1 +; GFX7-NEXT: br label %[[DO_BODY:.*]] +; GFX7: [[DO_BODY]]: +; GFX7-NEXT: [[PHI0:%.*]] = phi i8 [ [[ELE3]], %[[ENTRY]] ], [ [[OTHERELE3:%.*]], %[[DO_BODY]] ] +; GFX7-NEXT: [[PHI1:%.*]] = phi i8 [ [[ELE2]], %[[ENTRY]] ], [ [[OTHERELE2:%.*]], %[[DO_BODY]] ] +; GFX7-NEXT: [[PHI2:%.*]] = phi i8 [ [[ELE1]], %[[ENTRY]] ], [ [[OTHERELE1:%.*]], %[[DO_BODY]] ] +; GFX7-NEXT: [[PHI3:%.*]] = phi i8 [ [[ELE0]], %[[ENTRY]] ], [ [[OTHERELE0:%.*]], %[[DO_BODY]] ] +; GFX7-NEXT: [[OTHERELE0]] = load i8, ptr addrspace(3) [[GEP0]], align 8 +; GFX7-NEXT: [[OTHERELE1]] = load i8, ptr addrspace(3) [[GEP1]], align 1 +; GFX7-NEXT: [[OTHERELE2]] = load i8, ptr addrspace(3) [[GEP2]], align 2 +; GFX7-NEXT: [[OTHERELE3]] = load i8, ptr addrspace(3) [[GEP3]], align 1 +; GFX7-NEXT: [[VEC00:%.*]] = insertelement <16 x i8> poison, i8 [[OTHERELE0]], i64 8 +; GFX7-NEXT: [[VEC01:%.*]] = insertelement <16 x i8> [[VEC00]], i8 [[OTHERELE1]], i64 9 +; GFX7-NEXT: [[VEC02:%.*]] = insertelement <16 x i8> [[VEC01]], i8 [[OTHERELE2]], i64 10 +; GFX7-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> [[VEC02]], i8 [[OTHERELE3]], i64 11 +; GFX7-NEXT: [[VEC10:%.*]] = insertelement <16 x i8> poison, i8 [[PHI3]], i64 8 +; GFX7-NEXT: [[VEC11:%.*]] = insertelement <16 x i8> [[VEC10]], i8 [[PHI2]], i64 9 +; GFX7-NEXT: [[VEC12:%.*]] = insertelement <16 x i8> [[VEC11]], i8 [[PHI1]], i64 10 +; GFX7-NEXT: [[VEC131:%.*]] = insertelement <16 x i8> [[VEC12]], i8 [[PHI0]], i64 11 +; GFX7-NEXT: store <16 x i8> [[VEC131]], ptr addrspace(3) [[INPTR1]], align 
2 +; GFX7-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG]], 0 +; GFX7-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[DO_BODY]] +; GFX7: [[EXIT]]: +; GFX7-NEXT: store <16 x i8> [[VEC131]], ptr [[OUT]], align 16 +; GFX7-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT1]], align 16 +; GFX7-NEXT: ret void +; +; GFX8-LABEL: define protected amdgpu_kernel void @phi_4( +; GFX8-SAME: ptr addrspace(3) [[INPTR0:%.*]], ptr addrspace(3) [[INPTR1:%.*]], ptr [[OUT:%.*]], ptr [[OUT1:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] { +; GFX8-NEXT: [[ENTRY:.*]]: +; GFX8-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 0 +; GFX8-NEXT: [[ELE0:%.*]] = load i8, ptr addrspace(3) [[GEP0]], align 8 +; GFX8-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 1 +; GFX8-NEXT: [[ELE1:%.*]] = load i8, ptr addrspace(3) [[GEP1]], align 1 +; GFX8-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 2 +; GFX8-NEXT: [[ELE2:%.*]] = load i8, ptr addrspace(3) [[GEP2]], align 2 +; GFX8-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 3 +; GFX8-NEXT: [[ELE3:%.*]] = load i8, ptr addrspace(3) [[GEP3]], align 1 +; GFX8-NEXT: br label %[[DO_BODY:.*]] +; GFX8: [[DO_BODY]]: +; GFX8-NEXT: [[PHI0:%.*]] = phi i8 [ [[ELE3]], %[[ENTRY]] ], [ [[OTHERELE3:%.*]], %[[DO_BODY]] ] +; GFX8-NEXT: [[PHI1:%.*]] = phi i8 [ [[ELE2]], %[[ENTRY]] ], [ [[OTHERELE2:%.*]], %[[DO_BODY]] ] +; GFX8-NEXT: [[PHI2:%.*]] = phi i8 [ [[ELE1]], %[[ENTRY]] ], [ [[OTHERELE1:%.*]], %[[DO_BODY]] ] +; GFX8-NEXT: [[PHI3:%.*]] = phi i8 [ [[ELE0]], %[[ENTRY]] ], [ [[OTHERELE0:%.*]], %[[DO_BODY]] ] +; GFX8-NEXT: [[OTHERELE0]] = load i8, ptr addrspace(3) [[GEP0]], align 8 +; GFX8-NEXT: [[OTHERELE1]] = load i8, ptr addrspace(3) [[GEP1]], align 1 +; GFX8-NEXT: [[OTHERELE2]] = load i8, ptr addrspace(3) [[GEP2]], align 2 +; GFX8-NEXT: [[OTHERELE3]] = load i8, ptr addrspace(3) [[GEP3]], align 1 +; GFX8-NEXT: [[VEC00:%.*]] = insertelement <16 x i8> poison, i8 [[OTHERELE0]], i64 8 +; GFX8-NEXT: 
[[VEC01:%.*]] = insertelement <16 x i8> [[VEC00]], i8 [[OTHERELE1]], i64 9 +; GFX8-NEXT: [[VEC02:%.*]] = insertelement <16 x i8> [[VEC01]], i8 [[OTHERELE2]], i64 10 +; GFX8-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> [[VEC02]], i8 [[OTHERELE3]], i64 11 +; GFX8-NEXT: [[VEC10:%.*]] = insertelement <16 x i8> poison, i8 [[PHI3]], i64 8 +; GFX8-NEXT: [[VEC11:%.*]] = insertelement <16 x i8> [[VEC10]], i8 [[PHI2]], i64 9 +; GFX8-NEXT: [[VEC12:%.*]] = insertelement <16 x i8> [[VEC11]], i8 [[PHI1]], i64 10 +; GFX8-NEXT: [[VEC131:%.*]] = insertelement <16 x i8> [[VEC12]], i8 [[PHI0]], i64 11 +; GFX8-NEXT: store <16 x i8> [[VEC131]], ptr addrspace(3) [[INPTR1]], align 2 +; GFX8-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG]], 0 +; GFX8-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[DO_BODY]] +; GFX8: [[EXIT]]: +; GFX8-NEXT: store <16 x i8> [[VEC131]], ptr [[OUT]], align 16 +; GFX8-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT1]], align 16 +; GFX8-NEXT: ret void +; +; GFX9-LABEL: define protected amdgpu_kernel void @phi_4( +; GFX9-SAME: ptr addrspace(3) [[INPTR0:%.*]], ptr addrspace(3) [[INPTR1:%.*]], ptr [[OUT:%.*]], ptr [[OUT1:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] { +; GFX9-NEXT: [[ENTRY:.*]]: +; GFX9-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 0 +; GFX9-NEXT: [[ELE0:%.*]] = load i8, ptr addrspace(3) [[GEP0]], align 8 +; GFX9-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 1 +; GFX9-NEXT: [[ELE1:%.*]] = load i8, ptr addrspace(3) [[GEP1]], align 1 +; GFX9-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 2 +; GFX9-NEXT: [[ELE2:%.*]] = load i8, ptr addrspace(3) [[GEP2]], align 2 +; GFX9-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 3 +; GFX9-NEXT: [[ELE3:%.*]] = load i8, ptr addrspace(3) [[GEP3]], align 1 +; GFX9-NEXT: br label %[[DO_BODY:.*]] +; GFX9: [[DO_BODY]]: +; GFX9-NEXT: [[PHI0:%.*]] = phi i8 [ [[ELE3]], %[[ENTRY]] ], [ [[OTHERELE3:%.*]], %[[DO_BODY]] ] +; GFX9-NEXT: [[PHI1:%.*]] 
= phi i8 [ [[ELE2]], %[[ENTRY]] ], [ [[OTHERELE2:%.*]], %[[DO_BODY]] ] +; GFX9-NEXT: [[PHI2:%.*]] = phi i8 [ [[ELE1]], %[[ENTRY]] ], [ [[OTHERELE1:%.*]], %[[DO_BODY]] ] +; GFX9-NEXT: [[PHI3:%.*]] = phi i8 [ [[ELE0]], %[[ENTRY]] ], [ [[OTHERELE0:%.*]], %[[DO_BODY]] ] +; GFX9-NEXT: [[OTHERELE0]] = load i8, ptr addrspace(3) [[GEP0]], align 8 +; GFX9-NEXT: [[OTHERELE1]] = load i8, ptr addrspace(3) [[GEP1]], align 1 +; GFX9-NEXT: [[OTHERELE2]] = load i8, ptr addrspace(3) [[GEP2]], align 2 +; GFX9-NEXT: [[OTHERELE3]] = load i8, ptr addrspace(3) [[GEP3]], align 1 +; GFX9-NEXT: [[VEC00:%.*]] = insertelement <16 x i8> poison, i8 [[OTHERELE0]], i64 8 +; GFX9-NEXT: [[VEC01:%.*]] = insertelement <16 x i8> [[VEC00]], i8 [[OTHERELE1]], i64 9 +; GFX9-NEXT: [[VEC02:%.*]] = insertelement <16 x i8> [[VEC01]], i8 [[OTHERELE2]], i64 10 +; GFX9-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> [[VEC02]], i8 [[OTHERELE3]], i64 11 +; GFX9-NEXT: [[VEC10:%.*]] = insertelement <16 x i8> poison, i8 [[PHI3]], i64 8 +; GFX9-NEXT: [[VEC11:%.*]] = insertelement <16 x i8> [[VEC10]], i8 [[PHI2]], i64 9 +; GFX9-NEXT: [[VEC12:%.*]] = insertelement <16 x i8> [[VEC11]], i8 [[PHI1]], i64 10 +; GFX9-NEXT: [[VEC131:%.*]] = insertelement <16 x i8> [[VEC12]], i8 [[PHI0]], i64 11 +; GFX9-NEXT: store <16 x i8> [[VEC131]], ptr addrspace(3) [[INPTR1]], align 2 +; GFX9-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG]], 0 +; GFX9-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[DO_BODY]] +; GFX9: [[EXIT]]: +; GFX9-NEXT: store <16 x i8> [[VEC131]], ptr [[OUT]], align 16 +; GFX9-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT1]], align 16 +; GFX9-NEXT: ret void +; +entry: + %gep0 = getelementptr i8, ptr addrspace(3) %inptr0, i32 0 + %ele0 = load i8, ptr addrspace(3) %gep0, align 8 + %gep1 = getelementptr i8, ptr addrspace(3) %inptr0, i32 1 + %ele1 = load i8, ptr addrspace(3) %gep1, align 1 + %gep2 = getelementptr i8, ptr addrspace(3) %inptr0, i32 2 + %ele2 = load i8, ptr addrspace(3) %gep2, align 2 + %gep3 = getelementptr i8, ptr 
addrspace(3) %inptr0, i32 3 + %ele3 = load i8, ptr addrspace(3) %gep3, align 1 + br label %do.body + +do.body: + %phi0 = phi i8 [ %ele3, %entry ], [ %otherele3, %do.body ] + %phi1 = phi i8 [ %ele2, %entry ], [ %otherele2, %do.body ] + %phi2 = phi i8 [ %ele1, %entry ], [ %otherele1, %do.body ] + %phi3 = phi i8 [ %ele0, %entry ], [ %otherele0, %do.body ] + %otherele0 = load i8, ptr addrspace(3) %gep0, align 8 + %otherele1 = load i8, ptr addrspace(3) %gep1, align 1 + %otherele2 = load i8, ptr addrspace(3) %gep2, align 2 + %otherele3 = load i8, ptr addrspace(3) %gep3, align 1 + %vec00 = insertelement <16 x i8> poison, i8 %otherele0, i64 8 + %vec01 = insertelement <16 x i8> %vec00, i8 %otherele1, i64 9 + %vec02 = insertelement <16 x i8> %vec01, i8 %otherele2, i64 10 + %vec03 = insertelement <16 x i8> %vec02, i8 %otherele3, i64 11 + %vec10 = insertelement <16 x i8> poison, i8 %phi3, i64 8 + %vec11 = insertelement <16 x i8> %vec10, i8 %phi2, i64 9 + %vec12 = insertelement <16 x i8> %vec11, i8 %phi1, i64 10 + %vec13 = insertelement <16 x i8> %vec12, i8 %phi0, i64 11 + store <16 x i8> %vec13, ptr addrspace(3) %inptr1, align 2 + %cmp = icmp eq i32 %flag, 0 + br i1 %cmp, label %exit, label %do.body + +exit: + store <16 x i8> %vec13, ptr %out + store <16 x i8> %vec03, ptr %out1 + ret void +} + +define protected amdgpu_kernel void @phi_4_with_stores(ptr addrspace(3) %inptr0, ptr addrspace(3) %inptr1, ptr %out, ptr %out1, i32 %flag) { +; GFX7-LABEL: define protected amdgpu_kernel void @phi_4_with_stores( +; GFX7-SAME: ptr addrspace(3) [[INPTR0:%.*]], ptr addrspace(3) [[INPTR1:%.*]], ptr [[OUT:%.*]], ptr [[OUT1:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] { +; GFX7-NEXT: [[ENTRY:.*]]: +; GFX7-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 0 +; GFX7-NEXT: [[ELE0:%.*]] = load i8, ptr addrspace(3) [[GEP0]], align 8 +; GFX7-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 1 +; GFX7-NEXT: [[ELE1:%.*]] = load i8, ptr addrspace(3) [[GEP1]], align 1 
+; GFX7-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 2 +; GFX7-NEXT: [[ELE2:%.*]] = load i8, ptr addrspace(3) [[GEP2]], align 2 +; GFX7-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 3 +; GFX7-NEXT: [[ELE3:%.*]] = load i8, ptr addrspace(3) [[GEP3]], align 1 +; GFX7-NEXT: br label %[[DO_BODY:.*]] +; GFX7: [[DO_BODY]]: +; GFX7-NEXT: [[PHI0:%.*]] = phi i8 [ [[ELE3]], %[[ENTRY]] ], [ [[OTHERELE3:%.*]], %[[DO_BODY]] ] +; GFX7-NEXT: [[PHI1:%.*]] = phi i8 [ [[ELE2]], %[[ENTRY]] ], [ [[OTHERELE2:%.*]], %[[DO_BODY]] ] +; GFX7-NEXT: [[PHI2:%.*]] = phi i8 [ [[ELE1]], %[[ENTRY]] ], [ [[OTHERELE1:%.*]], %[[DO_BODY]] ] +; GFX7-NEXT: [[PHI3:%.*]] = phi i8 [ [[ELE0]], %[[ENTRY]] ], [ [[OTHERELE0:%.*]], %[[DO_BODY]] ] +; GFX7-NEXT: [[OTHERELE0]] = load i8, ptr addrspace(3) [[GEP0]], align 8 +; GFX7-NEXT: [[OTHERELE1]] = load i8, ptr addrspace(3) [[GEP1]], align 1 +; GFX7-NEXT: [[OTHERELE2]] = load i8, ptr addrspace(3) [[GEP2]], align 2 +; GFX7-NEXT: [[OTHERELE3]] = load i8, ptr addrspace(3) [[GEP3]], align 1 +; GFX7-NEXT: store i8 [[PHI3]], ptr addrspace(3) [[GEP0]], align 2 +; GFX7-NEXT: store i8 [[PHI2]], ptr addrspace(3) [[GEP1]], align 2 +; GFX7-NEXT: store i8 [[PHI1]], ptr addrspace(3) [[GEP2]], align 2 +; GFX7-NEXT: store i8 [[PHI0]], ptr addrspace(3) [[GEP3]], align 2 +; GFX7-NEXT: [[VEC10:%.*]] = insertelement <16 x i8> poison, i8 [[PHI3]], i64 8 +; GFX7-NEXT: [[VEC11:%.*]] = insertelement <16 x i8> [[VEC10]], i8 [[PHI2]], i64 9 +; GFX7-NEXT: [[VEC12:%.*]] = insertelement <16 x i8> [[VEC11]], i8 [[PHI1]], i64 10 +; GFX7-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> [[VEC12]], i8 [[PHI0]], i64 11 +; GFX7-NEXT: store <16 x i8> [[TMP3]], ptr addrspace(3) [[INPTR1]], align 2 +; GFX7-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG]], 0 +; GFX7-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[DO_BODY]] +; GFX7: [[EXIT]]: +; GFX7-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT]], align 16 +; GFX7-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT1]], 
align 16 +; GFX7-NEXT: ret void +; +; GFX8-LABEL: define protected amdgpu_kernel void @phi_4_with_stores( +; GFX8-SAME: ptr addrspace(3) [[INPTR0:%.*]], ptr addrspace(3) [[INPTR1:%.*]], ptr [[OUT:%.*]], ptr [[OUT1:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] { +; GFX8-NEXT: [[ENTRY:.*]]: +; GFX8-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 0 +; GFX8-NEXT: [[ELE0:%.*]] = load i8, ptr addrspace(3) [[GEP0]], align 8 +; GFX8-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 1 +; GFX8-NEXT: [[ELE1:%.*]] = load i8, ptr addrspace(3) [[GEP1]], align 1 +; GFX8-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 2 +; GFX8-NEXT: [[ELE2:%.*]] = load i8, ptr addrspace(3) [[GEP2]], align 2 +; GFX8-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 3 +; GFX8-NEXT: [[ELE3:%.*]] = load i8, ptr addrspace(3) [[GEP3]], align 1 +; GFX8-NEXT: br label %[[DO_BODY:.*]] +; GFX8: [[DO_BODY]]: +; GFX8-NEXT: [[PHI0:%.*]] = phi i8 [ [[ELE3]], %[[ENTRY]] ], [ [[OTHERELE3:%.*]], %[[DO_BODY]] ] +; GFX8-NEXT: [[PHI1:%.*]] = phi i8 [ [[ELE2]], %[[ENTRY]] ], [ [[OTHERELE2:%.*]], %[[DO_BODY]] ] +; GFX8-NEXT: [[PHI2:%.*]] = phi i8 [ [[ELE1]], %[[ENTRY]] ], [ [[OTHERELE1:%.*]], %[[DO_BODY]] ] +; GFX8-NEXT: [[PHI3:%.*]] = phi i8 [ [[ELE0]], %[[ENTRY]] ], [ [[OTHERELE0:%.*]], %[[DO_BODY]] ] +; GFX8-NEXT: [[OTHERELE0]] = load i8, ptr addrspace(3) [[GEP0]], align 8 +; GFX8-NEXT: [[OTHERELE1]] = load i8, ptr addrspace(3) [[GEP1]], align 1 +; GFX8-NEXT: [[OTHERELE2]] = load i8, ptr addrspace(3) [[GEP2]], align 2 +; GFX8-NEXT: [[OTHERELE3]] = load i8, ptr addrspace(3) [[GEP3]], align 1 +; GFX8-NEXT: store i8 [[PHI3]], ptr addrspace(3) [[GEP0]], align 2 +; GFX8-NEXT: store i8 [[PHI2]], ptr addrspace(3) [[GEP1]], align 2 +; GFX8-NEXT: store i8 [[PHI1]], ptr addrspace(3) [[GEP2]], align 2 +; GFX8-NEXT: store i8 [[PHI0]], ptr addrspace(3) [[GEP3]], align 2 +; GFX8-NEXT: [[VEC10:%.*]] = insertelement <16 x i8> poison, i8 [[PHI3]], i64 
8 +; GFX8-NEXT: [[VEC11:%.*]] = insertelement <16 x i8> [[VEC10]], i8 [[PHI2]], i64 9 +; GFX8-NEXT: [[VEC12:%.*]] = insertelement <16 x i8> [[VEC11]], i8 [[PHI1]], i64 10 +; GFX8-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> [[VEC12]], i8 [[PHI0]], i64 11 +; GFX8-NEXT: store <16 x i8> [[TMP3]], ptr addrspace(3) [[INPTR1]], align 2 +; GFX8-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG]], 0 +; GFX8-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[DO_BODY]] +; GFX8: [[EXIT]]: +; GFX8-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT]], align 16 +; GFX8-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT1]], align 16 +; GFX8-NEXT: ret void +; +; GFX9-LABEL: define protected amdgpu_kernel void @phi_4_with_stores( +; GFX9-SAME: ptr addrspace(3) [[INPTR0:%.*]], ptr addrspace(3) [[INPTR1:%.*]], ptr [[OUT:%.*]], ptr [[OUT1:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] { +; GFX9-NEXT: [[ENTRY:.*]]: +; GFX9-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 0 +; GFX9-NEXT: [[ELE0:%.*]] = load i8, ptr addrspace(3) [[GEP0]], align 8 +; GFX9-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 1 +; GFX9-NEXT: [[ELE1:%.*]] = load i8, ptr addrspace(3) [[GEP1]], align 1 +; GFX9-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 2 +; GFX9-NEXT: [[ELE2:%.*]] = load i8, ptr addrspace(3) [[GEP2]], align 2 +; GFX9-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 3 +; GFX9-NEXT: [[ELE3:%.*]] = load i8, ptr addrspace(3) [[GEP3]], align 1 +; GFX9-NEXT: br label %[[DO_BODY:.*]] +; GFX9: [[DO_BODY]]: +; GFX9-NEXT: [[PHI0:%.*]] = phi i8 [ [[ELE3]], %[[ENTRY]] ], [ [[OTHERELE3:%.*]], %[[DO_BODY]] ] +; GFX9-NEXT: [[PHI1:%.*]] = phi i8 [ [[ELE2]], %[[ENTRY]] ], [ [[OTHERELE2:%.*]], %[[DO_BODY]] ] +; GFX9-NEXT: [[PHI2:%.*]] = phi i8 [ [[ELE1]], %[[ENTRY]] ], [ [[OTHERELE1:%.*]], %[[DO_BODY]] ] +; GFX9-NEXT: [[PHI3:%.*]] = phi i8 [ [[ELE0]], %[[ENTRY]] ], [ [[OTHERELE0:%.*]], %[[DO_BODY]] ] +; GFX9-NEXT: [[OTHERELE0]] = load i8, ptr addrspace(3) 
[[GEP0]], align 8 +; GFX9-NEXT: [[OTHERELE1]] = load i8, ptr addrspace(3) [[GEP1]], align 1 +; GFX9-NEXT: [[OTHERELE2]] = load i8, ptr addrspace(3) [[GEP2]], align 2 +; GFX9-NEXT: [[OTHERELE3]] = load i8, ptr addrspace(3) [[GEP3]], align 1 +; GFX9-NEXT: store i8 [[PHI3]], ptr addrspace(3) [[GEP0]], align 2 +; GFX9-NEXT: store i8 [[PHI2]], ptr addrspace(3) [[GEP1]], align 2 +; GFX9-NEXT: store i8 [[PHI1]], ptr addrspace(3) [[GEP2]], align 2 +; GFX9-NEXT: store i8 [[PHI0]], ptr addrspace(3) [[GEP3]], align 2 +; GFX9-NEXT: [[VEC10:%.*]] = insertelement <16 x i8> poison, i8 [[PHI3]], i64 8 +; GFX9-NEXT: [[VEC11:%.*]] = insertelement <16 x i8> [[VEC10]], i8 [[PHI2]], i64 9 +; GFX9-NEXT: [[VEC12:%.*]] = insertelement <16 x i8> [[VEC11]], i8 [[PHI1]], i64 10 +; GFX9-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> [[VEC12]], i8 [[PHI0]], i64 11 +; GFX9-NEXT: store <16 x i8> [[TMP3]], ptr addrspace(3) [[INPTR1]], align 2 +; GFX9-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG]], 0 +; GFX9-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[DO_BODY]] +; GFX9: [[EXIT]]: +; GFX9-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT]], align 16 +; GFX9-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT1]], align 16 +; GFX9-NEXT: ret void +; +entry: + %gep0 = getelementptr i8, ptr addrspace(3) %inptr0, i32 0 + %ele0 = load i8, ptr addrspace(3) %gep0, align 8 + %gep1 = getelementptr i8, ptr addrspace(3) %inptr0, i32 1 + %ele1 = load i8, ptr addrspace(3) %gep1, align 1 + %gep2 = getelementptr i8, ptr addrspace(3) %inptr0, i32 2 + %ele2 = load i8, ptr addrspace(3) %gep2, align 2 + %gep3 = getelementptr i8, ptr addrspace(3) %inptr0, i32 3 + %ele3 = load i8, ptr addrspace(3) %gep3, align 1 + br label %do.body + +do.body: + %phi0 = phi i8 [ %ele3, %entry ], [ %otherele3, %do.body ] + %phi1 = phi i8 [ %ele2, %entry ], [ %otherele2, %do.body ] + %phi2 = phi i8 [ %ele1, %entry ], [ %otherele1, %do.body ] + %phi3 = phi i8 [ %ele0, %entry ], [ %otherele0, %do.body ] + %otherele0 = load i8, ptr addrspace(3) %gep0, align 8 + 
%otherele1 = load i8, ptr addrspace(3) %gep1, align 1 + %otherele2 = load i8, ptr addrspace(3) %gep2, align 2 + %otherele3 = load i8, ptr addrspace(3) %gep3, align 1 + store i8 %phi3, ptr addrspace(3) %gep0, align 2 + store i8 %phi2, ptr addrspace(3) %gep1, align 2 + store i8 %phi1, ptr addrspace(3) %gep2, align 2 + store i8 %phi0, ptr addrspace(3) %gep3, align 2 + %vec10 = insertelement <16 x i8> poison, i8 %phi3, i64 8 + %vec11 = insertelement <16 x i8> %vec10, i8 %phi2, i64 9 + %vec12 = insertelement <16 x i8> %vec11, i8 %phi1, i64 10 + %vec13 = insertelement <16 x i8> %vec12, i8 %phi0, i64 11 + store <16 x i8> %vec13, ptr addrspace(3) %inptr1, align 2 + %cmp = icmp eq i32 %flag, 0 + br i1 %cmp, label %exit, label %do.body + +exit: + store <16 x i8> %vec13, ptr %out + store <16 x i8> %vec13, ptr %out1 + ret void +} + +define protected amdgpu_kernel void @phi_4_with_stores_outside_loop(<4 x i8> %inptr0, ptr addrspace(3) %inptr1, ptr %out, ptr addrspace(3) %out1, i32 %flag) { +; GFX7-LABEL: define protected amdgpu_kernel void @phi_4_with_stores_outside_loop( +; GFX7-SAME: <4 x i8> [[INPTR0:%.*]], ptr addrspace(3) [[INPTR1:%.*]], ptr [[OUT:%.*]], ptr addrspace(3) [[OUT1:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] { +; GFX7-NEXT: [[ENTRY:.*]]: +; GFX7-NEXT: [[TMP4:%.*]] = extractelement <4 x i8> [[INPTR0]], i32 3 +; GFX7-NEXT: [[TMP1:%.*]] = extractelement <4 x i8> [[INPTR0]], i32 2 +; GFX7-NEXT: [[TMP2:%.*]] = extractelement <4 x i8> [[INPTR0]], i32 1 +; GFX7-NEXT: [[TMP3:%.*]] = extractelement <4 x i8> [[INPTR0]], i32 0 +; GFX7-NEXT: br label %[[DO_BODY:.*]] +; GFX7: [[DO_BODY]]: +; GFX7-NEXT: [[TMP0:%.*]] = phi <4 x i8> [ [[INPTR0]], %[[ENTRY]] ], [ [[INPTR0]], %[[DO_BODY]] ] +; GFX7-NEXT: [[GEP4:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR1]], i32 8 +; GFX7-NEXT: store <4 x i8> [[TMP0]], ptr addrspace(3) [[GEP4]], align 2 +; GFX7-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG]], 0 +; GFX7-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[DO_BODY]] +; GFX7: [[EXIT]]: +; 
GFX7-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr addrspace(3) [[OUT1]], i32 0 +; GFX7-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr addrspace(3) [[OUT1]], i32 1 +; GFX7-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr addrspace(3) [[OUT1]], i32 2 +; GFX7-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr addrspace(3) [[OUT1]], i32 3 +; GFX7-NEXT: store i8 [[TMP3]], ptr addrspace(3) [[GEP0]], align 1 +; GFX7-NEXT: store i8 [[TMP2]], ptr addrspace(3) [[GEP1]], align 1 +; GFX7-NEXT: store i8 [[TMP1]], ptr addrspace(3) [[GEP2]], align 1 +; GFX7-NEXT: store i8 [[TMP4]], ptr addrspace(3) [[GEP3]], align 1 +; GFX7-NEXT: ret void +; +; GFX8-LABEL: define protected amdgpu_kernel void @phi_4_with_stores_outside_loop( +; GFX8-SAME: <4 x i8> [[INPTR0:%.*]], ptr addrspace(3) [[INPTR1:%.*]], ptr [[OUT:%.*]], ptr addrspace(3) [[OUT1:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] { +; GFX8-NEXT: [[ENTRY:.*]]: +; GFX8-NEXT: [[TMP4:%.*]] = extractelement <4 x i8> [[INPTR0]], i32 3 +; GFX8-NEXT: [[TMP1:%.*]] = extractelement <4 x i8> [[INPTR0]], i32 2 +; GFX8-NEXT: [[TMP2:%.*]] = extractelement <4 x i8> [[INPTR0]], i32 1 +; GFX8-NEXT: [[TMP3:%.*]] = extractelement <4 x i8> [[INPTR0]], i32 0 +; GFX8-NEXT: br label %[[DO_BODY:.*]] +; GFX8: [[DO_BODY]]: +; GFX8-NEXT: [[TMP0:%.*]] = phi <4 x i8> [ [[INPTR0]], %[[ENTRY]] ], [ [[INPTR0]], %[[DO_BODY]] ] +; GFX8-NEXT: [[GEP4:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR1]], i32 8 +; GFX8-NEXT: store <4 x i8> [[TMP0]], ptr addrspace(3) [[GEP4]], align 2 +; GFX8-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG]], 0 +; GFX8-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[DO_BODY]] +; GFX8: [[EXIT]]: +; GFX8-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr addrspace(3) [[OUT1]], i32 0 +; GFX8-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr addrspace(3) [[OUT1]], i32 1 +; GFX8-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr addrspace(3) [[OUT1]], i32 2 +; GFX8-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr addrspace(3) [[OUT1]], i32 3 +; GFX8-NEXT: store i8 [[TMP3]], ptr addrspace(3) [[GEP0]], 
align 1 +; GFX8-NEXT: store i8 [[TMP2]], ptr addrspace(3) [[GEP1]], align 1 +; GFX8-NEXT: store i8 [[TMP1]], ptr addrspace(3) [[GEP2]], align 1 +; GFX8-NEXT: store i8 [[TMP4]], ptr addrspace(3) [[GEP3]], align 1 +; GFX8-NEXT: ret void +; +; GFX9-LABEL: define protected amdgpu_kernel void @phi_4_with_stores_outside_loop( +; GFX9-SAME: <4 x i8> [[INPTR0:%.*]], ptr addrspace(3) [[INPTR1:%.*]], ptr [[OUT:%.*]], ptr addrspace(3) [[OUT1:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] { +; GFX9-NEXT: [[ENTRY:.*]]: +; GFX9-NEXT: [[TMP4:%.*]] = extractelement <4 x i8> [[INPTR0]], i32 3 +; GFX9-NEXT: [[TMP1:%.*]] = extractelement <4 x i8> [[INPTR0]], i32 2 +; GFX9-NEXT: [[TMP2:%.*]] = extractelement <4 x i8> [[INPTR0]], i32 1 +; GFX9-NEXT: [[TMP3:%.*]] = extractelement <4 x i8> [[INPTR0]], i32 0 +; GFX9-NEXT: br label %[[DO_BODY:.*]] +; GFX9: [[DO_BODY]]: +; GFX9-NEXT: [[TMP0:%.*]] = phi <4 x i8> [ [[INPTR0]], %[[ENTRY]] ], [ [[INPTR0]], %[[DO_BODY]] ] +; GFX9-NEXT: [[GEP4:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR1]], i32 8 +; GFX9-NEXT: store <4 x i8> [[TMP0]], ptr addrspace(3) [[GEP4]], align 2 +; GFX9-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG]], 0 +; GFX9-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[DO_BODY]] +; GFX9: [[EXIT]]: +; GFX9-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr addrspace(3) [[OUT1]], i32 0 +; GFX9-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr addrspace(3) [[OUT1]], i32 1 +; GFX9-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr addrspace(3) [[OUT1]], i32 2 +; GFX9-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr addrspace(3) [[OUT1]], i32 3 +; GFX9-NEXT: store i8 [[TMP3]], ptr addrspace(3) [[GEP0]], align 1 +; GFX9-NEXT: store i8 [[TMP2]], ptr addrspace(3) [[GEP1]], align 1 +; GFX9-NEXT: store i8 [[TMP1]], ptr addrspace(3) [[GEP2]], align 1 +; GFX9-NEXT: store i8 [[TMP4]], ptr addrspace(3) [[GEP3]], align 1 +; GFX9-NEXT: ret void +; +entry: + %ele0 = extractelement <4 x i8> %inptr0, i32 0 + %ele1 = extractelement <4 x i8> %inptr0, i32 1 + %ele2 = extractelement <4 x i8> 
%inptr0, i32 2 + %ele3 = extractelement <4 x i8> %inptr0, i32 3 + br label %do.body + +do.body: + %phi0 = phi i8 [ %ele3, %entry ], [ %otherele3, %do.body ] + %phi1 = phi i8 [ %ele2, %entry ], [ %otherele2, %do.body ] + %phi2 = phi i8 [ %ele1, %entry ], [ %otherele1, %do.body ] + %phi3 = phi i8 [ %ele0, %entry ], [ %otherele0, %do.body ] + %otherele0 = extractelement <4 x i8> %inptr0, i32 0 + %otherele1 = extractelement <4 x i8> %inptr0, i32 1 + %otherele2 = extractelement <4 x i8> %inptr0, i32 2 + %otherele3 = extractelement <4 x i8> %inptr0, i32 3 + %gep4 = getelementptr i8, ptr addrspace(3) %inptr1, i32 8 + store i8 %phi3, ptr addrspace(3) %gep4, align 2 + %gep5 = getelementptr i8, ptr addrspace(3) %inptr1, i32 9 + store i8 %phi2, ptr addrspace(3) %gep5, align 2 + %gep6 = getelementptr i8, ptr addrspace(3) %inptr1, i32 10 + store i8 %phi1, ptr addrspace(3) %gep6, align 2 + %gep7 = getelementptr i8, ptr addrspace(3) %inptr1, i32 11 + store i8 %phi0, ptr addrspace(3) %gep7, align 2 + %cmp = icmp eq i32 %flag, 0 + br i1 %cmp, label %exit, label %do.body + +exit: + %gep0 = getelementptr i8, ptr addrspace(3) %out1, i32 0 + %gep1 = getelementptr i8, ptr addrspace(3) %out1, i32 1 + %gep2 = getelementptr i8, ptr addrspace(3) %out1, i32 2 + %gep3 = getelementptr i8, ptr addrspace(3) %out1, i32 3 + store i8 %otherele0, ptr addrspace(3) %gep0 + store i8 %otherele1, ptr addrspace(3) %gep1 + store i8 %otherele2, ptr addrspace(3) %gep2 + store i8 %otherele3, ptr addrspace(3) %gep3 + ret void +}