|
| 1 | +// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \ |
| 2 | +// RUN: -mcode-object-version=4 -DUSER -x hip -o %t_4.bc %s |
| 3 | + |
| 4 | +// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \ |
| 5 | +// RUN: -mcode-object-version=5 -DUSER -x hip -o %t_5.bc %s |
| 6 | + |
| 7 | +// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \ |
| 8 | +// RUN: -mcode-object-version=none -DDEVICELIB -x hip -o %t_0.bc %s |
| 9 | + |
| 10 | +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -O3 \ |
| 11 | +// RUN: %t_4.bc -mlink-builtin-bitcode %t_0.bc -o - |\ |
| 12 | +// RUN: FileCheck -check-prefix=LINKED4 %s |
| 13 | + |
| 14 | +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -O3 \ |
| 15 | +// RUN: %t_5.bc -mlink-builtin-bitcode %t_0.bc -o - |\ |
| 16 | +// RUN: FileCheck -check-prefix=LINKED5 %s |
| 17 | + |
| 18 | +#include "Inputs/cuda.h" |
| 19 | + |
| 20 | +// LINKED4: @llvm.amdgcn.abi.version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 400 |
| 21 | +// LINKED4-LABEL: bar |
| 22 | +// LINKED4-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @llvm.amdgcn.abi.version to ptr), align {{.*}} |
| 23 | +// LINKED4-NOT: icmp sge i32 %{{.*}}, 500 |
| 24 | +// LINKED4: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| 25 | +// LINKED4: [[GEP_5_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12 |
| 26 | +// LINKED4: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() |
| 27 | +// LINKED4: [[GEP_4_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 4 |
| 28 | +// LINKED4: select i1 false, ptr addrspace(4) [[GEP_5_X]], ptr addrspace(4) [[GEP_4_X]] |
| 29 | +// LINKED4: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef |
| 30 | + |
| 31 | +// LINKED4-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @llvm.amdgcn.abi.version to ptr), align {{.*}} |
| 32 | +// LINKED4-NOT: icmp sge i32 %{{.*}}, 500 |
| 33 | +// LINKED4: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| 34 | +// LINKED4: [[GEP_5_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14 |
| 35 | +// LINKED4: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() |
| 36 | +// LINKED4: [[GEP_4_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 6 |
| 37 | +// LINKED4: select i1 false, ptr addrspace(4) [[GEP_5_Y]], ptr addrspace(4) [[GEP_4_Y]] |
| 38 | +// LINKED4: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef |
| 39 | + |
| 40 | +// LINKED4-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @llvm.amdgcn.abi.version to ptr), align {{.*}} |
| 41 | +// LINKED4-NOT: icmp sge i32 %{{.*}}, 500 |
| 42 | +// LINKED4: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| 43 | +// LINKED4: [[GEP_5_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16 |
| 44 | +// LINKED4: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() |
| 45 | +// LINKED4: [[GEP_4_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 8 |
| 46 | +// LINKED4: select i1 false, ptr addrspace(4) [[GEP_5_Z]], ptr addrspace(4) [[GEP_4_Z]] |
| 47 | +// LINKED4: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef |
| 48 | +// LINKED4: "amdgpu_code_object_version", i32 400 |
| 49 | + |
| 50 | +// LINKED5: @llvm.amdgcn.abi.version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500 |
| 51 | +// LINKED5-LABEL: bar |
| 52 | +// LINKED5-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @llvm.amdgcn.abi.version to ptr), align {{.*}} |
| 53 | +// LINKED5-NOT: icmp sge i32 %{{.*}}, 500 |
| 54 | +// LINKED5: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| 55 | +// LINKED5: [[GEP_5_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12 |
| 56 | +// LINKED5: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() |
| 57 | +// LINKED5: [[GEP_4_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 4 |
| 58 | +// LINKED5: select i1 true, ptr addrspace(4) [[GEP_5_X]], ptr addrspace(4) [[GEP_4_X]] |
| 59 | +// LINKED5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef |
| 60 | + |
| 61 | +// LINKED5-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @llvm.amdgcn.abi.version to ptr), align {{.*}} |
| 62 | +// LINKED5-NOT: icmp sge i32 %{{.*}}, 500 |
| 63 | +// LINKED5: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| 64 | +// LINKED5: [[GEP_5_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14 |
| 65 | +// LINKED5: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() |
| 66 | +// LINKED5: [[GEP_4_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 6 |
| 67 | +// LINKED5: select i1 true, ptr addrspace(4) [[GEP_5_Y]], ptr addrspace(4) [[GEP_4_Y]] |
| 68 | +// LINKED5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef |
| 69 | + |
| 70 | +// LINKED5-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @llvm.amdgcn.abi.version to ptr), align {{.*}} |
| 71 | +// LINKED5-NOT: icmp sge i32 %{{.*}}, 500 |
| 72 | +// LINKED5: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| 73 | +// LINKED5: [[GEP_5_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16 |
| 74 | +// LINKED5: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() |
| 75 | +// LINKED5: [[GEP_4_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 8 |
| 76 | +// LINKED5: select i1 true, ptr addrspace(4) [[GEP_5_Z]], ptr addrspace(4) [[GEP_4_Z]] |
| 77 | +// LINKED5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef |
| 78 | +// LINKED5: "amdgpu_code_object_version", i32 500 |
| 79 | + |
| 80 | +#ifdef DEVICELIB /* Selected by the RUN line that passes -DDEVICELIB with -mcode-object-version=none; the resulting bitcode is the one handed to -mlink-builtin-bitcode. */ |
| 81 | +__device__ void bar(int *x, int *y, int *z) /* Stores the x, y and z workgroup-size builtins through the three output pointers. */ |
| 82 | +{ |
| 83 | + *x = __builtin_amdgcn_workgroup_size_x(); /* The LINKED4/LINKED5 check sequences expect x first, then y, then z; keep this order. */ |
| 84 | + *y = __builtin_amdgcn_workgroup_size_y(); |
| 85 | + *z = __builtin_amdgcn_workgroup_size_z(); |
| 86 | +} |
| 87 | +#endif |
| 88 | + |
| 89 | +#ifdef USER /* Selected by the RUN lines that pass -DUSER with -mcode-object-version=4 and -mcode-object-version=5. */ |
| 90 | +__device__ void bar(int *x, int *y, int *z); /* Declaration only: no body for bar is compiled when just USER is defined. */ |
| 91 | +__device__ void foo() /* Calls bar so that the USER translation unit references it. */ |
| 92 | +{ |
| 93 | + int *x, *y, *z; /* NOTE(review): these pointers are never initialized before being passed to bar; presumably acceptable because the RUN lines only emit IR and never execute it -- confirm. */ |
| 94 | + bar(x, y, z); |
| 95 | +} |
| 96 | +#endif |
0 commit comments