; ModuleID = '/tmp/single_func.ll'
source_filename = "/home/lambj/git/repro-rocprim-7.13/projects/rocprim/benchmark/benchmark_device_adjacent_difference.cpp"
; Data layout string, read field by field per the LLVM LangRef "Data Layout" syntax:
;   e               little-endian
;   m:e             ELF-style symbol mangling
;   p:64:64         default address space: 64-bit pointers, 64-bit ABI alignment
;   p1 .. p9        per-address-space pointer specs (address spaces 2, 3, 5 and 6
;                   use 32-bit pointers; 7, 8 and 9 use wider-than-64-bit pointers)
;   i64:64, vN:M    ABI alignments for i64 and for vectors of the given bit widths
;   n32:64          native integer widths of 32 and 64 bits
;   S32             natural stack alignment of 32 bits
;   A5              allocas are placed in address space 5
;   G1              globals default to address space 1
;   ni:7:8:9        address spaces 7, 8 and 9 hold non-integral pointers
; NOTE(review): the kernel below accesses address spaces 1, 3 and 4; their hardware
; meaning (presumably global / LDS / constant on AMDGPU) is not stated in this file —
; confirm against the AMDGPU backend documentation.
target datalayout = "e-m:e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
; Target triple: architecture `amdgcn`, vendor `amd`, OS `amdhsa`.
target triple = "amdgcn-amd-amdhsa"
; Function Attrs: convergent nocallback nofree nounwind willreturn
; Declaration of the `llvm.amdgcn.s.barrier` intrinsic: no arguments, returns void.
; The kernel in this file calls it twice; each call is preceded by
; `fence syncscope("workgroup") release` and followed by
; `fence syncscope("workgroup") acquire`.
; Attribute group #0 is referenced here; per the auto-generated "Function Attrs"
; line above it expands to: convergent nocallback nofree nounwind willreturn.
declare void @llvm.amdgcn.s.barrier() #0
; Function Attrs: convergent mustprogress nofree norecurse nounwind willreturn
define amdgpu_kernel void @_ZN7rocprim17ROCPRIM_400400_NS6detail17trampoline_kernelINS0_14default_configENS1_35adjacent_difference_config_selectorILb1EsEEZNS1_24adjacent_difference_implIS3_Lb1ELb1EPsS7_NS0_4plusIvEEEE10hipError_tPvRmT2_T3_mT4_P12ihipStream_tbEUlT_E_NS1_11comp_targetILNS1_3genE0ELNS1_11target_archE4294967295ELNS1_3gpuE0ELNS1_3repE0EEENS1_30default_config_static_selectorELNS0_4arch9wavefront6targetE1EEEvT1_() #1 {
%1 = addrspacecast ptr null to ptr addrspace(1)
%2 = load i64, ptr addrspace(4) null, align 8
%3 = load ptr, ptr addrspace(4) null, align 8
%4 = addrspacecast ptr %3 to ptr addrspace(1)
%5 = load i64, ptr addrspace(4) null, align 8
%6 = load ptr, ptr addrspace(4) null, align 8
%7 = addrspacecast ptr %6 to ptr addrspace(1)
%8 = load i64, ptr addrspace(4) null, align 8
%9 = getelementptr inbounds [2 x i8], ptr addrspace(1) %1, i64 %2
%10 = getelementptr inbounds [2 x i8], ptr addrspace(1) %4, i64 %2
%11 = getelementptr inbounds [2 x i8], ptr addrspace(1) %7, i64 %8
%12 = tail call noundef i32 @llvm.amdgcn.workgroup.id.x()
%13 = mul i32 %12, 736
%14 = freeze i64 %5
%15 = udiv i64 %14, 736
%16 = mul i64 %15, 736
%17 = sub i64 %14, %16
%18 = icmp ne i64 %17, 0
%19 = zext i1 %18 to i64
%20 = zext i32 %12 to i64
%21 = add i64 %8, %20
%22 = add nsw i64 %15, -1
%23 = add nsw i64 %22, %19
%24 = icmp ult i64 %21, %23
%25 = zext i32 %13 to i64
%26 = getelementptr inbounds nuw [2 x i8], ptr addrspace(1) %9, i64 %25
%27 = tail call noundef range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x()
%28 = zext nneg i32 %27 to i64
%29 = getelementptr inbounds nuw [2 x i8], ptr addrspace(1) %26, i64 %28
%30 = load i16, ptr addrspace(1) %29, align 2
%31 = getelementptr inbounds nuw i8, ptr addrspace(1) %29, i64 64
%32 = load i16, ptr addrspace(1) %31, align 2
%33 = getelementptr inbounds nuw i8, ptr addrspace(1) %29, i64 128
%34 = load i16, ptr addrspace(1) %33, align 2
%35 = getelementptr inbounds nuw i8, ptr addrspace(1) %29, i64 192
%36 = load i16, ptr addrspace(1) %35, align 2
%37 = getelementptr inbounds nuw i8, ptr addrspace(1) %29, i64 256
%38 = load i16, ptr addrspace(1) %37, align 2
%39 = getelementptr inbounds nuw i8, ptr addrspace(1) %29, i64 320
%40 = load i16, ptr addrspace(1) %39, align 2
%41 = getelementptr inbounds nuw i8, ptr addrspace(1) %29, i64 384
%42 = load i16, ptr addrspace(1) %41, align 2
%43 = getelementptr inbounds nuw i8, ptr addrspace(1) %29, i64 448
%44 = load i16, ptr addrspace(1) %43, align 2
%45 = getelementptr inbounds nuw i8, ptr addrspace(1) %29, i64 512
%46 = load i16, ptr addrspace(1) %45, align 2
%47 = getelementptr inbounds nuw i8, ptr addrspace(1) %29, i64 576
%48 = load i16, ptr addrspace(1) %47, align 2
%49 = getelementptr inbounds nuw i8, ptr addrspace(1) %29, i64 640
%50 = load i16, ptr addrspace(1) %49, align 2
%51 = getelementptr inbounds nuw i8, ptr addrspace(1) %29, i64 704
%52 = load i16, ptr addrspace(1) %51, align 2
%53 = getelementptr inbounds nuw i8, ptr addrspace(1) %29, i64 768
%54 = load i16, ptr addrspace(1) %53, align 2
%55 = getelementptr inbounds nuw i8, ptr addrspace(1) %29, i64 832
%56 = load i16, ptr addrspace(1) %55, align 2
%57 = getelementptr inbounds nuw i8, ptr addrspace(1) %29, i64 896
%58 = load i16, ptr addrspace(1) %57, align 2
%59 = getelementptr inbounds nuw i8, ptr addrspace(1) %29, i64 960
%60 = load i16, ptr addrspace(1) %59, align 2
%61 = getelementptr inbounds nuw i8, ptr addrspace(1) %29, i64 1024
%62 = load i16, ptr addrspace(1) %61, align 2
%63 = getelementptr inbounds nuw i8, ptr addrspace(1) %29, i64 1088
%64 = load i16, ptr addrspace(1) %63, align 2
%65 = getelementptr inbounds nuw i8, ptr addrspace(1) %29, i64 1152
%66 = load i16, ptr addrspace(1) %65, align 2
%67 = getelementptr inbounds nuw i8, ptr addrspace(1) %29, i64 1216
%68 = load i16, ptr addrspace(1) %67, align 2
%69 = getelementptr inbounds nuw i8, ptr addrspace(1) %29, i64 1280
%70 = load i16, ptr addrspace(1) %69, align 2
%71 = getelementptr inbounds nuw i8, ptr addrspace(1) %29, i64 1344
%72 = load i16, ptr addrspace(1) %71, align 2
%73 = getelementptr inbounds nuw i8, ptr addrspace(1) %29, i64 1408
%74 = load i16, ptr addrspace(1) %73, align 2
%75 = getelementptr inbounds nuw [2 x i8], ptr addrspace(3) null, i32 %27
store i16 %30, ptr addrspace(3) %75, align 2
%76 = getelementptr inbounds nuw i8, ptr addrspace(3) %75, i32 64
store i16 %32, ptr addrspace(3) %76, align 2
%77 = getelementptr inbounds nuw i8, ptr addrspace(3) %75, i32 128
store i16 %34, ptr addrspace(3) %77, align 2
%78 = getelementptr inbounds nuw i8, ptr addrspace(3) %75, i32 192
store i16 %36, ptr addrspace(3) %78, align 2
%79 = getelementptr inbounds nuw i8, ptr addrspace(3) %75, i32 256
store i16 %38, ptr addrspace(3) %79, align 2
%80 = getelementptr inbounds nuw i8, ptr addrspace(3) %75, i32 320
store i16 %40, ptr addrspace(3) %80, align 2
%81 = getelementptr inbounds nuw i8, ptr addrspace(3) %75, i32 384
store i16 %42, ptr addrspace(3) %81, align 2
%82 = getelementptr inbounds nuw i8, ptr addrspace(3) %75, i32 448
store i16 %44, ptr addrspace(3) %82, align 2
%83 = getelementptr inbounds nuw i8, ptr addrspace(3) %75, i32 512
store i16 %46, ptr addrspace(3) %83, align 2
%84 = getelementptr inbounds nuw i8, ptr addrspace(3) %75, i32 576
store i16 %48, ptr addrspace(3) %84, align 2
%85 = getelementptr inbounds nuw i8, ptr addrspace(3) %75, i32 640
store i16 %50, ptr addrspace(3) %85, align 2
%86 = getelementptr inbounds nuw i8, ptr addrspace(3) %75, i32 704
store i16 %52, ptr addrspace(3) %86, align 2
%87 = getelementptr inbounds nuw i8, ptr addrspace(3) %75, i32 768
store i16 %54, ptr addrspace(3) %87, align 2
%88 = getelementptr inbounds nuw i8, ptr addrspace(3) %75, i32 832
store i16 %56, ptr addrspace(3) %88, align 2
%89 = getelementptr inbounds nuw i8, ptr addrspace(3) %75, i32 896
store i16 %58, ptr addrspace(3) %89, align 2
%90 = getelementptr inbounds nuw i8, ptr addrspace(3) %75, i32 960
store i16 %60, ptr addrspace(3) %90, align 2
%91 = getelementptr inbounds nuw i8, ptr addrspace(3) %75, i32 1024
store i16 %62, ptr addrspace(3) %91, align 2
%92 = getelementptr inbounds nuw i8, ptr addrspace(3) %75, i32 1088
store i16 %64, ptr addrspace(3) %92, align 2
%93 = getelementptr inbounds nuw i8, ptr addrspace(3) %75, i32 1152
store i16 %66, ptr addrspace(3) %93, align 2
%94 = getelementptr inbounds nuw i8, ptr addrspace(3) %75, i32 1216
store i16 %68, ptr addrspace(3) %94, align 2
%95 = getelementptr inbounds nuw i8, ptr addrspace(3) %75, i32 1280
store i16 %70, ptr addrspace(3) %95, align 2
%96 = getelementptr inbounds nuw i8, ptr addrspace(3) %75, i32 1344
store i16 %72, ptr addrspace(3) %96, align 2
%97 = getelementptr inbounds nuw i8, ptr addrspace(3) %75, i32 1408
store i16 %74, ptr addrspace(3) %97, align 2
fence syncscope("workgroup") release
tail call void @llvm.amdgcn.s.barrier()
fence syncscope("workgroup") acquire
%98 = mul nuw nsw i32 %27, 46
%99 = getelementptr inbounds nuw i8, ptr addrspace(3) null, i32 %98
%100 = getelementptr inbounds nuw i8, ptr addrspace(3) %99, i32 2
%101 = load <2 x i16>, ptr addrspace(3) %99, align 2
%102 = load <2 x i16>, ptr addrspace(3) %100, align 2
%103 = getelementptr inbounds nuw i8, ptr addrspace(3) %99, i32 6
%104 = load <2 x i16>, ptr addrspace(3) %103, align 2
%105 = getelementptr inbounds nuw i8, ptr addrspace(3) %99, i32 10
%106 = load <2 x i16>, ptr addrspace(3) %105, align 2
%107 = getelementptr inbounds nuw i8, ptr addrspace(3) %99, i32 14
%108 = load <2 x i16>, ptr addrspace(3) %107, align 2
%109 = getelementptr inbounds nuw i8, ptr addrspace(3) %99, i32 18
%110 = load <2 x i16>, ptr addrspace(3) %109, align 2
%111 = getelementptr inbounds nuw i8, ptr addrspace(3) %99, i32 22
%112 = load <2 x i16>, ptr addrspace(3) %111, align 2
%113 = getelementptr inbounds nuw i8, ptr addrspace(3) %99, i32 26
%114 = load <2 x i16>, ptr addrspace(3) %113, align 2
%115 = getelementptr inbounds nuw i8, ptr addrspace(3) %99, i32 30
%116 = load <2 x i16>, ptr addrspace(3) %115, align 2
%117 = getelementptr inbounds nuw i8, ptr addrspace(3) %99, i32 34
%118 = load <2 x i16>, ptr addrspace(3) %117, align 2
%119 = getelementptr inbounds nuw i8, ptr addrspace(3) %99, i32 38
%120 = load <2 x i16>, ptr addrspace(3) %119, align 2
%121 = getelementptr inbounds nuw i8, ptr addrspace(3) %99, i32 42
%122 = load <2 x i16>, ptr addrspace(3) %121, align 2
fence syncscope("workgroup") release
tail call void @llvm.amdgcn.s.barrier()
fence syncscope("workgroup") acquire
%123 = icmp eq i64 %21, %23
%124 = shufflevector <2 x i16> %101, <2 x i16> poison, <23 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%125 = mul i64 %21, 4294966560
%126 = add i64 %125, %5
%127 = trunc i64 %126 to i32
%128 = tail call noundef range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x()
%129 = getelementptr inbounds nuw [2 x i8], ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) null, i32 64), i32 %128
%130 = extractelement <2 x i16> %101, i64 0
store i16 %130, ptr addrspace(3) %129, align 2
%131 = mul nuw nsw i32 %128, 23
%132 = add nuw nsw i32 %131, 1
%133 = shufflevector <23 x i16> <i16 0, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison>, <23 x i16> %124, <23 x i32> <i32 23, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%134 = icmp ult i32 %132, %127
%135 = add <2 x i16> %102, %101
%136 = shufflevector <2 x i16> %135, <2 x i16> poison, <23 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%137 = select i1 %134, <23 x i16> %136, <23 x i16> %133
%138 = shufflevector <23 x i16> %137, <23 x i16> %133, <23 x i32> <i32 0, i32 poison, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45>
%139 = add nuw nsw i32 %131, 2
%140 = shufflevector <2 x i16> %102, <2 x i16> poison, <23 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%141 = shufflevector <23 x i16> %138, <23 x i16> %140, <23 x i32> <i32 0, i32 23, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
%142 = icmp ult i32 %139, %127
%143 = shufflevector <2 x i16> %102, <2 x i16> poison, <2 x i32> <i32 1, i32 poison>
%144 = add <2 x i16> %143, %102
%145 = shufflevector <2 x i16> %144, <2 x i16> poison, <23 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%146 = shufflevector <23 x i16> %141, <23 x i16> %145, <23 x i32> <i32 0, i32 23, i32 poison, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
%147 = select i1 %142, <23 x i16> %146, <23 x i16> %141
%148 = add nuw nsw i32 %131, 3
%149 = shufflevector <23 x i16> %147, <23 x i16> %140, <23 x i32> <i32 0, i32 1, i32 24, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
%150 = icmp ult i32 %148, %127
%151 = shufflevector <2 x i16> %102, <2 x i16> poison, <2 x i32> <i32 1, i32 poison>
%152 = add <2 x i16> %104, %151
%153 = shufflevector <2 x i16> %152, <2 x i16> poison, <23 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%154 = shufflevector <23 x i16> %149, <23 x i16> %153, <23 x i32> <i32 0, i32 1, i32 23, i32 poison, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
%155 = select i1 %150, <23 x i16> %154, <23 x i16> %149
%156 = add nuw nsw i32 %131, 4
%157 = shufflevector <2 x i16> %104, <2 x i16> poison, <23 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%158 = shufflevector <23 x i16> %155, <23 x i16> %157, <23 x i32> <i32 0, i32 1, i32 2, i32 23, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
%159 = icmp ult i32 %156, %127
%160 = shufflevector <2 x i16> %104, <2 x i16> poison, <2 x i32> <i32 1, i32 poison>
%161 = add <2 x i16> %160, %104
%162 = shufflevector <2 x i16> %161, <2 x i16> poison, <23 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%163 = shufflevector <23 x i16> %158, <23 x i16> %162, <23 x i32> <i32 0, i32 1, i32 2, i32 23, i32 poison, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
%164 = select i1 %159, <23 x i16> %163, <23 x i16> %158
%165 = add nuw nsw i32 %131, 5
%166 = shufflevector <23 x i16> %164, <23 x i16> %157, <23 x i32> <i32 0, i32 1, i32 2, i32 3, i32 24, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
%167 = icmp ult i32 %165, %127
%168 = shufflevector <2 x i16> %104, <2 x i16> poison, <2 x i32> <i32 1, i32 poison>
%169 = add <2 x i16> %106, %168
%170 = shufflevector <2 x i16> %169, <2 x i16> poison, <23 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%171 = shufflevector <23 x i16> %166, <23 x i16> %170, <23 x i32> <i32 0, i32 1, i32 2, i32 3, i32 23, i32 poison, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
%172 = select i1 %167, <23 x i16> %171, <23 x i16> %166
%173 = add nuw nsw i32 %131, 6
%174 = shufflevector <2 x i16> %106, <2 x i16> poison, <23 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%175 = shufflevector <23 x i16> %172, <23 x i16> %174, <23 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 23, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
%176 = icmp ult i32 %173, %127
%177 = shufflevector <2 x i16> %106, <2 x i16> poison, <2 x i32> <i32 1, i32 poison>
%178 = add <2 x i16> %177, %106
%179 = shufflevector <2 x i16> %178, <2 x i16> poison, <23 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%180 = shufflevector <23 x i16> %175, <23 x i16> %179, <23 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 23, i32 poison, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
%181 = select i1 %176, <23 x i16> %180, <23 x i16> %175
%182 = add nuw nsw i32 %131, 7
%183 = shufflevector <23 x i16> %181, <23 x i16> %174, <23 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 24, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
%184 = icmp ult i32 %182, %127
%185 = shufflevector <2 x i16> %106, <2 x i16> poison, <2 x i32> <i32 1, i32 poison>
%186 = add <2 x i16> %108, %185
%187 = shufflevector <2 x i16> %186, <2 x i16> poison, <23 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%188 = shufflevector <23 x i16> %183, <23 x i16> %187, <23 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 23, i32 poison, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
%189 = select i1 %184, <23 x i16> %188, <23 x i16> %183
%190 = add nuw nsw i32 %131, 8
%191 = shufflevector <2 x i16> %108, <2 x i16> poison, <23 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%192 = shufflevector <23 x i16> %189, <23 x i16> %191, <23 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
%193 = icmp ult i32 %190, %127
%194 = shufflevector <2 x i16> %108, <2 x i16> poison, <2 x i32> <i32 1, i32 poison>
%195 = add <2 x i16> %194, %108
%196 = shufflevector <2 x i16> %195, <2 x i16> poison, <23 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%197 = shufflevector <23 x i16> %192, <23 x i16> %196, <23 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 23, i32 poison, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
%198 = select i1 %193, <23 x i16> %197, <23 x i16> %192
%199 = add nuw nsw i32 %131, 9
%200 = shufflevector <23 x i16> %198, <23 x i16> %191, <23 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
%201 = icmp ult i32 %199, %127
%202 = shufflevector <2 x i16> %108, <2 x i16> poison, <2 x i32> <i32 1, i32 poison>
%203 = add <2 x i16> %110, %202
%204 = shufflevector <2 x i16> %203, <2 x i16> poison, <23 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%205 = shufflevector <23 x i16> %200, <23 x i16> %204, <23 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 23, i32 poison, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
%206 = select i1 %201, <23 x i16> %205, <23 x i16> %200
%207 = add nuw nsw i32 %131, 10
%208 = shufflevector <2 x i16> %110, <2 x i16> poison, <23 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%209 = shufflevector <23 x i16> %206, <23 x i16> %208, <23 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 23, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
%210 = icmp ult i32 %207, %127
%211 = shufflevector <2 x i16> %110, <2 x i16> poison, <2 x i32> <i32 1, i32 poison>
%212 = add <2 x i16> %211, %110
%213 = shufflevector <2 x i16> %212, <2 x i16> poison, <23 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%214 = shufflevector <23 x i16> %209, <23 x i16> %213, <23 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 23, i32 poison, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
%215 = select i1 %210, <23 x i16> %214, <23 x i16> %209
%216 = add nuw nsw i32 %131, 11
%217 = shufflevector <23 x i16> %215, <23 x i16> %208, <23 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 24, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
%218 = icmp ult i32 %216, %127
%219 = shufflevector <2 x i16> %110, <2 x i16> poison, <2 x i32> <i32 1, i32 poison>
%220 = add <2 x i16> %112, %219
%221 = shufflevector <2 x i16> %220, <2 x i16> poison, <23 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%222 = shufflevector <23 x i16> %217, <23 x i16> %221, <23 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 23, i32 poison, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
%223 = select i1 %218, <23 x i16> %222, <23 x i16> %217
%224 = add nuw nsw i32 %131, 12
%225 = shufflevector <2 x i16> %112, <2 x i16> poison, <23 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%226 = shufflevector <23 x i16> %223, <23 x i16> %225, <23 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 23, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
%227 = icmp ult i32 %224, %127
%228 = shufflevector <2 x i16> %112, <2 x i16> poison, <2 x i32> <i32 1, i32 poison>
%229 = add <2 x i16> %228, %112
%230 = shufflevector <2 x i16> %229, <2 x i16> poison, <23 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%231 = shufflevector <23 x i16> %226, <23 x i16> %230, <23 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 23, i32 poison, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
%232 = select i1 %227, <23 x i16> %231, <23 x i16> %226
%233 = add nuw nsw i32 %131, 13
%234 = shufflevector <23 x i16> %232, <23 x i16> %225, <23 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 24, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
%235 = icmp ult i32 %233, %127
%236 = shufflevector <2 x i16> %112, <2 x i16> poison, <2 x i32> <i32 1, i32 poison>
%237 = add <2 x i16> %114, %236
%238 = shufflevector <2 x i16> %237, <2 x i16> poison, <23 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%239 = shufflevector <23 x i16> %234, <23 x i16> %238, <23 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 23, i32 poison, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
%240 = select i1 %235, <23 x i16> %239, <23 x i16> %234
%241 = add nuw nsw i32 %131, 14
%242 = shufflevector <2 x i16> %114, <2 x i16> poison, <23 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%243 = shufflevector <23 x i16> %240, <23 x i16> %242, <23 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 23, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
%244 = icmp ult i32 %241, %127
%245 = shufflevector <2 x i16> %114, <2 x i16> poison, <2 x i32> <i32 1, i32 poison>
%246 = add <2 x i16> %245, %114
%247 = shufflevector <2 x i16> %246, <2 x i16> poison, <23 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%248 = shufflevector <23 x i16> %243, <23 x i16> %247, <23 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 23, i32 poison, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
%249 = select i1 %244, <23 x i16> %248, <23 x i16> %243
%250 = add nuw nsw i32 %131, 15
%251 = shufflevector <23 x i16> %249, <23 x i16> %242, <23 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 24, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
%252 = icmp ult i32 %250, %127
%253 = shufflevector <2 x i16> %114, <2 x i16> poison, <2 x i32> <i32 1, i32 poison>
%254 = add <2 x i16> %116, %253
%255 = shufflevector <2 x i16> %254, <2 x i16> poison, <23 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%256 = shufflevector <23 x i16> %251, <23 x i16> %255, <23 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 23, i32 poison, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
%257 = select i1 %252, <23 x i16> %256, <23 x i16> %251
%258 = add nuw nsw i32 %131, 16
%259 = shufflevector <2 x i16> %116, <2 x i16> poison, <23 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%260 = shufflevector <23 x i16> %257, <23 x i16> %259, <23 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 23, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
%261 = icmp ult i32 %258, %127
%262 = shufflevector <2 x i16> %116, <2 x i16> poison, <2 x i32> <i32 1, i32 poison>
%263 = add <2 x i16> %262, %116
%264 = shufflevector <2 x i16> %263, <2 x i16> poison, <23 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%265 = shufflevector <23 x i16> %260, <23 x i16> %264, <23 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 23, i32 poison, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
%266 = select i1 %261, <23 x i16> %265, <23 x i16> %260
%267 = add nuw nsw i32 %131, 17
%268 = shufflevector <23 x i16> %266, <23 x i16> %259, <23 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 24, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
%269 = icmp ult i32 %267, %127
%270 = shufflevector <2 x i16> %116, <2 x i16> poison, <2 x i32> <i32 1, i32 poison>
%271 = add <2 x i16> %118, %270
%272 = shufflevector <2 x i16> %271, <2 x i16> poison, <23 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%273 = shufflevector <23 x i16> %268, <23 x i16> %272, <23 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 23, i32 poison, i32 18, i32 19, i32 20, i32 21, i32 22>
%274 = select i1 %269, <23 x i16> %273, <23 x i16> %268
%275 = add nuw nsw i32 %131, 18
%276 = shufflevector <2 x i16> %118, <2 x i16> poison, <23 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%277 = shufflevector <23 x i16> %274, <23 x i16> %276, <23 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 23, i32 18, i32 19, i32 20, i32 21, i32 22>
%278 = icmp ult i32 %275, %127
%279 = shufflevector <2 x i16> %118, <2 x i16> poison, <2 x i32> <i32 1, i32 poison>
%280 = add <2 x i16> %279, %118
%281 = shufflevector <2 x i16> %280, <2 x i16> poison, <23 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%282 = shufflevector <23 x i16> %277, <23 x i16> %281, <23 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 23, i32 poison, i32 19, i32 20, i32 21, i32 22>
%283 = select i1 %278, <23 x i16> %282, <23 x i16> %277
%284 = add nuw nsw i32 %131, 19
%285 = shufflevector <23 x i16> %283, <23 x i16> %276, <23 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 24, i32 19, i32 20, i32 21, i32 22>
%286 = icmp ult i32 %284, %127
%287 = shufflevector <2 x i16> %118, <2 x i16> poison, <2 x i32> <i32 1, i32 poison>
%288 = add <2 x i16> %120, %287
%289 = shufflevector <2 x i16> %288, <2 x i16> poison, <23 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%290 = shufflevector <23 x i16> %285, <23 x i16> %289, <23 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 23, i32 poison, i32 20, i32 21, i32 22>
%291 = select i1 %286, <23 x i16> %290, <23 x i16> %285
%292 = add nuw nsw i32 %131, 20
%293 = shufflevector <2 x i16> %120, <2 x i16> poison, <23 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%294 = shufflevector <23 x i16> %291, <23 x i16> %293, <23 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 23, i32 20, i32 21, i32 22>
%295 = icmp ult i32 %292, %127
%296 = shufflevector <2 x i16> %120, <2 x i16> poison, <2 x i32> <i32 1, i32 poison>
%297 = add <2 x i16> %296, %120
%298 = shufflevector <2 x i16> %297, <2 x i16> poison, <23 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%299 = shufflevector <23 x i16> %294, <23 x i16> %298, <23 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 23, i32 poison, i32 21, i32 22>
%300 = select i1 %295, <23 x i16> %299, <23 x i16> %294
%301 = add nuw nsw i32 %131, 21
%302 = shufflevector <23 x i16> %300, <23 x i16> %293, <23 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 24, i32 21, i32 22>
%303 = icmp ult i32 %301, %127
%304 = shufflevector <2 x i16> %120, <2 x i16> poison, <2 x i32> <i32 1, i32 poison>
%305 = add <2 x i16> %122, %304
%306 = shufflevector <2 x i16> %305, <2 x i16> poison, <23 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%307 = shufflevector <23 x i16> %302, <23 x i16> %306, <23 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 23, i32 poison, i32 22>
%308 = select i1 %303, <23 x i16> %307, <23 x i16> %302
%309 = add nuw nsw i32 %131, 22
%310 = shufflevector <2 x i16> %122, <2 x i16> poison, <23 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%311 = shufflevector <23 x i16> %308, <23 x i16> %310, <23 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 23, i32 22>
%312 = icmp ult i32 %309, %127
%313 = shufflevector <2 x i16> %122, <2 x i16> poison, <2 x i32> <i32 1, i32 poison>
%314 = add <2 x i16> %313, %122
%315 = shufflevector <2 x i16> %314, <2 x i16> poison, <23 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%316 = shufflevector <23 x i16> %311, <23 x i16> %315, <23 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 23, i32 poison>
%317 = select i1 %312, <23 x i16> %316, <23 x i16> %311
fence syncscope("workgroup") release
tail call void @llvm.amdgcn.s.barrier()
fence syncscope("workgroup") acquire
%318 = shufflevector <23 x i16> %317, <23 x i16> %310, <23 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 24>
%319 = add nuw nsw i32 %131, 23
%320 = icmp ult i32 %319, %127
%321 = extractelement <2 x i16> %122, i64 1
%322 = getelementptr inbounds nuw i8, ptr addrspace(3) %129, i32 2
%323 = load i16, ptr addrspace(3) %322, align 2
%324 = add i16 %323, %321
%325 = insertelement <23 x i16> %318, i16 %324, i64 22
fence syncscope("workgroup") release
tail call void @llvm.amdgcn.s.barrier()
fence syncscope("workgroup") acquire
%326 = zext i32 %13 to i64
%327 = getelementptr inbounds nuw [2 x i8], ptr addrspace(1) %10, i64 %326
%328 = tail call noundef range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x()
%329 = mul nuw nsw i32 %328, 46
%330 = getelementptr inbounds nuw i8, ptr addrspace(3) null, i32 %329
%331 = shufflevector <23 x i16> %325, <23 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
store <4 x i16> %331, ptr addrspace(3) %330, align 2
%332 = getelementptr inbounds nuw i8, ptr addrspace(3) %330, i32 8
%333 = shufflevector <23 x i16> %325, <23 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
store <4 x i16> %333, ptr addrspace(3) %332, align 2
%334 = getelementptr inbounds nuw i8, ptr addrspace(3) %330, i32 16
%335 = shufflevector <23 x i16> %325, <23 x i16> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
store <4 x i16> %335, ptr addrspace(3) %334, align 2
%336 = getelementptr inbounds nuw i8, ptr addrspace(3) %330, i32 24
%337 = shufflevector <23 x i16> %325, <23 x i16> poison, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
store <4 x i16> %337, ptr addrspace(3) %336, align 2
%338 = getelementptr inbounds nuw i8, ptr addrspace(3) %330, i32 32
%339 = shufflevector <23 x i16> %325, <23 x i16> poison, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
store <4 x i16> %339, ptr addrspace(3) %338, align 2
%340 = getelementptr inbounds nuw i8, ptr addrspace(3) %330, i32 40
%341 = shufflevector <23 x i16> %325, <23 x i16> poison, <2 x i32> <i32 20, i32 21>
store <2 x i16> %341, ptr addrspace(3) %340, align 2
%342 = extractelement <23 x i16> %325, i64 22
%343 = getelementptr inbounds nuw i8, ptr addrspace(3) %330, i32 44
store i16 %342, ptr addrspace(3) %343, align 2
fence syncscope("workgroup") release
tail call void @llvm.amdgcn.s.barrier()
fence syncscope("workgroup") acquire
%344 = zext nneg i32 %328 to i64
%345 = getelementptr inbounds nuw [2 x i8], ptr addrspace(3) null, i32 %328
%346 = load i16, ptr addrspace(3) %345, align 2
%347 = getelementptr inbounds nuw i8, ptr addrspace(3) %345, i32 64
%348 = load i16, ptr addrspace(3) %347, align 2
%349 = getelementptr inbounds nuw i8, ptr addrspace(3) %345, i32 128
%350 = load i16, ptr addrspace(3) %349, align 2
%351 = getelementptr inbounds nuw i8, ptr addrspace(3) %345, i32 192
%352 = load i16, ptr addrspace(3) %351, align 2
%353 = getelementptr inbounds nuw i8, ptr addrspace(3) %345, i32 256
%354 = load i16, ptr addrspace(3) %353, align 2
%355 = getelementptr inbounds nuw i8, ptr addrspace(3) %345, i32 320
%356 = load i16, ptr addrspace(3) %355, align 2
%357 = getelementptr inbounds nuw i8, ptr addrspace(3) %345, i32 384
%358 = load i16, ptr addrspace(3) %357, align 2
%359 = getelementptr inbounds nuw i8, ptr addrspace(3) %345, i32 448
%360 = load i16, ptr addrspace(3) %359, align 2
%361 = getelementptr inbounds nuw i8, ptr addrspace(3) %345, i32 512
%362 = load i16, ptr addrspace(3) %361, align 2
%363 = getelementptr inbounds nuw i8, ptr addrspace(3) %345, i32 576
%364 = load i16, ptr addrspace(3) %363, align 2
%365 = getelementptr inbounds nuw i8, ptr addrspace(3) %345, i32 640
%366 = load i16, ptr addrspace(3) %365, align 2
%367 = getelementptr inbounds nuw i8, ptr addrspace(3) %345, i32 704
%368 = load i16, ptr addrspace(3) %367, align 2
%369 = getelementptr inbounds nuw i8, ptr addrspace(3) %345, i32 768
%370 = load i16, ptr addrspace(3) %369, align 2
%371 = getelementptr inbounds nuw i8, ptr addrspace(3) %345, i32 832
%372 = load i16, ptr addrspace(3) %371, align 2
%373 = getelementptr inbounds nuw i8, ptr addrspace(3) %345, i32 896
%374 = load i16, ptr addrspace(3) %373, align 2
%375 = getelementptr inbounds nuw i8, ptr addrspace(3) %345, i32 960
%376 = load i16, ptr addrspace(3) %375, align 2
%377 = getelementptr inbounds nuw i8, ptr addrspace(3) %345, i32 1024
%378 = load i16, ptr addrspace(3) %377, align 2
%379 = getelementptr inbounds nuw i8, ptr addrspace(3) %345, i32 1088
%380 = load i16, ptr addrspace(3) %379, align 2
%381 = getelementptr inbounds nuw i8, ptr addrspace(3) %345, i32 1152
%382 = load i16, ptr addrspace(3) %381, align 2
%383 = getelementptr inbounds nuw i8, ptr addrspace(3) %345, i32 1216
%384 = load i16, ptr addrspace(3) %383, align 2
%385 = getelementptr inbounds nuw i8, ptr addrspace(3) %345, i32 1280
%386 = load i16, ptr addrspace(3) %385, align 2
%387 = getelementptr inbounds nuw i8, ptr addrspace(3) %345, i32 1344
%388 = load i16, ptr addrspace(3) %387, align 2
%389 = getelementptr inbounds nuw i8, ptr addrspace(3) %345, i32 1408
%390 = load i16, ptr addrspace(3) %389, align 2
%391 = getelementptr inbounds nuw [2 x i8], ptr addrspace(1) %327, i64 %344
store i16 %346, ptr addrspace(1) %391, align 2
%392 = getelementptr inbounds nuw i8, ptr addrspace(1) %391, i64 64
store i16 %348, ptr addrspace(1) %392, align 2
%393 = getelementptr inbounds nuw i8, ptr addrspace(1) %391, i64 128
store i16 %350, ptr addrspace(1) %393, align 2
%394 = getelementptr inbounds nuw i8, ptr addrspace(1) %391, i64 192
store i16 %352, ptr addrspace(1) %394, align 2
%395 = getelementptr inbounds nuw i8, ptr addrspace(1) %391, i64 256
store i16 %354, ptr addrspace(1) %395, align 2
%396 = getelementptr inbounds nuw i8, ptr addrspace(1) %391, i64 320
store i16 %356, ptr addrspace(1) %396, align 2
%397 = getelementptr inbounds nuw i8, ptr addrspace(1) %391, i64 384
store i16 %358, ptr addrspace(1) %397, align 2
%398 = getelementptr inbounds nuw i8, ptr addrspace(1) %391, i64 448
store i16 %360, ptr addrspace(1) %398, align 2
%399 = getelementptr inbounds nuw i8, ptr addrspace(1) %391, i64 512
store i16 %362, ptr addrspace(1) %399, align 2
%400 = getelementptr inbounds nuw i8, ptr addrspace(1) %391, i64 576
store i16 %364, ptr addrspace(1) %400, align 2
%401 = getelementptr inbounds nuw i8, ptr addrspace(1) %391, i64 640
store i16 %366, ptr addrspace(1) %401, align 2
%402 = getelementptr inbounds nuw i8, ptr addrspace(1) %391, i64 704
store i16 %368, ptr addrspace(1) %402, align 2
%403 = getelementptr inbounds nuw i8, ptr addrspace(1) %391, i64 768
store i16 %370, ptr addrspace(1) %403, align 2
%404 = getelementptr inbounds nuw i8, ptr addrspace(1) %391, i64 832
store i16 %372, ptr addrspace(1) %404, align 2
%405 = getelementptr inbounds nuw i8, ptr addrspace(1) %391, i64 896
store i16 %374, ptr addrspace(1) %405, align 2
%406 = getelementptr inbounds nuw i8, ptr addrspace(1) %391, i64 960
store i16 %376, ptr addrspace(1) %406, align 2
%407 = getelementptr inbounds nuw i8, ptr addrspace(1) %391, i64 1024
store i16 %378, ptr addrspace(1) %407, align 2
%408 = getelementptr inbounds nuw i8, ptr addrspace(1) %391, i64 1088
store i16 %380, ptr addrspace(1) %408, align 2
%409 = getelementptr inbounds nuw i8, ptr addrspace(1) %391, i64 1152
store i16 %382, ptr addrspace(1) %409, align 2
%410 = getelementptr inbounds nuw i8, ptr addrspace(1) %391, i64 1216
store i16 %384, ptr addrspace(1) %410, align 2
%411 = getelementptr inbounds nuw i8, ptr addrspace(1) %391, i64 1280
store i16 %386, ptr addrspace(1) %411, align 2
%412 = getelementptr inbounds nuw i8, ptr addrspace(1) %391, i64 1344
store i16 %388, ptr addrspace(1) %412, align 2
ret void
}
; --- Intrinsic declarations referenced by the kernel above -------------------
; @llvm.amdgcn.workgroup.id.x: the kernel multiplies its result by 736 to form
; the per-workgroup element offset.
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare noundef i32 @llvm.amdgcn.workgroup.id.x() #2
; @llvm.amdgcn.workitem.id.x: the declared return range is [0, 1024). The
; kernel uses it to index addrspace(3) memory and the final global stores.
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare noundef range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x() #2
; uselistorder directives
; Pins the use-list order of @llvm.amdgcn.workitem.id.x; the three indexes
; correspond to three uses of the intrinsic in this module.
uselistorder ptr @llvm.amdgcn.workitem.id.x, { 2, 1, 0 }
; #0: attached to the @llvm.amdgcn.s.barrier declaration (convergent).
attributes #0 = { convergent nocallback nofree nounwind willreturn }
; #1: attached to the amdgpu_kernel definition; selects target-cpu gfx950 and a
; flat work-group size of 1..32.
; NOTE(review): this group carries "amdgpu-no-workgroup-id-x" and
; "amdgpu-no-workitem-id-x" although the kernel body calls both
; @llvm.amdgcn.workgroup.id.x and @llvm.amdgcn.workitem.id.x. Presumably an
; artifact of how the IR was extracted/reduced -- confirm before reusing this
; file as a regression test.
attributes #1 = { convergent mustprogress nofree norecurse nounwind willreturn "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,32" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx950" "uniform-work-group-size" }
; #2: attached to the two ID intrinsics declared above (memory(none), speculatable).
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
Summary
clang 23.0.0git (and standalone `llc`) hangs indefinitely in "AMDGPU DAG->DAG Pattern Instruction Selection" when compiling a rocPRIM `adjacent_difference_impl` trampoline kernel for `gfx950` at `-O3`. The compiler thread sits at 100% CPU with no output. The same IR compiles in ROCm 7.2 (older LLVM 23-era amd-staging) and hangs starting in ROCm 7.13.

Original downstream tracking: ROCM-24721. Reproduced cleanly on a workstation with the publicly available `rocm-sdk-devel==7.13.0a20260515` wheel.

Affected versions
- `rocm-sdk-devel==7.13.0a20260515` | `43215c73116c407735c85a180d174f718798c328` | `2506c552d8428e2cc1778bef048b20f818e06bb3`
- amd-staging-derived `llc` (AOMP-23.0-60, SHA `8b7c0c42edfe`) — so the bug is in shared/upstream AMDGPU codegen, not unique to any one downstream patch set.

Known good vs known bad
- Known good: `release/rocm-rel-7.2` — `c793782b03ab`
- Known bad: `rocm-sdk-devel==7.13.0a20260515` (per-family `gfx950-dcgpu` index) — `5b1eed6278de`

Bisect window is roughly 5 months of upstream commits plus the AMD patches that landed on `amd-staging` between those merge-bases. (Not bisected to a single commit yet — see "What we tried".)

Reproducer
Setup (no GPU required — compile-only)
Reproduce with `llc`

Save the IR below as `reduced.ll`, then:

    llc -O3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 reduced.ll -o /tmp/out.s
    # Hangs forever at 100% CPU. Kill with Ctrl-C.

Reproduce end-to-end with clang (matches original repro)
Diagnostic output
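With `-mllvm -debug-pass=Executions`, the last pass entered before the hang is the one named in the Summary, "AMDGPU DAG->DAG Pattern Instruction Selection" (the verbatim pass listing is missing from this copy of the report), running on the function in the reduced IR, i.e. the rocPRIM `adjacent_difference_impl` trampoline kernel instantiated for `short*` + `plus<void>` at `gfx950`.

`ps` confirms `clang-23` is pinned at 100% CPU; the compile completes through the rest of the TU when codegen is bypassed (`-emit-llvm` succeeds in ~9 s on the same TU). So the hang is purely in SDAG ISel for this one function.

What we tried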
Commit `5d018e93fe98` ("AMDGPU: Perform zero/any extend combine into permute") looked suspicious — it adds `ISD::ANY_EXTEND` to the DAG-combine list with an `AfterLegalizeTypes` threshold, creates `AMDGPUISD::PERM` nodes, and lands inside the affected window (Jan 28 2026). Reverting it from the source tree and rebuilding `llc` did not fix the hang. So either the trigger is a different commit, or it's a multi-commit interaction.

Reduction process used
1. Captured the `clang -cc1` invocation from the rocPRIM build (`hipcc -###`).
2. Replaced `-emit-obj` with `-emit-llvm` → IR dumped in 9 s (so the hang is purely codegen).
3. `llvm-extract --func=… --recursive` reduced 602 functions → 1.
4. `llvm-reduce --test=<hangs.sh>` shrunk the single function from 1316 → 505 lines.
5. `llc` still hangs on the final 505-line IR.

Environment

x86_64

Reduced IR
Save the block below as `reduced.ll` and run:

    llc -O2 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 reduced.ll -o /tmp/out.s
    # Hangs forever at 100% CPU. Kill with Ctrl-C.
    # (llc must be built with the suspect commit in its source.)

reduced.ll (505 lines, single function, 412 instructions)
cc: AMDGPU codegen