@@ -1299,3 +1299,114 @@ loop:
12991299exit:
13001300 ret i64 %iv.1.next
13011301}
1302+
; Test: the loop's only live-out is %iv.trunc, the incremented induction
; variable (%iv.next = %iv + 1) truncated from i64 to i32. The same truncated
; value also feeds the loop exit compare against %n.
;
; Loop body: for each i64 index %iv starting at 0, load the i8 at %arr[%iv],
; add 1 and store it back; continue while trunc(%iv + 1) <u %n.
;
; What the expectations below pin down:
;  * VEC: the exit value is recomputed in the middle block from the vector IV
;    (add splat 1, trunc to <2 x i32>, extract lane 1) and reaches the exit
;    phi from the middle block.
;  * INTERLEAVE: the exit value is recomputed in the middle block from the
;    second part's scalar index (INDEX + 1, then + 1, then trunc to i32).
;
; NOTE(review): the RUN lines are not visible in this chunk. From the checked
; output, VEC looks like vector width 2 and INTERLEAVE like two scalar copies
; per iteration -- confirm against the RUN lines at the top of the file.
; NOTE(review): the CHECK lines look auto-generated; presumably they should be
; regenerated with the update script rather than edited by hand -- confirm.
1303+ define i32 @cast_incremented_iv_live_out (ptr %arr , i32 %n ) {
1304+ ; VEC-LABEL: define i32 @cast_incremented_iv_live_out(
1305+ ; VEC-SAME: ptr [[ARR:%.*]], i32 [[N:%.*]]) {
1306+ ; VEC-NEXT: [[ENTRY:.*]]:
1307+ ; VEC-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
1308+ ; VEC-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP0]], i64 1)
1309+ ; VEC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX]], 2
1310+ ; VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1311+ ; VEC: [[VECTOR_PH]]:
1312+ ; VEC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX]], 2
1313+ ; VEC-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX]], [[N_MOD_VF]]
1314+ ; VEC-NEXT: br label %[[VECTOR_BODY:.*]]
1315+ ; VEC: [[VECTOR_BODY]]:
1316+ ; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1317+ ; VEC-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
1318+ ; VEC-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[ARR]], i64 [[INDEX]]
1319+ ; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP1]], align 1
1320+ ; VEC-NEXT: [[TMP2:%.*]] = add <2 x i8> [[WIDE_LOAD]], splat (i8 1)
1321+ ; VEC-NEXT: store <2 x i8> [[TMP2]], ptr [[TMP1]], align 1
1322+ ; VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1323+ ; VEC-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
1324+ ; VEC-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1325+ ; VEC-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
1326+ ; VEC: [[MIDDLE_BLOCK]]:
1327+ ; VEC-NEXT: [[TMP4:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 1)
1328+ ; VEC-NEXT: [[TMP5:%.*]] = trunc <2 x i64> [[TMP4]] to <2 x i32>
1329+ ; VEC-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP5]], i64 1
1330+ ; VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX]], [[N_VEC]]
1331+ ; VEC-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
1332+ ; VEC: [[SCALAR_PH]]:
1333+ ; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
1334+ ; VEC-NEXT: br label %[[LOOP:.*]]
1335+ ; VEC: [[LOOP]]:
1336+ ; VEC-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
1337+ ; VEC-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[ARR]], i64 [[IV]]
1338+ ; VEC-NEXT: [[VAL:%.*]] = load i8, ptr [[GEP]], align 1
1339+ ; VEC-NEXT: [[VAL_INC:%.*]] = add i8 [[VAL]], 1
1340+ ; VEC-NEXT: store i8 [[VAL_INC]], ptr [[GEP]], align 1
1341+ ; VEC-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
1342+ ; VEC-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV_NEXT]] to i32
1343+ ; VEC-NEXT: [[COND:%.*]] = icmp ult i32 [[IV_TRUNC]], [[N]]
1344+ ; VEC-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT]], {{!llvm.loop ![0-9]+}}
1345+ ; VEC: [[EXIT]]:
1346+ ; VEC-NEXT: [[IV_TRUNC_LCSSA:%.*]] = phi i32 [ [[IV_TRUNC]], %[[LOOP]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ]
1347+ ; VEC-NEXT: ret i32 [[IV_TRUNC_LCSSA]]
1348+ ;
1349+ ; INTERLEAVE-LABEL: define i32 @cast_incremented_iv_live_out(
1350+ ; INTERLEAVE-SAME: ptr [[ARR:%.*]], i32 [[N:%.*]]) {
1351+ ; INTERLEAVE-NEXT: [[ENTRY:.*]]:
1352+ ; INTERLEAVE-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
1353+ ; INTERLEAVE-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP0]], i64 1)
1354+ ; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX]], 2
1355+ ; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1356+ ; INTERLEAVE: [[VECTOR_PH]]:
1357+ ; INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX]], 2
1358+ ; INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX]], [[N_MOD_VF]]
1359+ ; INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]]
1360+ ; INTERLEAVE: [[VECTOR_BODY]]:
1361+ ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1362+ ; INTERLEAVE-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
1363+ ; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[ARR]], i64 [[INDEX]]
1364+ ; INTERLEAVE-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[ARR]], i64 [[TMP1]]
1365+ ; INTERLEAVE-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP2]], align 1
1366+ ; INTERLEAVE-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP3]], align 1
1367+ ; INTERLEAVE-NEXT: [[TMP6:%.*]] = add i8 [[TMP4]], 1
1368+ ; INTERLEAVE-NEXT: [[TMP7:%.*]] = add i8 [[TMP5]], 1
1369+ ; INTERLEAVE-NEXT: store i8 [[TMP6]], ptr [[TMP2]], align 1
1370+ ; INTERLEAVE-NEXT: store i8 [[TMP7]], ptr [[TMP3]], align 1
1371+ ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1372+ ; INTERLEAVE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1373+ ; INTERLEAVE-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
1374+ ; INTERLEAVE: [[MIDDLE_BLOCK]]:
1375+ ; INTERLEAVE-NEXT: [[TMP9:%.*]] = add i64 [[TMP1]], 1
1376+ ; INTERLEAVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
1377+ ; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX]], [[N_VEC]]
1378+ ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
1379+ ; INTERLEAVE: [[SCALAR_PH]]:
1380+ ; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
1381+ ; INTERLEAVE-NEXT: br label %[[LOOP:.*]]
1382+ ; INTERLEAVE: [[LOOP]]:
1383+ ; INTERLEAVE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
1384+ ; INTERLEAVE-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[ARR]], i64 [[IV]]
1385+ ; INTERLEAVE-NEXT: [[VAL:%.*]] = load i8, ptr [[GEP]], align 1
1386+ ; INTERLEAVE-NEXT: [[VAL_INC:%.*]] = add i8 [[VAL]], 1
1387+ ; INTERLEAVE-NEXT: store i8 [[VAL_INC]], ptr [[GEP]], align 1
1388+ ; INTERLEAVE-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
1389+ ; INTERLEAVE-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV_NEXT]] to i32
1390+ ; INTERLEAVE-NEXT: [[COND:%.*]] = icmp ult i32 [[IV_TRUNC]], [[N]]
1391+ ; INTERLEAVE-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT]], {{!llvm.loop ![0-9]+}}
1392+ ; INTERLEAVE: [[EXIT]]:
1393+ ; INTERLEAVE-NEXT: [[IV_TRUNC_LCSSA:%.*]] = phi i32 [ [[IV_TRUNC]], %[[LOOP]] ], [ [[TMP10]], %[[MIDDLE_BLOCK]] ]
1394+ ; INTERLEAVE-NEXT: ret i32 [[IV_TRUNC_LCSSA]]
1395+ ;
1396+ entry:
1397+ br label %loop
1398+
1399+ loop:
; 64-bit induction variable, counting up from 0 in steps of 1.
1400+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %loop ]
; Read-modify-write of one byte: arr[iv] += 1.
1401+ %gep = getelementptr i8 , ptr %arr , i64 %iv
1402+ %val = load i8 , ptr %gep , align 1
1403+ %val.inc = add i8 %val , 1
1404+ store i8 %val.inc , ptr %gep , align 1
1405+ %iv.next = add i64 %iv , 1
; The narrowed copy of the incremented IV drives the exit test and is also
; the value returned from the exit block.
1406+ %iv.trunc = trunc i64 %iv.next to i32
1407+ %cond = icmp ult i32 %iv.trunc , %n
1408+ br i1 %cond , label %loop , label %exit
1409+
1410+ exit:
; Use of %iv.trunc outside the loop: this is the live-out under test.
1411+ ret i32 %iv.trunc
1412+ }
0 commit comments