@@ -229,42 +229,6 @@ func.func @batch_matmul_with_padding_strategy(%arg0: tensor<1x?x1280xf16>, %arg1
229229
230230// -----
231231
232- #pipeline_layout = #hal.pipeline.layout <bindings = [ // Two storage-buffer bindings; binding 0 is bound read-only and binding 1 write-only by the subspans below.
233- #hal.pipeline.binding <storage_buffer >,
234- #hal.pipeline.binding <storage_buffer >
235- ]>
236- func.func @_batch_matmul_narrow_n_2_dispatch_4_unpack_i32 () attributes {translation_info = #iree_codegen.translation_info <pipeline = CPUDataTiling >} { // Test input; the CHECK lines that follow expect tensor.empty() to appear before the scf.for.
237- %c0_i32 = arith.constant 0 : i32
238- %c2 = arith.constant 2 : index
239- %c128 = arith.constant 128 : index
240- %c0 = arith.constant 0 : index
241- %0 = hal.interface.binding.subspan layout (#pipeline_layout ) binding (0 ) alignment (64 ) offset (%c128 ) flags (ReadOnly ) : !iree_tensor_ext.dispatch.tensor <readonly :tensor <2 x1 x1 x2 x8 xi32 >> // Input: binding 0 at byte offset %c128.
242- %1 = hal.interface.binding.subspan layout (#pipeline_layout ) binding (1 ) alignment (64 ) offset (%c0 ) : !iree_tensor_ext.dispatch.tensor <writeonly :tensor <2 x3 x2 xi32 >> // Output: binding 1 at offset 0.
243- %workgroup_id_x = hal.interface.workgroup.id [0 ] : index
244- %workgroup_count_x = hal.interface.workgroup.count [0 ] : index
245- scf.for %arg0 = %workgroup_id_x to %c2 step %workgroup_count_x { // Iterates from the workgroup id up to 2 in steps of the workgroup count, so the trip count is not a compile-time constant.
246- %2 = iree_tensor_ext.dispatch.tensor.load %1 , offsets = [%arg0 , 0 , 0 ], sizes = [1 , 3 , 2 ], strides = [1 , 1 , 1 ] : !iree_tensor_ext.dispatch.tensor <writeonly :tensor <2 x3 x2 xi32 >> -> tensor <1 x3 x2 xi32 > // Slice %arg0 of the output tensor, used as the insert_slice destination.
247- %3 = iree_tensor_ext.dispatch.tensor.load %0 , offsets = [%arg0 , 0 , 0 , 0 , 0 ], sizes = [1 , 1 , 1 , 2 , 8 ], strides = [1 , 1 , 1 , 1 , 1 ] : !iree_tensor_ext.dispatch.tensor <readonly :tensor <2 x1 x1 x2 x8 xi32 >> -> tensor <1 x1 x1 x2 x8 xi32 > // Slice %arg0 of the input tensor.
248- %4 = vector.transfer_read %3 [%c0 , %c0 , %c0 , %c0 , %c0 ], %c0_i32 {in_bounds = [true , true ]} : tensor <1 x1 x1 x2 x8 xi32 >, vector <2 x8 xi32 > // Reads a 2x8 vector from the 5-D slice.
249- %5 = vector.transpose %4 , [1 , 0 ] : vector <2 x8 xi32 > to vector <8 x2 xi32 >
250- %6 = tensor.empty () : tensor <3 x2 xi32 > // Takes no operands, so it does not depend on %arg0; this is the op the CHECK lines expect above the loop.
251- %7 = vector.transfer_write %5 , %6 [%c0 , %c0 ] {in_bounds = [false , true ]} : vector <8 x2 xi32 >, tensor <3 x2 xi32 > // Dim 0 is marked out-of-bounds: an 8-row vector is written into a 3-row tensor.
252- %inserted_slice = tensor.insert_slice %7 into %2 [0 , 0 , 0 ] [1 , 3 , 2 ] [1 , 1 , 1 ] : tensor <3 x2 xi32 > into tensor <1 x3 x2 xi32 >
253- iree_tensor_ext.dispatch.tensor.store %inserted_slice , %1 , offsets = [%arg0 , 0 , 0 ], sizes = [1 , 3 , 2 ], strides = [1 , 1 , 1 ] : tensor <1 x3 x2 xi32 > -> !iree_tensor_ext.dispatch.tensor <writeonly :tensor <2 x3 x2 xi32 >> // Stores the updated slice back to output slice %arg0.
254- }
255- return
256- }
257-
258- // CHECK-LABEL: func.func @_batch_matmul_narrow_n_2_dispatch_4_unpack_i32
259- // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
260- // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<3x2xi32>
261- // CHECK: scf.for
262- // CHECK: %[[READ:.+]] = vector.transfer_read
263- // CHECK: %[[TRANS:.+]] = vector.transpose %[[READ]], [1, 0] : vector<2x8xi32> to vector<8x2xi32>
264- // CHECK: vector.transfer_write %[[TRANS]], %[[EMPTY]][%[[C0]], %[[C0]]] {in_bounds = [false, true]} : vector<8x2xi32>, tensor<3x2xi32>
265-
266- // -----
267-
268232func.func @subset_hoisting_invariant_tensor (%init: tensor <64 x64 xf32 >, %t: tensor <64 x64 xf32 >) -> tensor <64 x64 xf32 > {
269233 %c0 = arith.constant 0 : index
270234 %c1 = arith.constant 1 : index
@@ -373,3 +337,30 @@ func.func @licm_generic(%source: tensor<32x32xf16>, %idx : index) -> tensor<32x3
373337// CHECK: linalg.generic
374338// CHECK-NOT: tensor.extract
375339// CHECK: return
340+
341+ // -----
342+
343+ // Verify that loop-invariant ops are not hoisted out of a loop whose body may
344+ // execute zero times.
345+ func.func @no_hoist_from_possibly_unexecuted_region (%arg0: tensor <4 x8 xi32 >) -> tensor <8 x4 xi32 > { // Test input: the CHECK lines after this function expect the three vector ops to remain after the scf.for line.
346+ %c0_i32 = arith.constant 0 : i32
347+ %c0 = arith.constant 0 : index
348+ %c1 = arith.constant 1 : index
349+ %c100 = arith.constant 100 : index
350+ %workgroup_id_x = hal.interface.workgroup.id [0 ] : index // Used as the loop lower bound, which is therefore an op result rather than a constant.
351+ %0 = tensor.empty () : tensor <8 x4 xi32 >
352+ %1 = scf.for %arg1 = %workgroup_id_x to %c1 step %c100 iter_args (%arg2 = %0 ) -> tensor <8 x4 xi32 > { // Range is [%workgroup_id_x, 1) with step 100: the body runs at most once, and not at all when the id is >= 1.
353+ %2 = vector.transfer_read %arg0 [%c0 , %c0 ], %c0_i32 {in_bounds = [true , true ]} : tensor <4 x8 xi32 >, vector <2 x8 xi32 > // All operands (%arg0, %c0, %c0_i32) are defined outside the loop, i.e. loop-invariant.
354+ %3 = vector.transpose %2 , [1 , 0 ] : vector <2 x8 xi32 > to vector <8 x2 xi32 > // Depends only on %2, so it is loop-invariant as well.
355+ %4 = vector.transfer_write %3 , %arg2 [%c0 , %c0 ] {in_bounds = [true , true ]} : vector <8 x2 xi32 >, tensor <8 x4 xi32 > // Writes into the iter_arg %arg2, so this op depends on loop-carried state.
356+ scf.yield %4 : tensor <8 x4 xi32 >
357+ }
358+ return %1 : tensor <8 x4 xi32 >
359+ }
360+
361+ // CHECK-LABEL: func.func @no_hoist_from_possibly_unexecuted_region
362+ // CHECK: scf.for {{.*}} {
363+ // CHECK: vector.transfer_read
364+ // CHECK: vector.transpose
365+ // CHECK: vector.transfer_write
366+ // CHECK: }
0 commit comments