@@ -75,12 +75,13 @@ verifyGPUMatmulPipeline(Operation *op,
                        IREE::Codegen::LoweringConfigAttr loweringConfig,
                        IREE::Codegen::TranslationInfoAttr translationInfo,
                        ArrayRef<int64_t> workgroupSize) {
-  // This verifier only applies to matmul.
  CodeGenPipeline pipeline = translationInfo.getDispatchLoweringPassPipeline();
+
  if (pipeline != CodeGenPipeline::LLVMGPUMatmulTensorCore &&
      pipeline != CodeGenPipeline::LLVMGPUMatmulTensorCoreMmaSync) {
    return success();
  }
+
  // Only verify batched and unbatched matmul.
  if (!isa<linalg::MatmulOp, linalg::BatchMatmulOp>(op)) {
    return success();
@@ -234,4 +235,63 @@ verifyGPUMatmulPipeline(Operation *op,
  return success();
}

+/// Verifies pipelines that use iree_gpu.lowering_config attributes.
+LogicalResult
+verifyGPUMatmulPipeline(Operation *op,
+                        IREE::GPU::LoweringConfigAttr loweringConfig,
+                        IREE::Codegen::TranslationInfoAttr translationInfo) {
+
+  CodeGenPipeline pipeline = translationInfo.getDispatchLoweringPassPipeline();
+  // TODO: add verification for other pipelines.
+  if (pipeline != CodeGenPipeline::LLVMGPUVectorDistribute) {
+    return success();
+  }
+
+  // Only verify batched and unbatched matmul.
+  if (!isa<linalg::MatmulOp, linalg::BatchMatmulOp>(op)) {
+    return success();
+  }
+
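+  // Query the static tile sizes that the lowering config records for the
+  // reduction tiling level.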
+  unsigned reduction = static_cast<uint32_t>(IREE::GPU::TilingLevel::Reduction);
+  unsigned numLoops = llvm::cast<linalg::LinalgOp>(op).getNumLoops();
+  size_t size = 0;
+
+  SmallVector<int64_t> reductionTileSizes =
+      loweringConfig.getStaticTilingLevelSizes(reduction, op);
+
+  size = reductionTileSizes.size();
+
+  if (size > numLoops) {
+    return op->emitOpError("expected number of reduction tile sizes to be "
+                           "less than or equal to the number of loops");
+  }
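+  // Non-zero reduction tile sizes may only tile reduction loops.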
+  for (size_t i = 0; i < size; ++i) {
+    if (reductionTileSizes[i] > 0 &&
+        llvm::cast<linalg::LinalgOp>(op).getIteratorTypesArray()[i] !=
+            utils::IteratorType::reduction) {
+      return op->emitOpError(
+          "expected non-zero reduction tile to have a reduction iterator");
+    }
+  }
+
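+  // Conversely, non-zero workgroup tile sizes may only tile loops with
+  // parallel iterators.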
+  SmallVector<int64_t> workgroupTileSizes =
+      loweringConfig.getWorkgroupTileSizes();
+  size = workgroupTileSizes.size();
+  for (size_t i = 0; i < size; ++i) {
+    if (workgroupTileSizes[i] > 0 &&
+        llvm::cast<linalg::LinalgOp>(op).getIteratorTypesArray()[i] !=
+            utils::IteratorType::parallel) {
+      return op->emitOpError(
+          "expected non-zero workgroup tile to have a parallel iterator");
+    }
+  }
+
+  return success();
+}
+
} // namespace mlir::iree_compiler
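
The two loops in the new verifier encode one invariant per tiling level: a non-zero tile size at loop position i is only legal when loop i has the matching iterator type. Below is a minimal standalone sketch of that invariant in plain C++ — it uses none of the IREE/MLIR APIs above, and all names in it are illustrative only.

// Standalone sketch of the invariant verifyGPUMatmulPipeline enforces.
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

enum class IteratorType { parallel, reduction };

// A non-zero tile size at position i is only legal when loop i has the
// expected iterator type, and there may be no more tile sizes than loops.
static bool tilesMatchIterators(const std::vector<int64_t> &tileSizes,
                                const std::vector<IteratorType> &iterators,
                                IteratorType expected) {
  if (tileSizes.size() > iterators.size())
    return false;
  for (size_t i = 0; i < tileSizes.size(); ++i)
    if (tileSizes[i] > 0 && iterators[i] != expected)
      return false;
  return true;
}

int main() {
  // A matmul has iterators (parallel, parallel, reduction) for (M, N, K).
  std::vector<IteratorType> matmul = {IteratorType::parallel,
                                      IteratorType::parallel,
                                      IteratorType::reduction};
  // Reduction tiling may only touch K; workgroup tiling may only touch M, N.
  std::printf("%d\n", tilesMatchIterators({0, 0, 64}, matmul,
                                          IteratorType::reduction)); // 1: ok
  std::printf("%d\n", tilesMatchIterators({64, 64, 0}, matmul,
                                          IteratorType::parallel));  // 1: ok
  std::printf("%d\n", tilesMatchIterators({64, 0, 0}, matmul,
                                          IteratorType::reduction)); // 0: M is parallel
}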