[CIR][NEON] Add lowering support for vceqzd_s64 (llvm#179779)

banach-space · web-flow · commit 80677dc5e00c · 2026-02-09T18:48:42.000Z
Rather than creating a dedicated ClangIR test file, the original test file for this intrinsic is effectively reused: * clang/test/CodeGen/AArch64/neon-intrinsics.c “Effectively” meaning that the corresponding test is moved (rather than literally reused) to a new file within the original AArch64 builtins test directory: * clang/test/CodeGen/AArch64/neon/intrinsics.c This is necessary to avoid lowering unsupported examples from intrinsics.c with `-fclangir`. The new file will eventually replace the original one once all builtins from it can be lowered via ClangIR. To facilitate test re-use, new LIT "feature" is added so that CIR tests can be run conditionally, e.g. the following will only run when `CLANG_ENABLE_CIR` is set: ```C // RUN: %if cir %{%clang_cc1 ... %} ``` This sort of substitutions are documented in [2]. REFERENCES: [1] https://developer.arm.com/architectures/instruction-sets/intrinsics/#f:@navigationhierarchiessimdisa=[Neon]&q=vceqzd_s64 [2] https://llvm.org/docs/TestingGuide.html#substitutions
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -1,5 +1,4 @@
 //===---- CIRGenBuiltinAArch64.cpp - Emit CIR for AArch64 builtins --------===//
-//
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -88,6 +87,42 @@ findARMVectorIntrinsicInMap(ArrayRef<AArch64BuiltinInfo> intrinsicMap,
   return nullptr;
 }
 
+//===----------------------------------------------------------------------===//
+//  Emit-helpers
+//===----------------------------------------------------------------------===//
+static mlir::Value
+emitAArch64CompareBuiltinExpr(CIRGenFunction &cgf, CIRGenBuilderTy &builder,
+                              mlir::Location loc, mlir::Value src,
+                              mlir::Type retTy, const cir::CmpOpKind kind) {
+
+  bool scalarCmp = !isa<cir::VectorType>(src.getType());
+  if (!scalarCmp) {
+    assert(cast<cir::VectorType>(retTy).getIsScalable() &&
+           "This is only intended for fixed-width vectors");
+    // Vector retTypes are cast to i8 vectors. Recover original retType.
+    cgf.cgm.errorNYI(loc, std::string("unimplemented vector compare"));
+  }
+
+  mlir::Value zero = builder.getNullValue(src.getType(), loc);
+  mlir::Value cmp;
+  if (cir::isFPOrVectorOfFPType(src.getType())) {
+    cgf.cgm.errorNYI(loc, std::string("unimplemented FP compare"));
+  } else {
+    if (scalarCmp)
+      // For scalars, cast !cir.bool to !cir.int<s, 1> so that the compare
+      // result is sign- rather zero-extended when casting to the output
+      // retType.
+      cmp = builder.createCast(
+          loc, cir::CastKind::bool_to_int,
+          builder.createCompare(loc, cir::CmpOpKind::eq, src, zero),
+          builder.getSIntNTy(1));
+    else
+      cgf.cgm.errorNYI(loc, std::string("unimplemented vector compare"));
+  }
+
+  return builder.createCast(loc, cir::CastKind::integral, cmp, retTy);
+}
+
 bool CIRGenFunction::getAArch64SVEProcessedOperands(
     unsigned builtinID, const CallExpr *expr, SmallVectorImpl<mlir::Value> &ops,
     SVETypeFlags typeFlags) {
@@ -1357,7 +1392,15 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
   case NEON::BI__builtin_neon_vpaddd_s64:
   case NEON::BI__builtin_neon_vpaddd_f64:
   case NEON::BI__builtin_neon_vpadds_f32:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented AArch64 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return mlir::Value{};
   case NEON::BI__builtin_neon_vceqzd_s64:
+    ops.push_back(emitScalarExpr(expr->getArg(0)));
+    return emitAArch64CompareBuiltinExpr(
+        *this, builder, loc, ops[0],
+        convertType(expr->getCallReturnType(getContext())), cir::CmpOpKind::eq);
   case NEON::BI__builtin_neon_vceqzd_f64:
   case NEON::BI__builtin_neon_vceqzs_f32:
   case NEON::BI__builtin_neon_vceqzh_f16:
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -36,6 +36,7 @@
 #include "clang/CIR/MissingFeatures.h"
 #include "clang/CIR/TypeEvaluationKind.h"
 #include "llvm/ADT/ScopedHashTable.h"
+#include "llvm/IR/Instructions.h"
 
 namespace {
 class ScalarExprEmitter;
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1293,6 +1293,7 @@ mlir::LogicalResult CIRToLLVMCastOpLowering::matchAndRewrite(
     auto llvmSrcTy = mlir::cast<mlir::IntegerType>(llvmSrcVal.getType());
     auto llvmDstTy =
         mlir::cast<mlir::IntegerType>(getTypeConverter()->convertType(dstTy));
+
     if (llvmSrcTy.getWidth() == llvmDstTy.getWidth())
       rewriter.replaceOpWithNewOp<mlir::LLVM::BitcastOp>(castOp, llvmDstTy,
                                                          llvmSrcVal);
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -1721,6 +1721,9 @@ Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
   return CGM.getIntrinsic(IntrinsicID, Tys);
 }
 
+//===----------------------------------------------------------------------===//
+//  Emit-helpers
+//===----------------------------------------------------------------------===//
 static Value *EmitCommonNeonSISDBuiltinExpr(
     CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
     SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
@@ -2494,13 +2497,15 @@ CodeGenFunction::EmitAArch64CompareBuiltinExpr(Value *Op, llvm::Type *Ty,
     Op = Builder.CreateBitCast(Op, Ty);
   }
 
+  Constant *zero = Constant::getNullValue(Op->getType());
+
   if (CmpInst::isFPPredicate(Pred)) {
     if (Pred == CmpInst::FCMP_OEQ)
-      Op = Builder.CreateFCmp(Pred, Op, Constant::getNullValue(Op->getType()));
+      Op = Builder.CreateFCmp(Pred, Op, zero);
     else
-      Op = Builder.CreateFCmpS(Pred, Op, Constant::getNullValue(Op->getType()));
+      Op = Builder.CreateFCmpS(Pred, Op, zero);
   } else {
-    Op = Builder.CreateICmp(Pred, Op, Constant::getNullValue(Op->getType()));
+    Op = Builder.CreateICmp(Pred, Op, zero);
   }
 
   llvm::Type *ResTy = Ty;
diff --git a/clang/test/CodeGen/AArch64/neon-intrinsics.c b/clang/test/CodeGen/AArch64/neon-intrinsics.c
@@ -17242,17 +17242,6 @@ uint64_t test_vceqd_u64(uint64_t a, uint64_t b) {
   return (int64_t)vceqd_u64(a, b);
 }
 
-// CHECK-LABEL: define dso_local i64 @test_vceqzd_s64(
-// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i64 [[A]], 0
-// CHECK-NEXT:    [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64
-// CHECK-NEXT:    ret i64 [[VCEQZ_I]]
-//
-uint64_t test_vceqzd_s64(int64_t a) {
-  return (uint64_t)vceqzd_s64(a);
-}
-
 // CHECK-LABEL: define dso_local i64 @test_vceqzd_u64(
 // CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
diff --git a/clang/test/CodeGen/AArch64/neon/intrinsics.c b/clang/test/CodeGen/AArch64/neon/intrinsics.c
@@ -0,0 +1,33 @@
+// REQUIRES: aarch64-registered-target || arm-registered-target
+
+// RUN:                   %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -disable-O0-optnone -flax-vector-conversions=none           -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s --check-prefixes=LLVM
+// RUN: %if cir-enabled %{%clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -disable-O0-optnone -flax-vector-conversions=none -fclangir -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s --check-prefixes=LLVM %}
+// RUN: %if cir-enabled %{%clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -disable-O0-optnone -flax-vector-conversions=none -fclangir -emit-cir  -o - %s |                               FileCheck %s --check-prefixes=CIR %}
+
+//=============================================================================
+// NOTES
+//
+// Minor differences between RUNs (e.g. presence of `noundef` attached to
+// argumens, `align` attribute attached to pointers), are matched using
+// catch-alls like {{.*}}.
+//
+// Different labels for CIR stem from an additional function call that is
+// present at the AST and CIR levels, but is inlined at the LLVM IR level.
+//=============================================================================
+
+#include <arm_neon.h>
+
+// LLVM-LABEL: @test_vceqzd_s64
+// CIR-LABEL: @vceqzd_s64
+uint64_t test_vceqzd_s64(int64_t a) {
+// CIR:   [[C_0:%.*]] = cir.const #cir.int<0>
+// CIR:   [[CMP:%.*]] = cir.cmp(eq, %{{.*}}, [[C_0]]) : !s64i, !cir.bool
+// CIR:   [[RES:%.*]] = cir.cast bool_to_int [[CMP]] : !cir.bool -> !cir.int<s, 1>
+// CIR:   cir.cast integral [[RES]] : !cir.int<s, 1> -> !u64i
+
+// LLVM-SAME: i64{{.*}} [[A:%.*]])
+// LLVM:          [[TMP0:%.*]] = icmp eq i64 [[A]], 0
+// LLVM-NEXT:    [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64
+// LLVM-NEXT:    ret i64 [[VCEQZ_I]]
+  return (uint64_t)vceqzd_s64(a);
+}
diff --git a/clang/test/lit.cfg.py b/clang/test/lit.cfg.py
@@ -406,6 +406,8 @@ def calculate_arch_features(arch_string):
 if config.have_llvm_driver:
     config.available_features.add("llvm-driver")
 
+if config.clang_enable_cir:
+    config.available_features.add("cir-enabled")
 
 # Some tests perform deep recursion, which requires a larger pthread stack size
 # than the relatively low default of 192 KiB for 64-bit processes on AIX. The