fix(stirrup_agent): reasoning fallback + surface tool-arg validation errors #4984
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | |
| # SPDX-License-Identifier: Apache-2.0 | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| # Workflow display name. | |
| name: Unit tests | |
| # Triggers: pull-request activity (including label changes) and invocation from another workflow via workflow_call. | |
| on: | |
| pull_request: | |
| types: [opened, synchronize, reopened, labeled, unlabeled] | |
| workflow_call: | |
| # One concurrency group per workflow and PR number (falling back to the ref when there is no PR number); | |
| # a newer run in the same group cancels the one still in progress. | |
| concurrency: | |
| group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} | |
| cancel-in-progress: true | |
| jobs: | |
| # Single job: detect what changed, classify it, then set up and run only the tests that are needed. | |
| test: | |
| name: Test | |
| runs-on: ubuntu-latest | |
| steps: | |
| # Check out the repository together with its submodules (recursively). | |
| - name: Checkout repository | |
| uses: actions/checkout@v6 | |
| with: | |
| submodules: 'recursive' | |
| # Categorize the files changed by this PR. `files_yaml` defines the `doc` and `server` groups whose | |
| # outputs (doc_any_changed, server_any_changed, server_all_changed_files) are read by later steps. | |
| # NOTE(review): the `uses:` value below appears mangled by e-mail obfuscation ("[email protected]"); | |
| # presumably it was `step-security/changed-files@<version>` - confirm and restore the real pin. | |
| - name: Detect changed files | |
| id: changed-files | |
| uses: step-security/[email protected] | |
| with: | |
| # 'files' input enables global other_changed_files_count for uncategorized files | |
| files: | | |
| **.md | |
| docs/** | |
| fern/** | |
| LICENSE | |
| resources_servers/** | |
| responses_api_agents/** | |
| responses_api_models/** | |
| benchmarks/** | |
| files_yaml: | | |
| doc: | |
| - '**.md' | |
| - docs/** | |
| - fern/** | |
| - LICENSE | |
| - benchmarks/** | |
| server: | |
| - resources_servers/** | |
| - responses_api_agents/** | |
| - responses_api_models/** | |
| # Decide how much of the test suite to run and publish the decision as the step outputs | |
| # `run_full` and `run_servers` (both 'true'/'false'). Priority: other > server > doc. | |
| # Outside a pull_request event the full suite is always selected. | |
| - name: Classify changes | |
| id: changes | |
| # Workflow expressions are handed to the script through the environment instead of being | |
| # expanded into the script text, so changed-file names can never be parsed as shell code. | |
| env: | |
| EVENT_NAME: ${{ github.event_name }} | |
| DOC_CHANGED: ${{ steps.changed-files.outputs.doc_any_changed }} | |
| SERVER_CHANGED: ${{ steps.changed-files.outputs.server_any_changed }} | |
| OTHER_COUNT: ${{ steps.changed-files.outputs.other_changed_files_count }} | |
| OTHER_FILES: ${{ steps.changed-files.outputs.other_changed_files }} | |
| run: | | |
| # Write both step outputs. $1 = value for run_full, $2 = value for run_servers. | |
| set_outputs() { | |
| echo "run_full=$1" >> "$GITHUB_OUTPUT" | |
| echo "run_servers=$2" >> "$GITHUB_OUTPUT" | |
| } | |
| # workflow_call or non-PR context: run everything | |
| if [[ "$EVENT_NAME" != "pull_request" ]]; then | |
| set_outputs true false | |
| echo "No PR context - running full suite" | |
| exit 0 | |
| fi | |
| echo "============================================" | |
| echo "File change categorization" | |
| echo "============================================" | |
| echo "" | |
| echo "Categories:" | |
| echo " doc (skip tests): **.md, docs/**, fern/**, LICENSE, benchmarks/**" | |
| echo " server (test changed): resources_servers/**, responses_api_agents/**, responses_api_models/**" | |
| echo " other (full suite): everything else (core library, CI, scripts, etc.)" | |
| echo " Priority: other > server > doc" | |
| echo "" | |
| echo "This PR:" | |
| echo " doc changed: $DOC_CHANGED" | |
| echo " server changed: $SERVER_CHANGED" | |
| # An empty count (output not set) is treated as 0. | |
| echo " other (uncategorized): ${OTHER_COUNT:-0} file(s)" | |
| if [[ "${OTHER_COUNT:-0}" -gt 0 ]]; then | |
| echo " other files: $OTHER_FILES" | |
| fi | |
| echo "" | |
| # Files outside doc/server categories (CI, core library, config, etc.) → full suite | |
| if [[ "${OTHER_COUNT:-0}" -gt 0 ]]; then | |
| echo "Decision: FULL TEST SUITE" | |
| echo "Reason: ${OTHER_COUNT} file(s) outside doc/server categories" | |
| set_outputs true false | |
| # Only server files changed (possibly with doc changes) → test changed servers | |
| elif [[ "$SERVER_CHANGED" == "true" ]]; then | |
| echo "Decision: SERVER TESTS ONLY" | |
| echo "Reason: only server files changed (+ possibly docs)" | |
| set_outputs false true | |
| # Only doc files changed → skip tests | |
| else | |
| echo "Decision: SKIP TESTS" | |
| echo "Reason: only doc/benchmark files changed" | |
| set_outputs false false | |
| fi | |
| echo "============================================" | |
| # Cache ~/.cache/uv, keyed on the runner OS and the hash of uv.lock; the restore key falls back to | |
| # any cache for the same OS. Runs only when run_full or run_servers is 'true'. | |
| - name: Cache uv dependencies | |
| if: steps.changes.outputs.run_full == 'true' || steps.changes.outputs.run_servers == 'true' | |
| uses: actions/cache@v4 | |
| with: | |
| path: ~/.cache/uv | |
| key: uv-${{ runner.os }}-${{ hashFiles('uv.lock') }} | |
| restore-keys: | | |
| uv-${{ runner.os }}- | |
| # Install system packages and uv, create a Python 3.12 virtualenv in .venv and sync the project | |
| # with its `dev` extra. Runs only when run_full or run_servers is 'true'. | |
| - name: Setup for test | |
| if: steps.changes.outputs.run_full == 'true' || steps.changes.outputs.run_servers == 'true' | |
| run: | | |
| sudo apt-get update | |
| # Curl is required for setup_nvidia.sh to download uv | |
| # ca-certificates is there to support curl and mitigate `curl: (77) error setting certificate file: /etc/ssl/certs/ca-certificates.crt` | |
| sudo apt-get install -y --no-install-recommends git curl ca-certificates | |
| # The flow below should be used and synced with any Docker or container related flows. There is no script here to keep it 100% explicit. | |
| # This is how we test and this is how you should use/consume. | |
| # Download the uv installer script and pipe it straight into sh. | |
| curl -LsSf https://astral.sh/uv/install.sh | sh | |
| uv venv --python 3.12 | |
| source .venv/bin/activate | |
| uv sync --extra dev | |
| # Run the selected tests inside the .venv virtualenv: either the full suite, or one ng_test run | |
| # per changed server directory. The step fails if any server run fails. | |
| - name: Test | |
| if: steps.changes.outputs.run_full == 'true' || steps.changes.outputs.run_servers == 'true' | |
| # Step outputs reach the script via the environment instead of being spliced into the script | |
| # text, so a crafted file path in a PR cannot inject shell commands. | |
| env: | |
| RUN_FULL: ${{ steps.changes.outputs.run_full }} | |
| RUN_SERVERS: ${{ steps.changes.outputs.run_servers }} | |
| SERVER_FILES: ${{ steps.changed-files.outputs.server_all_changed_files }} | |
| run: | | |
| source .venv/bin/activate | |
| # Full suite: core library tests + all server tests | |
| if [[ "$RUN_FULL" == "true" ]]; then | |
| echo "Running full test suite" | |
| ng_dev_test | |
| ng_test_all +fail_on_total_and_test_mismatch=true +delete_venvs_after_each_test=true | |
| # Server-only: test only the changed servers | |
| elif [[ "$RUN_SERVERS" == "true" ]]; then | |
| # Reduce each space-separated changed path to its first two components, de-duplicate, | |
| # and drop empty entries so an empty list results in zero iterations. | |
| mapfile -t changed_servers < <( | |
| printf '%s\n' "$SERVER_FILES" | tr ' ' '\n' | cut -d'/' -f1-2 | sort -u | sed '/^$/d' | |
| ) | |
| echo "Testing changed servers:" | |
| printf ' - %s\n' "${changed_servers[@]}" | |
| # Keep going after a failure so every changed server is reported; fail once at the end. | |
| exit_code=0 | |
| for server in "${changed_servers[@]}"; do | |
| echo "" | |
| echo "================================================" | |
| echo "Testing: $server" | |
| echo "================================================" | |
| if ng_test "+entrypoint=$server" +delete_venvs_after_each_test=true; then | |
| echo "PASS: $server" | |
| else | |
| echo "FAIL: $server" | |
| exit_code=1 | |
| fi | |
| done | |
| if (( exit_code != 0 )); then | |
| echo "Some server tests failed" | |
| exit 1 | |
| fi | |
| # Docs-only: nothing to test | |
| else | |
| echo "No tests to run" | |
| fi | |