feat: add MiMo V2 Pro reasoning support via OpenRouter

thygrrr · thygrrr · commit c05160e969a4 · 2026-04-14T19:43:20.000+02:00
Adds reasoning_details round-trip for OpenRouter's MiMo V2 Pro in the
same style as the existing reasoning_content path for DeepSeek-R1/Kimi.
Request-side reasoning is passed via extra_body so litellm.drop_params
can't silently strip it, and _apply_model_overrides now consults the
active gateway spec so registry overrides fire for gateway-routed models
(previously a latent dead-code bug for any override attached to
OpenRouter or AiHubMix).
diff --git a/pocketfox/agent/context.py b/pocketfox/agent/context.py
@@ -766,6 +766,7 @@ def add_assistant_message(
         content: str | None,
         tool_calls: list[dict[str, Any]] | None = None,
         reasoning_content: str | None = None,
+        reasoning_details: list[dict[str, Any]] | None = None,
     ) -> list[dict[str, Any]]:
         """
         Add an assistant message to the message list.
@@ -775,6 +776,8 @@ def add_assistant_message(
             content: Message content.
             tool_calls: Optional tool calls.
             reasoning_content: Thinking output (Kimi, DeepSeek-R1, etc.).
+            reasoning_details: OpenRouter reasoning segments (MiMo etc.) — must
+                round-trip verbatim in subsequent turns.
 
         Returns:
             Updated message list.
@@ -788,5 +791,8 @@ def add_assistant_message(
         if reasoning_content:
             msg["reasoning_content"] = reasoning_content
 
+        if reasoning_details:
+            msg["reasoning_details"] = reasoning_details
+
         messages.append(msg)
         return messages
diff --git a/pocketfox/agent/loop.py b/pocketfox/agent/loop.py
@@ -586,6 +586,7 @@ async def _run_llm_loop(
                     response.content,
                     tool_call_dicts,
                     reasoning_content=response.reasoning_content,
+                    reasoning_details=response.reasoning_details,
                 )
 
                 # Collect any image blocks returned by multimodal tools (e.g.
diff --git a/pocketfox/cli/tty.py b/pocketfox/cli/tty.py
@@ -211,6 +211,7 @@ async def process(self, content: str, max_iterations: int = 50) -> str:
                     response.content,
                     tool_call_dicts,
                     reasoning_content=response.reasoning_content,
+                    reasoning_details=response.reasoning_details,
                 )
 
                 # Execute tools. Collect any image blocks returned by
diff --git a/pocketfox/providers/base.py b/pocketfox/providers/base.py
@@ -23,6 +23,7 @@ class LLMResponse:
     finish_reason: str = "stop"
     usage: dict[str, int] = field(default_factory=dict)
     reasoning_content: str | None = None  # Kimi, DeepSeek-R1 etc.
+    reasoning_details: list[dict[str, Any]] | None = None  # OpenRouter MiMo etc. — round-trip verbatim
 
     @property
     def has_tool_calls(self) -> bool:
diff --git a/pocketfox/providers/litellm_provider.py b/pocketfox/providers/litellm_provider.py
@@ -11,6 +11,15 @@
 from pocketfox.providers.registry import find_by_model, find_gateway
 
 
+def _merge_overrides(kwargs: dict[str, Any], overrides: dict[str, Any]) -> None:
+    """Merge registry overrides into ``kwargs`` in place.
+
+    ``extra_body`` is merged key-by-key (one level) when both the existing
+    value and the override are dicts, with override keys winning. Every other
+    key is simply overwritten.
+
+    Override values are deep-copied before being stored so the request kwargs
+    never alias the dicts held by the caller's ``overrides`` mapping; mutating
+    ``kwargs`` afterwards cannot change ``overrides``.
+
+    Args:
+        kwargs: Request keyword arguments; mutated in place.
+        overrides: Override mapping to apply; left untouched.
+    """
+    import copy  # local import keeps this helper self-contained
+
+    for key, val in overrides.items():
+        val = copy.deepcopy(val)
+        current = kwargs.get(key)
+        if key == "extra_body" and isinstance(current, dict) and isinstance(val, dict):
+            kwargs[key] = {**current, **val}
+        else:
+            kwargs[key] = val
+
+
 class LiteLLMProvider(LLMProvider):
     """
     LLM provider using LiteLLM for multi-provider support.
@@ -90,13 +99,19 @@ def _resolve_model(self, model: str) -> str:
         return model
 
     def _apply_model_overrides(self, model: str, kwargs: dict[str, Any]) -> None:
-        """Apply model-specific parameter overrides from the registry."""
+        """Apply model-specific parameter overrides from the registry.
+
+        Consults both the matched standard provider (if any) and the active
+        gateway spec (if any) — gateways route arbitrary models, so their
+        per-model overrides must also fire (e.g. OpenRouter → xiaomi/mimo).
+
+        Within each spec the first matching pattern wins. A match in the
+        standard provider spec does not stop the gateway spec from being
+        consulted; the gateway is applied second, so it wins on conflicts.
+
+        Args:
+            model: Model identifier; patterns are matched against its
+                lowercased form.
+            kwargs: Request keyword arguments; mutated in place.
+        """
         model_lower = model.lower()
-        spec = find_by_model(model)
-        if spec:
+        for spec in (find_by_model(model), self._gateway):
+            if not spec:
+                continue
             for pattern, overrides in spec.model_overrides:
                 if pattern in model_lower:
-                    kwargs.update(overrides)
-                    return
+                    _merge_overrides(kwargs, overrides)
+                    break
 
     async def chat(
@@ -192,13 +207,30 @@ def _parse_response(self, response: Any) -> LLMResponse:
                 usage["cache_read_input_tokens"] = response.usage.cache_read_input_tokens
 
         reasoning_content = getattr(message, "reasoning_content", None)
+        reasoning_details = getattr(message, "reasoning_details", None)
+
+        # LiteLLM 1.83.1 may not know about OpenRouter's reasoning_details field
+        # and stash it in pydantic's model_extra instead of exposing it directly.
+        if reasoning_details is None:
+            extra = getattr(message, "model_extra", None) or {}
+            reasoning_details = extra.get("reasoning_details")
+
+        # Synthesize a human-readable reasoning_content from details when the
+        # server populates details-only (MiMo via OpenRouter). Keeps downstream
+        # display/logging code working without caring about the shape.
+        if not reasoning_content and reasoning_details:
+            synthesized = "\n".join(
+                d.get("text", "") for d in reasoning_details if isinstance(d, dict)
+            ).strip()
+            reasoning_content = synthesized or None
 
         return LLMResponse(
             content=message.content,
             tool_calls=tool_calls,
             finish_reason=choice.finish_reason or "stop",
             usage=usage,
             reasoning_content=reasoning_content,
+            reasoning_details=reasoning_details,
         )
 
     def get_default_model(self) -> str:
diff --git a/pocketfox/providers/registry.py b/pocketfox/providers/registry.py
@@ -78,7 +78,11 @@ def label(self) -> str:
         detect_by_base_keyword="openrouter",
         default_api_base="https://openrouter.ai/api/v1",
         strip_model_prefix=False,
-        model_overrides=(),
+        # MiMo V2 Pro: pass reasoning via extra_body so LiteLLM's drop_params
+        # can't silently strip it before the request leaves the library.
+        model_overrides=(
+            ("xiaomi/mimo", {"extra_body": {"reasoning": {"enabled": True}}}),
+        ),
     ),
     # AiHubMix: global gateway, OpenAI-compatible interface.
     # strip_model_prefix=True: it doesn't understand "anthropic/claude-3",
diff --git a/scripts/test_mimo.py b/scripts/test_mimo.py
@@ -0,0 +1,83 @@
+"""Manual smoke test for MiMo V2 Pro via OpenRouter.
+
+Requires OPENROUTER_API_KEY in env. Not part of the pytest suite — it hits the
+real API and will spend tokens.
+
+Usage:
+    python scripts/test_mimo.py
+
+Verifies end-to-end:
+  1. `extra_body={"reasoning": {"enabled": True}}` reaches OpenRouter.
+  2. `reasoning_details` is parsed off the response.
+  3. A follow-up turn with the full `reasoning_details` round-tripped in
+     conversation history is accepted by OpenRouter without complaint.
+"""
+
+import asyncio
+import os
+import sys
+
+from pocketfox.agent.context import ContextBuilder  # noqa: F401  (unused direct, but mirrors prod code path)
+from pocketfox.providers.litellm_provider import LiteLLMProvider
+
+
+def _require_key() -> str:
+    """Return OPENROUTER_API_KEY from the environment, or exit with status 1.
+
+    An unset or empty variable prints a hint to stderr before exiting.
+    """
+    api_key = os.environ.get("OPENROUTER_API_KEY")
+    if api_key:
+        return api_key
+    print("ERROR: set OPENROUTER_API_KEY before running this script", file=sys.stderr)
+    sys.exit(1)
+
+
+async def main() -> None:
+    """Run a two-turn conversation against MiMo V2 Pro and print what comes back.
+
+    Turn 1 asks a reasoning question and prints the content plus whatever
+    reasoning fields the provider returned. Turn 2 replays the assistant
+    message (including any reasoning fields) followed by a new user message.
+    Exits with status 2 if the second response has finish_reason "error".
+    """
+    provider = LiteLLMProvider(
+        api_key=_require_key(),
+        default_model="openrouter/xiaomi/mimo-v2-pro",
+        provider_name="openrouter",
+    )
+
+    # --- Turn 1 -------------------------------------------------------------
+    messages = [{"role": "user", "content": "Think step by step: what's 17 * 23?"}]
+    print("[turn 1] calling MiMo V2 Pro...")
+    response1 = await provider.chat(messages=messages)
+
+    print(f"[turn 1] content: {response1.content!r}")
+    print(f"[turn 1] reasoning_content (first 500 chars): {(response1.reasoning_content or '')[:500]!r}")
+    if response1.reasoning_details:
+        print(f"[turn 1] reasoning_details[0]: {response1.reasoning_details[0]!r}")
+        print(f"[turn 1] reasoning_details count: {len(response1.reasoning_details)}")
+    else:
+        print("[turn 1] reasoning_details: None  <-- likely a problem")
+
+    # Neither reasoning field present: warn on stderr but keep going so the
+    # second turn is still exercised.
+    if not response1.reasoning_details and not response1.reasoning_content:
+        print(
+            "[turn 1] WARNING: no reasoning returned. The extra_body path may not "
+            "have reached OpenRouter. Try setting `litellm.set_verbose = True` "
+            "in this script and re-running to see the outgoing HTTP body.",
+            file=sys.stderr,
+        )
+
+    # --- Turn 2 -------------------------------------------------------------
+    # Rebuild the assistant turn by hand, attaching only the reasoning fields
+    # that were actually returned in turn 1.
+    assistant_msg: dict = {"role": "assistant", "content": response1.content or ""}
+    if response1.reasoning_content:
+        assistant_msg["reasoning_content"] = response1.reasoning_content
+    if response1.reasoning_details:
+        assistant_msg["reasoning_details"] = response1.reasoning_details
+
+    messages.append(assistant_msg)
+    messages.append({"role": "user", "content": "Now double it."})
+
+    print("\n[turn 2] calling MiMo V2 Pro with round-tripped reasoning_details...")
+    response2 = await provider.chat(messages=messages)
+
+    print(f"[turn 2] content: {response2.content!r}")
+    print(f"[turn 2] finish_reason: {response2.finish_reason}")
+    # NOTE(review): presumably provider.chat reports failures via
+    # finish_reason == "error" rather than raising — confirm in the provider.
+    if response2.finish_reason == "error":
+        print("[turn 2] FAILED — OpenRouter rejected the history", file=sys.stderr)
+        sys.exit(2)
+
+    print("\nOK")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/tests/test_litellm_reasoning.py b/tests/test_litellm_reasoning.py
@@ -0,0 +1,158 @@
+"""Tests for reasoning_content / reasoning_details parsing and gateway override routing."""
+
+from types import SimpleNamespace
+from typing import Any
+
+from pocketfox.providers.litellm_provider import LiteLLMProvider, _merge_overrides
+
+
+def _fake_response(
+    *,
+    content: str = "answer",
+    reasoning_content: Any = None,
+    reasoning_details: Any = None,
+    model_extra: dict[str, Any] | None = None,
+) -> SimpleNamespace:
+    """Return a minimal stand-in for a LiteLLM ModelResponse.
+
+    The result has ``usage=None`` and a single choice with finish_reason
+    "stop" whose message carries the given fields and ``tool_calls=None``.
+    """
+    message_fields = {
+        "content": content,
+        "tool_calls": None,
+        "reasoning_content": reasoning_content,
+        "reasoning_details": reasoning_details,
+        "model_extra": model_extra,
+    }
+    only_choice = SimpleNamespace(
+        message=SimpleNamespace(**message_fields),
+        finish_reason="stop",
+    )
+    return SimpleNamespace(choices=[only_choice], usage=None)
+
+
+def _provider() -> LiteLLMProvider:
+    """Build a LiteLLMProvider pointed at the OpenRouter MiMo model with a dummy key."""
+    # provider_name="openrouter" routes through the OpenRouter gateway spec
+    # without requiring a real api_key / network access.
+    settings = {
+        "api_key": "sk-or-test",
+        "default_model": "openrouter/xiaomi/mimo-v2-pro",
+        "provider_name": "openrouter",
+    }
+    return LiteLLMProvider(**settings)
+
+
+# ---------------------------------------------------------------------------
+# _parse_response — reasoning extraction
+# ---------------------------------------------------------------------------
+
+
+def test_parse_reasoning_details_populated_synthesizes_content():
+    """Details-only responses keep the details and get a newline-joined reasoning_content."""
+    steps = ("first step", "second step")
+    segments = [{"type": "reasoning.text", "text": step} for step in steps]
+
+    result = _provider()._parse_response(
+        _fake_response(content="final", reasoning_content=None, reasoning_details=segments)
+    )
+
+    assert result.content == "final"
+    assert result.reasoning_details == segments
+    assert result.reasoning_content == "first step\nsecond step"
+
+
+def test_parse_reasoning_content_only_backwards_compatible():
+    """A plain reasoning_content response passes through with no details attached."""
+    raw = _fake_response(content="final", reasoning_content="thinking...", reasoning_details=None)
+
+    result = _provider()._parse_response(raw)
+
+    assert result.reasoning_details is None
+    assert result.reasoning_content == "thinking..."
+
+
+def test_parse_reasoning_details_via_model_extra_fallback():
+    """Details present only in model_extra are still returned and synthesized into content."""
+    segments = [{"type": "reasoning.text", "text": "hidden in extras"}]
+    raw = _fake_response(
+        content="final",
+        reasoning_content=None,
+        reasoning_details=None,
+        model_extra={"reasoning_details": segments},
+    )
+
+    result = _provider()._parse_response(raw)
+
+    assert result.reasoning_content == "hidden in extras"
+    assert result.reasoning_details == segments
+
+
+def test_parse_no_reasoning_at_all():
+    """With no reasoning fields set, both reasoning attributes come back as None."""
+    result = _provider()._parse_response(_fake_response(content="final"))
+
+    assert result.content == "final"
+    for field_name in ("reasoning_content", "reasoning_details"):
+        assert getattr(result, field_name) is None
+
+
+# ---------------------------------------------------------------------------
+# _apply_model_overrides — gateway routing
+# ---------------------------------------------------------------------------
+
+
+def test_gateway_override_fires_for_openrouter_mimo():
+    """The OpenRouter gateway override adds the reasoning extra_body for a MiMo model."""
+    model_name = "openrouter/xiaomi/mimo-v2-pro"
+    call_kwargs: dict[str, Any] = {"model": model_name, "messages": []}
+
+    _provider()._apply_model_overrides(model_name, call_kwargs)
+
+    expected_body = {"reasoning": {"enabled": True}}
+    assert call_kwargs["extra_body"] == expected_body
+
+
+def test_gateway_override_does_not_fire_for_unrelated_openrouter_model():
+    """A non-MiMo OpenRouter model gets no extra_body from the overrides."""
+    model_name = "openrouter/anthropic/claude-3"
+    call_kwargs: dict[str, Any] = {"model": model_name, "messages": []}
+
+    _provider()._apply_model_overrides(model_name, call_kwargs)
+
+    assert "extra_body" not in call_kwargs
+
+
+# ---------------------------------------------------------------------------
+# _merge_overrides — extra_body deep merge
+# ---------------------------------------------------------------------------
+
+
+def test_merge_overrides_deep_merges_extra_body():
+    """Existing extra_body keys survive when an override contributes new ones."""
+    target: dict[str, Any] = {"extra_body": {"existing": "value"}}
+
+    _merge_overrides(target, {"extra_body": {"reasoning": {"enabled": True}}})
+
+    expected_body = {"existing": "value", "reasoning": {"enabled": True}}
+    assert target["extra_body"] == expected_body
+
+
+def test_merge_overrides_sets_extra_body_when_absent():
+    """With no extra_body in kwargs, the override's extra_body is installed as-is."""
+    target: dict[str, Any] = {}
+
+    _merge_overrides(target, {"extra_body": {"reasoning": {"enabled": True}}})
+
+    expected_body = {"reasoning": {"enabled": True}}
+    assert target["extra_body"] == expected_body
+
+
+def test_merge_overrides_non_extra_body_keys_overwrite():
+    """Keys other than extra_body are replaced outright by the override value."""
+    target: dict[str, Any] = {"temperature": 0.7}
+
+    _merge_overrides(target, {"temperature": 1.0})
+
+    assert target["temperature"] == 1.0

Original file line number	Diff line number	Diff line change
`@@ -586,6 +586,7 @@ async def _run_llm_loop(`
`586`	`586`	`response.content,`
`587`	`587`	`tool_call_dicts,`
`588`	`588`	`reasoning_content=response.reasoning_content,`
	`589`	`+ reasoning_details=response.reasoning_details,`
`589`	`590`	`)`
`590`	`591`
`591`	`592`	`# Collect any image blocks returned by multimodal tools (e.g.`
Original file line number	Diff line number	Diff line change
`@@ -211,6 +211,7 @@ async def process(self, content: str, max_iterations: int = 50) -> str:`
`211`	`211`	`response.content,`
`212`	`212`	`tool_call_dicts,`
`213`	`213`	`reasoning_content=response.reasoning_content,`
	`214`	`+ reasoning_details=response.reasoning_details,`
`214`	`215`	`)`
`215`	`216`
`216`	`217`	`# Execute tools. Collect any image blocks returned by`