Telosnex · jpohhhh · Mar 14, 2025 · Mar 9, 2025 · Mar 9, 2025 · Mar 9, 2025
diff --git a/common/chat.cpp b/common/chat.cpp
@@ -580,10 +580,7 @@ static common_chat_msg parse_json_tool_calls(
     }
 
     if (!result.tool_calls.empty()) {
-        if (!string_strip(result.content).empty()) {
-            LOG_WRN("Content found with tool calls: %s\n", result.content.c_str());
-        }
-        result.content = "";
+        result.content = string_strip(result.content);
     }
     return result;
 }
@@ -1359,14 +1356,15 @@ static common_chat_params common_chat_params_init_phi_4(const common_chat_templa
             std::string name = function.at("name");
             auto parameters = function.at("parameters");
             builder.resolve_refs(parameters);
-            tool_rules.push_back(builder.add_schema(name + "-call", {
+            auto call_rule = builder.add_schema(name + "-call", {
                 {"type", "object"},
                 {"properties", {
                     {"name", {{"const", name}}},
                     {"arguments", parameters},
                 }},
                 {"required", json::array({"name", "arguments"})},
-            }));
+            });
+            tool_rules.push_back(builder.add_rule(name + "-call", "\"<|tool_call|>\" " + call_rule + " \"<|/tool_call|>\""));
         });
         auto any_tool_call = builder.add_rule("any_tool_call", "( " + string_join(tool_rules, " | ") + " ) space");
         std::vector<std::string> alt_tags {
@@ -1379,6 +1377,9 @@ static common_chat_params common_chat_params_init_phi_4(const common_chat_templa
         data.preserved_tokens = {
             "<|tool_call|>",
-            "</|tool_call|>",
+            "<|/tool_call|>",
+            "<|tool_response|>",
+            "<|tool|>",
+            "<|/tool|>",
         };
     });
 
@@ -1437,89 +1438,13 @@ static common_chat_params common_chat_params_init_phi_4(const common_chat_templa
 }
 
+// Parses Phi-4 output: each call is `<|tool_call|>{"name": "<fn>", "arguments": <json>}` followed by an
+// optional closing tag. Matching and content handling are delegated to parse_json_tool_calls().
 static common_chat_msg common_chat_parse_phi_4(const std::string & input) {
-    common_chat_msg result;
-    result.role = "assistant";
-
-    std::string final_content = "";
-
-    const std::string opening_tag = "<|tool_call|>";
-    const std::string closing_tag = "</|tool_call|>";
-
-    size_t start_pos = 0;
-    while (true) {
-        // Find next tool call
-        size_t tool_start = input.find(opening_tag, start_pos);
-        if (tool_start == std::string::npos) {
-            // No more tool calls.
-
-            // Is start_pos within string bounds?
-            if (start_pos < input.length()) {
-                // Add the rest of the string to final_content
-                final_content += input.substr(start_pos);
-            }
-            break;
-        }
-
-        // Add content before the tool call to final_content
-        final_content += input.substr(start_pos, tool_start - start_pos);
-
-        // Find closing tag
-        size_t content_start = tool_start + opening_tag.length();
-        size_t tool_end = input.find(closing_tag, content_start);
-
-        if (tool_end == std::string::npos) {
-            // No closing tag found, so just include the rest of the string as tool.
-            tool_end = input.length();
-        }
-
-        // Extract tool call content
-        std::string tool_content = input.substr(
-            content_start,
-            tool_end - content_start
-        );
-
-        // Try to parse the tool call
-        try {
-            auto tool_call = json::parse(tool_content);
-
-            // Verify the required fields exist
-            if (!tool_call.contains("name")) {
-                throw std::runtime_error("Missing 'name' field in tool call");
-            }
-
-            if (!tool_call.contains("arguments")) {
-                throw std::runtime_error("Missing 'arguments' field in tool call");
-            }
-
-            std::string name = tool_call["name"].get<std::string>();
-
-            std::string arguments;
-            try {
-                arguments = tool_call["arguments"].dump();
-            } catch (const std::exception & e) {
-                LOG_ERR("Failed to serialize arguments: %s\n", e.what());
-                arguments = "{}";
-            }
-
-            result.tool_calls.push_back({
-                name,
-                arguments,
-                /* id= */ "",
-            });
-        } catch (const std::exception & e) {
-            // If parsing fails, include the entire tool call in the content
-            final_content += input.substr(
-                tool_start,
-                tool_end + closing_tag.length() - tool_start
-            );
-        }
-
-        // Move past this tool call for next iteration
-        start_pos = tool_end + closing_tag.length();
-    }
-
-    result.content = final_content;
-    return result;
+    static std::regex function_regex("<\\|tool_call\\|>\\s*\\{\\s*\"name\"\\s*:\\s*\"([^\"]+)\"\\s*,\\s*\"arguments\"\\s*:");
+    // The grammar and template close calls with `<|/tool_call|>`; the `</|tool_call|>` spelling used by
+    // the previous parser is still accepted so existing outputs keep parsing.
+    static std::regex close_regex(R"(\}\s*(<\|/tool_call\|>|</\|tool_call\|>)?)");
+    return parse_json_tool_calls(input, std::nullopt, function_regex, close_regex);
 }
 
 

diff --git a/docs/function-calling.md b/docs/function-calling.md
@@ -12,11 +12,12 @@ Function calling is supported for all models (see https://github.com/ggml-org/ll
   - Llama 3.1 / 3.3 (including builtin tools support - tool names for `wolfram_alpha`, `web_search` / `brave_search`, `code_interpreter`), Llama 3.2
   - Functionary v3.1 / v3.2
   - Hermes 2/3, Qwen 2.5
-  - Qwen 2.5 Coder (WIP: https://github.com/ggml-org/llama.cpp/pull/12034)
+  - Qwen 2.5 Coder (#12034)
   - Mistral Nemo
   - Firefunction v2
-  - Command R7B
-  - DeepSeek R1 (WIP / seems reluctant to call any tools?)
+  - Command R7B (#11585)
+  - DeepSeek R1 (#11607)
+  - Phi 4 (#12288)
 
 - Generic tool call is supported when the template isn't recognized by native format handlers (you'll see `Chat format: Generic` in the logs).
   - Use `--chat-template-file` to override the template when appropriate (see examples below)
@@ -297,9 +298,14 @@ llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q6_K_L \
 llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q4_K_M \
     --chat-template-file models/templates/llama-cpp-deepseek-r1.jinja
 
+# Native support for Phi 4 also needs a template override (official template is buggy)
+
+llama-server --jinja -fa -hf bartowski/microsoft_Phi-4-mini-instruct-GGUF \
+    --chat-template-file models/templates/llama-cpp-microsoft-Phi-4-mini-instruct.jinja
+
 # Native support requires the right template for these GGUFs:
 
-llama-server --jinja -fa -hf bartowski/functionary-small-v3.2-GGUF:Q4_K_M
+llama-server --jinja -fa -hf bartowski/functionary-small-v3.2-GGUF:Q4_K_M \
     --chat-template-file models/templates/meetkai-functionary-medium-v3.2.jinja
 
 llama-server --jinja -fa -hf bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M \

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
@@ -384,7 +384,8 @@ struct server_task {
                             SRV_DBG("Grammar trigger token: %d (`%s`)\n", token, word.c_str());
                             common_grammar_trigger trigger;
                             trigger.type = COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN;
-                            trigger.value = (llama_token) token;
+                            trigger.value = word;
+                            trigger.token = token;
                             params.sampling.grammar_triggers.push_back(trigger);
                         } else {
                             SRV_DBG("Grammar trigger word: `%s`\n", word.c_str());

diff --git a/models/templates/llama-cpp-microsoft-Phi-4-mini-instruct.jinja b/models/templates/llama-cpp-microsoft-Phi-4-mini-instruct.jinja
@@ -0,0 +1,38 @@
+{#- Phi-4-mini chat template for llama.cpp tool calling: tools are appended to the system message inside
+    <|tool|>...<|/tool|>, tool results are rendered as <|tool_response|> blocks, and assistant tool calls
+    are rendered as <|tool_call|>...<|/tool_call|>. -#}
+{%- if messages[0]["role"] == "system" %}
+    {%- set system_message = messages[0]["content"] %}
+{%- elif tools is defined -%}
+    {%- set system_message = "You are a helpful assistant with access to tools." -%}
+{%- else -%}
+    {%- set system_message = "" -%}
+{%- endif %}
+{%- if tools is defined -%}
+    {%- set system_message = system_message + '<|tool|>' + (tools | tojson) + '<|/tool|>' -%}
+    {%- if '<|tool_call|>' not in system_message -%}
+        {%- set system_message = system_message + "\nTo use a tool, respond in this format: <|tool_call|>{\"name\": \"foo\", \"arguments\": {\"a\": 1}}<|/tool_call|>" %}
+    {%- endif %}
+{%- endif %}
+{%- if system_message -%}
+    {{- '<|system|>' + system_message + '<|end|>' -}}
+{%- endif -%}
+{%- for message in messages -%}
+    {%- if message['role'] == 'tool' -%}
+        {{- '<|tool_response|>' + (message['content'] | tojson) + '<|/tool_response|>' -}}
+    {%- elif message['role'] != 'system' -%}
+        {{- '<|' + message['role'] + '|>' -}}
+        {%- if message.content -%}
+            {{- message['content'] -}}
+        {%- endif -%}
+        {%- for tool_call in message.tool_calls -%}
+            {{- '<|tool_call|>' + (tool_call | tojson) + '<|/tool_call|>' -}}
+        {%- endfor -%}
+        {{- '<|end|>' -}}
+    {%- endif -%}
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+   {{- '<|assistant|>' -}}
+{%- else -%}
+   {{- eos_token -}}
+{%- endif -%}