Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Fixes for phi-4 support #1

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Mar 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 10 additions & 89 deletions common/chat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -580,10 +580,7 @@ static common_chat_msg parse_json_tool_calls(
}

if (!result.tool_calls.empty()) {
if (!string_strip(result.content).empty()) {
LOG_WRN("Content found with tool calls: %s\n", result.content.c_str());
}
result.content = "";
result.content = string_strip(result.content);
}
return result;
}
Expand Down Expand Up @@ -1359,14 +1356,15 @@ static common_chat_params common_chat_params_init_phi_4(const common_chat_templa
std::string name = function.at("name");
auto parameters = function.at("parameters");
builder.resolve_refs(parameters);
tool_rules.push_back(builder.add_schema(name + "-call", {
auto call_rule = builder.add_schema(name + "-call", {
{"type", "object"},
{"properties", {
{"name", {{"const", name}}},
{"arguments", parameters},
}},
{"required", json::array({"name", "arguments"})},
}));
});
tool_rules.push_back(builder.add_rule(name + "-call", "\"<|tool_call|>\" " + call_rule + " \"<|/tool_call|>\""));
});
auto any_tool_call = builder.add_rule("any_tool_call", "( " + string_join(tool_rules, " | ") + " ) space");
std::vector<std::string> alt_tags {
Expand All @@ -1379,6 +1377,9 @@ static common_chat_params common_chat_params_init_phi_4(const common_chat_templa
// Tool-related special tokens registered as preserved tokens for this format.
// Closing tags are spelled `<|/name|>` (slash after the first pipe), the same
// spelling the tool-call grammar rule for this format uses.
data.preserved_tokens = {
    "<|tool_call|>",
    "<|/tool_call|>",
    "<|tool_response|>",
    "<|tool|>",
    "<|/tool|>",
};
});

Expand Down Expand Up @@ -1437,89 +1438,9 @@ static common_chat_params common_chat_params_init_phi_4(const common_chat_templa
}

/**
 * Parses raw Phi-4 model output into a chat message, extracting tool calls.
 *
 * A tool call is expected in the form
 *   <|tool_call|>{"name": "<fn>", "arguments": { ... }}<|/tool_call|>
 *
 * Extraction is delegated to parse_json_tool_calls() with two patterns:
 *  - function_regex matches the opening tag up to and including the
 *    `"arguments":` key and captures the function name in group 1;
 *  - close_regex matches a `}` followed by optional whitespace and an
 *    optional closing tag.
 *
 * @param input  Raw text produced by the model.
 * @return       Whatever parse_json_tool_calls() builds from `input`
 *               (presumably the assistant message with content and tool calls
 *               split out -- confirm against that helper).
 */
static common_chat_msg common_chat_parse_phi_4(const std::string & input) {
    static std::regex function_regex("<\\|tool_call\\|>\\s*\\{\\s*\"name\"\\s*:\\s*\"([^\"]+)\"\\s*,\\s*\"arguments\"\\s*:");
    // The closing tag is `<|/tool_call|>`, matching the grammar rule and the chat
    // template. The older `</|tool_call|>` spelling is still accepted so text
    // written with it keeps parsing.
    static std::regex close_regex(R"(\}\s*(<\|/tool_call\|>|</\|tool_call\|>)?)");
    return parse_json_tool_calls(input, std::nullopt, function_regex, close_regex);
}


Expand Down
14 changes: 10 additions & 4 deletions docs/function-calling.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,12 @@ Function calling is supported for all models (see https://github.com/ggml-org/ll
- Llama 3.1 / 3.3 (including builtin tools support - tool names for `wolfram_alpha`, `web_search` / `brave_search`, `code_interpreter`), Llama 3.2
- Functionary v3.1 / v3.2
- Hermes 2/3, Qwen 2.5
- Qwen 2.5 Coder (WIP: https://github.com/ggml-org/llama.cpp/pull/12034)
- Qwen 2.5 Coder (#12034)
- Mistral Nemo
- Firefunction v2
- Command R7B
- DeepSeek R1 (WIP / seems reluctant to call any tools?)
- Command R7B (#11585)
- DeepSeek R1 (#11607)
- Phi 4 (#12288)

- Generic tool call is supported when the template isn't recognized by native format handlers (you'll see `Chat format: Generic` in the logs).
- Use `--chat-template-file` to override the template when appropriate (see examples below)
Expand Down Expand Up @@ -297,9 +298,14 @@ llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q6_K_L \
llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q4_K_M \
--chat-template-file models/templates/llama-cpp-deepseek-r1.jinja

# Native support for Phi 4 also needs a template override (official template is buggy)

llama-server --jinja -fa -hf bartowski/microsoft_Phi-4-mini-instruct-GGUF \
--chat-template-file models/templates/llama-cpp-microsoft-Phi-4-mini-instruct.jinja

# Native support requires the right template for these GGUFs:

llama-server --jinja -fa -hf bartowski/functionary-small-v3.2-GGUF:Q4_K_M
llama-server --jinja -fa -hf bartowski/functionary-small-v3.2-GGUF:Q4_K_M \
--chat-template-file models/templates/meetkai-functionary-medium-v3.2.jinja

llama-server --jinja -fa -hf bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M \
Expand Down
3 changes: 2 additions & 1 deletion examples/server/server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,8 @@ struct server_task {
SRV_DBG("Grammar trigger token: %d (`%s`)\n", token, word.c_str());
common_grammar_trigger trigger;
trigger.type = COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN;
trigger.value = (llama_token) token;
trigger.value = word;
trigger.token = token;
params.sampling.grammar_triggers.push_back(trigger);
} else {
SRV_DBG("Grammar trigger word: `%s`\n", word.c_str());
Expand Down
35 changes: 35 additions & 0 deletions models/templates/llama-cpp-microsoft-Phi-4-mini-instruct.jinja
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{#- Chat template for Phi-4-mini with tool calling.
    1. Pick the system text: the first message if it is a system message, a default
       prompt if tools are supplied, otherwise nothing.
    2. When tools are supplied, append their JSON and, unless the system text already
       mentions the tool-call tag, a one-line usage hint.
    3. Render every non-system message; tool results and tool calls are JSON-encoded.
    Every tag strips the whitespace on its left, so indentation never reaches the prompt. -#}
{%- if messages[0]["role"] == "system" %}
    {%- set system_message = messages[0]["content"] %}
{%- elif tools is defined -%}
    {%- set system_message = "You are a helpful assistant with access to tools." -%}
{%- else %}
    {%- set system_message = "" -%}
{%- endif %}
{%- if tools is defined -%}
    {%- set system_message = system_message + '<|tool|>' + (tools | tojson) + '<|/tool|>' -%}
    {%- if '<|tool_call|>' not in system_message -%}
        {%- set system_message = system_message + "\nTo use a tool, respond in this format: <|tool_call|>{\"name\": \"foo\", \"arguments\": {\"a\": 1}}<|/tool_call|>" %}
    {%- endif %}
{%- endif %}
{#- system_message is always set above, so test for non-empty text rather than
    definedness; this avoids emitting an empty system turn. -#}
{%- if system_message -%}
    {{- '<|system|>' + system_message + '<|end|>' -}}
{%- endif -%}
{%- for message in messages -%}
    {%- if message['role'] == 'tool' -%}
        {{- '<|tool_response|>' + (message['content'] | tojson) + '<|/tool_response|>' -}}
    {%- elif message['role'] != 'system' -%}
        {{- '<|' + message['role'] + '|>' -}}
        {%- if message.content -%}
            {{- message['content'] -}}
        {%- endif -%}
        {%- for tool_call in message.tool_calls -%}
            {{- '<|tool_call|>' + (tool_call | tojson) + '<|/tool_call|>' -}}
        {%- endfor -%}
        {{- '<|end|>' -}}
    {%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
    {{- '<|assistant|>' -}}
{%- else -%}
    {{- eos_token -}}
{%- endif -%}