Commit 4f0ec65

fix: chat API logprobs format (abetlen#1788)
* fix: chat API logprobs format
* Fix optional properties
1 parent d610477 commit 4f0ec65
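
In short: the chat completion handlers previously passed the text-completion logprobs structure (parallel tokens / token_logprobs / top_logprobs lists) straight through into chat responses, which is not the shape the OpenAI-style chat API defines. This commit adds a _convert_text_completion_logprobs_to_chat helper in llama_chat_format.py, applies it everywhere a chat choice is assembled (including the streaming and function-calling paths), and introduces matching ChatCompletionLogprobs typed dicts in llama_types.py, with the optional properties marked as such.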

3 files changed, +59 -18 lines changed

Makefile

+1 -1

@@ -62,7 +62,7 @@ docker:
 	docker build -t llama-cpp-python:latest -f docker/simple/Dockerfile .
 
 run-server:
-	uvicorn --factory llama.server:app --host ${HOST} --port ${PORT}
+	python llama_cpp/server --model ${MODEL}
 
 clean:
 	- cd vendor/llama.cpp && make clean

llama_cpp/llama_chat_format.py

+38 -15
@@ -259,6 +259,31 @@ def to_chat_handler(self) -> LlamaChatCompletionHandler:
         return chat_formatter_to_chat_completion_handler(self)
 
 
+def _convert_text_completion_logprobs_to_chat(
+    logprobs: Optional[llama_types.CompletionLogprobs],
+) -> llama_types.ChatCompletionLogprobs:
+    if logprobs is None:
+        return None
+
+    return {
+        "content": [
+            {
+                "token": token,
+                "bytes": None,
+                "logprob": logprob,
+                "top_logprobs": [
+                    {
+                        "token": top_token,
+                        "logprob": top_logprob,
+                        "bytes": None,
+                    }
+                    for top_token, top_logprob in top_logprobs.items()
+                ],
+            } for (token, logprob, top_logprobs) in zip(logprobs["tokens"], logprobs["token_logprobs"], logprobs["top_logprobs"])
+        ],
+        "refusal": None,
+    }
+
 def _convert_text_completion_to_chat(
     completion: llama_types.Completion,
 ) -> llama_types.ChatCompletion:
@@ -275,7 +300,7 @@ def _convert_text_completion_to_chat(
                     "role": "assistant",
                     "content": completion["choices"][0]["text"],
                 },
-                "logprobs": completion["choices"][0]["logprobs"],
+                "logprobs": _convert_text_completion_logprobs_to_chat(completion["choices"][0]["logprobs"]),
                 "finish_reason": completion["choices"][0]["finish_reason"],
             }
         ],
@@ -319,7 +344,7 @@ def _convert_text_completion_chunks_to_chat(
                     if chunk["choices"][0]["finish_reason"] is None
                     else {}
                 ),
-                "logprobs": chunk["choices"][0]["logprobs"],
+                "logprobs": _convert_text_completion_logprobs_to_chat(chunk["choices"][0]["logprobs"]),
                 "finish_reason": chunk["choices"][0]["finish_reason"],
             }
         ],
@@ -382,7 +407,7 @@ def _convert_completion_to_chat_function(
                         }
                     ],
                 },
-                "logprobs": completion["choices"][0]["logprobs"],
+                "logprobs": _convert_text_completion_logprobs_to_chat(completion["choices"][0]["logprobs"]),
                 "finish_reason": "tool_calls",
             }
         ],
@@ -435,7 +460,7 @@ def _stream_response_to_function_stream(
                     {
                         "index": 0,
                         "finish_reason": None,
-                        "logprobs": chunk["choices"][0]["logprobs"],
+                        "logprobs": _convert_text_completion_logprobs_to_chat(chunk["choices"][0]["logprobs"]),
                         "delta": {
                             "role": None,
                             "content": None,
@@ -472,7 +497,7 @@ def _stream_response_to_function_stream(
                     {
                         "index": 0,
                         "finish_reason": None,
-                        "logprobs": chunk["choices"][0]["logprobs"],
+                        "logprobs": _convert_text_completion_logprobs_to_chat(chunk["choices"][0]["logprobs"]),
                         "delta": {
                             "role": None,
                             "content": None,
@@ -1716,7 +1741,7 @@ def message_to_str(msg: llama_types.ChatCompletionRequestMessage):
                         }
                     ],
                 },
-                "logprobs": completion["choices"][0]["logprobs"],
+                "logprobs": _convert_text_completion_logprobs_to_chat(completion["choices"][0]["logprobs"]),
                 "finish_reason": "tool_calls",
             }
         ],
@@ -2128,7 +2153,7 @@ def generate_streaming(tools, functions, function_call, prompt):
                 choices=[
                     {
                         "index": 0,
-                        "logprobs": chunk["choices"][0]["logprobs"],
+                        "logprobs": _convert_text_completion_logprobs_to_chat(chunk["choices"][0]["logprobs"]),
                         "delta": {
                             "role": None,
                             "content": None,
@@ -2230,7 +2255,7 @@ def generate_streaming(tools, functions, function_call, prompt):
                 choices=[
                     {
                         "index": 0,
-                        "logprobs": chunk["choices"][0]["logprobs"],
+                        "logprobs": _convert_text_completion_logprobs_to_chat(chunk["choices"][0]["logprobs"]),
                         "delta": {
                             "role": "assistant",
                             "content": None,
@@ -2268,9 +2293,7 @@ def generate_streaming(tools, functions, function_call, prompt):
                 choices=[
                     {
                         "index": 0,
-                        "logprobs": chunk["choices"][0][
-                            "logprobs"
-                        ],
+                        "logprobs": _convert_text_completion_logprobs_to_chat(chunk["choices"][0]["logprobs"]),
                         "delta": {
                             "role": "assistant",
                             "content": buffer.pop(0),
@@ -2293,7 +2316,7 @@ def generate_streaming(tools, functions, function_call, prompt):
                 choices=[
                     {
                         "index": 0,
-                        "logprobs": chunk["choices"][0]["logprobs"],
+                        "logprobs": _convert_text_completion_logprobs_to_chat(chunk["choices"][0]["logprobs"]),
                         "delta": {
                             "role": "assistant",
                             "content": (
@@ -2379,7 +2402,7 @@ def generate_streaming(tools, functions, function_call, prompt):
                 choices=[
                     {
                         "index": 0,
-                        "logprobs": chunk["choices"][0]["logprobs"],
+                        "logprobs": _convert_text_completion_logprobs_to_chat(chunk["choices"][0]["logprobs"]),
                         "delta": {
                             "role": None,
                             "content": None,
@@ -2613,7 +2636,7 @@ def generate_streaming(tools, functions, function_call, prompt):
                 choices=[
                     {
                         "index": 0,
-                        "logprobs": completion["choices"][0]["logprobs"],
+                        "logprobs": _convert_text_completion_logprobs_to_chat(completion["choices"][0]["logprobs"]),
                         "message": {
                             "role": "assistant",
                             "content": None if content == "" else content,
@@ -3745,7 +3768,7 @@ def chatml_function_calling(
             {
                 "finish_reason": "tool_calls",
                 "index": 0,
-                "logprobs": completion["choices"][0]["logprobs"],
+                "logprobs": _convert_text_completion_logprobs_to_chat(completion["choices"][0]["logprobs"]),
                 "message": {
                     "role": "assistant",
                     "content": None,

llama_cpp/llama_types.py

+20 -2

@@ -82,10 +82,28 @@ class ChatCompletionFunction(TypedDict):
     parameters: Dict[str, JsonType]  # TODO: make this more specific
 
 
+class ChatCompletionTopLogprobToken(TypedDict):
+    token: str
+    logprob: float
+    bytes: Optional[List[int]]
+
+
+class ChatCompletionLogprobToken(ChatCompletionTopLogprobToken):
+    token: str
+    logprob: float
+    bytes: Optional[List[int]]
+    top_logprobs: List[ChatCompletionTopLogprobToken]
+
+
+class ChatCompletionLogprobs(TypedDict):
+    content: Optional[List[ChatCompletionLogprobToken]]
+    refusal: Optional[List[ChatCompletionLogprobToken]]
+
+
 class ChatCompletionResponseChoice(TypedDict):
     index: int
     message: "ChatCompletionResponseMessage"
-    logprobs: Optional[CompletionLogprobs]
+    logprobs: Optional[ChatCompletionLogprobs]
     finish_reason: Optional[str]
 
 
@@ -134,7 +152,7 @@ class ChatCompletionStreamResponseChoice(TypedDict):
         ChatCompletionStreamResponseDelta, ChatCompletionStreamResponseDeltaEmpty
     ]
     finish_reason: Optional[Literal["stop", "length", "tool_calls", "function_call"]]
-    logprobs: NotRequired[Optional[CompletionLogprobs]]
+    logprobs: NotRequired[Optional[ChatCompletionLogprobs]]
 
 
 class CreateChatCompletionStreamResponse(TypedDict):
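
As a quick check of how the new typed dicts nest, the sketch below re-declares them in a standalone module and builds a conforming value. The token strings and numbers are invented, and importing TypedDict from the standard typing module is an assumption made for self-containment (the library's own module may take it from typing_extensions, which NotRequired also requires on older Pythons).

from typing import List, Optional, TypedDict  # assumption: stdlib typing (Python 3.8+)


class ChatCompletionTopLogprobToken(TypedDict):
    token: str
    logprob: float
    bytes: Optional[List[int]]


class ChatCompletionLogprobToken(ChatCompletionTopLogprobToken):
    # The commit re-declares token/logprob/bytes here as well; they are
    # inherited from ChatCompletionTopLogprobToken either way.
    top_logprobs: List[ChatCompletionTopLogprobToken]


class ChatCompletionLogprobs(TypedDict):
    content: Optional[List[ChatCompletionLogprobToken]]
    refusal: Optional[List[ChatCompletionLogprobToken]]


# A value conforming to ChatCompletionLogprobs (sample numbers are invented):
example: ChatCompletionLogprobs = {
    "content": [
        {
            "token": "Hello",
            "logprob": -0.12,
            "bytes": None,
            "top_logprobs": [{"token": "Hi", "logprob": -2.31, "bytes": None}],
        }
    ],
    "refusal": None,
}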
