@@ -77,6 +77,8 @@ def __call__(
         mirostat_eta: float = 0.1,
         logits_processor: Optional[llama.LogitsProcessorList] = None,
         grammar: Optional[llama.LlamaGrammar] = None,
+        logprobs: Optional[bool] = None,
+        top_logprobs: Optional[int] = None,
         **kwargs,  # type: ignore
     ) -> Union[
         llama_types.CreateChatCompletionResponse,
@@ -338,7 +340,7 @@ def _convert_completion_to_chat_function(
                     }
                 ],
             },
-            "logprobs": None,
+            "logprobs": completion["choices"][0]["logprobs"],
            "finish_reason": "tool_calls",
        }
    ],
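Before this change the converter dropped per-token log probabilities when building the chat response (the field was hard-coded to None); it now copies them through from the underlying completion choice. A minimal sketch of where the copied value lands, assuming the completion-style logprobs dict that llama-cpp-python's completion API returns (the nested values here are made up for illustration):

# Sketch only: shape of a chat choice after this hunk, values invented.
completion_logprobs = {
    "tokens": ["Hello", "!"],
    "token_logprobs": [-0.12, -0.80],
    "top_logprobs": [{"Hello": -0.12, "Hi": -2.30}, {"!": -0.80, ".": -1.10}],
    "text_offset": [0, 5],
}

chat_choice = {
    "index": 0,
    "message": {"role": "assistant", "content": None},
    "logprobs": completion_logprobs,  # copied verbatim; completion-style shape
    "finish_reason": "tool_calls",
}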
@@ -391,7 +393,7 @@ def _stream_response_to_function_stream(
                {
                    "index": 0,
                    "finish_reason": None,
-                   "logprobs": None,
+                   "logprobs": chunk["choices"][0]["logprobs"],
                    "delta": {
                        "role": None,
                        "content": None,
@@ -426,7 +428,7 @@ def _stream_response_to_function_stream(
                {
                    "index": 0,
                    "finish_reason": None,
-                   "logprobs": None,
+                   "logprobs": chunk["choices"][0]["logprobs"],
                    "delta": {
                        "role": None,
                        "content": None,
@@ -491,7 +493,6 @@ def chat_completion_handler(
         temperature: float = 0.2,
         top_p: float = 0.95,
         top_k: int = 40,
-        logprobs: int = 0,
         min_p: float = 0.05,
         typical_p: float = 1.0,
         stream: bool = False,
@@ -512,6 +513,8 @@ def chat_completion_handler(
         logits_processor: Optional[llama.LogitsProcessorList] = None,
         grammar: Optional[llama.LlamaGrammar] = None,
         logit_bias: Optional[Dict[str, float]] = None,
+        logprobs: Optional[bool] = None,
+        top_logprobs: Optional[int] = None,
         **kwargs,  # type: ignore
     ) -> Union[
         llama_types.CreateChatCompletionResponse,
@@ -581,7 +584,7 @@ def chat_completion_handler(
            top_k=top_k,
            min_p=min_p,
            typical_p=typical_p,
-           logprobs=logprobs,
+           logprobs=top_logprobs if logprobs else None,
            stream=stream,
            stop=stop,
            seed=seed,
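This line is the heart of the commit: the handler now exposes the OpenAI-style pair of chat parameters (a boolean logprobs flag plus an integer top_logprobs count) and folds them back into the single integer-or-None logprobs argument that the underlying completion call expects. A minimal standalone sketch of that mapping, with the helper name invented for illustration:

from typing import Optional

def map_chat_logprobs(logprobs: Optional[bool], top_logprobs: Optional[int]) -> Optional[int]:
    """Mirrors `logprobs=top_logprobs if logprobs else None` from the hunk above:
    the completion API takes how many top tokens to score, or None to disable."""
    return top_logprobs if logprobs else None

assert map_chat_logprobs(True, 5) == 5
assert map_chat_logprobs(False, 5) is None
assert map_chat_logprobs(None, 5) is None
assert map_chat_logprobs(True, None) is None  # flag set but no count: stays off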
@@ -1628,7 +1631,7 @@ def message_to_str(msg: llama_types.ChatCompletionRequestMessage):
                    }
                ],
            },
-           "logprobs": None,
+           "logprobs": completion["choices"][0]["logprobs"],
            "finish_reason": "tool_calls",
        }
    ],
@@ -2085,7 +2088,7 @@ def create_completion(stop):
        choices=[
            {
                "index": 0,
-               "logprobs": None,
+               "logprobs": completion["choices"][0]["logprobs"],
                "message": {
                    "role": "assistant",
                    "content": None if content == "" else content,
@@ -2311,11 +2314,14 @@ def chatml_function_calling(
     model: Optional[str] = None,
     logits_processor: Optional[llama.LogitsProcessorList] = None,
     grammar: Optional[llama.LlamaGrammar] = None,
+    logprobs: Optional[bool] = None,
+    top_logprobs: Optional[int] = None,
     **kwargs,  # type: ignore
 ) -> Union[
     llama_types.CreateChatCompletionResponse,
     Iterator[llama_types.CreateChatCompletionStreamResponse],
 ]:
+    print(logprobs)
     function_calling_template = (
         "{% for message in messages %}"
         "<|im_start|>{{ message.role }}\n"
@@ -2437,6 +2443,7 @@ def chatml_function_calling(
                model=model,
                logits_processor=logits_processor,
                grammar=grammar,
+               logprobs=top_logprobs if logprobs else None,
            ),
            stream=stream,
        )
@@ -2549,6 +2556,7 @@ def chatml_function_calling(
            typical_p=typical_p,
            stream=stream,
            stop=["<|im_end|>"],
+           logprobs=top_logprobs if logprobs else None,
            max_tokens=None,
            presence_penalty=presence_penalty,
            frequency_penalty=frequency_penalty,
@@ -2660,7 +2668,7 @@ def chatml_function_calling(
            {
                "finish_reason": "tool_calls",
                "index": 0,
-               "logprobs": None,
+               "logprobs": completion["choices"][0]["logprobs"],
                "message": {
                    "role": "assistant",
                    "content": None,