@@ -379,7 +379,8 @@ def hf_autotokenizer_to_chat_completion_handler(
 
 
 def hf_tokenizer_config_to_chat_formatter(
-    tokenizer_config: Dict[str, Any]
+    tokenizer_config: Dict[str, Any],
+    add_generation_prompt: bool = True,
 ) -> ChatFormatter:
     assert isinstance(tokenizer_config, dict)
 
@@ -401,31 +402,34 @@ def hf_tokenizer_config_to_chat_formatter(
         lstrip_blocks=True,
     ).from_string(chat_template)
 
-    def format_autotokenizer(
+    def format_tokenizer_config(
         messages: List[llama_types.ChatCompletionRequestMessage],
         **kwargs: Any,
     ) -> ChatFormatterResponse:
         # TODO: veryify this is correct
         # Add a blank assistant message to the end of the messages to prompt the model to generate a response
-        prompt = env.render(
-            messages=[
+        if add_generation_prompt:
+            messages = [
                 *messages,
                 llama_types.ChatCompletionRequestAssistantMessage(
                     role="assistant", content=""
                 ),
-            ],
+            ]
+        prompt = env.render(
+            messages=messages,
             bos_token=bos_token,
             eos_token=eos_token,
         )
-        return ChatFormatterResponse(prompt=prompt, stop=eos_token)
+        return ChatFormatterResponse(prompt=prompt, stop=[eos_token, bos_token])
 
-    return format_autotokenizer
+    return format_tokenizer_config
 
 
 def hf_tokenizer_config_to_chat_completion_handler(
     tokenizer_config: Dict[str, Any],
+    add_generation_prompt: bool = True,
 ) -> LlamaChatCompletionHandler:
-    chat_formatter = hf_tokenizer_config_to_chat_formatter(tokenizer_config)
+    chat_formatter = hf_tokenizer_config_to_chat_formatter(tokenizer_config, add_generation_prompt=add_generation_prompt)
     return chat_formatter_to_chat_completion_handler(chat_formatter)
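And a usage sketch for the new keyword at the handler level, assuming llama-cpp-python's module layout at the time of this commit (the tokenizer_config.json path below is hypothetical):

import json

from llama_cpp.llama_chat_format import (
    hf_tokenizer_config_to_chat_completion_handler,
)

# Hypothetical path: the tokenizer_config.json shipped with an HF model.
with open("tokenizer_config.json") as f:
    tokenizer_config = json.load(f)

# The new keyword threads through to the formatter; pass False to render
# the messages as-is, with no trailing blank assistant message appended.
handler = hf_tokenizer_config_to_chat_completion_handler(
    tokenizer_config,
    add_generation_prompt=False,
)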