Commit 5b982d0

fix: use both eos and bos tokens as stop sequences for hf-tokenizer-config chat format.
1 parent 2ce0b8a commit 5b982d0

File tree

1 file changed: +12 -8 lines changed

llama_cpp/llama_chat_format.py (+12 -8)
@@ -379,7 +379,8 @@ def hf_autotokenizer_to_chat_completion_handler(
 
 
 def hf_tokenizer_config_to_chat_formatter(
-    tokenizer_config: Dict[str, Any]
+    tokenizer_config: Dict[str, Any],
+    add_generation_prompt: bool = True,
 ) -> ChatFormatter:
     assert isinstance(tokenizer_config, dict)
 
@@ -401,31 +402,34 @@ def hf_tokenizer_config_to_chat_formatter(
         lstrip_blocks=True,
     ).from_string(chat_template)
 
-    def format_autotokenizer(
+    def format_tokenizer_config(
         messages: List[llama_types.ChatCompletionRequestMessage],
         **kwargs: Any,
     ) -> ChatFormatterResponse:
         # TODO: veryify this is correct
         # Add a blank assistant message to the end of the messages to prompt the model to generate a response
-        prompt = env.render(
-            messages=[
+        if add_generation_prompt:
+            messages = [
                 *messages,
                 llama_types.ChatCompletionRequestAssistantMessage(
                     role="assistant", content=""
                 ),
-            ],
+            ]
+        prompt = env.render(
+            messages=messages,
             bos_token=bos_token,
             eos_token=eos_token,
         )
-        return ChatFormatterResponse(prompt=prompt, stop=eos_token)
+        return ChatFormatterResponse(prompt=prompt, stop=[eos_token, bos_token])
 
-    return format_autotokenizer
+    return format_tokenizer_config
 
 
 def hf_tokenizer_config_to_chat_completion_handler(
     tokenizer_config: Dict[str, Any],
+    add_generation_prompt: bool = True,
 ) -> LlamaChatCompletionHandler:
-    chat_formatter = hf_tokenizer_config_to_chat_formatter(tokenizer_config)
+    chat_formatter = hf_tokenizer_config_to_chat_formatter(tokenizer_config, add_generation_prompt=add_generation_prompt)
     return chat_formatter_to_chat_completion_handler(chat_formatter)
 
 
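For readers skimming the diff, the sketch below shows, outside the library, what the formatter does after this commit: render the Hugging Face chat_template with Jinja2, optionally append a blank assistant message when add_generation_prompt is set, and return both the eos and bos tokens as stop sequences. This is a minimal standalone sketch, not llama-cpp-python's code; the helper name build_prompt and the ChatML-style tokenizer_config values are purely illustrative.

# Minimal standalone sketch (not library code) of the behaviour after this
# commit: render a Hugging Face chat_template with Jinja2, optionally append
# a blank assistant turn, and stop on both the eos and bos tokens.
from typing import Any, Dict, List, Tuple

import jinja2


def build_prompt(  # hypothetical helper name, for illustration only
    tokenizer_config: Dict[str, Any],
    messages: List[Dict[str, str]],
    add_generation_prompt: bool = True,
) -> Tuple[str, List[str]]:
    bos_token = tokenizer_config["bos_token"]
    eos_token = tokenizer_config["eos_token"]
    template = jinja2.Environment(
        loader=jinja2.BaseLoader(), trim_blocks=True, lstrip_blocks=True
    ).from_string(tokenizer_config["chat_template"])

    if add_generation_prompt:
        # A blank assistant message nudges the template into opening an
        # assistant turn for the model to complete.
        messages = [*messages, {"role": "assistant", "content": ""}]

    prompt = template.render(
        messages=messages, bos_token=bos_token, eos_token=eos_token
    )
    # The fix in this commit: return bos_token as a stop sequence alongside
    # eos_token.
    return prompt, [eos_token, bos_token]


if __name__ == "__main__":
    # Illustrative ChatML-style config; real values come from a model's
    # tokenizer_config.json.
    config = {
        "bos_token": "<s>",
        "eos_token": "</s>",
        "chat_template": (
            "{{ bos_token }}"
            "{% for m in messages %}"
            "<|{{ m['role'] }}|>\n{{ m['content'] }}{{ eos_token }}\n"
            "{% endfor %}"
        ),
    }
    prompt, stop = build_prompt(config, [{"role": "user", "content": "Hi!"}])
    print(prompt)
    print("stop sequences:", stop)

Returning bos_token as a stop sequence in addition to eos_token presumably guards against templates that emit the bos token at the start of every turn: if the model begins generating a new turn, the bos token shows up in the output and generation is cut off there.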