@@ -339,16 +339,7 @@ def chat_completion_handler(
339
339
stop = stop + rstop
340
340
341
341
if response_format is not None and response_format ["type" ] == "json_object" :
342
- try :
343
- # create grammar from json schema
344
- if "schema" in response_format :
345
- grammar = llama_grammar .LlamaGrammar .from_json_schema (
346
- json .dumps (response_format ["schema" ]), verbose = llama .verbose
347
- )
348
- except Exception as e :
349
- grammar = llama_grammar .LlamaGrammar .from_string (
350
- llama_grammar .JSON_GBNF , verbose = llama .verbose
351
- )
342
+ grammar = _grammar_for_response_format (response_format , verbose = llama .verbose )
352
343
353
344
completion_or_chunks = llama .create_completion (
354
345
prompt = prompt ,
@@ -606,6 +597,35 @@ def _format_chatglm3(
606
597
ret += role
607
598
return ret
608
599
600
def _grammar_for_json(verbose: bool = False):
    """Build a grammar that constrains output to arbitrary valid JSON.

    Args:
        verbose: Forwarded to the grammar constructor's logging flag.

    Returns:
        A ``llama_grammar.LlamaGrammar`` compiled from the generic JSON GBNF.
    """
    json_gbnf = llama_grammar.JSON_GBNF
    return llama_grammar.LlamaGrammar.from_string(json_gbnf, verbose=verbose)
603
def _grammar_for_json_schema(
    schema: str,
    verbose: bool = False,
    fallback_to_json: bool = True,
):
    """Build a grammar constraining output to the given JSON schema.

    Args:
        schema: The JSON schema, as a JSON-encoded string.
        verbose: Forwarded to the grammar constructor's logging flag.
        fallback_to_json: When True (the default), any failure converting the
            schema degrades to the generic JSON grammar instead of raising.

    Returns:
        A ``llama_grammar.LlamaGrammar`` instance.

    Raises:
        Exception: Re-raises whatever ``from_json_schema`` raised, but only
            when ``fallback_to_json`` is False.
    """
    try:
        return llama_grammar.LlamaGrammar.from_json_schema(schema, verbose=verbose)
    except Exception:
        # Broad catch is deliberate: a malformed/unsupported schema should
        # degrade to "any JSON" output rather than abort the completion,
        # unless the caller explicitly opted out of the fallback.
        if fallback_to_json:
            return _grammar_for_json(verbose=verbose)
        # Bare `raise` re-raises the active exception with its original
        # traceback intact (`raise e` would add a redundant re-raise frame).
        raise
615
+
616
def _grammar_for_response_format(
    response_format: llama_types.ChatCompletionRequestResponseFormat,
    verbose: bool = False,
):
    """Map an OpenAI-style ``response_format`` to a constraining grammar.

    Only the ``"json_object"`` response type produces a grammar; any other
    type returns ``None`` (no constraint). When the request carries a
    ``"schema"`` key, a schema-specific grammar is built (with fallback to
    generic JSON on conversion failure); otherwise the generic JSON grammar
    is used.

    Args:
        response_format: The request's response-format specification.
        verbose: Forwarded to the grammar constructors' logging flag.

    Returns:
        A ``llama_grammar.LlamaGrammar``, or ``None`` when no constraint
        applies.
    """
    # Guard clause: anything but json_object means "no grammar".
    if response_format["type"] != "json_object":
        return None

    if "schema" not in response_format:
        return _grammar_for_json(verbose=verbose)

    return _grammar_for_json_schema(
        json.dumps(response_format["schema"]), verbose=verbose
    )
609
629
610
630
### Chat Formats ###
611
631
@@ -1994,16 +2014,7 @@ def __call__(
1994
2014
prompt = llama .input_ids [: llama .n_tokens ].tolist ()
1995
2015
1996
2016
if response_format is not None and response_format ["type" ] == "json_object" :
1997
- try :
1998
- # create grammar from json schema
1999
- if "schema" in response_format :
2000
- grammar = llama_grammar .LlamaGrammar .from_json_schema (
2001
- json .dumps (response_format ["schema" ])
2002
- )
2003
- except Exception as e :
2004
- grammar = llama_grammar .LlamaGrammar .from_string (
2005
- llama_grammar .JSON_GBNF
2006
- )
2017
+ grammar = _grammar_for_response_format (response_format )
2007
2018
2008
2019
return _convert_completion_to_chat (
2009
2020
llama .create_completion (
@@ -2159,26 +2170,10 @@ def chatml_function_calling(
2159
2170
tool_calls = None ,
2160
2171
add_generation_prompt = True ,
2161
2172
)
2173
+
2162
2174
if response_format is not None and response_format ["type" ] == "json_object" :
2163
- try :
2164
- grammar = (
2165
- llama_grammar .LlamaGrammar .from_json_schema (
2166
- json .dumps (response_format ["schema" ])
2167
- )
2168
- if "schema" in response_format
2169
- else None
2170
- )
2171
- except Exception as e :
2172
- if llama .verbose :
2173
- print (
2174
- "Failed to parse response format as JSON schema, falling back to default grammar"
2175
- )
2176
- print (e )
2177
- grammar = (
2178
- llama_grammar .LlamaGrammar .from_string (llama_grammar .JSON_GBNF )
2179
- if grammar is None
2180
- else grammar
2181
- )
2175
+ grammar = _grammar_for_response_format (response_format )
2176
+
2182
2177
return _convert_completion_to_chat (
2183
2178
llama .create_completion (
2184
2179
prompt = prompt ,
0 commit comments