@@ -87,7 +87,7 @@ def generate_text(
87
87
messages : ModelInput ,
88
88
parameters : dict [str , Any ],
89
89
) -> tuple [LazyMessage , PdlLazy [Any ]]:
90
- print ("Asynchronous model call started" , file = stderr )
90
+ print (f "Asynchronous model call started to { block . model } " , file = stderr )
91
91
# global _BACKGROUND_TASKS
92
92
future = asyncio .run_coroutine_threadsafe (
93
93
LitellmModel .async_generate_text (
@@ -110,24 +110,29 @@ def update_end_nanos(future):
110
110
if block .pdl__timing is not None :
111
111
block .pdl__timing .end_nanos = time .time_ns ()
112
112
113
- # report call completion and its duration (and if available queueing time)
114
- res = future .result ()[1 ]
113
+ # report call completion and its duration
115
114
start = (
116
115
block .pdl__timing .start_nanos
117
116
if block .pdl__timing .start_nanos is not None
118
117
else 0
119
118
)
120
119
exec_nanos = block .pdl__timing .end_nanos - start
121
- queue_nanos = 0
122
- if "created" in res :
123
- queue_nanos = (
124
- res ["created" ] * 1000000000 - block .pdl__timing .start_nanos
125
- )
126
- exec_nanos = exec_nanos - queue_nanos
127
120
print (
128
- f"Asynchronous model call to { block .model } completed in { (exec_nanos )/ 1000000 } ms queued for { queue_nanos / 1000000 } ms " ,
121
+ f"Asynchronous model call to { block .model } completed in { (exec_nanos )/ 1000000 } ms" ,
129
122
file = stderr ,
130
123
)
124
+ msg = future .result ()[0 ]
125
+ if msg ["content" ] is not None :
126
+ from termcolor import colored
127
+
128
+ from .pdl_ast import BlockKind
129
+ from .pdl_scheduler import color_of
130
+
131
+ print (
132
+ colored (msg ["content" ], color = color_of (BlockKind .MODEL )),
133
+ file = stderr ,
134
+ )
135
+ print ("\n " , file = stderr )
131
136
132
137
future .add_done_callback (update_end_nanos )
133
138
0 commit comments