@@ -66,7 +66,7 @@ def step_server_config(context, server_fqdn, server_port):
66
66
def step_download_hf_model (context , hf_file , hf_repo ):
67
67
context .model_file = hf_hub_download (repo_id = hf_repo , filename = hf_file )
68
68
if context .debug :
69
- print (f"model file: { context .model_file } \n " )
69
+ print (f"model file: { context .model_file } " )
70
70
71
71
72
72
@step ('a model file {model_file}' )
@@ -137,9 +137,12 @@ def step_start_server(context):
137
137
if 'GITHUB_ACTIONS' in os .environ :
138
138
max_attempts *= 2
139
139
140
+ addrs = socket .getaddrinfo (context .server_fqdn , context .server_port , type = socket .SOCK_STREAM )
141
+ family , typ , proto , _ , sockaddr = addrs [0 ]
142
+
140
143
while True :
141
- with closing (socket .socket (socket . AF_INET , socket . SOCK_STREAM )) as sock :
142
- result = sock .connect_ex (( context . server_fqdn , context . server_port ) )
144
+ with closing (socket .socket (family , typ , proto )) as sock :
145
+ result = sock .connect_ex (sockaddr )
143
146
if result == 0 :
144
147
print ("\x1b [33;46mserver started!\x1b [0m" )
145
148
return
@@ -209,7 +212,7 @@ async def step_request_completion(context, api_error):
209
212
user_api_key = context .user_api_key )
210
213
context .tasks_result .append (completion )
211
214
if context .debug :
212
- print (f"Completion response: { completion } \n " )
215
+ print (f"Completion response: { completion } " )
213
216
if expect_api_error :
214
217
assert completion == 401 , f"completion must be an 401 status code: { completion } "
215
218
@@ -354,7 +357,7 @@ def step_prompt_passkey(context, passkey, i_pos):
354
357
prompt += context .prompt_junk_suffix
355
358
if context .debug :
356
359
passkey_highlight = "\x1b [33m" + passkey + "\x1b [0m"
357
- print (f"Passkey challenge:\n ```{ prompt .replace (passkey , passkey_highlight )} ```\n " )
360
+ print (f"Passkey challenge:\n ```{ prompt .replace (passkey , passkey_highlight )} ```" )
358
361
context .prompts .append (context .prompt_prefix + prompt + context .prompt_suffix )
359
362
context .n_prompts = len (context .prompts )
360
363
@@ -363,7 +366,7 @@ def step_prompt_passkey(context, passkey, i_pos):
363
366
@async_run_until_complete
364
367
async def step_oai_chat_completions (context , api_error ):
365
368
if context .debug :
366
- print (f"Submitting OAI compatible completions request...\n " )
369
+ print (f"Submitting OAI compatible completions request..." )
367
370
expect_api_error = api_error == 'raised'
368
371
completion = await oai_chat_completions (context .prompts .pop (),
369
372
context .system_prompt ,
@@ -508,12 +511,12 @@ async def step_all_embeddings_are_the_same(context):
508
511
embedding1 = np .array (embeddings [i ])
509
512
embedding2 = np .array (embeddings [j ])
510
513
if context .debug :
511
- print (f"embedding1: { embedding1 [- 8 :]} \n " )
512
- print (f"embedding2: { embedding2 [- 8 :]} \n " )
514
+ print (f"embedding1: { embedding1 [- 8 :]} " )
515
+ print (f"embedding2: { embedding2 [- 8 :]} " )
513
516
similarity = np .dot (embedding1 , embedding2 ) / (np .linalg .norm (embedding1 ) * np .linalg .norm (embedding2 ))
514
517
msg = f"Similarity between { i } and { j } : { similarity :.10f} "
515
518
if context .debug :
516
- print (f"{ msg } \n " )
519
+ print (f"{ msg } " )
517
520
assert np .isclose (similarity , 1.0 , rtol = 1e-05 , atol = 1e-08 , equal_nan = False ), msg
518
521
519
522
@@ -630,7 +633,7 @@ async def step_prometheus_metrics_exported(context):
630
633
metrics_raw = await metrics_response .text ()
631
634
metric_exported = False
632
635
if context .debug :
633
- print (f"/metrics answer:\n { metrics_raw } \n " )
636
+ print (f"/metrics answer:\n { metrics_raw } " )
634
637
context .metrics = {}
635
638
for metric in parser .text_string_to_metric_families (metrics_raw ):
636
639
match metric .name :
@@ -932,7 +935,7 @@ def assert_n_tokens_predicted(completion_response, expected_predicted_n=None, re
932
935
last_match = end
933
936
highlighted += content [last_match :]
934
937
if 'DEBUG' in os .environ and os .environ ['DEBUG' ] == 'ON' :
935
- print (f"Checking completion response: { highlighted } \n " )
938
+ print (f"Checking completion response: { highlighted } " )
936
939
assert last_match > 0 , f'/{ re_content } / must match ```{ highlighted } ```'
937
940
if expected_predicted_n and expected_predicted_n > 0 :
938
941
assert n_predicted == expected_predicted_n , (f'invalid number of tokens predicted:'
@@ -942,7 +945,7 @@ def assert_n_tokens_predicted(completion_response, expected_predicted_n=None, re
942
945
async def gather_tasks_results (context ):
943
946
n_tasks = len (context .concurrent_tasks )
944
947
if context .debug :
945
- print (f"Waiting for all { n_tasks } tasks results...\n " )
948
+ print (f"Waiting for all { n_tasks } tasks results..." )
946
949
for task_no in range (n_tasks ):
947
950
context .tasks_result .append (await context .concurrent_tasks .pop ())
948
951
n_completions = len (context .tasks_result )
@@ -959,7 +962,7 @@ async def wait_for_health_status(context,
959
962
slots_processing = None ,
960
963
expected_slots = None ):
961
964
if context .debug :
962
- print (f"Starting checking for health for expected_health_status={ expected_health_status } \n " )
965
+ print (f"Starting checking for health for expected_health_status={ expected_health_status } " )
963
966
interval = 0.5
964
967
counter = 0
965
968
if 'GITHUB_ACTIONS' in os .environ :
@@ -1048,8 +1051,6 @@ def start_server_background(context):
1048
1051
if 'LLAMA_SERVER_BIN_PATH' in os .environ :
1049
1052
context .server_path = os .environ ['LLAMA_SERVER_BIN_PATH' ]
1050
1053
server_listen_addr = context .server_fqdn
1051
- if os .name == 'nt' :
1052
- server_listen_addr = '0.0.0.0'
1053
1054
server_args = [
1054
1055
'--host' , server_listen_addr ,
1055
1056
'--port' , context .server_port ,
@@ -1088,7 +1089,7 @@ def start_server_background(context):
1088
1089
server_args .append ('--verbose' )
1089
1090
if 'SERVER_LOG_FORMAT_JSON' not in os .environ :
1090
1091
server_args .extend (['--log-format' , "text" ])
1091
- print (f"starting server with: { context .server_path } { server_args } \n " )
1092
+ print (f"starting server with: { context .server_path } { server_args } " )
1092
1093
flags = 0
1093
1094
if 'nt' == os .name :
1094
1095
flags |= subprocess .DETACHED_PROCESS
0 commit comments