forked from It4innovations/hyperqueue
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_manager.py
More file actions
137 lines (107 loc) · 4.88 KB
/
test_manager.py
File metadata and controls
137 lines (107 loc) · 4.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
from .conftest import HqEnv
def scontrol_return(job_id: str) -> str:
    """Return Python source code for a mock ``scontrol`` program.

    The generated program consists of a single ``print`` call that emits a
    canned job description. Its first line starts with ``JobId=<job_id>`` and
    the report contains ``RunTime=00:01:34`` and ``TimeLimit=00:15:00``.

    Args:
        job_id: Job identifier interpolated verbatim into the ``JobId=`` field.

    Returns:
        The source code of the mock program.
    """
    # The inner triple quotes are backslash-escaped so that they end up in the
    # generated source instead of terminating this f-string.
    return f"""print(\"\"\"JobId={job_id} JobName=bash
UserId=sboehm00(33646) GroupId=interactive(25200) MCS_label=N/A
Priority=124370 Nice=0 Account=lig8_dev QOS=normal
JobState=RUNNING Reason=None Dependency=(null)
Requeue=0 Restarts=0 BatchFlag=0 Reboot=0 ExitCode=0:0
RunTime=00:01:34 TimeLimit=00:15:00 TimeMin=N/A
SubmitTime=2021-10-07T11:14:47 EligibleTime=2021-10-07T11:14:47
AccrueTime=2021-10-07T11:14:47
StartTime=2021-10-07T11:15:26 EndTime=2021-10-07T11:30:26 Deadline=N/A
PreemptEligibleTime=2021-10-07T11:15:26 PreemptTime=None
SuspendTime=None SecsPreSuspend=0 LastSchedEval=2021-10-07T11:15:26 Scheduler=Main
Partition=m100_all_serial AllocNode:Sid=login01:58040
ReqNodeList=(null) ExcNodeList=(null)
NodeList=login06
BatchHost=login06
NumNodes=1 NumCPUs=4 NumTasks=1 CPUs/Task=1 ReqB:S:C:T=0:0:*:*
TRES=cpu=4,mem=7600M,node=1,billing=4
Socks/Node=* NtasksPerN:B:S:C=0:0:*:* CoreSpec=*
MinCPUsNode=1 MinMemoryCPU=1900M MinTmpDiskNode=0
Features=(null) DelayBoot=00:00:00
OverSubscribe=OK Contiguous=0 Licenses=(null) Network=(null)
Command=/usr/bin/bash
WorkDir=/m100/home/userexternal/sboehm00
Power=\"\"\")"""
def qstat_return_walltime(job_id: str) -> str:
    """Return Python source code for a mock ``qstat`` program.

    The generated program asserts that ``job_id`` is one of its command-line
    arguments and then prints a JSON document describing that job with a
    requested walltime of ``01:12:34`` and a used walltime of ``00:13:45``.

    Args:
        job_id: Job identifier expected in ``sys.argv`` of the generated
            program and used as the key under ``"Jobs"``.

    Returns:
        The source code of the mock program.
    """
    # Braces that belong to the generated dict literal are doubled because
    # this is an f-string. The generated top-level statements must stay at
    # column 0, otherwise the emitted script would not be valid Python.
    return f"""
import sys
import json
assert "{job_id}" in sys.argv
data = {{
"Jobs": {{
"{job_id}": {{
"Resource_List": {{
"walltime": "01:12:34"
}},
"resources_used": {{
"walltime": "00:13:45"
}}
}}
}}
}}
print(json.dumps(data))
"""
def test_manager_autodetect(hq_env: HqEnv):
    """Check that workers detect their manager from the environment.

    Three workers are started while mock ``qstat`` and ``scontrol`` programs
    are installed: one with no manager-related environment variables, one
    with PBS variables and one with a SLURM variable. The worker list and the
    per-worker info tables are then checked for the expected manager, manager
    job id and time limit.
    """
    hq_env.start_server()

    # Keep both mock programs installed for the whole test body so that they
    # are available whenever a worker or a command needs them.
    with hq_env.mock.mock_program_with_code("qstat", qstat_return_walltime("x1234")):
        with hq_env.mock.mock_program_with_code("scontrol", scontrol_return("y5678")):
            hq_env.start_worker(cpus=1)
            hq_env.start_worker(cpus=1, env={"PBS_ENVIRONMENT": "PBS_BATCH", "PBS_JOBID": "x1234"})
            hq_env.start_worker(cpus=1, env={"SLURM_JOB_ID": "y5678"})

            table = hq_env.command(["worker", "list"], as_table=True)
            table.check_columns_value(["Manager", "Manager Job ID"], 0, ["None", "N/A"])
            table.check_columns_value(["Manager", "Manager Job ID"], 1, ["PBS", "x1234"])
            table.check_columns_value(["Manager", "Manager Job ID"], 2, ["SLURM", "y5678"])

            table = hq_env.command(["worker", "info", "2"], as_table=True)
            table.check_row_value("Manager", "PBS")
            # 58m 49s equals 01:12:34 - 00:13:45, the two walltimes in the qstat
            # mock; presumably that is how the limit is derived (confirm).
            table.check_row_value("Time Limit", "58m 49s")

            table = hq_env.command(["worker", "info", "3"], as_table=True)
            table.check_row_value("Manager", "SLURM")
            # 13m 26s equals TimeLimit 00:15:00 - RunTime 00:01:34 from the
            # scontrol mock; presumably that is how the limit is derived (confirm).
            table.check_row_value("Time Limit", "13m 26s")
def test_manager_set_none(hq_env: HqEnv):
    """Check that ``--manager none`` is reported as no manager for every worker.

    Three workers are started with ``--manager none``: one with no
    manager-related environment variables, one with PBS variables and one
    with a SLURM variable. Every row of the worker list must show manager
    ``None`` and job id ``N/A``.
    """
    hq_env.start_server()
    args = ["--manager", "none"]
    hq_env.start_worker(cpus=1, args=args)
    hq_env.start_worker(cpus=1, args=args, env={"PBS_ENVIRONMENT": "PBS_BATCH", "PBS_JOBID": "x1234"})
    hq_env.start_worker(cpus=1, args=args, env={"SLURM_JOB_ID": "y5678"})

    table = hq_env.command(["worker", "list"], as_table=True)
    # One row per started worker.
    for row in range(3):
        table.check_columns_value(["Manager", "Manager Job ID"], row, ["None", "N/A"])
def test_manager_pbs_no_env(hq_env: HqEnv):
    """Check a worker started with ``--manager pbs`` but without PBS variables.

    The worker is started without waiting for it to come up, and the test then
    verifies the process has exited via ``check_process_exited(p, 1)``.
    """
    hq_env.start_server()
    p = hq_env.start_worker(cpus=1, args=["--manager", "pbs"], wait_for_start=False)
    # Presumably a 5 second timeout for the process to terminate (confirm
    # against the type returned by start_worker).
    p.wait(5)
    # Presumably 1 is the expected exit code (confirm against HqEnv).
    hq_env.check_process_exited(p, 1)
def test_manager_pbs(hq_env: HqEnv):
    """Check a worker started with ``--manager pbs`` inside a PBS environment.

    With a mock ``qstat`` installed, the worker list must report manager
    ``PBS`` with job id ``x1234``, and the info table of worker 1 must show
    ``x1234`` in its ``Group`` row.
    """
    hq_env.start_server()

    # Keep the mock qstat installed for the whole test body.
    with hq_env.mock.mock_program_with_code("qstat", qstat_return_walltime("x1234")):
        hq_env.start_worker(
            cpus=1,
            args=["--manager", "pbs"],
            env={"PBS_ENVIRONMENT": "PBS_BATCH", "PBS_JOBID": "x1234"},
        )

        table = hq_env.command(["worker", "list"], as_table=True)
        table.check_columns_value(["Manager", "Manager Job ID"], 0, ["PBS", "x1234"])

        table = hq_env.command(["worker", "info", "1"], as_table=True)
        table.check_row_value("Group", "x1234")
def test_manager_pbs_no_qstat(hq_env: HqEnv):
    """Start a PBS-managed worker without installing a mock ``qstat``.

    The worker is started with ``--manager pbs`` and PBS environment
    variables, without waiting for it to come up.
    """
    hq_env.start_server()
    # NOTE(review): this test makes no assertion of its own; presumably it
    # relies on start_worker or fixture teardown to surface a failure (confirm).
    hq_env.start_worker(
        cpus=1,
        args=["--manager", "pbs"],
        env={"PBS_ENVIRONMENT": "PBS_BATCH", "PBS_JOBID": "x1234"},
        wait_for_start=False,
    )
def test_manager_slurm_no_env(hq_env: HqEnv):
    """Check a worker started with ``--manager slurm`` but without SLURM variables.

    The worker is started without waiting for it to come up, and the test then
    verifies the process has exited via ``check_process_exited(p, 1)``.
    """
    hq_env.start_server()
    p = hq_env.start_worker(cpus=1, args=["--manager", "slurm"], wait_for_start=False)
    # Presumably a 5 second timeout for the process to terminate (confirm
    # against the type returned by start_worker).
    p.wait(5)
    # Presumably 1 is the expected exit code (confirm against HqEnv).
    hq_env.check_process_exited(p, 1)
def test_manager_slurm(hq_env: HqEnv):
    """Check a worker started with ``--manager slurm`` inside a SLURM environment.

    With a mock ``scontrol`` installed, the worker list must report manager
    ``SLURM`` with job id ``abcd``.
    """
    hq_env.start_server()

    # Keep the mock scontrol installed for the whole test body.
    with hq_env.mock.mock_program_with_code("scontrol", scontrol_return("abcd")):
        hq_env.start_worker(cpus=1, args=["--manager", "slurm"], env={"SLURM_JOB_ID": "abcd"})

        table = hq_env.command(["worker", "list"], as_table=True)
        table.check_columns_value(["Manager", "Manager Job ID"], 0, ["SLURM", "abcd"])