Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 4105588

Browse files
committed
--on-notify parameter
1 parent 560bf12 commit 4105588

8 files changed

Lines changed: 112 additions & 8 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/hyperqueue/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ core_affinity = "0.8"
4949
itertools = "0.14.0"
5050
lru = "0.16"
5151
open = "5"
52+
shlex = { version = "1.3" }
5253

5354
# Dashboard
5455
ratatui = { version = "0.29", default-features = false, features = ["crossterm"], optional = true }

crates/hyperqueue/src/bin/hq.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ async fn command_job_wait(gsettings: &GlobalSettings, opts: JobWaitOpts) -> anyh
166166
ToClientMessage::JobInfoResponse(r) => r
167167
)
168168
.await?;
169-
wait_for_jobs(&mut session, &response.jobs, !opts.without_close).await
169+
wait_for_jobs(&mut session, &response.jobs, !opts.without_close, None).await
170170
}
171171

172172
async fn command_job_progress(

crates/hyperqueue/src/client/commands/submit/command.rs

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -450,6 +450,16 @@ pub struct JobSubmitOpts {
450450
#[arg(long, conflicts_with("wait"))]
451451
progress: bool,
452452

453+
/// Defines a program that is called when a submitted task emits a notify event.
454+
/// It is relevant only when `--wait` or `--progress` is used.
455+
///
456+
/// Event processing is serialized, so the processing of an event starts only after
457+
/// processing of the previous event is finished.
458+
///
459+
/// The event message is passed as the last argument of the called program (after any arguments given in the command string).
460+
#[arg(long)]
461+
on_notify: Option<String>,
462+
453463
/// Attach the stdin to the task
454464
///
455465
/// Captures stdin and start the task with the given stdin.
@@ -669,6 +679,7 @@ pub async fn submit_computation(
669679
stream,
670680
crash_limit,
671681
},
682+
on_notify,
672683
} = opts;
673684

674685
let name = if let Some(name) = name {
@@ -740,7 +751,15 @@ pub async fn submit_computation(
740751
job_id,
741752
};
742753

743-
send_submit_request(gsettings, session, request, wait, progress).await
754+
send_submit_request(
755+
gsettings,
756+
session,
757+
request,
758+
wait,
759+
progress,
760+
on_notify.as_deref(),
761+
)
762+
.await
744763
}
745764

746765
pub(crate) async fn send_submit_request(
@@ -749,13 +768,17 @@ pub(crate) async fn send_submit_request(
749768
request: SubmitRequest,
750769
wait: bool,
751770
progress: bool,
771+
on_notify: Option<&str>,
752772
) -> anyhow::Result<()> {
753773
let job_id = request.job_id.unwrap_or_else(|| JobId::new(0));
754774
let stream_request = if progress || wait {
755775
let mut flags = EventFilterFlags::JOB_EVENTS;
756776
if progress {
757777
flags.insert(EventFilterFlags::TASK_EVENTS);
758778
}
779+
if on_notify.is_some() {
780+
flags.insert(EventFilterFlags::NOTIFY_EVENTS);
781+
}
759782
Some(StreamEvents {
760783
past_events: false,
761784
live_events: true,
@@ -776,7 +799,7 @@ pub(crate) async fn send_submit_request(
776799

777800
gsettings.printer().print_job_submitted(job);
778801
if wait {
779-
wait_for_jobs(session, &[info], true).await?;
802+
wait_for_jobs(session, &[info], true, on_notify).await?;
780803
} else if progress {
781804
wait_for_jobs_with_progress(session, &[info]).await?;
782805
}

crates/hyperqueue/src/client/commands/submit/jobfile.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,5 +226,5 @@ pub async fn submit_computation_from_job_file(
226226
})?)?
227227
};
228228
let request = build_job_submit(jdef, opts.job)?;
229-
send_submit_request(gsettings, session, request, false, false).await
229+
send_submit_request(gsettings, session, request, false, false, None).await
230230
}

crates/hyperqueue/src/client/commands/wait.rs

Lines changed: 53 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
use std::collections::BTreeSet;
2+
use std::ffi::OsStr;
23
use std::io::Write;
4+
use std::os::unix::ffi::OsStrExt;
5+
use std::process::Command;
36
use std::time::{Duration, SystemTime};
47
use tokio::time::sleep;
58

@@ -13,41 +16,88 @@ use crate::client::status::{Status, is_terminated};
1316
use crate::common::arraydef::IntArray;
1417
use crate::common::utils::str::pluralize;
1518
use crate::rpc_call;
16-
use crate::server::event::payload::EventPayload;
19+
use crate::server::event::payload::{EventPayload, TaskNotification};
1720
use crate::server::job::JobTaskCounters;
1821
use crate::transfer::connection::ClientSession;
1922
use crate::transfer::messages::{
2023
FromClientMessage, IdSelector, JobDetailRequest, JobInfo, JobInfoRequest, TaskIdSelector,
2124
TaskSelector, TaskStatusSelector, ToClientMessage, WaitForJobsRequest,
2225
};
2326
use colored::Colorize;
27+
use itertools::Itertools;
2428
use tako::{JobId, JobTaskCount, Set, TaskId};
2529

30+
fn process_on_notify(program: &str, args: &[String], notification: &TaskNotification) {
31+
log::info!(
32+
"Running on_notify callback: {} {:?}: {:?}",
33+
program,
34+
args,
35+
notification
36+
);
37+
let mut child = match Command::new(program)
38+
.args(args)
39+
.arg(OsStr::from_bytes(&notification.message))
40+
.env("HQ_JOB_ID", notification.task_id.job_id().to_string())
41+
.env("HQ_TASK_ID", notification.task_id.job_task_id().to_string())
42+
.env("HQ_WORKER_ID", notification.worker_id.to_string())
43+
.spawn()
44+
{
45+
Ok(child) => child,
46+
Err(e) => {
47+
log::warn!("Failed to run on_notify callback: {}", e);
48+
return;
49+
}
50+
};
51+
match child.wait() {
52+
Ok(s) => {
53+
if !s.success() {
54+
log::warn!("on_notify callback finished with exit code: {}", s);
55+
}
56+
}
57+
Err(e) => {
58+
log::warn!("Failed to run on_notify callback: {}", e);
59+
return;
60+
}
61+
}
62+
}
63+
2664
pub async fn wait_for_jobs(
2765
session: &mut ClientSession,
2866
jobs: &[JobInfo],
2967
wait_for_close: bool,
68+
on_notify: Option<&str>,
3069
) -> anyhow::Result<()> {
3170
let mut unfinished_jobs = Set::new();
3271
for job in jobs {
3372
if !is_terminated(job) || (wait_for_close && job.is_open) {
3473
unfinished_jobs.insert(job.id);
3574
}
3675
}
76+
let on_notify_program_and_args =
77+
on_notify.map(|s| shlex::split(&s).unwrap_or_else(|| vec![s.to_string()]));
3778
while !unfinished_jobs.is_empty() {
3879
if let Some(msg) = session.connection().receive().await {
3980
let msg = msg?;
4081
let job_id = match &msg {
41-
ToClientMessage::Event(event) => match event.payload {
82+
ToClientMessage::Event(event) => match &event.payload {
4283
EventPayload::JobCompleted(job_id) => job_id,
4384
EventPayload::JobIdle(job_id) if !wait_for_close => job_id,
85+
EventPayload::TaskNotify(notification) => {
86+
let program_and_args = on_notify_program_and_args.as_ref().unwrap();
87+
process_on_notify(
88+
&program_and_args[0],
89+
&program_and_args[1..],
90+
notification,
91+
);
92+
continue;
93+
}
4494
_ => continue,
4595
},
4696
_ => {
4797
return Err(anyhow::anyhow!("Unexpected message from server"));
4898
}
4999
};
50-
unfinished_jobs.remove(&job_id);
100+
unfinished_jobs.remove(job_id);
51101
} else {
52102
return Ok(());
53103
}

crates/tako/src/internal/server/rpc.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -343,7 +343,9 @@ pub(crate) async fn worker_receive_loop<
343343
}
344344
}
345345
FromWorkerMessage::Notify(notify) => {
346-
todo!()
346+
log::debug!("Task notify received");
347+
comm.client()
348+
.on_task_notify(notify.task_id, worker_id, notify.message);
347349
}
348350
}
349351
}

tests/test_job.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1343,3 +1343,30 @@ def test_new_submit_with_while_unfinished_deps2(hq_env: HqEnv, tmp_path):
13431343
hq_env.command(["job", "submit-file", "job.toml"])
13441344
hq_env.start_worker(cpus=2, args=["--resource", "x=sum(2)"])
13451345
wait_for_job_state(hq_env, [1, 2], "FINISHED")
1346+
1347+
1348+
def test_on_notify(hq_env: HqEnv):
1349+
hq_env.start_server()
1350+
hq_env.start_workers(2, cpus=2)
1351+
hq_env.command(
1352+
[
1353+
"submit",
1354+
"--array=1-3",
1355+
"--wait",
1356+
"--on-notify=bash -c 'echo $HQ_JOB_ID $HQ_TASK_ID $0 >> log'",
1357+
"--",
1358+
"bash",
1359+
"-c",
1360+
"$HQ task notify msg1 && sleep 2 && $HQ task notify msg2",
1361+
]
1362+
)
1363+
with open("log") as f:
1364+
lines = [s.rstrip().split() for s in f.readlines()]
1365+
assert len(lines) == 6
1366+
assert lines[0][2] == "msg1"
1367+
assert lines[1][2] == "msg1"
1368+
assert lines[2][2] == "msg1"
1369+
assert lines[3][2] == "msg2"
1370+
assert lines[4][2] == "msg2"
1371+
assert lines[5][2] == "msg2"
1372+
assert set(x[1] for x in lines) == {"1", "2", "3"}

0 commit comments

Comments
 (0)