Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit fdcceda

Browse files
committed
Gap scheduling
1 parent b1ad1db commit fdcceda

8 files changed

Lines changed: 429 additions & 74 deletions

File tree

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
use crate::internal::common::resources::ResourceId;
2+
use crate::internal::server::workerload::WorkerResources;
3+
use crate::resources::{ResourceRequest, ResourceRequestVariants, ResourceRqId, ResourceRqMap};
4+
use crate::{Map, ResourceVariantId};
5+
use hashbrown::Equivalent;
6+
use std::cell::RefCell;
7+
8+
/// Cache of values derived during scheduling.
///
/// The state is kept behind a `RefCell`, so `get_gap` can fill the cache while
/// only holding `&self`.
#[derive(Default)]
9+
pub(crate) struct SchedulerCache {
10+
// Cache state; `get_gap` takes a mutable borrow of it for the whole call.
inner: RefCell<SchedulerCacheInner>,
11+
}
12+
13+
/// Owned key of the gap cache (`SchedulerCacheInner::resource_gaps`).
///
/// It owns a copy of the worker resources; `get_gap` creates it (cloning the
/// resources) only when a newly computed gap is inserted.
#[derive(Hash, PartialEq, Eq)]
14+
struct GapKey {
15+
// Request whose variants `get_gap` passes to `compute_gap` as the high-priority side.
high_priority_rq: ResourceRqId,
16+
// Request that `get_gap` looks up for the low-priority side.
low_priority_rq: ResourceRqId,
17+
// Variant of `low_priority_rq` that the cached gap was computed for.
low_priority_variant: ResourceVariantId,
18+
// Worker resources the cached gap was computed against.
resources: WorkerResources,
19+
}
20+
21+
/// Borrowed counterpart of `GapKey`, used by `get_gap` to probe the cache
/// without cloning `WorkerResources`.
///
/// The fields are declared in the same order as in `GapKey` and both types
/// derive `Hash`; lookups through `Equivalent` rely on the two hashes agreeing.
#[derive(Hash, PartialEq, Eq)]
22+
struct GapKeyRef<'a> {
23+
// Same meaning as `GapKey::high_priority_rq`.
high_priority_rq: ResourceRqId,
24+
// Same meaning as `GapKey::low_priority_rq`.
low_priority_rq: ResourceRqId,
25+
// Same meaning as `GapKey::low_priority_variant`.
low_priority_variant: ResourceVariantId,
26+
// Borrowed instead of owned, so a cache probe needs no clone.
resources: &'a WorkerResources,
27+
}
28+
29+
impl<'a> Equivalent<GapKey> for GapKeyRef<'a> {
    /// Returns `true` when this borrowed key denotes the same cache entry as `key`.
    ///
    /// Every field that takes part in the derived `Hash`/`Eq` of the key types is
    /// compared, including `low_priority_variant`. Leaving a field out would let a
    /// lookup for one variant match the cached gap of another variant whenever
    /// the two hashes collide.
    fn equivalent(&self, key: &GapKey) -> bool {
        self.high_priority_rq == key.high_priority_rq
            && self.low_priority_rq == key.low_priority_rq
            && self.low_priority_variant == key.low_priority_variant
            && self.resources == &key.resources
    }
}
36+
37+
/// Mutable state of `SchedulerCache`.
#[derive(Default)]
38+
struct SchedulerCacheInner {
39+
// Gap values already computed by `compute_gap`, keyed by request pair,
// low-priority variant and worker resources; filled by `get_gap`.
resource_gaps: Map<GapKey, u32>,
40+
}
41+
42+
impl SchedulerCache {
43+
pub fn get_gap(
44+
&self,
45+
high_priority_rq: ResourceRqId,
46+
low_priority_rq: ResourceRqId,
47+
low_priority_variant: ResourceVariantId,
48+
resources: &WorkerResources,
49+
resource_rq_map: &ResourceRqMap,
50+
) -> u32 {
51+
let key = GapKeyRef {
52+
high_priority_rq,
53+
low_priority_rq,
54+
low_priority_variant,
55+
resources,
56+
};
57+
let mut inner = self.inner.borrow_mut();
58+
59+
if let Some(gap) = inner.resource_gaps.get(&key).copied() {
60+
gap
61+
} else {
62+
let gap = compute_gap(
63+
resource_rq_map.get(high_priority_rq),
64+
resource_rq_map
65+
.get(low_priority_rq)
66+
.get(low_priority_variant),
67+
resources,
68+
);
69+
inner.resource_gaps.insert(
70+
GapKey {
71+
high_priority_rq,
72+
low_priority_rq,
73+
low_priority_variant,
74+
resources: resources.clone(),
75+
},
76+
gap,
77+
);
78+
gap
79+
}
80+
}
81+
}
82+
83+
fn compute_gap(
84+
high_priority_rqv: &ResourceRequestVariants,
85+
low_priority_rq: &ResourceRequest,
86+
resources: &WorkerResources,
87+
) -> u32 {
88+
assert!(!high_priority_rqv.is_multi_node());
89+
assert!(!low_priority_rq.is_multi_node());
90+
if high_priority_rqv.is_trivial() {
91+
let high_priority_rq = high_priority_rqv.get(0.into());
92+
let count = resources.task_max_count_for_request(high_priority_rq);
93+
let mut resources = resources.clone();
94+
resources.remove_multiple(high_priority_rq, count);
95+
resources.task_max_count_for_request(low_priority_rq)
96+
} else {
97+
todo!()
98+
}
99+
}
100+
101+
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tests::utils::env::TestEnv;
    use crate::tests::utils::resources::ResBuilder;
    use crate::tests::utils::worker::WorkerBuilder;

    /// Checks `compute_gap` on a CPU-only worker and on a worker with two
    /// additional named resources.
    #[test]
    fn test_compute_gap() {
        let mut rt = TestEnv::new();
        rt.new_named_resource("foo");
        rt.new_named_resource("bar");

        // Worker built with `WorkerBuilder::new(4)` (presumably 4 CPUs - confirm).
        let w = rt.new_worker(&WorkerBuilder::new(4));

        // 2-cpu blocker fits twice, nothing is left over.
        let rqv1 = ResBuilder::default().cpus(2).finish_v();
        let rq2 = ResBuilder::default().cpus(1).finish();
        assert_eq!(compute_gap(&rqv1, &rq2, &rt.worker(w).resources), 0);

        // 3-cpu blocker fits once, one cpu remains for a 1-cpu task.
        let rqv1 = ResBuilder::default().cpus(3).finish_v();
        let rq2 = ResBuilder::default().cpus(1).finish();
        assert_eq!(compute_gap(&rqv1, &rq2, &rt.worker(w).resources), 1);

        // Same blocker, but the single remaining cpu is too little for a 2-cpu task.
        let rqv1 = ResBuilder::default().cpus(3).finish_v();
        let rq2 = ResBuilder::default().cpus(2).finish();
        assert_eq!(compute_gap(&rqv1, &rq2, &rt.worker(w).resources), 0);

        // Worker built with 12 (presumably CPUs), "foo" sum 2 and "bar" sum 1.
        let w = rt.new_worker(&WorkerBuilder::new(12).res_sum("foo", 2).res_sum("bar", 1));

        // 4-cpu blocker fits three times, nothing is left over.
        let rqv1 = ResBuilder::default().cpus(4).finish_v();
        let rq2 = ResBuilder::default().cpus(2).finish();
        assert_eq!(compute_gap(&rqv1, &rq2, &rt.worker(w).resources), 0);

        // 5-cpu blocker fits twice, two cpus remain.
        let rqv1 = ResBuilder::default().cpus(5).finish_v();
        let rq2 = ResBuilder::default().cpus(1).finish();
        assert_eq!(compute_gap(&rqv1, &rq2, &rt.worker(w).resources), 2);

        // The blocker also takes 2 units of resource id 1 (presumably "foo" - confirm),
        // so it fits only once and seven cpus remain.
        let rqv1 = ResBuilder::default().cpus(5).add_compact(1, 2).finish_v();
        let rq2 = ResBuilder::default().cpus(1).finish();
        assert_eq!(compute_gap(&rqv1, &rq2, &rt.worker(w).resources), 7);

        // The low-priority task needs resource id 1 as well, which the blocker used up.
        let rqv1 = ResBuilder::default().cpus(5).add_compact(1, 2).finish_v();
        let rq2 = ResBuilder::default().cpus(1).add_compact(1, 1).finish();
        assert_eq!(compute_gap(&rqv1, &rq2, &rt.worker(w).resources), 0);

        // The low-priority task needs resource id 2 (presumably "bar" - confirm),
        // of which one unit is available.
        let rqv1 = ResBuilder::default().cpus(5).add_compact(1, 2).finish_v();
        let rq2 = ResBuilder::default().cpus(1).add_compact(2, 1).finish();
        assert_eq!(compute_gap(&rqv1, &rq2, &rt.worker(w).resources), 1);
    }
}

crates/tako/src/internal/scheduler/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
mod batches;
2+
mod cache;
23
mod main;
34
mod mapping;
45
pub(crate) mod query;
56
mod solver;
67
mod taskqueue;
78

89
pub(crate) use batches::{PriorityCut, TaskBatch, create_task_batches};
10+
pub(crate) use cache::SchedulerCache;
911
pub(crate) use main::{run_scheduling, run_scheduling_inner, scheduler_loop};
1012
pub(crate) use mapping::{WorkerTaskMapping, create_task_mapping};
1113
pub(crate) use solver::run_scheduling_solver;

crates/tako/src/internal/scheduler/solver.rs

Lines changed: 62 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ pub(crate) fn run_scheduling_solver(
3131
task_queues,
3232
request_map,
3333
worker_groups,
34+
scheduler_cache,
3435
..
3536
} = core.split();
3637
if request_map.is_empty() {
@@ -71,12 +72,6 @@ pub(crate) fn run_scheduling_solver(
7172
let mut placements: Map<(WorkerId, ResourceRqId, ResourceVariantId), (_, u32)> = Map::new();
7273
let mut tasks_count_vars: Map<ResourceRqId, Vec<_>> = Map::new();
7374

74-
// let mut placement_vars: Vec<Option<_>> = vec![None; n_workers * n_batches * n_variants];
75-
//
76-
// let placement_idx = |worker_idx: usize, batch_idx: usize, variant: usize| {
77-
// worker_idx * n_batches * n_variants + batch_idx * n_variants + variant
78-
// };
79-
8075
let mut worker_res_constraint = vec![Vec::new(); n_resources];
8176

8277
let mut var_idx = 0u32;
@@ -180,7 +175,24 @@ pub(crate) fn run_scheduling_solver(
180175
// blocked_priority_vars[(rq_id, s)] is True only if there are at least
181176
// `s` tasks of `rq_id` scheduled
182177
let mut blocked_priority_vars: Map<(ResourceRqId, u32), _> = Map::new();
178+
179+
let mut get_bvar = |solver: &mut LpSolver, blocker_rq_id: ResourceRqId, size: u32| {
180+
*blocked_priority_vars
181+
.entry((blocker_rq_id, size))
182+
.or_insert_with(|| {
183+
// Create a new blocking variable
184+
solver.set_name(|| format!("B{}~{}", blocker_rq_id, size));
185+
let new_v = solver.add_bool_variable(0.0);
186+
let vars = tasks_count_vars.get(&blocker_rq_id).unwrap();
187+
solver.set_name(|| format!("blocker rq{blocker_rq_id} at size {size}"));
188+
let bound = size as f64;
189+
constraint_extra_var(solver, ConstraintType::Min, bound, &vars, new_v, bound);
190+
new_v
191+
})
192+
};
193+
183194
let mut zero_cond = Vec::new();
195+
let mut blocked_by_unbounded: Set<ResourceRqId> = Set::new();
184196

185197
for batch in task_batches.iter() {
186198
let Some(task_counts) = tasks_count_vars.get(&batch.resource_rq_id) else {
@@ -197,9 +209,9 @@ pub(crate) fn run_scheduling_solver(
197209
)
198210
}
199211
let batch_size = batch.size as f64;
200-
let mut blocked_by_unbounded: Set<ResourceRqId> = Set::new();
212+
blocked_by_unbounded.clear();
201213
for cut in &batch.cuts {
202-
for (blocker_rq_id, size) in &cut.blockers {
214+
for (blocker_rq_id, blocking_size) in &cut.blockers {
203215
zero_cond.clear();
204216
let blocker_rqv = request_map.get(*blocker_rq_id);
205217
if batch_rqv.is_multi_node() {
@@ -220,34 +232,53 @@ pub(crate) fn run_scheduling_solver(
220232
if let Some((var, _)) =
221233
placements.get(&(w.id, batch.resource_rq_id, v_id))
222234
{
223-
zero_cond.push(*var);
235+
let gap = scheduler_cache.get_gap(
236+
*blocker_rq_id,
237+
batch.resource_rq_id,
238+
v_id,
239+
&w.resources,
240+
request_map,
241+
);
242+
if gap > 0 {
243+
let cut_size = cut.size as f64;
244+
if let Some(s) = blocking_size {
245+
let blocking_v = get_bvar(&mut solver, *blocker_rq_id, *s);
246+
solver.set_name(|| {
247+
format!(
248+
"w{}: if #rq{blocker_rq_id} < {s} then limit #rq{} to {} + {} (gap) where both rqs may run",
249+
w.id, batch.resource_rq_id, cut.size, gap
250+
)
251+
});
252+
solver.add_constraint(
253+
ConstraintType::Max,
254+
cut_size + batch_size + gap as f64,
255+
[(*var, 1.0), (blocking_v, batch_size)].into_iter(),
256+
);
257+
} else {
258+
solver.set_name(|| {
259+
format!(
260+
"w{}: limit #rq{} to {} + {} (gap) where it can run with rq{blocker_rq_id}",
261+
w.id, batch.resource_rq_id, gap, cut.size,
262+
)
263+
});
264+
solver.add_constraint(
265+
ConstraintType::Max,
266+
cut_size + gap as f64,
267+
[(*var, 1.0)].into_iter(),
268+
);
269+
}
270+
} else {
271+
zero_cond.push(*var);
272+
}
224273
}
225274
}
226275
}
227276
}
228277
if zero_cond.is_empty() {
229278
continue;
230279
}
231-
if let Some(s) = size {
232-
let blocking_v = *blocked_priority_vars
233-
.entry((*blocker_rq_id, *s))
234-
.or_insert_with(|| {
235-
// Create a new blocking variable
236-
solver.set_name(|| format!("B{}~{}", blocker_rq_id, s));
237-
let new_v = solver.add_bool_variable(0.0);
238-
let vars = tasks_count_vars.get(blocker_rq_id).unwrap();
239-
let bound = *s as f64;
240-
solver.set_name(|| format!("blocker rq{blocker_rq_id} at size {s}"));
241-
constraint_extra_var(
242-
&mut solver,
243-
ConstraintType::Min,
244-
bound,
245-
&vars,
246-
new_v,
247-
bound,
248-
);
249-
new_v
250-
});
280+
if let Some(s) = blocking_size {
281+
let blocking_v = get_bvar(&mut solver, *blocker_rq_id, *s);
251282
solver.set_name(|| {
252283
format!(
253284
"if #rq{blocker_rq_id} < {s} then limit #rq{} to {} where both rqs may run",
@@ -258,12 +289,13 @@ pub(crate) fn run_scheduling_solver(
258289
constraint_extra_var(
259290
&mut solver,
260291
ConstraintType::Max,
261-
cut_size + batch_size,
292+
batch_size + cut_size,
262293
&zero_cond,
263294
blocking_v,
264295
batch_size,
265296
);
266297
} else if !blocked_by_unbounded.contains(blocker_rq_id) {
298+
blocked_by_unbounded.insert(*blocker_rq_id);
267299
solver.set_name(|| {
268300
format!(
269301
"limit #rq{} to {} where it can run with rq{blocker_rq_id}",

crates/tako/src/internal/server/core.rs

Lines changed: 4 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use crate::internal::common::resources::map::{
66
};
77
use crate::internal::common::resources::{ResourceId, ResourceRequestVariants, ResourceRqId};
88
use crate::internal::common::{Set, WrappedRcRefCell};
9-
use crate::internal::scheduler::{TaskQueue, TaskQueues};
9+
use crate::internal::scheduler::{SchedulerCache, TaskQueue, TaskQueues};
1010
use crate::internal::server::dataobj::{DataObjectHandle, ObjsToRemoveFromWorkers};
1111
use crate::internal::server::dataobjmap::DataObjectMap;
1212
use crate::internal::server::rpc::ConnectionDescriptor;
@@ -38,6 +38,7 @@ pub(crate) struct CoreSplit<'a> {
3838
pub task_queues: &'a TaskQueues,
3939
pub data_objects: &'a DataObjectMap,
4040
pub worker_groups: &'a Map<String, WorkerGroup>,
41+
pub scheduler_cache: &'a SchedulerCache,
4142
}
4243

4344
#[derive(Default)]
@@ -48,6 +49,7 @@ pub struct Core {
4849
task_queues: TaskQueues,
4950
data_objects: DataObjectMap,
5051
worker_groups: Map<String, WorkerGroup>,
52+
scheduler_cache: SchedulerCache,
5153

5254
maximal_task_id: TaskId,
5355
worker_id_counter: u32,
@@ -108,6 +110,7 @@ impl Core {
108110
task_queues: &self.task_queues,
109111
data_objects: &self.data_objects,
110112
worker_groups: &self.worker_groups,
113+
scheduler_cache: &self.scheduler_cache,
111114
}
112115
}
113116

@@ -148,22 +151,6 @@ impl Core {
148151
&mut self.worker_overview_listeners
149152
}
150153

151-
pub fn park_workers(&mut self) {
152-
todo!()
153-
/*for worker in self.workers.values_mut() {
154-
if worker.is_underloaded()
155-
&& worker
156-
.sn_tasks()
157-
.iter()
158-
.all(|&task_id| self.tasks.get_task(task_id).is_sn_running())
159-
{
160-
log::debug!("Parking worker {}", worker.id);
161-
worker.set_parked_flag(true);
162-
self.parked_resources.insert(worker.resources.clone());
163-
}
164-
}*/
165-
}
166-
167154
pub fn get_worker_listen_port(&self) -> u16 {
168155
self.worker_listen_port
169156
}
@@ -263,8 +250,6 @@ impl Core {
263250

264251
// TODO: move to TaskMap
265252
/// Removes a single task.
266-
/// It can still remain in [`ready_to_assign`], where it will remain until the scheduler picks
267-
/// it up.
268253
#[must_use]
269254
pub fn remove_task(
270255
&mut self,
@@ -564,16 +549,6 @@ mod tests {
564549
self.assert_task_condition(task_ids, |t| t.is_sn_running());
565550
}
566551

567-
pub fn assert_underloaded(&self, worker_ids: &[WorkerId]) {
568-
todo!()
569-
//self.assert_worker_condition(worker_ids, |w| w.is_underloaded());
570-
}
571-
572-
pub fn assert_not_underloaded(&self, worker_ids: &[WorkerId]) {
573-
todo!()
574-
//self.assert_worker_condition(worker_ids, |w| !w.is_underloaded());
575-
}
576-
577552
pub fn remove_from_ready_queue(&mut self, task_id: TaskId) {
578553
let task = self.get_task(task_id);
579554
let resource_rq_id = task.resource_rq_id;

0 commit comments

Comments
 (0)