From 657955b4c52277c7aea6695c76c54843c00fa78c Mon Sep 17 00:00:00 2001
From: DJ Majumdar <code@deepjoy.com>
Date: Thu, 19 Mar 2026 00:28:38 -0700
Subject: [PATCH] perf: reduce SQL round-trips and CPU overhead in scheduler
 hot paths
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Use result.last_insert_rowid() in insert_history instead of separate
  SELECT query, saving 1 round-trip per task completion
- Replace generic chrono datetime parser with fast fixed-position byte
  parser for the known SQLite format
- Add has_hierarchy flag to skip active_children_count query when no
  parent-child tasks exist
- Batch dependency resolution into 2 queries (DELETE RETURNING + single
  UPDATE RETURNING) instead of 2+2N
- Add fast dispatch path using pop_next() (1 SQL) instead of
  peek_next + gate + claim_task (2 SQL) when no groups, pressure
  sources, or module caps are configured

bench_dispatch_no_groups_500: ~166ms → ~128ms (-23%)
---
 src/scheduler/builder.rs           | 17 ++++++
 src/scheduler/mod.rs               |  7 +++
 src/scheduler/run_loop.rs          | 20 ++++++-
 src/scheduler/spawn/completion.rs  |  8 ++-
 src/store/dependencies.rs          | 66 ++++++++++------------
 src/store/lifecycle/mod.rs         |  6 +-
 src/store/lifecycle/transitions.rs |  1 +
 src/store/mod.rs                   |  9 ++-
 src/store/row_mapping.rs           | 90 ++++++++++++++++++++++++++++--
 src/store/submit/mod.rs            | 26 ++++++++-
 10 files changed, 198 insertions(+), 52 deletions(-)

diff --git a/src/scheduler/builder.rs b/src/scheduler/builder.rs
index ff013bd..daf75b2 100644
--- a/src/scheduler/builder.rs
+++ b/src/scheduler/builder.rs
@@ -402,6 +402,7 @@ impl SchedulerBuilder {
         };
 
         // Build gate from pressure sources + policy.
+        let has_pressure = !self.pressure_sources.is_empty();
         let mut pressure = CompositePressure::new();
         for source in self.pressure_sources {
             pressure.add_source(source);
@@ -435,6 +436,12 @@ impl SchedulerBuilder {
             module_state,
         );
 
+        // Compute fast-dispatch eligibility before consuming builder fields.
+        let has_groups =
+            self.default_group_concurrency > 0 || !self.group_concurrency_overrides.is_empty();
+        let has_monitoring = self.enable_resource_monitoring;
+        let has_module_caps = !scheduler.inner.module_caps.read().unwrap().is_empty();
+
         // Apply group concurrency limits.
         if self.default_group_concurrency > 0 {
             scheduler
@@ -474,6 +481,16 @@ impl SchedulerBuilder {
             ));
         }
 
+        // Enable fast dispatch (single pop_next instead of peek + gate + claim)
+        // when no groups, no resource monitoring, no pressure sources, and no
+        // module caps are configured.
+        if !has_groups && !has_monitoring && !has_pressure && !has_module_caps {
+            scheduler
+                .inner
+                .fast_dispatch
+                .store(true, std::sync::atomic::Ordering::Relaxed);
+        }
+
         Ok(scheduler)
     }
 }
diff --git a/src/scheduler/mod.rs b/src/scheduler/mod.rs
index eb77028..e3b31af 100644
--- a/src/scheduler/mod.rs
+++ b/src/scheduler/mod.rs
@@ -133,6 +133,11 @@ pub(crate) struct SchedulerInner {
     /// Cleared when `paused_tasks()` returns empty. Avoids a SQL round-trip
     /// per dispatch cycle when no tasks are paused.
     pub(crate) has_paused_tasks: AtomicBool,
+    /// Fast-dispatch mode: when `true`, `try_dispatch` uses `pop_next()`
+    /// (single SQL) instead of `peek_next()` + gate + `claim_task()` (2 SQL).
+    /// Computed at build time: `true` when no groups, no resource monitoring,
+    /// and no module concurrency caps are configured.
+    pub(crate) fast_dispatch: AtomicBool,
     /// Send side of the completion coalescing channel.
     pub(crate) completion_tx: tokio::sync::mpsc::UnboundedSender<CompletionMsg>,
     /// Receive side, `Arc`-wrapped so spawned tasks can try to drain the batch
@@ -268,6 +273,8 @@ impl Scheduler {
                 module_running,
                 // Conservative: true on startup so the first cycle checks.
                 has_paused_tasks: AtomicBool::new(true),
+                // Default to false; builder sets true when safe.
+                fast_dispatch: AtomicBool::new(false),
                 completion_tx,
                 completion_rx: std::sync::Arc::new(Mutex::new(completion_rx)),
             }),
diff --git a/src/scheduler/run_loop.rs b/src/scheduler/run_loop.rs
index 48ec3b2..902bd20 100644
--- a/src/scheduler/run_loop.rs
+++ b/src/scheduler/run_loop.rs
@@ -47,7 +47,17 @@ impl Scheduler {
             return Ok(false);
         }
 
-        // Peek at the next candidate without changing its status.
+        // Fast path: no gate checks needed, use pop_next() (single SQL)
+        // instead of peek_next() + gate.admit() + claim_task() (2 SQL).
+        // pop_next() skips expired tasks via its WHERE clause.
+        if self.inner.fast_dispatch.load(AtomicOrdering::Relaxed) {
+            let Some(task) = self.inner.store.pop_next().await? else {
+                return Ok(false);
+            };
+            return self.spawn_dispatched_task(task).await;
+        }
+
+        // Slow path: peek → gate check → claim.
         let Some(candidate) = self.inner.store.peek_next().await? else {
             return Ok(false);
         };
@@ -102,6 +112,14 @@ impl Scheduler {
             }
         }
 
+        self.spawn_dispatched_task(task).await
+    }
+
+    /// Look up executor and spawn a task that is already in the `running` state.
+    async fn spawn_dispatched_task(
+        &self,
+        task: crate::task::TaskRecord,
+    ) -> Result<bool, StoreError> {
         // Look up executor.
         let Some(executor) = self.inner.registry.get(&task.task_type) else {
             tracing::error!(
diff --git a/src/scheduler/spawn/completion.rs b/src/scheduler/spawn/completion.rs
index eea7665..54c0e33 100644
--- a/src/scheduler/spawn/completion.rs
+++ b/src/scheduler/spawn/completion.rs
@@ -37,7 +37,13 @@ pub(crate) async fn handle_success(
 
     // For the execute phase, check if the task spawned children.
     // If so, transition to waiting instead of completing.
-    if phase == ExecutionPhase::Execute {
+    // Skip the DB query entirely when no tasks have been submitted with parent_id.
+    if phase == ExecutionPhase::Execute
+        && deps
+            .store
+            .has_hierarchy
+            .load(std::sync::atomic::Ordering::Relaxed)
+    {
         match deps.store.active_children_count(task_id).await {
             Ok(count) if count > 0 => {
                 if let Err(e) = deps.store.set_waiting(task_id).await {
diff --git a/src/store/dependencies.rs b/src/store/dependencies.rs
index 35c189f..316cc69 100644
--- a/src/store/dependencies.rs
+++ b/src/store/dependencies.rs
@@ -20,48 +20,45 @@ impl TaskStore {
     }
 
     /// Inner dependency resolution that runs within an existing transaction.
+    ///
+    /// Uses `DELETE ... RETURNING` to combine edge lookup + deletion into a
+    /// single query, then a single batched `UPDATE ... RETURNING` to unblock
+    /// all dependents whose remaining deps are now zero.
     pub(crate) async fn resolve_dependents_inner(
         conn: &mut sqlx::pool::PoolConnection<sqlx::Sqlite>,
         completed_task_id: i64,
     ) -> Result<Vec<i64>, StoreError> {
-        // Find tasks that depend on the completed task.
+        // Step 1: Delete satisfied edges and collect affected task IDs.
         let dependent_ids: Vec<(i64,)> =
-            sqlx::query_as("SELECT task_id FROM task_deps WHERE depends_on_id = ?")
+            sqlx::query_as("DELETE FROM task_deps WHERE depends_on_id = ? RETURNING task_id")
                 .bind(completed_task_id)
                 .fetch_all(&mut **conn)
                 .await?;
 
-        // Remove the satisfied edges.
-        sqlx::query("DELETE FROM task_deps WHERE depends_on_id = ?")
-            .bind(completed_task_id)
-            .execute(&mut **conn)
-            .await?;
-
-        let mut unblocked = Vec::new();
-
-        for (dep_id,) in dependent_ids {
-            // Check if this dependent has any remaining unresolved deps.
-            let (remaining,): (i64,) =
-                sqlx::query_as("SELECT COUNT(*) FROM task_deps WHERE task_id = ?")
-                    .bind(dep_id)
-                    .fetch_one(&mut **conn)
-                    .await?;
+        if dependent_ids.is_empty() {
+            return Ok(Vec::new());
+        }
 
-            if remaining == 0 {
-                // All deps satisfied — unblock.
-                let result = sqlx::query(
-                    "UPDATE tasks SET status = 'pending' WHERE id = ? AND status = 'blocked'",
-                )
-                .bind(dep_id)
-                .execute(&mut **conn)
-                .await?;
-                if result.rows_affected() > 0 {
-                    unblocked.push(dep_id);
-                }
-            }
+        // Step 2: Unblock tasks with zero remaining deps in one UPDATE.
+        let placeholders = dependent_ids
+            .iter()
+            .map(|_| "?")
+            .collect::<Vec<_>>()
+            .join(",");
+        let sql = format!(
+            "UPDATE tasks SET status = 'pending'
+             WHERE status = 'blocked'
+               AND id IN ({placeholders})
+               AND NOT EXISTS (SELECT 1 FROM task_deps WHERE task_deps.task_id = tasks.id)
+             RETURNING id"
+        );
+        let mut q = sqlx::query_as::<_, (i64,)>(&sql);
+        for (dep_id,) in &dependent_ids {
+            q = q.bind(dep_id);
         }
+        let unblocked: Vec<(i64,)> = q.fetch_all(&mut **conn).await?;
 
-        Ok(unblocked)
+        Ok(unblocked.into_iter().map(|(id,)| id).collect())
     }
 
     /// After a task permanently fails, propagate failure to blocked dependents.
@@ -93,18 +90,13 @@ impl TaskStore {
         Box<dyn std::future::Future<Output = Result<(Vec<i64>, Vec<i64>), StoreError>> + Send + 'a>,
     > {
         Box::pin(async move {
+            // Delete edges from the failed task and collect affected task IDs.
             let dependent_rows: Vec<(i64,)> =
-                sqlx::query_as("SELECT task_id FROM task_deps WHERE depends_on_id = ?")
+                sqlx::query_as("DELETE FROM task_deps WHERE depends_on_id = ? RETURNING task_id")
                     .bind(failed_task_id)
                     .fetch_all(&mut **conn)
                     .await?;
 
-            // Clean up edges from the failed task.
-            sqlx::query("DELETE FROM task_deps WHERE depends_on_id = ?")
-                .bind(failed_task_id)
-                .execute(&mut **conn)
-                .await?;
-
             let mut all_failed = Vec::new();
             let mut all_unblocked = Vec::new();
 
diff --git a/src/store/lifecycle/mod.rs b/src/store/lifecycle/mod.rs
index a9294d9..8d3f9e2 100644
--- a/src/store/lifecycle/mod.rs
+++ b/src/store/lifecycle/mod.rs
@@ -62,7 +62,7 @@ pub(crate) async fn insert_history(
     } else {
         task.retry_count
     };
-    sqlx::query(
+    let result = sqlx::query(
         "INSERT INTO task_history (task_type, key, label, priority, status, payload,
             expected_read_bytes, expected_write_bytes, expected_net_rx_bytes, expected_net_tx_bytes,
             actual_read_bytes, actual_write_bytes, actual_net_rx_bytes, actual_net_tx_bytes,
@@ -110,9 +110,7 @@ pub(crate) async fn insert_history(
     .await?;
 
     // Copy tags from task_tags to task_history_tags.
-    let history_rowid = sqlx::query_scalar::<_, i64>("SELECT last_insert_rowid()")
-        .fetch_one(&mut **conn)
-        .await?;
+    let history_rowid = result.last_insert_rowid();
     sqlx::query(
         "INSERT INTO task_history_tags (history_rowid, key, value)
          SELECT ?, key, value FROM task_tags WHERE task_id = ?",
diff --git a/src/store/lifecycle/transitions.rs b/src/store/lifecycle/transitions.rs
index d25f0d0..01c455f 100644
--- a/src/store/lifecycle/transitions.rs
+++ b/src/store/lifecycle/transitions.rs
@@ -122,6 +122,7 @@ impl TaskStore {
                  SELECT id FROM tasks
                  WHERE status = 'pending'
                    AND (run_after IS NULL OR run_after <= strftime('%Y-%m-%d %H:%M:%f', 'now'))
+                   AND (expires_at IS NULL OR expires_at > strftime('%Y-%m-%d %H:%M:%f', 'now'))
                  ORDER BY priority ASC, id ASC
                  LIMIT 1
              )
diff --git a/src/store/mod.rs b/src/store/mod.rs
index f6f5bc6..75d250a 100644
--- a/src/store/mod.rs
+++ b/src/store/mod.rs
@@ -168,6 +168,9 @@ pub struct TaskStore {
     /// Fast-path flag: `false` means no tags have been inserted into
     /// `task_tags`, so `populate_tags` can skip the query entirely.
     pub(crate) has_tags: std::sync::Arc<AtomicBool>,
+    /// Fast-path flag: `false` means no tasks with `parent_id` have been
+    /// submitted, so `active_children_count` checks can be skipped.
+    pub(crate) has_hierarchy: std::sync::Arc<AtomicBool>,
 }
 
 impl TaskStore {
@@ -195,8 +198,9 @@ impl TaskStore {
             retention_policy: config.retention_policy,
             prune_interval: config.prune_interval,
             completion_count: std::sync::Arc::new(AtomicU64::new(0)),
-            // Conservative for file-backed stores that may have existing tags.
+            // Conservative for file-backed stores that may have existing tags/hierarchy.
             has_tags: std::sync::Arc::new(AtomicBool::new(true)),
+            has_hierarchy: std::sync::Arc::new(AtomicBool::new(true)),
         };
         store.migrate().await?;
         store.recover_running().await?;
@@ -221,8 +225,9 @@ impl TaskStore {
             retention_policy: Some(RetentionPolicy::MaxCount(10_000)),
             prune_interval: 100,
             completion_count: std::sync::Arc::new(AtomicU64::new(0)),
-            // In-memory stores start empty — no tags to query.
+            // In-memory stores start empty — no tags or hierarchy to query.
             has_tags: std::sync::Arc::new(AtomicBool::new(false)),
+            has_hierarchy: std::sync::Arc::new(AtomicBool::new(false)),
         };
         store.migrate().await?;
         Ok(store)
diff --git a/src/store/row_mapping.rs b/src/store/row_mapping.rs
index e6d0db1..e12bd7d 100644
--- a/src/store/row_mapping.rs
+++ b/src/store/row_mapping.rs
@@ -10,15 +10,59 @@ use crate::task::{
 };
 
 pub(crate) fn parse_datetime(s: &str) -> DateTime<Utc> {
-    // SQLite stores as "YYYY-MM-DD HH:MM:SS" or "YYYY-MM-DD HH:MM:SS.mmm"
-    // (the latter from backoff-computed run_after). Try with fractional seconds
-    // first, then fall back to whole-second precision.
-    chrono::NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S%.f")
-        .or_else(|_| chrono::NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S"))
+    // SQLite stores as "YYYY-MM-DD HH:MM:SS" or "YYYY-MM-DD HH:MM:SS.mmm".
+    // Fast fixed-position byte parser instead of the generic chrono parser.
+    let b = s.as_bytes();
+    if b.len() < 19 {
+        return DateTime::<Utc>::default();
+    }
+
+    let year = parse_4(b, 0);
+    let month = parse_2(b, 5);
+    let day = parse_2(b, 8);
+    let hour = parse_2(b, 11);
+    let min = parse_2(b, 14);
+    let sec = parse_2(b, 17);
+
+    let nanos = if b.len() > 20 && b[19] == b'.' {
+        parse_frac_nanos(b, 20)
+    } else {
+        0
+    };
+
+    chrono::NaiveDate::from_ymd_opt(year, month, day)
+        .and_then(|d| d.and_hms_nano_opt(hour, min, sec, nanos))
         .map(|ndt| ndt.and_utc())
         .unwrap_or_default()
 }
 
+#[inline(always)]
+fn parse_2(b: &[u8], off: usize) -> u32 {
+    (b[off] - b'0') as u32 * 10 + (b[off + 1] - b'0') as u32
+}
+
+#[inline(always)]
+fn parse_4(b: &[u8], off: usize) -> i32 {
+    (b[off] - b'0') as i32 * 1000
+        + (b[off + 1] - b'0') as i32 * 100
+        + (b[off + 2] - b'0') as i32 * 10
+        + (b[off + 3] - b'0') as i32
+}
+
+#[inline(always)]
+fn parse_frac_nanos(b: &[u8], start: usize) -> u32 {
+    let frac_len = (b.len() - start).min(9);
+    let mut val: u32 = 0;
+    for i in 0..frac_len {
+        val = val * 10 + (b[start + i] - b'0') as u32;
+    }
+    // Pad to 9 digits (nanoseconds).
+    for _ in frac_len..9 {
+        val *= 10;
+    }
+    val
+}
+
 pub(crate) fn row_to_task_record(row: &sqlx::sqlite::SqliteRow) -> TaskRecord {
     let priority_val: i32 = row.get("priority");
     let status_str: String = row.get("status");
@@ -136,3 +180,39 @@ pub(crate) fn row_to_history_record(row: &sqlx::sqlite::SqliteRow) -> TaskHistor
         max_retries: row.get("max_retries"),
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn parse_whole_seconds() {
+        let dt = parse_datetime("2024-01-15 09:30:45");
+        assert_eq!(dt.to_string(), "2024-01-15 09:30:45 UTC");
+    }
+
+    #[test]
+    fn parse_fractional_millis() {
+        let dt = parse_datetime("2024-01-15 09:30:45.123");
+        assert_eq!(dt.to_string(), "2024-01-15 09:30:45.123 UTC");
+        assert_eq!(dt.timestamp_subsec_millis(), 123);
+    }
+
+    #[test]
+    fn parse_fractional_micros() {
+        let dt = parse_datetime("2024-01-15 09:30:45.123456");
+        assert_eq!(dt.to_string(), "2024-01-15 09:30:45.123456 UTC");
+    }
+
+    #[test]
+    fn parse_short_string_returns_default() {
+        let dt = parse_datetime("bad");
+        assert_eq!(dt, DateTime::<Utc>::default());
+    }
+
+    #[test]
+    fn parse_empty_returns_default() {
+        let dt = parse_datetime("");
+        assert_eq!(dt, DateTime::<Utc>::default());
+    }
+}
diff --git a/src/store/submit/mod.rs b/src/store/submit/mod.rs
index f73c06e..fd6c9dc 100644
--- a/src/store/submit/mod.rs
+++ b/src/store/submit/mod.rs
@@ -55,6 +55,7 @@ pub(crate) async fn submit_one(
     conn: &mut sqlx::pool::PoolConnection<sqlx::Sqlite>,
     sub: &TaskSubmission,
     has_tags_flag: Option<&std::sync::atomic::AtomicBool>,
+    has_hierarchy_flag: Option<&std::sync::atomic::AtomicBool>,
 ) -> Result<SubmitOutcome, StoreError> {
     if let Some(ref err) = sub.payload_error {
         return Err(StoreError::Serialization(err.clone()));
@@ -127,6 +128,13 @@ pub(crate) async fn submit_one(
     if result.rows_affected() > 0 {
         let task_id = result.last_insert_rowid();
 
+        // Mark hierarchy flag if this task has a parent.
+        if sub.parent_id.is_some() {
+            if let Some(flag) = has_hierarchy_flag {
+                flag.store(true, std::sync::atomic::Ordering::Relaxed);
+            }
+        }
+
         // Insert tags.
         if let Some(flag) = has_tags_flag {
             super::insert_tags_flagged(conn, task_id, &sub.tags, flag).await?;
@@ -188,7 +196,13 @@ impl TaskStore {
 
         let mut conn = self.begin_write().await?;
         tracing::debug!(task_type = %sub.task_type, "store.submit: INSERT start");
-        let outcome = submit_one(&mut conn, sub, Some(&self.has_tags)).await?;
+        let outcome = submit_one(
+            &mut conn,
+            sub,
+            Some(&self.has_tags),
+            Some(&self.has_hierarchy),
+        )
+        .await?;
         tracing::debug!(task_type = %sub.task_type, "store.submit: INSERT end");
         sqlx::query("COMMIT").execute(&mut *conn).await?;
         Ok(outcome)
@@ -242,7 +256,15 @@ impl TaskStore {
                 if last_occurrence[&sub.effective_key()] != global_i {
                     results.push(SubmitOutcome::Duplicate);
                 } else {
-                    results.push(submit_one(&mut conn, sub, Some(&self.has_tags)).await?);
+                    results.push(
+                        submit_one(
+                            &mut conn,
+                            sub,
+                            Some(&self.has_tags),
+                            Some(&self.has_hierarchy),
+                        )
+                        .await?,
+                    );
                 }
             }