Commit 962191f
Restore expired sessions via summary-based catch-up (#52)

When the backend loses a session (typically: HF Space restarted), the chat now shows a small inline banner instead of silently erroring:

    Where were we? Let me skim the conversation so far and pick up right
    where we left off — or we can start something new.
    [ Catch me up ]  [ Start fresh ]

Flow, in one pass:

* Frontend stashes the raw backend messages in localStorage on every mount-hydrate and turn_complete. When the backend 404s for the session id, the SSE transport and the mount effect both fire onSessionDead → the session is flagged `expired` in sessionStore; the sidebar marks it "needs a catch-up".
* Catch me up → POST /api/session/restore-summary with the cached messages. Backend creates a fresh session, runs the existing summarizer (factored into summarize_messages() and shared with in-session compaction) with a restore-specific prompt that preserves the tool-call trail, and seeds the new session with that summary wrapped in a [SYSTEM: ...] user turn. The new id is swapped back via renameSession; UIMessages + backend cache move with it.
* Start fresh → delete the session + its caches.
* The design is lazy per session: users with 5 stale tabs only pay for a summary on the ones they actually reopen.

Frontend filters [SYSTEM: ...] user turns from rendering so the seed message (plus existing doom-loop / compact nudges) stays invisible. For sessions that predate the raw-message cache, fall back to reconstructing the backend message list from the longstanding UIMessage cache (tool calls + paired results, text preserved).

Also sets litellm.modify_params = True globally in agent/__init__.py (moved out of agent/main.py) so the backend entry also picks it up — required for Anthropic to accept a history containing tool_calls without a `tools=` kwarg, which is exactly the summarization shape.
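The seed-hiding rule described above reduces to a prefix check on user turns. A minimal sketch, assuming plain dict messages (the real frontend does this in TypeScript over UIMessages; `visible_messages` is a hypothetical name):

```python
# Hide user turns that carry a "[SYSTEM: ...]" prefix (the restore seed
# and other internal nudges) while keeping every normal turn.
def visible_messages(messages: list[dict]) -> list[dict]:
    return [
        m for m in messages
        if not (m.get("role") == "user"
                and str(m.get("content", "")).startswith("[SYSTEM:"))
    ]

history = [
    {"role": "user", "content": "[SYSTEM: Your prior memory...]\n\nsummary"},
    {"role": "user", "content": "What were we doing?"},
    {"role": "assistant", "content": "We were wiring up the restore flow."},
]
print(len(visible_messages(history)))  # 2 — the seed turn is hidden
```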
1 parent 28b8f2b commit 962191f

15 files changed

Lines changed: 580 additions & 41 deletions

File tree

‎agent/__init__.py‎

Lines changed: 15 additions & 1 deletion

@@ -2,6 +2,20 @@
 HF Agent - Main agent module
 """
 
-from agent.core.agent_loop import submission_loop
+import litellm
+
+# Global LiteLLM behavior — set once at package import so both CLI and
+# backend entries share the same config.
+#   drop_params: quietly drop unsupported params rather than raising
+#   suppress_debug_info: hide the noisy "Give Feedback" banner on errors
+#   modify_params: let LiteLLM patch Anthropic's tool-call requirements
+#     (synthesize a dummy tool spec when we call completion on a history
+#     that contains tool_calls but aren't passing `tools=` — happens
+#     during summarization / session seeding).
+litellm.drop_params = True
+litellm.suppress_debug_info = True
+litellm.modify_params = True
+
+from agent.core.agent_loop import submission_loop  # noqa: E402
 
 __all__ = ["submission_loop"]

‎agent/context_manager/manager.py‎

Lines changed: 78 additions & 21 deletions

@@ -68,6 +68,63 @@ def _get_hf_username(hf_token: str | None = None) -> str:
     return "unknown"
 
 
+_COMPACT_PROMPT = (
+    "Please provide a concise summary of the conversation above, focusing on "
+    "key decisions, the 'why' behind the decisions, problems solved, and "
+    "important context needed for developing further. Your summary will be "
+    "given to someone who has never worked on this project before and they "
+    "will have to be filled in."
+)
+
+# Used when seeding a brand-new session from prior browser-cached messages.
+# Here we're writing a note to *ourselves* — so preserve the tool-call trail,
+# files produced, and planned next steps in first person. Optimized for
+# continuity, not brevity.
+_RESTORE_PROMPT = (
+    "You're about to be restored into a fresh session with no memory of the "
+    "conversation above. Write a first-person note to your future self so "
+    "you can continue right where you left off. Include:\n"
+    " • What the user originally asked for and what progress you've made.\n"
+    " • Every tool you called, with arguments and a one-line result summary.\n"
+    " • Any code, files, scripts, or artifacts you produced (with paths).\n"
+    " • Key decisions and the reasoning behind them.\n"
+    " • What you were planning to do next.\n\n"
+    "Don't be cute. Be specific. This is the only context you'll have."
+)
+
+
+async def summarize_messages(
+    messages: list[Message],
+    model_name: str,
+    hf_token: str | None = None,
+    max_tokens: int = 2000,
+    tool_specs: list[dict] | None = None,
+    prompt: str = _COMPACT_PROMPT,
+) -> tuple[str, int]:
+    """Run a summarization prompt against a list of messages.
+
+    ``prompt`` defaults to the compaction prompt (terse, decision-focused).
+    Callers seeding a new session after a restart should pass ``_RESTORE_PROMPT``
+    instead — it preserves the tool-call trail so the agent can answer
+    follow-up questions about what it did.
+
+    Returns ``(summary_text, completion_tokens)``.
+    """
+    from agent.core.llm_params import _resolve_llm_params
+
+    prompt_messages = list(messages) + [Message(role="user", content=prompt)]
+    llm_params = _resolve_llm_params(model_name, hf_token, reasoning_effort="high")
+    response = await acompletion(
+        messages=prompt_messages,
+        max_completion_tokens=max_tokens,
+        tools=tool_specs,
+        **llm_params,
+    )
+    summary = response.choices[0].message.content or ""
+    completion_tokens = response.usage.completion_tokens if response.usage else 0
+    return summary, completion_tokens
+
+
 class ContextManager:
     """Manages conversation context and message history for the agent"""
 
@@ -318,32 +375,32 @@ async def compact(
         if not messages_to_summarize:
             return
 
-        messages_to_summarize.append(
-            Message(
-                role="user",
-                content="Please provide a concise summary of the conversation above, focusing on key decisions, the 'why' behind the decisions, problems solved, and important context needed for developing further. Your summary will be given to someone who has never worked on this project before and they will be have to be filled in.",
-            )
-        )
-
-        from agent.core.llm_params import _resolve_llm_params
-
-        llm_params = _resolve_llm_params(model_name, hf_token, reasoning_effort="high")
-        response = await acompletion(
-            messages=messages_to_summarize,
-            max_completion_tokens=self.compact_size,
-            tools=tool_specs,
-            **llm_params,
-        )
-        summarized_message = Message(
-            role="assistant", content=response.choices[0].message.content
+        summary, completion_tokens = await summarize_messages(
+            messages_to_summarize,
+            model_name=model_name,
+            hf_token=hf_token,
+            max_tokens=self.compact_size,
+            tool_specs=tool_specs,
+            prompt=_COMPACT_PROMPT,
         )
+        summarized_message = Message(role="assistant", content=summary)
 
         # Reconstruct: system + first user msg + summary + recent messages
         head = [system_msg] if system_msg else []
         if first_user_msg:
             head.append(first_user_msg)
         self.items = head + [summarized_message] + recent_messages
 
-        self.running_context_usage = (
-            len(self.system_prompt) // 4 + response.usage.completion_tokens
-        )
+        # Count the actual post-compact context — system prompt + first user
+        # turn + summary + the preserved tail all contribute, not just the
+        # summary. litellm.token_counter uses the model's real tokenizer.
+        from litellm import token_counter
+
+        try:
+            self.running_context_usage = token_counter(
+                model=model_name,
+                messages=[m.model_dump() for m in self.items],
+            )
+        except Exception as e:
+            logger.warning(
+                "token_counter failed post-compact (%s); falling back to rough estimate", e
+            )
+            self.running_context_usage = len(self.system_prompt) // 4 + completion_tokens
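The reconstruction step in compact() — optional system prompt, optional first user turn, a single summary message, then the preserved tail — is simple list surgery. A minimal sketch with plain dicts standing in for Message objects (`rebuild_context` is a hypothetical helper, not part of the diff):

```python
def rebuild_context(system_msg, first_user_msg, summary_text, recent):
    # Post-compact shape: [system?] + [first user?] + [summary] + tail.
    head = [system_msg] if system_msg else []
    if first_user_msg:
        head.append(first_user_msg)
    summary = {"role": "assistant", "content": summary_text}
    return head + [summary] + recent

items = rebuild_context(
    {"role": "system", "content": "You are an agent."},
    {"role": "user", "content": "Build me a demo."},
    "Summary of 40 dropped turns.",
    [{"role": "user", "content": "Now add tests."}],
)
print([m["role"] for m in items])  # ['system', 'user', 'assistant', 'user']
```

Keeping the first user turn alongside the summary means the original task statement survives compaction verbatim, even when everything between it and the recent tail is collapsed.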

‎backend/routes/agent.py‎

Lines changed: 44 additions & 0 deletions

@@ -227,6 +227,50 @@ async def create_session(
     return SessionResponse(session_id=session_id, ready=True)
 
 
+@router.post("/session/restore-summary", response_model=SessionResponse)
+async def restore_session_summary(
+    request: Request, body: dict, user: dict = Depends(get_current_user)
+) -> SessionResponse:
+    """Create a new session seeded with a summary of the caller's prior
+    conversation. The client sends its cached messages; we run the standard
+    summarization prompt on them and drop the result into the new
+    session's context as a user-role system note.
+    """
+    messages = body.get("messages")
+    if not isinstance(messages, list) or not messages:
+        raise HTTPException(status_code=400, detail="Missing 'messages' array")
+
+    hf_token = None
+    auth_header = request.headers.get("Authorization", "")
+    if auth_header.startswith("Bearer "):
+        hf_token = auth_header[7:]
+    if not hf_token:
+        hf_token = request.cookies.get("hf_access_token")
+    if not hf_token:
+        hf_token = os.environ.get("HF_TOKEN")
+
+    try:
+        session_id = await session_manager.create_session(
+            user_id=user["user_id"], hf_token=hf_token
+        )
+    except SessionCapacityError as e:
+        raise HTTPException(status_code=503, detail=str(e))
+
+    try:
+        summarized = await session_manager.seed_from_summary(session_id, messages)
+    except ValueError as e:
+        raise HTTPException(status_code=500, detail=str(e))
+    except Exception as e:
+        logger.exception("seed_from_summary failed")
+        raise HTTPException(status_code=500, detail=f"Summary failed: {e}")
+
+    logger.info(
+        f"Seeded session {session_id} for {user.get('username', 'unknown')} "
+        f"(summary of {summarized} messages)"
+    )
+    return SessionResponse(session_id=session_id, ready=True)
+
+
 @router.get("/session/{session_id}", response_model=SessionInfo)
 async def get_session(
     session_id: str, user: dict = Depends(get_current_user)

‎backend/session_manager.py‎

Lines changed: 63 additions & 0 deletions

@@ -207,6 +207,69 @@ def _create_session_sync():
         logger.info(f"Created session {session_id} for user {user_id}")
         return session_id
 
+    async def seed_from_summary(self, session_id: str, messages: list[dict]) -> int:
+        """Rehydrate a session from cached prior messages via summarization.
+
+        Runs the standard summarization prompt (same one compaction uses)
+        over the provided messages, then seeds the new session's context
+        with that summary. Tool-call pairing concerns disappear because the
+        output is plain text. Returns the number of messages summarized.
+        """
+        from litellm import Message
+
+        from agent.context_manager.manager import _RESTORE_PROMPT, summarize_messages
+
+        agent_session = self.sessions.get(session_id)
+        if not agent_session:
+            raise ValueError(f"Session {session_id} not found")
+
+        # Parse into Message objects, tolerating malformed entries.
+        parsed: list[Message] = []
+        for raw in messages:
+            if raw.get("role") == "system":
+                continue  # the new session has its own system prompt
+            try:
+                parsed.append(Message.model_validate(raw))
+            except Exception as e:
+                logger.warning("Dropping malformed message during seed: %s", e)
+
+        if not parsed:
+            return 0
+
+        session = agent_session.session
+        # Pass the real tool specs so the summarizer sees what the agent
+        # actually has — otherwise Anthropic's modify_params injects a
+        # dummy tool and the summarizer editorializes that the original
+        # tool calls were fabricated.
+        tool_specs = None
+        try:
+            tool_specs = agent_session.tool_router.get_tool_specs_for_llm()
+        except Exception:
+            pass
+        try:
+            summary, _ = await summarize_messages(
+                parsed,
+                model_name=session.config.model_name,
+                hf_token=session.hf_token,
+                max_tokens=4000,
+                prompt=_RESTORE_PROMPT,
+                tool_specs=tool_specs,
+            )
+        except Exception as e:
+            logger.error("Summary call failed during seed: %s", e)
+            raise
+
+        seed = Message(
+            role="user",
+            content=(
+                "[SYSTEM: Your prior memory of this conversation — written "
+                "in your own voice right before restart. Continue from here.]\n\n"
+                + (summary or "(no summary returned)")
+            ),
+        )
+        session.context_manager.items.append(seed)
+        return len(parsed)
+
     @staticmethod
     async def _cleanup_sandbox(session: Session) -> None:
         """Delete the sandbox Space if one was created for this session."""
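The tolerant-parse loop in seed_from_summary can be isolated: skip system turns (the new session brings its own system prompt) and drop anything that fails validation rather than aborting the whole restore. A stdlib-only sketch, with a trivial content check standing in for Message.model_validate:

```python
def parse_cached_messages(raw_messages: list[dict]) -> list[dict]:
    parsed = []
    for raw in raw_messages:
        if raw.get("role") == "system":
            continue  # the new session has its own system prompt
        try:
            # Stand-in validation; the real code uses Message.model_validate.
            if not isinstance(raw.get("content"), str):
                raise ValueError("content must be a string")
            parsed.append(raw)
        except ValueError:
            pass  # tolerate malformed entries; the real code logs a warning

    return parsed

cached = [
    {"role": "system", "content": "old system prompt"},  # skipped
    {"role": "user", "content": "hello"},                # kept
    {"role": "assistant", "content": None},              # dropped: malformed
]
print(len(parse_cached_messages(cached)))  # 1
```

Dropping bad entries one at a time matters here because the input comes from browser localStorage, which may hold stale or partially written data.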
Lines changed: 113 additions & 0 deletions

@@ -0,0 +1,113 @@
/**
 * Shown inline in a chat when the backend no longer recognizes the
 * session id (typically: Space was restarted). Lets the user catch the
 * agent up with a summary of the prior conversation, or start over.
 */
import { useState, useCallback } from 'react';
import { Box, Button, CircularProgress, Typography } from '@mui/material';
import { apiFetch } from '@/utils/api';
import { useSessionStore } from '@/store/sessionStore';
import { useAgentStore } from '@/store/agentStore';
import { loadBackendMessages } from '@/lib/backend-message-store';
import { loadMessages } from '@/lib/chat-message-store';
import { uiMessagesToLLMMessages } from '@/lib/convert-llm-messages';
import { logger } from '@/utils/logger';

interface Props {
  sessionId: string;
}

export default function ExpiredBanner({ sessionId }: Props) {
  const { renameSession, deleteSession } = useSessionStore();
  const [busy, setBusy] = useState<'catch-up' | 'start-over' | null>(null);
  const [error, setError] = useState<string | null>(null);

  const handleCatchUp = useCallback(async () => {
    setBusy('catch-up');
    setError(null);
    try {
      // Prefer the raw backend-message cache; fall back to reconstructing
      // from UIMessages (for sessions that predate the backend cache).
      let messages = loadBackendMessages(sessionId);
      if (!messages || messages.length === 0) {
        const uiMsgs = loadMessages(sessionId);
        if (uiMsgs.length > 0) messages = uiMessagesToLLMMessages(uiMsgs);
      }
      if (!messages || messages.length === 0) {
        setError('Nothing to summarize from this chat.');
        setBusy(null);
        return;
      }

      const res = await apiFetch('/api/session/restore-summary', {
        method: 'POST',
        body: JSON.stringify({ messages }),
      });
      if (!res.ok) throw new Error(`restore-summary failed: ${res.status}`);
      const data = await res.json();
      const newId = data.session_id as string | undefined;
      if (!newId) throw new Error('no session_id in response');

      useAgentStore.getState().clearSessionState(sessionId);
      renameSession(sessionId, newId);
    } catch (e) {
      logger.warn('Catch-up failed:', e);
      setError("Couldn't catch up — try starting over.");
      setBusy(null);
    }
  }, [sessionId, renameSession]);

  const handleStartOver = useCallback(() => {
    setBusy('start-over');
    useAgentStore.getState().clearSessionState(sessionId);
    deleteSession(sessionId);
  }, [sessionId, deleteSession]);

  return (
    <Box
      sx={{
        mx: { xs: 2, md: 'auto' },
        my: 2,
        maxWidth: 720,
        p: 2.5,
        borderRadius: 2,
        border: '1px solid',
        borderColor: 'divider',
        bgcolor: 'background.paper',
        boxShadow: '0 1px 3px rgba(0,0,0,0.06)',
      }}
    >
      <Typography variant="body1" sx={{ fontWeight: 600, mb: 0.5 }}>
        Where were we?
      </Typography>
      <Typography variant="body2" sx={{ color: 'text.secondary', mb: 2 }}>
        Let me skim the conversation so far and pick up right where we left
        off — or we can start something new.
      </Typography>
      <Box sx={{ display: 'flex', gap: 1, flexWrap: 'wrap' }}>
        <Button
          variant="contained"
          onClick={handleCatchUp}
          disabled={busy !== null}
          startIcon={busy === 'catch-up' ? <CircularProgress size={16} color="inherit" /> : null}
          sx={{ textTransform: 'none' }}
        >
          {busy === 'catch-up' ? 'Catching up…' : 'Catch me up'}
        </Button>
        <Button
          variant="outlined"
          onClick={handleStartOver}
          disabled={busy !== null}
          sx={{ textTransform: 'none' }}
        >
          Start fresh
        </Button>
      </Box>
      {error && (
        <Typography variant="caption" sx={{ display: 'block', mt: 1.5, color: 'error.main' }}>
          {error}
        </Typography>
      )}
    </Box>
  );
}
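The cache-selection logic at the top of handleCatchUp — raw backend messages first, otherwise rebuild from the UIMessage cache — is language-agnostic. A Python sketch with hypothetical loader stand-ins passed in as arguments (the real component calls loadBackendMessages, loadMessages, and uiMessagesToLLMMessages):

```python
def pick_restore_messages(backend_cache, ui_cache, convert):
    # Prefer the raw backend-message cache; fall back to converting the
    # UIMessage cache for sessions that predate the backend cache.
    if backend_cache:
        return backend_cache
    if ui_cache:
        return convert(ui_cache)
    return []  # nothing to summarize: the caller shows an error instead

msgs = pick_restore_messages(
    [],                          # backend cache empty (older session)
    [{"id": 1, "text": "hi"}],   # UIMessage cache still present
    lambda ui: [{"role": "user", "content": m["text"]} for m in ui],
)
print(msgs)  # [{'role': 'user', 'content': 'hi'}]
```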
