|
1 | 1 | import { useState, useEffect, useCallback, useRef, useMemo } from 'react' |
2 | | -import { useParams, useSearchParams, useOutletContext, Link } from 'react-router-dom' |
3 | | -import { backendLogsApi } from '../utils/api' |
| 2 | +import { useParams, useSearchParams, useOutletContext, Link, Navigate } from 'react-router-dom' |
| 3 | +import { backendLogsApi, nodesApi } from '../utils/api' |
4 | 4 | import { formatTimestamp } from '../utils/format' |
5 | 5 | import { apiUrl } from '../utils/basePath' |
6 | 6 | import LoadingSpinner from '../components/LoadingSpinner' |
| 7 | +import { useDistributedMode } from '../hooks/useDistributedMode' |
7 | 8 |
|
8 | 9 | function wsUrl(path) { |
9 | 10 | const proto = window.location.protocol === 'https:' ? 'wss:' : 'ws:' |
@@ -274,11 +275,158 @@ function BackendLogsDetail({ modelId }) { |
274 | 275 | ) |
275 | 276 | } |
276 | 277 |
|
| 278 | +// DistributedBackendLogsResolver runs only in distributed mode. The local |
| 279 | +// /api/backend-logs WebSocket has no backend behind it here (inference lives |
| 280 | +// on workers), so we resolve modelId → hosting node(s) and forward to the |
| 281 | +// per-node logs page. One hit redirects automatically; multiple hits render |
| 282 | +// a picker so the operator can pick which worker's logs to inspect. |
| 283 | +function DistributedBackendLogsResolver({ modelId, fromTimestamp }) { |
| 284 | + const [hits, setHits] = useState(null) // [{ node, model }] once resolved |
| 285 | + const [error, setError] = useState(null) |
| 286 | + |
| 287 | + useEffect(() => { |
| 288 | + let cancelled = false |
| 289 | + ;(async () => { |
| 290 | + try { |
| 291 | + const nodes = await nodesApi.list() |
| 292 | + const nodeList = Array.isArray(nodes) ? nodes : [] |
| 293 | + // Fan out to each node and collect entries that match this model. |
| 294 | + // Per-node failures are tolerated — a single offline worker shouldn't |
| 295 | + // hide logs available on its peers. |
| 296 | + const perNode = await Promise.all(nodeList.map(async (node) => { |
| 297 | + try { |
| 298 | + const models = await nodesApi.getModels(node.id) |
| 299 | + const matches = (Array.isArray(models) ? models : []).filter(m => m.model_name === modelId) |
| 300 | + return matches.map(m => ({ node, model: m })) |
| 301 | + } catch { |
| 302 | + return [] |
| 303 | + } |
| 304 | + })) |
| 305 | + if (cancelled) return |
| 306 | + setHits(perNode.flat()) |
| 307 | + } catch (err) { |
| 308 | + if (!cancelled) setError(err) |
| 309 | + } |
| 310 | + })() |
| 311 | + return () => { cancelled = true } |
| 312 | + }, [modelId]) |
| 313 | + |
| 314 | + if (error) { |
| 315 | + return ( |
| 316 | + <div className="page page--wide"> |
| 317 | + <div className="empty-state"> |
| 318 | + <div className="empty-state-icon"><i className="fas fa-exclamation-triangle" /></div> |
| 319 | + <h2 className="empty-state-title">Failed to resolve hosting nodes</h2> |
| 320 | + <p className="empty-state-text">{error.message}</p> |
| 321 | + </div> |
| 322 | + </div> |
| 323 | + ) |
| 324 | + } |
| 325 | + |
| 326 | + if (hits === null) { |
| 327 | + return ( |
| 328 | + <div style={{ display: 'flex', justifyContent: 'center', padding: 'var(--spacing-xl)' }}> |
| 329 | + <LoadingSpinner size="lg" /> |
| 330 | + </div> |
| 331 | + ) |
| 332 | + } |
| 333 | + |
| 334 | + if (hits.length === 0) { |
| 335 | + return ( |
| 336 | + <div className="page page--wide"> |
| 337 | + <div className="empty-state"> |
| 338 | + <div className="empty-state-icon"><i className="fas fa-terminal" /></div> |
| 339 | + <h2 className="empty-state-title">Model not loaded on any worker</h2> |
| 340 | + <p className="empty-state-text"> |
| 341 | + <span style={{ fontFamily: 'var(--font-mono)' }}>{modelId}</span> isn't currently loaded on any node in the cluster. |
| 342 | + Check the <Link to="/app/nodes" style={{ color: 'var(--color-primary)' }}>Nodes page</Link> to see which models are running where. |
| 343 | + </p> |
| 344 | + </div> |
| 345 | + </div> |
| 346 | + ) |
| 347 | + } |
| 348 | + |
| 349 | + // Bare model name aggregates this node's replicas via the worker's log |
| 350 | + // store; preserve ?from= so the deep-link from a trace still scrolls to |
| 351 | + // the right line on arrival. |
| 352 | + const buildHref = (nodeId) => { |
| 353 | + const base = `/app/node-backend-logs/${nodeId}/${encodeURIComponent(modelId)}` |
| 354 | + return fromTimestamp ? `${base}?from=${encodeURIComponent(fromTimestamp)}` : base |
| 355 | + } |
| 356 | + |
| 357 | + if (hits.length === 1) { |
| 358 | + return <Navigate to={buildHref(hits[0].node.id)} replace /> |
| 359 | + } |
| 360 | + |
| 361 | + // Multiple workers host this model — let the operator pick. |
| 362 | + return ( |
| 363 | + <div className="page page--wide"> |
| 364 | + <div className="page-header"> |
| 365 | + <div> |
| 366 | + <h1 className="page-title" style={{ marginBottom: 0 }}> |
| 367 | + <i className="fas fa-terminal" style={{ fontSize: '0.8em', marginRight: 'var(--spacing-sm)' }} /> |
| 368 | + {modelId} |
| 369 | + </h1> |
| 370 | + <p className="page-subtitle" style={{ marginTop: 'var(--spacing-xs)' }}> |
| 371 | + Hosted on {hits.length} workers — pick one to view its logs. |
| 372 | + </p> |
| 373 | + </div> |
| 374 | + </div> |
| 375 | + <div style={{ display: 'flex', flexDirection: 'column', gap: 'var(--spacing-xs)' }}> |
| 376 | + {hits.map(({ node, model }) => ( |
| 377 | + <Link |
| 378 | + key={`${node.id}#${model.replica_index ?? 0}`} |
| 379 | + to={buildHref(node.id)} |
| 380 | + style={{ |
| 381 | + display: 'flex', alignItems: 'center', justifyContent: 'space-between', |
| 382 | + padding: 'var(--spacing-sm) var(--spacing-md)', |
| 383 | + background: 'var(--color-bg-primary)', border: '1px solid var(--color-border)', |
| 384 | + borderRadius: 'var(--radius-md)', textDecoration: 'none', color: 'inherit', |
| 385 | + }} |
| 386 | + > |
| 387 | + <div> |
| 388 | + <div style={{ fontWeight: 500 }}>{node.name || node.id}</div> |
| 389 | + <div style={{ fontSize: '0.75rem', color: 'var(--color-text-secondary)', fontFamily: 'var(--font-mono)' }}> |
| 390 | + {node.id}{model.replica_index ? ` · replica ${model.replica_index}` : ''} · {model.state} |
| 391 | + </div> |
| 392 | + </div> |
| 393 | + <i className="fas fa-chevron-right" style={{ color: 'var(--color-text-muted)' }} /> |
| 394 | + </Link> |
| 395 | + ))} |
| 396 | + </div> |
| 397 | + </div> |
| 398 | + ) |
| 399 | +} |
| 400 | + |
| 401 | +// BackendLogsRouter picks between the local WebSocket view (standalone) and |
| 402 | +// the distributed resolver. The probe runs once via useDistributedMode so a |
| 403 | +// 503 from /api/nodes (the canonical "distributed disabled" signal) keeps the |
| 404 | +// existing standalone path intact. |
| 405 | +function BackendLogsRouter({ modelId }) { |
| 406 | + const [searchParams] = useSearchParams() |
| 407 | + const fromTimestamp = searchParams.get('from') |
| 408 | + const { enabled: distributedMode, loading } = useDistributedMode() |
| 409 | + |
| 410 | + if (loading) { |
| 411 | + return ( |
| 412 | + <div style={{ display: 'flex', justifyContent: 'center', padding: 'var(--spacing-xl)' }}> |
| 413 | + <LoadingSpinner size="lg" /> |
| 414 | + </div> |
| 415 | + ) |
| 416 | + } |
| 417 | + |
| 418 | + if (distributedMode) { |
| 419 | + return <DistributedBackendLogsResolver modelId={modelId} fromTimestamp={fromTimestamp} /> |
| 420 | + } |
| 421 | + |
| 422 | + return <BackendLogsDetail modelId={modelId} /> |
| 423 | +} |
| 424 | + |
277 | 425 | export default function BackendLogs() { |
278 | 426 | const { modelId } = useParams() |
279 | 427 |
|
280 | 428 | if (modelId) { |
281 | | - return <BackendLogsDetail modelId={decodeURIComponent(modelId)} /> |
| 429 | + return <BackendLogsRouter modelId={decodeURIComponent(modelId)} /> |
282 | 430 | } |
283 | 431 |
|
284 | 432 | // No model specified — redirect to System page |
|
0 commit comments