mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-17 02:05:22 -04:00
Installing a heavy dependency like vllm crashes in a "stale — restarting" loop:
it restarts mid-install, reuses the cached wheels, then stalls again.
The download/install watchdog (cookbookRunning.js) keyed its stall signal purely
off the downloaded-byte counter ("1.81G/2.49G"). A dependency install spends long
stretches with NO byte counter — pip dependency resolution and the native CUDA
build/compile — so the signal froze and after STALE_PROGRESS_MS the watchdog
declared it stale and auto-restarted it mid-build, looping forever.
Extract the signal into a pure computeProgressSignal (cookbookProgressSignal.js):
keep the byte counter for the download phase (so a genuinely stuck download is
still caught, and an animating-but-frozen ETA frame is NOT mistaken for progress),
and when there's no byte counter fall back to a fingerprint of the output tail so
resolver/compile lines count as progress. Only a truly frozen tail now reads as
stalled.
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -7,6 +7,7 @@
|
||||
import uiModule from './ui.js';
|
||||
import { _diagnose, _showDiagnosis, _clearDiagnosis } from './cookbook-diagnosis.js';
|
||||
import { registerMenuDismiss } from './escMenuStack.js';
|
||||
import { computeProgressSignal } from './cookbookProgressSignal.js';
|
||||
|
||||
// Human-friendly badge label for a task's internal status. Avoids surfacing
|
||||
// the word "error" in the sidebar — a server the user stopped or one that
|
||||
@@ -2443,7 +2444,11 @@ async function _reconnectTask(el, task) {
|
||||
// back to %/aggregate only when no byte counter is present.
|
||||
const _byteMatches = [...snapshot.matchAll(/([\d.]+\s?[KMGT])B?\s*\/\s*[\d.]+\s?[KMGT]B?/gi)];
|
||||
const _bytes = _byteMatches.length ? _byteMatches[_byteMatches.length - 1][1].replace(/\s/g, '') : null;
|
||||
const curProgress = _bytes || (_dlAgg != null ? String(_dlAgg) : (lastPct || '0'));
|
||||
// When there's no byte counter (pip resolve / native build phase of a
|
||||
// dependency install), key off the output tail so new build lines count
|
||||
// as progress — otherwise a long quiet build is falsely declared stale
|
||||
// and restarted mid-build, looping forever (#1568).
|
||||
const curProgress = computeProgressSignal(_bytes, _dlAgg, lastPct, snapshot);
|
||||
const _fetchPctMatches = [...snapshot.matchAll(/Fetching\s+\d+\s+files:\s*(\d+)%/g)];
|
||||
const _fetchPct = _fetchPctMatches.length ? parseInt(_fetchPctMatches[_fetchPctMatches.length - 1][1]) : null;
|
||||
const _startupStalled = !_bytes && ((_dlAgg === 0) || (_fetchPct === 0)) && curProgress === '0';
|
||||
|
||||
Reference in New Issue
Block a user