mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-17 10:15:27 -04:00
fix: diagnose vllm serve runtime issues (#1198)
This commit is contained in:
@@ -559,6 +559,21 @@ def _append_serve_preflight_exit_lines(runner_lines: list[str], *, keep_shell_op
|
|||||||
runner_lines.append('fi')
|
runner_lines.append('fi')
|
||||||
|
|
||||||
|
|
||||||
|
def _append_vllm_linux_preflight_lines(runner_lines: list[str]) -> None:
|
||||||
|
"""Append Linux vLLM readiness lines that identify the runtime being used."""
|
||||||
|
# Keep the user install bin visible for Odysseus-managed `pip install --user`
|
||||||
|
# installs, but then report the actual CLI path so external runtimes are clear.
|
||||||
|
runner_lines.append('export PATH="$HOME/.local/bin:$PATH"')
|
||||||
|
runner_lines.append('ODYSSEUS_VLLM_BIN="$(command -v vllm 2>/dev/null || true)"')
|
||||||
|
runner_lines.append('if [ -z "$ODYSSEUS_VLLM_BIN" ]; then')
|
||||||
|
runner_lines.append(' echo "ERROR: vLLM is not installed."')
|
||||||
|
runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127')
|
||||||
|
runner_lines.append('else')
|
||||||
|
runner_lines.append(' echo "[odysseus] vLLM CLI: $ODYSSEUS_VLLM_BIN"')
|
||||||
|
runner_lines.append(' ODYSSEUS_VLLM_VERSION="$("$ODYSSEUS_VLLM_BIN" --version 2>&1 | head -n 1 || true)"')
|
||||||
|
runner_lines.append(' if [ -n "$ODYSSEUS_VLLM_VERSION" ]; then echo "[odysseus] vLLM version: $ODYSSEUS_VLLM_VERSION"; fi')
|
||||||
|
runner_lines.append('fi')
|
||||||
|
|
||||||
def _append_serve_exit_code_lines(
|
def _append_serve_exit_code_lines(
|
||||||
runner_lines: list[str],
|
runner_lines: list[str],
|
||||||
*,
|
*,
|
||||||
@@ -860,6 +875,16 @@ def _diagnose_serve_output(text: str) -> dict | None:
|
|||||||
"Model requires custom code or newer model support.",
|
"Model requires custom code or newer model support.",
|
||||||
[{"label": "retry with --trust-remote-code", "op": "append", "arg": "--trust-remote-code"}],
|
[{"label": "retry with --trust-remote-code", "op": "append", "arg": "--trust-remote-code"}],
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
r"There is no module or parameter named ['\"]lm_head\.input_scale['\"]|lm_head\.input_scale|weight_scale_2",
|
||||||
|
"vLLM cannot load this ModelOpt LM-head quantized checkpoint with the current runtime.",
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"label": "upgrade vLLM through the environment that provides this CLI, or use a compatible checkpoint",
|
||||||
|
"op": "manual",
|
||||||
|
}
|
||||||
|
],
|
||||||
|
),
|
||||||
(
|
(
|
||||||
r"Either a revision or a version must be specified|transformers\.integrations\.hub_kernels|kernels/layer",
|
r"Either a revision or a version must be specified|transformers\.integrations\.hub_kernels|kernels/layer",
|
||||||
"vLLM/Transformers kernel package mismatch.",
|
"vLLM/Transformers kernel package mismatch.",
|
||||||
|
|||||||
@@ -38,9 +38,10 @@ from routes.cookbook_helpers import (
|
|||||||
_ps_squote, _bash_squote, _validate_serve_cmd, _parse_serve_phase,
|
_ps_squote, _bash_squote, _validate_serve_cmd, _parse_serve_phase,
|
||||||
_safe_env_prefix, _local_tooling_path_export, _append_serve_preflight_exit_lines,
|
_safe_env_prefix, _local_tooling_path_export, _append_serve_preflight_exit_lines,
|
||||||
_append_serve_exit_code_lines, _append_llama_cpp_linux_accel_build_lines, _cached_model_scan_script,
|
_append_serve_exit_code_lines, _append_llama_cpp_linux_accel_build_lines, _cached_model_scan_script,
|
||||||
_ollama_bind_from_cmd, _pip_install_fallback_chain, _pip_install_no_cache,
|
_append_vllm_linux_preflight_lines, _ollama_bind_from_cmd, _pip_install_fallback_chain,
|
||||||
_user_shell_path_bootstrap, _venv_safe_local_pip_install_cmd,
|
_pip_install_no_cache, _user_shell_path_bootstrap, _venv_safe_local_pip_install_cmd,
|
||||||
ModelDownloadRequest, ServeRequest, _diagnose_serve_output,
|
_diagnose_serve_output,
|
||||||
|
ModelDownloadRequest, ServeRequest,
|
||||||
)
|
)
|
||||||
|
|
||||||
_HF_TOKEN_STATUS_SNIPPET = (
|
_HF_TOKEN_STATUS_SNIPPET = (
|
||||||
@@ -1084,14 +1085,7 @@ def setup_cookbook_routes() -> APIRouter:
|
|||||||
runner_lines.append(' echo "ERROR: vLLM does not run on macOS. Use Ollama or llama.cpp (Metal) instead."')
|
runner_lines.append(' echo "ERROR: vLLM does not run on macOS. Use Ollama or llama.cpp (Metal) instead."')
|
||||||
runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=1')
|
runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=1')
|
||||||
runner_lines.append('fi')
|
runner_lines.append('fi')
|
||||||
# Put ~/.local/bin on PATH first — without a venv, vllm installs
|
_append_vllm_linux_preflight_lines(runner_lines)
|
||||||
# there via --user and the non-login serve shell otherwise can't
|
|
||||||
# find the `vllm` CLI ("command not found"). Mirrors llama.cpp above.
|
|
||||||
runner_lines.append('export PATH="$HOME/.local/bin:$PATH"')
|
|
||||||
runner_lines.append('if ! command -v vllm &>/dev/null; then')
|
|
||||||
runner_lines.append(' echo "ERROR: vLLM is not installed."')
|
|
||||||
runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127')
|
|
||||||
runner_lines.append('fi')
|
|
||||||
elif "sglang.launch_server" in req.cmd:
|
elif "sglang.launch_server" in req.cmd:
|
||||||
runner_lines.append('export PATH="$HOME/.local/bin:$PATH"')
|
runner_lines.append('export PATH="$HOME/.local/bin:$PATH"')
|
||||||
runner_lines.append('if ! command -v sglang &>/dev/null; then')
|
runner_lines.append('if ! command -v sglang &>/dev/null; then')
|
||||||
|
|||||||
@@ -166,6 +166,18 @@ export const ERROR_PATTERNS = [
|
|||||||
{ label: 'Edit serve', action: (panel) => _openServeEditFromDiagnosis(panel) },
|
{ label: 'Edit serve', action: (panel) => _openServeEditFromDiagnosis(panel) },
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
pattern: /There is no module or parameter named ['"]lm_head\.input_scale['"]|lm_head\.input_scale|weight_scale_2/i,
|
||||||
|
message: 'vLLM cannot load this ModelOpt LM-head quantized checkpoint with the current runtime.',
|
||||||
|
suggestion: 'Suggested action: upgrade vLLM through the environment that provides this CLI (package manager, venv, Docker image, or source checkout), or choose a compatible checkpoint.',
|
||||||
|
fixes: [
|
||||||
|
{ label: 'Open Dependencies', action: () => _openCookbookDependencies('vllm') },
|
||||||
|
{
|
||||||
|
label: 'Copy upgrade hint',
|
||||||
|
action: () => _copyText('Upgrade the vLLM environment that provides the selected vllm CLI, or use a compatible checkpoint. Do not assume Odysseus owns PATH/system/source/Docker installs.'),
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
{
|
{
|
||||||
pattern: /not divisib|must be divisible|attention heads.*divisible/i,
|
pattern: /not divisib|must be divisible|attention heads.*divisible/i,
|
||||||
message: 'Tensor parallel size incompatible with model dimensions.',
|
message: 'Tensor parallel size incompatible with model dimensions.',
|
||||||
|
|||||||
@@ -0,0 +1,15 @@
|
|||||||
|
from routes.cookbook_helpers import _diagnose_serve_output
|
||||||
|
|
||||||
|
|
||||||
|
def test_diagnose_vllm_modelopt_lm_head_error():
    """A missing ``lm_head.input_scale`` parameter yields the ModelOpt diagnosis.

    The first suggestion must be a manual action whose label points at the
    environment that provides the CLI.
    """
    serve_log = (
        "\n"
        "ValueError: There is no module or parameter named 'lm_head.input_scale'\n"
        "Engine core initialization failed.\n"
    )

    diagnosis = _diagnose_serve_output(serve_log)

    assert diagnosis is not None
    assert "ModelOpt LM-head" in diagnosis["message"]
    # Only look at the suggestions once the message itself has been verified.
    first_suggestion = diagnosis["suggestions"][0]
    assert first_suggestion["op"] == "manual"
    assert "provides this CLI" in first_suggestion["label"]
|
||||||
@@ -11,6 +11,7 @@ from routes.cookbook_helpers import (
|
|||||||
_append_serve_exit_code_lines,
|
_append_serve_exit_code_lines,
|
||||||
_append_serve_preflight_exit_lines,
|
_append_serve_preflight_exit_lines,
|
||||||
_llama_cpp_rebuild_cmd,
|
_llama_cpp_rebuild_cmd,
|
||||||
|
_append_vllm_linux_preflight_lines,
|
||||||
_local_tooling_path_export,
|
_local_tooling_path_export,
|
||||||
_pip_install_attempt,
|
_pip_install_attempt,
|
||||||
_pip_install_fallback_chain,
|
_pip_install_fallback_chain,
|
||||||
@@ -192,6 +193,19 @@ def test_serve_runner_installs_llama_cpp_server_extra():
|
|||||||
assert "_pip_install_fallback_chain('llama-cpp-python[server]'" in src
|
assert "_pip_install_fallback_chain('llama-cpp-python[server]'" in src
|
||||||
|
|
||||||
|
|
||||||
|
def test_vllm_preflight_reports_cli_and_version():
    """The vLLM preflight snippet exports PATH and reports the CLI path and version.

    It must also carry the ``127`` preflight exit code used when the CLI is
    missing.
    """
    collected = []
    _append_vllm_linux_preflight_lines(collected)
    script = "\n".join(collected)

    # Checked in the same order the snippet emits them.
    expected_fragments = (
        'export PATH="$HOME/.local/bin:$PATH"',
        'ODYSSEUS_VLLM_BIN="$(command -v vllm 2>/dev/null || true)"',
        'echo "[odysseus] vLLM CLI: $ODYSSEUS_VLLM_BIN"',
        '"$ODYSSEUS_VLLM_BIN" --version',
        'ODYSSEUS_PREFLIGHT_EXIT=127',
    )
    for fragment in expected_fragments:
        assert fragment in script
|
||||||
|
|
||||||
|
|
||||||
def test_venv_safe_local_pip_install_strips_user_flags_only_for_local_venv():
|
def test_venv_safe_local_pip_install_strips_user_flags_only_for_local_venv():
|
||||||
cmd = 'python3 -m pip install -U --user --break-system-packages "vllm"'
|
cmd = 'python3 -m pip install -U --user --break-system-packages "vllm"'
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user