mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-17 10:15:27 -04:00
Improve Cookbook serve reliability
This commit is contained in:
@@ -67,7 +67,15 @@ After generating the key, you can also install it from the host with:
|
|||||||
ssh-copy-id -i data/ssh/id_ed25519.pub user@server
|
ssh-copy-id -i data/ssh/id_ed25519.pub user@server
|
||||||
```
|
```
|
||||||
Cookbook local downloads are stored in `./data/huggingface`, mounted as
|
Cookbook local downloads are stored in `./data/huggingface`, mounted as
|
||||||
`~/.cache/huggingface` inside the Odysseus container.
|
`~/.cache/huggingface` inside the Odysseus container. Cookbook-installed
|
||||||
|
serve engines and Python CLIs are stored in `./data/local`, mounted as
|
||||||
|
`~/.local`, so vLLM/llama.cpp installs survive container recreation.
|
||||||
|
|
||||||
|
After downloading a model, open **Cookbook -> Serve**, pick the cached model,
|
||||||
|
and launch it. When the server answers `/v1/models`, Odysseus adds it to the
|
||||||
|
chat model picker automatically. For NVIDIA GPUs in Docker, install the NVIDIA
|
||||||
|
Container Toolkit and add `gpus: all` to the `odysseus` service if `nvidia-smi`
|
||||||
|
is not available inside the container.
|
||||||
|
|
||||||
Useful checks:
|
Useful checks:
|
||||||
```bash
|
```bash
|
||||||
|
|||||||
@@ -12,6 +12,10 @@ services:
|
|||||||
# Cookbook local model cache. Inside Docker, "Local" means the Odysseus
|
# Cookbook local model cache. Inside Docker, "Local" means the Odysseus
|
||||||
# container, so persist its HuggingFace cache under ./data/huggingface.
|
# container, so persist its HuggingFace cache under ./data/huggingface.
|
||||||
- ./data/huggingface:/app/.cache/huggingface
|
- ./data/huggingface:/app/.cache/huggingface
|
||||||
|
# Cookbook-installed Python CLIs/packages (vLLM, llama-cpp-python, etc.)
|
||||||
|
# land under /app/.local for the odysseus user. Persist them so a
|
||||||
|
# container recreation does not silently remove installed serve engines.
|
||||||
|
- ./data/local:/app/.local
|
||||||
extra_hosts:
|
extra_hosts:
|
||||||
# Lets the container reach local services on the Docker host, including
|
# Lets the container reach local services on the Docker host, including
|
||||||
# Ollama at http://host.docker.internal:11434.
|
# Ollama at http://host.docker.internal:11434.
|
||||||
|
|||||||
@@ -121,6 +121,11 @@ def setup_cookbook_routes() -> APIRouter:
|
|||||||
"Model requires custom code or newer model support.",
|
"Model requires custom code or newer model support.",
|
||||||
[{"label": "retry with --trust-remote-code", "op": "append", "arg": "--trust-remote-code"}],
|
[{"label": "retry with --trust-remote-code", "op": "append", "arg": "--trust-remote-code"}],
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
r"Either a revision or a version must be specified|transformers\.integrations\.hub_kernels|kernels/layer",
|
||||||
|
"vLLM/Transformers kernel package mismatch.",
|
||||||
|
[{"label": "update vLLM, Transformers, and kernels on this server", "op": "dependency", "package": "vllm transformers kernels"}],
|
||||||
|
),
|
||||||
(
|
(
|
||||||
r"Address already in use|bind.*address.*in use",
|
r"Address already in use|bind.*address.*in use",
|
||||||
"Port is already in use.",
|
"Port is already in use.",
|
||||||
|
|||||||
@@ -293,6 +293,21 @@ export const ERROR_PATTERNS = [
|
|||||||
}},
|
}},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
pattern: /Either a revision or a version must be specified|transformers\.integrations\.hub_kernels|kernels\/layer/i,
|
||||||
|
message: 'vLLM/Transformers kernel package mismatch.',
|
||||||
|
fixes: [
|
||||||
|
{ label: 'Update vLLM/Transformers/kernels', action: (panel) => {
|
||||||
|
const taskEl = panel.closest('.cookbook-task');
|
||||||
|
const task = taskEl ? _loadTasks().find(t => t.sessionId === taskEl.dataset.taskId) : null;
|
||||||
|
const host = task?.remoteHost || '';
|
||||||
|
const prefix = _buildEnvPrefix();
|
||||||
|
const pipCmd = prefix ? prefix + ' python3 -m pip install -U vllm transformers kernels' : 'python3 -m pip install -U vllm transformers kernels';
|
||||||
|
const cmd = host ? _sshCmd(host, pipCmd) : pipCmd;
|
||||||
|
_launchServeTask('update-vllm-stack', 'pip-update', cmd);
|
||||||
|
}},
|
||||||
|
],
|
||||||
|
},
|
||||||
{
|
{
|
||||||
pattern: /ollama.*command not found/i,
|
pattern: /ollama.*command not found/i,
|
||||||
message: 'Ollama is not installed on this server. Run: curl -fsSL https://ollama.com/install.sh | sh',
|
message: 'Ollama is not installed on this server. Run: curl -fsSL https://ollama.com/install.sh | sh',
|
||||||
|
|||||||
@@ -2158,10 +2158,6 @@ async function _reconnectTask(el, task) {
|
|||||||
task._serveReady = true;
|
task._serveReady = true;
|
||||||
_updateTask(task.sessionId, { _serveReady: true });
|
_updateTask(task.sessionId, { _serveReady: true });
|
||||||
}
|
}
|
||||||
if (!task._serveReady && task.ts && (Date.now() - task.ts) > 300000) {
|
|
||||||
task._serveReady = true;
|
|
||||||
_updateTask(task.sessionId, { _serveReady: true });
|
|
||||||
}
|
|
||||||
if (info.phase) {
|
if (info.phase) {
|
||||||
badge.textContent = info.phase;
|
badge.textContent = info.phase;
|
||||||
// Always the green "running" style — loading/warming is the same
|
// Always the green "running" style — loading/warming is the same
|
||||||
|
|||||||
Reference in New Issue
Block a user