mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-16 01:35:36 -04:00
fix(cookbook): auto-register a local endpoint when serving an LLM (#1380)
Serving a diffusion model auto-registered an image endpoint so it appeared in the model picker, but serving an LLM (llama.cpp/vLLM/SGLang/Ollama) did not — a downloaded-and-served model never showed up until the user manually ran /setup. Add _auto_register_llm_endpoint (text sibling of _auto_register_image_endpoint): parse the serve port (explicit --port, else Ollama 11434, else llama.cpp 8080), point an endpoint at http://host:port/v1, dedupe by base_url, and set supports_tools from --enable-auto-tool-choice. Wire it into /api/model/serve for any non-pip, non-diffusion serve.
This commit is contained in:
@@ -809,6 +809,80 @@ def setup_cookbook_routes() -> APIRouter:
|
|||||||
finally:
|
finally:
|
||||||
db.close()
|
db.close()
|
||||||
|
|
||||||
|
def _auto_register_llm_endpoint(req: ServeRequest, remote: str | None) -> str | None:
|
||||||
|
"""Register a freshly-served LLM as a model endpoint so it appears in the
|
||||||
|
model picker without a manual /setup step — the text-model sibling of
|
||||||
|
_auto_register_image_endpoint.
|
||||||
|
|
||||||
|
Cookbook serve commands launch an OpenAI-compatible server (llama.cpp's
|
||||||
|
llama-server, vLLM, SGLang, or Ollama) on a known port. We point an
|
||||||
|
endpoint at that server's /v1; the picker auto-discovers the model id by
|
||||||
|
probing /v1/models and dims the endpoint until the server is reachable,
|
||||||
|
so registering immediately (before the server finishes loading) is safe.
|
||||||
|
"""
|
||||||
|
import re
|
||||||
|
from core.database import SessionLocal, ModelEndpoint
|
||||||
|
|
||||||
|
# Port: an explicit --port wins. Otherwise fall back by backend — Ollama
|
||||||
|
# is the only server in our generated commands that omits --port.
|
||||||
|
port_match = re.search(r'--port\s+(\d+)', req.cmd)
|
||||||
|
if port_match:
|
||||||
|
port = int(port_match.group(1))
|
||||||
|
elif "ollama" in req.cmd:
|
||||||
|
port = 11434
|
||||||
|
else:
|
||||||
|
port = 8080 # llama.cpp's llama-server default — the Apple Silicon path
|
||||||
|
|
||||||
|
# Determine host (mirrors the image path: SSH alias for remote serves).
|
||||||
|
if remote:
|
||||||
|
host = remote.split("@")[-1] if "@" in remote else remote
|
||||||
|
else:
|
||||||
|
host = "localhost"
|
||||||
|
|
||||||
|
base_url = f"http://{host}:{port}/v1"
|
||||||
|
|
||||||
|
short_name = req.repo_id.split("/")[-1] if "/" in req.repo_id else req.repo_id
|
||||||
|
display_name = short_name or "Local model"
|
||||||
|
|
||||||
|
# If the serve command opts models into OpenAI tool-calling, record it so
|
||||||
|
# agent_loop trusts emitted tool_calls instead of the name heuristic.
|
||||||
|
supports_tools = True if "--enable-auto-tool-choice" in req.cmd else None
|
||||||
|
|
||||||
|
db = SessionLocal()
|
||||||
|
try:
|
||||||
|
# Reuse an endpoint already pointed at this URL instead of duplicating.
|
||||||
|
existing = db.query(ModelEndpoint).filter(ModelEndpoint.base_url == base_url).first()
|
||||||
|
if existing:
|
||||||
|
existing.is_enabled = True
|
||||||
|
existing.model_type = "llm"
|
||||||
|
existing.name = display_name
|
||||||
|
if supports_tools is not None:
|
||||||
|
existing.supports_tools = supports_tools
|
||||||
|
db.commit()
|
||||||
|
logger.info(f"Updated existing local model endpoint: {base_url}")
|
||||||
|
return existing.id
|
||||||
|
|
||||||
|
ep_id = f"local-{uuid.uuid4().hex[:8]}"
|
||||||
|
ep = ModelEndpoint(
|
||||||
|
id=ep_id,
|
||||||
|
name=display_name,
|
||||||
|
base_url=base_url,
|
||||||
|
api_key=None,
|
||||||
|
is_enabled=True,
|
||||||
|
model_type="llm",
|
||||||
|
supports_tools=supports_tools,
|
||||||
|
)
|
||||||
|
db.add(ep)
|
||||||
|
db.commit()
|
||||||
|
logger.info(f"Auto-registered local model endpoint: {display_name} @ {base_url}")
|
||||||
|
return ep_id
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to auto-register local model endpoint: {e}")
|
||||||
|
db.rollback()
|
||||||
|
return None
|
||||||
|
finally:
|
||||||
|
db.close()
|
||||||
|
|
||||||
@router.post("/api/model/serve")
|
@router.post("/api/model/serve")
|
||||||
async def model_serve(request: Request, req: ServeRequest):
|
async def model_serve(request: Request, req: ServeRequest):
|
||||||
"""Launch a model server in a tmux session (or PowerShell background process on Windows).
|
"""Launch a model server in a tmux session (or PowerShell background process on Windows).
|
||||||
@@ -1152,11 +1226,16 @@ def setup_cookbook_routes() -> APIRouter:
|
|||||||
stderr = (await proc.stderr.read()).decode(errors="replace")
|
stderr = (await proc.stderr.read()).decode(errors="replace")
|
||||||
return {"ok": False, "error": stderr, "session_id": session_id}
|
return {"ok": False, "error": stderr, "session_id": session_id}
|
||||||
|
|
||||||
# Auto-register as model endpoint if serving a diffusion model
|
# Auto-register a model endpoint so the served model shows up in the model
|
||||||
|
# picker with no manual /setup step. Diffusion models get an image
|
||||||
|
# endpoint; any other real model serve (i.e. not a pip-install task) gets
|
||||||
|
# a local LLM endpoint pointed at its /v1.
|
||||||
endpoint_id = None
|
endpoint_id = None
|
||||||
is_diffusion = "diffusion_server.py" in req.cmd
|
is_diffusion = "diffusion_server.py" in req.cmd
|
||||||
if is_diffusion:
|
if is_diffusion:
|
||||||
endpoint_id = _auto_register_image_endpoint(req, remote)
|
endpoint_id = _auto_register_image_endpoint(req, remote)
|
||||||
|
elif not is_pip_install:
|
||||||
|
endpoint_id = _auto_register_llm_endpoint(req, remote)
|
||||||
|
|
||||||
# Log to assistant
|
# Log to assistant
|
||||||
try:
|
try:
|
||||||
|
|||||||
Reference in New Issue
Block a user