fix(cookbook): auto-register a local endpoint when serving an LLM (#1380)

Serving a diffusion model auto-registered an image endpoint so it appeared in the model picker, but serving an LLM (llama.cpp/vLLM/SGLang/Ollama) did not — a downloaded-and-served model never showed up until the user manually ran /setup. Add _auto_register_llm_endpoint (text sibling of _auto_register_image_endpoint): parse the serve port (explicit --port, else Ollama 11434, else llama.cpp 8080), point an endpoint at http://host:port/v1, dedupe by base_url, and set supports_tools from --enable-auto-tool-choice. Wire it into /api/model/serve for any non-pip, non-diffusion serve.
2026-06-16 01:35:36 -04:00 · 2026-06-03 08:24:17 +03:00
parent bfbbc9b479
commit 87fc675ccb
1 changed files with 80 additions and 1 deletions
@@ -809,6 +809,80 @@ def setup_cookbook_routes() -> APIRouter:
        finally:
            db.close()
    def _auto_register_llm_endpoint(req: ServeRequest, remote: str | None) -> str | None:
        """Register a freshly-served LLM as a model endpoint so it appears in the
        model picker without a manual /setup step — the text-model sibling of
        _auto_register_image_endpoint.
        Cookbook serve commands launch an OpenAI-compatible server (llama.cpp's
        llama-server, vLLM, SGLang, or Ollama) on a known port. We point an
        endpoint at that server's /v1; the picker auto-discovers the model id by
        probing /v1/models and dims the endpoint until the server is reachable,
        so registering immediately (before the server finishes loading) is safe.
        """
        import re
        from core.database import SessionLocal, ModelEndpoint
        # Port: an explicit --port wins. Otherwise fall back by backend — Ollama
        # is the only server in our generated commands that omits --port.
        port_match = re.search(r'--port\s+(\d+)', req.cmd)
        if port_match:
            port = int(port_match.group(1))
        elif "ollama" in req.cmd:
            port = 11434
        else:
            port = 8080  # llama.cpp's llama-server default — the Apple Silicon path
        # Determine host (mirrors the image path: SSH alias for remote serves).
        if remote:
            host = remote.split("@")[-1] if "@" in remote else remote
        else:
            host = "localhost"
        base_url = f"http://{host}:{port}/v1"
        short_name = req.repo_id.split("/")[-1] if "/" in req.repo_id else req.repo_id
        display_name = short_name or "Local model"
        # If the serve command opts models into OpenAI tool-calling, record it so
        # agent_loop trusts emitted tool_calls instead of the name heuristic.
        supports_tools = True if "--enable-auto-tool-choice" in req.cmd else None
        db = SessionLocal()
        try:
            # Reuse an endpoint already pointed at this URL instead of duplicating.
            existing = db.query(ModelEndpoint).filter(ModelEndpoint.base_url == base_url).first()
            if existing:
                existing.is_enabled = True
                existing.model_type = "llm"
                existing.name = display_name
                if supports_tools is not None:
                    existing.supports_tools = supports_tools
                db.commit()
                logger.info(f"Updated existing local model endpoint: {base_url}")
                return existing.id
            ep_id = f"local-{uuid.uuid4().hex[:8]}"
            ep = ModelEndpoint(
                id=ep_id,
                name=display_name,
                base_url=base_url,
                api_key=None,
                is_enabled=True,
                model_type="llm",
                supports_tools=supports_tools,
            )
            db.add(ep)
            db.commit()
            logger.info(f"Auto-registered local model endpoint: {display_name} @ {base_url}")
            return ep_id
        except Exception as e:
            logger.error(f"Failed to auto-register local model endpoint: {e}")
            db.rollback()
            return None
        finally:
            db.close()
    @router.post("/api/model/serve")
    async def model_serve(request: Request, req: ServeRequest):
        """Launch a model server in a tmux session (or PowerShell background process on Windows).
@@ -1152,11 +1226,16 @@ def setup_cookbook_routes() -> APIRouter:
                stderr = (await proc.stderr.read()).decode(errors="replace")
                return {"ok": False, "error": stderr, "session_id": session_id}
-        # Auto-register as model endpoint if serving a diffusion model
+        # Auto-register a model endpoint so the served model shows up in the model
        # picker with no manual /setup step. Diffusion models get an image
        # endpoint; any other real model serve (i.e. not a pip-install task) gets
        # a local LLM endpoint pointed at its /v1.
        endpoint_id = None
        is_diffusion = "diffusion_server.py" in req.cmd
        if is_diffusion:
            endpoint_id = _auto_register_image_endpoint(req, remote)
        elif not is_pip_install:
            endpoint_id = _auto_register_llm_endpoint(req, remote)
        # Log to assistant
        try: