mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-17 02:05:22 -04:00
Models: detect bare Ollama URLs as online
_ping_endpoint() is the reachability fallback the model-endpoint POST handler invokes when _probe_endpoint() returns no model ids. It GETs base + "/models" and, on any sub-500 response, returns immediately with `reachable = (status < 400)`. That early return runs before the Ollama-native /api/version / /api/tags fallback below it.

For an Ollama URL without /v1 (the quickstart accepts both http://localhost:11434 and http://127.0.0.1:11434, and the reporter on #1025 explicitly tried both), the OpenAI-style probe target is http://127.0.0.1:11434/models. Ollama returns 404 there because /models only lives under /v1. _ping_endpoint then returned reachable=False and the picker showed "Added (offline — will retry on next load)" on an install that was running fine. /api/version was never tried.

The same thing happens for http://127.0.0.1:11434/api (the native Ollama root): /api/models is also 404, giving the same premature offline verdict.

_probe_endpoint() does fall through to /api/tags on a 4xx (the response raises via raise_for_status), so the endpoint quietly recovers once cached_models becomes non-empty on the next background refresh — matching the second commenter's "had to disconnect manually then reconnect for it to be detected" note. The bug is most visible while no models are pulled yet (cached_models stays empty, _ping_endpoint keeps voting offline).

Fix:
- Hoist the Ollama-shaped-URL test (port == 11434 or "ollama" in hostname — the same condition _probe_endpoint already uses) to the top of the function so both code paths share it.
- Stop short-circuiting on 4xx when the URL looks like Ollama: fall through to the existing /api/version + /api/tags reachability loop so an alive Ollama gets recognised even when its OpenAI surface has the wrong prefix for the user's input.
- Fix the `root` computation in that loop to strip a trailing /api as well as /v1, so http://127.0.0.1:11434/api no longer gets probed at /api/api/version.
- 4xx on non-Ollama hosts keeps the current semantics: a 401 from api.openai.com/v1/models is still a definitive offline verdict, not a reason to GET /api/version on OpenAI.

Closes #1025.
This commit is contained in:
+28
-7
@@ -348,7 +348,24 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->
|
|||||||
if api_key:
|
if api_key:
|
||||||
headers["Authorization"] = f"Bearer {api_key}"
|
headers["Authorization"] = f"Bearer {api_key}"
|
||||||
|
|
||||||
|
# Ollama exposes /v1/models (OpenAI-compatible) AND native /api/version,
|
||||||
|
# /api/tags. The OpenAI-style GET base + "/models" returns 404 when the
|
||||||
|
# base is the host root or the native /api root (e.g. http://localhost:11434,
|
||||||
|
# http://localhost:11434/api) because /models lives under /v1 there. Treat
|
||||||
|
# 4xx on a port-11434 / Ollama-named base as "try the native paths" rather
|
||||||
|
# than as a definitive offline verdict — Ollama is reachable, it just
|
||||||
|
# doesn't speak OpenAI on that prefix. Without this gate the quickstart
|
||||||
|
# marks an alive Ollama as offline whenever cached_models is empty (issue
|
||||||
|
# #1025): _probe_endpoint() falls through to /api/tags on the same 404, but
|
||||||
|
# _ping_endpoint() was returning before that fallback could run.
|
||||||
|
parsed_base = urlparse(base)
|
||||||
|
looks_like_ollama = (
|
||||||
|
parsed_base.port == 11434
|
||||||
|
or "ollama" in (parsed_base.hostname or "").lower()
|
||||||
|
)
|
||||||
|
|
||||||
url = base + "/models"
|
url = base + "/models"
|
||||||
|
last_error: Optional[str] = None
|
||||||
try:
|
try:
|
||||||
r = httpx.get(url, headers=headers, timeout=timeout)
|
r = httpx.get(url, headers=headers, timeout=timeout)
|
||||||
if 300 <= r.status_code < 400:
|
if 300 <= r.status_code < 400:
|
||||||
@@ -360,17 +377,21 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->
|
|||||||
"error": "That is Odysseus, not a model server. Use the Ollama URL, usually http://host.docker.internal:11434/v1 in Docker.",
|
"error": "That is Odysseus, not a model server. Use the Ollama URL, usually http://host.docker.internal:11434/v1 in Docker.",
|
||||||
}
|
}
|
||||||
return {"reachable": False, "status_code": r.status_code, "error": f"HTTP {r.status_code} redirect"}
|
return {"reachable": False, "status_code": r.status_code, "error": f"HTTP {r.status_code} redirect"}
|
||||||
if r.status_code < 500:
|
if r.status_code < 400:
|
||||||
return {"reachable": r.status_code < 400, "status_code": r.status_code, "error": None if r.status_code < 400 else f"HTTP {r.status_code}"}
|
return {"reachable": True, "status_code": r.status_code, "error": None}
|
||||||
|
if r.status_code < 500 and not looks_like_ollama:
|
||||||
|
return {"reachable": False, "status_code": r.status_code, "error": f"HTTP {r.status_code}"}
|
||||||
|
last_error = f"HTTP {r.status_code}"
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
last_error = str(e)[:120]
|
last_error = str(e)[:120]
|
||||||
else:
|
|
||||||
last_error = f"HTTP {r.status_code}"
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
parsed = urlparse(base)
|
if looks_like_ollama:
|
||||||
if parsed.port == 11434 or "ollama" in (parsed.hostname or "").lower():
|
root = base
|
||||||
root = base[:-3].rstrip("/") if base.endswith("/v1") else base
|
for suffix in ("/v1", "/api"):
|
||||||
|
if root.endswith(suffix):
|
||||||
|
root = root[: -len(suffix)].rstrip("/")
|
||||||
|
break
|
||||||
for path in ("/api/version", "/api/tags"):
|
for path in ("/api/version", "/api/tags"):
|
||||||
try:
|
try:
|
||||||
r = httpx.get(root + path, timeout=timeout)
|
r = httpx.get(root + path, timeout=timeout)
|
||||||
|
|||||||
Reference in New Issue
Block a user