Improve Ollama setup and model endpoint handling

2026-06-16 09:45:24 -04:00 · 2026-06-01 10:00:15 +09:00
parent 051751adcd
commit fc7f107b22
22 changed files with 982 additions and 131 deletions
@@ -160,6 +160,7 @@ def _provider_label(url: str) -> str:
    if "googleapis.com" in u or "generativelanguage" in u: return "Google"
    if "together.xyz" in u or "together.ai" in u: return "Together"
    if "fireworks.ai" in u: return "Fireworks"
+    if "ollama" in u or ":11434" in u: return "Ollama"
    if "localhost" in u or "127.0.0.1" in u: return "local endpoint"
    try:
        from urllib.parse import urlparse
@@ -375,8 +376,20 @@ def list_model_ids(base_chat_url: str, timeout: int = LLMConfig.DEFAULT_TIMEOUT,
            h.update(headers)
        r = httpx.get(base_chat_url.replace("/chat/completions", "/models"), headers=h, timeout=timeout)
        r.raise_for_status()
-        return [m.get("id") for m in (r.json().get("data") or []) if m.get("id")]
+        data = r.json()
+        ids = [m.get("id") for m in (data.get("data") or []) if m.get("id")]
+        if ids:
+            return ids
+        return [m.get("name") or m.get("model") for m in (data.get("models") or []) if m.get("name") or m.get("model")]
    except Exception:
+        try:
+            if ":11434" in base_chat_url or "ollama" in base_chat_url.lower():
+                root = base_chat_url.replace("/v1/chat/completions", "").replace("/chat/completions", "").rstrip("/")
+                r = httpx.get(root + "/api/tags", timeout=timeout)
+                r.raise_for_status()
+                return [m.get("name") or m.get("model") for m in (r.json().get("models") or []) if m.get("name") or m.get("model")]
+        except Exception:
+            pass
        return []

 def normalize_model_id(endpoint_url: str, requested: str, timeout: int = LLMConfig.DEFAULT_TIMEOUT) -> Optional[str]:
@@ -3,8 +3,10 @@ import json
 import time
 import httpx
 import logging
+import os
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from typing import List, Dict, Any, Optional
+from urllib.parse import urlparse

 logger = logging.getLogger(__name__)

@@ -75,7 +77,11 @@ class ModelDiscovery:

    def _get_hosts(self) -> List[str]:
        """Get all hosts to scan, using env override, Tailscale, or default."""
-        import os
+        def _append_host(out: List[str], host: str) -> None:
+            host = (host or "").strip()
+            if not host or host in out:
+                return
+            out.append(host)

        # Manual override takes priority
        extra = os.getenv("LLM_HOSTS", "").strip()
@@ -84,6 +90,7 @@ class ModelDiscovery:
            # Always include the default host too
            if self.default_host not in hosts:
                hosts.insert(0, self.default_host)
+            _append_host(hosts, "host.docker.internal")
            return hosts

        # Try Tailscale discovery
@@ -92,10 +99,23 @@ class ModelDiscovery:
            # Ensure default_host is included
            if self.default_host not in ts_hosts:
                ts_hosts.insert(0, self.default_host)
+            _append_host(ts_hosts, "host.docker.internal")
            return ts_hosts

-        # Fallback to single host
-        return [self.default_host]
+        hosts = [self.default_host]
+        # Docker Desktop / Linux Compose resolves this name to the host machine,
+        # covering the common "I started Ollama normally on this computer" case.
+        _append_host(hosts, "host.docker.internal")
+        for env_name in ("OLLAMA_BASE_URL", "OLLAMA_URL"):
+            raw = os.getenv(env_name, "").strip()
+            if not raw:
+                continue
+            try:
+                parsed = urlparse(raw if "://" in raw else "http://" + raw)
+                _append_host(hosts, parsed.hostname or "")
+            except Exception:
+                pass
+        return hosts

    def _check_port(self, host: str, port: int) -> Optional[Dict[str, Any]]:
        """Check a single host:port for models."""
@@ -125,8 +145,10 @@ class ModelDiscovery:

        logger.info(f"Scanning {len(hosts)} hosts for models: {hosts}")

-        # Build list of (host, port) to check
-        targets = [(h, p) for h in hosts for p in range(8000, 8021)]
+        # Build list of (host, port) to check. 8000-8020 catches vLLM,
+        # llama.cpp, SGLang, and Cookbook serves; 11434 catches Ollama.
+        ports = list(range(8000, 8021)) + [11434]
+        targets = [(h, p) for h in hosts for p in ports]

        seen_models = set()  # dedupe by (port, model_ids) to avoid same machine via different IPs