Improve Ollama setup and model endpoint handling

This commit is contained in:
pewdiepie-archdaemon
2026-06-01 10:00:15 +09:00
parent 051751adcd
commit fc7f107b22
22 changed files with 982 additions and 131 deletions
+14 -1
View File
@@ -160,6 +160,7 @@ def _provider_label(url: str) -> str:
if "googleapis.com" in u or "generativelanguage" in u: return "Google"
if "together.xyz" in u or "together.ai" in u: return "Together"
if "fireworks.ai" in u: return "Fireworks"
if "ollama" in u or ":11434" in u: return "Ollama"
if "localhost" in u or "127.0.0.1" in u: return "local endpoint"
try:
from urllib.parse import urlparse
@@ -375,8 +376,20 @@ def list_model_ids(base_chat_url: str, timeout: int = LLMConfig.DEFAULT_TIMEOUT,
h.update(headers)
r = httpx.get(base_chat_url.replace("/chat/completions", "/models"), headers=h, timeout=timeout)
r.raise_for_status()
return [m.get("id") for m in (r.json().get("data") or []) if m.get("id")]
data = r.json()
ids = [m.get("id") for m in (data.get("data") or []) if m.get("id")]
if ids:
return ids
return [m.get("name") or m.get("model") for m in (data.get("models") or []) if m.get("name") or m.get("model")]
except Exception:
try:
if ":11434" in base_chat_url or "ollama" in base_chat_url.lower():
root = base_chat_url.replace("/v1/chat/completions", "").replace("/chat/completions", "").rstrip("/")
r = httpx.get(root + "/api/tags", timeout=timeout)
r.raise_for_status()
return [m.get("name") or m.get("model") for m in (r.json().get("models") or []) if m.get("name") or m.get("model")]
except Exception:
pass
return []
def normalize_model_id(endpoint_url: str, requested: str, timeout: int = LLMConfig.DEFAULT_TIMEOUT) -> Optional[str]:
+27 -5
View File
@@ -3,8 +3,10 @@ import json
import time
import httpx
import logging
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import List, Dict, Any, Optional
from urllib.parse import urlparse
logger = logging.getLogger(__name__)
@@ -75,7 +77,11 @@ class ModelDiscovery:
def _get_hosts(self) -> List[str]:
"""Get all hosts to scan, using env override, Tailscale, or default."""
import os
def _append_host(out: List[str], host: str) -> None:
host = (host or "").strip()
if not host or host in out:
return
out.append(host)
# Manual override takes priority
extra = os.getenv("LLM_HOSTS", "").strip()
@@ -84,6 +90,7 @@ class ModelDiscovery:
# Always include the default host too
if self.default_host not in hosts:
hosts.insert(0, self.default_host)
_append_host(hosts, "host.docker.internal")
return hosts
# Try Tailscale discovery
@@ -92,10 +99,23 @@ class ModelDiscovery:
# Ensure default_host is included
if self.default_host not in ts_hosts:
ts_hosts.insert(0, self.default_host)
_append_host(ts_hosts, "host.docker.internal")
return ts_hosts
# Fallback to single host
return [self.default_host]
hosts = [self.default_host]
# Docker Desktop (and Linux Compose setups) map this name to the host
# machine, which covers the common "I started Ollama normally on this
# computer" case.
_append_host(hosts, "host.docker.internal")
for env_name in ("OLLAMA_BASE_URL", "OLLAMA_URL"):
raw = os.getenv(env_name, "").strip()
if not raw:
continue
try:
parsed = urlparse(raw if "://" in raw else "http://" + raw)
_append_host(hosts, parsed.hostname or "")
except Exception:
pass
return hosts
def _check_port(self, host: str, port: int) -> Optional[Dict[str, Any]]:
"""Check a single host:port for models."""
@@ -125,8 +145,10 @@ class ModelDiscovery:
logger.info(f"Scanning {len(hosts)} hosts for models: {hosts}")
# Build list of (host, port) to check
targets = [(h, p) for h in hosts for p in range(8000, 8021)]
# Build list of (host, port) to check. 8000-8020 catches vLLM,
# llama.cpp, SGLang, and Cookbook serves; 11434 catches Ollama.
ports = list(range(8000, 8021)) + [11434]
targets = [(h, p) for h in hosts for p in ports]
seen_models = set() # dedupe by (port, model_ids) to avoid same machine via different IPs