diff --git a/src/llm_core.py b/src/llm_core.py index 9faf8d362..38f4b1c29 100644 --- a/src/llm_core.py +++ b/src/llm_core.py @@ -777,10 +777,15 @@ def _provider_label(url: str) -> str: pass if _is_ollama_native_url(url): return "Ollama" try: host = (urlparse(url).hostname or "").lower() except Exception: return "provider" if host in {"localhost", "127.0.0.1", "::1", "0.0.0.0"}: + # A port alone is not authoritative: vLLM, SGLang, llama.cpp and plain + # OpenAI-compatible servers all routinely share 8000/8080, so naming the + # serving tool from the port here would mislabel real setups. The tool is + # identified by probing llama-server's native /props endpoint during + # discovery (see ModelDiscovery._fingerprint_provider); this stays neutral. return "local endpoint" return host or "provider" diff --git a/src/model_discovery.py b/src/model_discovery.py index 506fcb6c4..e740d6f44 100644 --- a/src/model_discovery.py +++ b/src/model_discovery.py @@ -163,6 +163,21 @@ class ModelDiscovery: return "lmstudio" except Exception: pass + # llama.cpp's llama-server exposes a native /props endpoint (no /v1 prefix) + # describing the loaded model, slots, and chat template — distinct from + # LM Studio (/api/v1/models) and vLLM (/version, /metrics). 
+ try: + r = httpx.get(f"http://{host}:{port}/props", timeout=1.5) + if r.is_success: + props = r.json() or {} + if isinstance(props, dict) and ( + "default_generation_settings" in props + or "total_slots" in props + or "chat_template" in props + ): + return "llamacpp" + except Exception: + pass return None def _check_port(self, host: str, port: int) -> Optional[Dict[str, Any]]: @@ -194,10 +209,11 @@ class ModelDiscovery: logger.info(f"Scanning {len(hosts)} hosts for models: {hosts}") - # Well-known ports: 8000-8020 (vLLM, llama.cpp, SGLang, Cookbook), - # 1234 (LM Studio), 11434 (Ollama), 11435 for APFEL as its default port is - # occupied by Ollama. The env vars can add more ports which will be merged in. - ports = list(range(8000, 8021)) + [1234, 11434, 11435] + # Well-known ports: 8000-8020 (vLLM, SGLang, Cookbook), 8080 (llama.cpp / + # llama-server default), 1234 (LM Studio), 11434 (Ollama), 11435 for APFEL + # as its default port is occupied by Ollama. The env vars can add more + # ports which will be merged in. + ports = list(range(8000, 8021)) + [8080, 1234, 11434, 11435] ports += [p for p in sorted(self._extra_ports) if p not in ports] targets = [(h, p) for h in hosts for p in ports] diff --git a/static/js/admin.js b/static/js/admin.js index 62b0108a6..a54a9d007 100644 --- a/static/js/admin.js +++ b/static/js/admin.js @@ -1603,8 +1603,8 @@ function initEndpointForm() { wrap.style.cssText = 'display:flex;align-items:center;padding:8px 0;'; wrap.appendChild(wp.element); const txt = document.createElement('span'); - txt.textContent = 'Scanning ports 8000-8020 and 11434 for model servers...'; - txt.style.cssText = 'opacity:0.7;'; + txt.textContent = 'Scanning ports 8000-8020, 8080, 1234, 11434, and 11435 for model servers...'; + txt.style.cssText = 'font-size:12px;opacity:0.7;'; wrap.appendChild(txt); msg.appendChild(wrap); discoverBtn._wp = wp; @@ -1619,12 +1619,24 @@ function initEndpointForm() { } else { // Auto-add each discovered endpoint. 
Server dedupes on base_url // and returns `existing: true` for already-registered ones. + // Map fingerprinted provider IDs to friendly display names. + const _PROVIDER_DISPLAY = { + llamacpp: 'llama.cpp', lmstudio: 'LM Studio', vllm: 'vLLM', + ollama: 'Ollama', + }; let added = 0; let skipped = 0; for (const item of items) { const base = item.url.replace('/chat/completions', '').replace(/\/$/, ''); + const providerDisplay = _PROVIDER_DISPLAY[item.provider] || null; const fd = new FormData(); fd.append('base_url', base); + if (providerDisplay) { + // Use "Provider (host:port)" so the endpoint is immediately + // identifiable in the list, e.g. "llama.cpp (localhost:8080)". + const hostPart = base.replace(/^https?:\/\//, '').split('/')[0]; + fd.append('name', `${providerDisplay} (${hostPart})`); + } fd.append('endpoint_kind', 'local'); fd.append('model_refresh_mode', 'auto'); fd.append('skip_probe', 'false'); @@ -1638,7 +1650,12 @@ function initEndpointForm() { } } const totalModels = items.reduce((n, i) => n + (i.models ? i.models.length : 0), 0); - const parts = [`Found ${items.length} server${items.length !== 1 ? 's' : ''} with ${totalModels} model${totalModels !== 1 ? 's' : ''}`]; + const serverNames = items.map(i => + (_PROVIDER_DISPLAY[i.provider] || i.url.replace(/^https?:\/\//, '').split('/')[0]) + ); + const parts = [ + `Found ${items.length} server${items.length !== 1 ? 's' : ''} (${serverNames.join(', ')}) with ${totalModels} model${totalModels !== 1 ? 's' : ''}`, + ]; if (added) parts.push(`added ${added} new`); if (skipped) parts.push(`${skipped} already added`); msg.innerHTML = parts.join(' — '); diff --git a/static/js/providers.js b/static/js/providers.js index da50fff89..54556faeb 100644 --- a/static/js/providers.js +++ b/static/js/providers.js @@ -133,11 +133,20 @@ export function providerLabel(endpointUrl) { try { host = new URL(endpointUrl).hostname; } catch (_) { - // Not a full URL (e.g. bare host[:port]) — strip scheme/path/port best-effort. 
- host = endpointUrl.replace(/^[a-z]+:\/\//i, "").split("/")[0].split(":")[0]; + // Not a full URL (e.g. bare host[:port]) — strip scheme/path best-effort. + const stripped = endpointUrl.replace(/^[a-z]+:\/\//i, "").split("/")[0]; + const colonIdx = stripped.lastIndexOf(":"); + host = colonIdx >= 0 ? stripped.slice(0, colonIdx) : stripped; } if (!host) return null; - if (/^(localhost|127\.|0\.0\.0\.0|::1|192\.168\.|10\.|172\.(1[6-9]|2\d|3[01])\.)/i.test(host)) { + const isLoopback = /^(localhost|127\.|0\.0\.0\.0|::1)/.test(host); + if (isLoopback) { + // Don't name the serving tool from the port — it isn't authoritative + // (vLLM/SGLang/llama.cpp share 8000/8080). Discovery identifies the tool by + // probing /props and stores the result as the endpoint's name instead. + return "Local"; + } + if (/^(192\.168\.|10\.|172\.(1[6-9]|2\d|3[01])\.)/i.test(host)) { return "Local"; } for (const [re, label] of _ENDPOINT_LABELS) { diff --git a/static/js/slashCommands.js b/static/js/slashCommands.js index 8ddb84c91..92adb8814 100644 --- a/static/js/slashCommands.js +++ b/static/js/slashCommands.js @@ -208,6 +208,8 @@ function _showSetupEndpointChoices() { '
http://localhost:11434/v1
' + '
or
' + '
http://llm-host.local:8000/v1
' + + '
or llama.cpp (llama-server):
' + + '
http://localhost:8080/v1
' + '' + '
' + '
' + SETUP_API_ICON + 'API setup
' + @@ -238,6 +240,12 @@ function _showSetupEndpointChoicesStreamed(options = {}) { text: 'http://llm-host.local:8000/v1', copyText: 'http://llm-host.local:8000/v1', }, + { kind: 'p', text: 'or llama.cpp (llama-server):' }, + { + kind: 'code', + text: 'http://localhost:8080/v1', + copyText: 'http://localhost:8080/v1', + }, { kind: 'heading', html: SETUP_API_ICON + 'API setup' }, { kind: 'p', text: 'Paste provider name then API key (example):' }, { diff --git a/tests/test_llamacpp_discovery.py b/tests/test_llamacpp_discovery.py new file mode 100644 index 000000000..d00f7b3b3 --- /dev/null +++ b/tests/test_llamacpp_discovery.py @@ -0,0 +1,178 @@ +"""Tests for llama.cpp (llama-server) local discovery: the default scan list +includes llama-server's port 8080, and `_fingerprint_provider` identifies a +llama-server via its native ``/props`` endpoint without misfiring on LM Studio, +Ollama, or plain OpenAI-compatible servers. + +Companion to test_lmstudio_discovery.py; the llama.cpp fingerprint is checked +*after* the LM Studio one, so LM Studio still wins when both could match. 
+""" +from src.model_discovery import ModelDiscovery + + +class _FakeResponse: + def __init__(self, payload, ok=True): + self._payload = payload + self.is_success = ok + + def json(self): + return self._payload + + +# ════════════════════════════════════════════════════════════ +# discover_models — scan list includes 8080 (llama-server default) +# ════════════════════════════════════════════════════════════ + +class TestLlamaCppScanPort: + def test_discover_models_scans_port_8080(self, monkeypatch): + """llama-server's default port 8080 must be among the scan targets.""" + discovery = ModelDiscovery(default_host="localhost") + scanned_ports = [] + + def fake_check_port(host, port): + scanned_ports.append(port) + return None + + monkeypatch.setattr(discovery, "_check_port", fake_check_port) + monkeypatch.setattr( + "src.model_discovery.discover_tailscale_hosts", lambda: [], + ) + + discovery.discover_models() + assert 8080 in scanned_ports + + +# ════════════════════════════════════════════════════════════ +# _fingerprint_provider — llama-server via /props +# ════════════════════════════════════════════════════════════ + +class TestLlamaCppFingerprint: + # A representative llama-server /props payload (trimmed to the keys the + # fingerprint relies on). + LLAMACPP_PROPS = { + "default_generation_settings": {"n_ctx": 4096, "temperature": 0.8}, + "total_slots": 1, + "chat_template": "{{ messages }}", + "model_path": "/models/gemma-4-12b-it-Q4_K_M.gguf", + } + + def test_llamacpp_props_detected(self, monkeypatch): + """A server that isn't LM Studio but answers /props as llama-server → + 'llamacpp'.""" + discovery = ModelDiscovery(default_host="localhost") + + def fake_get(url, timeout=None): + if url.endswith("/api/v1/models"): + # OpenAI-compatible shape, not the LM Studio native shape. 
+ return _FakeResponse({"data": [{"id": "gemma-4-12b"}]}) + if url.endswith("/props"): + return _FakeResponse(self.LLAMACPP_PROPS) + return _FakeResponse({}, ok=False) + + monkeypatch.setattr("src.model_discovery.httpx.get", fake_get) + assert discovery._fingerprint_provider("localhost", 8080) == "llamacpp" + + def test_lmstudio_still_wins_when_both_match(self, monkeypatch): + """If /api/v1/models reports the LM Studio native shape, LM Studio is + returned even when /props would also match.""" + discovery = ModelDiscovery(default_host="localhost") + lmstudio_native = { + "models": [{"type": "llm", "key": "qwen3.6-27b", + "architecture": "qwen35", "format": "gguf"}] + } + + def fake_get(url, timeout=None): + if url.endswith("/api/v1/models"): + return _FakeResponse(lmstudio_native) + if url.endswith("/props"): + return _FakeResponse(self.LLAMACPP_PROPS) + return _FakeResponse({}, ok=False) + + monkeypatch.setattr("src.model_discovery.httpx.get", fake_get) + assert discovery._fingerprint_provider("localhost", 8080) == "lmstudio" + + def test_props_without_llamacpp_keys_not_detected(self, monkeypatch): + """A /props-style response lacking llama-server marker keys → None.""" + discovery = ModelDiscovery(default_host="localhost") + + def fake_get(url, timeout=None): + if url.endswith("/api/v1/models"): + return _FakeResponse({"data": []}) + if url.endswith("/props"): + return _FakeResponse({"unrelated": "value"}) + return _FakeResponse({}, ok=False) + + monkeypatch.setattr("src.model_discovery.httpx.get", fake_get) + assert discovery._fingerprint_provider("localhost", 8080) is None + + def test_props_unreachable_returns_none(self, monkeypatch): + """No /api/v1/models and a failing /props → None (not an exception).""" + discovery = ModelDiscovery(default_host="localhost") + + def fake_get(url, timeout=None): + if url.endswith("/api/v1/models"): + return _FakeResponse({}, ok=False) + raise OSError("connection refused") + + 
monkeypatch.setattr("src.model_discovery.httpx.get", fake_get) + assert discovery._fingerprint_provider("localhost", 8080) is None + + def test_check_port_attaches_llamacpp_provider(self, monkeypatch): + """End-to-end: _check_port tags a discovered llama-server as 'llamacpp'.""" + discovery = ModelDiscovery(default_host="localhost") + + def fake_get(url, timeout=None): + if url.endswith("/v1/models"): + return _FakeResponse({"data": [{"id": "gemma-4-12b"}]}) + if url.endswith("/api/v1/models"): + return _FakeResponse({"data": [{"id": "gemma-4-12b"}]}) + if url.endswith("/props"): + return _FakeResponse(self.LLAMACPP_PROPS) + return _FakeResponse({}, ok=False) + + monkeypatch.setattr("src.model_discovery.httpx.get", fake_get) + result = discovery._check_port("localhost", 8080) + assert result is not None + assert result["provider"] == "llamacpp" + assert result["models"] == ["gemma-4-12b"] + + +# ════════════════════════════════════════════════════════════ +# Docker loopback rewrite — host.docker.internal:8080 in scan +# ════════════════════════════════════════════════════════════ + +class TestDockerLoopbackScan: + def test_host_docker_internal_in_scan_hosts(self, monkeypatch): + """When no LLM_HOSTS env override is set, host.docker.internal must be + included in the scan host list so llama-server on the Docker host is + discovered from inside the container.""" + monkeypatch.delenv("LLM_HOSTS", raising=False) + monkeypatch.setattr( + "src.model_discovery.discover_tailscale_hosts", lambda: [], + ) + discovery = ModelDiscovery(default_host="localhost") + hosts = discovery._get_hosts() + assert "host.docker.internal" in hosts + + def test_discovered_endpoint_url_uses_provided_host(self, monkeypatch): + """When host.docker.internal:8080 is probed, the returned base_url + contains host.docker.internal — not a rewritten 127.0.0.1.""" + from src.model_discovery import ModelDiscovery as _MD + + discovery = _MD(default_host="localhost") + + def fake_get(url, timeout=None): + 
if url.endswith("/v1/models") or url.endswith("/api/v1/models"): + return _FakeResponse({"data": [{"id": "gemma-4-12b"}]}) + if url.endswith("/props"): + return _FakeResponse({ + "default_generation_settings": {"n_ctx": 4096}, + "total_slots": 1, + "chat_template": "{{ messages }}", + }) + return _FakeResponse({}, ok=False) + + monkeypatch.setattr("src.model_discovery.httpx.get", fake_get) + result = discovery._check_port("host.docker.internal", 8080) + assert result is not None + assert "host.docker.internal" in result["url"] + assert "127.0.0.1" not in result["url"] diff --git a/tests/test_provider_classification.py b/tests/test_provider_classification.py index 02f20d8ba..62c713e31 100644 --- a/tests/test_provider_classification.py +++ b/tests/test_provider_classification.py @@ -93,10 +93,19 @@ class TestProviderLabel: def test_known_labels(self, url, expected): assert _provider_label(url) == expected - def test_local_non_ollama_endpoint(self): - # A loopback host that isn't on the native Ollama /api path is just a - # generic local endpoint (e.g. an OpenAI-compatible local server). - assert _provider_label("http://localhost:8080/v1") == "local endpoint" + @pytest.mark.parametrize("url", [ + "http://localhost:8080/v1", + "http://127.0.0.1:8080/v1", + "http://localhost:8000/v1", + "http://localhost:1234/v1", + "http://localhost:9999/v1", + ]) + def test_local_non_ollama_endpoint(self, url): + # The serving tool is NOT inferred from the port: vLLM, SGLang, llama.cpp + # and plain OpenAI-compatible servers all share 8000/8080, so a port-only + # label would mislabel real setups. The tool is identified by /props + # fingerprinting during discovery; this helper stays neutral. 
+ assert _provider_label(url) == "local endpoint" def test_unknown_host_returns_host(self): assert _provider_label("https://api.unknown-llm.example/v1") == "api.unknown-llm.example" diff --git a/tests/test_provider_label_js.py b/tests/test_provider_label_js.py new file mode 100644 index 000000000..39b1a1f5d --- /dev/null +++ b/tests/test_provider_label_js.py @@ -0,0 +1,54 @@ +"""providerLabel() in providers.js must NOT name the serving tool from the port, +mirroring the Python _provider_label() in src/llm_core.py. + +A port is not authoritative: vLLM, SGLang, llama.cpp and plain OpenAI-compatible +servers all routinely share 8000/8080, so a port-only label would mislabel real +setups (e.g. a vLLM box on :8080 shown as "llama.cpp"). The actual tool is +identified by probing /props during discovery and stored as the endpoint's name. +The rule here: loopback → "Local"; private-LAN IPs → "Local"; known remote +provider hosts → their provider name. +""" +import json +import re +import shutil +import subprocess +from pathlib import Path + +import pytest + +_REPO = Path(__file__).resolve().parent.parent +_SRC = _REPO / "static" / "js" / "providers.js" +_HAS_NODE = shutil.which("node") is not None + + +def _provider_label(url: str) -> str | None: + src = _SRC.read_text(encoding="utf-8") + # Strip the `export` keyword so the module runs standalone. 
+ src_runnable = src.replace("export function providerLabel", "function providerLabel") + src_runnable = src_runnable.replace("export default {", "const _default = {") + js = src_runnable + f"\nconsole.log(JSON.stringify(providerLabel({json.dumps(url)})));" + proc = subprocess.run( + ["node", "--input-type=module"], + input=js, capture_output=True, text=True, encoding="utf-8", + cwd=str(_REPO), timeout=30, + ) + assert proc.returncode == 0, proc.stderr + return json.loads(proc.stdout.strip()) + + +@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH") +@pytest.mark.parametrize("url,expected", [ + # Loopback never names the tool from the port — it isn't authoritative. + ("http://localhost:8080/v1", "Local"), + ("http://127.0.0.1:8080/v1", "Local"), + ("http://localhost:8000/v1", "Local"), + ("http://localhost:1234/v1", "Local"), + ("http://localhost:11434/api", "Local"), + ("http://localhost:9999/v1", "Local"), + # Known remote provider hosts are still labeled by host suffix. + ("https://api.openai.com/v1", "OpenAI"), + ("https://api.groq.com/openai/v1","Groq"), + ("http://192.168.1.50:8080", "Local"), # private LAN: no port branding +]) +def test_provider_label_neutral_for_loopback(url, expected): + assert _provider_label(url) == expected diff --git a/tests/test_setup_llamacpp_hint_js.py b/tests/test_setup_llamacpp_hint_js.py new file mode 100644 index 000000000..2eef9483c --- /dev/null +++ b/tests/test_setup_llamacpp_hint_js.py @@ -0,0 +1,17 @@ +"""The /setup guide must offer a llama.cpp (llama-server) local example. + +Without it, a first-run user pasting a local endpoint only saw the Ollama and +generic examples, with no hint of llama-server's default URL. (No provider +label depends on port 8080: _provider_label stays port-neutral.) Both the +static-HTML and streamed-blocks renderings of the setup guide must carry it. 
+""" +from pathlib import Path + +_SRC = Path(__file__).resolve().parent.parent / "static" / "js" / "slashCommands.js" + + +def test_setup_guide_offers_llamacpp_local_example(): + src = _SRC.read_text(encoding="utf-8") + # The example URL appears in both the HTML-string and streamed renderings. + assert src.count("http://localhost:8080/v1") >= 2 + assert "llama.cpp (llama-server)" in src