feat(discovery): detect llama.cpp servers and label local providers (#4729)

* feat(discovery): detect llama.cpp servers and label local providers Scan port 8080 (llama-server) and 11435 (APFEL) during discovery, fingerprint llama.cpp via its native /props endpoint, and label well-known local serving ports (8080 llama.cpp, 8000 vLLM, 1234 LM Studio, 11434 Ollama) consistently in both the Python provider helper and the JS endpoint UI. Adds a llama.cpp hint to the /setup slash command. * fix(discovery): don't infer the serving tool from the port alone Per review: vLLM, SGLang, llama.cpp and plain OpenAI-compatible servers all share 8000/8080, so labeling by port mislabels real setups (a vLLM box on 8080 shown as llama.cpp). Drop the port->tool assertions from _provider_label and providerLabel; the authoritative signal is the /props fingerprint done during discovery, which is unchanged. Loopback now reads a neutral 'local endpoint' / 'Local'. Tests updated to assert the neutral labels.
2026-06-27 23:25:22 -04:00 · 2026-06-23 23:39:56 +02:00
parent 72c0bde8a9
commit e0ccf250a4
9 changed files with 330 additions and 15 deletions
@@ -0,0 +1,178 @@
+"""Tests for llama.cpp (llama-server) local discovery: the default scan list
+includes llama-server's port 8080, and `_fingerprint_provider` identifies a
+llama-server via its native ``/props`` endpoint without misfiring on LM Studio,
+Ollama, or plain OpenAI-compatible servers.
+
+Companion to test_lmstudio_discovery.py; the llama.cpp fingerprint is checked
+*after* the LM Studio one, so LM Studio still wins when both could match.
+"""
+from src.model_discovery import ModelDiscovery
+
+
+class _FakeResponse:
+    def __init__(self, payload, ok=True):
+        self._payload = payload
+        self.is_success = ok
+
+    def json(self):
+        return self._payload
+
+
+# ════════════════════════════════════════════════════════════
+# discover_models — scan list includes 8080 (llama-server default)
+# ════════════════════════════════════════════════════════════
+
+class TestLlamaCppScanPort:
+    def test_discover_models_scans_port_8080(self, monkeypatch):
+        """llama-server's default port 8080 must be among the scan targets."""
+        discovery = ModelDiscovery(default_host="localhost")
+        scanned_ports = []
+
+        def fake_check_port(host, port):
+            scanned_ports.append(port)
+            return None
+
+        monkeypatch.setattr(discovery, "_check_port", fake_check_port)
+        monkeypatch.setattr(
+            "src.model_discovery.discover_tailscale_hosts", lambda: [],
+        )
+
+        discovery.discover_models()
+        assert 8080 in scanned_ports
+
+
+# ════════════════════════════════════════════════════════════
+# _fingerprint_provider — llama-server via /props
+# ════════════════════════════════════════════════════════════
+
+class TestLlamaCppFingerprint:
+    # A representative llama-server /props payload (trimmed to the keys the
+    # fingerprint relies on).
+    LLAMACPP_PROPS = {
+        "default_generation_settings": {"n_ctx": 4096, "temperature": 0.8},
+        "total_slots": 1,
+        "chat_template": "{{ messages }}",
+        "model_path": "/models/gemma-4-12b-it-Q4_K_M.gguf",
+    }
+
+    def test_llamacpp_props_detected(self, monkeypatch):
+        """A server that isn't LM Studio but answers /props as llama-server →
+        'llamacpp'."""
+        discovery = ModelDiscovery(default_host="localhost")
+
+        def fake_get(url, timeout=None):
+            if url.endswith("/api/v1/models"):
+                # OpenAI-compatible shape, not the LM Studio native shape.
+                return _FakeResponse({"data": [{"id": "gemma-4-12b"}]})
+            if url.endswith("/props"):
+                return _FakeResponse(self.LLAMACPP_PROPS)
+            return _FakeResponse({}, ok=False)
+
+        monkeypatch.setattr("src.model_discovery.httpx.get", fake_get)
+        assert discovery._fingerprint_provider("localhost", 8080) == "llamacpp"
+
+    def test_lmstudio_still_wins_when_both_match(self, monkeypatch):
+        """If /api/v1/models reports the LM Studio native shape, LM Studio is
+        returned even when /props would also match."""
+        discovery = ModelDiscovery(default_host="localhost")
+        lmstudio_native = {
+            "models": [{"type": "llm", "key": "qwen3.6-27b",
+                        "architecture": "qwen35", "format": "gguf"}]
+        }
+
+        def fake_get(url, timeout=None):
+            if url.endswith("/api/v1/models"):
+                return _FakeResponse(lmstudio_native)
+            if url.endswith("/props"):
+                return _FakeResponse(self.LLAMACPP_PROPS)
+            return _FakeResponse({}, ok=False)
+
+        monkeypatch.setattr("src.model_discovery.httpx.get", fake_get)
+        assert discovery._fingerprint_provider("localhost", 8080) == "lmstudio"
+
+    def test_props_without_llamacpp_keys_not_detected(self, monkeypatch):
+        """A /props-style response lacking llama-server marker keys → None."""
+        discovery = ModelDiscovery(default_host="localhost")
+
+        def fake_get(url, timeout=None):
+            if url.endswith("/api/v1/models"):
+                return _FakeResponse({"data": []})
+            if url.endswith("/props"):
+                return _FakeResponse({"unrelated": "value"})
+            return _FakeResponse({}, ok=False)
+
+        monkeypatch.setattr("src.model_discovery.httpx.get", fake_get)
+        assert discovery._fingerprint_provider("localhost", 8080) is None
+
+    def test_props_unreachable_returns_none(self, monkeypatch):
+        """No /api/v1/models and a failing /props → None (not an exception)."""
+        discovery = ModelDiscovery(default_host="localhost")
+
+        def fake_get(url, timeout=None):
+            if url.endswith("/api/v1/models"):
+                return _FakeResponse({}, ok=False)
+            raise OSError("connection refused")
+
+        monkeypatch.setattr("src.model_discovery.httpx.get", fake_get)
+        assert discovery._fingerprint_provider("localhost", 8080) is None
+
+    def test_check_port_attaches_llamacpp_provider(self, monkeypatch):
+        """End-to-end: _check_port tags a discovered llama-server as 'llamacpp'."""
+        discovery = ModelDiscovery(default_host="localhost")
+
+        def fake_get(url, timeout=None):
+            if url.endswith("/v1/models"):
+                return _FakeResponse({"data": [{"id": "gemma-4-12b"}]})
+            if url.endswith("/api/v1/models"):
+                return _FakeResponse({"data": [{"id": "gemma-4-12b"}]})
+            if url.endswith("/props"):
+                return _FakeResponse(self.LLAMACPP_PROPS)
+            return _FakeResponse({}, ok=False)
+
+        monkeypatch.setattr("src.model_discovery.httpx.get", fake_get)
+        result = discovery._check_port("localhost", 8080)
+        assert result is not None
+        assert result["provider"] == "llamacpp"
+        assert result["models"] == ["gemma-4-12b"]
+
+
+# ════════════════════════════════════════════════════════════
+# Docker loopback rewrite — host.docker.internal:8080 in scan
+# ════════════════════════════════════════════════════════════
+
+class TestDockerLoopbackScan:
+    def test_host_docker_internal_in_scan_hosts(self, monkeypatch):
+        """When no LLM_HOSTS env override is set, host.docker.internal must be
+        included in the scan host list so llama-server on the Docker host is
+        discovered from inside the container."""
+        monkeypatch.delenv("LLM_HOSTS", raising=False)
+        monkeypatch.setattr(
+            "src.model_discovery.discover_tailscale_hosts", lambda: [],
+        )
+        discovery = ModelDiscovery(default_host="localhost")
+        hosts = discovery._get_hosts()
+        assert "host.docker.internal" in hosts
+
+    def test_discovered_endpoint_url_uses_provided_host(self, monkeypatch):
+        """When host.docker.internal:8080 is probed, the returned base_url
+        contains host.docker.internal — not a rewritten 127.0.0.1."""
+        from src.model_discovery import ModelDiscovery as _MD
+
+        discovery = _MD(default_host="localhost")
+
+        def fake_get(url, timeout=None):
+            if url.endswith("/v1/models") or url.endswith("/api/v1/models"):
+                return _FakeResponse({"data": [{"id": "gemma-4-12b"}]})
+            if url.endswith("/props"):
+                return _FakeResponse({
+                    "default_generation_settings": {"n_ctx": 4096},
+                    "total_slots": 1,
+                    "chat_template": "{{ messages }}",
+                })
+            return _FakeResponse({}, ok=False)
+
+        monkeypatch.setattr("src.model_discovery.httpx.get", fake_get)
+        result = discovery._check_port("host.docker.internal", 8080)
+        assert result is not None
+        assert "host.docker.internal" in result["url"]
+        assert "127.0.0.1" not in result["url"]
@@ -93,10 +93,19 @@ class TestProviderLabel:
    def test_known_labels(self, url, expected):
        assert _provider_label(url) == expected

-    def test_local_non_ollama_endpoint(self):
-        # A loopback host that isn't on the native Ollama /api path is just a
-        # generic local endpoint (e.g. an OpenAI-compatible local server).
-        assert _provider_label("http://localhost:8080/v1") == "local endpoint"
+    @pytest.mark.parametrize("url", [
+        "http://localhost:8080/v1",
+        "http://127.0.0.1:8080/v1",
+        "http://localhost:8000/v1",
+        "http://localhost:1234/v1",
+        "http://localhost:9999/v1",
+    ])
+    def test_local_non_ollama_endpoint(self, url):
+        # The serving tool is NOT inferred from the port: vLLM, SGLang, llama.cpp
+        # and plain OpenAI-compatible servers all share 8000/8080, so a port-only
+        # label would mislabel real setups. The tool is identified by /props
+        # fingerprinting during discovery; this helper stays neutral.
+        assert _provider_label(url) == "local endpoint"

    def test_unknown_host_returns_host(self):
        assert _provider_label("https://api.unknown-llm.example/v1") == "api.unknown-llm.example"
@@ -0,0 +1,54 @@
+"""providerLabel() in providers.js must NOT name the serving tool from the port,
+mirroring the Python _provider_label() in src/llm_core.py.
+
+A port is not authoritative: vLLM, SGLang, llama.cpp and plain OpenAI-compatible
+servers all routinely share 8000/8080, so a port-only label would mislabel real
+setups (e.g. a vLLM box on :8080 shown as "llama.cpp"). The actual tool is
+identified by probing /props during discovery and stored as the endpoint's name.
+The rule here: loopback → "Local"; private-LAN IPs → "Local"; known remote
+provider hosts → their provider name.
+"""
+import json
+import re
+import shutil
+import subprocess
+from pathlib import Path
+
+import pytest
+
+_REPO = Path(__file__).resolve().parent.parent
+_SRC = _REPO / "static" / "js" / "providers.js"
+_HAS_NODE = shutil.which("node") is not None
+
+
+def _provider_label(url: str) -> str | None:
+    src = _SRC.read_text(encoding="utf-8")
+    # Strip the `export` keyword so the module runs standalone.
+    src_runnable = src.replace("export function providerLabel", "function providerLabel")
+    src_runnable = src_runnable.replace("export default {", "const _default = {")
+    js = src_runnable + f"\nconsole.log(JSON.stringify(providerLabel({json.dumps(url)})));"
+    proc = subprocess.run(
+        ["node", "--input-type=module"],
+        input=js, capture_output=True, text=True, encoding="utf-8",
+        cwd=str(_REPO), timeout=30,
+    )
+    assert proc.returncode == 0, proc.stderr
+    return json.loads(proc.stdout.strip())
+
+
+@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
+@pytest.mark.parametrize("url,expected", [
+    # Loopback never names the tool from the port — it isn't authoritative.
+    ("http://localhost:8080/v1",      "Local"),
+    ("http://127.0.0.1:8080/v1",      "Local"),
+    ("http://localhost:8000/v1",      "Local"),
+    ("http://localhost:1234/v1",      "Local"),
+    ("http://localhost:11434/api",    "Local"),
+    ("http://localhost:9999/v1",      "Local"),
+    # Known remote provider hosts are still labeled by host suffix.
+    ("https://api.openai.com/v1",     "OpenAI"),
+    ("https://api.groq.com/openai/v1","Groq"),
+    ("http://192.168.1.50:8080",      "Local"),      # private LAN: no port branding
+])
+def test_provider_label_neutral_for_loopback(url, expected):
+    assert _provider_label(url) == expected
@@ -0,0 +1,17 @@
+"""The /setup guide must offer a llama.cpp (llama-server) local example.
+
+Without it, first-run setup never points at llama-server's default
+http://localhost:8080/v1 endpoint — a user pasting a local endpoint only saw
+the Ollama and generic examples. Both the static-HTML and the streamed-blocks
+renderings of the setup guide must carry the example.
+"""
+from pathlib import Path
+
+_SRC = Path(__file__).resolve().parent.parent / "static" / "js" / "slashCommands.js"
+
+
+def test_setup_guide_offers_llamacpp_local_example():
+    src = _SRC.read_text(encoding="utf-8")
+    # The example URL appears in both the HTML-string and streamed renderings.
+    assert src.count("http://localhost:8080/v1") >= 2
+    assert "llama.cpp (llama-server)" in src