# odysseus/tests/test_llamacpp_discovery.py

"""Tests for llama.cpp (llama-server) local discovery: the default scan list
includes llama-server's port 8080, and `_fingerprint_provider` identifies a
llama-server via its native ``/props`` endpoint without misfiring on LM Studio,
Ollama, or plain OpenAI-compatible servers.

Companion to test_lmstudio_discovery.py; the llama.cpp fingerprint is checked
*after* the LM Studio one, so LM Studio still wins when both could match.
"""
from src.model_discovery import ModelDiscovery


class _FakeResponse:
    """Canned response object returned by the fake ``httpx.get`` stubs below.

    Carries only the two things the stubs configure: an ``is_success`` flag
    and a JSON payload handed back verbatim by :meth:`json`.
    """

    def __init__(self, payload, ok=True):
        # ``ok`` is stored under the attribute name ``is_success``.
        self.is_success = ok
        self._payload = payload

    def json(self):
        """Return the payload supplied at construction time, unchanged."""
        return self._payload


# ════════════════════════════════════════════════════════════
# discover_models — scan list includes 8080 (llama-server default)
# ════════════════════════════════════════════════════════════

class TestLlamaCppScanPort:
    """Checks which ports ``discover_models`` hands to ``_check_port``."""

    def test_discover_models_scans_port_8080(self, monkeypatch):
        """Port 8080 (llama-server's default) is probed during discovery."""
        discovery = ModelDiscovery(default_host="localhost")
        probed_ports = []

        def record_probe(host, port):
            # Remember the port and report "nothing found" for every probe.
            probed_ports.append(port)
            return None

        # Stub the Tailscale lookup with an empty list so the scan does not
        # depend on what the real lookup would return on this machine.
        monkeypatch.setattr(
            "src.model_discovery.discover_tailscale_hosts", lambda: [],
        )
        monkeypatch.setattr(discovery, "_check_port", record_probe)

        discovery.discover_models()

        assert 8080 in probed_ports


# ════════════════════════════════════════════════════════════
# _fingerprint_provider — llama-server via /props
# ════════════════════════════════════════════════════════════

class TestLlamaCppFingerprint:
    """Tests for ``_fingerprint_provider`` / ``_check_port`` against a faked
    llama-server.

    Each test replaces ``src.model_discovery.httpx.get`` with a local
    ``fake_get`` that routes on the URL suffix and returns a ``_FakeResponse``.
    """

    # A representative llama-server /props payload (trimmed to the keys the
    # fingerprint relies on).
    LLAMACPP_PROPS = {
        "default_generation_settings": {"n_ctx": 4096, "temperature": 0.8},
        "total_slots": 1,
        "chat_template": "{{ messages }}",
        "model_path": "/models/gemma-4-12b-it-Q4_K_M.gguf",
    }

    def test_llamacpp_props_detected(self, monkeypatch):
        """A server that isn't LM Studio but answers /props as llama-server →
        'llamacpp'."""
        discovery = ModelDiscovery(default_host="localhost")

        def fake_get(url, timeout=None):
            if url.endswith("/api/v1/models"):
                # OpenAI-compatible shape, not the LM Studio native shape.
                return _FakeResponse({"data": [{"id": "gemma-4-12b"}]})
            if url.endswith("/props"):
                return _FakeResponse(self.LLAMACPP_PROPS)
            # Any other URL is answered with an unsuccessful response.
            return _FakeResponse({}, ok=False)

        monkeypatch.setattr("src.model_discovery.httpx.get", fake_get)
        assert discovery._fingerprint_provider("localhost", 8080) == "llamacpp"

    def test_lmstudio_still_wins_when_both_match(self, monkeypatch):
        """If /api/v1/models reports the LM Studio native shape, LM Studio is
        returned even when /props would also match."""
        discovery = ModelDiscovery(default_host="localhost")
        lmstudio_native = {
            "models": [{"type": "llm", "key": "qwen3.6-27b",
                        "architecture": "qwen35", "format": "gguf"}]
        }

        def fake_get(url, timeout=None):
            if url.endswith("/api/v1/models"):
                return _FakeResponse(lmstudio_native)
            if url.endswith("/props"):
                # Deliberately also a valid llama-server payload, so both
                # fingerprints could match and precedence is what is tested.
                return _FakeResponse(self.LLAMACPP_PROPS)
            return _FakeResponse({}, ok=False)

        monkeypatch.setattr("src.model_discovery.httpx.get", fake_get)
        assert discovery._fingerprint_provider("localhost", 8080) == "lmstudio"

    def test_props_without_llamacpp_keys_not_detected(self, monkeypatch):
        """A /props-style response lacking llama-server marker keys → None."""
        discovery = ModelDiscovery(default_host="localhost")

        def fake_get(url, timeout=None):
            if url.endswith("/api/v1/models"):
                return _FakeResponse({"data": []})
            if url.endswith("/props"):
                # Successful response, but none of the LLAMACPP_PROPS keys.
                return _FakeResponse({"unrelated": "value"})
            return _FakeResponse({}, ok=False)

        monkeypatch.setattr("src.model_discovery.httpx.get", fake_get)
        assert discovery._fingerprint_provider("localhost", 8080) is None

    def test_props_unreachable_returns_none(self, monkeypatch):
        """No /api/v1/models and a failing /props → None (not an exception)."""
        discovery = ModelDiscovery(default_host="localhost")

        def fake_get(url, timeout=None):
            if url.endswith("/api/v1/models"):
                return _FakeResponse({}, ok=False)
            # Every other request (including /props) fails at transport level.
            raise OSError("connection refused")

        monkeypatch.setattr("src.model_discovery.httpx.get", fake_get)
        assert discovery._fingerprint_provider("localhost", 8080) is None

    def test_check_port_attaches_llamacpp_provider(self, monkeypatch):
        """End-to-end: _check_port tags a discovered llama-server as 'llamacpp'."""
        discovery = ModelDiscovery(default_host="localhost")

        def fake_get(url, timeout=None):
            # Test the longer suffix first: every URL ending in
            # "/api/v1/models" also ends in "/v1/models", so the reverse
            # order would leave this branch unreachable.
            if url.endswith("/api/v1/models"):
                return _FakeResponse({"data": [{"id": "gemma-4-12b"}]})
            if url.endswith("/v1/models"):
                return _FakeResponse({"data": [{"id": "gemma-4-12b"}]})
            if url.endswith("/props"):
                return _FakeResponse(self.LLAMACPP_PROPS)
            return _FakeResponse({}, ok=False)

        monkeypatch.setattr("src.model_discovery.httpx.get", fake_get)
        result = discovery._check_port("localhost", 8080)
        assert result is not None
        assert result["provider"] == "llamacpp"
        assert result["models"] == ["gemma-4-12b"]


# ════════════════════════════════════════════════════════════
# Docker loopback rewrite — host.docker.internal:8080 in scan
# ════════════════════════════════════════════════════════════

class TestDockerLoopbackScan:
    """Checks that ``host.docker.internal`` is scanned and reported as-is."""

    def test_host_docker_internal_in_scan_hosts(self, monkeypatch):
        """With no LLM_HOSTS override in the environment, the host list from
        ``_get_hosts`` must contain host.docker.internal, so a llama-server
        running on the Docker host can be found from inside the container."""
        # Stub the Tailscale lookup with an empty list and clear any override.
        monkeypatch.setattr(
            "src.model_discovery.discover_tailscale_hosts", lambda: [],
        )
        monkeypatch.delenv("LLM_HOSTS", raising=False)

        scan_hosts = ModelDiscovery(default_host="localhost")._get_hosts()

        assert "host.docker.internal" in scan_hosts

    def test_discovered_endpoint_url_uses_provided_host(self, monkeypatch):
        """Probing host.docker.internal:8080 yields a result whose URL still
        names host.docker.internal rather than a rewritten 127.0.0.1."""

        def fake_get(url, timeout=None):
            # Both model-listing endpoints answer with the same
            # OpenAI-compatible payload.
            if url.endswith(("/v1/models", "/api/v1/models")):
                return _FakeResponse({"data": [{"id": "gemma-4-12b"}]})
            if url.endswith("/props"):
                props = {
                    "default_generation_settings": {"n_ctx": 4096},
                    "total_slots": 1,
                    "chat_template": "{{ messages }}",
                }
                return _FakeResponse(props)
            return _FakeResponse({}, ok=False)

        discovery = ModelDiscovery(default_host="localhost")
        monkeypatch.setattr("src.model_discovery.httpx.get", fake_get)

        result = discovery._check_port("host.docker.internal", 8080)

        assert result is not None
        endpoint_url = result["url"]
        assert "host.docker.internal" in endpoint_url
        assert "127.0.0.1" not in endpoint_url