From 20cf323ca47cd9746127e1f88f93283dd7fcb28b Mon Sep 17 00:00:00 2001
From: Alexandre Teixeira <111787685+alteixeira20@users.noreply.github.com>
Date: Sat, 27 Jun 2026 21:46:33 +0100
Subject: [PATCH] test: split provider detection tests (#4933)

---
 ...py => test_provider_detection_builders.py} | 68 +------------------
 tests/test_provider_detection_detect.py       | 47 +++++++++++++
 tests/test_provider_detection_host_match.py   | 37 ++++++++++
 3 files changed, 85 insertions(+), 67 deletions(-)
 rename tests/{test_provider_detection.py => test_provider_detection_builders.py} (57%)
 create mode 100644 tests/test_provider_detection_detect.py
 create mode 100644 tests/test_provider_detection_host_match.py

diff --git a/tests/test_provider_detection.py b/tests/test_provider_detection_builders.py
similarity index 57%
rename from tests/test_provider_detection.py
rename to tests/test_provider_detection_builders.py
index a97b419d6..82ed8bd2c 100644
--- a/tests/test_provider_detection.py
+++ b/tests/test_provider_detection_builders.py
@@ -1,4 +1,4 @@
-"""Provider detection tests (re: #768).
+"""Provider detection tests — build_chat_url / build_models_url routing (re: #768).
 
 These import the *real* helpers from ``src.llm_core`` (not local copies) so a
 regression in hostname matching is actually caught. The point of the change
@@ -13,72 +13,6 @@ from src import endpoint_resolver
 from src.endpoint_resolver import build_chat_url, build_models_url
 
 
-class TestHostMatch:
-    def test_exact_host(self):
-        assert llm_core._host_match("https://anthropic.com/v1", "anthropic.com")
-
-    def test_subdomain(self):
-        assert llm_core._host_match("https://api.anthropic.com/v1", "anthropic.com")
-
-    def test_multiple_domains(self):
-        assert llm_core._host_match("https://api.together.ai/v1", "together.xyz", "together.ai")
-
-    def test_trailing_dot_fqdn(self):
-        # A fully-qualified host with a trailing dot is legal and resolvable.
-        assert llm_core._host_match("https://api.anthropic.com./v1", "anthropic.com")
-
-    def test_domain_in_path_does_not_match(self):
-        assert not llm_core._host_match("https://myproxy.internal/anthropic.com/v1", "anthropic.com")
-
-    def test_domain_in_query_does_not_match(self):
-        assert not llm_core._host_match("https://example.com/v1?ref=anthropic.com", "anthropic.com")
-
-    def test_lookalike_host_does_not_match(self):
-        assert not llm_core._host_match("https://anthropic.com.example/v1", "anthropic.com")
-
-    def test_none_and_empty_safe(self):
-        assert not llm_core._host_match(None, "anthropic.com")
-        assert not llm_core._host_match("", "anthropic.com")
-
-
-class TestDetectProviderRealHosts:
-    def test_chatgpt_subscription_codex_backend(self):
-        assert llm_core._detect_provider("https://chatgpt.com/backend-api/codex") == "chatgpt-subscription"
-        assert llm_core._detect_provider("https://chatgpt.com/backend-api/codex/responses") == "chatgpt-subscription"
-
-    def test_anthropic(self):
-        assert llm_core._detect_provider("https://api.anthropic.com") == "anthropic"
-
-    def test_openrouter(self):
-        assert llm_core._detect_provider("https://openrouter.ai/api/v1") == "openrouter"
-
-    def test_groq_openai_compat_path(self):
-        # Groq's base carries an /openai/v1 path; detection must still see the host.
-        assert llm_core._detect_provider("https://api.groq.com/openai/v1") == "groq"
-
-    def test_ollama_native_unchanged(self):
-        assert llm_core._detect_provider("https://ollama.com/api") == "ollama"
-
-    def test_unknown_host_defaults_to_openai(self):
-        assert llm_core._detect_provider("https://api.example.com/v1") == "openai"
-
-
-class TestDetectProviderRejectsSubstringFalsePositives:
-    """The regression that motivated #768: substring matching mislabeled these."""
-
-    def test_provider_domain_in_path(self):
-        assert llm_core._detect_provider("https://myproxy.internal/anthropic.com/v1") == "openai"
-
-    def test_provider_domain_in_query(self):
-        assert llm_core._detect_provider("https://example.com/v1?ref=anthropic.com") == "openai"
-
-    def test_lookalike_host(self):
-        assert llm_core._detect_provider("https://anthropic.com.example/v1") == "openai"
-
-    def test_none_safe(self):
-        assert llm_core._detect_provider(None) == "openai"
-
-
 class TestBuildersRejectLookalikeHosts:
     """build_chat_url / build_models_url must route look-alike and
     domain-in-path hosts to the OpenAI-compatible default, not the
diff --git a/tests/test_provider_detection_detect.py b/tests/test_provider_detection_detect.py
new file mode 100644
index 000000000..8731a00d5
--- /dev/null
+++ b/tests/test_provider_detection_detect.py
@@ -0,0 +1,47 @@
+"""Provider detection tests — _detect_provider real hosts and false-positive rejection (re: #768).
+
+These import the *real* helpers from ``src.llm_core`` (not local copies) so a
+regression in hostname matching is actually caught. The point of the change
+under test is that provider detection keys off the URL's *hostname*, not a
+substring of the whole URL — so a domain appearing in a path/query, or a
+look-alike host, must not be misclassified.
+"""
+from src import llm_core
+
+
+class TestDetectProviderRealHosts:  # real provider base URLs must yield the provider label asserted in each test
+    def test_chatgpt_subscription_codex_backend(self):  # checks the codex base path and a deeper /responses path
+        assert llm_core._detect_provider("https://chatgpt.com/backend-api/codex") == "chatgpt-subscription"
+        assert llm_core._detect_provider("https://chatgpt.com/backend-api/codex/responses") == "chatgpt-subscription"
+
+    def test_anthropic(self):  # bare host, no path component
+        assert llm_core._detect_provider("https://api.anthropic.com") == "anthropic"
+
+    def test_openrouter(self):  # apex domain (no subdomain) with an /api/v1 path
+        assert llm_core._detect_provider("https://openrouter.ai/api/v1") == "openrouter"
+
+    def test_groq_openai_compat_path(self):
+        # Groq's base URL carries an /openai/v1 path; the expected label is still "groq" (taken from the host), not "openai".
+        assert llm_core._detect_provider("https://api.groq.com/openai/v1") == "groq"
+
+    def test_ollama_native_unchanged(self):  # NOTE(review): "unchanged" presumably refers to pre-#768 behaviour; confirm
+        assert llm_core._detect_provider("https://ollama.com/api") == "ollama"
+
+    def test_unknown_host_defaults_to_openai(self):  # a host matching no known provider falls back to "openai"
+        assert llm_core._detect_provider("https://api.example.com/v1") == "openai"
+
+
+class TestDetectProviderRejectsSubstringFalsePositives:  # every case below must fall back to "openai"
+    """The regression that motivated #768: substring matching mislabeled these."""
+
+    def test_provider_domain_in_path(self):  # "anthropic.com" appears only as a path segment; host is myproxy.internal
+        assert llm_core._detect_provider("https://myproxy.internal/anthropic.com/v1") == "openai"
+
+    def test_provider_domain_in_query(self):  # "anthropic.com" appears only in the query string; host is example.com
+        assert llm_core._detect_provider("https://example.com/v1?ref=anthropic.com") == "openai"
+
+    def test_lookalike_host(self):  # host merely starts with "anthropic.com"; its registrable suffix is ".example"
+        assert llm_core._detect_provider("https://anthropic.com.example/v1") == "openai"
+
+    def test_none_safe(self):  # None input must not raise and must yield the default label
+        assert llm_core._detect_provider(None) == "openai"
diff --git a/tests/test_provider_detection_host_match.py b/tests/test_provider_detection_host_match.py
new file mode 100644
index 000000000..487b7dc87
--- /dev/null
+++ b/tests/test_provider_detection_host_match.py
@@ -0,0 +1,37 @@
+"""Provider detection tests — hostname matching helpers (re: #768).
+
+These import the *real* helpers from ``src.llm_core`` (not local copies) so a
+regression in hostname matching is actually caught. The point of the change
+under test is that provider detection keys off the URL's *hostname*, not a
+substring of the whole URL — so a domain appearing in a path/query, or a
+look-alike host, must not be misclassified.
+"""
+from src import llm_core
+
+
+class TestHostMatch:  # exercises llm_core._host_match(url, *domains) for both truthy and falsy outcomes
+    def test_exact_host(self):  # host equals the candidate domain exactly
+        assert llm_core._host_match("https://anthropic.com/v1", "anthropic.com")
+
+    def test_subdomain(self):  # host is a subdomain (api.) of the candidate domain
+        assert llm_core._host_match("https://api.anthropic.com/v1", "anthropic.com")
+
+    def test_multiple_domains(self):  # several candidates passed; only the second one matches the host
+        assert llm_core._host_match("https://api.together.ai/v1", "together.xyz", "together.ai")
+
+    def test_trailing_dot_fqdn(self):
+        # A fully-qualified host with a trailing dot is legal and resolvable, so it is expected to match as well.
+        assert llm_core._host_match("https://api.anthropic.com./v1", "anthropic.com")
+
+    def test_domain_in_path_does_not_match(self):  # domain text appears only in the URL path
+        assert not llm_core._host_match("https://myproxy.internal/anthropic.com/v1", "anthropic.com")
+
+    def test_domain_in_query_does_not_match(self):  # domain text appears only in the query string
+        assert not llm_core._host_match("https://example.com/v1?ref=anthropic.com", "anthropic.com")
+
+    def test_lookalike_host_does_not_match(self):  # candidate domain is a prefix of the host, not its suffix
+        assert not llm_core._host_match("https://anthropic.com.example/v1", "anthropic.com")
+
+    def test_none_and_empty_safe(self):  # None and "" must give a falsy result rather than raising
+        assert not llm_core._host_match(None, "anthropic.com")
+        assert not llm_core._host_match("", "anthropic.com")