From 20cf323ca47cd9746127e1f88f93283dd7fcb28b Mon Sep 17 00:00:00 2001 From: Alexandre Teixeira <111787685+alteixeira20@users.noreply.github.com> Date: Sat, 27 Jun 2026 21:46:33 +0100 Subject: [PATCH] test: split provider detection tests (#4933) --- ...py => test_provider_detection_builders.py} | 68 +------------------ tests/test_provider_detection_detect.py | 47 +++++++++++++ tests/test_provider_detection_host_match.py | 37 ++++++++++ 3 files changed, 85 insertions(+), 67 deletions(-) rename tests/{test_provider_detection.py => test_provider_detection_builders.py} (57%) create mode 100644 tests/test_provider_detection_detect.py create mode 100644 tests/test_provider_detection_host_match.py diff --git a/tests/test_provider_detection.py b/tests/test_provider_detection_builders.py similarity index 57% rename from tests/test_provider_detection.py rename to tests/test_provider_detection_builders.py index a97b419d6..82ed8bd2c 100644 --- a/tests/test_provider_detection.py +++ b/tests/test_provider_detection_builders.py @@ -1,4 +1,4 @@ -"""Provider detection tests (re: #768). +"""Provider detection tests — build_chat_url / build_models_url routing (re: #768). These import the *real* helpers from ``src.llm_core`` (not local copies) so a regression in hostname matching is actually caught. The point of the change @@ -13,72 +13,6 @@ from src import endpoint_resolver from src.endpoint_resolver import build_chat_url, build_models_url -class TestHostMatch: - def test_exact_host(self): - assert llm_core._host_match("https://anthropic.com/v1", "anthropic.com") - - def test_subdomain(self): - assert llm_core._host_match("https://api.anthropic.com/v1", "anthropic.com") - - def test_multiple_domains(self): - assert llm_core._host_match("https://api.together.ai/v1", "together.xyz", "together.ai") - - def test_trailing_dot_fqdn(self): - # A fully-qualified host with a trailing dot is legal and resolvable. - assert llm_core._host_match("https://api.anthropic.com./v1", "anthropic.com") - - def test_domain_in_path_does_not_match(self): - assert not llm_core._host_match("https://myproxy.internal/anthropic.com/v1", "anthropic.com") - - def test_domain_in_query_does_not_match(self): - assert not llm_core._host_match("https://example.com/v1?ref=anthropic.com", "anthropic.com") - - def test_lookalike_host_does_not_match(self): - assert not llm_core._host_match("https://anthropic.com.example/v1", "anthropic.com") - - def test_none_and_empty_safe(self): - assert not llm_core._host_match(None, "anthropic.com") - assert not llm_core._host_match("", "anthropic.com") - - -class TestDetectProviderRealHosts: - def test_chatgpt_subscription_codex_backend(self): - assert llm_core._detect_provider("https://chatgpt.com/backend-api/codex") == "chatgpt-subscription" - assert llm_core._detect_provider("https://chatgpt.com/backend-api/codex/responses") == "chatgpt-subscription" - - def test_anthropic(self): - assert llm_core._detect_provider("https://api.anthropic.com") == "anthropic" - - def test_openrouter(self): - assert llm_core._detect_provider("https://openrouter.ai/api/v1") == "openrouter" - - def test_groq_openai_compat_path(self): - # Groq's base carries an /openai/v1 path; detection must still see the host. - assert llm_core._detect_provider("https://api.groq.com/openai/v1") == "groq" - - def test_ollama_native_unchanged(self): - assert llm_core._detect_provider("https://ollama.com/api") == "ollama" - - def test_unknown_host_defaults_to_openai(self): - assert llm_core._detect_provider("https://api.example.com/v1") == "openai" - - -class TestDetectProviderRejectsSubstringFalsePositives: - """The regression that motivated #768: substring matching mislabeled these.""" - - def test_provider_domain_in_path(self): - assert llm_core._detect_provider("https://myproxy.internal/anthropic.com/v1") == "openai" - - def test_provider_domain_in_query(self): - assert llm_core._detect_provider("https://example.com/v1?ref=anthropic.com") == "openai" - - def test_lookalike_host(self): - assert llm_core._detect_provider("https://anthropic.com.example/v1") == "openai" - - def test_none_safe(self): - assert llm_core._detect_provider(None) == "openai" - - class TestBuildersRejectLookalikeHosts: """build_chat_url / build_models_url must route look-alike and domain-in-path hosts to the OpenAI-compatible default, not the diff --git a/tests/test_provider_detection_detect.py b/tests/test_provider_detection_detect.py new file mode 100644 index 000000000..8731a00d5 --- /dev/null +++ b/tests/test_provider_detection_detect.py @@ -0,0 +1,47 @@ +"""Provider detection tests — _detect_provider real hosts and false-positive rejection (re: #768). + +These import the *real* helpers from ``src.llm_core`` (not local copies) so a +regression in hostname matching is actually caught. The point of the change +under test is that provider detection keys off the URL's *hostname*, not a +substring of the whole URL — so a domain appearing in a path/query, or a +look-alike host, must not be misclassified. +""" +from src import llm_core + + +class TestDetectProviderRealHosts: + def test_chatgpt_subscription_codex_backend(self): + assert llm_core._detect_provider("https://chatgpt.com/backend-api/codex") == "chatgpt-subscription" + assert llm_core._detect_provider("https://chatgpt.com/backend-api/codex/responses") == "chatgpt-subscription" + + def test_anthropic(self): + assert llm_core._detect_provider("https://api.anthropic.com") == "anthropic" + + def test_openrouter(self): + assert llm_core._detect_provider("https://openrouter.ai/api/v1") == "openrouter" + + def test_groq_openai_compat_path(self): + # Groq's base carries an /openai/v1 path; detection must still see the host. + assert llm_core._detect_provider("https://api.groq.com/openai/v1") == "groq" + + def test_ollama_native_unchanged(self): + assert llm_core._detect_provider("https://ollama.com/api") == "ollama" + + def test_unknown_host_defaults_to_openai(self): + assert llm_core._detect_provider("https://api.example.com/v1") == "openai" + + +class TestDetectProviderRejectsSubstringFalsePositives: + """The regression that motivated #768: substring matching mislabeled these.""" + + def test_provider_domain_in_path(self): + assert llm_core._detect_provider("https://myproxy.internal/anthropic.com/v1") == "openai" + + def test_provider_domain_in_query(self): + assert llm_core._detect_provider("https://example.com/v1?ref=anthropic.com") == "openai" + + def test_lookalike_host(self): + assert llm_core._detect_provider("https://anthropic.com.example/v1") == "openai" + + def test_none_safe(self): + assert llm_core._detect_provider(None) == "openai" diff --git a/tests/test_provider_detection_host_match.py b/tests/test_provider_detection_host_match.py new file mode 100644 index 000000000..487b7dc87 --- /dev/null +++ b/tests/test_provider_detection_host_match.py @@ -0,0 +1,37 @@ +"""Provider detection tests — hostname matching helpers (re: #768). + +These import the *real* helpers from ``src.llm_core`` (not local copies) so a +regression in hostname matching is actually caught. The point of the change +under test is that provider detection keys off the URL's *hostname*, not a +substring of the whole URL — so a domain appearing in a path/query, or a +look-alike host, must not be misclassified. +""" +from src import llm_core + + +class TestHostMatch: + def test_exact_host(self): + assert llm_core._host_match("https://anthropic.com/v1", "anthropic.com") + + def test_subdomain(self): + assert llm_core._host_match("https://api.anthropic.com/v1", "anthropic.com") + + def test_multiple_domains(self): + assert llm_core._host_match("https://api.together.ai/v1", "together.xyz", "together.ai") + + def test_trailing_dot_fqdn(self): + # A fully-qualified host with a trailing dot is legal and resolvable. + assert llm_core._host_match("https://api.anthropic.com./v1", "anthropic.com") + + def test_domain_in_path_does_not_match(self): + assert not llm_core._host_match("https://myproxy.internal/anthropic.com/v1", "anthropic.com") + + def test_domain_in_query_does_not_match(self): + assert not llm_core._host_match("https://example.com/v1?ref=anthropic.com", "anthropic.com") + + def test_lookalike_host_does_not_match(self): + assert not llm_core._host_match("https://anthropic.com.example/v1", "anthropic.com") + + def test_none_and_empty_safe(self): + assert not llm_core._host_match(None, "anthropic.com") + assert not llm_core._host_match("", "anthropic.com")