diff --git a/src/llm_core.py b/src/llm_core.py
index 89c153809..88061c9ea 100644
--- a/src/llm_core.py
+++ b/src/llm_core.py
@@ -457,15 +457,25 @@ def _detect_provider(url: str) -> str:
 
 def _is_self_hosted_openai_compatible(url: str) -> bool:
     """True for custom/local OpenAI-compatible servers (llama.cpp, LM Studio,
-    vLLM, text-generation-webui, etc.) as opposed to api.openai.com itself.
+    vLLM, text-generation-webui, etc.) as opposed to cloud APIs.
 
     Used to gate llama.cpp-server-specific payload extras (``session_id``,
-    ``cache_prompt``) — sending unrecognized top-level fields to OpenAI's
-    actual API returns a 400 ("Unrecognized request argument"), but
-    self-hosted servers generally ignore unknown fields and many (notably
-    llama.cpp's server) use them for KV-cache slot affinity (issue #2927).
+    ``cache_prompt``) used for KV-cache slot affinity (issue #2927). Strict
+    cloud providers reject unrecognized top-level fields (api.openai.com
+    returns 400, Mistral returns 422 "extra_forbidden", issue #3793), and any
+    unknown OpenAI-compatible host used to be treated as self-hosted, so those
+    fields leaked to every strict provider added as a custom endpoint.
+
+    A server only counts as self-hosted when it also resolves as local:
+    loopback/private/tailscale host, or the endpoint explicitly configured
+    with kind "local". A self-hosted server exposed via a public hostname
+    loses the affinity hint unless its endpoint kind is set to "local" -
+    a lost perf hint, versus a hard 4xx on every request the other way.
     """
-    return _detect_provider(url) == "openai" and not _host_match(url, "openai.com")
+    if _detect_provider(url) != "openai" or _host_match(url, "openai.com"):
+        return False
+    from src.model_context import is_local_endpoint
+    return is_local_endpoint(url)
 
 
 def _apply_local_cache_affinity(payload: Dict, url: str, session_id: Optional[str]) -> None:
diff --git a/src/model_context.py b/src/model_context.py
index a2ce9f638..0b04b20cc 100644
--- a/src/model_context.py
+++ b/src/model_context.py
@@ -5,6 +5,7 @@ Query and cache model context window sizes from OpenAI-compatible APIs.
 Provides token estimation for context usage tracking.
 """
 
+import ipaddress
 import logging
 import sys
 from typing import Dict, List, Optional, Tuple
@@ -19,7 +20,20 @@ _LOCAL_HOSTS = {"localhost", "127.0.0.1", "0.0.0.0", "::1", "host.docker.interna
 _PRIVATE_PREFIXES = ("10.", "172.16.", "172.17.", "172.18.", "172.19.",
                      "172.20.", "172.21.", "172.22.", "172.23.", "172.24.",
                      "172.25.", "172.26.", "172.27.", "172.28.", "172.29.",
-                     "172.30.", "172.31.", "192.168.", "100.")
+                     "172.30.", "172.31.", "192.168.")
+
+# Tailscale uses the CGNAT range 100.64.0.0/10, NOT all of 100.0.0.0/8.
+# A bare "100." prefix would classify public addresses (e.g. AWS ranges
+# under 100.x outside the CGNAT block) as local; routes/model_routes.py
+# already narrows this the same way for endpoint classification.
+_TAILSCALE_CGNAT = ipaddress.ip_network("100.64.0.0/10")
+
+
+def _in_tailscale_range(host: str) -> bool:
+    try:
+        return ipaddress.ip_address(host) in _TAILSCALE_CGNAT
+    except ValueError:
+        return False
 
 
 def _normalize_base_for_compare(url: str) -> str:
@@ -64,7 +78,7 @@ def _configured_endpoint_kind(url: str) -> Optional[str]:
         return None
 
 
-def _is_local_endpoint(url: str) -> bool:
+def is_local_endpoint(url: str) -> bool:
     """Check if URL points to a local/private/tailscale address."""
     kind = _configured_endpoint_kind(url)
     if kind in ("api", "proxy"):
@@ -73,7 +87,7 @@ def _is_local_endpoint(url: str) -> bool:
         return True
     try:
         host = urlparse(url).hostname or ""
-        return host in _LOCAL_HOSTS or host.startswith(_PRIVATE_PREFIXES)
+        return host in _LOCAL_HOSTS or host.startswith(_PRIVATE_PREFIXES) or _in_tailscale_range(host)
     except Exception:
         return False
 
@@ -219,7 +233,7 @@ def get_context_length(endpoint_url: str, model: str) -> int:
     Falls back to DEFAULT_CONTEXT if unavailable.
     """
     configured_kind = _configured_endpoint_kind(endpoint_url)
-    is_local = _is_local_endpoint(endpoint_url)
+    is_local = is_local_endpoint(endpoint_url)
     # Key on (endpoint_url, model): the same model id can be served by two
     # different remote endpoints with different real context windows (e.g. a
     # capped proxy vs. the full provider), so caching by model id alone would
@@ -273,7 +287,7 @@ def _query_context_length(endpoint_url: str, model: str) -> int:
         return DEFAULT_CONTEXT
 
     # Try llama.cpp /slots endpoint first — reports actual serving context
-    if _is_local_endpoint(endpoint_url):
+    if is_local_endpoint(endpoint_url):
         try:
             base = endpoint_url.split("/v1")[0] if "/v1" in endpoint_url else endpoint_url.rsplit("/", 1)[0]
             r = httpx.get(f"{base}/slots", timeout=REQUEST_TIMEOUT)
@@ -337,7 +351,7 @@ def _query_context_length(endpoint_url: str, model: str) -> int:
     # For local/self-hosted endpoints, trust the API value (user set --max-model-len)
     # For cloud APIs, use the larger value (API can report low defaults)
     if api_ctx and known:
-        _is_local = _is_local_endpoint(endpoint_url)
+        _is_local = is_local_endpoint(endpoint_url)
         if _is_local and api_ctx < known:
             logger.info(f"Local endpoint reports {api_ctx} for {model} (known max: {known}) — using API value")
             return api_ctx
diff --git a/tests/test_cache_affinity_local_only.py b/tests/test_cache_affinity_local_only.py
new file mode 100644
index 000000000..3fe8a10cc
--- /dev/null
+++ b/tests/test_cache_affinity_local_only.py
@@ -0,0 +1,94 @@
+"""llama.cpp slot-affinity fields must never reach cloud providers (#3793).
+
+_apply_local_cache_affinity adds session_id + cache_prompt to outgoing
+payloads for KV-cache slot affinity (#2927). The old gate treated any unknown
+OpenAI-compatible host as self-hosted, so strict cloud APIs added as custom
+endpoints (Mistral at api.mistral.ai) received the extra fields and rejected
+every request with 422 extra_forbidden. Self-hosted now also requires the
+endpoint to resolve as local: loopback/private/tailscale host, or endpoint
+kind explicitly configured as "local".
+"""
+import pytest
+
+import src.llm_core as llm_core
+import src.model_context as model_context
+
+
+def _affinity_fields(url, monkeypatch, kind=None):
+    monkeypatch.setattr(model_context, "_configured_endpoint_kind", lambda _u: kind)
+    payload = {}
+    llm_core._apply_local_cache_affinity(payload, url, "sess-123")
+    return payload
+
+
+def test_mistral_cloud_api_gets_no_affinity_fields(monkeypatch):
+    # The #3793 repro: Mistral rejects unknown body fields with 422.
+    payload = _affinity_fields("https://api.mistral.ai/v1", monkeypatch)
+    assert payload == {}
+
+
+def test_openai_api_gets_no_affinity_fields(monkeypatch):
+    payload = _affinity_fields("https://api.openai.com/v1", monkeypatch)
+    assert payload == {}
+
+
+def test_unknown_public_host_gets_no_affinity_fields(monkeypatch):
+    # Any strict cloud provider added as a custom endpoint, not just Mistral.
+    payload = _affinity_fields("https://llm.example-cloud.com/v1", monkeypatch)
+    assert payload == {}
+
+
+def test_localhost_server_gets_affinity_fields(monkeypatch):
+    payload = _affinity_fields("http://localhost:8080/v1", monkeypatch)
+    assert payload == {"session_id": "sess-123", "cache_prompt": True}
+
+
+def test_private_lan_server_gets_affinity_fields(monkeypatch):
+    payload = _affinity_fields("http://192.168.1.50:8000/v1", monkeypatch)
+    assert payload == {"session_id": "sess-123", "cache_prompt": True}
+
+
+def test_public_host_with_local_kind_override_gets_affinity_fields(monkeypatch):
+    # Escape hatch: a self-hosted llama.cpp exposed via a tunnel keeps the
+    # slot-affinity hint when its endpoint kind is configured as "local".
+    payload = _affinity_fields("https://my-llama.example.com/v1", monkeypatch, kind="local")
+    assert payload == {"session_id": "sess-123", "cache_prompt": True}
+
+
+def test_no_session_id_is_a_noop(monkeypatch):
+    monkeypatch.setattr(model_context, "_configured_endpoint_kind", lambda _u: None)
+    payload = {}
+    llm_core._apply_local_cache_affinity(payload, "http://localhost:8080/v1", None)
+    assert payload == {}
+
+
+# Cloud-host sweep absorbed from #3839 (credit: Shabablinchikow) - every cloud
+# API that falls through provider detection to the OpenAI-compatible default
+# must stay clean, not just the Mistral host from the original report.
+@pytest.mark.parametrize("url", [
+    "https://api.mistral.ai/v1/chat/completions",
+    "https://api.deepseek.com/v1/chat/completions",
+    "https://api.x.ai/v1/chat/completions",
+    "https://api.together.xyz/v1/chat/completions",
+    "https://api.fireworks.ai/inference/v1/chat/completions",
+    "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions",
+])
+def test_cloud_openai_compatible_hosts_get_no_affinity_fields(monkeypatch, url):
+    assert _affinity_fields(url, monkeypatch) == {}
+
+
+# Tailscale CGNAT boundaries (review finding on #3945): only 100.64.0.0/10 is
+# Tailscale; the rest of 100.0.0.0/8 contains public ranges, and a strict
+# provider addressed by one must not receive the llama.cpp extras.
+def test_host_just_below_cgnat_gets_no_affinity_fields(monkeypatch):
+    assert _affinity_fields("http://100.63.255.255/v1", monkeypatch) == {}
+
+
+def test_host_just_above_cgnat_gets_no_affinity_fields(monkeypatch):
+    assert _affinity_fields("http://100.128.0.1/v1", monkeypatch) == {}
+
+
+@pytest.mark.parametrize("host", ["100.64.0.1", "100.100.50.2", "100.127.255.254"])
+def test_hosts_inside_cgnat_get_affinity_fields(monkeypatch, host):
+    payload = _affinity_fields(f"http://{host}:8080/v1", monkeypatch)
+    assert payload == {"session_id": "sess-123", "cache_prompt": True}
diff --git a/tests/test_context_cache_per_endpoint.py b/tests/test_context_cache_per_endpoint.py
index 3bffd7bad..efabea46a 100644
--- a/tests/test_context_cache_per_endpoint.py
+++ b/tests/test_context_cache_per_endpoint.py
@@ -11,7 +11,7 @@ import src.model_context as mc
 
 def _setup(monkeypatch, windows):
     """windows: {endpoint_url: context_length}. Force the remote path."""
-    monkeypatch.setattr(mc, "_is_local_endpoint", lambda url: False)
+    monkeypatch.setattr(mc, "is_local_endpoint", lambda url: False)
     monkeypatch.setattr(mc, "_configured_endpoint_kind", lambda url: "api")
     monkeypatch.setattr(mc, "_query_context_length", lambda url, model: windows[url])
     mc._context_cache.clear()
diff --git a/tests/test_model_context.py b/tests/test_model_context.py
index 31a105c93..ba6556a44 100644
--- a/tests/test_model_context.py
+++ b/tests/test_model_context.py
@@ -6,7 +6,7 @@ import types
 import pytest
 
 import src.model_context as model_context
-from src.model_context import _is_local_endpoint, estimate_tokens, _lookup_known
+from src.model_context import is_local_endpoint, estimate_tokens, _lookup_known
 
 
 class _Column:
@@ -56,20 +56,20 @@ def _install_endpoint_db(monkeypatch, rows):
 
 class TestIsLocalEndpoint:
     def test_localhost(self):
-        assert _is_local_endpoint("http://localhost:5000/v1/chat/completions") is True
+        assert is_local_endpoint("http://localhost:5000/v1/chat/completions") is True
 
     def test_loopback_ipv4(self):
-        assert _is_local_endpoint("http://127.0.0.1:8080/v1/chat/completions") is True
+        assert is_local_endpoint("http://127.0.0.1:8080/v1/chat/completions") is True
 
     def test_private_192_168(self):
-        assert _is_local_endpoint("http://192.168.1.1:11434/v1/chat/completions") is True
+        assert is_local_endpoint("http://192.168.1.1:11434/v1/chat/completions") is True
 
     def test_private_10(self):
-        assert _is_local_endpoint("http://10.0.0.5:8000/v1/chat/completions") is True
+        assert is_local_endpoint("http://10.0.0.5:8000/v1/chat/completions") is True
 
     def test_tailscale_100(self):
         # 100.64.0.0/10 is the CGNAT range Tailscale uses.
-        assert _is_local_endpoint("http://100.64.0.1:5000/v1/chat/completions") is True
+        assert is_local_endpoint("http://100.64.0.1:5000/v1/chat/completions") is True
 
     def test_configured_tailscale_proxy_is_remote(self, monkeypatch):
         _install_endpoint_db(monkeypatch, [
@@ -81,19 +81,19 @@ class TestIsLocalEndpoint:
             )
         ])
 
-        assert _is_local_endpoint("http://100.117.136.97:34521/v1/chat/completions") is False
+        assert is_local_endpoint("http://100.117.136.97:34521/v1/chat/completions") is False
 
     def test_openai_is_remote(self):
-        assert _is_local_endpoint("https://api.openai.com/v1/chat/completions") is False
+        assert is_local_endpoint("https://api.openai.com/v1/chat/completions") is False
 
     def test_anthropic_is_remote(self):
-        assert _is_local_endpoint("https://api.anthropic.com/v1/messages") is False
+        assert is_local_endpoint("https://api.anthropic.com/v1/messages") is False
 
     def test_empty_url(self):
-        assert _is_local_endpoint("") is False
+        assert is_local_endpoint("") is False
 
     def test_malformed_url(self):
-        assert _is_local_endpoint("not-a-url") is False
+        assert is_local_endpoint("not-a-url") is False
 
 
 class TestEstimateTokens: