From c3fcaf15b7fc20511a7d8fbf8ab24be82da6f266 Mon Sep 17 00:00:00 2001 From: Maruf Hasan <170166811+MarufHasan-dev@users.noreply.github.com> Date: Tue, 9 Jun 2026 15:06:12 +0600 Subject: [PATCH] feat(providers): add NVIDIA AI provider endpoint support (#3456) * feat: add NVIDIA as an AI provider (integrate.api.nvidia.com) * feat: add NVIDIA option to provider settings dropdown and aliases * test: add NVIDIA provider detection and endpoint tests * Add NVIDIA to _HOST_TO_CURATED and expand non-chat model filtering - nvidia.com -> 'nvidia' curated key for proper provider routing - _NON_CHAT_PREFIXES: bge, snowflake/arctic-embed, nvidia/nv-embed - _NON_CHAT_CONTAINS: content-safety, -safety, -reward, nvclip, kosmos, fuyu, deplot, vila, neva, gliner, riva, -parse, -embedqa, -nemoretriever * Expand non-chat model filtering for NVIDIA embedding/guard/video models Add _NON_CHAT_PREFIXES: embed, recurrent Add _NON_CHAT_CONTAINS: topic-control, guard, calibration, ai-synthetic-video, cosmos-reason2 Catches remaining unfiltered non-chat models from NVIDIA catalog: embedding (llama-nemotron-embed, embed-qa), guard (llama-guard, nemoguard-topic-control), calibration (ising-calibration), video (ai-synthetic-video-detector, cosmos-reason2), recurrent (recurrentgemma-2b) * Filter non-chat models in _probe_endpoint via _is_chat_model() Previously _is_chat_model() was only used in the per-model probe and _first_chat_model(), so non-chat models still appeared in the model picker even though they were filtered in those specific paths. Applying the filter at _probe_endpoint() return ensures non-chat models (embeddings, safety guards, reward, calibration, video detectors, CLIP, VLM, translation, parsing, recurrent, etc.) never enter cached_models and never appear in the picker. * Fix _NON_CHAT_CONTAINS to catch org-prefixed embedding models Prefix checks (mid.startswith) miss models with org prefixes like baai/bge-m3, nvidia/embed-qa-4, google/recurrentgemma-2b, etc. 
Adding the same terms to _NON_CHAT_CONTAINS ensures they are caught regardless of the org prefix. Adds: embed, bge, recurrent, starcoder, gemma-2b * fix(model-routes): drop collision-prone substrings from global non-chat filter The NVIDIA PR added several substrings to the shared _NON_CHAT_PREFIXES and _NON_CHAT_CONTAINS tuples. These are intended to filter out embedding, retrieval, safety, and vision models from NVIDIA's catalog that are not chat-completions-capable. However, four of the added substrings collide with legitimate chat models served by other providers: - gemma-2b matches google/gemma-2b-it (instruct chat model) - starcoder matches bigcode/starcoder2-15b (code completion model) - recurrent matches google/recurrentgemma-2b (language model) - guard matches meta-llama/Llama-Guard-3-8B (safety classifier) Removing these four from the global tuples keeps the NVIDIA-specific filtering intact (safety, embedding, retrieval, and vision models are still caught by other tokens such as content-safety, -safety, -reward, embed, bge, -embedqa, -nemoretriever, nvclip, deplot, etc.) while preventing false negatives for instruct/code models on other providers. Tests were added for gemma-2b-it, google/gemma-2b-it, and bigcode/starcoder2-15b-instruct, asserting that they are recognized as chat models. Co-authored-by: Kenny Van de Maele * fix(nvidia): remove duplicate bge/embed tokens from _NON_CHAT_CONTAINS These tokens are already present in _NON_CHAT_PREFIXES, which makes the CONTAINS entries redundant since the prefix check runs first.
Co-authored-by: Kenny Van de Maele * fix(nvidia): move bge to CONTAINS, add llama-guard, remove stray blanks Co-authored-by: Kenny Van de Maele * style: fix indentation of groq and xai test cases in test_provider_endpoints.py --------- Co-authored-by: Kenny Van de Maele --- routes/model_routes.py | 14 +++++++++++--- src/llm_core.py | 3 +++ static/index.html | 1 + static/js/providers.js | 1 + static/js/slashCommands.js | 6 +++++- tests/test_model_routes.py | 2 ++ tests/test_provider_classification.py | 2 ++ tests/test_provider_endpoints.py | 4 ++++ 8 files changed, 29 insertions(+), 4 deletions(-) diff --git a/routes/model_routes.py b/routes/model_routes.py index 864035884..b88fa3ef1 100644 --- a/routes/model_routes.py +++ b/routes/model_routes.py @@ -283,6 +283,7 @@ _HOST_TO_CURATED = ( ("fireworks.ai", "fireworks"), ("googleapis.com", "google"), ("x.ai", "xai"), + ("nvidia.com", "nvidia"), ("openrouter.ai", "openrouter"), ("ollama.com", "ollama"), ) @@ -477,10 +478,17 @@ _NON_CHAT_PREFIXES = ( "dall-e", "tts-", "whisper", "text-embedding", "embedding", "davinci", "babbage", "moderation", "omni-moderation", "sora", "gpt-image", "chatgpt-image", + # embedding / retrieval / non-chat models (common across providers) + "snowflake/arctic-embed", "nvidia/nv-embed", "embed", ) _NON_CHAT_CONTAINS = ( "-realtime", "-transcribe", "-tts", "-codex", - "codex-", + "codex-", "content-safety", "-safety", "-reward", "nvclip", + "kosmos", "fuyu", "deplot", "vila", "neva", + "gliner", "riva", "-parse", "-embedqa", "-nemoretriever", + "topic-control", "calibration", + "ai-synthetic-video", "cosmos-reason2", + "bge", "llama-guard", ) _NON_CHAT_EXACT_PREFIXES = ( "gpt-audio", # gpt-audio, gpt-audio-mini etc. 
(not gpt-4o-audio-preview which is chat) @@ -731,7 +739,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis for _e in _PROVIDER_CURATED.get(_ck, []): if _e not in set(models) and not any(m.startswith(_e) for m in models): models.append(_e) - return models + return [m for m in models if _is_chat_model(m)] except httpx.HTTPStatusError as e: if api_key: status = e.response.status_code if e.response is not None else "unknown" @@ -755,7 +763,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis data = r.json() models = [m.get("name") or m.get("model") for m in (data.get("models") or []) if m.get("name") or m.get("model")] if models: - return models + return [m for m in models if _is_chat_model(m)] except Exception as e: logger.debug(f"Ollama /api/tags probe failed for {base}: {e}") # Fall back to curated list if the provider has a URL-based match (e.g. z.ai has no /models endpoint) diff --git a/src/llm_core.py b/src/llm_core.py index 07b149ebe..b012638fa 100644 --- a/src/llm_core.py +++ b/src/llm_core.py @@ -444,6 +444,8 @@ def _detect_provider(url: str) -> str: return "openrouter" if _host_match(url, "groq.com"): return "groq" + if _host_match(url, "nvidia.com"): + return "nvidia" from src.chatgpt_subscription import is_chatgpt_subscription_base if is_chatgpt_subscription_base(url): return "chatgpt-subscription" @@ -489,6 +491,7 @@ def _provider_label(url: str) -> str: if is_copilot_base(url): return "GitHub Copilot" if _host_match(url, "mistral.ai"): return "Mistral" if _host_match(url, "deepseek.com"): return "DeepSeek" + if _host_match(url, "nvidia.com"): return "NVIDIA" if _host_match(url, "googleapis.com"): return "Google" if _host_match(url, "together.xyz", "together.ai"): return "Together" if _host_match(url, "fireworks.ai"): return "Fireworks" diff --git a/static/index.html b/static/index.html index 4ca33c072..60a2764d9 100644 --- a/static/index.html +++ b/static/index.html @@ -2095,6 +2095,7 @@ +