mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-16 17:55:26 -04:00
feat(providers): add NVIDIA AI provider endpoint support (#3456)
* feat: add NVIDIA as an AI provider (integrate.api.nvidia.com)

* feat: add NVIDIA option to provider settings dropdown and aliases

* test: add NVIDIA provider detection and endpoint tests

* Add NVIDIA to _HOST_TO_CURATED and expand non-chat model filtering

  - nvidia.com -> 'nvidia' curated key for proper provider routing
  - _NON_CHAT_PREFIXES: bge, snowflake/arctic-embed, nvidia/nv-embed
  - _NON_CHAT_CONTAINS: content-safety, -safety, -reward, nvclip, kosmos, fuyu, deplot, vila, neva, gliner, riva, -parse, -embedqa, -nemoretriever

* Expand non-chat model filtering for NVIDIA embedding/guard/video models

  Add _NON_CHAT_PREFIXES: embed, recurrent
  Add _NON_CHAT_CONTAINS: topic-control, guard, calibration, ai-synthetic-video, cosmos-reason2

  Catches remaining unfiltered non-chat models from NVIDIA catalog: embedding (llama-nemotron-embed, embed-qa), guard (llama-guard, nemoguard-topic-control), calibration (ising-calibration), video (ai-synthetic-video-detector, cosmos-reason2), recurrent (recurrentgemma-2b)

* Filter non-chat models in _probe_endpoint via _is_chat_model()

  Previously _is_chat_model() was only used in the per-model probe and _first_chat_model(), so non-chat models still appeared in the model picker even though they were filtered in those specific paths. Applying the filter at _probe_endpoint() return ensures non-chat models (embeddings, safety guards, reward, calibration, video detectors, CLIP, VLM, translation, parsing, recurrent, etc.) never enter cached_models and never appear in the picker.

* Fix _NON_CHAT_CONTAINS to catch org-prefixed embedding models

  Prefix checks (mid.startswith) miss models with org prefixes like baai/bge-m3, nvidia/embed-qa-4, google/recurrentgemma-2b, etc. Adding the same terms to _NON_CHAT_CONTAINS ensures they are caught regardless of the org prefix.

  Adds: embed, bge, recurrent, starcoder, gemma-2b

* fix(model-routes): drop collision-prone substrings from global non-chat filter

  The NVIDIA PR added several substrings to the shared _NON_CHAT_PREFIXES and _NON_CHAT_CONTAINS tuples. These are intended to filter out embedding, retrieval, safety, and vision models from NVIDIA's catalog that are not chat-completions-capable. However, four of the added substrings collide with legitimate chat models served by other providers:

  - gemma-2b matches google/gemma-2b-it (instruct chat model)
  - starcoder matches bigcode/starcoder2-15b (code completion model)
  - recurrent matches google/recurrentgemma-2b (language model)
  - guard matches meta-llama/Llama-Guard-3-8B (safety classifier)

  Removing these four from the global tuples keeps the NVIDIA-specific filtering intact (safety, embedding, retrieval, and vision models are still caught by other tokens such as content-safety, -safety, -reward, embed, bge, -embedqa, -nemoretriever, nvclip, deplot, etc.) while preventing false negatives for instruct/code models on other providers.

  Tests added for gemma-2b-it, google/gemma-2b-it, and bigcode/starcoder2-15b-instruct asserting they are recognized as chat models.

  Co-authored-by: Kenny Van de Maele <kenny@kvandemaele.be>

* fix(nvidia): remove duplicate bge/embed tokens from _NON_CHAT_CONTAINS

  Tokens already present in _NON_CHAT_PREFIXES, making the CONTAINS entries redundant since the prefix check runs first.

  Co-authored-by: Kenny Van de Maele <kenny@kvandemaele.be>

* fix(nvidia): move bge to CONTAINS, add llama-guard, remove stray blanks

  Co-authored-by: Kenny Van de Maele <kenny@kvandemaele.be>

* style: fix indentation of groq and xai test cases in test_provider_endpoints.py

---------

Co-authored-by: Kenny Van de Maele <kenny@kvandemaele.be>
This commit is contained in:
@@ -2095,6 +2095,7 @@
|
||||
<option value="https://opencode.ai/zen/v1" data-logo="opencode">OpenCode Zen</option>
|
||||
<option value="https://opencode.ai/zen/go/v1" data-logo="opencode">OpenCode Go</option>
|
||||
<option value="https://api.z.ai/api/coding/paas/v4" data-logo="zhipu">Z.AI Coding Plan</option>
|
||||
<option value="https://integrate.api.nvidia.com/v1" data-logo="nvidia">NVIDIA</option>
|
||||
</select>
|
||||
<!-- API key row stays in DOM, hidden until Key button is
|
||||
clicked. Mirrors the Local section pattern: most users
|
||||
|
||||
@@ -118,6 +118,7 @@ const _ENDPOINT_LABELS = [
|
||||
[/(^|\.)together\.(ai|xyz)$/i, "Together"],
|
||||
[/(^|\.)fireworks\.ai$/i, "Fireworks"],
|
||||
[/(^|\.)perplexity\.ai$/i, "Perplexity"],
|
||||
[/(^|\.)nvidia\.com$/i, "NVIDIA"],
|
||||
[/(^|\.)x\.ai$/i, "xAI"],
|
||||
];
|
||||
|
||||
|
||||
@@ -43,6 +43,7 @@ const PROVIDER_PATTERNS = [
|
||||
{ re: /^gsk_/, name: 'Groq', url: 'https://api.groq.com/openai/v1' },
|
||||
{ re: /^AIza/, name: 'Gemini', url: 'https://generativelanguage.googleapis.com/v1beta/openai' },
|
||||
{ re: /^xai-/, name: 'xAI', url: 'https://api.x.ai/v1' },
|
||||
{ re: /^nvapi-/, name: 'NVIDIA', url: 'https://integrate.api.nvidia.com/v1' },
|
||||
];
|
||||
const SETUP_PROVIDER_URLS = {
|
||||
deepseek: { name: 'DeepSeek', url: 'https://api.deepseek.com/v1' },
|
||||
@@ -56,8 +57,9 @@ const SETUP_PROVIDER_URLS = {
|
||||
google: { name: 'Gemini', url: 'https://generativelanguage.googleapis.com/v1beta/openai' },
|
||||
'opencode-zen': { name: 'OpenCode Zen', url: 'https://opencode.ai/zen/v1' },
|
||||
'opencode-go': { name: 'OpenCode Go', url: 'https://opencode.ai/zen/go/v1' },
|
||||
nvidia: { name: 'NVIDIA', url: 'https://integrate.api.nvidia.com/v1' },
|
||||
};
|
||||
const SETUP_PROVIDER_NAMES = ['deepseek', 'openai', 'openrouter', 'ollama', 'xai', 'anthropic', 'groq', 'gemini', 'opencode-zen', 'opencode-go'];
|
||||
const SETUP_PROVIDER_NAMES = ['deepseek', 'openai', 'openrouter', 'ollama', 'xai', 'anthropic', 'groq', 'gemini', 'opencode-zen', 'opencode-go', 'nvidia'];
|
||||
const SETUP_DEVICE_AUTH_PROVIDERS = [
|
||||
{ key: 'copilot', name: 'GitHub Copilot', aliases: ['github'], command: '/setup copilot' },
|
||||
{ key: 'chatgpt-subscription', name: 'ChatGPT Subscription', aliases: ['chatgptsubscription', 'chatgpt-sub', 'codex'], command: '/setup chatgpt-subscription' },
|
||||
@@ -97,6 +99,7 @@ function _setupProviderFromInput(input) {
|
||||
google: 'gemini',
|
||||
xai: 'xai',
|
||||
grok: 'xai',
|
||||
nvidia: 'nvidia',
|
||||
};
|
||||
return SETUP_PROVIDER_URLS[aliases[raw] || raw] || null;
|
||||
}
|
||||
@@ -124,6 +127,7 @@ function _extractSetupProviderCredential(input) {
|
||||
['groq', 'groq'],
|
||||
['google', 'gemini'], ['gemini', 'gemini'],
|
||||
['x ai', 'xai'], ['xai', 'xai'], ['grok', 'xai'],
|
||||
['nvidia', 'nvidia'],
|
||||
];
|
||||
for (const [alias, key] of providerAliases) {
|
||||
const re = new RegExp('(^|\\s|[,;:])(' + alias.replace(/\s+/g, '\\s+') + ')(?=$|\\s|[,;:])', 'i');
|
||||
|
||||
Reference in New Issue
Block a user