diff --git a/src/llm_core.py b/src/llm_core.py index 20a4b544c..2178f894f 100644 --- a/src/llm_core.py +++ b/src/llm_core.py @@ -618,6 +618,8 @@ def _detect_provider(url: str) -> str: from src.copilot import is_copilot_base if is_copilot_base(url): return "copilot" + if _host_match(url, "mistral.ai"): + return "mistral" return "openai" @@ -906,10 +908,17 @@ def _anthropic_rejects_temperature(model: str) -> bool: return False return (int(match.group(1)), int(match.group(2))) >= (4, 7) +# Reasoning effort level sent to Mistral thinking-capable models. Mistral's +# API accepts "high", "medium", "low", "none" — see +# https://docs.mistral.ai/capabilities/reasoning/. Override via env var +# ODYSSEUS_MISTRAL_REASONING_EFFORT (e.g. set to "medium" for cheaper chat). +_MISTRAL_REASONING_EFFORT = os.getenv("ODYSSEUS_MISTRAL_REASONING_EFFORT", "high") + # Models that support structured thinking — may output without opening tag _THINKING_MODEL_PATTERNS = ( "qwen3", "qwq", "deepseek-r1", "deepseek-reasoner", "minimax", "m2-reap", "gemma", "stepfun", "step-3", "step3", + "magistral", "mistral-small", "mistral-medium", ) def _supports_thinking(model: str) -> bool: @@ -919,6 +928,38 @@ def _supports_thinking(model: str) -> bool: m = model.lower() return any(p in m for p in _THINKING_MODEL_PATTERNS) +def _normalize_mistral_content(content): + """Mistral returns content as a structured array when reasoning is on: + [{"type": "thinking", "thinking": [{"type": "text", "text": "..."}], "closed": true}, + {"type": "text", "text": "...final answer..."}] + Convert to (text, thinking) tuple of plain strings. Pass through strings + unchanged so non-Mistral OpenAI-compat endpoints are unaffected. + """ + if isinstance(content, str): + return content, "" + if not isinstance(content, list): + return "", "" + text_parts = [] + thinking_parts = [] + for block in content: + if not isinstance(block, dict): + continue + btype = block.get("type") + if btype == "text": + t = block.get("text", "") + if t: + text_parts.append(t) + elif btype == "thinking": + inner = block.get("thinking", []) + if isinstance(inner, list): + for tb in inner: + if isinstance(tb, dict) and tb.get("text"): + thinking_parts.append(tb["text"]) + elif isinstance(inner, str): + thinking_parts.append(inner) + return "".join(text_parts), "".join(thinking_parts) + + def _convert_openai_content_to_anthropic(content): """Convert OpenAI multimodal content blocks to Anthropic format. @@ -1441,6 +1482,8 @@ def llm_call(url: str, model: str, messages: List[Dict], temperature: float = LL if max_tokens and max_tokens > 0: tok_key = "max_completion_tokens" if _uses_max_completion_tokens(model) else "max_tokens" payload[tok_key] = max_tokens + if provider == "mistral" and _supports_thinking(model): + payload["reasoning_effort"] = _MISTRAL_REASONING_EFFORT try: note_model_activity(target_url, model) r = httpx_post_kimi_aware(target_url, h, json=payload, timeout=timeout) @@ -1456,7 +1499,16 @@ def llm_call(url: str, model: str, messages: List[Dict], temperature: float = LL response = _parse_ollama_response(data) else: msg = data["choices"][0]["message"] - response = msg.get("content") or msg.get("reasoning_content") or "" + content = msg.get("content") + if isinstance(content, list): + # Mistral structured content — extract thinking + text + text_part, thinking_part = _normalize_mistral_content(content) + if thinking_part: + response = thinking_part + "\n\n" + (text_part or "") + else: + response = text_part or msg.get("reasoning_content") or "" + else: + response = content or msg.get("reasoning_content") or "" _set_cached_response(cache_key, response) return response except Exception: @@ -1638,6 +1690,8 @@ async def llm_call_async( # Suppress thinking for qwen3/gemma4 on Ollama /v1 — same as stream_llm. if _is_ollama_openai_compat_url(url) and _supports_thinking(model): payload["think"] = False + if provider == "mistral" and _supports_thinking(model): + payload["reasoning_effort"] = _MISTRAL_REASONING_EFFORT _apply_local_cache_affinity(payload, url, session_id) if _is_host_dead(target_url): @@ -1756,6 +1810,12 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl payload[tok_key] = max_tokens if tools: payload["tools"] = tools + # Mistral thinking-capable models — send reasoning_effort so Mistral + # activates thinking mode and returns structured reasoning_content. + # Effort level is configurable via ODYSSEUS_MISTRAL_REASONING_EFFORT + # (high / medium / low / none); default "high". + if provider == "mistral" and _supports_thinking(model): + payload["reasoning_effort"] = _MISTRAL_REASONING_EFFORT # For Ollama's OpenAI-compat /v1 endpoint with thinking models (qwen3, # gemma4, etc.), suppress thinking so tool calls aren't swallowed inside # blocks. Ollama /v1 accepts "think": false as a top-level param. @@ -2134,9 +2194,17 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl # Text content # Reasoning tokens (VLLM --reasoning-parser, e.g. Qwen3/DeepSeek-R1, Nemotron). vLLM 0.20.2 / NIM emit the field as `reasoning`; older builds use `reasoning_content`. Some OpenAI-compatible Ollama builds use `thinking`. reasoning = delta.get("reasoning_content") or delta.get("reasoning") or delta.get("thinking") or "" + content = delta.get("content") or "" + # Mistral structured content: content is a list of typed blocks + # ({"type": "thinking", ...}, {"type": "text", ...}). Split into + # reasoning + text so thinking streams into the thinking panel. + if isinstance(content, list): + text_part, thinking_part = _normalize_mistral_content(content) + if thinking_part: + reasoning = (reasoning + thinking_part) if reasoning else thinking_part + content = text_part if reasoning: yield _stream_delta_event(reasoning, thinking=True) - content = delta.get("content") or "" if content: content = re.sub(r"]*)?>", r"", content, flags=re.IGNORECASE) content = re.sub(r"", "", content, flags=re.IGNORECASE) diff --git a/tests/test_llm_core_mistral_content.py b/tests/test_llm_core_mistral_content.py new file mode 100644 index 000000000..ee09011b5 --- /dev/null +++ b/tests/test_llm_core_mistral_content.py @@ -0,0 +1,156 @@ +"""Tests for _normalize_mistral_content() — Mistral's structured content parser. + +Mistral's chat completions API returns content as a typed array when reasoning +is enabled, instead of the plain string most OpenAI-compat servers use: + + "content": [ + {"type": "thinking", "thinking": [{"type": "text", "text": "..."}], "closed": true}, + {"type": "text", "text": "..."} + ] + +_normalize_mistral_content() splits that into (text, thinking) plain strings. +The function is called from three sites: + - llm_call (sync, non-streaming response parser) + - llm_call_async (async, non-streaming response parser) + - stream_llm (streaming delta parser) + +These tests pin the contract: string passthrough, the array shape, and the +edge cases (empty, garbage, missing fields) so a refactor doesn't silently +drop thinking content or break non-Mistral providers. +""" +from src.llm_core import _normalize_mistral_content + + +def test_string_passthrough_returns_text_with_empty_thinking(): + """Plain string content (the common case) passes through unchanged.""" + text, thinking = _normalize_mistral_content("hello world") + assert text == "hello world" + assert thinking == "" + + +def test_empty_string_passthrough(): + text, thinking = _normalize_mistral_content("") + assert text == "" + assert thinking == "" + + +def test_array_with_thinking_and_text_blocks(): + """Mistral's documented format: thinking block + text block.""" + content = [ + { + "type": "thinking", + "thinking": [{"type": "text", "text": "Let me work through this..."}], + "closed": True, + }, + {"type": "text", "text": "The answer is 42."}, + ] + text, thinking = _normalize_mistral_content(content) + assert text == "The answer is 42." + assert thinking == "Let me work through this..." + + +def test_array_with_only_thinking_block(): + """Streaming deltas often contain only a thinking fragment (no text block yet).""" + content = [ + { + "type": "thinking", + "thinking": [{"type": "text", "text": "Okay, let's"}], + "closed": True, + } + ] + text, thinking = _normalize_mistral_content(content) + assert text == "" + assert thinking == "Okay, let's" + + +def test_array_with_only_text_block(): + """Final answer delta — only the text block, no thinking.""" + content = [{"type": "text", "text": "Final answer."}] + text, thinking = _normalize_mistral_content(content) + assert text == "Final answer." + assert thinking == "" + + +def test_array_concatenates_multiple_text_blocks(): + """Multiple text blocks are concatenated in order.""" + content = [ + {"type": "text", "text": "part 1 "}, + {"type": "text", "text": "part 2"}, + ] + text, thinking = _normalize_mistral_content(content) + assert text == "part 1 part 2" + + +def test_array_concatenates_multiple_thinking_fragments(): + """Multiple thinking sub-blocks are concatenated in order.""" + content = [ + { + "type": "thinking", + "thinking": [ + {"type": "text", "text": "first "}, + {"type": "text", "text": "second"}, + ], + "closed": True, + } + ] + text, thinking = _normalize_mistral_content(content) + assert text == "" + assert thinking == "first second" + + +def test_empty_array_returns_empty_strings(): + text, thinking = _normalize_mistral_content([]) + assert text == "" + assert thinking == "" + + +def test_array_with_garbage_entries_skips_them(): + """Non-dict entries, missing type, missing text — all silently skipped.""" + content = [ + "not a dict", + None, + {"type": "unknown_type", "text": "should be ignored"}, + {"type": "text"}, # missing text key + {"type": "thinking"}, # missing thinking key + {"type": "text", "text": "valid text"}, + ] + text, thinking = _normalize_mistral_content(content) + assert text == "valid text" + assert thinking == "" + + +def test_none_returns_empty_strings(): + """Defensive: None content (server bug or schema drift) doesn't crash.""" + text, thinking = _normalize_mistral_content(None) + assert text == "" + assert thinking == "" + + +def test_int_returns_empty_strings(): + """Defensive: wrong-typed content doesn't crash.""" + text, thinking = _normalize_mistral_content(42) + assert text == "" + assert thinking == "" + + +def test_thinking_block_with_string_inner(): + """Some Mistral API versions may use a string instead of an array for + the inner 'thinking' field. Accept both shapes.""" + content = [ + {"type": "thinking", "thinking": "inline string thinking"}, + {"type": "text", "text": "answer"}, + ] + text, thinking = _normalize_mistral_content(content) + assert text == "answer" + assert thinking == "inline string thinking" + + +def test_thinking_block_with_empty_text_field(): + """Empty text fields don't pollute the output.""" + content = [ + {"type": "thinking", "thinking": [{"type": "text", "text": ""}], "closed": True}, + {"type": "text", "text": ""}, + ] + text, thinking = _normalize_mistral_content(content) + assert text == "" + assert thinking == ""