mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-22 04:35:29 -04:00
fix(chat): show requested and actual reply models
Show requested and actual reply models in chat labels when fallback or provider routing changes the responding model.
This commit is contained in:
@@ -1,7 +1,12 @@
|
||||
import pytest
|
||||
from fastapi import HTTPException
|
||||
|
||||
from routes.chat_helpers import _enforce_chat_privileges, clean_thinking_for_save, needs_auto_name
|
||||
from routes.chat_helpers import (
|
||||
_enforce_chat_privileges,
|
||||
clean_thinking_for_save,
|
||||
needs_auto_name,
|
||||
save_assistant_response,
|
||||
)
|
||||
|
||||
|
||||
class _AuthManager:
|
||||
@@ -64,6 +69,15 @@ def test_allowed_models_nonempty_list_still_restricts_without_new_flag(monkeypat
|
||||
)
|
||||
|
||||
|
||||
class _FakeSession:
|
||||
def __init__(self, model="selected-model"):
|
||||
self.model = model
|
||||
self.history = []
|
||||
|
||||
def add_message(self, message):
|
||||
self.history.append(message)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("name,expected", [
|
||||
# 24h format (the bug this PR fixes)
|
||||
("deepseek-v4-flash 14:05:33", True),
|
||||
@@ -130,3 +144,19 @@ def test_clean_thinking_for_save_extracts_thought_tag():
|
||||
|
||||
assert content == "Final answer."
|
||||
assert metadata["thinking"] == "internal reasoning"
|
||||
|
||||
|
||||
def test_save_assistant_response_preserves_actual_and_requested_model():
    """The saved message metadata keeps both the session's model and the metrics' model."""
    session = _FakeSession("selected-model")
    # The metrics report a different model than the one the session was created with.
    metrics = {"model": "actual-model", "input_tokens": 1, "output_tokens": 2}

    save_assistant_response(
        session,
        session_manager=None,
        session_id="s1",
        full_response="hello",
        last_metrics=metrics,
        incognito=True,
    )

    # The most recently added message is the one under test.
    saved_metadata = session.history[-1].metadata
    assert saved_metadata["requested_model"] == "selected-model"
    assert saved_metadata["model"] == "actual-model"
|
||||
|
||||
@@ -82,6 +82,32 @@ def _usage_event(monkeypatch, lines):
|
||||
return asyncio.run(run())
|
||||
|
||||
|
||||
def _stream_events(monkeypatch, lines):
    """Run ``llm_core.stream_llm`` against a fake client and collect its JSON events.

    ``lines`` is handed to ``_FakeClient``, which replaces the HTTP client that
    ``llm_core`` would otherwise obtain. Returns a list with one decoded object
    per ``data: `` line emitted by the stream, excluding the ``[DONE]`` marker
    and any payload that is not valid JSON.
    """
    # Swap llm_core's client factory and host/activity hooks for stubs,
    # applied in the same order as listed.
    stubs = (
        ("_get_http_client", lambda: _FakeClient(lines)),
        ("_is_host_dead", lambda u: False),
        ("note_model_activity", lambda *a, **k: None),
        ("_clear_host_dead", lambda *a, **k: None),
    )
    for attr_name, stub in stubs:
        monkeypatch.setattr(llm_core, attr_name, stub)

    prefix = "data: "

    async def collect():
        parsed = []
        stream = llm_core.stream_llm(
            "http://127.0.0.1:8081/v1/chat/completions",
            "openrouter/auto",
            [{"role": "user", "content": "hi"}],
        )
        async for chunk in stream:
            # A single chunk may carry several newline-separated lines.
            for raw in chunk.split("\n"):
                line = raw.strip()
                if not line.startswith(prefix):
                    continue
                payload = line[len(prefix):]
                if payload == "[DONE]":
                    continue
                try:
                    parsed.append(json.loads(payload))
                except ValueError:
                    # Payloads that are not valid JSON are deliberately ignored.
                    pass
        return parsed

    return asyncio.run(collect())
|
||||
|
||||
|
||||
# A real llama.cpp final chunk carries `usage` (delta empty / choices []) with a
|
||||
# sibling `timings` block. The decode speed here (78.91) is far above the
|
||||
# wall-clock figure the old code would have shown.
|
||||
@@ -127,6 +153,31 @@ def test_stream_llm_omits_tps_when_backend_has_no_timings(monkeypatch):
|
||||
assert "prefill_tps" not in usage
|
||||
|
||||
|
||||
def test_stream_llm_surfaces_provider_resolved_model(monkeypatch):
    """The model named in the streamed chunks is reported next to the requested one.

    Expects exactly one ``model_actual`` event, and a ``usage`` event whose data
    carries the same requested/actual model pair.
    """
    resolved_model = "meta-llama/llama-3.3-70b-instruct:free"
    content_chunk = {
        "model": resolved_model,
        "choices": [{"index": 0, "delta": {"content": "Hi"}}],
    }
    usage_chunk = {
        "model": resolved_model,
        "choices": [],
        "usage": {"prompt_tokens": 8, "completion_tokens": 5},
    }
    sse_lines = [
        'data: ' + json.dumps(content_chunk),
        'data: ' + json.dumps(usage_chunk),
        "data: [DONE]",
    ]

    events = _stream_events(monkeypatch, sse_lines)

    # "openrouter/auto" is the model name _stream_events passes to stream_llm.
    expected_actual = {
        "type": "model_actual",
        "requested_model": "openrouter/auto",
        "model": resolved_model,
    }
    model_events = [evt for evt in events if evt.get("type") == "model_actual"]
    assert model_events == [expected_actual]

    usage_payloads = [evt["data"] for evt in events if evt.get("type") == "usage"]
    usage = usage_payloads[0]
    assert usage["requested_model"] == "openrouter/auto"
    assert usage["model"] == resolved_model
|
||||
|
||||
|
||||
# --- _compute_final_metrics preference logic --------------------------------
|
||||
|
||||
def _metrics(**overrides):
|
||||
|
||||
Reference in New Issue
Block a user