mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-30 00:22:10 -04:00
fix(utility): use utility model for background tasks (auto-title, memory audit) instead of chat model (#4027)
This commit is contained in:
+11
-3
@@ -159,9 +159,17 @@ async def auto_name_session(session_manager, sess):
|
|||||||
return
|
return
|
||||||
|
|
||||||
owner = getattr(sess, "owner", None)
|
owner = getattr(sess, "owner", None)
|
||||||
t_url, t_model, t_headers = resolve_task_endpoint(
|
t_url, t_model, t_headers = resolve_task_endpoint(owner=owner)
|
||||||
sess.endpoint_url, sess.model, sess.headers, owner=owner,
|
if not t_model:
|
||||||
)
|
# If no task/utility model is configured at all, fall back to
|
||||||
|
# the session's own model so auto-naming still works even on
|
||||||
|
# minimal setups.
|
||||||
|
from src.endpoint_resolver import resolve_endpoint
|
||||||
|
_fallback = resolve_endpoint("default", owner=owner)
|
||||||
|
if _fallback and _fallback[1]:
|
||||||
|
t_url, t_model, t_headers = _fallback
|
||||||
|
else:
|
||||||
|
t_url, t_model, t_headers = sess.endpoint_url, sess.model, sess.headers
|
||||||
if not t_model:
|
if not t_model:
|
||||||
logger.debug("[auto-name] No model provided, skipping")
|
logger.debug("[auto-name] No model provided, skipping")
|
||||||
return
|
return
|
||||||
|
|||||||
+21
-8
@@ -29,6 +29,7 @@ from src.llm_core import llm_call_async
|
|||||||
from services.memory.memory_extractor import audit_memories
|
from services.memory.memory_extractor import audit_memories
|
||||||
from src.auth_helpers import get_current_user, require_user
|
from src.auth_helpers import get_current_user, require_user
|
||||||
from src.endpoint_resolver import resolve_endpoint
|
from src.endpoint_resolver import resolve_endpoint
|
||||||
|
from src.task_endpoint import resolve_task_endpoint
|
||||||
from src.upload_limits import read_upload_limited, MEMORY_IMPORT_MAX_BYTES
|
from src.upload_limits import read_upload_limited, MEMORY_IMPORT_MAX_BYTES
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -240,14 +241,18 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
|
|||||||
}
|
}
|
||||||
messages = [system_msg] + sess.get_context_messages()
|
messages = [system_msg] + sess.get_context_messages()
|
||||||
|
|
||||||
|
t_url, t_model, t_headers = resolve_task_endpoint(
|
||||||
|
sess.endpoint_url, sess.model, sess.headers, owner=_owner(request)
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
suggestion_text = await llm_call_async(
|
suggestion_text = await llm_call_async(
|
||||||
sess.endpoint_url,
|
t_url,
|
||||||
sess.model,
|
t_model,
|
||||||
messages,
|
messages,
|
||||||
temperature=0.2,
|
temperature=0.2,
|
||||||
max_tokens=500,
|
max_tokens=500,
|
||||||
headers=sess.headers,
|
headers=t_headers,
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
suggestions = json.loads(suggestion_text)
|
suggestions = json.loads(suggestion_text)
|
||||||
@@ -278,7 +283,15 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
|
|||||||
endpoint_url = model = None
|
endpoint_url = model = None
|
||||||
headers = {}
|
headers = {}
|
||||||
|
|
||||||
# Try default model from settings first
|
# Try utility model from settings first — memory audit is a background
|
||||||
|
# task and should prefer the lighter utility model over the main chat model.
|
||||||
|
from src.task_endpoint import resolve_task_endpoint
|
||||||
|
user = _owner(request)
|
||||||
|
t_url, t_model, t_headers = resolve_task_endpoint(owner=user)
|
||||||
|
if t_url and t_model:
|
||||||
|
endpoint_url, model, headers = t_url, t_model, t_headers
|
||||||
|
else:
|
||||||
|
# Fall back to default model if no task/utility model configured
|
||||||
settings = _load_settings()
|
settings = _load_settings()
|
||||||
ep_id = settings.get("default_endpoint_id", "")
|
ep_id = settings.get("default_endpoint_id", "")
|
||||||
default_model = settings.get("default_model", "")
|
default_model = settings.get("default_model", "")
|
||||||
@@ -360,13 +373,13 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
|
|||||||
try:
|
try:
|
||||||
sess = session_manager.get_session(session)
|
sess = session_manager.get_session(session)
|
||||||
_assert_session_owner(sess, _owner(request))
|
_assert_session_owner(sess, _owner(request))
|
||||||
endpoint_url = sess.endpoint_url
|
endpoint_url, model, headers = resolve_task_endpoint(
|
||||||
model = sess.model
|
sess.endpoint_url, sess.model, sess.headers, owner=_owner(request)
|
||||||
headers = sess.headers
|
)
|
||||||
except KeyError:
|
except KeyError:
|
||||||
raise HTTPException(404, "Session not found — needed for LLM config")
|
raise HTTPException(404, "Session not found — needed for LLM config")
|
||||||
else:
|
else:
|
||||||
endpoint_url, model, headers = resolve_endpoint("utility", owner=_owner(request))
|
endpoint_url, model, headers = resolve_task_endpoint(owner=_owner(request))
|
||||||
|
|
||||||
if not endpoint_url or not model:
|
if not endpoint_url or not model:
|
||||||
raise HTTPException(400, "No LLM model configured. Set a default model in Settings.")
|
raise HTTPException(400, "No LLM model configured. Set a default model in Settings.")
|
||||||
|
|||||||
@@ -265,23 +265,19 @@ def resolve_endpoint(
|
|||||||
ep_id = _stg(f"{setting_prefix}_endpoint_id")
|
ep_id = _stg(f"{setting_prefix}_endpoint_id")
|
||||||
model = _stg(f"{setting_prefix}_model")
|
model = _stg(f"{setting_prefix}_model")
|
||||||
|
|
||||||
# If the specific endpoint is not configured, but the caller provided a
|
# Fall back to utility model for task/research/auto-naming if not specifically configured.
|
||||||
|
if not ep_id and setting_prefix not in ("utility", "default"):
|
||||||
|
ep_id = _stg("utility_endpoint_id")
|
||||||
|
model = _stg("utility_model")
|
||||||
|
|
||||||
|
# If the endpoint is STILL not configured, but the caller provided a
|
||||||
# valid fallback (e.g. the active session model), use that immediately.
|
# valid fallback (e.g. the active session model), use that immediately.
|
||||||
# This prevents background tasks from jumping to the global default_model
|
# This prevents background tasks from jumping to the global default_model
|
||||||
# when the user is mid-conversation with a different model.
|
# when the user is mid-conversation with a different model.
|
||||||
if not ep_id and fallback_url and fallback_model:
|
if not ep_id and fallback_url and fallback_model:
|
||||||
return fallback_url, fallback_model, fallback_headers
|
return fallback_url, fallback_model, fallback_headers
|
||||||
|
|
||||||
# Unset Utility means "same as Default Chat Model".
|
# Unset Utility (or anything else that didn't have a fallback) means "same as Default Chat Model".
|
||||||
if setting_prefix == "utility" and not ep_id:
|
|
||||||
ep_id = _stg("default_endpoint_id")
|
|
||||||
model = _stg("default_model")
|
|
||||||
|
|
||||||
# Fall back to utility model for task/research/auto-naming if not specifically configured.
|
|
||||||
# If Utility itself is unset, the block above makes that resolve to Default Chat.
|
|
||||||
if not ep_id and setting_prefix != "utility":
|
|
||||||
ep_id = _stg("utility_endpoint_id")
|
|
||||||
model = _stg("utility_model")
|
|
||||||
if not ep_id:
|
if not ep_id:
|
||||||
ep_id = _stg("default_endpoint_id")
|
ep_id = _stg("default_endpoint_id")
|
||||||
model = _stg("default_model")
|
model = _stg("default_model")
|
||||||
|
|||||||
Reference in New Issue
Block a user