fix(utility): use utility model for background tasks (auto-title, memory audit) instead of chat model (#4027)

This commit is contained in:
Vishnu
2026-06-15 12:03:19 +05:30
committed by GitHub
parent 7ebbc15377
commit d6a3c9a0fe
3 changed files with 74 additions and 57 deletions
+11 -3
View File
@@ -159,9 +159,17 @@ async def auto_name_session(session_manager, sess):
return
owner = getattr(sess, "owner", None)
t_url, t_model, t_headers = resolve_task_endpoint(
sess.endpoint_url, sess.model, sess.headers, owner=owner,
)
t_url, t_model, t_headers = resolve_task_endpoint(owner=owner)
if not t_model:
# If no task/utility model is configured at all, fall back to
# the session's own model so auto-naming still works even on
# minimal setups.
from src.endpoint_resolver import resolve_endpoint
_fallback = resolve_endpoint("default", owner=owner)
if _fallback and _fallback[1]:
t_url, t_model, t_headers = _fallback
else:
t_url, t_model, t_headers = sess.endpoint_url, sess.model, sess.headers
if not t_model:
logger.debug("[auto-name] No model provided, skipping")
return
+55 -42
View File
@@ -29,6 +29,7 @@ from src.llm_core import llm_call_async
from services.memory.memory_extractor import audit_memories
from src.auth_helpers import get_current_user, require_user
from src.endpoint_resolver import resolve_endpoint
from src.task_endpoint import resolve_task_endpoint
from src.upload_limits import read_upload_limited, MEMORY_IMPORT_MAX_BYTES
logger = logging.getLogger(__name__)
@@ -240,14 +241,18 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
}
messages = [system_msg] + sess.get_context_messages()
t_url, t_model, t_headers = resolve_task_endpoint(
sess.endpoint_url, sess.model, sess.headers, owner=_owner(request)
)
try:
suggestion_text = await llm_call_async(
sess.endpoint_url,
sess.model,
t_url,
t_model,
messages,
temperature=0.2,
max_tokens=500,
headers=sess.headers,
headers=t_headers,
)
try:
suggestions = json.loads(suggestion_text)
@@ -278,42 +283,50 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
endpoint_url = model = None
headers = {}
# Try default model from settings first
settings = _load_settings()
ep_id = settings.get("default_endpoint_id", "")
default_model = settings.get("default_model", "")
if ep_id:
db = SessionLocal()
try:
ep = db.query(ModelEndpoint).filter(
ModelEndpoint.id == ep_id, ModelEndpoint.is_enabled == True
).first()
if ep:
base = _normalize_base(ep.base_url)
endpoint_url = build_chat_url(base)
model = default_model
if not model and ep.models:
try:
models = _json.loads(ep.models) if isinstance(ep.models, str) else ep.models
if models:
model = models[0]
except Exception:
pass
if ep.api_key:
headers = {"Authorization": f"Bearer {ep.api_key}"}
finally:
db.close()
# Try utility model from settings first — memory audit is a background
# task and should prefer the lighter utility model over the main chat model.
from src.task_endpoint import resolve_task_endpoint
user = _owner(request)
t_url, t_model, t_headers = resolve_task_endpoint(owner=user)
if t_url and t_model:
endpoint_url, model, headers = t_url, t_model, t_headers
else:
# Fall back to default model if no task/utility model configured
settings = _load_settings()
ep_id = settings.get("default_endpoint_id", "")
default_model = settings.get("default_model", "")
if ep_id:
db = SessionLocal()
try:
ep = db.query(ModelEndpoint).filter(
ModelEndpoint.id == ep_id, ModelEndpoint.is_enabled == True
).first()
if ep:
base = _normalize_base(ep.base_url)
endpoint_url = build_chat_url(base)
model = default_model
if not model and ep.models:
try:
models = _json.loads(ep.models) if isinstance(ep.models, str) else ep.models
if models:
model = models[0]
except Exception:
pass
if ep.api_key:
headers = {"Authorization": f"Bearer {ep.api_key}"}
finally:
db.close()
# Fall back to session model if no default configured
if not endpoint_url and session:
try:
sess = session_manager.get_session(session)
_assert_session_owner(sess, _owner(request))
endpoint_url = sess.endpoint_url
model = sess.model
headers = sess.headers
except KeyError:
pass
# Fall back to session model if no default configured
if not endpoint_url and session:
try:
sess = session_manager.get_session(session)
_assert_session_owner(sess, _owner(request))
endpoint_url = sess.endpoint_url
model = sess.model
headers = sess.headers
except KeyError:
pass
if not endpoint_url or not model:
raise HTTPException(400, "No default model configured — set one in Settings")
@@ -360,13 +373,13 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
try:
sess = session_manager.get_session(session)
_assert_session_owner(sess, _owner(request))
endpoint_url = sess.endpoint_url
model = sess.model
headers = sess.headers
endpoint_url, model, headers = resolve_task_endpoint(
sess.endpoint_url, sess.model, sess.headers, owner=_owner(request)
)
except KeyError:
raise HTTPException(404, "Session not found — needed for LLM config")
else:
endpoint_url, model, headers = resolve_endpoint("utility", owner=_owner(request))
endpoint_url, model, headers = resolve_task_endpoint(owner=_owner(request))
if not endpoint_url or not model:
raise HTTPException(400, "No LLM model configured. Set a default model in Settings.")
+8 -12
View File
@@ -265,27 +265,23 @@ def resolve_endpoint(
ep_id = _stg(f"{setting_prefix}_endpoint_id")
model = _stg(f"{setting_prefix}_model")
# If the specific endpoint is not configured, but the caller provided a
# Fall back to utility model for task/research/auto-naming if not specifically configured.
if not ep_id and setting_prefix not in ("utility", "default"):
ep_id = _stg("utility_endpoint_id")
model = _stg("utility_model")
# If the endpoint is STILL not configured, but the caller provided a
# valid fallback (e.g. the active session model), use that immediately.
# This prevents background tasks from jumping to the global default_model
# when the user is mid-conversation with a different model.
if not ep_id and fallback_url and fallback_model:
return fallback_url, fallback_model, fallback_headers
# Unset Utility means "same as Default Chat Model".
if setting_prefix == "utility" and not ep_id:
# Unset Utility (or anything else that didn't have a fallback) means "same as Default Chat Model".
if not ep_id:
ep_id = _stg("default_endpoint_id")
model = _stg("default_model")
# Fall back to utility model for task/research/auto-naming if not specifically configured.
# If Utility itself is unset, the block above makes that resolve to Default Chat.
if not ep_id and setting_prefix != "utility":
ep_id = _stg("utility_endpoint_id")
model = _stg("utility_model")
if not ep_id:
ep_id = _stg("default_endpoint_id")
model = _stg("default_model")
if not ep_id:
return fallback_url, fallback_model, fallback_headers