Background tasks: respect active session model fallback

This commit is contained in:
Tushar-Projects
2026-06-02 17:27:42 +05:30
committed by GitHub
parent 537b4bcff7
commit c3228f8b59
4 changed files with 55 additions and 6 deletions
+3
View File
@@ -149,6 +149,9 @@ async def auto_name_session(session_manager, sess):
t_url, t_model, t_headers = resolve_task_endpoint( t_url, t_model, t_headers = resolve_task_endpoint(
sess.endpoint_url, sess.model, sess.headers, sess.endpoint_url, sess.model, sess.headers,
) )
if not t_model:
logger.debug("[auto-name] No model provided, skipping")
return
# max_tokens big enough that reasoning models (Minimax M2, # max_tokens big enough that reasoning models (Minimax M2,
# DeepSeek R1, QwQ, etc.) have headroom for <think>…</think> # DeepSeek R1, QwQ, etc.) have headroom for <think>…</think>
+25 -2
View File
@@ -235,6 +235,10 @@ async def extract_and_store(
Designed to run as a background task (asyncio.create_task). Designed to run as a background task (asyncio.create_task).
Errors are logged, never raised. Errors are logged, never raised.
""" """
if not endpoint_url or not model:
logger.debug("[memory-extract] No model or URL provided, skipping")
return
try: try:
from src.llm_core import llm_call_async from src.llm_core import llm_call_async
@@ -245,11 +249,30 @@ async def extract_and_store(
if len(recent) < 2: if len(recent) < 2:
return # Need at least a user message and assistant response return # Need at least a user message and assistant response
fallback_facts = _fallback_memory_candidates(recent) # Strip media (images/audio) from messages — background memory extraction
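# only needs the text. Descriptions generated by the vision-language (VL)
# model are already in the text content of the messages. This avoids sending
# image tokens to non-vision models and prevents accidental "vision grounding"
# triggers. A message whose non-empty content list has no text blocks is
# skipped entirely rather than forwarded with empty content.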
stripped_recent = []
for msg in recent:
role = msg.get("role")
content = msg.get("content", "")
if isinstance(content, list):
# Filter out multimodal blocks that aren't text
text_only = [b for b in content if isinstance(b, dict) and b.get("type") == "text"]
if not text_only and content:
continue
content = text_only
stripped_recent.append({"role": role, "content": content})
if not stripped_recent:
return
fallback_facts = _fallback_memory_candidates(stripped_recent)
extraction_messages = [ extraction_messages = [
{"role": "system", "content": EXTRACT_SYSTEM_PROMPT}, {"role": "system", "content": EXTRACT_SYSTEM_PROMPT},
] + recent ] + stripped_recent
facts = [] facts = []
try: try:
+19 -1
View File
@@ -59,6 +59,10 @@ async def maybe_extract_skill(
owner: Optional[str] = None, owner: Optional[str] = None,
): ):
"""Extract a skill if the agent run was complex enough.""" """Extract a skill if the agent run was complex enough."""
if not model:
logger.debug("[skill-extract] No model provided, skipping")
return None
# Quiet by default; flip to DEBUG when chasing extractor issues. # Quiet by default; flip to DEBUG when chasing extractor issues.
logger.debug( logger.debug(
"[skill-extract] start: rounds=%d tools=%d model=%s owner=%s", "[skill-extract] start: rounds=%d tools=%d model=%s owner=%s",
@@ -78,9 +82,23 @@ async def maybe_extract_skill(
logger.debug("[skill-extract] no recent messages, skipping") logger.debug("[skill-extract] no recent messages, skipping")
return None return None
# Strip media (images/audio) from messages: keep only text blocks, and skip any message whose non-empty content list has no text blocks
stripped_recent = []
for msg in recent:
content = msg.get("content", "")
if isinstance(content, list):
text_only = [b for b in content if isinstance(b, dict) and b.get("type") == "text"]
if not text_only and content:
continue
content = text_only
stripped_recent.append({"role": msg.get("role"), "content": content})
if not stripped_recent:
return None
# Build conversation summary for extraction # Build conversation summary for extraction
conv_lines = [] conv_lines = []
for msg in recent: for msg in stripped_recent:
role = msg.get("role", "?") role = msg.get("role", "?")
content = msg.get("content", "") content = msg.get("content", "")
if isinstance(content, list): if isinstance(content, list):
+8 -3
View File
@@ -234,9 +234,14 @@ def resolve_endpoint(
ep_id = _stg(f"{setting_prefix}_endpoint_id") ep_id = _stg(f"{setting_prefix}_endpoint_id")
model = _stg(f"{setting_prefix}_model") model = _stg(f"{setting_prefix}_model")
# Unset Utility means "same as Default Chat Model". This keeps background # If the specific endpoint is not configured, but the caller provided a
# features usable out of the box and lets users override Utility only when # valid fallback (e.g. the active session model), use that immediately.
# they explicitly want a separate cheaper/faster model. # This prevents background tasks from jumping to the global default_model
# when the user is mid-conversation with a different model.
if not ep_id and fallback_url and fallback_model:
return fallback_url, fallback_model, fallback_headers
# Unset Utility means "same as Default Chat Model".
if setting_prefix == "utility" and not ep_id: if setting_prefix == "utility" and not ep_id:
ep_id = _stg("default_endpoint_id") ep_id = _stg("default_endpoint_id")
model = _stg("default_model") model = _stg("default_model")