fix(ai): offload model resolution from async paths

Wrap blocking _resolve_model calls in asyncio.to_thread across async model interaction paths so endpoint/model resolution does not stall the event loop. Preserve owner-scoped resolution and add focused regression coverage.
2026-06-28 15:45:22 -04:00 · 2026-06-28 05:18:35 +05:30
parent 8b110c28e6
commit c01c09559a
8 changed files with 80 additions and 14 deletions
@@ -25,9 +25,10 @@ def test_model_listing_and_image_fallback_are_owner_scoped():

    assert "owner: Optional[str] = None" in list_body
    assert "owner_filter(query, ModelEndpoint, owner)" in list_body
-    assert "_resolve_model(candidate, owner=owner)" in image_body
+    # _resolve_model is offloaded to a worker thread (#4589) but stays owner-scoped.
+    assert "asyncio.to_thread(_resolve_model, candidate, owner=owner)" in image_body
    assert "owner_filter(_img_q, ModelEndpoint, owner)" in image_body
-    assert "_resolve_model(model_spec, owner=owner)" in image_body
+    assert "asyncio.to_thread(_resolve_model, model_spec, owner=owner)" in image_body


 # chat_with_model, list_models and ask_teacher moved to the registry (#3629)
@@ -0,0 +1,61 @@
+"""Issue #4589 — _resolve_model does a blocking httpx.get, so calling it
+directly from an async handler stalls the whole event loop for the duration of
+the probe. The async call sites now wrap it in asyncio.to_thread.
+
+do_pipeline is used as the representative handler: _resolve_model is the first
+real work it does, and a ValueError returns early before any LLM call, so these
+tests drive the offload path without a live model endpoint.
+"""
+
+import asyncio
+import threading
+import time
+
+import src.ai_interaction as ai
+
+
+async def test_do_pipeline_resolves_model_off_the_event_loop(monkeypatch):
+    # A deliberately blocking _resolve_model that records how many copies run
+    # at once. If it ran on the event loop, the first call would block the loop
+    # and the second could not start — peak concurrency would be 1.
+    state = {"active": 0, "peak": 0}
+    lock = threading.Lock()
+
+    def slow_resolve(spec, owner=None):
+        with lock:
+            state["active"] += 1
+            state["peak"] = max(state["peak"], state["active"])
+        time.sleep(0.2)
+        with lock:
+            state["active"] -= 1
+        raise ValueError("no such model")  # early-return path, no LLM call
+
+    monkeypatch.setattr(ai, "_resolve_model", slow_resolve)
+
+    content = '[{"model": "m", "instruction": "go"}]'
+    results = await asyncio.gather(
+        ai.do_pipeline(content, owner="u"),
+        ai.do_pipeline(content, owner="u"),
+    )
+
+    assert all("error" in r for r in results)
+    assert state["peak"] == 2, "resolutions did not overlap — call still blocks the loop"
+
+
+async def test_do_pipeline_uses_offloaded_resolution_result(monkeypatch):
+    # The offload must also return the resolved tuple, not just propagate errors.
+    monkeypatch.setattr(
+        ai, "_resolve_model",
+        lambda spec, owner=None: ("http://x/v1/chat/completions", "resolved-model", {}),
+    )
+
+    async def fake_llm(url, model, messages, **kwargs):
+        return f"output from {model}"
+
+    monkeypatch.setattr("src.llm_core.llm_call_async", fake_llm)
+
+    result = await ai.do_pipeline('[{"model": "m", "instruction": "go"}]', owner="u")
+
+    assert "error" not in result, result
+    # The model the offloaded _resolve_model returned made it through to the call.
+    assert "resolved-model" in str(result)