mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-28 15:45:22 -04:00
c01c09559a
Wrap blocking _resolve_model calls in asyncio.to_thread across async model interaction paths so endpoint/model resolution does not stall the event loop. Preserve owner-scoped resolution and add focused regression coverage.
62 lines
2.3 KiB
Python
62 lines
2.3 KiB
Python
"""Issue #4589 — _resolve_model does a blocking httpx.get, so calling it
directly from an async handler stalls the whole event loop for the duration of
the probe. The async call sites now wrap it in asyncio.to_thread.

do_pipeline is used as the representative handler: _resolve_model is the first
real work it does, and a ValueError returns early before any LLM call, so these
tests drive the offload path without a live model endpoint.
"""
|
|
|
|
import asyncio
|
|
import threading
|
|
import time
|
|
|
|
import src.ai_interaction as ai
|
|
|
|
|
|
async def test_do_pipeline_resolves_model_off_the_event_loop(monkeypatch):
    """Two concurrent do_pipeline calls must overlap inside _resolve_model.

    _resolve_model is replaced with a stub that blocks its calling thread and
    tracks how many invocations are in flight at the same moment. Were the
    stub executed on the event loop itself, the first invocation would hold
    the loop and the second could not begin, so the high-water mark would
    stay at 1. A high-water mark of 2 shows the calls ran concurrently.
    """
    in_flight = 0
    high_water = 0
    guard = threading.Lock()

    def blocking_resolve(spec, owner=None):
        nonlocal in_flight, high_water
        with guard:
            in_flight += 1
            if in_flight > high_water:
                high_water = in_flight
        # Block outside the lock so a second invocation can enter and be
        # counted while this one is still pending.
        time.sleep(0.2)
        with guard:
            in_flight -= 1
        # Raising ValueError takes the early-return path: no LLM call is made.
        raise ValueError("no such model")

    monkeypatch.setattr(ai, "_resolve_model", blocking_resolve)

    pipeline_spec = '[{"model": "m", "instruction": "go"}]'
    first, second = await asyncio.gather(
        *(ai.do_pipeline(pipeline_spec, owner="u") for _ in range(2))
    )

    for outcome in (first, second):
        assert "error" in outcome
    assert high_water == 2, "resolutions did not overlap — call still blocks the loop"
|
|
|
|
|
|
async def test_do_pipeline_uses_offloaded_resolution_result(monkeypatch):
    """The offloaded _resolve_model must hand its tuple back to do_pipeline.

    Error propagation alone is not enough: the resolved values have to be
    returned from the offload and used. Both the resolver and the async LLM
    call are stubbed, and the stubbed model name is expected to show up in
    the pipeline result.
    """

    def instant_resolve(spec, owner=None):
        return ("http://x/v1/chat/completions", "resolved-model", {})

    async def stub_llm_call(url, model, messages, **kwargs):
        # Echo the model name so the assertion below can trace it.
        return f"output from {model}"

    monkeypatch.setattr(ai, "_resolve_model", instant_resolve)
    monkeypatch.setattr("src.llm_core.llm_call_async", stub_llm_call)

    pipeline_spec = '[{"model": "m", "instruction": "go"}]'
    outcome = await ai.do_pipeline(pipeline_spec, owner="u")

    assert "error" not in outcome, outcome
    # The model name returned by the stubbed resolver reached the LLM call.
    rendered = str(outcome)
    assert "resolved-model" in rendered
|