fix(research): track analyzed URLs separately (#3125)

Co-authored-by: Alexandre Teixeira <111787685+alteixeira20@users.noreply.github.com>
This commit is contained in:
ooovenenoso
2026-06-10 07:08:22 -04:00
committed by GitHub
parent 3e49658204
commit 725d174243
4 changed files with 139 additions and 4 deletions
@@ -45,6 +45,20 @@ async def test_search_and_extract_respects_extraction_concurrency():
assert researcher.max_active == 2
@pytest.mark.asyncio
async def test_search_and_extract_tracks_all_urls_selected_for_analysis():
    """Check that ``_search_and_extract`` records the URLs it analyzed.

    Runs one query ("a") through a controlled researcher capped at two URLs
    per round, then verifies that two findings come back and that
    ``analyzed_urls`` holds a url/title entry for each of them, in order.
    """
    expected_entries = [
        {"url": "https://example.test/a/0", "title": "a-0"},
        {"url": "https://example.test/a/1", "title": "a-1"},
    ]

    subject = _ControlledResearcher(extraction_concurrency=2, max_urls_per_round=2)
    # Set by hand before the call; presumably the method reads it for
    # elapsed-time bookkeeping -- confirm against the implementation.
    subject._start_time = time.time()

    extracted = await subject._search_and_extract(["a"], "question")

    assert len(extracted) == 2
    assert subject.analyzed_urls == expected_entries
@pytest.mark.asyncio
async def test_fetch_and_extract_uses_configured_timeout(monkeypatch):
captured = {}