fix(research): track analyzed URLs separately (#3125)

Co-authored-by: Alexandre Teixeira <111787685+alteixeira20@users.noreply.github.com>
2026-06-17 02:05:22 -04:00 · 2026-06-10 07:08:22 -04:00
parent 3e49658204
commit 725d174243
4 changed files with 139 additions and 4 deletions
@@ -45,6 +45,20 @@ async def test_search_and_extract_respects_extraction_concurrency():
    assert researcher.max_active == 2


+@pytest.mark.asyncio
+async def test_search_and_extract_tracks_all_urls_selected_for_analysis():
+    researcher = _ControlledResearcher(extraction_concurrency=2, max_urls_per_round=2)
+    researcher._start_time = time.time()  # set by hand; presumably read by _search_and_extract -- TODO confirm
+    # Run one search/extract round for the single query "a" (the stub researcher is defined outside this hunk).
+    findings = await researcher._search_and_extract(["a"], "question")
+    # Expect two findings, and both URLs recorded on analyzed_urls as url/title dicts in this exact order.
+    assert len(findings) == 2
+    assert researcher.analyzed_urls == [
+        {"url": "https://example.test/a/0", "title": "a-0"},
+        {"url": "https://example.test/a/1", "title": "a-1"},
+    ]
+
+
@pytest.mark.asyncio
 async def test_fetch_and_extract_uses_configured_timeout(monkeypatch):
    captured = {}