mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-15 17:25:26 -04:00
fix(research): track analyzed URLs separately (#3125)
Co-authored-by: Alexandre Teixeira <111787685+alteixeira20@users.noreply.github.com>
This commit is contained in:
@@ -285,6 +285,7 @@ class ResearchHandler:
|
||||
query, report, stats, elapsed,
|
||||
findings=researcher.findings,
|
||||
evolving_report=researcher.evolving_report,
|
||||
analyzed_urls=getattr(researcher, "analyzed_urls", None),
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
@@ -331,7 +332,8 @@ class ResearchHandler:
|
||||
|
||||
def _format_research_report(
|
||||
self, query: str, full_report: str, stats: dict, elapsed: float,
|
||||
findings: list = None, evolving_report: str = None,
|
||||
findings: Optional[list] = None, evolving_report: Optional[str] = None,
|
||||
analyzed_urls: Optional[list] = None,
|
||||
) -> str:
|
||||
"""Format research report with sources list and expandable raw findings."""
|
||||
summary_lines = [
|
||||
@@ -342,20 +344,34 @@ class ResearchHandler:
|
||||
]
|
||||
summary_text = " | ".join(summary_lines)
|
||||
|
||||
# Build sources list with clickable links
|
||||
# Build sources list with clickable links. Keep the curated Sources
|
||||
# section filtered for citation quality, but also list every unique URL
|
||||
# the research run inspected so the "URLs Analyzed" count is auditable.
|
||||
sources_section = ""
|
||||
if findings:
|
||||
analyzed_urls_section = ""
|
||||
url_items = analyzed_urls if analyzed_urls is not None else findings
|
||||
if findings or url_items:
|
||||
seen_urls = set()
|
||||
source_lines = []
|
||||
for f in findings:
|
||||
analyzed_seen = set()
|
||||
analyzed_lines = []
|
||||
for f in findings or []:
|
||||
url = f.get("url", "")
|
||||
title = f.get("title", "") or url
|
||||
summary = f.get("summary", "") or f.get("evidence", "")
|
||||
if url and url not in seen_urls and not is_low_quality(summary):
|
||||
seen_urls.add(url)
|
||||
source_lines.append(f"- [{title}]({url})")
|
||||
for item in url_items or []:
|
||||
url = item.get("url", "")
|
||||
title = item.get("title", "") or url
|
||||
if url and url not in analyzed_seen:
|
||||
analyzed_seen.add(url)
|
||||
analyzed_lines.append(f"{len(analyzed_lines) + 1}. [{title}]({url})")
|
||||
if source_lines:
|
||||
sources_section = "\n### Sources\n\n" + "\n".join(source_lines) + "\n"
|
||||
if analyzed_lines:
|
||||
analyzed_urls_section = "\n### Analyzed URLs\n\n" + "\n".join(analyzed_lines) + "\n"
|
||||
|
||||
# Build raw findings section (individual extractions per source)
|
||||
raw_findings_section = ""
|
||||
@@ -391,6 +407,7 @@ class ResearchHandler:
|
||||
{full_report}
|
||||
|
||||
{sources_section}
|
||||
{analyzed_urls_section}
|
||||
{collected_section}
|
||||
---
|
||||
|
||||
|
||||
Reference in New Issue
Block a user