mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-20 19:55:25 -04:00
Constrain research handler JSON paths (#2846)
This commit is contained in:
+47
-10
@@ -20,6 +20,7 @@ from src.research_utils import strip_thinking, is_low_quality
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
RESEARCH_DATA_DIR = Path("data/deep_research")
|
||||
_RESEARCH_SESSION_ID_RE = re.compile(r"^[A-Za-z0-9-]{1,128}$")
|
||||
|
||||
|
||||
def _bounded_int(value, *, default: int, minimum: int, maximum: int) -> int:
|
||||
@@ -48,6 +49,18 @@ def _format_probe_failure(model: str, exc: Exception) -> str:
|
||||
return f"Cannot reach model '{model}' — check that the endpoint is running and accessible."
|
||||
|
||||
|
||||
def _research_json_path(session_id: str) -> Optional[Path]:
    """Return the resolved on-disk JSON path for a research session.

    The identifier is only accepted when it is a ``str`` that fully matches
    ``_RESEARCH_SESSION_ID_RE``; the resulting ``<session_id>.json`` path is
    then resolved and must still sit underneath the resolved
    ``RESEARCH_DATA_DIR``.

    Args:
        session_id: Caller-supplied research session identifier.

    Returns:
        The resolved path of the session's JSON file, or ``None`` when the
        identifier is rejected or the resolved path falls outside the
        research data directory.
    """
    # Type check first so non-string values never reach the regex.
    if not isinstance(session_id, str):
        return None
    if _RESEARCH_SESSION_ID_RE.fullmatch(session_id) is None:
        return None

    base_dir = RESEARCH_DATA_DIR.resolve()
    candidate = (RESEARCH_DATA_DIR / f"{session_id}.json").resolve()

    # relative_to() raises ValueError when candidate is not under base_dir,
    # which is how a path escaping the data directory is detected.
    try:
        candidate.relative_to(base_dir)
    except ValueError:
        return None
    return candidate
|
||||
|
||||
|
||||
class ResearchHandler:
|
||||
"""Handles research service operations with iterative deep research."""
|
||||
|
||||
@@ -232,6 +245,9 @@ class ResearchHandler:
|
||||
max_rounds is the safety cap; the AI's _should_stop decision (after
|
||||
min_rounds) terminates the loop earlier in normal operation.
|
||||
"""
|
||||
if _research_json_path(session_id) is None:
|
||||
raise ValueError("Invalid research session_id")
|
||||
|
||||
# Resolve the hard wall-clock timeout from settings when the caller
|
||||
# didn't pin one. Local / edge models routinely need more than the
|
||||
# old 600s default to finish a deep-research synthesis. A setting of
|
||||
@@ -368,7 +384,9 @@ class ResearchHandler:
|
||||
result["avg_duration"] = round(avg, 1)
|
||||
return result
|
||||
# Check disk for completed research (skip consumed results)
|
||||
path = RESEARCH_DATA_DIR / f"{session_id}.json"
|
||||
path = _research_json_path(session_id)
|
||||
if path is None:
|
||||
return None
|
||||
if path.exists():
|
||||
try:
|
||||
data = json.loads(path.read_text(encoding="utf-8"))
|
||||
@@ -407,7 +425,9 @@ class ResearchHandler:
|
||||
if entry["status"] in ("done", "error", "cancelled"):
|
||||
return entry.get("result")
|
||||
# Check disk (skip consumed results)
|
||||
path = RESEARCH_DATA_DIR / f"{session_id}.json"
|
||||
path = _research_json_path(session_id)
|
||||
if path is None:
|
||||
return None
|
||||
if path.exists():
|
||||
try:
|
||||
data = json.loads(path.read_text(encoding="utf-8"))
|
||||
@@ -429,7 +449,9 @@ class ResearchHandler:
|
||||
if researcher and researcher.findings:
|
||||
return self._extract_sources(researcher.findings)
|
||||
# Check disk
|
||||
path = RESEARCH_DATA_DIR / f"{session_id}.json"
|
||||
path = _research_json_path(session_id)
|
||||
if path is None:
|
||||
return None
|
||||
if path.exists():
|
||||
try:
|
||||
data = json.loads(path.read_text(encoding="utf-8"))
|
||||
@@ -446,7 +468,9 @@ class ResearchHandler:
|
||||
if researcher and researcher.findings:
|
||||
return self._extract_raw_findings(researcher.findings)
|
||||
# Check disk
|
||||
path = RESEARCH_DATA_DIR / f"{session_id}.json"
|
||||
path = _research_json_path(session_id)
|
||||
if path is None:
|
||||
return None
|
||||
if path.exists():
|
||||
try:
|
||||
data = json.loads(path.read_text(encoding="utf-8"))
|
||||
@@ -521,7 +545,9 @@ class ResearchHandler:
|
||||
Keeps the JSON on disk so visual reports can be generated later.
|
||||
"""
|
||||
self._active_tasks.pop(session_id, None)
|
||||
path = RESEARCH_DATA_DIR / f"{session_id}.json"
|
||||
path = _research_json_path(session_id)
|
||||
if path is None:
|
||||
return
|
||||
if path.exists():
|
||||
try:
|
||||
data = json.loads(path.read_text(encoding="utf-8"))
|
||||
@@ -533,6 +559,10 @@ class ResearchHandler:
|
||||
def _save_result(self, session_id: str, entry: dict):
|
||||
"""Persist completed research result to disk."""
|
||||
try:
|
||||
path = _research_json_path(session_id)
|
||||
if path is None:
|
||||
logger.error("Refusing to save research result for invalid session_id: %r", session_id)
|
||||
return
|
||||
# Extract and cache sources + raw findings
|
||||
sources = []
|
||||
raw_findings = []
|
||||
@@ -542,7 +572,6 @@ class ResearchHandler:
|
||||
raw_findings = self._extract_raw_findings(researcher.findings)
|
||||
entry["sources"] = sources
|
||||
|
||||
path = RESEARCH_DATA_DIR / f"{session_id}.json"
|
||||
data = {
|
||||
"query": entry["query"],
|
||||
"status": entry["status"],
|
||||
@@ -569,7 +598,9 @@ class ResearchHandler:
|
||||
|
||||
def _get_session_json(self, session_id: str) -> Optional[dict]:
|
||||
"""Load the saved research JSON for a session, if it exists."""
|
||||
path = RESEARCH_DATA_DIR / f"{session_id}.json"
|
||||
path = _research_json_path(session_id)
|
||||
if path is None:
|
||||
return None
|
||||
if path.exists():
|
||||
try:
|
||||
return json.loads(path.read_text(encoding="utf-8"))
|
||||
@@ -579,7 +610,9 @@ class ResearchHandler:
|
||||
|
||||
def get_report_html(self, session_id: str) -> Optional[str]:
|
||||
"""Generate the visual HTML report for a session (always fresh from JSON)."""
|
||||
json_path = RESEARCH_DATA_DIR / f"{session_id}.json"
|
||||
json_path = _research_json_path(session_id)
|
||||
if json_path is None:
|
||||
return None
|
||||
if not json_path.exists():
|
||||
logger.warning(f"No JSON found for visual report: {json_path}")
|
||||
return None
|
||||
@@ -606,7 +639,9 @@ class ResearchHandler:
|
||||
|
||||
def hide_image(self, session_id: str, image_url: str) -> bool:
|
||||
"""Add image_url to the persisted hidden_images list for a research."""
|
||||
path = RESEARCH_DATA_DIR / f"{session_id}.json"
|
||||
path = _research_json_path(session_id)
|
||||
if path is None:
|
||||
return False
|
||||
if not path.exists():
|
||||
return False
|
||||
try:
|
||||
@@ -624,7 +659,9 @@ class ResearchHandler:
|
||||
|
||||
def unhide_all_images(self, session_id: str) -> bool:
|
||||
"""Clear the hidden_images list for a research."""
|
||||
path = RESEARCH_DATA_DIR / f"{session_id}.json"
|
||||
path = _research_json_path(session_id)
|
||||
if path is None:
|
||||
return False
|
||||
if not path.exists():
|
||||
return False
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user