mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-22 04:35:29 -04:00
Clear remaining CodeQL path and parser alerts
This commit is contained in:
+30
-9
@@ -1645,13 +1645,12 @@ def setup_email_routes():
|
||||
return {"error": f"Attachment index {index} not found"}
|
||||
|
||||
from pathlib import Path as _Path
|
||||
target_root = _Path(target_dir).resolve()
|
||||
filepath = _Path(filepath).resolve()
|
||||
try:
|
||||
filepath.relative_to(target_root)
|
||||
except ValueError:
|
||||
target_root = os.path.abspath(str(target_dir))
|
||||
filepath_str = os.path.abspath(str(filepath))
|
||||
if os.path.commonpath([target_root, filepath_str]) != target_root:
|
||||
logger.warning("Rejected attachment path outside extraction dir: %s", filepath)
|
||||
return {"error": "Invalid attachment path"}
|
||||
filepath = _Path(filepath_str)
|
||||
base = _Path(filepath).name
|
||||
if base.startswith("."):
|
||||
return {"error": "Invalid filename", "filename": base}
|
||||
@@ -1728,8 +1727,7 @@ def setup_email_routes():
|
||||
_tag_doc_with_source(doc_id)
|
||||
return doc_id
|
||||
|
||||
def _attached_email_markdown(path):
|
||||
raw_bytes = path.read_bytes()
|
||||
def _attached_email_markdown(raw_bytes: bytes):
|
||||
if not raw_bytes:
|
||||
return f"# Attached email: {base}\n\n_(empty email attachment)_"
|
||||
try:
|
||||
@@ -1814,9 +1812,32 @@ def setup_email_routes():
|
||||
|
||||
# ── Attached email (.eml / message/rfc822) ────────────────
|
||||
if ext == ".eml":
|
||||
def _attachment_bytes_from_msg():
    """Return the raw bytes of the ``index``-th attachment-like part of ``msg``.

    ``msg`` and ``index`` are read from the enclosing scope (not visible in
    this hunk; presumably the parsed email and the requested attachment
    position -- confirm against the caller).

    Parts are visited with ``msg.walk()`` and counted only when they are not
    skipped by either rule below:

    * multipart containers, unless the part is a ``message/rfc822`` that has
      an "attachment" disposition or a filename;
    * ``text/plain`` / ``text/html`` parts whose Content-Disposition does not
      contain ``"attachment"``.

    Returns ``b""`` when the message is not multipart, when no counted part
    has the requested position, or when the selected part has no payload.
    """
    if not msg.is_multipart():
        return b""

    position = 0
    for candidate in msg.walk():
        disposition = str(candidate.get("Content-Disposition", ""))
        content_type = candidate.get_content_type()

        # An attached email is itself multipart, so it has to be exempted
        # from the container skip below.
        attached_email = False
        if content_type == "message/rfc822":
            attached_email = bool(
                "attachment" in disposition.lower() or candidate.get_filename()
            )

        if candidate.is_multipart() and not attached_email:
            continue

        # NOTE(review): this test is case-sensitive, unlike the lowercased one
        # above; kept as-is so positions match however attachments are
        # enumerated elsewhere -- confirm before changing.
        is_text_part = content_type in ("text/plain", "text/html")
        if is_text_part and "attachment" not in disposition:
            continue

        if position != index:
            position += 1
            continue

        data = candidate.get_payload(decode=True)
        if data is None and content_type == "message/rfc822":
            # No decodable payload: fall back to serialising the part itself.
            try:
                data = candidate.as_bytes()
            except Exception:
                data = b""
        return data or b""

    return b""
|
||||
|
||||
try:
|
||||
content = _attached_email_markdown(filepath)
|
||||
except Exception as e:
|
||||
content = _attached_email_markdown(_attachment_bytes_from_msg())
|
||||
except Exception:
|
||||
logger.exception("Failed to read email attachment %s", base)
|
||||
return {"error": "Failed to read email attachment", "filename": base}
|
||||
doc_id = _create_markdown_doc(content, "Imported attached email")
|
||||
|
||||
+23
-1
@@ -545,6 +545,28 @@ def _strip_stepfun_tool_markup(text: str) -> str:
|
||||
return cleaned.replace(_STEPFUN_CALLS_BEGIN, "").replace(_STEPFUN_CALLS_END, "")
|
||||
|
||||
|
||||
def _strip_bare_invoke_markup(text: str) -> str:
|
||||
"""Remove bare <invoke ...>...</invoke> blocks without regex backtracking."""
|
||||
out = []
|
||||
pos = 0
|
||||
while True:
|
||||
start = text.lower().find("<invoke", pos)
|
||||
if start < 0:
|
||||
out.append(text[pos:])
|
||||
break
|
||||
tag_end = text.find(">", start)
|
||||
if tag_end < 0:
|
||||
out.append(text[pos:])
|
||||
break
|
||||
close = text.lower().find("</invoke>", tag_end + 1)
|
||||
if close < 0:
|
||||
out.append(text[pos:])
|
||||
break
|
||||
out.append(text[pos:start])
|
||||
pos = close + len("</invoke>")
|
||||
return "".join(out)
|
||||
|
||||
|
||||
def _parse_stepfun_tool_call(tool_name: str, body: str) -> Optional[ToolBlock]:
|
||||
"""Parse StepFun native tool-call tokens into an Odysseus ToolBlock."""
|
||||
tool_name = tool_name.lower().replace("-", "_").replace(".", "_")
|
||||
@@ -780,6 +802,6 @@ def strip_tool_blocks(text: str, skip_fenced: bool = False) -> str:
|
||||
_, (start, end) = raw_web_json
|
||||
cleaned = cleaned[:start] + cleaned[end:]
|
||||
# Strip bare <invoke> blocks not wrapped in <tool_call>
|
||||
cleaned = _XML_INVOKE_RE.sub('', cleaned)
|
||||
cleaned = _strip_bare_invoke_markup(cleaned)
|
||||
cleaned = re.sub(r'\n{3,}', '\n\n', cleaned)
|
||||
return cleaned.strip()
|
||||
|
||||
Reference in New Issue
Block a user