Clear remaining CodeQL path and parser alerts

This commit is contained in:
pewdiepie-archdaemon
2026-06-22 02:45:05 +00:00
parent fbdec22dcb
commit 993d504de3
2 changed files with 53 additions and 10 deletions
+30 -9
View File
@@ -1645,13 +1645,12 @@ def setup_email_routes():
return {"error": f"Attachment index {index} not found"}
from pathlib import Path as _Path
target_root = _Path(target_dir).resolve()
filepath = _Path(filepath).resolve()
try:
filepath.relative_to(target_root)
except ValueError:
target_root = os.path.abspath(str(target_dir))
filepath_str = os.path.abspath(str(filepath))
if os.path.commonpath([target_root, filepath_str]) != target_root:
logger.warning("Rejected attachment path outside extraction dir: %s", filepath)
return {"error": "Invalid attachment path"}
filepath = _Path(filepath_str)
base = _Path(filepath).name
if base.startswith("."):
return {"error": "Invalid filename", "filename": base}
@@ -1728,8 +1727,7 @@ def setup_email_routes():
_tag_doc_with_source(doc_id)
return doc_id
def _attached_email_markdown(path):
raw_bytes = path.read_bytes()
def _attached_email_markdown(raw_bytes: bytes):
if not raw_bytes:
return f"# Attached email: {base}\n\n_(empty email attachment)_"
try:
@@ -1814,9 +1812,32 @@ def setup_email_routes():
# ── Attached email (.eml / message/rfc822) ────────────────
if ext == ".eml":
def _attachment_bytes_from_msg():
    """Return the raw bytes of the attachment at position ``index`` in ``msg``.

    Reads the enclosing scope's ``msg`` (the parsed email) and ``index``
    (the zero-based attachment position). Parts are enumerated with
    ``msg.walk()``; multipart containers and inline text/plain or
    text/html bodies are not counted, except that a ``message/rfc822``
    part marked as an attachment (or carrying a filename) is counted even
    though it is multipart.

    Returns ``b""`` when the message is not multipart, when no part sits
    at the requested position, or when the selected part has no payload.
    """
    if not msg.is_multipart():
        return b""

    position = 0
    for part in msg.walk():
        disposition = str(part.get("Content-Disposition", ""))
        content_type = part.get_content_type()
        is_attached_email = content_type == "message/rfc822" and (
            "attachment" in disposition.lower() or part.get_filename()
        )

        # Pure containers hold no attachment bytes of their own.
        if part.is_multipart() and not is_attached_email:
            continue
        # Inline body text is not an attachment.
        # NOTE(review): this check is case-sensitive while the rfc822 check
        # above lower-cases the header -- confirm which one the extraction
        # code that assigned `index` uses before changing either.
        if content_type in ("text/plain", "text/html") and "attachment" not in disposition:
            continue

        if position != index:
            position += 1
            continue

        payload = part.get_payload(decode=True)
        if payload is None and content_type == "message/rfc822":
            # get_payload(decode=True) yields None for multipart parts;
            # fall back to serialising the part itself.
            try:
                payload = part.as_bytes()
            except Exception:
                payload = b""
        return payload or b""

    return b""
try:
content = _attached_email_markdown(filepath)
except Exception as e:
content = _attached_email_markdown(_attachment_bytes_from_msg())
except Exception:
logger.exception("Failed to read email attachment %s", base)
return {"error": "Failed to read email attachment", "filename": base}
doc_id = _create_markdown_doc(content, "Imported attached email")
+23 -1
View File
@@ -545,6 +545,28 @@ def _strip_stepfun_tool_markup(text: str) -> str:
return cleaned.replace(_STEPFUN_CALLS_BEGIN, "").replace(_STEPFUN_CALLS_END, "")
def _strip_bare_invoke_markup(text: str) -> str:
"""Remove bare <invoke ...>...</invoke> blocks without regex backtracking."""
out = []
pos = 0
while True:
start = text.lower().find("<invoke", pos)
if start < 0:
out.append(text[pos:])
break
tag_end = text.find(">", start)
if tag_end < 0:
out.append(text[pos:])
break
close = text.lower().find("</invoke>", tag_end + 1)
if close < 0:
out.append(text[pos:])
break
out.append(text[pos:start])
pos = close + len("</invoke>")
return "".join(out)
def _parse_stepfun_tool_call(tool_name: str, body: str) -> Optional[ToolBlock]:
"""Parse StepFun native tool-call tokens into an Odysseus ToolBlock."""
tool_name = tool_name.lower().replace("-", "_").replace(".", "_")
@@ -780,6 +802,6 @@ def strip_tool_blocks(text: str, skip_fenced: bool = False) -> str:
_, (start, end) = raw_web_json
cleaned = cleaned[:start] + cleaned[end:]
# Strip bare <invoke> blocks not wrapped in <tool_call>
cleaned = _XML_INVOKE_RE.sub('', cleaned)
cleaned = _strip_bare_invoke_markup(cleaned)
cleaned = re.sub(r'\n{3,}', '\n\n', cleaned)
return cleaned.strip()