fix(tools): strict path confinement with sensitive-subpath deny list (#1072)

Rework read_file / write_file confinement after review feedback:

- Remove $HOME from default allow roots. Only project data/ and system
  temp dirs are allowed out of the box.
- Add a sensitive-subpath deny list (.ssh, .gnupg, shell rc files,
  .env, .netrc, SSH key filenames). Checked BEFORE allowlist so it
  blocks even when a broader root is configured.
- Add "tool_path_extra_roots" setting for opt-in broader access.
- Sensitive subpaths remain blocked regardless of configured roots.

Tests: 24 cases covering /etc/shadow, ~/.ssh/authorized_keys,
symlink into .ssh, traversal, shell rc files, key filenames,
extra roots, and dispatch-level end-to-end.
This commit is contained in:
Ernest Hysa
2026-06-02 15:13:30 +01:00
committed by GitHub
parent 16f7feee0a
commit c12ae79c42
3 changed files with 434 additions and 7 deletions
+147 -7
View File
@@ -21,6 +21,143 @@ from src.tool_security import is_public_blocked_tool, owner_is_admin_or_single_u
MAX_OUTPUT_CHARS = 10_000
MAX_READ_CHARS = 20_000
# ---------------------------------------------------------------------------
# Path confinement for read_file / write_file
# ---------------------------------------------------------------------------
# read_file + write_file are admin-only tools, but the path the agent
# supplies is model-controlled. Prompt-injection in an admin's chat can
# weaponise "read /etc/shadow" or "write ~/.ssh/authorized_keys" without
# the admin noticing.
#
# Policy:
# 1. Sensitive-subpath deny list — checked FIRST. Blocks .ssh,
# .gnupg, shell rc files, token/env files even if the root above
# them is on the allowlist.
# 2. Allowlist — only the directories the agent legitimately needs
# (project data/, system tmp). $HOME is NOT on the default list.
# 3. Opt-in extra roots — admin can add broader roots via the
# "tool_path_extra_roots" setting (list of path strings).
# ---------------------------------------------------------------------------
_SENSITIVE_BASENAMES: set[str] = {
".ssh", ".gnupg", ".gitconfig",
".bashrc", ".bash_profile", ".bash_logout",
".zshrc", ".zprofile", ".zshenv",
".profile", ".tcshrc", ".cshrc",
".env", ".netrc",
}
_SENSITIVE_FILE_PATTERNS: tuple[str, ...] = (
"authorized_keys", "id_rsa", "id_ed25519", "id_ecdsa",
"known_hosts",
)
def _is_sensitive_path(resolved: str) -> bool:
"""Return True if *resolved* falls under a sensitive directory or
matches a sensitive filename — regardless of what root it sits under.
"""
parts = resolved.split(os.sep)
filenames: set[str] = {parts[-1]} if parts else set()
# Check if any path component is a sensitive directory.
for part in parts:
if part in _SENSITIVE_BASENAMES:
return True
# Check filename against known sensitive files.
for pat in _SENSITIVE_FILE_PATTERNS:
if pat in filenames:
return True
return False
def _tool_path_roots() -> list[str]:
"""Return the list of directory roots that read_file / write_file
may touch. Default: project data/ + system temp dirs. Extra roots
are loaded from the ``tool_path_extra_roots`` setting.
"""
roots: list[str] = []
# Project data directory — the agent's primary workspace.
from src.constants import DATA_DIR
roots.append(DATA_DIR)
# /tmp (and its macOS realpath /private/tmp).
roots.append("/tmp")
try:
private_tmp = os.path.realpath("/tmp")
if private_tmp != "/tmp":
roots.append(private_tmp)
except OSError:
pass
# $TMPDIR — per-user temp root on macOS (e.g. /var/folders/.../T/).
tmpdir = os.environ.get("TMPDIR")
if tmpdir:
roots.append(tmpdir)
# Opt-in extra roots from settings.
try:
from src.settings import get_setting
extra = get_setting("tool_path_extra_roots")
if isinstance(extra, list):
roots.extend(str(r) for r in extra if r)
except Exception:
pass
# Deduplicate; resolve symlinks so containment is unambiguous.
seen: set[str] = set()
out: list[str] = []
for r in roots:
try:
real = os.path.realpath(r)
except OSError:
continue
if real in seen:
continue
seen.add(real)
out.append(real)
return out
def _resolve_tool_path(raw_path: str) -> str:
"""Resolve and confine a model-supplied path.
Order of checks:
1. Non-empty path.
2. Sensitive-subpath deny list (blocks .ssh, .gnupg, etc.
even when the root is on the allowlist).
3. Allowlist containment (must land under one of the roots).
Returns the realpath on success. Raises ValueError on rejection.
Symlinks are resolved before comparison.
"""
if raw_path is None or not str(raw_path).strip():
raise ValueError("path is required")
expanded = os.path.expanduser(str(raw_path).strip())
resolved = os.path.realpath(expanded)
if _is_sensitive_path(resolved):
raise ValueError(
f"path '{raw_path}' is inside a sensitive directory "
f"(e.g. .ssh, .gnupg) or matches a sensitive filename"
)
for root in _tool_path_roots():
if resolved == root:
return resolved
try:
common = os.path.commonpath([resolved, root])
except ValueError:
continue
if common == root:
return resolved
raise ValueError(
f"path '{raw_path}' is outside the allowed roots"
)
# Bash + python tools used to share a single 60s timeout. That's
# enough for one-shot commands but starves real workloads (pip
# install, ffmpeg conversions, etc.) — and worse, the agent saw the
@@ -375,9 +512,11 @@ async def _direct_fallback(
return {"output": output or "(no output)", "exit_code": rc or 0}
if tool == "read_file":
path = os.path.expanduser(content.split("\n", 1)[0].strip())
if not path:
return {"error": "read_file: path required", "exit_code": 1}
raw_path = content.split("\n", 1)[0].strip()
try:
path = _resolve_tool_path(raw_path)
except ValueError as e:
return {"error": f"read_file: {e}", "exit_code": 1}
try:
# Run blocking read in a thread to keep the loop responsive
def _read():
@@ -397,13 +536,14 @@ async def _direct_fallback(
if tool == "write_file":
lines = content.split("\n", 1)
path = os.path.expanduser(lines[0].strip())
raw_path = lines[0].strip()
body = lines[1] if len(lines) > 1 else ""
if not path:
return {"error": "write_file: path required", "exit_code": 1}
try:
path = _resolve_tool_path(raw_path)
except ValueError as e:
return {"error": f"write_file: {e}", "exit_code": 1}
try:
def _write():
import os
d = os.path.dirname(path)
if d:
os.makedirs(d, exist_ok=True)