fix(cookbook): allow spaces and non-ASCII characters in model directory paths (#3473)

* fix(cookbook): allow spaces in model directory paths

Allow POSIX external-drive paths and Windows drive paths with spaces while keeping shell metacharacters rejected.

* fix(cookbook): also allow non-ASCII (Unicode) characters in model dir paths

The ASCII-only allowlist that rejected spaces also rejected Cyrillic,
accented Latin and CJK folder names (e.g. /Volumes/Модели,
D:\AI Models\Модели) with 400 Invalid local_dir. Switch the path
character class from [A-Za-z0-9._ -] to [\w. -] (\w is Unicode-aware on
Python 3 str patterns) so localized folder names validate, while shell
metacharacters (; & | ` $ quotes newlines) stay rejected.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

* fix(cookbook): reject local_dir path segments starting with '-'

The local_dir allowlist includes '-', so a directory like /models/-rf
(or D:\models\-rf) could be parsed as a CLI flag by hf/etc. (option
injection) — and quoting does not stop a value from being read as an
option. Guard against it inside the validator so the safety stays fully
self-contained there rather than depending on consumers' quoting.

---------

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Disorder AA
2026-06-09 07:58:38 +02:00
committed by GitHub
parent 8ae2b5f58c
commit d9141c6e56
2 changed files with 109 additions and 6 deletions
+24 -6
View File
@@ -42,9 +42,16 @@ _SESSION_ID_RE = re.compile(r"^[A-Za-z0-9_-]{1,64}$")
_SSH_PORT_RE = re.compile(r"^\d{1,5}$") _SSH_PORT_RE = re.compile(r"^\d{1,5}$")
_GPU_LIST_RE = re.compile(r"^\d+(?:,\d+)*$") _GPU_LIST_RE = re.compile(r"^\d+(?:,\d+)*$")
# A download target directory. Absolute or ~-relative path; safe path glyphs # A download target directory. Absolute or ~-relative path; safe path glyphs
# only (no quotes, shell metacharacters, or spaces) since it lands in a shell # only (no quotes or shell metacharacters). Spaces are allowed because command
# command. A leading ~ is expanded to $HOME at command-build time. # builders pass the value through quoted shell/Python contexts. The character
_LOCAL_DIR_RE = re.compile(r"^~?/[A-Za-z0-9._/-]*$|^~$") # class uses ``\w`` — Unicode word characters under Python 3's default str
# matching — so non-ASCII folder names pass validation too: Cyrillic, accented
# Latin, CJK, e.g. ``/Volumes/Модели`` or ``D:\AI Models\Модели``. This stays
# shell-safe: none of ``; & | ` $ '' "" () {}`` newlines etc. are in ``[\w. -]``,
# so injection vectors remain rejected. A leading ~ is expanded to $HOME at
# command-build time. (Drive letters stay ASCII: ``[A-Za-z]:``.)
_LOCAL_DIR_RE = re.compile(r"^~?(?:/[\w. -]*)+$|^~$")
_WINDOWS_LOCAL_DIR_RE = re.compile(r"^[A-Za-z]:[\\/](?:[\w. -]+(?:[\\/][\w. -]+)*[\\/]?)?$")
_WINDOWS_DRIVE_PATH_RE = re.compile(r"^[A-Za-z]:[\\/]") _WINDOWS_DRIVE_PATH_RE = re.compile(r"^[A-Za-z]:[\\/]")
@@ -97,9 +104,19 @@ def _validate_token(v: str | None) -> str | None:
def _validate_local_dir(v: str | None) -> str | None: def _validate_local_dir(v: str | None) -> str | None:
if v is None or v == "": if v is None or v == "":
return None return None
if len(v) >= 2 and v[0] == v[-1] and v[0] in {"'", '"'}:
v = v[1:-1]
v = v.rstrip("/") or "/" v = v.rstrip("/") or "/"
if not _LOCAL_DIR_RE.match(v): if not (_LOCAL_DIR_RE.match(v) or _WINDOWS_LOCAL_DIR_RE.match(v)):
raise HTTPException(400, "Invalid local_dir — must be an absolute or ~ path with no spaces or shell metacharacters") raise HTTPException(400, "Invalid local_dir — must be an absolute or ~ path with no shell metacharacters")
# Reject path segments that start with '-' (option injection). '-' is in the
# allowlist, so a dir like ``/models/-rf`` or ``D:\models\-rf`` could be read
# as a CLI flag by hf/etc. — and quoting does NOT stop a value from being
# parsed as an option. This is the one residual that command-build-time
# quoting can't cover, so the guard lives here, keeping the safety wholly
# inside the validator rather than relying on consumers.
if any(seg.startswith("-") for seg in re.split(r"[\\/]", v) if seg):
raise HTTPException(400, "Invalid local_dir — path segments cannot start with '-'")
return v return v
@@ -125,7 +142,7 @@ def _validate_gpus(v: str | None) -> str | None:
def _shell_path(p: str) -> str: def _shell_path(p: str) -> str:
"""Render a validated path for a double-quoted shell context, expanding a """Render a validated path for a double-quoted shell context, expanding a
leading ~ to $HOME (single quotes wouldn't expand it). Safe because leading ~ to $HOME (single quotes wouldn't expand it). Safe because
_validate_local_dir already restricts the charset.""" _validate_local_dir already rejects quotes and shell metacharacters."""
if p == "~": if p == "~":
return '"$HOME"' return '"$HOME"'
if p.startswith("~/"): if p.startswith("~/"):
@@ -386,6 +403,7 @@ def _cached_model_scan_script(model_dirs: list[str] | None = None, add_hf_cache:
" for root, dirs, fns in safe_walk(base):", " for root, dirs, fns in safe_walk(base):",
" for fn in sorted(fns):", " for fn in sorted(fns):",
" if not fn.lower().endswith('.gguf'): continue", " if not fn.lower().endswith('.gguf'): continue",
" if fn.startswith('._'): continue # macOS AppleDouble sidecar, not a real GGUF",
" fp = os.path.join(root, fn)", " fp = os.path.join(root, fn)",
" try: size = os.path.getsize(fp)", " try: size = os.path.getsize(fp)",
" except Exception: size = 0", " except Exception: size = 0",
+85
View File
@@ -22,10 +22,12 @@ from routes.cookbook_helpers import (
_user_shell_path_bootstrap, _user_shell_path_bootstrap,
_venv_safe_local_pip_install_cmd, _venv_safe_local_pip_install_cmd,
_validate_gpus, _validate_gpus,
_validate_local_dir,
_validate_repo_id, _validate_repo_id,
_validate_serve_cmd, _validate_serve_cmd,
_validate_serve_model_id, _validate_serve_model_id,
_validate_ssh_port, _validate_ssh_port,
_shell_path,
run_ssh_command_async, run_ssh_command_async,
) )
@@ -110,6 +112,89 @@ def test_validate_ssh_port_rejects_shell_payload():
assert _validate_ssh_port("2222") == "2222" assert _validate_ssh_port("2222") == "2222"
def test_validate_local_dir_accepts_external_drive_paths_with_spaces():
path = "/Volumes/T7 2TB/AI Models/llamacpp"
assert _validate_local_dir(path) == path
assert _validate_local_dir(f'"{path}"') == path
assert _shell_path(f"{path}/Qwen3-8B") == '"/Volumes/T7 2TB/AI Models/llamacpp/Qwen3-8B"'
def test_validate_local_dir_accepts_windows_drive_paths_with_spaces():
backslash_path = r"D:\AI Models\llamacpp"
slash_path = "D:/AI Models/llamacpp"
assert _validate_local_dir(backslash_path) == backslash_path
assert _validate_local_dir(f"'{backslash_path}'") == backslash_path
assert _validate_local_dir(slash_path) == slash_path
assert _shell_path(backslash_path + r"\Qwen3-8B") == '"D:\\AI Models\\llamacpp\\Qwen3-8B"'
def test_validate_local_dir_still_rejects_shell_metacharacters():
for path in [
"/Volumes/T7 2TB/AI Models; touch /tmp/pwned",
"/Volumes/T7 2TB/AI Models/$(touch pwned)",
"/Volumes/T7 2TB/AI Models/`touch pwned`",
"/Volumes/T7 2TB/AI Models/model\nnext",
]:
with pytest.raises(HTTPException):
_validate_local_dir(path)
def test_validate_local_dir_rejects_windows_shell_metacharacters():
for path in [
r"D:\AI Models\llamacpp; touch C:\pwned",
r"D:\AI Models\llamacpp\$(touch pwned)",
r"D:\AI Models\llamacpp\`touch pwned`",
"D:\\AI Models\\llamacpp\nnext",
]:
with pytest.raises(HTTPException):
_validate_local_dir(path)
def test_validate_local_dir_accepts_non_ascii_unicode_paths():
# Folder names are routinely non-ASCII on localized systems; the validator
# must accept them the same way it accepts spaces (see issue: spaces AND
# non-ASCII chars were both rejected by the old ASCII-only allowlist).
for path in [
"/Volumes/Модели/llamacpp", # Cyrillic (POSIX / external drive)
"/home/josé/models", # accented Latin
"/Volumes/モデル/llm", # CJK
r"D:\AI Models\Модели", # Cyrillic (Windows drive path)
]:
assert _validate_local_dir(path) == path
def test_validate_local_dir_rejects_metacharacters_in_unicode_paths():
# Widening the allowlist to Unicode must not reopen the injection surface:
# shell metacharacters stay rejected even alongside non-ASCII segments.
for path in [
"/Volumes/Модели; touch /tmp/pwned",
"/Volumes/Модели/$(touch pwned)",
"/Volumes/Модели/`touch pwned`",
"/Volumes/Модели/a|b",
"/Volumes/Модели\nnext",
r"D:\Модели\llamacpp & calc.exe",
]:
with pytest.raises(HTTPException):
_validate_local_dir(path)
def test_validate_local_dir_rejects_leading_dash_segments():
# A path segment starting with '-' could be parsed as a CLI option by hf/etc.
# (option injection) even when quoted, since quoting doesn't stop a value from
# being read as a flag. The validator must reject it on every platform.
for path in [
"/models/-rf",
"/models/-rf/llamacpp",
"/-oStrictHostKeyChecking=no",
r"D:\models\-rf",
"D:/models/-rf",
]:
with pytest.raises(HTTPException):
_validate_local_dir(path)
def test_validate_gpus_accepts_indexes_only(): def test_validate_gpus_accepts_indexes_only():
assert _validate_gpus("0,1,2") == "0,1,2" assert _validate_gpus("0,1,2") == "0,1,2"
with pytest.raises(HTTPException): with pytest.raises(HTTPException):