mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-17 10:15:27 -04:00
fix(cookbook): allow spaces and non-ASCII characters in model directory paths (#3473)
* fix(cookbook): allow spaces in model directory paths Allow POSIX external-drive paths and Windows drive paths with spaces while keeping shell metacharacters rejected. * fix(cookbook): also allow non-ASCII (Unicode) characters in model dir paths The ASCII-only allowlist that rejected spaces also rejected Cyrillic, accented Latin and CJK folder names (e.g. /Volumes/Модели, D:\AI Models\Модели) with 400 Invalid local_dir. Switch the path character class from [A-Za-z0-9._ -] to [\w. -] (\w is Unicode-aware on Python 3 str patterns) so localized folder names validate, while shell metacharacters (; & | ` $ quotes newlines) stay rejected. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * fix(cookbook): reject local_dir path segments starting with '-' The local_dir allowlist includes '-', so a directory like /models/-rf (or D:\models\-rf) could be parsed as a CLI flag by hf/etc. (option injection) — and quoting does not stop a value from being read as an option. Guard against it inside the validator so the safety stays fully self-contained there rather than depending on consumers' quoting. --------- Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -42,9 +42,16 @@ _SESSION_ID_RE = re.compile(r"^[A-Za-z0-9_-]{1,64}$")
|
|||||||
_SSH_PORT_RE = re.compile(r"^\d{1,5}$")
|
_SSH_PORT_RE = re.compile(r"^\d{1,5}$")
|
||||||
_GPU_LIST_RE = re.compile(r"^\d+(?:,\d+)*$")
|
_GPU_LIST_RE = re.compile(r"^\d+(?:,\d+)*$")
|
||||||
# A download target directory. Absolute or ~-relative path; safe path glyphs
|
# A download target directory. Absolute or ~-relative path; safe path glyphs
|
||||||
# only (no quotes, shell metacharacters, or spaces) since it lands in a shell
|
# only (no quotes or shell metacharacters). Spaces are allowed because command
|
||||||
# command. A leading ~ is expanded to $HOME at command-build time.
|
# builders pass the value through quoted shell/Python contexts. The character
|
||||||
_LOCAL_DIR_RE = re.compile(r"^~?/[A-Za-z0-9._/-]*$|^~$")
|
# class uses ``\w`` — Unicode word characters under Python 3's default str
|
||||||
|
# matching — so non-ASCII folder names pass validation too: Cyrillic, accented
|
||||||
|
# Latin, CJK, e.g. ``/Volumes/Модели`` or ``D:\AI Models\Модели``. This stays
|
||||||
|
# shell-safe: none of ``; & | ` $ '' "" () {}`` newlines etc. are in ``[\w. -]``,
|
||||||
|
# so injection vectors remain rejected. A leading ~ is expanded to $HOME at
|
||||||
|
# command-build time. (Drive letters stay ASCII: ``[A-Za-z]:``.)
|
||||||
|
_LOCAL_DIR_RE = re.compile(r"^~?(?:/[\w. -]*)+$|^~$")
|
||||||
|
_WINDOWS_LOCAL_DIR_RE = re.compile(r"^[A-Za-z]:[\\/](?:[\w. -]+(?:[\\/][\w. -]+)*[\\/]?)?$")
|
||||||
_WINDOWS_DRIVE_PATH_RE = re.compile(r"^[A-Za-z]:[\\/]")
|
_WINDOWS_DRIVE_PATH_RE = re.compile(r"^[A-Za-z]:[\\/]")
|
||||||
|
|
||||||
|
|
||||||
@@ -97,9 +104,19 @@ def _validate_token(v: str | None) -> str | None:
|
|||||||
def _validate_local_dir(v: str | None) -> str | None:
|
def _validate_local_dir(v: str | None) -> str | None:
|
||||||
if v is None or v == "":
|
if v is None or v == "":
|
||||||
return None
|
return None
|
||||||
|
if len(v) >= 2 and v[0] == v[-1] and v[0] in {"'", '"'}:
|
||||||
|
v = v[1:-1]
|
||||||
v = v.rstrip("/") or "/"
|
v = v.rstrip("/") or "/"
|
||||||
if not _LOCAL_DIR_RE.match(v):
|
if not (_LOCAL_DIR_RE.match(v) or _WINDOWS_LOCAL_DIR_RE.match(v)):
|
||||||
raise HTTPException(400, "Invalid local_dir — must be an absolute or ~ path with no spaces or shell metacharacters")
|
raise HTTPException(400, "Invalid local_dir — must be an absolute or ~ path with no shell metacharacters")
|
||||||
|
# Reject path segments that start with '-' (option injection). '-' is in the
|
||||||
|
# allowlist, so a dir like ``/models/-rf`` or ``D:\models\-rf`` could be read
|
||||||
|
# as a CLI flag by hf/etc. — and quoting does NOT stop a value from being
|
||||||
|
# parsed as an option. This is the one residual that command-build-time
|
||||||
|
# quoting can't cover, so the guard lives here, keeping the safety wholly
|
||||||
|
# inside the validator rather than relying on consumers.
|
||||||
|
if any(seg.startswith("-") for seg in re.split(r"[\\/]", v) if seg):
|
||||||
|
raise HTTPException(400, "Invalid local_dir — path segments cannot start with '-'")
|
||||||
return v
|
return v
|
||||||
|
|
||||||
|
|
||||||
@@ -125,7 +142,7 @@ def _validate_gpus(v: str | None) -> str | None:
|
|||||||
def _shell_path(p: str) -> str:
|
def _shell_path(p: str) -> str:
|
||||||
"""Render a validated path for a double-quoted shell context, expanding a
|
"""Render a validated path for a double-quoted shell context, expanding a
|
||||||
leading ~ to $HOME (single quotes wouldn't expand it). Safe because
|
leading ~ to $HOME (single quotes wouldn't expand it). Safe because
|
||||||
_validate_local_dir already restricts the charset."""
|
_validate_local_dir already rejects quotes and shell metacharacters."""
|
||||||
if p == "~":
|
if p == "~":
|
||||||
return '"$HOME"'
|
return '"$HOME"'
|
||||||
if p.startswith("~/"):
|
if p.startswith("~/"):
|
||||||
@@ -386,6 +403,7 @@ def _cached_model_scan_script(model_dirs: list[str] | None = None, add_hf_cache:
|
|||||||
" for root, dirs, fns in safe_walk(base):",
|
" for root, dirs, fns in safe_walk(base):",
|
||||||
" for fn in sorted(fns):",
|
" for fn in sorted(fns):",
|
||||||
" if not fn.lower().endswith('.gguf'): continue",
|
" if not fn.lower().endswith('.gguf'): continue",
|
||||||
|
" if fn.startswith('._'): continue # macOS AppleDouble sidecar, not a real GGUF",
|
||||||
" fp = os.path.join(root, fn)",
|
" fp = os.path.join(root, fn)",
|
||||||
" try: size = os.path.getsize(fp)",
|
" try: size = os.path.getsize(fp)",
|
||||||
" except Exception: size = 0",
|
" except Exception: size = 0",
|
||||||
|
|||||||
@@ -22,10 +22,12 @@ from routes.cookbook_helpers import (
|
|||||||
_user_shell_path_bootstrap,
|
_user_shell_path_bootstrap,
|
||||||
_venv_safe_local_pip_install_cmd,
|
_venv_safe_local_pip_install_cmd,
|
||||||
_validate_gpus,
|
_validate_gpus,
|
||||||
|
_validate_local_dir,
|
||||||
_validate_repo_id,
|
_validate_repo_id,
|
||||||
_validate_serve_cmd,
|
_validate_serve_cmd,
|
||||||
_validate_serve_model_id,
|
_validate_serve_model_id,
|
||||||
_validate_ssh_port,
|
_validate_ssh_port,
|
||||||
|
_shell_path,
|
||||||
run_ssh_command_async,
|
run_ssh_command_async,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -110,6 +112,89 @@ def test_validate_ssh_port_rejects_shell_payload():
|
|||||||
assert _validate_ssh_port("2222") == "2222"
|
assert _validate_ssh_port("2222") == "2222"
|
||||||
|
|
||||||
|
|
||||||
|
def test_validate_local_dir_accepts_external_drive_paths_with_spaces():
|
||||||
|
path = "/Volumes/T7 2TB/AI Models/llamacpp"
|
||||||
|
|
||||||
|
assert _validate_local_dir(path) == path
|
||||||
|
assert _validate_local_dir(f'"{path}"') == path
|
||||||
|
assert _shell_path(f"{path}/Qwen3-8B") == '"/Volumes/T7 2TB/AI Models/llamacpp/Qwen3-8B"'
|
||||||
|
|
||||||
|
|
||||||
|
def test_validate_local_dir_accepts_windows_drive_paths_with_spaces():
|
||||||
|
backslash_path = r"D:\AI Models\llamacpp"
|
||||||
|
slash_path = "D:/AI Models/llamacpp"
|
||||||
|
|
||||||
|
assert _validate_local_dir(backslash_path) == backslash_path
|
||||||
|
assert _validate_local_dir(f"'{backslash_path}'") == backslash_path
|
||||||
|
assert _validate_local_dir(slash_path) == slash_path
|
||||||
|
assert _shell_path(backslash_path + r"\Qwen3-8B") == '"D:\\AI Models\\llamacpp\\Qwen3-8B"'
|
||||||
|
|
||||||
|
|
||||||
|
def test_validate_local_dir_still_rejects_shell_metacharacters():
|
||||||
|
for path in [
|
||||||
|
"/Volumes/T7 2TB/AI Models; touch /tmp/pwned",
|
||||||
|
"/Volumes/T7 2TB/AI Models/$(touch pwned)",
|
||||||
|
"/Volumes/T7 2TB/AI Models/`touch pwned`",
|
||||||
|
"/Volumes/T7 2TB/AI Models/model\nnext",
|
||||||
|
]:
|
||||||
|
with pytest.raises(HTTPException):
|
||||||
|
_validate_local_dir(path)
|
||||||
|
|
||||||
|
|
||||||
|
def test_validate_local_dir_rejects_windows_shell_metacharacters():
|
||||||
|
for path in [
|
||||||
|
r"D:\AI Models\llamacpp; touch C:\pwned",
|
||||||
|
r"D:\AI Models\llamacpp\$(touch pwned)",
|
||||||
|
r"D:\AI Models\llamacpp\`touch pwned`",
|
||||||
|
"D:\\AI Models\\llamacpp\nnext",
|
||||||
|
]:
|
||||||
|
with pytest.raises(HTTPException):
|
||||||
|
_validate_local_dir(path)
|
||||||
|
|
||||||
|
|
||||||
|
def test_validate_local_dir_accepts_non_ascii_unicode_paths():
|
||||||
|
# Folder names are routinely non-ASCII on localized systems; the validator
|
||||||
|
# must accept them the same way it accepts spaces (see issue: spaces AND
|
||||||
|
# non-ASCII chars were both rejected by the old ASCII-only allowlist).
|
||||||
|
for path in [
|
||||||
|
"/Volumes/Модели/llamacpp", # Cyrillic (POSIX / external drive)
|
||||||
|
"/home/josé/models", # accented Latin
|
||||||
|
"/Volumes/モデル/llm", # CJK
|
||||||
|
r"D:\AI Models\Модели", # Cyrillic (Windows drive path)
|
||||||
|
]:
|
||||||
|
assert _validate_local_dir(path) == path
|
||||||
|
|
||||||
|
|
||||||
|
def test_validate_local_dir_rejects_metacharacters_in_unicode_paths():
|
||||||
|
# Widening the allowlist to Unicode must not reopen the injection surface:
|
||||||
|
# shell metacharacters stay rejected even alongside non-ASCII segments.
|
||||||
|
for path in [
|
||||||
|
"/Volumes/Модели; touch /tmp/pwned",
|
||||||
|
"/Volumes/Модели/$(touch pwned)",
|
||||||
|
"/Volumes/Модели/`touch pwned`",
|
||||||
|
"/Volumes/Модели/a|b",
|
||||||
|
"/Volumes/Модели\nnext",
|
||||||
|
r"D:\Модели\llamacpp & calc.exe",
|
||||||
|
]:
|
||||||
|
with pytest.raises(HTTPException):
|
||||||
|
_validate_local_dir(path)
|
||||||
|
|
||||||
|
|
||||||
|
def test_validate_local_dir_rejects_leading_dash_segments():
|
||||||
|
# A path segment starting with '-' could be parsed as a CLI option by hf/etc.
|
||||||
|
# (option injection) even when quoted, since quoting doesn't stop a value from
|
||||||
|
# being read as a flag. The validator must reject it on every platform.
|
||||||
|
for path in [
|
||||||
|
"/models/-rf",
|
||||||
|
"/models/-rf/llamacpp",
|
||||||
|
"/-oStrictHostKeyChecking=no",
|
||||||
|
r"D:\models\-rf",
|
||||||
|
"D:/models/-rf",
|
||||||
|
]:
|
||||||
|
with pytest.raises(HTTPException):
|
||||||
|
_validate_local_dir(path)
|
||||||
|
|
||||||
|
|
||||||
def test_validate_gpus_accepts_indexes_only():
|
def test_validate_gpus_accepts_indexes_only():
|
||||||
assert _validate_gpus("0,1,2") == "0,1,2"
|
assert _validate_gpus("0,1,2") == "0,1,2"
|
||||||
with pytest.raises(HTTPException):
|
with pytest.raises(HTTPException):
|
||||||
|
|||||||
Reference in New Issue
Block a user