mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-17 10:15:27 -04:00
fix(cookbook): allow spaces and non-ASCII characters in model directory paths (#3473)
* fix(cookbook): allow spaces in model directory paths Allow POSIX external-drive paths and Windows drive paths with spaces while keeping shell metacharacters rejected. * fix(cookbook): also allow non-ASCII (Unicode) characters in model dir paths The ASCII-only allowlist that rejected spaces also rejected Cyrillic, accented Latin and CJK folder names (e.g. /Volumes/Модели, D:\AI Models\Модели) with 400 Invalid local_dir. Switch the path character class from [A-Za-z0-9._ -] to [\w. -] (\w is Unicode-aware on Python 3 str patterns) so localized folder names validate, while shell metacharacters (; & | ` $ quotes newlines) stay rejected. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * fix(cookbook): reject local_dir path segments starting with '-' The local_dir allowlist includes '-', so a directory like /models/-rf (or D:\models\-rf) could be parsed as a CLI flag by hf/etc. (option injection) — and quoting does not stop a value from being read as an option. Guard against it inside the validator so the safety stays fully self-contained there rather than depending on consumers' quoting. --------- Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -42,9 +42,16 @@ _SESSION_ID_RE = re.compile(r"^[A-Za-z0-9_-]{1,64}$")
|
||||
_SSH_PORT_RE = re.compile(r"^\d{1,5}$")
|
||||
_GPU_LIST_RE = re.compile(r"^\d+(?:,\d+)*$")
|
||||
# A download target directory. Absolute or ~-relative path; safe path glyphs
|
||||
# only (no quotes, shell metacharacters, or spaces) since it lands in a shell
|
||||
# command. A leading ~ is expanded to $HOME at command-build time.
|
||||
_LOCAL_DIR_RE = re.compile(r"^~?/[A-Za-z0-9._/-]*$|^~$")
|
||||
# only (no quotes or shell metacharacters). Spaces are allowed because command
|
||||
# builders pass the value through quoted shell/Python contexts. The character
|
||||
# class uses ``\w`` — Unicode word characters under Python 3's default str
|
||||
# matching — so non-ASCII folder names pass validation too: Cyrillic, accented
|
||||
# Latin, CJK, e.g. ``/Volumes/Модели`` or ``D:\AI Models\Модели``. This stays
|
||||
# shell-safe: none of ``; & | ` $ '' "" () {}`` newlines etc. are in ``[\w. -]``,
|
||||
# so injection vectors remain rejected. A leading ~ is expanded to $HOME at
|
||||
# command-build time. (Drive letters stay ASCII: ``[A-Za-z]:``.)
|
||||
_LOCAL_DIR_RE = re.compile(r"^~?(?:/[\w. -]*)+$|^~$")
|
||||
_WINDOWS_LOCAL_DIR_RE = re.compile(r"^[A-Za-z]:[\\/](?:[\w. -]+(?:[\\/][\w. -]+)*[\\/]?)?$")
|
||||
_WINDOWS_DRIVE_PATH_RE = re.compile(r"^[A-Za-z]:[\\/]")
|
||||
|
||||
|
||||
@@ -97,9 +104,19 @@ def _validate_token(v: str | None) -> str | None:
|
||||
def _validate_local_dir(v: str | None) -> str | None:
|
||||
if v is None or v == "":
|
||||
return None
|
||||
if len(v) >= 2 and v[0] == v[-1] and v[0] in {"'", '"'}:
|
||||
v = v[1:-1]
|
||||
v = v.rstrip("/") or "/"
|
||||
if not _LOCAL_DIR_RE.match(v):
|
||||
raise HTTPException(400, "Invalid local_dir — must be an absolute or ~ path with no spaces or shell metacharacters")
|
||||
if not (_LOCAL_DIR_RE.match(v) or _WINDOWS_LOCAL_DIR_RE.match(v)):
|
||||
raise HTTPException(400, "Invalid local_dir — must be an absolute or ~ path with no shell metacharacters")
|
||||
# Reject path segments that start with '-' (option injection). '-' is in the
|
||||
# allowlist, so a dir like ``/models/-rf`` or ``D:\models\-rf`` could be read
|
||||
# as a CLI flag by hf/etc. — and quoting does NOT stop a value from being
|
||||
# parsed as an option. This is the one residual that command-build-time
|
||||
# quoting can't cover, so the guard lives here, keeping the safety wholly
|
||||
# inside the validator rather than relying on consumers.
|
||||
if any(seg.startswith("-") for seg in re.split(r"[\\/]", v) if seg):
|
||||
raise HTTPException(400, "Invalid local_dir — path segments cannot start with '-'")
|
||||
return v
|
||||
|
||||
|
||||
@@ -125,7 +142,7 @@ def _validate_gpus(v: str | None) -> str | None:
|
||||
def _shell_path(p: str) -> str:
|
||||
"""Render a validated path for a double-quoted shell context, expanding a
|
||||
leading ~ to $HOME (single quotes wouldn't expand it). Safe because
|
||||
_validate_local_dir already restricts the charset."""
|
||||
_validate_local_dir already rejects quotes and shell metacharacters."""
|
||||
if p == "~":
|
||||
return '"$HOME"'
|
||||
if p.startswith("~/"):
|
||||
@@ -386,6 +403,7 @@ def _cached_model_scan_script(model_dirs: list[str] | None = None, add_hf_cache:
|
||||
" for root, dirs, fns in safe_walk(base):",
|
||||
" for fn in sorted(fns):",
|
||||
" if not fn.lower().endswith('.gguf'): continue",
|
||||
" if fn.startswith('._'): continue # macOS AppleDouble sidecar, not a real GGUF",
|
||||
" fp = os.path.join(root, fn)",
|
||||
" try: size = os.path.getsize(fp)",
|
||||
" except Exception: size = 0",
|
||||
|
||||
Reference in New Issue
Block a user