Allow serving cached local llama.cpp models

Co-authored-by: Kevin <120500656+oooindefatigable@users.noreply.github.com>
2026-06-17 10:15:27 -04:00 · 2026-06-01 10:10:08 -04:00
parent 9b1acf6612
commit 5e47e69e99
3 changed files with 35 additions and 5 deletions
@@ -16,6 +16,11 @@ logger = logging.getLogger(__name__)
 # HuggingFace repo IDs are <org>/<name>, both alphanumerics plus ._-
 # Rejecting anything else up front closes off shell-interpolation vectors.
 _REPO_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*/[A-Za-z0-9][A-Za-z0-9._-]*$")
+# Cached models scanned from a custom/local model dir are keyed by their leaf
+# folder name (no slash), e.g. `DeepSeek-R1-UD-IQ4_XS`. The serve command uses
+# the real on-disk path separately; this identifier is only for UI/task
+# bookkeeping, so serving should accept the same safe glyph set as repo IDs.
+# Anchored with \Z rather than $: in Python, $ also matches just before a
+# trailing newline, which would let "name\n" through a .match() check.
+_LOCAL_MODEL_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*\Z")
 # Include pattern is a glob: allow typical safe glyphs only.
 _INCLUDE_RE = re.compile(r"^[A-Za-z0-9._\-*?/\[\]]+$")
 # Remote host: user@host (optionally with :port-free hostname parts).
@@ -40,6 +45,14 @@ def _validate_repo_id(v: str | None) -> str:
    return v


+def _validate_serve_model_id(v: str | None) -> str:
+    """Validate a model identifier used for serving.
+
+    Accepts either an ``<org>/<name>`` repo id or a slash-free cached local
+    model id; both forms are restricted to the glyph set ``[A-Za-z0-9._-]``.
+
+    Returns:
+        ``v`` unchanged when it is valid.
+
+    Raises:
+        HTTPException: (400) when ``v`` is missing/empty or matches neither form.
+    """
+    if not v:
+        raise HTTPException(400, "repo_id is required")
+    # fullmatch, not match: a pattern ending in "$" also matches just before a
+    # trailing newline, so match() would accept "org/name\n" as valid.
+    if _REPO_ID_RE.fullmatch(v) or _LOCAL_MODEL_ID_RE.fullmatch(v):
+        return v
+    raise HTTPException(400, "Invalid repo_id — must be <org>/<name> or a cached local model id using [A-Za-z0-9._-]")
+
+
 def _validate_include(v: str | None) -> str | None:
    if v is None or v == "":
        return None