fix(cookbook): scan persisted HF cache paths (#3189)

This commit is contained in:
ooovenenoso
2026-06-07 12:19:47 -04:00
committed by GitHub
parent d7ece5b4a9
commit 681a2a3f2a
2 changed files with 46 additions and 1 deletions
+15 -1
View File
@@ -359,6 +359,20 @@ def _cached_model_scan_script(model_dirs: list[str] | None = None) -> str:
" if os.path.exists(os.path.join(sf, 'model_index.json')): is_diffusion = True",
" for f in collect_ggufs(sf): f['rel_path'] = sd + '/' + f['rel_path']; gguf_files.append(f)",
" models.append({'repo_id':rid,'size_bytes':sz,'nb_files':nf,'has_incomplete':ic,'path':cache,'is_diffusion':is_diffusion,'is_gguf':bool(gguf_files),'gguf_files':gguf_files})",
"def hf_cache_paths():",
" candidates = []",
" def add(p):",
" if not p: return",
" p = os.path.expanduser(p)",
" if p not in candidates: candidates.append(p)",
" add(os.environ.get('HUGGINGFACE_HUB_CACHE'))",
" hf_home = os.environ.get('HF_HOME')",
" if hf_home: add(os.path.join(hf_home, 'hub'))",
" add('~/.cache/huggingface/hub')",
" # Docker images mount ./data/huggingface at /app/.cache/huggingface.",
" # When HOME is /root, expanduser() misses that persisted cache.",
" add('/app/.cache/huggingface/hub')",
" return candidates",
"def scan_dir(p):",
" if not os.path.isdir(p) or not safe_path(p): return",
" for d in sorted(os.listdir(p)):",
@@ -422,7 +436,7 @@ def _cached_model_scan_script(model_dirs: list[str] | None = None) -> str:
" seen.add(name)",
" models.append({'repo_id':name,'size_bytes':size_bytes,'nb_files':1,'has_incomplete':False,'path':'ollama','backend':'ollama','is_ollama':True})",
" return",
"scan_hf(os.path.expanduser('~/.cache/huggingface/hub'))",
"for _hf_cache in hf_cache_paths(): scan_hf(_hf_cache)",
"scan_ollama()",
"scan_ollama_api()",
]
+31
View File
@@ -1,4 +1,5 @@
import json
import os
import subprocess
import sys
@@ -557,6 +558,36 @@ def test_cached_model_scan_reports_plain_dir_gguf(tmp_path):
assert ggufs[3]["quant"] == "BF16"
def test_cached_model_scan_uses_huggingface_cache_env(tmp_path):
    """Check that the scan script reports a model found via HUGGINGFACE_HUB_CACHE.

    A fake hub cache is built under ``tmp_path`` and ``HOME`` is set to an
    empty directory; the generated scan script is then run in a subprocess
    with ``HUGGINGFACE_HUB_CACHE`` naming the fake cache.
    """
    hub_dir = tmp_path / "app-cache" / "hub"
    repo_dir = hub_dir / "models--Qwen--Qwen3.6-35B"

    # Minimal on-disk cache entry: one blob plus one snapshot holding a config.
    blobs_dir = repo_dir / "blobs"
    blobs_dir.mkdir(parents=True)
    (blobs_dir / "weights.safetensors").write_bytes(b"weights")
    snapshot_dir = repo_dir / "snapshots" / "abc"
    snapshot_dir.mkdir(parents=True)
    (snapshot_dir / "config.json").write_text("{}", encoding="utf-8")

    fake_home = tmp_path / "home"
    fake_home.mkdir()

    # Write the generated scanner to disk so it runs as a standalone script.
    script_path = tmp_path / "scan_cache_env.py"
    script_path.write_text(_cached_model_scan_script(), encoding="utf-8")

    child_env = {
        **os.environ,
        "HOME": str(fake_home),
        "HUGGINGFACE_HUB_CACHE": str(hub_dir),
    }
    completed = subprocess.run(
        [sys.executable, str(script_path)],
        check=True,
        capture_output=True,
        text=True,
        env=child_env,
    )

    # The script prints a JSON list of model records on stdout.
    found = {}
    for entry in json.loads(completed.stdout):
        found[entry["repo_id"]] = entry
    assert found["Qwen/Qwen3.6-35B"]["path"] == str(hub_dir)
# ── #1219 / #1459: keep big dependency wheel builds off the home pip cache ──
def test_pip_install_no_cache_injects_flag():