Odysseus v1.0

2026-06-16 09:45:24 -04:00 · 2026-05-31 23:58:26 +09:00
commit e5c99a5eee
421 changed files with 271349 additions and 0 deletions
@@ -0,0 +1,234 @@
+#!/usr/bin/env python3
+"""
+add_hwfit_models.py — bulk-add Hugging Face models to the hwfit catalog
+(services/hwfit/data/hf_models.json).
+
+Adds:
+  * every model from one or more HF authors (e.g. cyankiwi's AWQ quants)
+  * any explicitly-listed repos
+
+Metadata is taken from the HF Hub `list_models(full=True)` response plus the
+repo name (which encodes the param size, e.g. "Qwen3.6-35B-A3B"). Param-less
+names fall back to a single per-repo model_info() call to read safetensors.
+
+Re-runnable: merges by `name`, leaving existing entries untouched unless
+--overwrite is passed. Writes a .bak first.
+
+Usage:
+    python3 scripts/add_hwfit_models.py
+"""
+import json
+import os
+import re
+import sys
+from datetime import datetime
+
+from huggingface_hub import HfApi
+
+DATA_PATH = os.path.join(os.path.dirname(__file__), "..", "services", "hwfit", "data", "hf_models.json")
+DATA_PATH = os.path.abspath(DATA_PATH)
+
+AUTHORS = ["cyankiwi"]
+# Specific repos to add (in addition to the authors above). Optional explicit
+# overrides {repo: {field: value}} for things the name/metadata can't convey.
+EXTRA_REPOS = {
+    "deepseek-ai/DeepSeek-V4-Flash":            {"parameter_count": "168B", "quantization": "Q4_K_M"},
+    "MiniMaxAI/MiniMax-M2.7":                   {"parameter_count": "228.7B", "quantization": "Q4_K_M"},
+    "bullerwins/MiniMax-M2.7-REAP-172B-fp8":    {"parameter_count": "172B", "quantization": "FP8"},
+    "cyankiwi/MiniMax-M2.7-AWQ-4bit":           {"parameter_count": "228.7B", "quantization": "AWQ-4bit"},
+}
+
+# Tags that are not architecture names.
+_GENERIC_TAGS = {
+    "transformers", "safetensors", "conversational", "text-generation",
+    "image-text-to-text", "text-generation-inference", "endpoints_compatible",
+    "autotrain_compatible", "compressed-tensors", "gguf", "mlx", "vllm", "4-bit",
+    "8-bit", "awq", "gptq", "fp8", "quantized", "chat",
+}
+
+api = HfApi()
+
+
+def _parse_params(name):
+    """Return (parameters_raw, active_parameters_or_None) from a repo name.
+    Handles dense ("27B") and MoE ("235B-A22B") naming."""
+    base = name.split("/")[-1]
+    active = None
+    m_active = re.search(r"-[Aa](\d+\.?\d*)[Bb](?![a-zA-Z])", base)
+    if m_active:
+        active = int(float(m_active.group(1)) * 1e9)
+        base_wo = base[:m_active.start()] + base[m_active.end():]
+    else:
+        base_wo = base
+    # First "<num>B" token that is a plausible size. Case-insensitive b, but the
+    # negative lookahead means "8bit"/"4bit" are NOT treated as "8B"/"4B".
+    total = None
+    for m in re.finditer(r"(\d+\.?\d*)[Bb](?![a-zA-Z])", base_wo):
+        total = int(float(m.group(1)) * 1e9)
+        break
+    return total, active
+
+
+def _base_model_tag(tags):
+    """Return the `base_model:...` repo id from tags, if any."""
+    for t in (tags or []):
+        if t.startswith("base_model:"):
+            return t.split(":")[-1]
+    return None
+
+
+def _quant_from_name(name):
+    n = name.lower()
+    is8 = "8bit" in n or "8-bit" in n or "int8" in n
+    if "awq" in n:
+        return "AWQ-8bit" if is8 else "AWQ-4bit"
+    if "gptq" in n:
+        return "GPTQ-Int8" if is8 else "GPTQ-Int4"
+    if "mlx" in n:
+        if "6bit" in n:
+            return "mlx-6bit"
+        return "mlx-8bit" if is8 else "mlx-4bit"
+    if "fp8" in n:
+        return "FP8"
+    if "int4" in n or "4bit" in n or "4-bit" in n:
+        return "AWQ-4bit"
+    return "Q4_K_M"
+
+
+def _arch_from_tags(tags):
+    for t in (tags or []):
+        if ":" in t or t in _GENERIC_TAGS:
+            continue
+        if re.fullmatch(r"[a-z0-9_]+", t) and any(c.isalpha() for c in t):
+            return t
+    return ""
+
+
+def _entry_from_modelinfo(mi, overrides):
+    name = mi.id
+    provider = name.split("/")[0]
+    total, active = _parse_params(name)
+    # If the name has no size but an override supplies one, use that.
+    if total is None and overrides and overrides.get("parameter_count"):
+        total, _ov_active = _parse_params("x/" + overrides["parameter_count"])
+    # Next, try the base_model tag (the unquantized parent often names its size).
+    if total is None:
+        bm = _base_model_tag(getattr(mi, "tags", None))
+        if bm:
+            bt, ba = _parse_params(bm)
+            if bt:
+                total = bt
+                if ba and active is None:
+                    active = ba
+    # Last resort: read safetensors param count (note: for quantized repos this
+    # is the *packed* count, so it's only an approximation).
+    if total is None:
+        try:
+            full = api.model_info(name, files_metadata=False)
+            st = getattr(full, "safetensors", None)
+            if st and getattr(st, "total", None):
+                total = int(st.total)
+        except Exception:
+            pass
+    if total is None:
+        return None  # can't size it — skip
+    pb = total / 1e9
+    quant = _quant_from_name(name)
+    created = getattr(mi, "created_at", None)
+    rel = created.strftime("%Y-%m-%d") if created else datetime.utcnow().strftime("%Y-%m-%d")
+    # Rough RAM/VRAM hints (fit.py recomputes the real requirement from params+quant).
+    _BPP = {"AWQ-4bit": 0.58, "GPTQ-Int4": 0.58, "mlx-4bit": 0.55, "mlx-6bit": 0.85,
+            "AWQ-8bit": 1.1, "GPTQ-Int8": 1.1, "mlx-8bit": 1.1, "FP8": 1.1, "Q4_K_M": 0.6}
+    bpp = _BPP.get(quant, 0.6)
+    vram = round(pb * bpp + 0.5, 1)
+    entry = {
+        "name": name,
+        "provider": provider,
+        "parameter_count": f"{round(pb, 1)}B",
+        "parameters_raw": total,
+        "min_ram_gb": max(1.0, round(vram * 0.6, 1)),
+        "recommended_ram_gb": max(2.0, round(vram * 1.2, 1)),
+        "min_vram_gb": vram,
+        "quantization": quant,
+        "context_length": 32768,
+        "use_case": "General purpose",
+        "capabilities": [],
+        "pipeline_tag": getattr(mi, "pipeline_tag", None) or "text-generation",
+        "architecture": _arch_from_tags(getattr(mi, "tags", None)),
+        "hf_downloads": getattr(mi, "downloads", 0) or 0,
+        "hf_likes": getattr(mi, "likes", 0) or 0,
+        "release_date": rel,
+        "_discovered": True,
+    }
+    if active:
+        entry["is_moe"] = True
+        entry["active_parameters"] = active
+    entry.update(overrides or {})
+    # If an override set parameter_count, keep parameters_raw consistent.
+    if overrides and "parameter_count" in overrides and "parameters_raw" not in overrides:
+        t2, _ = _parse_params("x/" + overrides["parameter_count"])
+        if t2:
+            entry["parameters_raw"] = t2
+    return entry
+
+
+def main():
+    with open(DATA_PATH) as f:
+        catalog = json.load(f)
+    by_name = {m["name"]: m for m in catalog}
+    existing = set(by_name)
+
+    overwrite = "--overwrite" in sys.argv
+    to_add = {}
+
+    # Authors
+    for author in AUTHORS:
+        print(f"Fetching author: {author} ...", flush=True)
+        models = list(api.list_models(author=author, full=True, cardData=True))
+        print(f"  {len(models)} repos", flush=True)
+        for mi in models:
+            if mi.id in existing and not overwrite:
+                continue
+            ov = EXTRA_REPOS.get(mi.id)
+            entry = _entry_from_modelinfo(mi, ov)
+            if entry:
+                to_add[mi.id] = entry
+
+    # Explicit extra repos (not covered by an author scan)
+    for repo, ov in EXTRA_REPOS.items():
+        if repo in to_add:
+            continue
+        if repo in existing and not overwrite:
+            continue
+        try:
+            mi = api.model_info(repo, files_metadata=False)
+        except Exception as e:
+            print(f"  SKIP {repo}: {e}", flush=True)
+            continue
+        entry = _entry_from_modelinfo(mi, ov)
+        if entry:
+            to_add[repo] = entry
+
+    if not to_add:
+        print("Nothing new to add.")
+        return
+
+    # Backup + merge
+    with open(DATA_PATH + ".bak", "w") as f:
+        json.dump(catalog, f, indent=2)
+    for name, entry in to_add.items():
+        by_name[name] = entry
+    merged = list(by_name.values())
+    with open(DATA_PATH, "w") as f:
+        json.dump(merged, f, indent=2)
+
+    print(f"\nAdded/updated {len(to_add)} models. Catalog now {len(merged)} (was {len(catalog)}).")
+    for n in sorted(to_add)[:20]:
+        e = to_add[n]
+        print(f"  + {n}  [{e['parameter_count']}, {e['quantization']}]")
+    if len(to_add) > 20:
+        print(f"  ... and {len(to_add) - 20} more")
+
+
+if __name__ == "__main__":
+    main()