mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-16 09:45:24 -04:00
Odysseus v1.0
This commit is contained in:
@@ -0,0 +1,234 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
add_hwfit_models.py — bulk-add Hugging Face models to the hwfit catalog
|
||||
(services/hwfit/data/hf_models.json).
|
||||
|
||||
Adds:
|
||||
* every model from one or more HF authors (e.g. cyankiwi's AWQ quants)
|
||||
* any explicitly-listed repos
|
||||
|
||||
Metadata is taken from the HF Hub `list_models(full=True)` response plus the
|
||||
repo name (which encodes the param size, e.g. "Qwen3.6-35B-A3B"). Param-less
names fall back to an EXTRA_REPOS override, then the `base_model:` tag, and
finally a single per-repo model_info() call to read safetensors.
|
||||
|
||||
Re-runnable: merges by `name`, leaving existing entries untouched unless
|
||||
--overwrite is passed. Writes a .bak first.
|
||||
|
||||
Usage:
|
||||
    python3 scripts/add_hwfit_models.py [--overwrite]
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from datetime import datetime
|
||||
|
||||
from huggingface_hub import HfApi
|
||||
|
||||
DATA_PATH = os.path.join(os.path.dirname(__file__), "..", "services", "hwfit", "data", "hf_models.json")
|
||||
DATA_PATH = os.path.abspath(DATA_PATH)
|
||||
|
||||
AUTHORS = ["cyankiwi"]
|
||||
# Specific repos to add (in addition to the authors above). Optional explicit
|
||||
# overrides {repo: {field: value}} for things the name/metadata can't convey.
|
||||
EXTRA_REPOS = {
|
||||
"deepseek-ai/DeepSeek-V4-Flash": {"parameter_count": "168B", "quantization": "Q4_K_M"},
|
||||
"MiniMaxAI/MiniMax-M2.7": {"parameter_count": "228.7B", "quantization": "Q4_K_M"},
|
||||
"bullerwins/MiniMax-M2.7-REAP-172B-fp8": {"parameter_count": "172B", "quantization": "FP8"},
|
||||
"cyankiwi/MiniMax-M2.7-AWQ-4bit": {"parameter_count": "228.7B", "quantization": "AWQ-4bit"},
|
||||
}
|
||||
|
||||
# Tags that are not architecture names.
|
||||
_GENERIC_TAGS = {
|
||||
"transformers", "safetensors", "conversational", "text-generation",
|
||||
"image-text-to-text", "text-generation-inference", "endpoints_compatible",
|
||||
"autotrain_compatible", "compressed-tensors", "gguf", "mlx", "vllm", "4-bit",
|
||||
"8-bit", "awq", "gptq", "fp8", "quantized", "chat",
|
||||
}
|
||||
|
||||
# Shared Hugging Face Hub client; created once and reused by the lookups below.
api = HfApi()
|
||||
|
||||
|
||||
def _parse_params(name):
|
||||
"""Return (parameters_raw, active_parameters_or_None) from a repo name.
|
||||
Handles dense ("27B") and MoE ("235B-A22B") naming."""
|
||||
base = name.split("/")[-1]
|
||||
active = None
|
||||
m_active = re.search(r"-[Aa](\d+\.?\d*)[Bb](?![a-zA-Z])", base)
|
||||
if m_active:
|
||||
active = int(float(m_active.group(1)) * 1e9)
|
||||
base_wo = base[:m_active.start()] + base[m_active.end():]
|
||||
else:
|
||||
base_wo = base
|
||||
# First "<num>B" token that is a plausible size. Case-insensitive b, but the
|
||||
# negative lookahead means "8bit"/"4bit" are NOT treated as "8B"/"4B".
|
||||
total = None
|
||||
for m in re.finditer(r"(\d+\.?\d*)[Bb](?![a-zA-Z])", base_wo):
|
||||
total = int(float(m.group(1)) * 1e9)
|
||||
break
|
||||
return total, active
|
||||
|
||||
|
||||
def _base_model_tag(tags):
|
||||
"""Return the `base_model:...` repo id from tags, if any."""
|
||||
for t in (tags or []):
|
||||
if t.startswith("base_model:"):
|
||||
return t.split(":")[-1]
|
||||
return None
|
||||
|
||||
|
||||
def _quant_from_name(name):
|
||||
n = name.lower()
|
||||
is8 = "8bit" in n or "8-bit" in n or "int8" in n
|
||||
if "awq" in n:
|
||||
return "AWQ-8bit" if is8 else "AWQ-4bit"
|
||||
if "gptq" in n:
|
||||
return "GPTQ-Int8" if is8 else "GPTQ-Int4"
|
||||
if "mlx" in n:
|
||||
if "6bit" in n:
|
||||
return "mlx-6bit"
|
||||
return "mlx-8bit" if is8 else "mlx-4bit"
|
||||
if "fp8" in n:
|
||||
return "FP8"
|
||||
if "int4" in n or "4bit" in n or "4-bit" in n:
|
||||
return "AWQ-4bit"
|
||||
return "Q4_K_M"
|
||||
|
||||
|
||||
def _arch_from_tags(tags):
|
||||
for t in (tags or []):
|
||||
if ":" in t or t in _GENERIC_TAGS:
|
||||
continue
|
||||
if re.fullmatch(r"[a-z0-9_]+", t) and any(c.isalpha() for c in t):
|
||||
return t
|
||||
return ""
|
||||
|
||||
|
||||
def _entry_from_modelinfo(mi, overrides):
    """Build a catalog entry dict for one Hub model, or None if it can't be sized.

    The total parameter count is resolved from, in order: a parseable
    ``parameter_count`` override, the repo name, the ``base_model:`` tag, and
    finally the safetensors total reported by ``api.model_info()``.

    Args:
        mi: Model-info object from the Hub client. ``mi.id`` is required; the
            attributes ``tags``, ``created_at``, ``pipeline_tag``,
            ``downloads`` and ``likes`` are read when present.
        overrides: Optional ``{field: value}`` dict merged over the generated
            entry (may be None).

    Returns:
        The entry dict, or None when no parameter count could be determined.
    """
    # Local import: the module-level import only brings in `datetime`.
    from datetime import timezone

    overrides = overrides or {}
    name = mi.id
    provider = name.split("/")[0]
    tags = getattr(mi, "tags", None)

    total, active = _parse_params(name)

    # A parseable parameter_count override wins over the name. The emitted
    # parameter_count/parameters_raw fields take the override value anyway, so
    # sizing from it keeps the RAM/VRAM hints consistent with those fields.
    if overrides.get("parameter_count"):
        ov_total, _ov_active = _parse_params("x/" + overrides["parameter_count"])
        if ov_total:
            total = ov_total

    # Next, try the base_model tag (the unquantized parent often names its size).
    if total is None:
        bm = _base_model_tag(tags)
        if bm:
            bt, ba = _parse_params(bm)
            if bt:
                total = bt
            if ba and active is None:
                active = ba

    # Last resort: read safetensors param count (note: for quantized repos this
    # is the *packed* count, so it's only an approximation).
    if total is None:
        try:
            full = api.model_info(name, files_metadata=False)
            st = getattr(full, "safetensors", None)
            if st and getattr(st, "total", None):
                total = int(st.total)
        except Exception as exc:
            # Best-effort lookup: report the failure instead of hiding it, then
            # fall through so the model is skipped as unsizable.
            print(f" WARN {name}: safetensors lookup failed: {exc}", flush=True)

    if total is None:
        return None  # can't size it — skip

    pb = total / 1e9
    # Honour an explicit quantization override when picking bytes-per-param so
    # the hints match the quantization actually written to the entry.
    quant = overrides.get("quantization") or _quant_from_name(name)

    created = getattr(mi, "created_at", None)
    stamp = created if created else datetime.now(timezone.utc)
    rel = stamp.strftime("%Y-%m-%d")

    # Rough RAM/VRAM hints (fit.py recomputes the real requirement from params+quant).
    _BPP = {"AWQ-4bit": 0.58, "GPTQ-Int4": 0.58, "mlx-4bit": 0.55, "mlx-6bit": 0.85,
            "AWQ-8bit": 1.1, "GPTQ-Int8": 1.1, "mlx-8bit": 1.1, "FP8": 1.1, "Q4_K_M": 0.6}
    bpp = _BPP.get(quant, 0.6)
    vram = round(pb * bpp + 0.5, 1)

    entry = {
        "name": name,
        "provider": provider,
        "parameter_count": f"{round(pb, 1)}B",
        "parameters_raw": total,
        "min_ram_gb": max(1.0, round(vram * 0.6, 1)),
        "recommended_ram_gb": max(2.0, round(vram * 1.2, 1)),
        "min_vram_gb": vram,
        "quantization": quant,
        "context_length": 32768,
        "use_case": "General purpose",
        "capabilities": [],
        "pipeline_tag": getattr(mi, "pipeline_tag", None) or "text-generation",
        "architecture": _arch_from_tags(tags),
        "hf_downloads": getattr(mi, "downloads", 0) or 0,
        "hf_likes": getattr(mi, "likes", 0) or 0,
        "release_date": rel,
        "_discovered": True,
    }
    if active:
        entry["is_moe"] = True
        entry["active_parameters"] = active

    # Explicit overrides always have the final word. parameters_raw already
    # reflects a parseable parameter_count override (see above), so no extra
    # reconciliation is needed after the merge.
    entry.update(overrides)
    return entry
|
||||
|
||||
|
||||
def main():
    """Scan the configured authors and extra repos, then merge into the catalog.

    Reads the catalog at DATA_PATH, builds entries for every repo that is new
    (or for all of them when ``--overwrite`` is on the command line), writes a
    ``.bak`` copy of the loaded catalog, and rewrites the catalog with the new
    entries merged in by name. Prints a short summary of what was added.
    """
    with open(DATA_PATH) as handle:
        catalog = json.load(handle)

    by_name = {}
    for model in catalog:
        by_name[model["name"]] = model
    known = set(by_name)

    overwrite = "--overwrite" in sys.argv

    def already_present(repo_id):
        # Existing entries are left alone unless --overwrite was requested.
        return not overwrite and repo_id in known

    to_add = {}

    # Pass 1: every repo published by the configured authors.
    for author in AUTHORS:
        print(f"Fetching author: {author} ...", flush=True)
        models = list(api.list_models(author=author, full=True, cardData=True))
        print(f" {len(models)} repos", flush=True)
        for info in models:
            if already_present(info.id):
                continue
            record = _entry_from_modelinfo(info, EXTRA_REPOS.get(info.id))
            if record:
                to_add[info.id] = record

    # Pass 2: explicitly listed repos the author scan did not already produce.
    for repo, extra in EXTRA_REPOS.items():
        if repo in to_add or already_present(repo):
            continue
        try:
            info = api.model_info(repo, files_metadata=False)
        except Exception as e:
            print(f" SKIP {repo}: {e}", flush=True)
            continue
        record = _entry_from_modelinfo(info, extra)
        if record:
            to_add[repo] = record

    if not to_add:
        print("Nothing new to add.")
        return

    # Back up the catalog as loaded, then write the merged result.
    with open(DATA_PATH + ".bak", "w") as handle:
        json.dump(catalog, handle, indent=2)
    by_name.update(to_add)
    merged = list(by_name.values())
    with open(DATA_PATH, "w") as handle:
        json.dump(merged, handle, indent=2)

    print(f"\nAdded/updated {len(to_add)} models. Catalog now {len(merged)} (was {len(catalog)}).")
    preview = sorted(to_add)[:20]
    for n in preview:
        e = to_add[n]
        print(f" + {n} [{e['parameter_count']}, {e['quantization']}]")
    if len(to_add) > 20:
        print(f" ... and {len(to_add) - 20} more")
|
||||
|
||||
|
||||
# Run the catalog update only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user