mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-15 17:25:26 -04:00
2cbd55b8bd
- Agent: pass the open email reader (uid/folder/account/from/subject/body
preview) on every chat submit so 'reply to this' / 'write email saying
hi' route to ui_control open_email_reply with the right UID instead of
inventing a new .md draft. Code-level enforcement (chat_routes strips
create_document + send_email when active_email is set); cross-session
active_doc_id is now trusted instead of being silently dropped.
set_active_email/clear_active_email tool-layer helpers in
tool_implementations.
- ui_control open_email_reply: optional body argument so the agent can
open-and-write in one call; envelope now forwards uid/folder/account/
body/panel through tool_output. Tool description sharpened and the
parser rejects empty bodies on reply/reply-all (forces the agent to
write rather than open an empty draft).
- Email library: search now runs against [Gmail]/All Mail when the
current folder is INBOX (archived emails surface). Whirlpool spinner
+ 'Searching…' placeholder while in flight. Each search result is
stamped with its source folder so clicks open the right email instead
of whatever shares its UID in INBOX. Search no longer re-applies the
same text pill locally (which only checks subject/from/snippet, never
body) so body-only matches don't get dropped after IMAP returns them.
Initial inbox load bumped 100→500.
- Email favorites: 'Favorite (pin to top)' / 'Unfavorite' in both the
card menu and the open-reader more menu, backed by a new
/api/email/flag/{uid}?on=true|false endpoint. Flagged emails always
bubble to the top of the grid regardless of active sort.
- AI reply in doc editor: never overwrites existing draft text or the
quoted history. AI suggestion is prepended; AI-generated 'On …
wrote:' re-quotes are stripped so the original quote isn't visually
edited.
- Cookbook serve: pre-launch GPU driver / has_gpu / install / version-
floor checks (vllm minimax_m2 needs 0.10.0+, deepseek_r1 needs 0.7.0
etc.) before the launch chain starts. Detect 'another model already
running on this host' and offer Stop & launch (with graceful then
force tmux kill helpers, port release wait). Per-vendor deep-link
buttons (vLLM recipe / SGLang cookbook) with hardware hash. Backend
picker is now a custom dropdown with accent-coloured logos for vLLM,
SGLang, llama.cpp, Ollama, Diffusers; same glyphs added next to
package names in Dependencies. Runtime-readiness note moved inside
the panel (green when ready, red when missing) with an × dismiss.
Esc collapses the expanded card; expanded card scrolls when it
overflows; Trust Remote / Auto Tool / Reasoning Parser / Enforce
Eager / Prefix Caching / Expert Parallel / Speculative / MoE Env on
one row (Reasoning Parser auto-detected per model family).
Dtype→Row 1, GPUs→Row 2 (rightmost). Removed redundant GPU 'auto'
input — command builders read from the GPU button strip. Default
cookbook open is Download tab.
- Cookbook hwfit: 'Model (latest)' / 'Model (oldest)' header sorts by
release_date; release dates can be backfilled with the new
scripts/backfill_model_release_dates.py and recipe metadata pulled
with scripts/import_from_vllm_recipes.py against the upstream
vllm-project/recipes catalog (vllm_recipe + min_vllm_version stamped
on entries).
- Calendar: Quick add hint cycles a random Odysseus-themed example per
open (wooden horse Friday, crew muster 10am daily, council on
Ithaca, …). Typing a time like '11pm' in the event title updates
the hero clock live.
- Doc editor: email-mode Reply button (sparkle icon, accent) opens the
same Fast/Full + context popover the email reader uses; Ctrl+Alt+M
toggles markdown preview.
- Memories panel: custom sort picker with per-option icons, default
'Latest', visible Enabled/Disabled toggle text matching the section
description style.
342 lines
13 KiB
Python
Executable File
342 lines
13 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""Import models from the upstream vllm-project/recipes catalog into our
|
|
local hf_models.json. Two modes:
|
|
|
|
--update-existing Stamp min_vllm_version + vllm_recipe=True on rows we
|
|
already carry. Cheap, no HF API calls.
|
|
--add-missing Create new catalog rows for every recipe model we
|
|
don't carry. Hits the HF API for created_at + downloads
|
|
(~1 req per missing model, paced).
|
|
|
|
Both modes write atomically (tmp + rename) so a crashed run leaves the
|
|
catalog intact. Default with no mode flags runs both, prefer to pass them
|
|
explicitly.
|
|
|
|
Usage:
|
|
python scripts/import_from_vllm_recipes.py --update-existing
|
|
python scripts/import_from_vllm_recipes.py --add-missing
|
|
python scripts/import_from_vllm_recipes.py --dry-run
|
|
python scripts/import_from_vllm_recipes.py --limit 10
|
|
|
|
Auth: set HF_TOKEN to access gated repos when --add-missing.
|
|
"""
|
|
import argparse
|
|
import json
|
|
import os
|
|
import re
|
|
import sys
|
|
import time
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
|
|
try:
|
|
import httpx
|
|
import yaml
|
|
except ImportError:
|
|
print("pip install httpx PyYAML", file=sys.stderr)
|
|
sys.exit(1)
|
|
|
|
try:
|
|
from huggingface_hub import HfApi
|
|
from huggingface_hub.utils import HfHubHTTPError
|
|
except ImportError:
|
|
HfApi = None
|
|
HfHubHTTPError = Exception
|
|
|
|
|
|
# hwfit model catalog, located two directories above this script and then
# under services/hwfit/data/.
CATALOG_PATH = Path(__file__).resolve().parent.parent.joinpath(
    "services", "hwfit", "data", "hf_models.json"
)

# GitHub git-trees endpoint listing every path in the upstream recipes repo
# (main branch, recursive).
RECIPES_TREE_URL = "https://api.github.com/repos/vllm-project/recipes/git/trees/main?recursive=1"

# Raw-content URL template for a single recipe; `{repo}` is the "org/name" id.
RECIPE_RAW_URL = "https://raw.githubusercontent.com/vllm-project/recipes/main/models/{repo}.yaml"
|
|
|
|
|
|
# Map recipe `precision` to the closest catalog `quantization` label that
# fit.py / models.py already understand.
# Keys are matched against the lower-cased `precision` of each recipe variant
# (see _build_new_entry); a variant whose precision has no key here is never
# chosen as the catalog quantization.
_PRECISION_TO_QUANT = {
    "fp8": "FP8",
    "nvfp4": "NVFP4",
    "mxfp4": "MXFP4",
    "bf16": "BF16",
    "fp16": "F16",
    "f16": "F16",  # alternate spelling; same label as "fp16"
    "fp4": "FP4",
    "int8": "INT8",
    "int4": "INT4",
    "awq-4bit": "AWQ-4bit",
    "awq-8bit": "AWQ-8bit",
}
|
|
|
|
# Architecture name → use_case fallback. fit.py weights use_case for filtering;
# missing field defaults to a generic bucket.
# Looked up by exact match on the lower-cased recipe `architecture`
# (see _build_new_entry); names not listed here get "General-purpose chat".
_ARCH_USE_CASE = {
    "moe": "General-purpose reasoning, long-context",
    "llama": "General-purpose chat",
    "qwen2": "General-purpose chat",
    "qwen3": "General-purpose reasoning",
    "deepseek_v3_moe": "General-purpose reasoning, long-context",
    "deepseek_v4_moe": "General-purpose reasoning, long-context",
}
|
|
|
|
|
|
def _parse_param_count(s) -> int:
|
|
"""'230B' / '8.6B' / '4.2T' → integer parameter count."""
|
|
if s is None:
|
|
return 0
|
|
s = str(s).strip().replace(",", "")
|
|
m = re.match(r"^([\d.]+)\s*([KMBT]?)$", s, re.I)
|
|
if not m:
|
|
return 0
|
|
num = float(m.group(1))
|
|
unit = (m.group(2) or "").upper()
|
|
mult = {"K": 1e3, "M": 1e6, "B": 1e9, "T": 1e12, "": 1.0}[unit]
|
|
return int(num * mult)
|
|
|
|
|
|
def _capabilities_for(arch: str, hardware: dict, ctx_len: int, has_reasoning: bool) -> list[str]:
|
|
caps = []
|
|
if "moe" in (arch or "").lower():
|
|
caps.append("moe")
|
|
if has_reasoning:
|
|
caps.append("reasoning")
|
|
if ctx_len and ctx_len >= 100_000:
|
|
caps.append("long_context")
|
|
if any(hw in (hardware or {}) for hw in ("mi300x", "mi325x", "mi350x", "mi355x")):
|
|
caps.append("amd_supported")
|
|
return caps
|
|
|
|
|
|
def _fetch_manifest(client: httpx.Client) -> set[str]:
    """Return the set of ``org/name`` ids that have a recipe upstream.

    Fetches the recursive git tree of the recipes repository and keeps every
    path of the form ``models/<org>/<name...>.yaml``, stripped of the
    ``models/`` prefix and ``.yaml`` suffix. Files directly under ``models/``
    (no slash in the remainder) are ignored.

    Raises whatever ``client.get`` / ``raise_for_status`` raise on a failed
    request.
    """
    response = client.get(
        RECIPES_TREE_URL,
        headers={"Accept": "application/vnd.github+json"},
        timeout=15,
    )
    response.raise_for_status()

    prefix, suffix = "models/", ".yaml"
    repos: set[str] = set()
    for node in (response.json() or {}).get("tree") or []:
        path = (node or {}).get("path") or ""
        if not (path.startswith(prefix) and path.endswith(suffix)):
            continue
        repo_id = path[len(prefix):-len(suffix)]
        if "/" in repo_id:
            repos.add(repo_id)
    return repos
|
|
|
|
|
|
def _fetch_recipe(client: httpx.Client, repo: str) -> dict | None:
    """Download and parse the upstream recipe YAML for ``repo``.

    Args:
        client: HTTP client used for the request.
        repo: ``org/name`` id substituted into ``RECIPE_RAW_URL``.

    Returns:
        The parsed recipe mapping; ``{}`` when the document is empty; None
        when the response is not HTTP 200, the request or YAML parsing
        fails, or the document's top level is not a mapping.
    """
    url = RECIPE_RAW_URL.format(repo=repo)
    try:
        r = client.get(url, timeout=10)
        if r.status_code != 200:
            return None
        parsed = yaml.safe_load(r.text)
    except Exception:
        # Deliberately best-effort: one unreachable or malformed recipe must
        # not abort the whole import; the caller reports it as skipped.
        return None
    if not parsed:
        return {}
    # Callers use `.get(...)` on the result, so a list/scalar document must
    # be rejected here rather than crash them later.
    return parsed if isinstance(parsed, dict) else None
|
|
|
|
|
|
def _stamp_from_recipe(entry: dict, recipe: dict) -> bool:
|
|
"""Mutate entry with recipe-derived fields. Returns True if anything changed."""
|
|
model = recipe.get("model") or {}
|
|
meta = recipe.get("meta") or {}
|
|
features = recipe.get("features") or {}
|
|
|
|
changed = False
|
|
new_min = (model.get("min_vllm_version") or "").strip()
|
|
if new_min and entry.get("min_vllm_version") != new_min:
|
|
entry["min_vllm_version"] = new_min
|
|
changed = True
|
|
if not entry.get("vllm_recipe"):
|
|
entry["vllm_recipe"] = True
|
|
changed = True
|
|
# Hardware support map — useful for filtering "which models run on my AMD box".
|
|
hw = meta.get("hardware") or {}
|
|
if hw and entry.get("recipe_hardware") != hw:
|
|
entry["recipe_hardware"] = {k: str(v) for k, v in hw.items()}
|
|
changed = True
|
|
# Tool/reasoning parser hints — purely informational at catalog level;
|
|
# the live launch command builder still reads them from the recipe API.
|
|
if features.get("reasoning") and not entry.get("has_reasoning_parser"):
|
|
entry["has_reasoning_parser"] = True
|
|
changed = True
|
|
if features.get("tool_calling") and not entry.get("has_tool_call_parser"):
|
|
entry["has_tool_call_parser"] = True
|
|
changed = True
|
|
return changed
|
|
|
|
|
|
def _build_new_entry(repo: str, recipe: dict, hf_info=None) -> dict | None:
    """Build a fresh catalog entry from a recipe + (optional) HF model info.

    Args:
        repo: ``org/name`` id; becomes the row's ``name``, and the part
            before the first slash becomes its ``provider``.
        recipe: Parsed recipe mapping; the ``model``, ``meta``, ``features``
            and ``variants`` sections are read, each optional.
        hf_info: Optional object exposing ``created_at``, ``downloads`` and
            ``likes`` attributes. When absent the counters stay 0 and the
            release date falls back to the recipe's ``date_updated``, then
            to today's UTC date.

    Returns:
        The new entry with None / empty-string / empty-container fields
        dropped, or None when ``repo`` is not of the form ``org/name``.
    """
    # Local import: the file-level import only brings in `datetime`.
    from datetime import timezone

    # Without an org part there is no provider to record; report "couldn't
    # build" to the caller instead of raising on tuple unpacking.
    org, slash, _ = repo.partition("/")
    if not slash:
        return None

    model = recipe.get("model") or {}
    meta = recipe.get("meta") or {}
    features = recipe.get("features") or {}
    variants = recipe.get("variants") or {}

    raw_params = _parse_param_count(model.get("parameter_count"))
    active_raw = _parse_param_count(model.get("active_parameters"))
    # Coerce once so both the capability check and the stored field see an
    # int; a non-numeric value is treated as "unknown" (0) instead of crashing.
    try:
        ctx = int(model.get("context_length") or 0)
    except (TypeError, ValueError):
        ctx = 0

    # Pick the smallest-VRAM variant as the catalog quant — that's what most
    # users land on first. NVFP4/MXFP4 typically win this on Blackwell;
    # FP8 elsewhere; BF16 baseline only.
    pick_quant = None
    pick_vram = None
    for vv in variants.values():
        if not isinstance(vv, dict):
            continue
        prec = (vv.get("precision") or "").lower()
        vram = vv.get("vram_minimum_gb") or 0
        quant = _PRECISION_TO_QUANT.get(prec)
        # A variant wins when nothing with a known VRAM figure has been
        # picked yet, or when it states a strictly smaller VRAM minimum.
        if quant and (pick_vram is None or (vram and vram < pick_vram)):
            pick_quant = quant
            pick_vram = vram or pick_vram
    if not pick_quant:
        pick_quant = "BF16"

    arch = (model.get("architecture") or "").lower()
    use_case = _ARCH_USE_CASE.get(arch, "General-purpose chat")
    hardware = meta.get("hardware") or {}
    has_reasoning = bool(features.get("reasoning"))
    caps = _capabilities_for(arch, hardware, ctx, has_reasoning)

    rel_date = ""
    downloads = 0
    likes = 0
    if hf_info is not None:
        created = getattr(hf_info, "created_at", None)
        if created:
            rel_date = created.strftime("%Y-%m-%d")
        downloads = int(getattr(hf_info, "downloads", 0) or 0)
        likes = int(getattr(hf_info, "likes", 0) or 0)
    if not rel_date:
        # datetime.utcnow() is deprecated since Python 3.12; an aware UTC
        # "now" formats to the same date string.
        today_utc = datetime.now(timezone.utc).strftime("%Y-%m-%d")
        rel_date = str(meta.get("date_updated") or today_utc)

    entry: dict = {
        "name": repo,
        "provider": org,
        "parameter_count": str(model.get("parameter_count") or "?"),
        "parameters_raw": raw_params,
        "is_moe": "moe" in arch,
        "quantization": pick_quant,
        "context_length": ctx,
        "use_case": use_case,
        "capabilities": caps,
        "pipeline_tag": "text-generation",
        "architecture": arch or "unknown",
        "hf_downloads": downloads,
        "hf_likes": likes,
        "release_date": rel_date,
        # Recipe-derived bits.
        "vllm_recipe": True,
        # str(): YAML parses an unquoted two-part version (0.7) as a float.
        "min_vllm_version": str(model.get("min_vllm_version") or "").strip() or None,
        "recipe_hardware": {k: str(v) for k, v in hardware.items()},
        "has_reasoning_parser": has_reasoning,
        "has_tool_call_parser": bool(features.get("tool_calling")),
    }
    if active_raw:
        entry["active_parameters"] = active_raw
    if pick_vram:
        # min_vram_gb is what hwfit uses for "does this fit". The recipe's
        # stated minimum for the chosen variant is stored as-is; the RAM
        # figures are fixed ratios of it (0.6x minimum, 1.2x recommended).
        entry["min_vram_gb"] = float(pick_vram)
        entry["min_ram_gb"] = float(round(pick_vram * 0.6, 1))
        entry["recommended_ram_gb"] = float(round(pick_vram * 1.2, 1))
    # Drop empty / None fields to keep the JSON tidy.
    return {k: v for k, v in entry.items() if v not in (None, "", [], {})}
|
|
|
|
|
|
def main():
    """Command-line entry point: sync the local catalog with upstream recipes.

    Loads the catalog at ``CATALOG_PATH``, fetches the upstream recipe
    manifest, then for each target either stamps recipe fields onto an
    existing row (``--update-existing``) or appends a new row
    (``--add-missing``). With neither flag both modes run. Unless
    ``--dry-run`` is given and when something changed, the catalog is
    written to a temp file and renamed over the original.

    Exits with status 2 when the manifest cannot be fetched.
    """
    p = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
    p.add_argument("--update-existing", action="store_true", help="Stamp min_vllm_version + vllm_recipe on existing rows.")
    p.add_argument("--add-missing", action="store_true", help="Add new rows for recipe models not in the catalog.")
    p.add_argument("--limit", type=int, default=0, help="Stop after N recipe fetches.")
    p.add_argument("--dry-run", action="store_true", help="Don't write back; just report.")
    p.add_argument("--sleep", type=float, default=0.05, help="Seconds between HTTP requests.")
    args = p.parse_args()
    if not args.update_existing and not args.add_missing:
        args.update_existing = args.add_missing = True

    with CATALOG_PATH.open(encoding="utf-8") as f:
        catalog = json.load(f)
    by_name = {m.get("name"): m for m in catalog if m.get("name")}

    updated = added = skipped = 0
    # Context manager so the client's connections are released on every exit
    # path, including the sys.exit below.
    with httpx.Client(follow_redirects=True) as client:
        print(f"Catalog: {CATALOG_PATH} ({len(catalog)} entries)")
        print("Fetching upstream manifest…")
        try:
            manifest = _fetch_manifest(client)
        except Exception as e:
            print(f"FATAL: manifest fetch failed: {e}", file=sys.stderr)
            sys.exit(2)
        print(f"Manifest: {len(manifest)} recipes")

        existing = sorted(by_name.keys() & manifest)
        missing = sorted(manifest - by_name.keys())
        print(f"Match catalog ↔ manifest: existing={len(existing)} missing={len(missing)}")

        targets: list[tuple[str, str]] = []  # (repo, action)
        if args.update_existing:
            targets.extend((r, "update") for r in existing)
        if args.add_missing:
            targets.extend((r, "add") for r in missing)
        # Only a positive limit truncates; a negative value would otherwise
        # slice targets off the END of the list.
        if args.limit > 0:
            targets = targets[: args.limit]
        print(f"Targets: {len(targets)}")

        # HfApi is None when huggingface_hub is not installed; new rows are
        # then built from the recipe alone.
        hf_api = HfApi(token=os.environ.get("HF_TOKEN") or None) if HfApi else None
        # Monotonic clock: the elapsed time must not be skewed by wall-clock
        # adjustments during the run.
        started = time.monotonic()

        for n, (repo, action) in enumerate(targets, 1):
            recipe = _fetch_recipe(client, repo)
            if not recipe:
                print(f"[{n}/{len(targets)}] {repo:55} skip (no recipe fetched)")
                skipped += 1
                time.sleep(args.sleep)
                continue
            if action == "update":
                entry = by_name[repo]
                if _stamp_from_recipe(entry, recipe):
                    updated += 1
                    print(f"[{n}/{len(targets)}] {repo:55} updated")
                else:
                    print(f"[{n}/{len(targets)}] {repo:55} unchanged")
            else:  # add
                hf_info = None
                if hf_api:
                    try:
                        hf_info = hf_api.model_info(repo, files_metadata=False)
                    except HfHubHTTPError as e:
                        code = getattr(getattr(e, "response", None), "status_code", "?")
                        print(f"  HF {code} for {repo} — building from recipe only", file=sys.stderr)
                    except Exception as e:
                        print(f"  HF error for {repo}: {e}", file=sys.stderr)
                new_entry = _build_new_entry(repo, recipe, hf_info)
                if new_entry:
                    catalog.append(new_entry)
                    by_name[repo] = new_entry
                    added += 1
                    print(f"[{n}/{len(targets)}] {repo:55} added ({new_entry.get('parameter_count','?')}, {new_entry.get('quantization','?')})")
                else:
                    skipped += 1
                    print(f"[{n}/{len(targets)}] {repo:55} skip (couldn't build entry)")
            time.sleep(args.sleep)

        elapsed = time.monotonic() - started

    print()
    print(f"Done in {elapsed:.1f}s — added={added}, updated={updated}, skipped={skipped}")

    if args.dry_run:
        print("Dry run — no write.")
        return
    if added or updated:
        tmp = CATALOG_PATH.with_suffix(".json.tmp")
        with tmp.open("w", encoding="utf-8") as f:
            json.dump(catalog, f, indent=1, ensure_ascii=False)
            f.write("\n")
            # Push the bytes to disk before the rename so a crash right
            # after replace() cannot leave a truncated catalog behind.
            f.flush()
            os.fsync(f.fileno())
        tmp.replace(CATALOG_PATH)
        print(f"Wrote {CATALOG_PATH} ({len(catalog)} entries)")
    else:
        print("No changes — catalog untouched.")
|
|
|
|
|
|
# Run the importer only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
|