mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-16 09:45:24 -04:00
811 lines
31 KiB
Python
811 lines
31 KiB
Python
import os
|
|
import platform
|
|
import re
|
|
import shutil
|
|
import subprocess
|
|
import time
|
|
import shlex
|
|
|
|
from core.platform_compat import (
|
|
NVIDIA_PATH_CANDIDATES,
|
|
SSH_PATH_OVERRIDE,
|
|
run_ssh_command,
|
|
)
|
|
|
|
# Lifetime of a cached probe result, in seconds (24 h). Hardware probes are
# user-initiated via the Rescan button; bumped from 30 min so changing filters
# doesn't keep re-probing the rig every half-hour during a long session.
CACHE_TTL = 24 * 3600

# Probe target of the detect_system() call currently in progress. The helpers
# in this module read these globals instead of taking the SSH context as
# parameters; _run() routes over SSH whenever _remote_host is set.
_remote_host = None  # set by detect_system(host=...)
_remote_port = None  # set by detect_system(ssh_port=...)
_remote_platform = None  # set by detect_system(platform=...): "windows", "linux", "termux"
_last_gpu_error = None  # set by _detect_nvidia() when nvidia-smi errors (driver mismatch, etc.)
|
|
|
|
|
|
def _run(cmd):
    """Run *cmd* on the current probe target and return its stripped stdout.

    When the module-level _remote_host is set the command is sent through
    run_ssh_command(); otherwise it is executed locally with subprocess.run().

    Args:
        cmd: Either an argv list or a ready-made command string. For a remote
            target an argv list is flattened into one shell-quoted string.

    Returns:
        The command's stdout with surrounding whitespace removed when it
        exits with status 0; None for a non-zero exit or any exception
        (missing binary, timeout, SSH failure, ...).
    """
    try:
        if not _remote_host:
            proc = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
        else:
            # Run command on remote host via SSH
            remote_cmd = shlex.join(str(part) for part in cmd) if isinstance(cmd, list) else cmd
            proc = run_ssh_command(
                _remote_host,
                _remote_port,
                remote_cmd,
                timeout=15,
                connect_timeout=5,
                strict_host_key_checking=False,
                text=True,
            )
        return proc.stdout.strip() if proc.returncode == 0 else None
    except Exception:
        # Deliberately best-effort: callers treat None as "probe unavailable".
        return None
|
|
|
|
|
|
def _group_gpus(gpus):
    """Bucket GPUs into pools of identical cards, keyed by (name, rounded VRAM).

    vLLM tensor-parallel only works across IDENTICAL GPUs, so a mixed box has
    to be split into homogeneous pools. Every pool records the device indices
    of its members so a serve command can pin CUDA_VISIBLE_DEVICES to exactly
    one pool.

    Args:
        gpus: Iterable of dicts with "name" and "vram_gb" keys and an optional
            "index" key (None is recorded when it is missing).

    Returns:
        A list of pool dicts (name, vram_each, count, indices, vram_total),
        largest vram_total first — the sensible auto-default serving target.
        Pools with equal totals keep first-seen order.
    """
    pools = {}  # dicts preserve insertion order, so first-seen order is kept
    for gpu in gpus:
        pool = pools.setdefault(
            (gpu["name"], round(gpu["vram_gb"])),
            {
                "name": gpu["name"],
                # Per-card size is taken from the first card seen in the pool.
                "vram_each": round(gpu["vram_gb"], 1),
                "count": 0,
                "indices": [],
            },
        )
        pool["count"] += 1
        pool["indices"].append(gpu.get("index"))

    for pool in pools.values():
        pool["vram_total"] = round(pool["vram_each"] * pool["count"], 1)

    # sorted() is stable, matching the original in-place sort.
    return sorted(pools.values(), key=lambda pool: pool["vram_total"], reverse=True)
|
|
|
|
|
|
def _detect_nvidia():
    """Detect NVIDIA GPUs by querying nvidia-smi on the current probe target.

    The query is attempted, in order, via: nvidia-smi from PATH; (remote only)
    a bash login shell with SSH_PATH_OVERRIDE prepended; each absolute path in
    NVIDIA_PATH_CANDIDATES.

    Returns:
        A dict with gpu_name, gpu_vram_gb, gpu_count, gpus, gpu_groups,
        homogeneous and backend="cuda" — plus unified_memory=True when no
        device reported a numeric memory.total — or None when no usable GPU
        row was found.

    Side effects:
        Resets the module-level _last_gpu_error, and sets it to the first line
        of nvidia-smi's output (at most 140 chars) when that output looks like
        a driver error.
    """
    global _last_gpu_error
    _last_gpu_error = None

    # Single definition of the query so every fallback asks the same thing.
    query_args = ["--query-gpu=memory.total,name", "--format=csv,noheader,nounits"]
    query_str = " ".join(query_args)

    out = _run(["nvidia-smi", *query_args])

    # Fallback: a non-interactive shell (or WSL) often has a minimal PATH
    # that omits where nvidia-smi lives (/usr/bin, /usr/local/cuda/bin,
    # /usr/lib/wsl/lib), so the first call silently returns nothing →
    # "No GPU" on machines that DO have GPUs.
    # Retry through a login shell with the common CUDA bin dirs on PATH.
    if not out and _remote_host:
        out = _run(f"bash -lc '{SSH_PATH_OVERRIDE}nvidia-smi {query_str}'")

    # Last resort: call nvidia-smi by absolute path. Some hosts have a login
    # shell that isn't bash (or a profile that errors), so the bash -lc retry
    # above still comes back empty even though the binary is right there.
    # Also handles WSL where nvidia-smi lives at /usr/lib/wsl/lib/ — a path
    # that may not be in the server process's PATH.
    if not out:
        for candidate in NVIDIA_PATH_CANDIDATES:
            if _remote_host:
                out = _run(f"{candidate} {query_str}")
            else:
                # Use list form so subprocess.run (local) resolves the absolute
                # path correctly instead of treating the whole string as an
                # executable name.
                out = _run([candidate, *query_args])
            if out:
                break
    if not out:
        return None

    # nvidia-smi present but unable to talk to the driver (e.g. it was updated
    # without a reboot). It prints an error and no GPU rows — surface that as a
    # driver error rather than the misleading "No GPU".
    # NOTE(review): _run() returns None for a non-zero exit status, so this
    # branch only fires when nvidia-smi prints the error yet exits 0 — confirm
    # that real driver failures actually reach it.
    _low = out.lower()
    if ("nvml" in _low or "driver/library version mismatch" in _low
            or "couldn't communicate" in _low or "no devices were found" in _low
            or "failed to initialize" in _low):
        _last_gpu_error = out.strip().split("\n")[0][:140] or "NVIDIA driver error"
        return None

    gpus = []
    # Devices nvidia-smi lists with a real name but a non-numeric memory.total.
    unified = []
    # nvidia-smi lists GPUs in index order (0,1,2,...), so the row position is
    # the CUDA device index we'd pass to CUDA_VISIBLE_DEVICES.
    for idx, line in enumerate(out.strip().split("\n")):
        # memory.total is the first CSV field; everything after the first comma
        # is the device name, so a comma inside the name is preserved.
        mem_field, sep, name = line.partition(",")
        if not sep:
            continue
        name = name.strip()
        try:
            vram_mb = float(mem_field.strip())
        except ValueError:
            # Grace Blackwell GB10 / DGX Spark and other unified-memory
            # NVIDIA parts report memory.total as "[N/A]"/"Not Supported"
            # because the GPU shares the system LPDDR pool instead of
            # carrying discrete VRAM. Don't drop the device — remember it so
            # we report a unified-memory GPU below rather than "No GPU" (#1340).
            if name:
                unified.append({"index": idx, "name": name})
            continue
        gpus.append({"index": idx, "name": name, "vram_gb": vram_mb / 1024.0})

    if not gpus:
        if not unified:
            return None
        # Unified-memory CUDA box: report the GPU backed by system RAM so the
        # Cookbook recommends models and serving works. The pool is shared
        # (not per-GPU discrete VRAM), so report the RAM total once.
        ram_gb = round(_get_ram_gb(), 1)
        gpus = [{"index": g["index"], "name": g["name"], "vram_gb": ram_gb} for g in unified]
        return {
            "gpu_name": gpus[0]["name"],
            "gpu_vram_gb": ram_gb,
            "gpu_count": len(gpus),
            "gpus": gpus,
            "gpu_groups": _group_gpus(gpus),
            "homogeneous": True,
            "backend": "cuda",
            "unified_memory": True,
        }

    total_vram = sum(g["vram_gb"] for g in gpus)
    groups = _group_gpus(gpus)
    return {
        "gpu_name": gpus[0]["name"],
        "gpu_vram_gb": round(total_vram, 1),
        "gpu_count": len(gpus),
        "gpus": gpus,
        "gpu_groups": groups,
        "homogeneous": len(groups) <= 1,
        "backend": "cuda",
    }
|
|
|
|
|
|
def classify_amd_gfx(gfx):
    """Classify an AMD ISA target string such as "gfx1200".

    Args:
        gfx: ISA target; case and surrounding whitespace are ignored. None or
            an empty string is accepted.

    Returns:
        A (gfx, family) tuple. gfx is the normalised (lower-cased, stripped)
        target, or "" when the input is not of the form gfx<digits>[a-f].
        family is one of:
          "rdna"    — consumer Radeon RX (gfx10xx RDNA1/2, gfx11xx RDNA3, gfx12xx RDNA4)
          "cdna"    — datacenter Instinct (gfx908 MI100, gfx90a MI200, gfx94x/95x MI300+)
          "gcn"     — older GCN/Vega (gfx900/906)
          "unknown" — empty/unrecognized; callers must treat conservatively

    The family drives the serving decision: vLLM/SGLang on ROCm are validated
    on CDNA but fragile on consumer RDNA (AWQ kernels largely unsupported, FP8
    needs out-of-tree patches), so RDNA is steered to GGUF/llama.cpp.
    """
    target = (gfx or "").lower().strip()
    matched = re.fullmatch(r"gfx(\d+[a-f]?)", target)
    if matched is None:
        return "", "unknown"

    isa = matched.group(1)
    prefix = isa[:2]
    if prefix in ("10", "11", "12"):
        family = "rdna"
    elif isa in ("908", "90a") or prefix in ("94", "95"):
        family = "cdna"
    elif isa.startswith("9"):
        # Any remaining gfx9xx part is treated as GCN/Vega.
        family = "gcn"
    else:
        family = "unknown"
    return target, family
|
|
|
|
|
|
def _detect_amd():
    """Detect AMD GPUs from /sys/class/drm on the current probe target.

    Handles both discrete cards (with mem_info_vram_total) and APUs /
    unified-memory SoCs like Strix Halo (which expose mem_info_vis_vram_total
    instead, or only mem_info_gtt_total).

    Returns:
        A dict with gpu_name, gpu_vram_gb, gpu_count, gpus, gpu_groups,
        homogeneous, backend="rocm", unified_memory, gpu_arch and gpu_family,
        or None when no AMD device with a non-zero memory pool is found or
        any step raises.
    """
    def _read(path):
        """Return the stripped contents of *path* (local or via SSH), or None."""
        if _remote_host:
            val = _run(["cat", path])
            return val.strip() if val else None
        try:
            with open(path, encoding="utf-8", errors="replace") as f:
                return f.read().strip()
        except Exception:
            return None

    def _read_bytes(path):
        """Return the unsigned integer stored in *path*, or 0 if unreadable."""
        raw = _read(path)
        return int(raw) if raw and raw.isdigit() else 0

    def _card_order(entry):
        """Sort key: numeric card suffix first (card2 before card10)."""
        suffix = entry[len("card"):]
        if suffix.isdecimal():
            return (0, int(suffix), entry)
        return (1, 0, entry)

    def _list_drm_cards():
        """List cardN entries (connector nodes like card0-DP-1 excluded)."""
        if _remote_host:
            out = _run(["ls", "/sys/class/drm"])
            entries = out.split() if out else []
        else:
            try:
                entries = os.listdir("/sys/class/drm")
            except Exception:
                return []
        cards = [e for e in entries if e.startswith("card") and "-" not in e]
        # os.listdir() returns entries in arbitrary order and `ls` sorts them
        # lexicographically; sort by card number so the reported indices and
        # the "first" GPU are deterministic and identical locally and remotely.
        return sorted(cards, key=_card_order)

    def _amd_arch():
        """Best-effort AMD GPU ISA + family from rocminfo.

        rocminfo is the source of truth; its GPU agents report a `Name: gfxNNNN`
        line (CPU agents report a brand string, not a gfx target), so the first
        gfx match is the GPU ISA. Returns (gfx, family) — see classify_amd_gfx.
        """
        info = _run(["rocminfo"]) or _run(["/opt/rocm/bin/rocminfo"]) or ""
        m = re.search(r"gfx\d+[a-f]?", info)
        return classify_amd_gfx(m.group(0) if m else "")

    try:
        cards = []
        is_apu = False
        for _cidx, entry in enumerate(_list_drm_cards()):
            base = f"/sys/class/drm/{entry}/device"
            # 0x1002 is the only vendor id accepted here; other DRM devices
            # are skipped but still consume an index position.
            if _read(f"{base}/vendor") != "0x1002":
                continue
            # Discrete cards usually report real VRAM in mem_info_vram_total,
            # while some AMD APUs / Docker views expose a tiny vram_total and
            # the usable pool in vis_vram_total. Use the larger of those two;
            # only fall back to GTT if neither VRAM field is available.
            vram_val = _read_bytes(f"{base}/mem_info_vram_total")
            vis_val = _read_bytes(f"{base}/mem_info_vis_vram_total")
            gtt_val = _read_bytes(f"{base}/mem_info_gtt_total")
            vram_bytes = max(vram_val, vis_val)
            if vram_bytes <= 0:
                vram_bytes = gtt_val
            # NOTE(review): vis >= vram presumably also holds for discrete
            # cards whose whole VRAM is CPU-visible (e.g. Resizable BAR), which
            # would flag them as unified-memory — confirm this heuristic.
            if vis_val and vis_val >= vram_val:
                is_apu = True
            if vram_bytes <= 0:
                continue
            name = _read(f"{base}/product_name") or f"AMD GPU ({entry})"
            cards.append({"index": _cidx, "name": name, "vram_gb": vram_bytes / (1024**3)})

        if not cards:
            return None
        total_vram = sum(c["vram_gb"] for c in cards)
        groups = _group_gpus(cards)
        gfx, family = _amd_arch()
        # NOTE: for APUs with BIOS UMA carveout (e.g. Strix Halo), vis_vram_total
        # is the real usable GPU memory — it's physically backed but reserved
        # by BIOS so it doesn't appear in /proc/meminfo. Don't cap it at system
        # RAM: the two pools are separate from the OS's perspective.
        return {
            "gpu_name": cards[0]["name"],
            "gpu_vram_gb": round(total_vram, 1),
            "gpu_count": len(cards),
            "gpus": cards,
            "gpu_groups": groups,
            "homogeneous": len(groups) <= 1,
            "backend": "rocm",
            "unified_memory": is_apu,
            # AMD ISA/family so downstream can tell datacenter Instinct (CDNA,
            # where vLLM/SGLang run AWQ/GPTQ reliably) from consumer Radeon
            # (RDNA, where the practical path is GGUF via llama.cpp). Empty/
            # "unknown" when rocminfo isn't available — callers must treat
            # unknown conservatively, not assume vLLM works.
            "gpu_arch": gfx,
            "gpu_family": family,
        }
    except Exception:
        # Best-effort probe: any unexpected failure means "no AMD GPU detected".
        return None
|
|
|
|
|
|
def _detect_apple_silicon():
    """Detect an Apple Silicon (M-series) GPU on the current probe target.

    Macs have no discrete VRAM — the GPU shares the system's unified memory.
    A fraction of total RAM is reported as the usable GPU budget (matching
    macOS's default Metal working-set limit) so the Cookbook recommends models
    that actually run on the GPU instead of classifying the machine as
    CPU-only.

    backend="metal" is what services.hwfit.fit and the serve-command generation
    key off of (they already understand MLX / llama.cpp-Metal). Works locally
    (platform.system()=="Darwin") and over SSH (uname -s == Darwin).

    Returns:
        A GPU-info dict (gpu_name, gpu_vram_gb, gpu_count, gpus, gpu_groups,
        homogeneous, backend, unified_memory), or None when the target is not
        macOS, is not ARM, or its memory size cannot be read.
    """
    # Gate to macOS — locally via platform, remotely via uname.
    if _remote_host:
        kernel = (_run(["uname", "-s"]) or "").lower()
        if "darwin" not in kernel:
            return None
        arch = (_run(["uname", "-m"]) or "").lower()
    else:
        if platform.system() != "Darwin":
            return None
        arch = platform.machine().lower()

    # Only Apple Silicon (arm64) has a Metal GPU worth serving LLMs on; Intel
    # Macs fall through to the CPU path.
    if not ("arm" in arch or "aarch64" in arch):
        return None

    # Chip name, e.g. "Apple M4 Max" — carries the Pro/Max/Ultra variant that
    # the fit bandwidth table keys off of.
    chip = (_run(["sysctl", "-n", "machdep.cpu.brand_string"]) or "Apple Silicon").strip()

    # Total unified memory in bytes.
    mem_bytes = _run(["sysctl", "-n", "hw.memsize"])
    total_gb = 0.0
    if mem_bytes:
        try:
            total_gb = int(mem_bytes) / (1024**3)
        except ValueError:
            total_gb = 0.0
    if total_gb <= 0:
        return None

    # Usable GPU budget. macOS lets Metal use most of unified memory, but the
    # default working-set limit scales with RAM: small machines have to keep
    # more back for the OS + app. These fractions track Apple's
    # recommendedMaxWorkingSetSize defaults across the lineup.
    share = 0.67 if total_gb <= 16 else (0.75 if total_gb <= 64 else 0.80)
    budget_gb = round(total_gb * share, 1)

    # Honour an explicit override if the user raised it with
    # `sudo sysctl iogpu.wired_limit_mb=…`.
    override = _run(["sysctl", "-n", "iogpu.wired_limit_mb"])
    if override:
        try:
            limit_mb = int(override)
        except ValueError:
            limit_mb = 0
        if limit_mb > 0:
            budget_gb = round(limit_mb / 1024.0, 1)

    device = {"index": 0, "name": chip, "vram_gb": budget_gb}
    return {
        "gpu_name": chip,
        "gpu_vram_gb": budget_gb,
        "gpu_count": 1,
        "gpus": [device],
        "gpu_groups": _group_gpus([device]),
        "homogeneous": True,
        "backend": "metal",
        # Unified memory: the "VRAM" above is carved out of system RAM, not a
        # separate pool — downstream fit logic uses this to avoid double-budgeting.
        "unified_memory": True,
    }
|
|
|
|
|
|
def _read_file(path):
    """Return the text of *path* from the current probe target, or None.

    A remote target is read with `cat` through _run() (so the text comes back
    stripped); a local file is read as UTF-8 with undecodable bytes replaced
    and is returned unstripped. Any local read error yields None.
    """
    if not _remote_host:
        try:
            with open(path, encoding="utf-8", errors="replace") as handle:
                return handle.read()
        except Exception:
            return None
    return _run(["cat", path])
|
|
|
|
|
|
def _parse_meminfo():
    """Parse /proc/meminfo into a dict mapping field name -> integer KB value.

    Returns an empty dict when the file cannot be read. Lines without a colon,
    without a value, or whose first value token is not an integer are skipped.
    """
    raw = _read_file("/proc/meminfo")
    if not raw:
        return {}

    fields = {}
    for row in raw.split("\n"):
        name, colon, rest = row.partition(":")
        if not colon:
            continue
        tokens = rest.strip().split()
        if not tokens:
            continue
        try:
            # Only the leading number is kept; the trailing unit is ignored.
            fields[name.strip()] = int(tokens[0])
        except ValueError:
            continue
    return fields
|
|
|
|
|
|
def _get_ram_gb():
    """Return total system RAM of the current probe target in GiB.

    Sources are tried in order: MemTotal from /proc/meminfo, os.sysconf
    (local Unix only), then `sysctl -n hw.memsize`. Returns 0.0 when none of
    them yields a value.
    """
    total_kb = _parse_meminfo().get("MemTotal")
    if total_kb is not None:
        return total_kb / (1024**2)

    # os.sysconf only exists on Unix; on Windows it's absent (AttributeError)
    # and these constants aren't defined — guard so this never raises there.
    sysconf_usable = (
        not _remote_host
        and hasattr(os, "sysconf")
        and "SC_PHYS_PAGES" in getattr(os, "sysconf_names", {})
    )
    if sysconf_usable:
        try:
            page_count = os.sysconf("SC_PHYS_PAGES")
            bytes_per_page = os.sysconf("SC_PAGE_SIZE")
        except Exception:
            page_count = bytes_per_page = 0
        if page_count and bytes_per_page:
            return (page_count * bytes_per_page) / (1024**3)

    # macOS has no /proc/meminfo — fall back to sysctl (works locally and over
    # SSH to a remote Mac, where the sysconf path above isn't taken).
    raw_bytes = _run(["sysctl", "-n", "hw.memsize"])
    if not raw_bytes:
        return 0.0
    try:
        return int(raw_bytes.strip()) / (1024**3)
    except ValueError:
        return 0.0
|
|
|
|
|
|
def _get_available_ram_gb():
    """Return currently available RAM in GiB.

    Uses MemAvailable from /proc/meminfo when present; otherwise estimates it
    as 70% of the total reported by _get_ram_gb().
    """
    available_kb = _parse_meminfo().get("MemAvailable")
    if available_kb is not None:
        return available_kb / (1024**2)
    return _get_ram_gb() * 0.7
|
|
|
|
|
|
def _get_cpu_name():
    """Return a human-readable CPU name for the current probe target.

    Tries the first "model name" line of /proc/cpuinfo, then
    `sysctl -n machdep.cpu.brand_string`, then (local probes only)
    platform.processor(). Returns "unknown" when nothing is available.
    """
    cpuinfo = _read_file("/proc/cpuinfo") or ""
    for row in cpuinfo.split("\n"):
        if row.startswith("model name"):
            return row.split(":", 1)[1].strip()

    # macOS has no /proc/cpuinfo — sysctl gives the chip name (e.g. "Apple M4").
    # Harmlessly returns nothing on Linux, so it's safe to try unconditionally.
    chip = (_run(["sysctl", "-n", "machdep.cpu.brand_string"]) or "").strip()
    if chip:
        return chip

    if _remote_host:
        return "unknown"
    return platform.processor() or "unknown"
|
|
|
|
|
|
def _get_cpu_count():
    """Return the number of logical CPUs on the current probe target.

    Local probes use os.cpu_count(). Remote probes try `nproc`, then
    `sysctl -n hw.ncpu`, then count "processor" lines in /proc/cpuinfo.

    Returns:
        A positive int. When a remote count cannot be determined the result is
        1 rather than the local machine's core count, which would describe the
        wrong host.
    """
    if not _remote_host:
        return os.cpu_count() or 1

    # nproc on Linux; hw.ncpu via sysctl on a remote Mac (no nproc there).
    out = _run(["nproc"]) or _run(["sysctl", "-n", "hw.ncpu"])
    if out:
        try:
            count = int(out.strip())
        except ValueError:
            count = 0
        if count > 0:
            return count

    # fallback: count "processor" lines in /proc/cpuinfo
    text = _read_file("/proc/cpuinfo")
    if text:
        count = sum(1 for line in text.split("\n") if line.startswith("processor"))
        if count > 0:
            return count

    # Unknown remote core count: never report the controller's own CPUs.
    return 1
|
|
|
|
|
|
def _powershell_exe():
    """Choose the PowerShell executable to use for LOCAL execution.

    pwsh (PowerShell 7+) is preferred, then Windows PowerShell 5.1. The
    resolved absolute path is returned so the result does not depend on PATH
    ordering; if neither is found on PATH the bare name "powershell" is
    returned.
    """
    for candidate in ("pwsh", "powershell"):
        resolved = shutil.which(candidate)
        if resolved:
            return resolved
    return "powershell"
|
|
|
|
|
|
def _detect_windows():
    """Detect Windows hardware via PowerShell/WMI.

    Works for BOTH local (host="") and remote (SSH) detection:
      * remote -> the script is sent as `powershell -EncodedCommand <base64>`
                  so no shell between here and PowerShell has to parse the
                  script's quotes, `$` variables or newlines.
      * local  -> `_run` executes a list argv directly (no shell quoting hell).

    Returns:
        A detect_system()-shaped dict with platform="windows" (RAM, CPU and,
        when a GPU was found, gpus/gpu_groups), or None when PowerShell
        produced no output or the output could not be decoded.
    """
    # Single PowerShell command that gathers all hardware info at once
    ps_cmd = (
        """
$r = @{}
$os = Get-CimInstance Win32_OperatingSystem
$r.ram_gb = [math]::Round($os.TotalVisibleMemorySize / 1048576, 1)
$r.avail_gb = [math]::Round($os.FreePhysicalMemory / 1048576, 1)
$cpu = Get-CimInstance Win32_Processor | Select-Object -First 1
$r.cpu_name = $cpu.Name
$r.cpu_cores = (Get-CimInstance Win32_Processor | Measure-Object -Property NumberOfLogicalProcessors -Sum).Sum
$r.arch = $cpu.AddressWidth
# GPU detection via nvidia-smi (fastest) or WMI fallback
try {
    $nv = nvidia-smi --query-gpu=memory.total,name --format=csv,noheader,nounits 2>$null
    if ($LASTEXITCODE -eq 0 -and $nv) {
        $gpus = @()
        foreach ($line in $nv -split "`n") {
            $p = $line -split ','
            if ($p.Count -ge 2) { $gpus += [pscustomobject]@{name = $p[1].Trim(); vram_mb = [double]$p[0].Trim() } }
        }
        $r.gpu_name = $gpus[0].name
        $r.gpu_vram_gb = [math]::Round(($gpus | Measure-Object -Property vram_mb -Sum).Sum / 1024, 1)
        $r.gpu_count = $gpus.Count
        $r.gpu_backend = 'cuda'
    }
}
catch {}
if (-not $r.gpu_name) {
    $wmiGpu = Get-CimInstance Win32_VideoController | Where-Object { $_.AdapterRAM -gt 0 } | Select-Object -First 1
    $GPUDriverKey = "HKLM:\\SYSTEM\\CurrentControlSet\\Control\\Class\\{4d36e968-e325-11ce-bfc1-08002be10318}\\0*"
    $GPUDeviceID = $wmiGpu.PNPDeviceID.Split('&')[0..1] -join '&'
    $VRAMfromRegistry = Get-ItemProperty -Path $GPUDriverKey |
    Where-Object { $_.MatchingDeviceId -like "${GPUDeviceID}*" } |
    # Sometimes there happen to be multiple driver classes for the same gpu.
    Select-Object -ExpandProperty HardwareInformation.qwMemorySize -ErrorAction SilentlyContinue -First 1
    if ($wmiGpu) {
        $r.gpu_name = $wmiGpu.Name
        # Edge case: driver is broken, otherwise $wmiGpu.AdapterRAM is redundant
        if ($VRAMfromRegistry -ge $wmiGpu.AdapterRAM) {
            $r.gpu_vram_gb = [math]::Round($VRAMfromRegistry / 1073741824, 1)
        }
        else {
            $r.gpu_vram_gb = [math]::Round($wmiGpu.AdapterRAM / 1073741824, 1)
        }
        $r.gpu_count = 1
        # WMI doesn't tell us CUDA/ROCm
        $r.gpu_backend = 'cpu_x86';
    }
}
$r | ConvertTo-Json -Compress
"""
    )
    if _remote_host:
        # Remote: the script contains double quotes, `$` and newlines, so it
        # cannot be embedded safely inside `-Command "..."` — whichever shell
        # receives the SSH command would re-parse it. -EncodedCommand takes the
        # script as Base64 of its UTF-16LE bytes, which is plain ASCII.
        import base64 as _base64

        encoded = _base64.b64encode(ps_cmd.encode("utf-16-le")).decode("ascii")
        out = _run(f"powershell -NoProfile -NonInteractive -EncodedCommand {encoded}")
    else:
        # Local: pass a LIST argv straight to subprocess so the OS hands ps_cmd
        # to PowerShell verbatim — no fragile string-level quote escaping. Prefer
        # pwsh (PS7), else Windows PowerShell 5.1.
        out = _run([_powershell_exe(), "-NoProfile", "-NonInteractive", "-Command", ps_cmd])
    if not out:
        return None

    import json as _json

    try:
        d = _json.loads(out)

        # PowerShell's Measure-Object .Sum / .Count come back as JSON numbers and
        # decode to float; the Linux path returns plain ints for these — coerce
        # so the dict shape (and downstream int math) matches across platforms.
        def _as_int(v, default):
            try:
                return int(v)
            except (TypeError, ValueError):
                return default

        _cpu_name = (d.get("cpu_name") or "unknown")
        if isinstance(_cpu_name, str):
            _cpu_name = _cpu_name.strip() or "unknown"
        result = {
            "total_ram_gb": d.get("ram_gb", 0),
            "available_ram_gb": d.get("avail_gb", 0),
            "cpu_cores": _as_int(d.get("cpu_cores"), 1),
            "cpu_name": _cpu_name,
            "has_gpu": bool(d.get("gpu_name")),
            "gpu_name": d.get("gpu_name"),
            "gpu_vram_gb": d.get("gpu_vram_gb"),
            "gpu_count": _as_int(d.get("gpu_count"), 0),
            "backend": d.get("gpu_backend", "cpu_x86"),
            "homogeneous": True,
            "gpu_error": None,
            "platform": "windows",
        }
        # PowerShell only reports aggregate GPU info, not per-card detail, so we
        # can't tell a mixed box from a uniform one here — assume one homogeneous
        # pool spanning all reported GPUs (the common Windows case).
        _n = result["gpu_count"] or 0
        if result["has_gpu"] and _n > 0:
            _each = round((result["gpu_vram_gb"] or 0) / _n, 1)
            result["gpus"] = [
                {"index": i, "name": result["gpu_name"], "vram_gb": _each} for i in range(_n)
            ]
            result["gpu_groups"] = [{
                "name": result["gpu_name"],
                "vram_each": _each,
                "count": _n,
                "indices": list(range(_n)),
                "vram_total": result["gpu_vram_gb"],
            }]
            result["homogeneous"] = True
        return result
    except Exception:
        # Undecodable or unexpectedly shaped output is treated as a failed probe.
        return None
|
|
|
|
|
|
# Probe results keyed by the tuple _cache_key() builds (host, SSH port,
# platform) — not by host alone. Each value is a (timestamp, result) pair that
# detect_system() reuses until it is older than CACHE_TTL.
_cache_by_host = {}
|
|
|
|
|
|
def _cache_key(host: str, ssh_port: str, platform_name: str):
    """Return the cache key for one probe target.

    The same host alias can expose different hardware depending on visibility,
    port forwarding and so on, so the SSH port and platform are part of the key
    to keep one context's cached result from being served for another.

    Returns:
        A (host, port, platform) tuple of strings: host falls back to
        "_local" when empty, port is stringified ("" when falsy), and platform
        is lower-cased ("" when falsy).
    """
    host_part = host if host else "_local"
    port_part = str(ssh_port) if ssh_port else ""
    platform_part = (str(platform_name) if platform_name else "").lower()
    return host_part, port_part, platform_part
|
|
|
|
|
|
def _is_containerized():
    """Best-effort check for whether the local Odysseus process runs in a container.

    Always False while a remote probe is active. Otherwise True when
    /.dockerenv exists or /proc/1/cgroup mentions docker, containerd or
    kubepods; False when neither marker is found or the cgroup file cannot be
    read.
    """
    if _remote_host:
        return False
    if os.path.exists("/.dockerenv"):
        return True

    try:
        with open("/proc/1/cgroup", encoding="utf-8", errors="replace") as handle:
            cgroup_text = handle.read().lower()
    except Exception:
        return False

    for marker in ("docker", "containerd", "kubepods"):
        if marker in cgroup_text:
            return True
    return False
|
|
|
|
|
|
def _hardware_visibility_warning(result):
    """Build a non-blocking UX warning for container-limited hardware views.

    Args:
        result: A detect_system() result dict (anything else yields None).

    Returns:
        None when the result is not a dict, uses manual hardware, is not
        containerized, or carries a gpu_error. Otherwise a warning dict (code,
        severity, title, message, actions) when no GPU is visible, an info
        dict when visible RAM is non-zero and at most 8 GB, else None.
    """
    if not isinstance(result, dict):
        return None

    suppressed = (
        result.get("manual_hardware")
        or not result.get("containerized")
        or result.get("gpu_error")
    )
    if suppressed:
        return None

    def _notice(code, severity, title, message):
        # Every notice offers the same follow-up actions; a fresh list is
        # built per call so callers never share one mutable object.
        return {
            "code": code,
            "severity": severity,
            "title": title,
            "message": message,
            "actions": ["manual_hardware", "rescan", "copy_diagnostics"],
        }

    if not result.get("has_gpu"):
        return _notice(
            "container_no_gpu_visible",
            "warning",
            "No GPU visible inside Docker",
            "Cookbook is scanning hardware from inside the Odysseus container. "
            "If your host has a GPU, Docker may not be exposing it to the container, "
            "so model recommendations may be CPU-only or too conservative.",
        )

    visible_ram = result.get("total_ram_gb") or 0
    if visible_ram and visible_ram <= 8:
        return _notice(
            "container_low_ram_visible",
            "info",
            "Container-visible RAM may be lower than host RAM",
            "Cookbook is seeing the RAM available inside the container. "
            "If your host has more memory, validate host RAM separately or use Manual Hardware.",
        )

    return None
|
|
|
|
|
|
def _attach_probe_context(result, host=""):
    """Annotate a probe result with where it was taken from.

    Non-dict results and results carrying a truthy "error" are returned
    untouched. Otherwise the dict is mutated in place:
      * "probe_scope" becomes "remote", "container" or "native";
      * "containerized" is set (always False for a remote host);
      * "hardware_visibility_warning" is set when a warning applies and
        removed otherwise.

    Args:
        result: The probe result to annotate.
        host: SSH host of the probe; empty for a local probe.

    Returns:
        The same *result* object.
    """
    if not isinstance(result, dict) or result.get("error"):
        return result

    if host:
        containerized, scope = False, "remote"
    else:
        containerized = _is_containerized()
        scope = "container" if containerized else "native"

    result["probe_scope"] = scope
    result["containerized"] = containerized

    notice = _hardware_visibility_warning(result)
    if notice:
        result["hardware_visibility_warning"] = notice
    else:
        # Drop any stale warning left on a reused dict.
        result.pop("hardware_visibility_warning", None)
    return result
|
|
|
|
|
|
def _probe_current_target(host):
    """Probe the target selected by the module-level _remote_* globals.

    Private helper of detect_system(): it assumes the globals are already set
    and leaves resetting them to the caller.

    Args:
        host: The original host argument, used for error messages and probe
            context.

    Returns:
        The hardware result dict, or {"error": ..., "host": ...} when a remote
        target could not be probed.
    """
    # Windows: single PowerShell command for all hardware info
    if _remote_platform == "windows" and _remote_host:
        result = _detect_windows()
        if result:
            return _attach_probe_context(result, host=host)
        # If Windows detection failed, return error
        return {"error": f"Cannot connect to {host}", "host": host}

    # Local Windows: the Linux /proc + /sys + os.sysconf path returns 0 GB RAM,
    # "unknown" CPU and no GPU on Windows (and os.sysconf doesn't even exist),
    # so detect locally via PowerShell/WMI instead. _detect_windows() runs the
    # same probe used for remote Windows, but _run() executes it locally.
    if not _remote_host and os.name == "nt":
        result = _detect_windows()
        if result:
            return _attach_probe_context(result, host=host)
        # PowerShell probe failed entirely — fall through to the generic path
        # below so we at least return a well-shaped dict rather than crashing.

    # Linux/Termux: existing multi-command detection
    total_ram = round(_get_ram_gb(), 1)
    # If remote host returns 0 RAM, connection likely failed
    if _remote_host and total_ram <= 0:
        return {"error": f"Cannot connect to {host}", "host": host}
    available_ram = round(_get_available_ram_gb(), 1)
    cpu_cores = _get_cpu_count()
    cpu_name = _get_cpu_name()

    gpu_info = _detect_apple_silicon() or _detect_nvidia() or _detect_amd()

    if gpu_info:
        result = {
            "total_ram_gb": total_ram,
            "available_ram_gb": available_ram,
            "cpu_cores": cpu_cores,
            "cpu_name": cpu_name,
            "has_gpu": True,
            "gpu_name": gpu_info["gpu_name"],
            "gpu_vram_gb": gpu_info["gpu_vram_gb"],
            "gpu_count": gpu_info["gpu_count"],
            "gpus": gpu_info.get("gpus", []),
            "gpu_groups": gpu_info.get("gpu_groups", []),
            "homogeneous": gpu_info.get("homogeneous", True),
            "backend": gpu_info["backend"],
            # Apple Silicon, AMD APUs and unified-memory NVIDIA parts share
            # system RAM with the GPU — carry the flag through so callers can
            # tell unified from discrete VRAM.
            "unified_memory": gpu_info.get("unified_memory", False),
        }
    else:
        if _remote_host:
            arch_out = (_run(["uname", "-m"]) or "").lower()
        else:
            arch_out = platform.machine().lower()
        backend = "cpu_arm" if "aarch64" in arch_out or "arm" in arch_out else "cpu_x86"
        result = {
            "total_ram_gb": total_ram,
            "available_ram_gb": available_ram,
            "cpu_cores": cpu_cores,
            "cpu_name": cpu_name,
            "has_gpu": False,
            "gpu_name": None,
            "gpu_vram_gb": None,
            "gpu_count": 0,
            "backend": backend,
            # Set when nvidia-smi exists but failed (e.g. driver/library
            # version mismatch) — lets the UI say "GPU driver error" instead
            # of the misleading "No GPU".
            "gpu_error": _last_gpu_error,
        }

    return _attach_probe_context(result, host=host)


def detect_system(host="", ssh_port="", platform="", fresh=False):
    """Detect system hardware: RAM, CPU, GPU.

    Results are cached per (host, ssh_port, platform) for CACHE_TTL seconds —
    hardware rarely changes, and probing a remote host over SSH is slow.
    Connection-error results are cached the same way.

    Args:
        host: SSH target such as 'user@server'; empty probes the local machine.
        ssh_port: SSH port for the remote target; empty uses the default.
        platform: "windows", "linux", "termux", or "" (auto-detect). Matched
            case-insensitively, consistent with the cache key.
        fresh: Pass True to bypass the cache and re-probe (the "Rescan" button).

    Returns:
        The hardware result dict, or {"error": ..., "host": ...} when a remote
        host could not be probed. A cache hit returns the stored dict itself.
    """
    global _remote_host, _remote_port, _remote_platform

    cache_key = _cache_key(host, ssh_port, platform)
    now = time.time()
    if not fresh:
        entry = _cache_by_host.get(cache_key)
        if entry is not None and (now - entry[0]) < CACHE_TTL:
            return entry[1]

    _remote_host = host or None
    _remote_port = ssh_port or None
    # Lower-cased so "Windows" routes like "windows", matching _cache_key().
    _remote_platform = str(platform or "").lower() or None
    try:
        result = _probe_current_target(host)
    finally:
        # Always clear the probe target — including the port, and even when a
        # probe raises — so later local calls never inherit a stale SSH context.
        _remote_host = None
        _remote_port = None
        _remote_platform = None

    _cache_by_host[cache_key] = (now, result)
    return result
|