mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-16 09:45:24 -04:00
Generate macOS/Metal serve commands and surface the Metal GPU
cookbook_routes.py adds a macOS serve path (Ollama, Metal-aware llama.cpp build using `sysctl hw.ncpu` instead of `nproc`, and a clear error if vLLM is attempted). The frontend defaults Metal serving to llama.cpp and offers llama.cpp/Ollama instead of vLLM/SGLang. The odysseus-cookbook CLI's `gpus` command reports the Metal GPU via sysctl/vm_stat. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -171,6 +171,13 @@ export function _isWindows(hostOrTask) {
|
||||
return _getPlatform(hostOrTask) === 'windows';
|
||||
}
|
||||
|
||||
/**
 * Report whether the detected (local) hardware is Apple Silicon / Metal.
 *
 * The decision keys off the hardware probe's `backend` field rather than a
 * platform string, because a local Mac reports no platform but does report
 * backend: "metal".
 *
 * @returns {boolean} true when the cached backend, compared
 *   case-insensitively, is "metal", "mps" or "apple"; false otherwise,
 *   including when no backend value is cached.
 */
export function _isMetal() {
  // A nullish cache, a missing `system`, or a falsy backend all collapse to ''.
  const backendName = String(_hwfitCache?.system?.backend || '').toLowerCase();
  return backendName === 'metal' || backendName === 'mps' || backendName === 'apple';
}
|
||||
|
||||
/** Detect model-specific vLLM optimizations */
|
||||
function _detectModelOptimizations(modelName) {
|
||||
const n = (modelName || '').toLowerCase();
|
||||
@@ -252,6 +259,13 @@ export function _detectBackend(model) {
|
||||
return { backend: 'llamacpp', label: 'llama.cpp' };
|
||||
}
|
||||
|
||||
// Apple Silicon (Metal) → llama.cpp (GGUF). vLLM/SGLang are CUDA/ROCm-only and
|
||||
// don't run on macOS; AWQ/GPTQ/FP8 (vLLM-only) models are already filtered out
|
||||
// of metal Cookbook results, so llama.cpp is always the right engine here.
|
||||
if (['metal', 'mps', 'apple'].includes(sysBackend)) {
|
||||
return { backend: 'llamacpp', label: 'llama.cpp' };
|
||||
}
|
||||
|
||||
// AWQ / GPTQ / FP8 → vLLM
|
||||
if (/^AWQ|^GPTQ/.test(q) || q === 'FP8') {
|
||||
return { backend: 'vllm', label: 'vLLM' };
|
||||
@@ -1761,6 +1775,7 @@ const shared = {
|
||||
_sshPrefix,
|
||||
_getPlatform,
|
||||
_isWindows,
|
||||
_isMetal,
|
||||
_buildEnvPrefix,
|
||||
_buildServeCmd,
|
||||
_shellQuote,
|
||||
|
||||
@@ -16,6 +16,7 @@ let _getPort;
|
||||
let _sshPrefix;
|
||||
let _getPlatform;
|
||||
let _isWindows;
|
||||
let _isMetal;
|
||||
let _buildEnvPrefix;
|
||||
let _buildServeCmd;
|
||||
let _shellQuote;
|
||||
@@ -382,6 +383,8 @@ function _rerenderCachedModels() {
|
||||
panelHtml += `<div class="hwfit-serve-row">`;
|
||||
const _backendChoices = _isWindows()
|
||||
? [['llamacpp','llama.cpp']]
|
||||
: _isMetal()
|
||||
? [['llamacpp','llama.cpp'],['ollama','Ollama'],['diffusers','Diffusers']]
|
||||
: [['vllm','vLLM'],['sglang','SGLang'],['llamacpp','llama.cpp'],['diffusers','Diffusers']];
|
||||
const backendOpts = _backendChoices.map(([v,l]) => `<option value="${v}"${defaultBackend===v?' selected':''}>${l}</option>`).join('');
|
||||
panelHtml += `<label>${_l('Backend','Inference engine: vLLM, SGLang, llama.cpp, or Diffusers')}<select class="hwfit-sf" data-field="backend">${backendOpts}</select></label>`;
|
||||
@@ -1592,6 +1595,7 @@ export function initServe(shared) {
|
||||
_sshPrefix = shared._sshPrefix;
|
||||
_getPlatform = shared._getPlatform;
|
||||
_isWindows = shared._isWindows;
|
||||
_isMetal = shared._isMetal;
|
||||
_buildEnvPrefix = shared._buildEnvPrefix;
|
||||
_buildServeCmd = shared._buildServeCmd;
|
||||
_shellQuote = shared._shellQuote;
|
||||
|
||||
Reference in New Issue
Block a user