Generate macOS/Metal serve commands and surface the Metal GPU

`cookbook_routes.py` adds a macOS serve path: Ollama support, a Metal-aware llama.cpp build that uses `sysctl hw.ncpu` instead of `nproc`, and a clear error if vLLM is attempted. On Metal, the frontend defaults serving to llama.cpp and offers llama.cpp, Ollama, and Diffusers as backends in place of the default vLLM/SGLang/llama.cpp/Diffusers list. The odysseus-cookbook CLI's `gpus` command reports the Metal GPU via sysctl/vm_stat.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-17 02:05:22 -04:00 · 2026-05-31 20:24:38 -05:00
parent 32ac81dbc6
commit 4ba01ce25d
4 changed files with 122 additions and 7 deletions
@@ -16,6 +16,7 @@ let _getPort;
 let _sshPrefix;
 let _getPlatform;
 let _isWindows;
+let _isMetal;
 let _buildEnvPrefix;
 let _buildServeCmd;
 let _shellQuote;
@@ -382,6 +383,8 @@ function _rerenderCachedModels() {
      panelHtml += `<div class="hwfit-serve-row">`;
      const _backendChoices = _isWindows()
        ? [['llamacpp','llama.cpp']]
+        : _isMetal()
+        ? [['llamacpp','llama.cpp'],['ollama','Ollama'],['diffusers','Diffusers']]
        : [['vllm','vLLM'],['sglang','SGLang'],['llamacpp','llama.cpp'],['diffusers','Diffusers']];
      const backendOpts = _backendChoices.map(([v,l]) => `<option value="${v}"${defaultBackend===v?' selected':''}>${l}</option>`).join('');
      panelHtml += `<label>${_l('Backend','Inference engine: vLLM, SGLang, llama.cpp, or Diffusers')}<select class="hwfit-sf" data-field="backend">${backendOpts}</select></label>`;
@@ -1592,6 +1595,7 @@ export function initServe(shared) {
  _sshPrefix = shared._sshPrefix;
  _getPlatform = shared._getPlatform;
  _isWindows = shared._isWindows;
+  _isMetal = shared._isMetal;
  _buildEnvPrefix = shared._buildEnvPrefix;
  _buildServeCmd = shared._buildServeCmd;
  _shellQuote = shared._shellQuote;