Expose advanced llama.cpp serve controls

This commit is contained in:
spooky
2026-06-02 13:46:16 +10:00
committed by GitHub
parent 05fb48e9d5
commit 0f3280ee05
4 changed files with 92 additions and 0 deletions
+14
View File
@@ -1195,10 +1195,24 @@ function _parseServeCmdToFields(cmd) {
dtype: ex(/--dtype\s+(\w+)/) || 'auto',
max_seqs: ex(/--max-num-seqs\s+(\d+)/) || '',
gpus: ex(/CUDA_VISIBLE_DEVICES=(\S+)/) || '',
cache_type: ex(/(?:--cache-type-k|-ctk)\s+(\S+)/) || '',
llama_fit: ex(/(?:--fit|-fit)\s+(on|off)/) || '',
llama_split_mode: ex(/(?:--split-mode|-sm)\s+(none|layer|row|tensor)/) || '',
llama_tensor_split: ex(/(?:--tensor-split|-ts)\s+([0-9.,]+)/) || '',
llama_main_gpu: ex(/(?:--main-gpu|-mg)\s+(\d+)/) || '',
llama_parallel: ex(/(?:--parallel|-np)\s+(\d+)/) || '',
llama_batch_size: ex(/(?:--batch-size|-b)\s+(\d+)/) || '',
llama_ubatch_size: ex(/(?:--ubatch-size|-ub)\s+(\d+)/) || '',
llama_spec_tokens: ex(/--spec-draft-n-max\s+(\d+)/) || '3',
enforce_eager: cmd.includes('--enforce-eager'),
trust_remote: cmd.includes('--trust-remote-code'),
prefix_cache: cmd.includes('--enable-prefix-caching'),
auto_tool: cmd.includes('--enable-auto-tool-choice'),
flash_attn: /--flash-attn\s+on\b/.test(cmd),
unified_mem: /GGML_CUDA_ENABLE_UNIFIED_MEMORY=1/.test(cmd),
llama_no_mmap: /--no-mmap\b/.test(cmd),
llama_no_warmup: /--no-warmup\b/.test(cmd),
llama_speculative_mtp: /--spec-type\s+\S*draft-mtp/.test(cmd),
speculative: cmd.includes('--speculative-config'),
};
const spec = cmd.match(/--speculative-config\s+'?\{[^}]*"method"\s*:\s*"([^"]+)"[^}]*"num_speculative_tokens"\s*:\s*(\d+)/);