mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-17 02:05:22 -04:00
Expose advanced llama.cpp serve controls
This commit is contained in:
@@ -1195,10 +1195,24 @@ function _parseServeCmdToFields(cmd) {
|
||||
dtype: ex(/--dtype\s+(\w+)/) || 'auto',
|
||||
max_seqs: ex(/--max-num-seqs\s+(\d+)/) || '',
|
||||
gpus: ex(/CUDA_VISIBLE_DEVICES=(\S+)/) || '',
|
||||
cache_type: ex(/(?:--cache-type-k|-ctk)\s+(\S+)/) || '',
|
||||
llama_fit: ex(/(?:--fit|-fit)\s+(on|off)/) || '',
|
||||
llama_split_mode: ex(/(?:--split-mode|-sm)\s+(none|layer|row|tensor)/) || '',
|
||||
llama_tensor_split: ex(/(?:--tensor-split|-ts)\s+([0-9.,]+)/) || '',
|
||||
llama_main_gpu: ex(/(?:--main-gpu|-mg)\s+(\d+)/) || '',
|
||||
llama_parallel: ex(/(?:--parallel|-np)\s+(\d+)/) || '',
|
||||
llama_batch_size: ex(/(?:--batch-size|-b)\s+(\d+)/) || '',
|
||||
llama_ubatch_size: ex(/(?:--ubatch-size|-ub)\s+(\d+)/) || '',
|
||||
llama_spec_tokens: ex(/--spec-draft-n-max\s+(\d+)/) || '3',
|
||||
enforce_eager: cmd.includes('--enforce-eager'),
|
||||
trust_remote: cmd.includes('--trust-remote-code'),
|
||||
prefix_cache: cmd.includes('--enable-prefix-caching'),
|
||||
auto_tool: cmd.includes('--enable-auto-tool-choice'),
|
||||
flash_attn: /--flash-attn\s+on\b/.test(cmd),
|
||||
unified_mem: /GGML_CUDA_ENABLE_UNIFIED_MEMORY=1/.test(cmd),
|
||||
llama_no_mmap: /--no-mmap\b/.test(cmd),
|
||||
llama_no_warmup: /--no-warmup\b/.test(cmd),
|
||||
llama_speculative_mtp: /--spec-type\s+\S*draft-mtp/.test(cmd),
|
||||
speculative: cmd.includes('--speculative-config'),
|
||||
};
|
||||
const spec = cmd.match(/--speculative-config\s+'?\{[^}]*"method"\s*:\s*"([^"]+)"[^}]*"num_speculative_tokens"\s*:\s*(\d+)/);
|
||||
|
||||
Reference in New Issue
Block a user