feat(cookbook): add Gemma4 thinking chat template (#2955)

* feat(cookbook): add Gemma4 thinking chat template

* fix(cookbook): place Gemma4 thinking token in system turn
This commit is contained in:
ooovenenoso
2026-06-05 16:43:31 -04:00
committed by GitHub
parent c8b4cd24e0
commit 2bdf43b74d
2 changed files with 48 additions and 0 deletions
+17
View File
@@ -188,6 +188,19 @@ export function _isMetal() {
return ['metal', 'mps', 'apple'].includes(String(_hwfitCache?.system?.backend || '').toLowerCase());
}
const GEMMA4_THINKING_CHAT_TEMPLATE = `{% for message in messages %}{% if message['role'] == 'system' %}<|turn>system\n<|think|>{{ message['content'] }}<turn|>\n{% elif message['role'] == 'user' %}<|turn>user\n{{ message['content'] }}<turn|>\n{% elif message['role'] == 'assistant' %}<|turn>model\n{{ message['content'] }}<turn|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|turn>model\n<|channel>thought{% endif %}`;
function _isGemma4ThinkingModel(modelName) {
const n = (modelName || '').toLowerCase();
return n.includes('gemma-4') || n.includes('gemma4');
}
function _gemma4ThinkingChatTemplateArg(modelName) {
return _isGemma4ThinkingModel(modelName)
? _shellQuote(GEMMA4_THINKING_CHAT_TEMPLATE)
: '';
}
/** Detect model-specific vLLM optimizations */
function _detectModelOptimizations(modelName) {
const n = (modelName || '').toLowerCase();
@@ -388,6 +401,8 @@ export function _buildServeCmd(f, modelName, backend) {
const _extraEnv = (f.extra_env ?? '').toString().replace(/\s+/g, ' ').trim();
if (_extraEnv) cmd += _extraEnv + ' ';
cmd += `${_vllmBin} serve ${modelName} --host 0.0.0.0 --port ${f.port || '8000'}`;
const _gemma4ChatTemplate = _gemma4ThinkingChatTemplateArg(modelName);
if (_gemma4ChatTemplate) cmd += ` --chat-template ${_gemma4ChatTemplate}`;
cmd += ` --tensor-parallel-size ${f.tp || '1'}`;
cmd += ` --max-model-len ${f.ctx || '8192'}`;
cmd += ` --gpu-memory-utilization ${f.gpu_mem || '0.90'}`;
@@ -418,6 +433,8 @@ export function _buildServeCmd(f, modelName, backend) {
const _extraEnv = (f.extra_env ?? '').toString().replace(/\s+/g, ' ').trim();
if (_extraEnv) cmd += _extraEnv + ' ';
cmd += `${_py3Bin} -m sglang.launch_server --model-path ${modelName} --host 0.0.0.0 --port ${f.port || '30000'}`;
const _gemma4ChatTemplate = _gemma4ThinkingChatTemplateArg(modelName);
if (_gemma4ChatTemplate) cmd += ` --chat-template ${_gemma4ChatTemplate}`;
if (f.tp && f.tp !== '1') cmd += ` --tp ${f.tp}`;
if (f.ctx) cmd += ` --context-length ${f.ctx}`;
if (f.gpu_mem && f.gpu_mem !== '0.90') cmd += ` --mem-fraction-static ${f.gpu_mem}`;