fix: normalize Gemma 4 thought-channel output (#2224)

This commit is contained in:
RaresKeY
2026-06-04 20:26:58 +03:00
committed by GitHub
parent 7ce6ec7f50
commit c12c2aa233
7 changed files with 249 additions and 41 deletions
+40 -7
View File
@@ -116,8 +116,13 @@ function sanitizeAllowedHtml(html) {
* Check if text has unclosed think tag
*/
export function hasUnclosedThinkTag(text) {
const openCount = (text.match(/<think(?:ing)?>/gi) || []).length;
const closeCount = (text.match(/<\/think(?:ing)?>/gi) || []).length;
text = text || '';
const openCount =
(text.match(/<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>/gi) || []).length
+ (text.match(/<\|channel>thought/gi) || []).length;
const closeCount =
(text.match(/<\/(?:think(?:ing)?|thought)>/gi) || []).length
+ (text.match(/<channel\|>/gi) || []).length;
return openCount > closeCount;
}
@@ -125,8 +130,25 @@ export function startsWithReasoningPrefix(text) {
return /^\s*(?:thinking(?:\s+process)?\s*:|the user |i need |i should |i will |they are |the question |i can )/i.test(text || '');
}
export function normalizeThinkingMarkup(text) {
if (!text) return text;
let normalized = text;
normalized = normalized.replace(/<thought(\s+[^>]*)?>/gi, (_m, attrs = '') => `<think${attrs || ''}>`);
normalized = normalized.replace(/<\/thought>/gi, '</think>');
normalized = normalized.replace(/<\|channel>thought\s*\n?([\s\S]*?)<channel\|>\s*/gi, (_m, content = '') => {
const thought = String(content || '').trim();
return thought ? `<think>${thought}</think>\n` : '';
});
normalized = normalized.replace(/<\|channel>response\s*\n?([\s\S]*?)<channel\|>/gi, (_m, content = '') => content || '');
normalized = normalized.replace(/<\|channel>response\s*\n?/gi, '');
normalized = normalized.replace(/<channel\|>/gi, '');
return normalized;
}
function normalizePlainThinking(text) {
if (!text || /<think/i.test(text)) return text;
if (!text) return text;
text = normalizeThinkingMarkup(text);
if (/<think/i.test(text)) return text;
const trimmed = text.trimStart();
if (!startsWithReasoningPrefix(trimmed)) return text;
@@ -220,11 +242,21 @@ export function extractThinkingBlocks(text) {
// (b) Cut-off mid-generation — there's already real reply text before the
// opener. Drop from the tag onward as before (it's truncated thinking).
if (hasUnclosedThinkTag(normalized)) {
const strayOpener = cleanContent.match(/^\s*<think(?:ing)?(?:\s+[^>]*)?>([\s\S]*)$/i);
if (strayOpener) {
cleanContent = strayOpener[1];
const gemmaThoughtStart = cleanContent.search(/<\|channel>thought/i);
if (gemmaThoughtStart >= 0) {
const leakedThought = cleanContent
.slice(gemmaThoughtStart)
.replace(/^<\|channel>thought\s*\n?/i, '')
.trim();
if (gemmaThoughtStart === 0 && leakedThought) thinkingBlocks.push(leakedThought);
cleanContent = cleanContent.slice(0, gemmaThoughtStart);
} else {
cleanContent = cleanContent.replace(/<think(?:ing)?(?:\s+[^>]*)?>[\s\S]*$/gi, '');
const strayOpener = cleanContent.match(/^\s*<think(?:ing)?(?:\s+[^>]*)?>([\s\S]*)$/i);
if (strayOpener) {
cleanContent = strayOpener[1];
} else {
cleanContent = cleanContent.replace(/<think(?:ing)?(?:\s+[^>]*)?>[\s\S]*$/gi, '');
}
}
}
@@ -686,6 +718,7 @@ const markdownModule = {
createCollapsible,
hasUnclosedThinkTag,
extractThinkingBlocks,
normalizeThinkingMarkup,
startsWithReasoningPrefix,
renderMermaid
};