Cookbook: don't auto-fold Direct Download from inside its own body

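The capture-phase scroll listener was firing for scrolls anywhere in the modal — including the Trending models list, which lives inside the Direct Download fold body. Scrolling that list was auto-folding the section that contains it. Bail early if the scroll target is the fold body or one of its descendants — the section now folds only on scrolls in sibling scrollers (.cookbook-body, .hwfit-list, .modal-content). This commit also adds _detectReasoningParser and applies the --reasoning-parser flag independently of MoE detection, instead of hardcoding it in the Qwen3 MoE branches of _detectModelOptimizations.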
2026-06-15 17:25:26 -04:00 · 2026-06-13 22:26:04 +09:00
parent 45b3cd15df
commit 654f9f82c7
1 changed files with 48 additions and 3 deletions
@@ -233,16 +233,18 @@ function _detectModelOptimizations(modelName) {
  const n = (modelName || '').toLowerCase();
  const opts = { envVars: [], flags: [], tips: [] };

-  // Qwen3.5 MoE models
+  // Qwen3.5 MoE models — MoE-specific env vars + expert-parallel.
+  // The --reasoning-parser flag is added uniformly below via
+  // _detectReasoningParser, no longer hardcoded here.
  if (n.includes('qwen3.5') || n.includes('qwen3-') && (n.includes('a10b') || n.includes('a22b') || n.includes('a3b'))) {
    opts.envVars.push('VLLM_USE_DEEP_GEMM=0', 'VLLM_USE_FLASHINFER_MOE_FP16=1', 'VLLM_USE_FLASHINFER_SAMPLER=0', 'OMP_NUM_THREADS=4');
-    opts.flags.push('--enable-expert-parallel', '--reasoning-parser qwen3');
+    opts.flags.push('--enable-expert-parallel');
    opts.tips.push('MoE optimizations: expert parallel + flashinfer MoE kernels');
  }
  // Qwen3 MoE (non-3.5)
  else if (n.includes('qwen3') && (n.includes('a10b') || n.includes('a22b') || n.includes('a3b'))) {
    opts.envVars.push('VLLM_USE_DEEP_GEMM=0', 'VLLM_USE_FLASHINFER_MOE_FP16=1');
-    opts.flags.push('--enable-expert-parallel', '--reasoning-parser qwen3');
+    opts.flags.push('--enable-expert-parallel');
    opts.tips.push('MoE optimizations: expert parallel');
  }
  // DeepSeek MoE
@@ -250,6 +252,15 @@ function _detectModelOptimizations(modelName) {
    opts.flags.push('--enable-expert-parallel');
    opts.tips.push('MoE expert parallel for DeepSeek');
  }
+  // Reasoning parser — applies independently of MoE detection. Without this
+  // flag, models like MiniMax-M2.x, DeepSeek-R1, Qwen3 reasoning, GLM-4.x,
+  // gpt-oss leak <think> blocks as plain text instead of separating them
+  // into the reasoning_content channel.
+  const _reasoningParser = _detectReasoningParser(modelName);
+  if (_reasoningParser) {
+    opts.flags.push(`--reasoning-parser ${_reasoningParser}`);
+    opts.tips.push(`Reasoning parser (${_reasoningParser}): splits <think> tokens into a separate channel`);
+  }
  // Speculative decoding — pick the right MTP method per model family.
  // opts.spec.{method,tokens} seed the UI dropdown/input; the actual flag is
  // assembled by the command builder so the user can edit before launching.
@@ -273,6 +284,36 @@ function _detectModelOptimizations(modelName) {
  return opts;
 }

+/** Detect the right vLLM --reasoning-parser based on model name.
+ *  Returns the parser slug (matches vLLM's official list) or null when the
+ *  model isn't a reasoning model. Without the right parser, thinking tokens
+ *  leak as plain text instead of being split into a separate channel.
+ *  Source: vllm/reasoning/__init__.py registered parsers.
+ */
+export function _detectReasoningParser(modelName) {
+  const n = (modelName || '').toLowerCase();
+  // MiniMax M2 / M2.5 / M2.7 — released with a dedicated parser. Catch M2
+  // before plain "minimax" so M2.x doesn't fall through to a wrong parser.
+  if (n.includes('minimax') && n.match(/\bm2(?:\.\d)?\b/)) return 'minimax_m2';
+  // DeepSeek-R1 / V3-Thinking / V3.1-Thinking variants. Bare V3/V3.1 (non-
+  // thinking) skip this — they're not reasoning models.
+  if (n.includes('deepseek') && (n.includes('r1') || n.includes('thinking'))) return 'deepseek_r1';
+  // Qwen3 / Qwen3.5 reasoning models. Qwen3-Coder + Qwen3-Instruct don't
+  // emit <think> blocks, so skip the parser there.
+  if (n.includes('qwen3') && !n.includes('coder') && !n.includes('instruct')) return 'qwen3';
+  // GLM-4 / GLM-4.5 / GLM-4.6 with reasoning.
+  if (n.includes('glm-4') || n.includes('glm-5')) return 'glm45';
+  // OpenAI gpt-oss family.
+  if (n.includes('gpt-oss')) return 'gpt_oss';
+  // Hunyuan A13B reasoning.
+  if (n.includes('hunyuan') && n.includes('a13b')) return 'hunyuan_a13b';
+  // IBM Granite reasoning.
+  if (n.includes('granite') && (n.includes('reason') || n.includes('think'))) return 'granite';
+  // InternLM reasoning.
+  if (n.includes('internlm')) return 'internlm';
+  return null;
+}
+
 /** Detect the right vLLM tool-call-parser based on model name.
 *  Qwen tool-call formats split by generation:
 *   - Qwen3-Coder           → qwen3_coder  (XML <tool_call> with named params)
@@ -1436,6 +1477,10 @@ function _wireTabEvents(body) {
    _modal.addEventListener('scroll', (e) => {
      const tgt = e.target;
      if (!tgt || typeof tgt.scrollTop !== 'number') return;
+      // Ignore scrolls that originate INSIDE the Direct Download body
+      // (e.g. the Trending models list) — those are local to the
+      // section and shouldn't auto-fold the section that owns them.
+      if (dlFoldBody.contains && (tgt === dlFoldBody || dlFoldBody.contains(tgt))) return;
      const y = tgt.scrollTop;
      const prev = _lastY.get(tgt) || 0;
      if (y > prev) _maybeFold();