mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-17 02:05:22 -04:00
Cookbook: don't auto-fold Direct Download from inside its own body
The capture-phase scroll listener was firing for scrolls anywhere in the modal — including the Trending models list, which lives inside the Direct Download fold body. Scrolling that list was auto-folding the section that contains it. Bail early if the scroll target is the fold body or a descendant — the section only folds on scrolls in sibling scrollers (.cookbook-body, .hwfit-list, .modal-content).
This commit is contained in:
+48
-3
@@ -233,16 +233,18 @@ function _detectModelOptimizations(modelName) {
|
|||||||
const n = (modelName || '').toLowerCase();
|
const n = (modelName || '').toLowerCase();
|
||||||
const opts = { envVars: [], flags: [], tips: [] };
|
const opts = { envVars: [], flags: [], tips: [] };
|
||||||
|
|
||||||
// Qwen3.5 MoE models
|
// Qwen3.5 MoE models — MoE-specific env vars + expert-parallel.
|
||||||
|
// The --reasoning-parser flag is added uniformly below via
|
||||||
|
// _detectReasoningParser, no longer hardcoded here.
|
||||||
if (n.includes('qwen3.5') || n.includes('qwen3-') && (n.includes('a10b') || n.includes('a22b') || n.includes('a3b'))) {
|
if (n.includes('qwen3.5') || n.includes('qwen3-') && (n.includes('a10b') || n.includes('a22b') || n.includes('a3b'))) {
|
||||||
opts.envVars.push('VLLM_USE_DEEP_GEMM=0', 'VLLM_USE_FLASHINFER_MOE_FP16=1', 'VLLM_USE_FLASHINFER_SAMPLER=0', 'OMP_NUM_THREADS=4');
|
opts.envVars.push('VLLM_USE_DEEP_GEMM=0', 'VLLM_USE_FLASHINFER_MOE_FP16=1', 'VLLM_USE_FLASHINFER_SAMPLER=0', 'OMP_NUM_THREADS=4');
|
||||||
opts.flags.push('--enable-expert-parallel', '--reasoning-parser qwen3');
|
opts.flags.push('--enable-expert-parallel');
|
||||||
opts.tips.push('MoE optimizations: expert parallel + flashinfer MoE kernels');
|
opts.tips.push('MoE optimizations: expert parallel + flashinfer MoE kernels');
|
||||||
}
|
}
|
||||||
// Qwen3 MoE (non-3.5)
|
// Qwen3 MoE (non-3.5)
|
||||||
else if (n.includes('qwen3') && (n.includes('a10b') || n.includes('a22b') || n.includes('a3b'))) {
|
else if (n.includes('qwen3') && (n.includes('a10b') || n.includes('a22b') || n.includes('a3b'))) {
|
||||||
opts.envVars.push('VLLM_USE_DEEP_GEMM=0', 'VLLM_USE_FLASHINFER_MOE_FP16=1');
|
opts.envVars.push('VLLM_USE_DEEP_GEMM=0', 'VLLM_USE_FLASHINFER_MOE_FP16=1');
|
||||||
opts.flags.push('--enable-expert-parallel', '--reasoning-parser qwen3');
|
opts.flags.push('--enable-expert-parallel');
|
||||||
opts.tips.push('MoE optimizations: expert parallel');
|
opts.tips.push('MoE optimizations: expert parallel');
|
||||||
}
|
}
|
||||||
// DeepSeek MoE
|
// DeepSeek MoE
|
||||||
@@ -250,6 +252,15 @@ function _detectModelOptimizations(modelName) {
|
|||||||
opts.flags.push('--enable-expert-parallel');
|
opts.flags.push('--enable-expert-parallel');
|
||||||
opts.tips.push('MoE expert parallel for DeepSeek');
|
opts.tips.push('MoE expert parallel for DeepSeek');
|
||||||
}
|
}
|
||||||
|
// Reasoning parser — applies independently of MoE detection. Without this
|
||||||
|
// flag, models like MiniMax-M2.x, DeepSeek-R1, Qwen3 reasoning, GLM-4.x,
|
||||||
|
// gpt-oss leak <think> blocks as plain text instead of separating them
|
||||||
|
// into the reasoning_content channel.
|
||||||
|
const _reasoningParser = _detectReasoningParser(modelName);
|
||||||
|
if (_reasoningParser) {
|
||||||
|
opts.flags.push(`--reasoning-parser ${_reasoningParser}`);
|
||||||
|
opts.tips.push(`Reasoning parser (${_reasoningParser}): splits <think> tokens into a separate channel`);
|
||||||
|
}
|
||||||
// Speculative decoding — pick the right MTP method per model family.
|
// Speculative decoding — pick the right MTP method per model family.
|
||||||
// opts.spec.{method,tokens} seed the UI dropdown/input; the actual flag is
|
// opts.spec.{method,tokens} seed the UI dropdown/input; the actual flag is
|
||||||
// assembled by the command builder so the user can edit before launching.
|
// assembled by the command builder so the user can edit before launching.
|
||||||
@@ -273,6 +284,36 @@ function _detectModelOptimizations(modelName) {
|
|||||||
return opts;
|
return opts;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Detect the right vLLM --reasoning-parser based on model name.
 *
 * The name is lower-cased and tested against the family rules below in the
 * order written; the first rule that matches wins. Without the right parser,
 * thinking tokens leak as plain text instead of being split into a separate
 * channel.
 * Source: vllm/reasoning/__init__.py registered parsers.
 *
 * @param {string} modelName - Model name or repo id. Empty or non-string
 *   values are treated as "no name".
 * @returns {string|null} The parser slug, or null when no rule matches
 *   (i.e. the model isn't recognised as a reasoning model).
 */
export function _detectReasoningParser(modelName) {
  // Guard against undefined/null/non-string input instead of throwing on
  // .toLowerCase().
  const n = typeof modelName === 'string' ? modelName.toLowerCase() : '';
  if (!n) return null;

  // MiniMax M2 / M2.5 / M2.7 — dedicated parser. The "m2" token must be
  // delimited by a non-alphanumeric character (or string start/end). A plain
  // \b is not enough here: "_" counts as a word character, so names such as
  // "minimax_m2" or "MiniMax-M2_AWQ" would otherwise be missed.
  if (n.includes('minimax') && /(?:^|[^a-z0-9])m2(?:\.\d+)?(?![a-z0-9])/.test(n)) {
    return 'minimax_m2';
  }

  // DeepSeek-R1 / V3-Thinking / V3.1-Thinking variants. Bare V3/V3.1 (non-
  // thinking) skip this — they're not reasoning models.
  if (n.includes('deepseek') && (n.includes('r1') || n.includes('thinking'))) {
    return 'deepseek_r1';
  }

  // Qwen3 / Qwen3.5 reasoning models. Qwen3-Coder + Qwen3-Instruct don't
  // emit <think> blocks, so skip the parser there.
  if (n.includes('qwen3') && !n.includes('coder') && !n.includes('instruct')) {
    return 'qwen3';
  }

  // GLM-4.x and GLM-5 names (anything containing "glm-4" or "glm-5") share
  // the glm45 parser.
  if (n.includes('glm-4') || n.includes('glm-5')) return 'glm45';

  // OpenAI gpt-oss family.
  if (n.includes('gpt-oss')) return 'gpt_oss';

  // Hunyuan A13B reasoning.
  if (n.includes('hunyuan') && n.includes('a13b')) return 'hunyuan_a13b';

  // IBM Granite — only names that advertise a reasoning/thinking variant.
  if (n.includes('granite') && (n.includes('reason') || n.includes('think'))) {
    return 'granite';
  }

  // InternLM reasoning.
  if (n.includes('internlm')) return 'internlm';

  return null;
}
|
||||||
|
|
||||||
/** Detect the right vLLM tool-call-parser based on model name.
|
/** Detect the right vLLM tool-call-parser based on model name.
|
||||||
* Qwen tool-call formats split by generation:
|
* Qwen tool-call formats split by generation:
|
||||||
* - Qwen3-Coder → qwen3_coder (XML <tool_call> with named params)
|
* - Qwen3-Coder → qwen3_coder (XML <tool_call> with named params)
|
||||||
@@ -1436,6 +1477,10 @@ function _wireTabEvents(body) {
|
|||||||
_modal.addEventListener('scroll', (e) => {
|
_modal.addEventListener('scroll', (e) => {
|
||||||
const tgt = e.target;
|
const tgt = e.target;
|
||||||
if (!tgt || typeof tgt.scrollTop !== 'number') return;
|
if (!tgt || typeof tgt.scrollTop !== 'number') return;
|
||||||
|
// Ignore scrolls that originate INSIDE the Direct Download body
|
||||||
|
// (e.g. the Trending models list) — those are local to the
|
||||||
|
// section and shouldn't auto-fold the section that owns them.
|
||||||
|
if (dlFoldBody.contains && (tgt === dlFoldBody || dlFoldBody.contains(tgt))) return;
|
||||||
const y = tgt.scrollTop;
|
const y = tgt.scrollTop;
|
||||||
const prev = _lastY.get(tgt) || 0;
|
const prev = _lastY.get(tgt) || 0;
|
||||||
if (y > prev) _maybeFold();
|
if (y > prev) _maybeFold();
|
||||||
|
|||||||
Reference in New Issue
Block a user