mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-15 17:25:26 -04:00
Cookbook: don't auto-fold Direct Download from inside its own body
The capture-phase scroll listener was firing for scrolls anywhere in the modal — including the Trending models list, which lives inside the Direct Download fold body. Scrolling that list was auto-folding the section that contains it. Bail early if the scroll target is the fold body or a descendant — the section only folds on scrolls in sibling scrollers (.cookbook-body, .hwfit-list, .modal-content).
This commit is contained in:
+48
-3
@@ -233,16 +233,18 @@ function _detectModelOptimizations(modelName) {
|
||||
const n = (modelName || '').toLowerCase();
|
||||
const opts = { envVars: [], flags: [], tips: [] };
|
||||
|
||||
// Qwen3.5 MoE models
|
||||
// Qwen3.5 MoE models — MoE-specific env vars + expert-parallel.
|
||||
// The --reasoning-parser flag is added uniformly below via
|
||||
// _detectReasoningParser, no longer hardcoded here.
|
||||
if (n.includes('qwen3.5') || n.includes('qwen3-') && (n.includes('a10b') || n.includes('a22b') || n.includes('a3b'))) {
|
||||
opts.envVars.push('VLLM_USE_DEEP_GEMM=0', 'VLLM_USE_FLASHINFER_MOE_FP16=1', 'VLLM_USE_FLASHINFER_SAMPLER=0', 'OMP_NUM_THREADS=4');
|
||||
opts.flags.push('--enable-expert-parallel', '--reasoning-parser qwen3');
|
||||
opts.flags.push('--enable-expert-parallel');
|
||||
opts.tips.push('MoE optimizations: expert parallel + flashinfer MoE kernels');
|
||||
}
|
||||
// Qwen3 MoE (non-3.5)
|
||||
else if (n.includes('qwen3') && (n.includes('a10b') || n.includes('a22b') || n.includes('a3b'))) {
|
||||
opts.envVars.push('VLLM_USE_DEEP_GEMM=0', 'VLLM_USE_FLASHINFER_MOE_FP16=1');
|
||||
opts.flags.push('--enable-expert-parallel', '--reasoning-parser qwen3');
|
||||
opts.flags.push('--enable-expert-parallel');
|
||||
opts.tips.push('MoE optimizations: expert parallel');
|
||||
}
|
||||
// DeepSeek MoE
|
||||
@@ -250,6 +252,15 @@ function _detectModelOptimizations(modelName) {
|
||||
opts.flags.push('--enable-expert-parallel');
|
||||
opts.tips.push('MoE expert parallel for DeepSeek');
|
||||
}
|
||||
// Reasoning parser — applies independently of MoE detection. Without this
|
||||
// flag, models like MiniMax-M2.x, DeepSeek-R1, Qwen3 reasoning, GLM-4.x,
|
||||
// gpt-oss leak <think> blocks as plain text instead of separating them
|
||||
// into the reasoning_content channel.
|
||||
const _reasoningParser = _detectReasoningParser(modelName);
|
||||
if (_reasoningParser) {
|
||||
opts.flags.push(`--reasoning-parser ${_reasoningParser}`);
|
||||
opts.tips.push(`Reasoning parser (${_reasoningParser}): splits <think> tokens into a separate channel`);
|
||||
}
|
||||
// Speculative decoding — pick the right MTP method per model family.
|
||||
// opts.spec.{method,tokens} seed the UI dropdown/input; the actual flag is
|
||||
// assembled by the command builder so the user can edit before launching.
|
||||
@@ -273,6 +284,36 @@ function _detectModelOptimizations(modelName) {
|
||||
return opts;
|
||||
}
|
||||
|
||||
/** Detect the right vLLM --reasoning-parser based on model name.
|
||||
* Returns the parser slug (matches vLLM's official list) or null when the
|
||||
* model isn't a reasoning model. Without the right parser, thinking tokens
|
||||
* leak as plain text instead of being split into a separate channel.
|
||||
* Source: vllm/reasoning/__init__.py registered parsers.
|
||||
*/
|
||||
export function _detectReasoningParser(modelName) {
|
||||
const n = (modelName || '').toLowerCase();
|
||||
// MiniMax M2 / M2.5 / M2.7 — released with a dedicated parser. Catch M2
|
||||
// before plain "minimax" so M2.x doesn't fall through to a wrong parser.
|
||||
if (n.includes('minimax') && n.match(/\bm2(?:\.\d)?\b/)) return 'minimax_m2';
|
||||
// DeepSeek-R1 / V3-Thinking / V3.1-Thinking variants. Bare V3/V3.1 (non-
|
||||
// thinking) skip this — they're not reasoning models.
|
||||
if (n.includes('deepseek') && (n.includes('r1') || n.includes('thinking'))) return 'deepseek_r1';
|
||||
// Qwen3 / Qwen3.5 reasoning models. Qwen3-Coder + Qwen3-Instruct don't
|
||||
// emit <think> blocks, so skip the parser there.
|
||||
if (n.includes('qwen3') && !n.includes('coder') && !n.includes('instruct')) return 'qwen3';
|
||||
// GLM-4 / GLM-4.5 / GLM-4.6 with reasoning.
|
||||
if (n.includes('glm-4') || n.includes('glm-5')) return 'glm45';
|
||||
// OpenAI gpt-oss family.
|
||||
if (n.includes('gpt-oss')) return 'gpt_oss';
|
||||
// Hunyuan A13B reasoning.
|
||||
if (n.includes('hunyuan') && n.includes('a13b')) return 'hunyuan_a13b';
|
||||
// IBM Granite reasoning.
|
||||
if (n.includes('granite') && (n.includes('reason') || n.includes('think'))) return 'granite';
|
||||
// InternLM reasoning.
|
||||
if (n.includes('internlm')) return 'internlm';
|
||||
return null;
|
||||
}
|
||||
|
||||
/** Detect the right vLLM tool-call-parser based on model name.
|
||||
* Qwen tool-call formats split by generation:
|
||||
* - Qwen3-Coder → qwen3_coder (XML <tool_call> with named params)
|
||||
@@ -1436,6 +1477,10 @@ function _wireTabEvents(body) {
|
||||
_modal.addEventListener('scroll', (e) => {
|
||||
const tgt = e.target;
|
||||
if (!tgt || typeof tgt.scrollTop !== 'number') return;
|
||||
// Ignore scrolls that originate INSIDE the Direct Download body
|
||||
// (e.g. the Trending models list) — those are local to the
|
||||
// section and shouldn't auto-fold the section that owns them.
|
||||
if (dlFoldBody.contains && (tgt === dlFoldBody || dlFoldBody.contains(tgt))) return;
|
||||
const y = tgt.scrollTop;
|
||||
const prev = _lastY.get(tgt) || 0;
|
||||
if (y > prev) _maybeFold();
|
||||
|
||||
Reference in New Issue
Block a user