CodeQL hardening for cookbook sync

This commit is contained in:
pewdiepie-archdaemon
2026-06-22 02:39:18 +00:00
parent 19dd82b8f6
commit fbdec22dcb
8 changed files with 166 additions and 50 deletions
+5 -3
View File
@@ -281,7 +281,8 @@ def setup_cookbook_routes() -> APIRouter:
fails KV-cache setup, and FlashInfer sampler JIT fails on this host's
system nvcc. Normalize server-side before writing the tmux runner.
"""
if not cmd or "vllm serve" not in cmd or not re.search(r"minimax.*m3", cmd, re.I):
cmd_lower = (cmd or "").lower()
if not cmd or "vllm serve" not in cmd_lower or "minimax" not in cmd_lower or "m3" not in cmd_lower:
return cmd
try:
parts = shlex.split(cmd)
@@ -2823,8 +2824,9 @@ def setup_cookbook_routes() -> APIRouter:
if resp.status_code != 200:
return {"ok": False, "files": [], "error": f"HF API HTTP {resp.status_code}"}
data = resp.json()
except Exception as e:
return {"ok": False, "files": [], "error": str(e)}
except Exception:
logger.exception("HF GGUF file scan failed for %s", repo)
return {"ok": False, "files": [], "error": "HF API request failed"}
files = [
str(s.get("rfilename") or "")
for s in data.get("siblings", [])
+12 -3
View File
@@ -1645,6 +1645,13 @@ def setup_email_routes():
return {"error": f"Attachment index {index} not found"}
from pathlib import Path as _Path
target_root = _Path(target_dir).resolve()
filepath = _Path(filepath).resolve()
try:
filepath.relative_to(target_root)
except ValueError:
logger.warning("Rejected attachment path outside extraction dir: %s", filepath)
return {"error": "Invalid attachment path"}
base = _Path(filepath).name
if base.startswith("."):
return {"error": "Invalid filename", "filename": base}
@@ -1727,8 +1734,9 @@ def setup_email_routes():
return f"# Attached email: {base}\n\n_(empty email attachment)_"
try:
attached_msg = email_mod.message_from_bytes(raw_bytes)
except Exception as e:
return f"# Attached email: {base}\n\nCould not parse this email attachment: {e}"
except Exception:
logger.exception("Failed to parse attached email %s", base)
return f"# Attached email: {base}\n\nCould not parse this email attachment."
attached_subject = _decode_header(attached_msg.get("Subject", "")) or base
attached_from = _decode_header(attached_msg.get("From", ""))
@@ -1809,7 +1817,8 @@ def setup_email_routes():
try:
content = _attached_email_markdown(filepath)
except Exception as e:
return {"error": f"Failed to read email attachment: {e}", "filename": base}
logger.exception("Failed to read email attachment %s", base)
return {"error": "Failed to read email attachment", "filename": base}
doc_id = _create_markdown_doc(content, "Imported attached email")
return {"doc_id": doc_id, "filename": filepath.name}
+17 -2
View File
@@ -332,8 +332,23 @@ def setup_hwfit_routes():
# "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct".
s = (s or "").lower().strip()
s = s.split("/")[-1] # drop org prefix
s = re.sub(r"[-_.]?gguf$", "", s) # drop trailing gguf marker
s = re.sub(r"[-_.](q\d[^/]*|iq\d[^/]*|fp8|bf16|f16|awq[^/]*|gptq[^/]*)$", "", s)
for suffix in ("-gguf", "_gguf", ".gguf", "gguf"):
if s.endswith(suffix):
s = s[: -len(suffix)]
break
cut_at = None
for idx, ch in enumerate(s):
if ch not in "-_." or idx + 1 >= len(s):
continue
suffix = s[idx + 1:]
if (
suffix in {"fp8", "bf16", "f16"}
or suffix.startswith(("awq", "gptq", "iq"))
or (suffix.startswith("q") and len(suffix) > 1 and suffix[1].isdigit())
):
cut_at = idx
if cut_at is not None:
s = s[:cut_at]
return s
m = catalog.get(model)
+15 -3
View File
@@ -582,6 +582,18 @@ def _safe_build_headers(api_key: Optional[str], base_url: str) -> dict:
return {"Authorization": f"Bearer {api_key}"} if api_key else {}
def _redact_url_for_log(url: str) -> str:
"""Return a URL safe for logs by removing userinfo and query/fragment."""
try:
parsed = urlparse(url or "")
host = parsed.hostname or ""
if parsed.port:
host = f"{host}:{parsed.port}"
return urlunparse((parsed.scheme, host, parsed.path, "", "", ""))
except Exception:
return "<endpoint>"
def _is_discovery_only_provider(provider: str) -> bool:
return provider == "chatgpt-subscription"
@@ -789,13 +801,13 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
return [m for m in models if _is_chat_model(m)]
except httpx.HTTPStatusError as e:
if e.response is not None and _is_loading_model_response(e.response):
logger.info(f"Endpoint still loading model at {url}")
logger.info("Endpoint still loading model at %s", _redact_url_for_log(url))
return []
if api_key:
status = e.response.status_code if e.response is not None else "unknown"
logger.warning(f"Failed to probe {url} with API key: HTTP {status}")
logger.warning("Failed to probe %s with API key: HTTP %s", _redact_url_for_log(url), status)
return []
logger.warning(f"Failed to probe {url}: {e}")
logger.warning("Failed to probe %s: %s", _redact_url_for_log(url), e)
except Exception as e:
if api_key:
logger.warning(f"Failed to probe {url} with API key: {e}")
+52 -16
View File
@@ -61,14 +61,11 @@ _XML_DIRECT_TOOL_RE = re.compile(
# <tool▁call▁begin>tool_name<tool▁sep>{...}<tool▁call▁end>
# These can leak as text through llama.cpp/Ollama-style endpoints when the
# engine does not return structured OpenAI tool_calls.
_STEPFUN_TOOL_CALL_RE = re.compile(
r"<tool▁call▁begin>\s*([A-Za-z_][\w.-]*)\s*<tool▁sep>\s*([\s\S]*?)\s*<tool▁call▁end>",
re.IGNORECASE,
)
_STEPFUN_TOOL_CALLS_WRAPPER_RE = re.compile(
r"</?tool▁calls▁(?:begin|end)>",
re.IGNORECASE,
)
_STEPFUN_CALL_BEGIN = "<tool▁call▁begin>"
_STEPFUN_CALL_SEP = "<tool▁sep>"
_STEPFUN_CALL_END = "<tool▁call▁end>"
_STEPFUN_CALLS_BEGIN = "<tool▁calls▁begin>"
_STEPFUN_CALLS_END = "<tool▁calls▁end>"
# Pattern 4: <tool_code> blocks (MiniMax-M2.5 style)
# {tool => 'tool_name', args => '<param>value</param>'}
@@ -508,13 +505,53 @@ def _parse_xml_direct_tool(tool_match) -> Optional[ToolBlock]:
return function_call_to_tool_block(mapped, json.dumps(params))
def _parse_stepfun_tool_call(call_match) -> Optional[ToolBlock]:
def _iter_stepfun_tool_calls(text: str):
"""Yield StepFun native tool-call token bodies without regex backtracking."""
pos = 0
while True:
start = text.find(_STEPFUN_CALL_BEGIN, pos)
if start < 0:
return
name_start = start + len(_STEPFUN_CALL_BEGIN)
sep = text.find(_STEPFUN_CALL_SEP, name_start)
if sep < 0:
return
end = text.find(_STEPFUN_CALL_END, sep + len(_STEPFUN_CALL_SEP))
if end < 0:
return
raw_name = text[name_start:sep].strip()
body = text[sep + len(_STEPFUN_CALL_SEP):end].strip()
if raw_name and len(raw_name) <= 128:
yield raw_name, body
pos = end + len(_STEPFUN_CALL_END)
def _strip_stepfun_tool_markup(text: str) -> str:
"""Remove StepFun tool-call token blocks and wrappers using literal scans."""
out = []
pos = 0
while True:
start = text.find(_STEPFUN_CALL_BEGIN, pos)
if start < 0:
out.append(text[pos:])
break
end = text.find(_STEPFUN_CALL_END, start + len(_STEPFUN_CALL_BEGIN))
if end < 0:
out.append(text[pos:])
break
out.append(text[pos:start])
pos = end + len(_STEPFUN_CALL_END)
cleaned = "".join(out)
return cleaned.replace(_STEPFUN_CALLS_BEGIN, "").replace(_STEPFUN_CALLS_END, "")
def _parse_stepfun_tool_call(tool_name: str, body: str) -> Optional[ToolBlock]:
"""Parse StepFun native tool-call tokens into an Odysseus ToolBlock."""
tool_name = call_match.group(1).lower().replace("-", "_").replace(".", "_")
tool_name = tool_name.lower().replace("-", "_").replace(".", "_")
mapped = _TOOL_NAME_MAP.get(tool_name) or (tool_name if tool_name in TOOL_TAGS else None)
if not mapped:
return None
body = call_match.group(2).strip()
body = (body or "").strip()
if not body:
return None
try:
@@ -660,8 +697,8 @@ def parse_tool_blocks(text: str, skip_fenced: bool = False) -> List[ToolBlock]:
# Pattern 3: XML-style <tool_call>/<invoke> blocks
if not blocks:
for step_call in _STEPFUN_TOOL_CALL_RE.finditer(text):
block = _parse_stepfun_tool_call(step_call)
for tool_name, body in _iter_stepfun_tool_calls(text):
block = _parse_stepfun_tool_call(tool_name, body)
if block:
blocks.append(block)
if blocks:
@@ -733,8 +770,7 @@ def strip_tool_blocks(text: str, skip_fenced: bool = False) -> str:
text = _normalize_dsml(text)
cleaned = text if skip_fenced else _TOOL_BLOCK_RE.sub('', text)
cleaned = _TOOL_CALL_RE.sub('', cleaned)
cleaned = _STEPFUN_TOOL_CALL_RE.sub('', cleaned)
cleaned = _STEPFUN_TOOL_CALLS_WRAPPER_RE.sub('', cleaned)
cleaned = _strip_stepfun_tool_markup(cleaned)
cleaned = _XML_TOOL_CALL_RE.sub('', cleaned)
cleaned = _XML_OPEN_TOOL_CALL_RE.sub('', cleaned)
cleaned = _TOOL_CODE_RE.sub('', cleaned)
@@ -744,6 +780,6 @@ def strip_tool_blocks(text: str, skip_fenced: bool = False) -> str:
_, (start, end) = raw_web_json
cleaned = cleaned[:start] + cleaned[end:]
# Strip bare <invoke> blocks not wrapped in <tool_call>
cleaned = re.sub(r'<invoke\s+name=["\'].*?</invoke>', '', cleaned, flags=re.DOTALL | re.IGNORECASE)
cleaned = _XML_INVOKE_RE.sub('', cleaned)
cleaned = re.sub(r'\n{3,}', '\n\n', cleaned)
return cleaned.strip()
+33 -14
View File
@@ -619,7 +619,7 @@ async function _startQueuedDownload(task) {
if (t.sessionId === data.session_id) return false;
return !(key && t.type === 'download' && t.status === 'queued' && _downloadDedupeKey(t) === key);
});
if (!found) tasks.push(_stripTaskSecrets(launchedTask));
if (!found) tasks.push(_redactTaskForStorage(launchedTask));
_saveTasks(tasks);
_renderRunningTab();
_startBackgroundMonitor();
@@ -760,12 +760,22 @@ function _isTombstoned(id) {
return ts != null && (Date.now() - ts) <= _TOMBSTONE_TTL_MS;
}
function _stripTaskSecrets(task) {
function _redactStoredText(value) {
return String(value || '')
.replace(/hf_[A-Za-z0-9]{20,}/g, '[redacted-token]')
.replace(/((?:api[_-]?key|token|authorization|password|passwd|secret)\s*[=:]\s*)(["']?)[^\s"']+/gi, '$1$2[redacted]');
}
function _redactTaskForStorage(task) {
if (!task || typeof task !== 'object') return task;
const safe = { ...task };
if (typeof safe.output === 'string') safe.output = _redactStoredText(safe.output);
if (safe.payload && typeof safe.payload === 'object') {
safe.payload = { ...safe.payload };
delete safe.payload.hf_token;
delete safe.payload.hfToken;
if (typeof safe.payload._cmd === 'string') safe.payload._cmd = _redactStoredText(safe.payload._cmd);
if (typeof safe.payload.cmd === 'string') safe.payload.cmd = _redactStoredText(safe.payload.cmd);
}
return safe;
}
@@ -774,15 +784,14 @@ function _stripStateSecrets(state) {
const safe = { ...state };
if (safe.env && typeof safe.env === 'object') {
const { hfToken, ...env } = safe.env;
if (hfToken) env.hfToken = hfToken;
safe.env = env;
}
if (Array.isArray(safe.tasks)) safe.tasks = safe.tasks.map(_stripTaskSecrets);
if (Array.isArray(safe.tasks)) safe.tasks = safe.tasks.map(_redactTaskForStorage);
return safe;
}
export function _saveTasks(tasks) {
localStorage.setItem(TASKS_KEY, JSON.stringify((tasks || []).map(_stripTaskSecrets)));
localStorage.setItem(TASKS_KEY, JSON.stringify((tasks || []).map(_redactTaskForStorage)));
_syncToServer();
}
@@ -807,7 +816,7 @@ export function _addTask(sessionId, name, type, payload) {
return !(key && t.type === 'download' && t.status === 'queued' && _downloadDedupeKey(t) === key);
});
}
const task = _stripTaskSecrets({ id: sessionId, sessionId, name, type, status: 'running', output: '', ts: Date.now(), payload: payload || null, remoteHost, remoteServerKey, remoteServerName, sshPort, platform });
const task = _redactTaskForStorage({ id: sessionId, sessionId, name, type, status: 'running', output: '', ts: Date.now(), payload: payload || null, remoteHost, remoteServerKey, remoteServerName, sshPort, platform });
tasks.push(task);
_saveTasks(tasks);
// New action → collapse all other cards, leave only this one open.
@@ -1102,14 +1111,24 @@ function _presetEnvFields(task) {
};
}
function _redactPresetForStorage(preset) {
if (!preset || typeof preset !== 'object') return preset;
const safe = { ...preset };
if (typeof safe.cmd === 'string') safe.cmd = _redactStoredText(safe.cmd);
if (typeof safe.command === 'string') safe.command = _redactStoredText(safe.command);
delete safe.hf_token;
delete safe.hfToken;
return safe;
}
function _saveTaskAsPreset(task, label) {
const host = task.remoteHost || 'localhost';
const portMatch = task.payload?._cmd?.match(/--port\s+(\d+)/);
const port = portMatch ? portMatch[1] : '8000';
const presets = _loadPresets();
if (presets.some(p => p.cmd === task.payload._cmd)) return false;
presets.push({ name: task.name, model: task.payload.repo_id, backend: 'vllm', host, port, cmd: task.payload._cmd, remoteHost: task.remoteHost || '', label: label || task.name, ..._presetEnvFields(task) });
_savePresets(presets);
presets.push(_redactPresetForStorage({ name: task.name, model: task.payload.repo_id, backend: 'vllm', host, port, cmd: task.payload._cmd, remoteHost: task.remoteHost || '', label: label || task.name, ..._presetEnvFields(task) }));
_savePresets(presets.map(_redactPresetForStorage));
return true;
}
@@ -1152,7 +1171,7 @@ function _autoSaveWorkingConfig(task) {
const existing = presets.find(p => p.cmd === cmd);
if (existing) {
task._autoSaved = true;
if (!existing.confirmedWorking) { existing.confirmedWorking = true; _savePresets(presets); }
if (!existing.confirmedWorking) { existing.confirmedWorking = true; _savePresets(presets.map(_redactPresetForStorage)); }
return; // already saved → just confirm it, no duplicate, no toast
}
// Respect the per-model cap the manual save flow uses (max 5).
@@ -1160,13 +1179,13 @@ function _autoSaveWorkingConfig(task) {
const host = task.remoteHost || 'localhost';
const portMatch = cmd.match(/--port[=\s]+(\d+)/);
const port = portMatch ? portMatch[1] : '8000';
presets.push({
presets.push(_redactPresetForStorage({
name: task.name, model, backend: 'vllm', host, port,
cmd, remoteHost: task.remoteHost || '',
label: _autoConfigLabel(task), confirmedWorking: true, autoSaved: true,
..._presetEnvFields(task),
});
_savePresets(presets);
}));
_savePresets(presets.map(_redactPresetForStorage));
task._autoSaved = true;
uiModule.showToast('Saved working config');
}
@@ -1252,7 +1271,7 @@ export async function _syncFromServer() {
merged.push(t);
}
}
localStorage.setItem(TASKS_KEY, JSON.stringify(merged.map(_stripTaskSecrets)));
localStorage.setItem(TASKS_KEY, JSON.stringify(merged.map(_redactTaskForStorage)));
if (state.env) {
// The active server selection (remoteHost + its env/path/platform) is a
@@ -3711,7 +3730,7 @@ async function _pollBackgroundStatus() {
}
}
if (added > 0) {
localStorage.setItem(TASKS_KEY, JSON.stringify(merged.map(_stripTaskSecrets)));
localStorage.setItem(TASKS_KEY, JSON.stringify(merged.map(_redactTaskForStorage)));
_renderRunningTab();
}
}
+30 -7
View File
@@ -61,6 +61,28 @@ function _saveServeFavorites(favorites) {
} catch {}
}
function _redactStoredCommand(value) {
return String(value || '')
.replace(/hf_[A-Za-z0-9]{20,}/g, '[redacted-token]')
.replace(/((?:api[_-]?key|token|authorization|password|passwd|secret)\s*[=:]\s*)(["']?)[^\s"']+/gi, '$1$2[redacted]');
}
function _redactServeStateForStorage(value) {
if (!value || typeof value !== 'object') return value;
if (Array.isArray(value)) return value.map(_redactServeStateForStorage);
const safe = { ...value };
for (const key of Object.keys(safe)) {
if (/token|password|passwd|secret|api[_-]?key/i.test(key)) {
delete safe[key];
} else if (typeof safe[key] === 'string' && /cmd|command|args|env/i.test(key)) {
safe[key] = _redactStoredCommand(safe[key]);
} else if (safe[key] && typeof safe[key] === 'object') {
safe[key] = _redactServeStateForStorage(safe[key]);
}
}
return safe;
}
function _isServeFavorite(repo) {
return _loadServeFavorites().has(String(repo || ''));
}
@@ -2118,7 +2140,7 @@ function _rerenderCachedModels() {
if (el.type === 'checkbox') fields[el.dataset.field] = el.checked;
else fields[el.dataset.field] = el.value;
});
presets.push({ name: shortName, model: repo, cmd, remoteHost: host, port: fields.port || '8000', label, fields });
presets.push(_redactServeStateForStorage({ name: shortName, model: repo, cmd, remoteHost: host, port: fields.port || '8000', label, fields }));
_savePresets(presets);
uiModule.showToast(`Saved "${label}"`);
_updateSavedToggleLabel();
@@ -2209,7 +2231,7 @@ function _rerenderCachedModels() {
const target = _presetsForModel(cur, repo)[slotIdx];
if (target) {
target.favorite = !target.favorite;
_savePresets(cur);
_savePresets(cur.map(_redactServeStateForStorage));
uiModule.showToast(target.favorite ? 'Favorited — pinned to top' : 'Unfavorited');
_showSavedConfigMenu(anchor);
}
@@ -2223,7 +2245,7 @@ function _rerenderCachedModels() {
if (toRemove) {
const gi = cur.indexOf(toRemove);
if (gi >= 0) cur.splice(gi, 1);
_savePresets(cur);
_savePresets(cur.map(_redactServeStateForStorage));
}
uiModule.showToast(`Deleted "${label}"`);
_updateSavedToggleLabel();
@@ -3205,7 +3227,7 @@ function _rerenderCachedModels() {
const _saved = { ...serveState, _forceBackend: true };
delete _saved._replaceTaskId;
byRepo[repo] = _saved;
localStorage.setItem(SERVE_STATE_KEY, JSON.stringify({ _byRepo: byRepo, _lastUsed: _saved }));
localStorage.setItem(SERVE_STATE_KEY, JSON.stringify(_redactServeStateForStorage({ _byRepo: byRepo, _lastUsed: _saved })));
} catch {}
const origEnv = _envState.env;
const origEnvPath = _envState.envPath;
@@ -3352,6 +3374,7 @@ async function _deleteCachedModel(repo, itemEl, skipConfirm = false, model = nul
const host = _resolveCacheHost();
let cmd;
if (_isWindows()) {
const _psSingleQuote = (value) => `'${String(value || '').replace(/'/g, "''")}'`;
const winTarget = target.startsWith('~')
? target.replace(/^~/, '$env:USERPROFILE').replace(/\//g, '\\')
: target.replace(/\//g, '\\');
@@ -3361,9 +3384,9 @@ async function _deleteCachedModel(repo, itemEl, skipConfirm = false, model = nul
.filter(Boolean)
.map(rel => `${winTarget}\\${rel.replace(/\//g, '\\')}`);
if (!targets.length) return;
cmd = targets.map(p => `Remove-Item -Force "${p.replace(/"/g, '\\"')}" -ErrorAction SilentlyContinue`).join('; ');
cmd = targets.map(p => `Remove-Item -Force ${_psSingleQuote(p)} -ErrorAction SilentlyContinue`).join('; ');
} else {
cmd = `Remove-Item -Recurse -Force "${winTarget}" -ErrorAction SilentlyContinue`;
cmd = `Remove-Item -Recurse -Force ${_psSingleQuote(winTarget)} -ErrorAction SilentlyContinue`;
}
if (host) {
const pf = _sshPrefix(_getPort(host));
@@ -3490,7 +3513,7 @@ export async function openServePanelForRepo(repo, fields) {
// overridable defaults.
const _seeded = { ...fields, _forceBackend: true };
byRepo[repo] = _seeded;
localStorage.setItem(SERVE_STATE_KEY, JSON.stringify({ _byRepo: byRepo, _lastUsed: _seeded }));
localStorage.setItem(SERVE_STATE_KEY, JSON.stringify(_redactServeStateForStorage({ _byRepo: byRepo, _lastUsed: _seeded })));
} catch {}
}
// Switch to the Serve tab (its click handler triggers _fetchCachedModels).
+2 -2
View File
@@ -1825,13 +1825,13 @@ function _renderNotes() {
const agentTitle = agentStatus === 'stream_complete'
? 'Agent stream finished for this todo'
: (agentStatus === 'running' ? 'Agent is working on this todo' : 'Solve this todo with the agent');
const agentSessionAttr = item.agent_session_id ? ` data-session-id="${_esc(item.agent_session_id)}"` : '';
const agentSessionAttr = item.agent_session_id ? ` data-session-id="${_attrEsc(item.agent_session_id)}"` : '';
const agentMenuTitle = item.agent_session_title || `Agent: ${(item.text || '').slice(0, 40)}`;
const indent = Math.min(item.indent || 0, 3);
contentHtml += `<div class="note-checkbox${doneClass}" data-note-id="${note.id}" data-idx="${i}" style="padding-left:${indent * 16}px">
<span class="note-check-dot" title="Mark done"></span>
<span class="note-check-text">${_linkify(item.text)}</span>
<button class="note-checkbox-agent${agentDoneClass}" data-note-id="${note.id}" data-idx="${i}"${agentSessionAttr} data-agent-title="${_attrEsc(agentMenuTitle)}" title="${agentTitle}">
<button class="note-checkbox-agent${agentDoneClass}" data-note-id="${_attrEsc(note.id)}" data-idx="${i}"${agentSessionAttr} data-agent-title="${_attrEsc(agentMenuTitle)}" title="${_attrEsc(agentTitle)}">
<svg width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M12 8V4H8"/><rect x="4" y="8" width="16" height="12" rx="2"/><path d="M2 14h2M20 14h2M15 13v2M9 13v2"/></svg>
</button>
<button class="note-checkbox-rm" data-note-id="${note.id}" data-idx="${i}" title="Delete item">