mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-15 17:25:26 -04:00
Cookbook UI: Ollama browser, advanced serve fold, API tokens form, diagnosis toolbar, polish
Surface a lot of accumulated cookbook + UI work as a single non-agent
commit so the agent rework lands cleanly.
Highlights:
- Ollama as a first-class backend in the Cookbook:
* Download input accepts ollama-style names (name:tag) → backend=ollama
* /api/cookbook/ollama/library (cached scrape of ollama.com + curated
fallback so classic models like qwen2.5 stay reachable)
* "Browse Ollama library" toggle below Download with size chips
* Engine=Ollama in hwfit toolbar merges the Ollama library into the
main scan list as per-tag rows with the same Fit/Param/Quant/VRAM
columns; click → fills Download input
- API Tokens form added to Integrations panel (markup for the already-wired
loadTokens()/initTokenForm() handlers, which previously had no HTML)
- Serve panel polish: Advanced fold tightening (-8px nudges on vLLM
checks, Extra args, Spec row), n_cpu_moe + Split Mode controls
pulled up 8px to align with the row's checkboxes, GGUF File dropdown
exposed for Ollama backend, GPU re-render on Edit serve restore,
_forceBackend flag so saved serveState wins over backend detection,
cookbook:servers-changed CustomEvent so panels don't need refresh
- Models page redesign: Add Models row (URL + hidden API key reveal +
Type select + Scan/Ollama/Key/Test/Add icon buttons), Probe All +
Clear-offline buttons in Added Models toolbar, offline-pill removed
(opacity already conveys state), Engine dropdown gains Ollama option
- _ping_endpoint probes /v1/models first, then the base URL, and accepts 4xx
as reachable (vLLM returns 404 on bare /v1, so fully working endpoints
were showing offline)
- Diagnosis card: × dismiss + Copy bundle buttons restored on the
serve error feedback card
- Orphan tmux sweep re-enabled behind a 60s rate-limit + background
Thread (off the main event loop) so dead serves get discovered
- cookbook_routes auto-register watchdog: drops the endpoint if the
serve session exits non-zero within the first ~3min
- ollama-rocm sidecar awareness in download wrapper (`docker exec
ollama-rocm ollama pull` when host ollama isn't installed)
- Skill extractor sets initial_status="published" when
auto_approve_skills pref is on (audit demotes later)
- Skill list / model list / cookbook scan misc polish
This commit is contained in:
@@ -650,6 +650,10 @@ app.include_router(calendar_router)
|
||||
from routes.shell_routes import setup_shell_routes
|
||||
app.include_router(setup_shell_routes())
|
||||
|
||||
# Terminal agents (tmux-backed Codex/Claude/shell sessions)
|
||||
from routes.terminal_agent_routes import setup_terminal_agent_routes
|
||||
app.include_router(setup_terminal_agent_routes())
|
||||
|
||||
# Cookbook (model download/serve/cache, cookbook state sync)
|
||||
from routes.cookbook_routes import setup_cookbook_routes
|
||||
app.include_router(setup_cookbook_routes())
|
||||
|
||||
+532
-1
@@ -22,6 +22,7 @@ import os
|
||||
import os.path
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timedelta
|
||||
import uuid
|
||||
|
||||
from mcp.server import Server
|
||||
from mcp.server.stdio import stdio_server
|
||||
@@ -67,6 +68,59 @@ def _db_path() -> Path:
|
||||
return Path(APP_DB)
|
||||
|
||||
|
||||
def _load_email_writing_style() -> str:
|
||||
"""Return the existing Settings > Email > Writing Style value."""
|
||||
try:
|
||||
settings_path = DATA_DIR / "settings.json"
|
||||
if not settings_path.exists():
|
||||
return ""
|
||||
settings = json.loads(settings_path.read_text(encoding="utf-8"))
|
||||
return str(settings.get("email_writing_style") or "").strip()
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
|
||||
def _writing_style_guidance() -> str:
    """Return the tone guidance text for email-drafting tool descriptions.

    When a writing style is saved, the guidance embeds it verbatim and states
    that it overrides generic tone advice. Otherwise a generic
    "concise, natural tone" instruction is returned.
    """
    saved_style = _load_email_writing_style()
    if saved_style:
        return (
            "Use this saved writing style from Settings > Email > Writing Style when "
            "drafting the body. It overrides generic tone guidance:\n"
            + saved_style
        )
    return (
        "No saved writing style is configured in Settings > Email > Writing Style. "
        "Use a concise, natural tone and do not invent facts."
    )
|
||||
|
||||
|
||||
def _default_document_owner() -> str | None:
|
||||
"""Best-effort owner for MCP-created documents.
|
||||
|
||||
MCP stdio tools do not receive the browser request's authenticated user,
|
||||
but the document library is owner-filtered. Stamp drafts to the configured
|
||||
single/default admin so assistant-created email drafts are visible.
|
||||
"""
|
||||
owner = os.environ.get("ODYSSEUS_DOCUMENT_OWNER", "").strip()
|
||||
if owner:
|
||||
return owner
|
||||
try:
|
||||
auth_path = DATA_DIR / "auth.json"
|
||||
if not auth_path.exists():
|
||||
return None
|
||||
users = (json.loads(auth_path.read_text(encoding="utf-8")).get("users") or {})
|
||||
if not isinstance(users, dict) or not users:
|
||||
return None
|
||||
admins = [name for name, data in users.items() if isinstance(data, dict) and data.get("is_admin")]
|
||||
if len(admins) == 1:
|
||||
return admins[0]
|
||||
if len(users) == 1:
|
||||
return next(iter(users))
|
||||
return admins[0] if admins else next(iter(users))
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _list_accounts_raw() -> list:
|
||||
"""Return list of dicts from the email_accounts table. Empty list if table
|
||||
missing or empty. Never raises."""
|
||||
@@ -896,6 +950,340 @@ def _send_email(to, subject, body, in_reply_to=None, references=None, cc=None, b
|
||||
}
|
||||
|
||||
|
||||
def _build_email_document_content(
|
||||
to,
|
||||
subject,
|
||||
body,
|
||||
*,
|
||||
cc=None,
|
||||
bcc=None,
|
||||
in_reply_to=None,
|
||||
references=None,
|
||||
source_uid=None,
|
||||
source_folder=None,
|
||||
):
|
||||
header_lines = [f"To: {to or ''}"]
|
||||
if cc:
|
||||
header_lines.append(f"Cc: {cc}")
|
||||
if bcc:
|
||||
header_lines.append(f"Bcc: {bcc}")
|
||||
header_lines.append(f"Subject: {subject or ''}")
|
||||
if in_reply_to:
|
||||
header_lines.append(f"In-Reply-To: {in_reply_to}")
|
||||
if references:
|
||||
header_lines.append(f"References: {references}")
|
||||
if source_uid:
|
||||
header_lines.append(f"X-Source-UID: {source_uid}")
|
||||
if source_folder:
|
||||
header_lines.append(f"X-Source-Folder: {source_folder}")
|
||||
return "\n".join(header_lines) + "\n---\n" + (body or "")
|
||||
|
||||
|
||||
def _merge_email_reply_body(existing_content: str, reply_body: str) -> str:
|
||||
"""Preserve email headers and quoted chain while replacing the editable reply body."""
|
||||
if "\n---\n" not in (existing_content or ""):
|
||||
return reply_body or ""
|
||||
head, body = existing_content.split("\n---\n", 1)
|
||||
quote_markers = (
|
||||
"---------- Previous message ----------",
|
||||
"-----Original Message-----",
|
||||
"----- Original Message -----",
|
||||
)
|
||||
quote_index = -1
|
||||
for marker in quote_markers:
|
||||
idx = body.find(marker)
|
||||
if idx != -1 and (quote_index == -1 or idx < quote_index):
|
||||
quote_index = idx
|
||||
quote = body[quote_index:].strip() if quote_index != -1 else ""
|
||||
merged_body = (reply_body or "").strip()
|
||||
if quote:
|
||||
merged_body = f"{merged_body}\n\n{quote}" if merged_body else quote
|
||||
return f"{head}\n---\n{merged_body}"
|
||||
|
||||
|
||||
def _create_email_draft_document(
    *,
    to,
    subject,
    body,
    title=None,
    cc=None,
    bcc=None,
    in_reply_to=None,
    references=None,
    source_uid=None,
    source_folder=None,
    account=None,
    source_message_id=None,
):
    """Create (or refresh) an Odysseus email compose document. Does not send.

    When ``source_uid`` and ``source_folder`` are given and an active email
    document for the same owner, UID and folder already exists with a
    ``---`` separated body, that document is updated in place: the reply text
    is merged via ``_merge_email_reply_body`` and a new ``DocumentVersion`` is
    recorded. Otherwise a new ``Document`` with version 1 is created.

    The existing-draft lookup is scoped to the resolved mailbox account when
    its id is known. IMAP UIDs are only unique per mailbox, so matching on
    UID + folder alone could overwrite a draft that belongs to a different
    account. Rows with no recorded account id still match, so drafts stored
    without one remain updatable.

    Args:
        to, subject, body: Draft recipient(s), subject and body text.
        title: Optional document title; defaults to the subject, then
            ``"Email draft"``.
        cc, bcc, in_reply_to, references: Optional header values.
        source_uid, source_folder: Source email identifiers for reply drafts.
        account: Optional account selector passed to ``_load_config``.
        source_message_id: Message-ID of the source email, stored on the
            document.

    Returns:
        A dict with ``draft``, ``doc_id``, ``title``, ``language``,
        ``account``, ``account_id``, ``to`` and ``subject``; updates of an
        existing draft additionally carry ``updated: True``.
    """
    from core.database import SessionLocal, Document, DocumentVersion
    try:
        from src.event_bus import fire_event
    except Exception:
        fire_event = None

    def _notify(event_name, owner):
        """Fire a document event; failures must never break draft creation."""
        if not fire_event:
            return
        try:
            fire_event(event_name, owner)
        except Exception:
            pass

    cfg = _load_config(account or None)
    account_id = cfg.get("account_id")
    account_name = cfg.get("account_name")
    content = _build_email_document_content(
        to,
        subject,
        body,
        cc=cc,
        bcc=bcc,
        in_reply_to=in_reply_to,
        references=references,
        source_uid=source_uid,
        source_folder=source_folder,
    )
    doc_id = str(uuid.uuid4())
    ver_id = str(uuid.uuid4())
    doc_title = (title or subject or "Email draft").strip() or "Email draft"
    doc_owner = _default_document_owner()
    # Store the UID in the same (string) form the lookup below compares with.
    stored_uid = None if source_uid is None else str(source_uid)

    db = SessionLocal()
    try:
        if source_uid and source_folder:
            query = (
                db.query(Document)
                .filter(Document.is_active == True)  # noqa: E712 - SQL column comparison
                .filter(Document.language == "email")
                .filter(Document.owner == doc_owner)
                .filter(Document.source_email_uid == stored_uid)
                .filter(Document.source_email_folder == source_folder)
            )
            if account_id:
                query = query.filter(
                    (Document.source_email_account_id == account_id)
                    | (Document.source_email_account_id.is_(None))
                )
            existing = query.order_by(Document.updated_at.desc()).first()
            if existing and "\n---\n" in (existing.current_content or ""):
                existing.current_content = _merge_email_reply_body(
                    existing.current_content, body or ""
                )
                existing.version_count = (existing.version_count or 0) + 1
                db.add(
                    DocumentVersion(
                        id=ver_id,
                        document_id=existing.id,
                        version_number=existing.version_count,
                        content=existing.current_content,
                        summary="Updated by email MCP draft tool",
                        source="ai",
                    )
                )
                db.commit()
                _notify("document_updated", doc_owner)
                return {
                    "draft": True,
                    "updated": True,
                    "doc_id": existing.id,
                    "title": existing.title,
                    "language": existing.language,
                    "account": account_name,
                    "account_id": account_id,
                    "to": to,
                    "subject": subject,
                }

        db.add(
            Document(
                id=doc_id,
                session_id=None,
                title=doc_title,
                language="email",
                current_content=content,
                version_count=1,
                is_active=True,
                owner=doc_owner,
                source_email_uid=stored_uid,
                source_email_folder=source_folder,
                source_email_account_id=account_id,
                source_email_message_id=source_message_id,
            )
        )
        db.add(
            DocumentVersion(
                id=ver_id,
                document_id=doc_id,
                version_number=1,
                content=content,
                summary="Created by email MCP draft tool",
                source="ai",
            )
        )
        db.commit()
        _notify("document_created", doc_owner)
        return {
            "draft": True,
            "doc_id": doc_id,
            "title": doc_title,
            "language": "email",
            "account": account_name,
            "account_id": account_id,
            "to": to,
            "subject": subject,
        }
    finally:
        db.close()
|
||||
|
||||
|
||||
def _draft_reply_to_email(uid, body, folder="INBOX", reply_all=False, account=None, title=None):
    """Create a threaded Odysseus reply draft document. Does not send.

    Fetches the original message by UID (read-only), derives the reply
    subject, recipient and ``In-Reply-To``/``References`` values from it, and
    hands everything to ``_create_email_draft_document``.

    Args:
        uid: UID of the email being replied to.
        body: Reply body text.
        folder: IMAP folder holding the original message.
        reply_all: When true, CC the original To/Cc recipients, excluding the
            sender and this account's own addresses.
        account: Optional account selector.
        title: Optional document title; defaults to the reply subject.

    Returns:
        ``{"error": ...}`` when the message cannot be fetched, otherwise the
        result dict of ``_create_email_draft_document``.
    """
    conn = _imap_connect(account)
    try:
        conn.select(folder, readonly=True)
        status, msg_data = conn.uid("FETCH", _b(uid), "(RFC822)")
    finally:
        # Always release the connection, even when select/FETCH raises.
        try:
            conn.logout()
        except Exception:
            # A failed logout must not discard a message already fetched.
            pass

    # A successful FETCH yields a (envelope, payload) tuple as its first item;
    # anything else (None, a bare bytes status line) means no message.
    if status != "OK" or not msg_data or not isinstance(msg_data[0], tuple):
        return {"error": f"Failed to fetch email UID {uid}"}
    orig = email.message_from_bytes(msg_data[0][1])

    orig_subject = _decode_header(orig.get("Subject", ""))
    reply_subject = orig_subject if orig_subject.lower().startswith("re:") else f"Re: {orig_subject}"
    orig_message_id = (orig.get("Message-ID") or "").strip()
    # Long References headers are usually folded over several lines; unfold
    # them so the value stays on one header line in the draft.
    orig_references = " ".join((orig.get("References") or "").split())
    new_references = f"{orig_references} {orig_message_id}".strip() if orig_references else orig_message_id

    sender = _decode_header(orig.get("From", ""))
    _, sender_addr = email.utils.parseaddr(sender)

    cc = None
    if reply_all:
        cfg = _load_config(account)
        # Addresses that must not be CC'd: ourselves and the sender (already
        # in To). Compared case-insensitively, which also de-duplicates.
        skip = {
            (cfg.get("imap_user") or "").strip().lower(),
            (cfg.get("from_address") or "").strip().lower(),
            (sender_addr or "").strip().lower(),
        }
        cc_addrs = []
        for header_name in ("To", "Cc"):
            for _, addr in email.utils.getaddresses([orig.get(header_name, "")]):
                addr_key = (addr or "").strip().lower()
                if not addr_key or addr_key in skip:
                    continue
                skip.add(addr_key)
                cc_addrs.append(addr)
        if cc_addrs:
            cc = ", ".join(cc_addrs)

    return _create_email_draft_document(
        to=sender_addr,
        subject=reply_subject,
        body=body,
        title=title or reply_subject,
        cc=cc,
        in_reply_to=orig_message_id,
        references=new_references,
        source_uid=uid,
        source_folder=folder,
        account=account,
        source_message_id=orig_message_id,
    )
|
||||
|
||||
|
||||
async def _ai_draft_reply_to_email(uid, folder="INBOX", reply_all=False, account=None, title=None):
    """Generate a reply with Odysseus' AI-reply prompt/style, then create a compose doc.

    Steps: read the original email, build the system prompt (base prompt plus
    the saved writing style, if any), collect de-duplicated LLM endpoint
    candidates in priority order (utility, default, utility fallbacks, chat
    fallbacks), call the model, post-process the reply, and create the draft
    via ``_draft_reply_to_email``. Nothing is sent.

    Args:
        uid: UID of the email to reply to.
        folder: IMAP folder holding the email.
        reply_all: Forwarded to ``_draft_reply_to_email``.
        account: Optional account selector.
        title: Optional document title; defaults to the reply subject.

    Returns:
        ``{"error": ...}`` on any failure (unreadable email, empty body,
        helpers unavailable, no endpoint, generation failure, empty reply),
        otherwise the result of ``_draft_reply_to_email``.
    """
    read_result = _read_email(uid=uid, folder=folder, account=account)
    if "error" in read_result:
        return read_result

    to_addr = read_result.get("from_address") or email.utils.parseaddr(read_result.get("from") or "")[1]
    subject = read_result.get("subject") or ""
    reply_subject = subject if subject.lower().startswith("re:") else f"Re: {subject}"
    original_body = read_result.get("body") or ""

    if not original_body.strip():
        return {"error": "No email body available for AI reply"}

    try:
        from routes.email_helpers import (
            _EMAIL_REPLY_SYS_PROMPT_BASE,
            _apply_email_style_mechanics,
            _extract_reply,
            _load_settings,
        )
        from src.endpoint_resolver import (
            resolve_endpoint,
            resolve_utility_fallback_candidates,
            resolve_chat_fallback_candidates,
        )
        from src.llm_core import llm_call_async_with_fallback
    except Exception as exc:
        return {"error": f"AI reply helpers unavailable: {exc}"}

    settings = _load_settings()
    style = settings.get("email_writing_style", "")
    system_prompt = _EMAIL_REPLY_SYS_PROMPT_BASE
    if style:
        system_prompt += f"\n\nWRITING STYLE TO MATCH:\n{style}"

    user_msg = (
        f"Recipient: {to_addr}\nSubject: {reply_subject}\n\n"
        # Cap the quoted original to keep the prompt bounded.
        f"Original email and any current draft:\n{original_body[:6000]}\n\n"
        "Draft a reply. Return only the reply body text."
    )

    candidates = []
    seen = set()

    def _add(url, model, headers):
        """Append an endpoint candidate once per (url, model) pair."""
        key = (url or "", model or "")
        if not url or not model or key in seen:
            return
        seen.add(key)
        candidates.append((url, model, headers))

    # Primary endpoints: a role that fails to resolve is simply skipped.
    for role in ("utility", "default"):
        try:
            _add(*resolve_endpoint(role, owner=None))
        except Exception:
            pass

    # Fallback pools: retry without ``owner`` when the call raises TypeError
    # (e.g. a resolver that does not accept that keyword).
    for resolver in (resolve_utility_fallback_candidates, resolve_chat_fallback_candidates):
        try:
            fallbacks = resolver(owner=None) or []
        except TypeError:
            fallbacks = resolver() or []
        for cand in fallbacks:
            _add(*cand)

    if not candidates:
        return {"error": "No LLM endpoint configured for AI reply"}

    try:
        raw_reply = await llm_call_async_with_fallback(
            candidates,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_msg},
            ],
            temperature=0.7,
            max_tokens=1024,
            timeout=60,
        )
    except Exception as exc:
        return {"error": f"AI reply generation failed: {exc}"}

    reply = _apply_email_style_mechanics(_extract_reply(raw_reply or ""))
    if not reply:
        return {"error": "AI reply generation returned an empty response"}

    return _draft_reply_to_email(
        uid=uid,
        body=reply,
        folder=folder,
        reply_all=reply_all,
        account=account,
        title=title or reply_subject,
    )
|
||||
|
||||
|
||||
def _reply_to_email(uid, body, folder="INBOX", reply_all=False, account=None):
|
||||
"""Reply to an existing email by UID. Threads via In-Reply-To/References."""
|
||||
conn = None
|
||||
@@ -1189,6 +1577,8 @@ async def list_tools() -> list[Tool]:
|
||||
name="send_email",
|
||||
description=(
|
||||
"Send a new email via SMTP. Provide recipient(s), subject, and body. "
|
||||
"This sends immediately; for normal assistant-written email, prefer "
|
||||
"draft_email so the user can review and send from Odysseus. "
|
||||
"For replying to an existing thread, use reply_to_email instead. "
|
||||
"Pass `account` to send from a non-default mailbox."
|
||||
),
|
||||
@@ -1205,10 +1595,35 @@ async def list_tools() -> list[Tool]:
|
||||
"required": ["to", "subject", "body"],
|
||||
},
|
||||
),
|
||||
Tool(
|
||||
name="draft_email",
|
||||
description=(
|
||||
"Create a new Odysseus email compose draft document. This DOES NOT send. "
|
||||
"Use this as the default way to write an email for the user: it opens "
|
||||
"a reviewable email document with To/Cc/Bcc/Subject/body, and the user "
|
||||
"can edit or press Send in Odysseus. "
|
||||
f"{_writing_style_guidance()}"
|
||||
),
|
||||
inputSchema={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"to": {"type": "string", "description": "Recipient email address(es), comma-separated"},
|
||||
"subject": {"type": "string", "description": "Email subject line"},
|
||||
"body": {"type": "string", "description": "Draft body"},
|
||||
"cc": {"type": "string", "description": "CC address(es), comma-separated (optional)"},
|
||||
"bcc": {"type": "string", "description": "BCC address(es), comma-separated (optional)"},
|
||||
"title": {"type": "string", "description": "Optional Odysseus document title"},
|
||||
**ACCOUNT_PROP,
|
||||
},
|
||||
"required": ["to", "subject", "body"],
|
||||
},
|
||||
),
|
||||
Tool(
|
||||
name="reply_to_email",
|
||||
description=(
|
||||
"Reply to an existing email by UID. Automatically threads the reply with "
|
||||
"Reply to an existing email by UID. This sends immediately; for normal "
|
||||
"assistant-written replies, prefer draft_email_reply so the user can "
|
||||
"review and send from Odysseus. Automatically threads the reply with "
|
||||
"In-Reply-To and References headers, prefixes 'Re:' on the subject, and "
|
||||
"uses the original sender as the recipient. Set reply_all=true to also CC "
|
||||
"the original To/Cc recipients. For follow-up 'reply ...' requests, use "
|
||||
@@ -1226,6 +1641,49 @@ async def list_tools() -> list[Tool]:
|
||||
"required": ["uid", "body"],
|
||||
},
|
||||
),
|
||||
Tool(
|
||||
name="draft_email_reply",
|
||||
description=(
|
||||
"Create an Odysseus email reply draft document for an existing email UID. "
|
||||
"This DOES NOT send. It threads the draft with In-Reply-To/References, "
|
||||
"prefills the recipient and subject, and stores source email metadata so "
|
||||
"the user can review and send from the normal email composer. "
|
||||
f"{_writing_style_guidance()}"
|
||||
),
|
||||
inputSchema={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"uid": {"type": "string", "description": "Exact Email UID from list_emails/read_email; never invent UID 1"},
|
||||
"body": {"type": "string", "description": "Draft reply body text"},
|
||||
"folder": {"type": "string", "description": "IMAP folder (default: INBOX)", "default": "INBOX"},
|
||||
"reply_all": {"type": "boolean", "description": "Reply to all recipients (default: false)", "default": False},
|
||||
"title": {"type": "string", "description": "Optional Odysseus document title"},
|
||||
**ACCOUNT_PROP,
|
||||
},
|
||||
"required": ["uid", "body"],
|
||||
},
|
||||
),
|
||||
Tool(
|
||||
name="ai_draft_email_reply",
|
||||
description=(
|
||||
"Generate an AI reply using Odysseus' existing AI Reply behavior, "
|
||||
"including Settings > Email > Writing Style, then create an email "
|
||||
"compose document for review. This DOES NOT send and does NOT save "
|
||||
"to the mailbox Drafts folder. Use this when the user asks you to "
|
||||
"write or draft a reply to an email without dictating the exact body."
|
||||
),
|
||||
inputSchema={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"uid": {"type": "string", "description": "Exact Email UID from list_emails/read_email; never invent UID 1"},
|
||||
"folder": {"type": "string", "description": "IMAP folder (default: INBOX)", "default": "INBOX"},
|
||||
"reply_all": {"type": "boolean", "description": "Reply to all recipients (default: false)", "default": False},
|
||||
"title": {"type": "string", "description": "Optional Odysseus document title"},
|
||||
**ACCOUNT_PROP,
|
||||
},
|
||||
"required": ["uid"],
|
||||
},
|
||||
),
|
||||
Tool(
|
||||
name="archive_email",
|
||||
description="Move an email out of the inbox into the Archive folder. Use after handling an email you want to keep but no longer need in the inbox.",
|
||||
@@ -1552,6 +2010,31 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
|
||||
acct_note = f" (from {result['account']})" if result.get("account") else ""
|
||||
return [TextContent(type="text", text=f"Sent email to {result['to']} with subject '{result['subject']}'{acct_note}.")]
|
||||
|
||||
elif name == "draft_email":
|
||||
to = arguments.get("to")
|
||||
subject = arguments.get("subject")
|
||||
body = arguments.get("body")
|
||||
if not to or not subject or body is None:
|
||||
return [TextContent(type="text", text="Error: to, subject, and body are required")]
|
||||
result = _create_email_draft_document(
|
||||
to=to,
|
||||
subject=subject,
|
||||
body=body,
|
||||
title=arguments.get("title"),
|
||||
cc=arguments.get("cc"),
|
||||
bcc=arguments.get("bcc"),
|
||||
account=acct,
|
||||
)
|
||||
acct_note = f" from {result['account']}" if result.get("account") else ""
|
||||
return [TextContent(
|
||||
type="text",
|
||||
text=(
|
||||
f"Created Odysseus email draft `{result['title']}` "
|
||||
f"(document ID: {result['doc_id']}){acct_note}. "
|
||||
"It has not been sent; open the document in Odysseus to review and send."
|
||||
),
|
||||
)]
|
||||
|
||||
elif name == "reply_to_email":
|
||||
uid = arguments.get("uid")
|
||||
body = arguments.get("body")
|
||||
@@ -1573,6 +2056,54 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
|
||||
pass
|
||||
return [TextContent(type="text", text=f"Replied to UID {uid}: '{result['subject']}' → {result['to']}")]
|
||||
|
||||
elif name == "draft_email_reply":
|
||||
uid = arguments.get("uid")
|
||||
body = arguments.get("body")
|
||||
if not uid or body is None:
|
||||
return [TextContent(type="text", text="Error: uid and body are required")]
|
||||
result = _draft_reply_to_email(
|
||||
uid=uid,
|
||||
body=body,
|
||||
folder=arguments.get("folder", "INBOX"),
|
||||
reply_all=bool(arguments.get("reply_all", False)),
|
||||
account=acct,
|
||||
title=arguments.get("title"),
|
||||
)
|
||||
if "error" in result:
|
||||
return [TextContent(type="text", text=f"Error: {result['error']}")]
|
||||
acct_note = f" from {result['account']}" if result.get("account") else ""
|
||||
return [TextContent(
|
||||
type="text",
|
||||
text=(
|
||||
f"Created Odysseus reply draft `{result['title']}` for UID {uid} "
|
||||
f"(document ID: {result['doc_id']}){acct_note}. "
|
||||
"It has not been sent; open the document in Odysseus to review and send."
|
||||
),
|
||||
)]
|
||||
|
||||
elif name == "ai_draft_email_reply":
|
||||
uid = arguments.get("uid")
|
||||
if not uid:
|
||||
return [TextContent(type="text", text="Error: uid is required")]
|
||||
result = await _ai_draft_reply_to_email(
|
||||
uid=uid,
|
||||
folder=arguments.get("folder", "INBOX"),
|
||||
reply_all=bool(arguments.get("reply_all", False)),
|
||||
account=acct,
|
||||
title=arguments.get("title"),
|
||||
)
|
||||
if "error" in result:
|
||||
return [TextContent(type="text", text=f"Error: {result['error']}")]
|
||||
acct_note = f" from {result['account']}" if result.get("account") else ""
|
||||
return [TextContent(
|
||||
type="text",
|
||||
text=(
|
||||
f"Generated AI reply and created Odysseus compose draft "
|
||||
f"`{result['title']}` for UID {uid} (document ID: {result['doc_id']}){acct_note}. "
|
||||
"It has not been sent; open the document in Odysseus to review and send."
|
||||
),
|
||||
)]
|
||||
|
||||
elif name == "archive_email":
|
||||
uid = arguments.get("uid")
|
||||
if not uid:
|
||||
|
||||
@@ -25,6 +25,8 @@ ALLOWED_SCOPES = {
|
||||
"calendar:write",
|
||||
"memory:read",
|
||||
"memory:write",
|
||||
"cookbook:read",
|
||||
"cookbook:launch",
|
||||
}
|
||||
TOKEN_PROFILES = {
|
||||
"chat": ["chat"],
|
||||
|
||||
@@ -30,8 +30,9 @@ _LOCAL_MODEL_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*$")
|
||||
_OLLAMA_MODEL_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._:/-]{0,200}$")
|
||||
# Include pattern is a glob: allow typical safe glyphs only.
|
||||
_INCLUDE_RE = re.compile(r"^[A-Za-z0-9._\-*?/\[\]]+$")
|
||||
# Remote host: user@host (optionally with :port-free hostname parts).
|
||||
_REMOTE_HOST_RE = re.compile(r"^[A-Za-z0-9._-]+@[A-Za-z0-9._-]+$")
|
||||
# Remote host: either `user@host` or plain `host` (alias is allowed), where host
|
||||
# is a safe DNS-like token or a short SSH config alias.
|
||||
_REMOTE_HOST_RE = re.compile(r"^(?:[A-Za-z0-9._-]+@)?[A-Za-z0-9._-]+$")
|
||||
# HF tokens and API tokens are url-safe base64-like.
|
||||
_TOKEN_RE = re.compile(r"^[A-Za-z0-9._~+/=-]+$")
|
||||
# Session IDs we mint look like "cookbook-deadbeef" or "serve-deadbeef".
|
||||
@@ -81,7 +82,7 @@ def _validate_remote_host(v: str | None) -> str | None:
|
||||
if v is None or v == "":
|
||||
return None
|
||||
if not _REMOTE_HOST_RE.match(v):
|
||||
raise HTTPException(400, "Invalid remote_host — must be user@host, no SSH option syntax")
|
||||
raise HTTPException(400, "Invalid remote_host — must be host or user@host, no SSH option syntax")
|
||||
return v
|
||||
|
||||
|
||||
@@ -787,6 +788,7 @@ def _llama_cpp_rebuild_cmd() -> str:
|
||||
|
||||
class ModelDownloadRequest(BaseModel):
|
||||
repo_id: str
|
||||
backend: str | None = None # "hf" (default) or "ollama"
|
||||
include: str | None = None # glob pattern e.g. "*Q4_K_M*"
|
||||
hf_token: str | None = None
|
||||
env_prefix: str | None = None # e.g. "source ~/venv/bin/activate"
|
||||
|
||||
+834
-323
File diff suppressed because it is too large
Load Diff
+18
-1
@@ -196,7 +196,24 @@ def setup_hwfit_routes():
|
||||
if target_context is not None:
|
||||
target_context = max(1024, min(target_context, 1000000))
|
||||
|
||||
results = rank_models(system, use_case=use_case or None, limit=limit, search=search or None, sort=sort, quant=quant or None, target_context=target_context, fit_only=fit_only)
|
||||
rank_kwargs = {
|
||||
"use_case": use_case or None,
|
||||
"limit": limit,
|
||||
"search": search or None,
|
||||
"sort": sort,
|
||||
"quant": quant or None,
|
||||
"fit_only": fit_only,
|
||||
}
|
||||
if target_context is not None:
|
||||
rank_kwargs["target_context"] = target_context
|
||||
try:
|
||||
import inspect
|
||||
supported = set(inspect.signature(rank_models).parameters)
|
||||
rank_kwargs = {k: v for k, v in rank_kwargs.items() if k in supported}
|
||||
except Exception:
|
||||
rank_kwargs.pop("target_context", None)
|
||||
rank_kwargs.pop("fit_only", None)
|
||||
results = rank_models(system, **rank_kwargs)
|
||||
return {"system": system, "models": results}
|
||||
|
||||
@router.get("/profiles")
|
||||
|
||||
+87
-189
@@ -5,7 +5,6 @@ import re
|
||||
import uuid
|
||||
import json
|
||||
import socket
|
||||
import hashlib
|
||||
import time as _time
|
||||
import logging
|
||||
import httpx
|
||||
@@ -283,11 +282,8 @@ _HOST_TO_CURATED = (
|
||||
("fireworks.ai", "fireworks"),
|
||||
("googleapis.com", "google"),
|
||||
("x.ai", "xai"),
|
||||
|
||||
("openrouter.ai", "openrouter"),
|
||||
("ollama.com", "ollama"),
|
||||
("opencode.ai/zen/go", "opencode-go"),
|
||||
("opencode.ai/zen", "opencode-zen"),
|
||||
)
|
||||
|
||||
|
||||
@@ -494,8 +490,6 @@ _NON_CHAT_EXACT_PREFIXES = (
|
||||
def _is_chat_model(model_id: str) -> bool:
|
||||
"""Return True if the model ID looks like a chat/completions-capable model."""
|
||||
mid = model_id.lower()
|
||||
if mid in {"gpt-5.1-codex"}:
|
||||
return True
|
||||
for prefix in _NON_CHAT_PREFIXES:
|
||||
if mid.startswith(prefix):
|
||||
return False
|
||||
@@ -508,67 +502,9 @@ def _is_chat_model(model_id: str) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
def _delete_orphaned_provider_auth(db, auth_id: Optional[str], exclude_ep_id: Optional[str] = None) -> bool:
|
||||
"""Delete a ProviderAuthSession once no endpoint still references it.
|
||||
|
||||
Subscription providers (e.g. ChatGPT Subscription) keep their refresh token
|
||||
in ProviderAuthSession rather than ModelEndpoint.api_key. When the last
|
||||
endpoint backed by that auth row is removed, the stored credentials should
|
||||
be cleared instead of lingering. Returns True if a row was deleted.
|
||||
``exclude_ep_id`` drops the endpoint currently being deleted from the
|
||||
reference count so it does not keep its own auth alive.
|
||||
"""
|
||||
if not auth_id:
|
||||
return False
|
||||
from core.database import ProviderAuthSession
|
||||
still_referenced = db.query(ModelEndpoint.id).filter(
|
||||
ModelEndpoint.provider_auth_id == auth_id,
|
||||
ModelEndpoint.id != exclude_ep_id,
|
||||
).first()
|
||||
if still_referenced is not None:
|
||||
return False
|
||||
auth_row = db.query(ProviderAuthSession).filter(ProviderAuthSession.id == auth_id).first()
|
||||
if auth_row is None:
|
||||
return False
|
||||
db.delete(auth_row)
|
||||
return True
|
||||
|
||||
|
||||
def _is_discovery_only_provider(provider: str) -> bool:
|
||||
"""Provider that only supports model discovery, not live probing.
|
||||
|
||||
ChatGPT Subscription speaks the Responses/Codex API and has no
|
||||
chat-completions or general health endpoint, so completion probes and
|
||||
reachability pings are skipped — status is derived from cached models.
|
||||
"""
|
||||
return provider == "chatgpt-subscription"
|
||||
|
||||
|
||||
def _resolve_probe_key(ep) -> Optional[str]:
    """Return the API key/bearer to use when probing endpoint *ep*.

    Resolution is delegated to ``resolve_endpoint_runtime``, which returns the
    static ``ModelEndpoint.api_key`` for keyed endpoints and resolves (and
    refreshes) the runtime bearer for session-backed providers such as ChatGPT
    Subscription. Only already-loaded scalar attributes of ``ep`` are read.

    Returns:
        The resolved key, or None when resolution fails (e.g. re-auth
        required). The failure is logged as a warning so probing can skip the
        endpoint instead of raising.
    """
    try:
        from src.endpoint_resolver import resolve_endpoint_runtime

        endpoint_owner = getattr(ep, "owner", None)
        _unused_base, bearer = resolve_endpoint_runtime(ep, owner=endpoint_owner)
    except Exception as exc:
        logger.warning("Probe key resolution failed for %s: %s", getattr(ep, "id", "?"), exc)
        return None
    return bearer
|
||||
|
||||
|
||||
def _probe_single_model(base: str, api_key: Optional[str], model_id: str, timeout: int = 10, with_tools: bool = False) -> dict:
|
||||
def _probe_single_model(base: str, api_key: str, model_id: str, timeout: int = 10, with_tools: bool = False) -> dict:
|
||||
"""Send a realistic completion request to a single model. Returns {status, latency_ms, error?}."""
|
||||
provider = _detect_provider(base)
|
||||
if _is_discovery_only_provider(provider):
|
||||
# Responses/Codex API, not chat-completions: a completion probe would
|
||||
# 400 and the re-probe flow would then hide every model. Discovery-only.
|
||||
return {"status": "ok", "latency_ms": 0, "skipped": True}
|
||||
messages = [
|
||||
{"role": "system", "content": "You are a helpful assistant."},
|
||||
{"role": "user", "content": "Say OK"},
|
||||
@@ -682,11 +618,6 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
|
||||
For Anthropic, queries their /v1/models API, falling back to hardcoded list."""
|
||||
from src.endpoint_resolver import resolve_url
|
||||
base = resolve_url(_normalize_base(base_url))
|
||||
if _detect_provider(base) == "chatgpt-subscription":
|
||||
from src.chatgpt_subscription import fetch_available_models
|
||||
if api_key:
|
||||
return fetch_available_models(api_key, timeout=timeout)
|
||||
return []
|
||||
if _detect_provider(base) == "anthropic":
|
||||
# Try Anthropic's /v1/models endpoint first
|
||||
url = build_models_url(base)
|
||||
@@ -713,10 +644,6 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
|
||||
logger.warning(f"Anthropic /v1/models failed, using hardcoded list: {e}")
|
||||
return list(ANTHROPIC_MODELS)
|
||||
url = build_models_url(base)
|
||||
if not url:
|
||||
curated_key = _match_provider_curated(base, None)
|
||||
fallback = _PROVIDER_CURATED.get(curated_key) if curated_key else None
|
||||
return list(fallback or [])
|
||||
headers = build_headers(api_key, base)
|
||||
try:
|
||||
r = httpx.get(url, headers=headers, timeout=timeout, verify=llm_verify())
|
||||
@@ -770,6 +697,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
|
||||
return list(fallback)
|
||||
return []
|
||||
|
||||
|
||||
def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) -> Dict[str, Any]:
|
||||
"""Reachability probe that does not require installed/listed models."""
|
||||
from src.endpoint_resolver import resolve_url
|
||||
@@ -785,10 +713,6 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->
|
||||
or "ollama" in (parsed_base.hostname or "").lower()
|
||||
)
|
||||
|
||||
# APFEL-specific detection
|
||||
host = (parsed_base.hostname or "").lower()
|
||||
looks_like_apfel = "apfel" in host or parsed_base.port == 11435
|
||||
|
||||
def _result_from_response(r) -> Dict[str, Any]:
|
||||
if 300 <= r.status_code < 400:
|
||||
loc = r.headers.get("location", "")
|
||||
@@ -810,23 +734,7 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->
|
||||
last_error: Optional[str] = None
|
||||
|
||||
try:
|
||||
# APFEL does not behave like Ollama; use its health endpoint.
|
||||
if looks_like_apfel:
|
||||
root = base
|
||||
for suffix in ("/v1", "/api"):
|
||||
if root.endswith(suffix):
|
||||
root = root[: -len(suffix)].rstrip("/")
|
||||
break
|
||||
try:
|
||||
r = httpx.get(root + "/health", timeout=timeout, verify=llm_verify())
|
||||
result = _result_from_response(r)
|
||||
if result["reachable"]:
|
||||
return result
|
||||
last_error = result.get("error")
|
||||
except Exception as e:
|
||||
last_error = str(e)[:120]
|
||||
|
||||
elif looks_like_ollama:
|
||||
if looks_like_ollama:
|
||||
root = base
|
||||
for suffix in ("/v1", "/api"):
|
||||
if root.endswith(suffix):
|
||||
@@ -844,33 +752,44 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# OpenAI-compatible servers (vLLM, llama.cpp, SGLang, lmdeploy, …) expose
|
||||
# /v1/models but return 404 on the bare /v1 root. The probe used to GET
|
||||
# the base URL only, so a fully-working vLLM endpoint (chats fine!) read
|
||||
# as offline because /v1 → 404. Try /models first; fall back to the base
|
||||
# URL only if /models couldn't be reached (TCP-level failure).
|
||||
models_url = build_models_url(base)
|
||||
try:
|
||||
r = httpx.get(models_url, headers=headers, timeout=timeout, verify=llm_verify())
|
||||
result = _result_from_response(r)
|
||||
if result["reachable"]:
|
||||
return result
|
||||
last_error = result.get("error")
|
||||
except Exception as e:
|
||||
last_error = str(e)[:120]
|
||||
|
||||
try:
|
||||
r = httpx.get(base, headers=headers, timeout=timeout, verify=llm_verify())
|
||||
result = _result_from_response(r)
|
||||
# If the bare base URL returns a non-auth 4xx (e.g. 404), try /models
|
||||
# as a fallback. OpenAI-compatible servers like llama-swap return 404
|
||||
# on the base /v1 prefix but 200 on /v1/models. Auth failures (401/403)
|
||||
# are definitive — probing /models would just repeat the same rejection.
|
||||
if (
|
||||
not result["reachable"]
|
||||
and result.get("status_code") is not None
|
||||
and 400 <= result["status_code"] < 500
|
||||
and result["status_code"] not in (401, 403)
|
||||
):
|
||||
models_url = build_models_url(base)
|
||||
try:
|
||||
r2 = httpx.get(models_url, headers=headers, timeout=timeout, verify=llm_verify())
|
||||
result2 = _result_from_response(r2)
|
||||
if result2["reachable"]:
|
||||
return result2
|
||||
except Exception:
|
||||
pass
|
||||
return result
|
||||
if result["reachable"]:
|
||||
return result
|
||||
# 4xx from a reachable HTTP server (404 /v1, 401/403 missing key) is
|
||||
# still proof the upstream is alive. Only treat connection-level
|
||||
# failures, 5xx, and redirect-to-/login as truly offline.
|
||||
sc = result.get("status_code") or 0
|
||||
if 400 <= sc < 500 and sc not in (407, 408, 421, 425, 429):
|
||||
return {
|
||||
"reachable": True,
|
||||
"status_code": sc,
|
||||
"error": None,
|
||||
}
|
||||
last_error = result.get("error") or last_error
|
||||
except Exception as e:
|
||||
last_error = str(e)[:120]
|
||||
|
||||
return {"reachable": False, "status_code": None, "error": last_error}
|
||||
|
||||
|
||||
|
||||
def _model_endpoint_error_message(base_url: str, ping: Dict[str, Any] = None) -> str:
|
||||
"""Return a provider-aware error message for failed endpoint probes."""
|
||||
ping = ping or {}
|
||||
@@ -959,14 +878,6 @@ def _visible_models(cached_models, hidden_models, pinned_models=None):
|
||||
return [m for m in merged if m not in hidden]
|
||||
|
||||
|
||||
def _api_key_fingerprint(api_key: Optional[str]) -> str:
|
||||
"""Stable, non-secret label for distinguishing same-URL credentials."""
|
||||
key = (api_key or "").strip()
|
||||
if not key:
|
||||
return ""
|
||||
return hashlib.sha256(key.encode("utf-8")).hexdigest()[:8]
|
||||
|
||||
|
||||
def setup_model_routes(model_discovery):
|
||||
router = APIRouter(prefix="/api")
|
||||
|
||||
@@ -1068,17 +979,6 @@ def setup_model_routes(model_discovery):
|
||||
ok, info = _should_refresh_endpoint(ep, now, force=force)
|
||||
if not ok:
|
||||
continue
|
||||
if getattr(ep, "provider_auth_id", None):
|
||||
try:
|
||||
from src.endpoint_resolver import resolve_endpoint_runtime
|
||||
info["base"], info["api_key"] = resolve_endpoint_runtime(
|
||||
ep,
|
||||
owner=getattr(ep, "owner", None),
|
||||
)
|
||||
info["key"] = _refresh_key(info["base"], info["api_key"])
|
||||
except Exception as e:
|
||||
logger.warning("Skipping model refresh for %s: could not resolve provider auth: %s", getattr(ep, "name", ep.id), e)
|
||||
continue
|
||||
groups.setdefault(info["key"], {
|
||||
"base": info["base"],
|
||||
"api_key": info["api_key"],
|
||||
@@ -1232,9 +1132,8 @@ def setup_model_routes(model_discovery):
|
||||
raise HTTPException(401, "Not authenticated")
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error('Auth gate error in GET /api/models, failing closed: %s', e)
|
||||
raise HTTPException(status_code=500, detail='Internal error')
|
||||
except Exception:
|
||||
pass
|
||||
# Admins see every endpoint (they manage the global pool); regular
|
||||
# users get the owner-scoped view.
|
||||
_is_admin = False
|
||||
@@ -1298,7 +1197,14 @@ def setup_model_routes(model_discovery):
|
||||
t0 = _time.time()
|
||||
try:
|
||||
import asyncio as _asyncio
|
||||
ping = await _asyncio.to_thread(_ping_endpoint, data["base"], data.get("api_key"), 1.5)
|
||||
# Bumped 1.5s → 3.5s. The previous 1.5s budget was clipping
|
||||
# local vLLM endpoints on Tailscale links where the model
|
||||
# server is still loading (Qwen3.5-122B takes 2–3 min to
|
||||
# warm); /v1/models can take 500–2500 ms on a busy box,
|
||||
# which pushed _ping_endpoint's full path-discovery sweep
|
||||
# past the cap and marked the row offline despite the
|
||||
# user actively chatting with it.
|
||||
ping = await _asyncio.to_thread(_ping_endpoint, data["base"], data.get("api_key"), 3.5)
|
||||
lat = round((_time.time() - t0) * 1000)
|
||||
return {
|
||||
"alive": bool(ping.get("reachable")),
|
||||
@@ -1348,20 +1254,12 @@ def setup_model_routes(model_discovery):
|
||||
"endpoint_kind": kind,
|
||||
}
|
||||
try:
|
||||
if _is_discovery_only_provider(provider):
|
||||
# No general health endpoint — an unauthenticated GET just
|
||||
# 401s. Report status from cached models instead of pinging.
|
||||
entry["latency_ms"] = None
|
||||
entry["status"] = "online" if cached_count else "offline"
|
||||
entry["error"] = None
|
||||
entry["model_count"] = cached_count
|
||||
else:
|
||||
t0 = _time.time()
|
||||
ping = _ping_endpoint(base, ep.api_key, timeout=1.5)
|
||||
entry["latency_ms"] = round((_time.time() - t0) * 1000)
|
||||
entry["status"] = "online" if ping.get("reachable") or cached_count else "offline"
|
||||
entry["error"] = ping.get("error")
|
||||
entry["model_count"] = cached_count or (len(ANTHROPIC_MODELS) if provider == "anthropic" else 0)
|
||||
t0 = _time.time()
|
||||
ping = _ping_endpoint(base, ep.api_key, timeout=1.5)
|
||||
entry["latency_ms"] = round((_time.time() - t0) * 1000)
|
||||
entry["status"] = "online" if ping.get("reachable") or cached_count else "offline"
|
||||
entry["error"] = ping.get("error")
|
||||
entry["model_count"] = cached_count or (len(ANTHROPIC_MODELS) if provider == "anthropic" else 0)
|
||||
except Exception as e:
|
||||
entry["latency_ms"] = None
|
||||
entry["status"] = "online" if cached_count else "offline"
|
||||
@@ -1394,7 +1292,7 @@ def setup_model_routes(model_discovery):
|
||||
if ep_id and ep_id not in endpoints_cache:
|
||||
ep = db.query(ModelEndpoint).filter(ModelEndpoint.id == ep_id).first()
|
||||
if ep:
|
||||
endpoints_cache[ep_id] = {"base_url": ep.base_url, "api_key": _resolve_probe_key(ep)}
|
||||
endpoints_cache[ep_id] = {"base_url": ep.base_url, "api_key": ep.api_key}
|
||||
ep_data = endpoints_cache.get(ep_id)
|
||||
if not ep_data:
|
||||
# Try to find by base_url from the model's endpoint field
|
||||
@@ -1433,7 +1331,7 @@ def setup_model_routes(model_discovery):
|
||||
"id": ep.id,
|
||||
"name": ep.name,
|
||||
"base_url": ep.base_url,
|
||||
"api_key": _resolve_probe_key(ep),
|
||||
"api_key": ep.api_key,
|
||||
})
|
||||
finally:
|
||||
db.close()
|
||||
@@ -1522,21 +1420,43 @@ def setup_model_routes(model_discovery):
|
||||
# Endpoint counts as reachable if it has any model — including
|
||||
# admin-pinned IDs that a probe would never surface.
|
||||
status = "online" if (all_models or pinned) else "offline"
|
||||
base = _normalize_base(r.base_url)
|
||||
ping = None
|
||||
# Discovery-only providers have no health endpoint — an
|
||||
# unauthenticated ping just 401s, so don't bother.
|
||||
if not all_models and not pinned and r.is_enabled and not _is_discovery_only_provider(_detect_provider(base)):
|
||||
ping = _ping_endpoint(r.base_url, r.api_key, timeout=1.0)
|
||||
# When cached_models is empty, do a quick reachability probe.
|
||||
# Bumped 1.0s → 3.5s because the user reported endpoints they
|
||||
# were ACTIVELY chatting with showed "offline" — the previous
|
||||
# 1s timeout was clipping live cloud endpoints (DeepSeek can
|
||||
# take 1.5–2.5s on /v1/models when their region is under load,
|
||||
# vLLM on a remote GPU box behind SSH can also push past 1s).
|
||||
# 3.5s still keeps the picker render snappy in the common
|
||||
# "everything's already cached" path because this branch only
|
||||
# runs for endpoints with an empty cached_models.
|
||||
if not all_models and not pinned and r.is_enabled:
|
||||
ping = _ping_endpoint(r.base_url, r.api_key, timeout=3.5)
|
||||
if ping.get("reachable"):
|
||||
status = "empty"
|
||||
# Best-effort: if the probe came back reachable, try
|
||||
# to populate cached_models in the background so the
|
||||
# NEXT picker load shows "online" instead of "empty".
|
||||
# Failure here is silent — we already returned the
|
||||
# "empty" status, and the existing background refresh
|
||||
# path will eventually fill it in too.
|
||||
try:
|
||||
probed = _probe_endpoint(r.base_url, r.api_key, timeout=5)
|
||||
if probed:
|
||||
r.cached_models = json.dumps(probed)
|
||||
db.commit()
|
||||
all_models = probed
|
||||
visible = _visible_models(all_models, r.hidden_models, pinned)
|
||||
status = "online"
|
||||
except Exception as _refill_err:
|
||||
logger.debug(f"opportunistic cached_models refill failed for {r.id}: {_refill_err!r}")
|
||||
base = _normalize_base(r.base_url)
|
||||
kind = _effective_endpoint_kind(r, base)
|
||||
results.append({
|
||||
"id": r.id,
|
||||
"name": r.name,
|
||||
"base_url": r.base_url,
|
||||
"has_key": bool(r.api_key),
|
||||
"api_key_fingerprint": _api_key_fingerprint(r.api_key),
|
||||
"is_enabled": r.is_enabled,
|
||||
"models": visible,
|
||||
"pinned_models": pinned,
|
||||
@@ -1603,34 +1523,21 @@ def setup_model_routes(model_discovery):
|
||||
)
|
||||
explicit_timeout = _explicit_model_list_timeout(base_url, requested_kind, refresh_timeout)
|
||||
|
||||
# Dedupe: if an endpoint with the same base_url and compatible
|
||||
# credentials already exists and is reachable by the caller (shared or
|
||||
# owned by them), return it instead of creating a duplicate row. Keep
|
||||
# same-url/different-key rows distinct so users can group the same
|
||||
# provider URL under multiple credentials.
|
||||
# Dedupe: if an endpoint with the same base_url already exists and
|
||||
# is reachable by the caller (shared or owned by them), return it
|
||||
# instead of creating a duplicate row. Fixes "Scan for Servers"
|
||||
# re-adding manually-added endpoints under their host:port name.
|
||||
from src.auth_helpers import get_current_user as _gcu_dedup
|
||||
_caller = _gcu_dedup(request) or None
|
||||
_incoming_api_key = api_key.strip()
|
||||
_db_dedup = SessionLocal()
|
||||
try:
|
||||
_same_url_rows = (
|
||||
existing = (
|
||||
_db_dedup.query(ModelEndpoint)
|
||||
.filter(ModelEndpoint.base_url == base_url)
|
||||
.filter((ModelEndpoint.owner.is_(None)) | (ModelEndpoint.owner == _caller))
|
||||
.order_by(ModelEndpoint.owner.desc()) # prefer owned over shared
|
||||
.all()
|
||||
.first()
|
||||
)
|
||||
existing = None
|
||||
_empty_key_existing = None
|
||||
for _candidate in _same_url_rows:
|
||||
_candidate_key = (getattr(_candidate, "api_key", None) or "").strip()
|
||||
if _candidate_key == _incoming_api_key:
|
||||
existing = _candidate
|
||||
break
|
||||
if _incoming_api_key and not _candidate_key and _empty_key_existing is None:
|
||||
_empty_key_existing = _candidate
|
||||
if existing is None and _incoming_api_key and _empty_key_existing is not None:
|
||||
existing = _empty_key_existing
|
||||
if existing:
|
||||
changed = False
|
||||
# Persist any incoming pinned IDs onto the existing row. An
|
||||
@@ -1679,8 +1586,6 @@ def setup_model_routes(model_discovery):
|
||||
"id": existing.id,
|
||||
"name": existing.name,
|
||||
"base_url": existing.base_url,
|
||||
"has_key": bool(existing.api_key),
|
||||
"api_key_fingerprint": _api_key_fingerprint(existing.api_key),
|
||||
"models": _visible_models(
|
||||
existing_models,
|
||||
getattr(existing, "hidden_models", None),
|
||||
@@ -1754,8 +1659,6 @@ def setup_model_routes(model_discovery):
|
||||
"id": ep_id,
|
||||
"name": name.strip(),
|
||||
"base_url": base_url,
|
||||
"has_key": bool(api_key.strip()),
|
||||
"api_key_fingerprint": _api_key_fingerprint(api_key),
|
||||
"models": _merge_model_ids(model_ids, _pinned),
|
||||
"pinned_models": _pinned,
|
||||
"online": bool(model_ids) or bool(_pinned) or bool(ping.get("reachable")),
|
||||
@@ -1805,7 +1708,7 @@ def setup_model_routes(model_discovery):
|
||||
ep = db.query(ModelEndpoint).filter(ModelEndpoint.id == ep_id).first()
|
||||
if not ep:
|
||||
raise HTTPException(404, "Endpoint not found")
|
||||
ep_data = {"id": ep.id, "name": ep.name, "base_url": ep.base_url, "api_key": _resolve_probe_key(ep)}
|
||||
ep_data = {"id": ep.id, "name": ep.name, "base_url": ep.base_url, "api_key": ep.api_key}
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
@@ -1869,7 +1772,7 @@ def setup_model_routes(model_discovery):
|
||||
category = _classify_endpoint(base, kind)
|
||||
timeout = _manual_refresh_timeout(ep, category, refresh_timeout)
|
||||
try:
|
||||
probed = _probe_endpoint(base, _resolve_probe_key(ep), timeout=timeout)
|
||||
probed = _probe_endpoint(base, ep.api_key, timeout=timeout)
|
||||
except Exception as exc:
|
||||
logger.warning("Manual model refresh failed for endpoint %s at %s: %s", ep_id, base, exc)
|
||||
probed = []
|
||||
@@ -2105,8 +2008,6 @@ def setup_model_routes(model_discovery):
|
||||
"name": ep.name,
|
||||
"model_type": ep.model_type,
|
||||
"base_url": ep.base_url,
|
||||
"has_key": bool(ep.api_key),
|
||||
"api_key_fingerprint": _api_key_fingerprint(ep.api_key),
|
||||
"pinned_models": _normalize_model_ids(getattr(ep, "pinned_models", None)),
|
||||
"endpoint_kind": getattr(ep, "endpoint_kind", None) or "auto",
|
||||
"model_refresh_mode": getattr(ep, "model_refresh_mode", None) or "auto",
|
||||
@@ -2208,9 +2109,7 @@ def setup_model_routes(model_discovery):
|
||||
cleared_user_preferences = _clear_user_prefs_for_endpoint(ep_id)
|
||||
cleared_sessions = _clear_sessions_for_endpoint(db, ep.base_url)
|
||||
cleared_loaded_sessions = _clear_loaded_sessions_for_endpoint(ep.base_url)
|
||||
auth_id = getattr(ep, "provider_auth_id", None)
|
||||
db.delete(ep)
|
||||
cleared_provider_auth = _delete_orphaned_provider_auth(db, auth_id, exclude_ep_id=ep_id)
|
||||
db.commit()
|
||||
_invalidate_models_cache()
|
||||
_local_probe_cache["data"] = None
|
||||
@@ -2220,7 +2119,6 @@ def setup_model_routes(model_discovery):
|
||||
"cleared_user_preferences": cleared_user_preferences,
|
||||
"cleared_sessions": cleared_sessions,
|
||||
"cleared_loaded_sessions": cleared_loaded_sessions,
|
||||
"cleared_provider_auth": cleared_provider_auth,
|
||||
}
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
@@ -14036,6 +14036,29 @@
|
||||
"vision"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "google/gemma-4-12B",
|
||||
"provider": "Google",
|
||||
"parameter_count": "12.0B",
|
||||
"parameters_raw": 12000000000,
|
||||
"min_ram_gb": 24.0,
|
||||
"recommended_ram_gb": 32.0,
|
||||
"min_vram_gb": 24.0,
|
||||
"quantization": "BF16",
|
||||
"context_length": 131072,
|
||||
"use_case": "General purpose, multimodal",
|
||||
"is_moe": false,
|
||||
"num_experts": null,
|
||||
"active_experts": null,
|
||||
"active_parameters": null,
|
||||
"architecture": "gemma4",
|
||||
"pipeline_tag": "image-text-to-text",
|
||||
"release_date": "2026-04-01",
|
||||
"gguf_sources": [],
|
||||
"capabilities": [
|
||||
"vision"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "google/gemma-4-31B-it",
|
||||
"provider": "Google",
|
||||
@@ -19121,4 +19144,4 @@
|
||||
],
|
||||
"_discovered": true
|
||||
}
|
||||
]
|
||||
]
|
||||
|
||||
@@ -243,6 +243,20 @@ async def maybe_extract_skill(
|
||||
logger.debug("[skill-extract] '%s' already exists — dropped as duplicate", title)
|
||||
return None
|
||||
|
||||
# Auto-publish gate: if the user has `auto_approve_skills` on, the
|
||||
# newly-extracted skill is created `published` immediately rather
|
||||
# than waiting for the next audit batch. The audit still runs later
|
||||
# and can demote it back to `draft` (or delete) on failure. Default
|
||||
# ON matches the UI label "Auto-approve skills".
|
||||
_initial_status = "draft"
|
||||
try:
|
||||
from routes.prefs_routes import _load_for_user as _load_prefs
|
||||
_prefs = _load_prefs(owner) or {}
|
||||
if _prefs.get("auto_approve_skills", True):
|
||||
_initial_status = "published"
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
entry = skills_manager.add_skill(
|
||||
title=title,
|
||||
problem=data.get("problem", ""),
|
||||
@@ -253,6 +267,7 @@ async def maybe_extract_skill(
|
||||
confidence=data.get("confidence", 0.7),
|
||||
session_id=getattr(session, "session_id", None),
|
||||
owner=owner,
|
||||
status=_initial_status,
|
||||
)
|
||||
try:
|
||||
from src.event_bus import fire_event
|
||||
|
||||
+141
-9
@@ -664,6 +664,17 @@ async def do_manage_skills(content: str, owner: Optional[str] = None) -> Dict:
|
||||
proc = args.get("steps") or []
|
||||
if not proc and not args.get("body_extra") and not args.get("solution"):
|
||||
return {"error": "procedure (or solution body) is required", "exit_code": 1}
|
||||
# Same auto-publish gate as the extractor path — when the user
|
||||
# has auto_approve_skills on and the caller didn't pin an explicit
|
||||
# status, publish immediately. Audit later demotes/removes on fail.
|
||||
_status_arg = args.get("status")
|
||||
if not _status_arg:
|
||||
try:
|
||||
from routes.prefs_routes import _load_for_user as _load_prefs
|
||||
_prefs = _load_prefs(owner) or {}
|
||||
_status_arg = "published" if _prefs.get("auto_approve_skills", True) else "draft"
|
||||
except Exception:
|
||||
_status_arg = "draft"
|
||||
entry = sm.add_skill(
|
||||
name=args.get("name"),
|
||||
description=(args.get("description") or args.get("title") or "").strip(),
|
||||
@@ -677,7 +688,7 @@ async def do_manage_skills(content: str, owner: Optional[str] = None) -> Dict:
|
||||
procedure=proc,
|
||||
pitfalls=args.get("pitfalls") or [],
|
||||
verification=args.get("verification") or [],
|
||||
status=args.get("status") or "draft",
|
||||
status=_status_arg,
|
||||
version=args.get("version") or "1.0.0",
|
||||
confidence=args.get("confidence", 0.8),
|
||||
source=args.get("source", "learned"),
|
||||
@@ -2621,8 +2632,90 @@ async def _cookbook_env_for_host(host: str) -> Dict[str, Any]:
|
||||
}
|
||||
|
||||
|
||||
async def _cookbook_register_task(session_id: str, model: str, host: str,
|
||||
cmd: str, task_type: str = "serve") -> bool:
|
||||
def _infer_serve_port(cmd: str) -> int:
|
||||
"""Infer likely listen port from a serve command."""
|
||||
if not cmd:
|
||||
return 8080
|
||||
m = re.search(r"--port\\s+(\\d+)", cmd)
|
||||
if m:
|
||||
try:
|
||||
return int(m.group(1))
|
||||
except Exception:
|
||||
pass
|
||||
m = re.search(r"OLLAMA_HOST=[^\\s]*?:(\\d+)", cmd)
|
||||
if m:
|
||||
try:
|
||||
return int(m.group(1))
|
||||
except Exception:
|
||||
pass
|
||||
if "ollama" in cmd:
|
||||
return 11434
|
||||
return 8080
|
||||
|
||||
|
||||
def _infer_serve_host(host: str | None) -> tuple[str, bool]:
|
||||
"""Return (host, container_local) for registering a served endpoint."""
|
||||
if not (host or "").strip():
|
||||
return "localhost", True
|
||||
base_host = host.split("@", 1)[-1] if "@" in host else host
|
||||
return base_host, False
|
||||
|
||||
|
||||
async def _ensure_served_endpoint(
    *,
    model: str,
    cmd: str,
    host: str | None,
) -> Dict[str, Any]:
    """Register (or fetch) the model endpoint for a running serve session.

    Builds ``http://<host>:<port>/v1`` from ``_infer_serve_host(host)`` and
    ``_infer_serve_port(cmd)``, then POSTs it as form data to
    ``{_COOKBOOK_BASE}/api/model-endpoints`` with the internal headers.
    Commands containing ``diffusion_server.py`` are registered as image
    endpoints; everything else as ``llm``.

    Args:
        model: Model identifier; the part after the last ``/`` is the name.
        cmd: Serve command, used for port inference and image detection.
        host: Remote host spec, or ``None``/blank for a local serve.

    Returns:
        A dict with ``added``, ``endpoint_id`` and ``base_url``, plus ``data``
        (the response body) on success or ``error`` on failure. HTTP and
        parsing errors are caught, logged at debug level, and reported via
        the ``error`` key rather than raised.
    """
    import httpx

    endpoint_host, container_local = _infer_serve_host(host)
    port = _infer_serve_port(cmd)
    base_url = f"http://{endpoint_host}:{port}/v1"

    def _failure(error: Any) -> Dict[str, Any]:
        # Shared shape for every unsuccessful outcome.
        return {"added": False, "endpoint_id": "", "base_url": base_url, "error": error}

    short_name = model.rsplit("/", 1)[-1]
    if "diffusion_server.py" in (cmd or ""):
        display_name, model_type = f"{short_name} (image)", "image"
    else:
        display_name, model_type = short_name, "llm"

    payload = {
        "name": display_name,
        "base_url": base_url,
        "skip_probe": "true",
        "model_type": model_type,
        "container_local": "true" if container_local else "false",
    }

    try:
        async with httpx.AsyncClient(timeout=30) as client:
            resp = await client.post(
                f"{_COOKBOOK_BASE}/api/model-endpoints",
                data=payload,
                headers=_internal_headers(),
            )
            # Only parse the body when the server says it is JSON.
            if resp.headers.get("content-type", "").startswith("application/json"):
                data = resp.json()
            else:
                data = {}
            if resp.status_code >= 400:
                logger.debug(
                    f"ensure endpoint failed for {model!r}: status={resp.status_code} data={data}"
                )
                return _failure(data)
            ep_id = data.get("id") if isinstance(data, dict) else None
            return {
                "added": bool(ep_id),
                "endpoint_id": ep_id or "",
                "base_url": base_url,
                "data": data,
            }
    except Exception as e:
        logger.debug(f"ensure endpoint exception for {model!r}: {e}")
        return _failure(str(e))
|
||||
|
||||
|
||||
async def _cookbook_register_task(
|
||||
session_id: str,
|
||||
model: str,
|
||||
host: str,
|
||||
cmd: str,
|
||||
task_type: str = "serve",
|
||||
*,
|
||||
endpoint_added: bool = False,
|
||||
endpoint_id: str = "",
|
||||
) -> bool:
|
||||
"""Append a task entry to cookbook_state.json after the agent
|
||||
launches via /api/model/serve or /api/model/download. The route
|
||||
spawns tmux but leaves state-writing to the UI; the agent needs to
|
||||
@@ -2672,7 +2765,8 @@ async def _cookbook_register_task(session_id: str, model: str, host: str,
|
||||
"sshPort": "",
|
||||
"platform": "linux",
|
||||
"_serveReady": False,
|
||||
"_endpointAdded": False,
|
||||
"_endpointAdded": bool(endpoint_added),
|
||||
"_endpointId": endpoint_id or "",
|
||||
})
|
||||
state["tasks"] = tasks
|
||||
try:
|
||||
@@ -3008,7 +3102,12 @@ async def do_download_model(content: str, owner: Optional[str] = None) -> Dict:
|
||||
if _servers.get("default_host"):
|
||||
host = _servers["default_host"]
|
||||
_host_defaulted = True
|
||||
backend = (args.get("backend") or "").strip().lower()
|
||||
if not backend and "/" not in repo_id and ":" in repo_id:
|
||||
backend = "ollama"
|
||||
payload = {"repo_id": repo_id}
|
||||
if backend:
|
||||
payload["backend"] = backend
|
||||
if host:
|
||||
payload["remote_host"] = host
|
||||
if args.get("include"):
|
||||
@@ -3028,12 +3127,20 @@ async def do_download_model(content: str, owner: Optional[str] = None) -> Dict:
|
||||
sid = data.get("session_id", "?")
|
||||
registered = await _cookbook_register_task(
|
||||
session_id=sid, model=repo_id, host=host,
|
||||
cmd=f"hf download {repo_id}", task_type="download",
|
||||
cmd=(f"ollama pull {repo_id}" if backend == "ollama" else f"hf download {repo_id}"),
|
||||
task_type="download",
|
||||
)
|
||||
note = "" if registered else " (state-write failed — download may not show in UI)"
|
||||
where = host or "local"
|
||||
default_note = " (defaulted to the cookbook's selected server — pass host= or local=true to override)" if _host_defaulted else ""
|
||||
return {"output": f"Download started: {repo_id} on {where} (session: {sid}){note}{default_note}", "session_id": sid, "host": host, "exit_code": 0}
|
||||
return {
|
||||
"output": f"Download started: {repo_id} on {where} (session: {sid}){note}{default_note}",
|
||||
"session_id": sid,
|
||||
"host": host,
|
||||
"task_type": "download",
|
||||
"phase": "running",
|
||||
"exit_code": 0,
|
||||
}
|
||||
return {"error": data.get("error", "Download failed"), "exit_code": 1}
|
||||
except Exception as e:
|
||||
return {"error": str(e), "exit_code": 1}
|
||||
@@ -3102,12 +3209,28 @@ async def do_serve_model(content: str, owner: Optional[str] = None) -> Dict:
|
||||
data = resp.json()
|
||||
if data.get("ok"):
|
||||
sid = data.get("session_id", "?")
|
||||
endpoint_id = data.get("endpoint_id") or ""
|
||||
if endpoint_id:
|
||||
endpoint_added = True
|
||||
else:
|
||||
endpoint_meta = await _ensure_served_endpoint(model=repo_id, cmd=cmd, host=host)
|
||||
endpoint_added = bool(endpoint_meta.get("added"))
|
||||
endpoint_id = endpoint_meta.get("endpoint_id", "") or endpoint_id
|
||||
registered = await _cookbook_register_task(
|
||||
session_id=sid, model=repo_id,
|
||||
host=host, cmd=cmd, task_type="serve",
|
||||
endpoint_added=endpoint_added, endpoint_id=endpoint_id or "",
|
||||
)
|
||||
note = "" if registered else " (state-write failed — task may not show in UI)"
|
||||
return {"output": f"Serving {repo_id} (session: {sid}){note}", "session_id": sid, "exit_code": 0}
|
||||
return {
|
||||
"output": f"Serving {repo_id} (session: {sid}){note}",
|
||||
"session_id": sid,
|
||||
"task_type": "serve",
|
||||
"phase": "running",
|
||||
"host": host,
|
||||
"endpoint_id": endpoint_id,
|
||||
"exit_code": 0,
|
||||
}
|
||||
# FastAPI HTTPException puts the message under `detail`, not `error`.
|
||||
# Surface BOTH so the agent sees "Invalid characters in cmd" (from
|
||||
# _validate_serve_cmd rejecting `&&`/`source`/`cd`) instead of
|
||||
@@ -3804,7 +3927,8 @@ async def do_serve_preset(content: str, owner: Optional[str] = None) -> Dict:
|
||||
if env_cfg.get("gpus"): payload["gpus"] = env_cfg["gpus"]
|
||||
if env_cfg.get("hf_token"): payload["hf_token"] = env_cfg["hf_token"]
|
||||
if env_cfg.get("platform"): payload["platform"] = env_cfg["platform"]
|
||||
if env_cfg.get("ssh_port"): payload["ssh_port"] = env_cfg["ssh_port"]
|
||||
if env_cfg.get("ssh_port"):
|
||||
payload["ssh_port"] = env_cfg["ssh_port"]
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=30) as client:
|
||||
@@ -3813,12 +3937,20 @@ async def do_serve_preset(content: str, owner: Optional[str] = None) -> Dict:
|
||||
data = resp.json()
|
||||
if data.get("ok"):
|
||||
sid = data.get("session_id", "?")
|
||||
endpoint_id = data.get("endpoint_id") or ""
|
||||
if endpoint_id:
|
||||
endpoint_added = True
|
||||
else:
|
||||
endpoint_meta = await _ensure_served_endpoint(model=repo_id, cmd=cmd, host=host)
|
||||
endpoint_added = bool(endpoint_meta.get("added"))
|
||||
endpoint_id = endpoint_meta.get("endpoint_id", "") or endpoint_id
|
||||
registered = await _cookbook_register_task(
|
||||
session_id=sid, model=repo_id, host=host,
|
||||
cmd=cmd, task_type="serve",
|
||||
endpoint_added=endpoint_added, endpoint_id=endpoint_id or "",
|
||||
)
|
||||
note = "" if registered else " (state-write failed — task may not show in UI)"
|
||||
return {"output": f"Launched preset {chosen.get('name')!r}: {repo_id} on {host or 'local'} (session: {sid}){note}", "session_id": sid, "exit_code": 0}
|
||||
return {"output": f"Launched preset {chosen.get('name')!r}: {repo_id} on {host or 'local'} (session: {sid}){note}", "session_id": sid, "host": host, "endpoint_id": endpoint_id, "exit_code": 0}
|
||||
return {"error": data.get("error", "Serve failed"), "exit_code": 1}
|
||||
except Exception as e:
|
||||
return {"error": str(e), "exit_code": 1}
|
||||
|
||||
+92
-42
@@ -1492,21 +1492,7 @@
|
||||
<div id="set-researchMsg" style="font-size:11px;color:color-mix(in srgb, var(--fg) 45%, transparent);"></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="admin-card">
|
||||
<h2><svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="vertical-align:-2px;margin-right:5px;opacity:0.6"><path d="M14.7 6.3a1 1 0 0 0 0 1.4l1.6 1.6a1 1 0 0 0 1.4 0l3.77-3.77a6 6 0 0 1-7.94 7.94l-6.91 6.91a2.12 2.12 0 0 1-3-3l6.91-6.91a6 6 0 0 1 7.94-7.94l-3.76 3.76z"/></svg>Agent</h2>
|
||||
<div class="admin-toggle-sub" style="margin-bottom:8px">Controls for the agent tool loop.</div>
|
||||
<div class="settings-col">
|
||||
<div class="settings-row">
|
||||
<label class="settings-label">Tool call limit</label>
|
||||
<input id="set-agentMaxTools" type="text" inputmode="numeric" placeholder="0 = unlimited" class="settings-select" style="width:120px;">
|
||||
</div>
|
||||
<div class="settings-row">
|
||||
<label class="settings-label">Max steps per message</label>
|
||||
<input id="set-agentMaxRounds" type="text" inputmode="numeric" placeholder="20" class="settings-select" style="width:120px;">
|
||||
</div>
|
||||
<div id="set-agentMsg" style="font-size:11px;color:color-mix(in srgb, var(--fg) 45%, transparent);"></div>
|
||||
</div>
|
||||
</div>
|
||||
<!-- Agent card moved to the Agent Tools tab. -->
|
||||
<!-- Image Generation removed — only inpaint remains in this build,
|
||||
and inpaint is configured via the gallery editor not this card.
|
||||
Keeping the DOM (hidden) so JS wiring against the inputs
|
||||
@@ -2048,30 +2034,37 @@
|
||||
<div class="admin-model-form">
|
||||
<div class="admin-model-form-row">
|
||||
<input id="adm-epLocalUrl" type="text" placeholder="Paste endpoint URL, e.g. http://localhost:11434/v1" style="flex:1">
|
||||
<select id="adm-epLocalType" style="padding:5px;width:72px;flex-shrink:0;">
|
||||
<option value="llm">LLM</option>
|
||||
<option value="image">Image</option>
|
||||
</select>
|
||||
</div>
|
||||
<div class="admin-model-form-row">
|
||||
<!-- API key row stays in the DOM but is collapsed until the
|
||||
user clicks the Key button on the action row. Local
|
||||
endpoints rarely need a key; hiding it by default keeps
|
||||
the form a single visual line. -->
|
||||
<div class="admin-model-form-row" id="adm-epLocalApiKey-row" style="display:none;">
|
||||
<input id="adm-epLocalApiKey" type="password" placeholder="API key (optional — for protected local endpoints)" autocomplete="off" style="flex:1">
|
||||
</div>
|
||||
<!-- Action row: LLM/Image type, Quickstart buttons (Scan,
|
||||
Ollama), Key reveal toggle, Test, Add — all inline so
|
||||
the Quickstart fold is gone and Type sits with the
|
||||
primary actions. -->
|
||||
<div class="admin-model-form-row">
|
||||
<label style="display:inline-flex;align-items:center;gap:4px;font-size:11px;opacity:0.6;flex-shrink:0;">Type:<select id="adm-epLocalType" style="padding:5px;width:72px;flex-shrink:0;">
|
||||
<option value="llm" selected>LLM</option>
|
||||
<option value="image">Image</option>
|
||||
</select></label>
|
||||
<button class="admin-btn-sm" id="adm-epDiscoverBtn" title="Scan your network for running model servers" style="display:inline-flex;align-items:center;gap:4px;">
|
||||
<svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round"><circle cx="11" cy="11" r="8"/><line x1="21" y1="21" x2="16.65" y2="16.65"/></svg>Scan
|
||||
</button>
|
||||
<button class="admin-btn-sm" id="adm-epOllamaBtn" title="Fill the default Ollama endpoint" style="display:inline-flex;align-items:center;gap:5px;"><span class="adm-ollama-logo" style="display:inline-flex;width:13px;height:13px;"></span>Ollama</button>
|
||||
<span style="flex:1"></span>
|
||||
<button class="admin-btn-sm" id="adm-epLocalTestBtn" style="width:55px;text-align:center;">Test</button>
|
||||
<button class="admin-btn-add" id="adm-epLocalAddBtn" style="width:55px;text-align:center;">Add</button>
|
||||
</div>
|
||||
<div class="adm-quickstart-section collapsed" id="adm-add-local-quickstart">
|
||||
<div class="adm-quickstart-toggle" role="button" tabindex="0" aria-expanded="false">
|
||||
<span>Quickstart</span>
|
||||
<svg class="adm-section-caret" width="11" height="11" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round"><polyline points="6 9 12 15 18 9"/></svg>
|
||||
</div>
|
||||
<div class="adm-quickstart-body">
|
||||
<button class="admin-btn-sm" id="adm-epDiscoverBtn" title="Scan your network for running model servers">
|
||||
<svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" style="vertical-align:-1px;margin-right:4px;"><circle cx="11" cy="11" r="8"/><line x1="21" y1="21" x2="16.65" y2="16.65"/></svg>Scan for Servers
|
||||
</button>
|
||||
<button class="admin-btn-sm" id="adm-epOllamaBtn" title="Fill the default Ollama endpoint">Ollama</button>
|
||||
</div>
|
||||
<button class="admin-btn-sm" id="adm-epLocalKeyBtn" title="Show / hide the API key field" aria-expanded="false" aria-controls="adm-epLocalApiKey-row" style="opacity:0.75;display:inline-flex;align-items:center;gap:4px;">
|
||||
<svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M21 2l-9.6 9.6"/><circle cx="7.5" cy="15.5" r="5.5"/><path d="M15.5 7.5l3 3"/></svg>API
|
||||
</button>
|
||||
<button class="admin-btn-sm" id="adm-epLocalTestBtn" style="min-width:55px;text-align:center;display:inline-flex;align-items:center;justify-content:center;gap:4px;">
|
||||
<svg width="11" height="11" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round"><polygon points="5 3 19 12 5 21 5 3"/></svg>Test
|
||||
</button>
|
||||
<button class="admin-btn-add" id="adm-epLocalAddBtn" style="min-width:55px;text-align:center;display:inline-flex;align-items:center;justify-content:center;gap:4px;">
|
||||
<svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="3" stroke-linecap="round" stroke-linejoin="round"><polyline points="20 6 9 17 4 12"/></svg>Add
|
||||
</button>
|
||||
</div>
|
||||
<div id="adm-epLocalMsg" class="adm-ep-inline-msg"></div>
|
||||
</div>
|
||||
@@ -2116,19 +2109,33 @@
|
||||
<option value="https://opencode.ai/zen/go/v1" data-logo="opencode">OpenCode Go</option>
|
||||
<option value="https://api.z.ai/api/coding/paas/v4" data-logo="zhipu">Z.AI Coding Plan</option>
|
||||
</select>
|
||||
<div class="admin-model-form-row">
|
||||
<input id="adm-epApiKey" type="password" placeholder="API key">
|
||||
<!-- API key row stays in DOM, hidden until Key button is
|
||||
clicked. Mirrors the Local section pattern: most users
|
||||
paste a key via the provider preset flow rather than
|
||||
typing it free-form, so the row only appears on demand. -->
|
||||
<div class="admin-model-form-row" id="adm-epApiKey-row" style="display:none;">
|
||||
<input id="adm-epApiKey" type="password" placeholder="API key" autocomplete="off" style="flex:1">
|
||||
</div>
|
||||
<div class="admin-model-form-row" style="margin-top:-4px;">
|
||||
<select id="adm-epKind" style="padding:5px;width:82px;">
|
||||
<option value="proxy">Proxy</option>
|
||||
<option value="api">API</option>
|
||||
</select>
|
||||
<select id="adm-epType" style="padding:5px;width:80px;">
|
||||
<option value="llm">LLM</option>
|
||||
<label style="display:inline-flex;align-items:center;gap:4px;font-size:11px;opacity:0.6;flex-shrink:0;">Type:<select id="adm-epType" style="padding:5px;width:80px;flex-shrink:0;">
|
||||
<option value="llm" selected>LLM</option>
|
||||
<option value="image">Image</option>
|
||||
</select>
|
||||
<button class="admin-btn-sm" id="adm-epApiTestBtn" style="width:55px;text-align:center;">Test</button>
|
||||
</select></label>
|
||||
<span style="flex:1"></span>
|
||||
<button class="admin-btn-sm" id="adm-epApiKeyBtn" title="Show / hide the API key field" aria-expanded="false" aria-controls="adm-epApiKey-row" style="opacity:0.75;display:inline-flex;align-items:center;gap:4px;">
|
||||
<svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M21 2l-9.6 9.6"/><circle cx="7.5" cy="15.5" r="5.5"/><path d="M15.5 7.5l3 3"/></svg>API
|
||||
</button>
|
||||
<button class="admin-btn-sm" id="adm-epApiTestBtn" style="min-width:55px;text-align:center;display:inline-flex;align-items:center;justify-content:center;gap:4px;">
|
||||
<svg width="11" height="11" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round"><polygon points="5 3 19 12 5 21 5 3"/></svg>Test
|
||||
</button>
|
||||
<button class="admin-btn-sm hidden" id="adm-epApiCancelTestBtn" style="width:62px;text-align:center;">Cancel</button>
|
||||
<button class="admin-btn-add" id="adm-epAddBtn" style="width:55px;text-align:center;">Add</button>
|
||||
<button class="admin-btn-add" id="adm-epAddBtn" style="min-width:55px;text-align:center;display:inline-flex;align-items:center;justify-content:center;gap:4px;">
|
||||
<svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="3" stroke-linecap="round" stroke-linejoin="round"><polyline points="20 6 9 17 4 12"/></svg>Add
|
||||
</button>
|
||||
</div>
|
||||
<div id="adm-epApiMsg" class="adm-ep-inline-msg"></div>
|
||||
<div id="adm-deviceAuthStatus" class="adm-ep-inline-msg"></div>
|
||||
@@ -2136,7 +2143,15 @@
|
||||
</div>
|
||||
</div>
|
||||
<div class="admin-card">
|
||||
<h2><svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="vertical-align:-2px;margin-right:5px;opacity:0.6"><rect x="2" y="3" width="20" height="14" rx="2"/><line x1="8" y1="21" x2="16" y2="21"/><line x1="12" y1="17" x2="12" y2="21"/></svg>Added Models <span style="opacity:0.45;font-weight:normal;font-size:0.82em">(Endpoints)</span></h2>
|
||||
<h2 style="display:flex;align-items:center;gap:8px;"><svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="vertical-align:-2px;margin-right:5px;opacity:0.6"><rect x="2" y="3" width="20" height="14" rx="2"/><line x1="8" y1="21" x2="16" y2="21"/><line x1="12" y1="17" x2="12" y2="21"/></svg>Added Models <span style="opacity:0.45;font-weight:normal;font-size:0.82em">(Endpoints)</span>
|
||||
<span style="flex:1"></span>
|
||||
<button class="admin-btn-sm" id="adm-epProbeAllBtn" title="Re-test every endpoint and refresh online status" style="font-size:11px;font-weight:normal;display:inline-flex;align-items:center;gap:4px;">
|
||||
<svg width="11" height="11" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.4" stroke-linecap="round" stroke-linejoin="round"><polyline points="23 4 23 10 17 10"/><polyline points="1 20 1 14 7 14"/><path d="M3.51 9a9 9 0 0 1 14.85-3.36L23 10M1 14l4.64 4.36A9 9 0 0 0 20.49 15"/></svg>Probe
|
||||
</button>
|
||||
<button class="admin-btn-sm" id="adm-epClearOfflineBtn" title="Remove all endpoints currently marked offline" style="font-size:11px;font-weight:normal;display:inline-flex;align-items:center;gap:4px;opacity:0.85;">
|
||||
<svg width="11" height="11" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.4" stroke-linecap="round" stroke-linejoin="round"><polyline points="3 6 5 6 21 6"/><path d="M19 6l-1 14a2 2 0 0 1-2 2H8a2 2 0 0 1-2-2L5 6"/></svg>Clear offline <span id="adm-epOfflineCount" style="opacity:0.6;margin-left:2px;"></span>
|
||||
</button>
|
||||
</h2>
|
||||
<div class="admin-toggle-sub" style="margin-bottom:10px">Manage the endpoints you've added.</div>
|
||||
<div class="adm-ep-section">
|
||||
<div class="adm-ep-section-head">
|
||||
@@ -2167,10 +2182,45 @@
|
||||
<button type="button" class="admin-btn-sm" id="unified-intg-add-btn" style="display:inline-flex;align-items:center;gap:6px;">+ Add Integration<svg width="13" height="13" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="opacity:0.7;"><path d="M10 13a5 5 0 0 0 7.54.54l3-3a5 5 0 0 0-7.07-7.07l-1.72 1.71"/><path d="M14 11a5 5 0 0 0-7.54-.54l-3 3a5 5 0 0 0 7.07 7.07l1.71-1.71"/></svg></button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="admin-card admin-only" style="margin-top:12px;">
|
||||
<h2><svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="vertical-align:-2px;margin-right:5px;opacity:0.6"><path d="M21 2l-2 2m-7.61 7.61a5.5 5.5 0 1 1-7.778 7.778 5.5 5.5 0 0 1 7.777-7.777zm0 0L15.5 7.5m0 0l3 3L22 7l-3-3m-3.5 3.5L19 4"/></svg>API Tokens</h2>
|
||||
<div class="admin-toggle-sub" style="margin-bottom:8px">Bearer tokens for external integrations (scripts, Codex, headless agent runs). Token value shown ONCE on create — copy it then.</div>
|
||||
<div id="adm-tokenList" style="margin-bottom:8px;"></div>
|
||||
<div style="display:flex;gap:6px;flex-wrap:wrap;align-items:flex-start;">
|
||||
<input type="text" id="adm-tokenName" placeholder="Token name (e.g. agent-test)" class="settings-select" style="flex:1;min-width:160px;">
|
||||
<input type="text" id="adm-tokenScopes" placeholder="scopes (comma-separated, blank = chat)" class="settings-select" style="flex:2;min-width:220px;" title="Allowed: chat, cookbook:read, cookbook:launch, documents:read|write, todos:read|write, email:read|draft|send, calendar:read|write, memory:read|write">
|
||||
<button class="admin-btn-add" id="adm-tokenAddBtn">Create token</button>
|
||||
</div>
|
||||
<div id="adm-tokenMsg" style="font-size:11px;margin-top:6px;"></div>
|
||||
<div id="adm-tokenReveal" style="display:none;margin-top:8px;padding:8px 10px;background:color-mix(in srgb, var(--accent, var(--red)) 12%, transparent);border:1px solid color-mix(in srgb, var(--accent, var(--red)) 35%, transparent);border-radius:6px;">
|
||||
<div style="font-size:11px;font-weight:600;margin-bottom:4px;">Copy now — this is the only time you'll see it:</div>
|
||||
<code id="adm-tokenValue" style="font-family:'Berkeley Mono','SF Mono','Fira Code',monospace;font-size:11px;word-break:break-all;display:block;background:var(--bg);padding:6px 8px;border-radius:4px;margin-bottom:6px;user-select:all;"></code>
|
||||
<button class="admin-btn-sm" id="adm-tokenCopyBtn">Copy</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- ═══ TOOLS TAB ═══ -->
|
||||
<div data-settings-panel="tools" class="hidden">
|
||||
<div class="admin-card" style="margin-bottom:12px;">
|
||||
<h2><svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="vertical-align:-2px;margin-right:5px;opacity:0.6"><path d="M14.7 6.3a1 1 0 0 0 0 1.4l1.6 1.6a1 1 0 0 0 1.4 0l3.77-3.77a6 6 0 0 1-7.94 7.94l-6.91 6.91a2.12 2.12 0 0 1-3-3l6.91-6.91a6 6 0 0 1 7.94-7.94l-3.76 3.76z"/></svg>Agent</h2>
|
||||
<div class="admin-toggle-sub" style="margin-bottom:8px">Controls for the agent tool loop.</div>
|
||||
<div class="settings-col">
|
||||
<div class="settings-row">
|
||||
<label class="settings-label">Tool call limit</label>
|
||||
<input id="set-agentMaxTools" type="text" inputmode="numeric" placeholder="0 = unlimited" class="settings-select" style="width:120px;">
|
||||
</div>
|
||||
<div class="settings-row">
|
||||
<label class="settings-label">Max steps per message</label>
|
||||
<input id="set-agentMaxRounds" type="text" inputmode="numeric" placeholder="20" class="settings-select" style="width:120px;">
|
||||
</div>
|
||||
<div id="set-agentMsg" style="font-size:11px;color:color-mix(in srgb, var(--fg) 45%, transparent);"></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="admin-card" style="margin-bottom:12px;">
|
||||
<h2 style="display:flex;align-items:center;gap:6px;"><svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="margin-right:1px;opacity:0.6;flex-shrink:0"><path d="M9 11l3 3L22 4"/><path d="M21 12v7a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V5a2 2 0 0 1 2-2h11"/></svg>Agent loop<span style="flex:1"></span><label class="admin-switch" title="On a failing effectful turn, climb verify → different-method → teacher → stop-and-summarize instead of silently quitting." style="flex-shrink:0"><input type="checkbox" id="set-agentSupervisorLadder"><span class="admin-slider"></span></label></h2>
|
||||
<div class="admin-toggle-sub" style="margin-bottom:8px">Supervisor ladder. When on, every effectful agent turn that claims done is verified; on FAIL the ladder escalates verify → different method → teacher → stop-with-blocker, each rung visible in chat. Teacher rung requires <code>teacher_model</code> to be set.</div>
|
||||
</div>
|
||||
<div class="admin-card" style="margin-bottom:12px;">
|
||||
<h2><svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="vertical-align:-2px;margin-right:5px;opacity:0.6"><path d="M14.7 6.3a1 1 0 0 0 0 1.4l1.6 1.6a1 1 0 0 0 1.4 0l3.77-3.77a6 6 0 0 1-7.94 7.94l-6.91 6.91a2.12 2.12 0 0 1-3-3l6.91-6.91a6 6 0 0 1 7.94-7.94l-3.76 3.76z"/></svg>Built-in Tools</h2>
|
||||
<div class="admin-toggle-sub" style="margin-bottom:8px">Enable or disable tools available to the AI agent.</div>
|
||||
|
||||
+153
-3
@@ -1149,6 +1149,144 @@ function initEndpointForm() {
|
||||
}
|
||||
}
|
||||
|
||||
// API Key reveal toggle. The key inputs are hidden by default so the Add
|
||||
// form reads as a single action row; the Key button toggles the input row
|
||||
// and flips aria-expanded for screen readers / CSS pseudo-classes.
|
||||
const _wireKeyToggle = (btnId, rowId) => {
|
||||
const btn = el(btnId);
|
||||
const row = el(rowId);
|
||||
if (!btn || !row) return;
|
||||
btn.addEventListener('click', () => {
|
||||
const showing = row.style.display !== 'none';
|
||||
row.style.display = showing ? 'none' : '';
|
||||
btn.setAttribute('aria-expanded', showing ? 'false' : 'true');
|
||||
btn.style.opacity = showing ? '0.75' : '1';
|
||||
if (!showing) {
|
||||
const inp = row.querySelector('input');
|
||||
if (inp) inp.focus();
|
||||
}
|
||||
});
|
||||
};
|
||||
_wireKeyToggle('adm-epLocalKeyBtn', 'adm-epLocalApiKey-row');
|
||||
_wireKeyToggle('adm-epApiKeyBtn', 'adm-epApiKey-row');
|
||||
|
||||
// ── Added Models toolbar: Probe + Clear offline ────────────────────
|
||||
// Both buttons act over the currently-rendered endpoint list. The
|
||||
// online/offline marker is stamped on each row's [data-adm-ep-online]
|
||||
// attribute by loadEndpoints(), so both buttons just iterate the DOM
|
||||
// without re-fetching anything they don't already have.
|
||||
const _refreshOfflineCount = () => {
|
||||
const lbl = el('adm-epOfflineCount');
|
||||
if (!lbl) return;
|
||||
const n = document.querySelectorAll('[data-adm-ep-id] [data-adm-ep-online="0"]').length;
|
||||
lbl.textContent = n > 0 ? `(${n})` : '';
|
||||
// Keep the button enabled even when there are no offline rows — a
|
||||
// click on the empty case fires a toast instead of feeling dead.
|
||||
const btn = el('adm-epClearOfflineBtn');
|
||||
if (btn) btn.style.opacity = n === 0 ? '0.55' : '0.85';
|
||||
};
|
||||
// Refresh the offline count after every loadEndpoints() run. Rather than
|
||||
// patching the render hook, observe the two list containers for DOM changes.
|
||||
const _obsRoots = ['adm-epList-local', 'adm-epList-api']
|
||||
.map(id => el(id)).filter(Boolean);
|
||||
if (_obsRoots.length) {
|
||||
const mo = new MutationObserver(_refreshOfflineCount);
|
||||
_obsRoots.forEach(r => mo.observe(r, { childList: true, subtree: true }));
|
||||
_refreshOfflineCount();
|
||||
}
|
||||
|
||||
const probeAllBtn = el('adm-epProbeAllBtn');
|
||||
if (probeAllBtn) {
|
||||
probeAllBtn.addEventListener('click', async () => {
|
||||
probeAllBtn.disabled = true;
|
||||
const origHTML = probeAllBtn.innerHTML;
|
||||
probeAllBtn.innerHTML = '<span style="opacity:0.7;">Probing…</span>';
|
||||
try {
|
||||
// Hit the bulk local probe (same one the model picker uses).
|
||||
await fetch('/api/model-endpoints/probe-local', { credentials: 'same-origin' }).catch(() => {});
|
||||
// Then per-endpoint /probe for the rest so API/cloud endpoints
|
||||
// refresh too. Parallel — capped to 6 at a time so we don't
|
||||
// hammer the backend on a big list.
|
||||
const ids = Array.from(document.querySelectorAll('[data-adm-ep-id]')).map(r => r.getAttribute('data-adm-ep-id')).filter(Boolean);
|
||||
const lane = async (id) => {
|
||||
try { await fetch(`/api/model-endpoints/${id}/probe`, { credentials: 'same-origin' }); } catch (_) {}
|
||||
};
|
||||
const queue = [...ids];
|
||||
const workers = Array.from({length: Math.min(6, queue.length)}, () => (async () => {
|
||||
while (queue.length) {
|
||||
const id = queue.shift();
|
||||
if (id) await lane(id);
|
||||
}
|
||||
})());
|
||||
await Promise.all(workers);
|
||||
await loadEndpoints();
|
||||
if (uiModule && uiModule.showToast) uiModule.showToast('Endpoint status refreshed', 1800);
|
||||
} finally {
|
||||
probeAllBtn.innerHTML = origHTML;
|
||||
probeAllBtn.disabled = false;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
const clearOfflineBtn = el('adm-epClearOfflineBtn');
|
||||
if (clearOfflineBtn) {
|
||||
clearOfflineBtn.addEventListener('click', async () => {
|
||||
const offlineBtns = Array.from(document.querySelectorAll('[data-adm-del-ep][data-adm-ep-online="0"]'));
|
||||
const ids = offlineBtns.map(b => b.getAttribute('data-adm-del-ep')).filter(Boolean);
|
||||
if (!ids.length) {
|
||||
if (uiModule && uiModule.showToast) {
|
||||
uiModule.showToast('No offline endpoints — nothing to clear', 1800);
|
||||
}
|
||||
return;
|
||||
}
|
||||
const confirmMsg = ids.length === 1
|
||||
? 'Remove 1 offline endpoint?'
|
||||
: `Remove ${ids.length} offline endpoints?`;
|
||||
if (uiModule && uiModule.styledConfirm) {
|
||||
const ok = await uiModule.styledConfirm(confirmMsg, { confirmText: 'Remove', danger: true });
|
||||
if (!ok) return;
|
||||
} else if (!confirm(confirmMsg)) {
|
||||
return;
|
||||
}
|
||||
clearOfflineBtn.disabled = true;
|
||||
// Optimistic UI: pull rows immediately, then fire the DELETEs.
|
||||
offlineBtns.forEach(b => {
|
||||
const row = b.closest('[data-adm-ep-id]');
|
||||
if (row) row.remove();
|
||||
});
|
||||
await Promise.all(ids.map(id =>
|
||||
fetch('/api/model-endpoints/' + id, { method: 'DELETE', credentials: 'same-origin' }).catch(() => {})
|
||||
));
|
||||
try { await loadEndpoints(); } catch (_) {}
|
||||
_refreshOfflineCount();
|
||||
if (uiModule && uiModule.showToast) uiModule.showToast(`Removed ${ids.length} offline endpoint${ids.length === 1 ? '' : 's'}`, 1800);
|
||||
});
|
||||
}
|
||||
|
||||
// Clear-on-focus for the API key inputs. The fields are type=password so the
|
||||
// value is masked; users can't see what's there to edit it in place, so the
|
||||
// expected gesture is "click in, type new key". Wiping on focus removes the
|
||||
// select-all-and-delete dance.
|
||||
const _wireClearOnFocus = (id) => {
|
||||
const inp = el(id);
|
||||
if (!inp) return;
|
||||
inp.addEventListener('focus', () => {
|
||||
if (inp.value) inp.value = '';
|
||||
});
|
||||
};
|
||||
_wireClearOnFocus('adm-epLocalApiKey');
|
||||
_wireClearOnFocus('adm-epApiKey');
|
||||
|
||||
// Drop the Ollama provider logo into the Ollama button on the Add Models
|
||||
// action row. Reuses the same SVG the provider picker uses, so brand parity stays free.
|
||||
try {
|
||||
const _ollamaLogoSlot = document.querySelector('#adm-epOllamaBtn .adm-ollama-logo');
|
||||
if (_ollamaLogoSlot) {
|
||||
const svg = providerLogo('ollama') || '';
|
||||
if (svg) _ollamaLogoSlot.innerHTML = svg;
|
||||
}
|
||||
} catch (_) {}
|
||||
|
||||
// Local "Add" button — sibling form for self-hosted base URLs.
|
||||
const localAddBtn = el('adm-epLocalAddBtn');
|
||||
const localTestBtn = el('adm-epLocalTestBtn');
|
||||
@@ -2073,17 +2211,28 @@ async function loadTokens() {
|
||||
}
|
||||
|
||||
function initTokenForm() {
|
||||
el('adm-tokenAddBtn').addEventListener('click', async () => {
|
||||
const addBtn = el('adm-tokenAddBtn');
|
||||
if (!addBtn || addBtn.dataset.bound) return;
|
||||
addBtn.dataset.bound = '1';
|
||||
addBtn.addEventListener('click', async () => {
|
||||
const msg = el('adm-tokenMsg');
|
||||
const reveal = el('adm-tokenReveal');
|
||||
msg.textContent = ''; msg.className = ''; reveal.style.display = 'none';
|
||||
const name = el('adm-tokenName').value.trim();
|
||||
if (!name) { msg.textContent = 'Token name is required'; msg.className = 'admin-error'; return; }
|
||||
const fd = new FormData(); fd.append('name', name);
|
||||
const scopes = (el('adm-tokenScopes')?.value || '').trim();
|
||||
if (scopes) fd.append('scopes', scopes);
|
||||
try {
|
||||
const res = await fetch('/api/tokens', { method: 'POST', body: fd, credentials: 'same-origin' });
|
||||
const data = await res.json();
|
||||
if (res.ok) { el('adm-tokenValue').textContent = data.token; reveal.style.display = ''; el('adm-tokenName').value = ''; loadTokens(); }
|
||||
if (res.ok) {
|
||||
el('adm-tokenValue').textContent = data.token;
|
||||
reveal.style.display = '';
|
||||
el('adm-tokenName').value = '';
|
||||
if (el('adm-tokenScopes')) el('adm-tokenScopes').value = '';
|
||||
loadTokens();
|
||||
}
|
||||
else { msg.textContent = data.detail || 'Failed'; msg.className = 'admin-error'; }
|
||||
} catch (e) { msg.textContent = 'Request failed'; msg.className = 'admin-error'; }
|
||||
});
|
||||
@@ -2344,7 +2493,7 @@ function initDangerZone() {
|
||||
═══════════════════════════════════════════ */
|
||||
function initAll() {
|
||||
modalEl = el('settings-modal');
|
||||
const inits = [initSignupToggle, initAddUser, initEndpointForm, initMcpForm, initCalDAV, initBackup, initDangerZone, () => settingsModule.initIntegrations()];
|
||||
const inits = [initSignupToggle, initAddUser, initEndpointForm, initMcpForm, initCalDAV, initBackup, initDangerZone, initTokenForm, () => settingsModule.initIntegrations()];
|
||||
for (const fn of inits) {
|
||||
try { fn(); } catch (e) { console.error('Admin init error in', fn.name || 'anonymous', e); }
|
||||
}
|
||||
@@ -2357,6 +2506,7 @@ function refreshAll() {
|
||||
loadEndpoints();
|
||||
loadBuiltinTools();
|
||||
loadMcpServers();
|
||||
loadTokens();
|
||||
}
|
||||
|
||||
/* ═══════════════════════════════════════════
|
||||
|
||||
@@ -2118,6 +2118,28 @@ export function addMessage(role, content, modelName, metadata) {
|
||||
return lastWrap;
|
||||
}
|
||||
|
||||
// --- Wake-task / supervisor system check-in ---
|
||||
// The self-wake mechanism injects "Did you finish?" as a user message
|
||||
// (or persisted history shows a "[Task] Self-check: <id>" envelope)
|
||||
// so the agent loop re-enters and re-checks status. These messages are
|
||||
// an internal control signal, not conversation content, so they are
|
||||
// detected here (by metadata flag or by the legacy text envelope) and
|
||||
// skipped entirely: addMessage returns null and nothing is rendered,
|
||||
// neither a user-style bubble nor a system chip.
|
||||
let _isWakeCheck = !!(metadata?.wake_check_in || metadata?.hidden_from_user_view);
|
||||
if (!_isWakeCheck && typeof textRaw === 'string') {
|
||||
// Also catch historical messages persisted as "[Task] Self-check: <sid>"
|
||||
// (older wake tasks that didn't set wake_check_in metadata).
|
||||
if (/^\s*\[Task\]\s+Self-check:/i.test(textRaw)) {
|
||||
_isWakeCheck = true;
|
||||
}
|
||||
}
|
||||
if (_isWakeCheck) {
|
||||
// Supervisor self-check messages are an internal control signal —
|
||||
// skip rendering entirely so they don't show up in the conversation.
|
||||
return null;
|
||||
}
|
||||
|
||||
// --- Standard single-bubble message ---
|
||||
const wrap = document.createElement('div');
|
||||
wrap.className = 'msg ' + (role === 'user' ? 'msg-user' : 'msg-ai');
|
||||
|
||||
@@ -610,12 +610,47 @@ export function _showDiagnosis(panel, diagnosis, sourceText) {
|
||||
? `Suggested action: ${fixes[0].label}.`
|
||||
: 'Suggested action: copy the error and adjust the serve settings.');
|
||||
|
||||
// Simplified diagnosis card: just the error message + suggestion + fix
|
||||
// button(s). Removed the fold toggle, copy button, and × dismiss — they
|
||||
// made the card noisy without earning their keep. _diagCollapsed is kept
|
||||
// as a stub so callers don't have to change.
|
||||
panel._diagCollapsed = false;
|
||||
|
||||
// Top-right toolbar: Copy bundle + × dismiss. Restored after user feedback
|
||||
// — without them there's no way to quietly close a stale diagnosis or grab
|
||||
// the full error+context for a forum/discord paste.
|
||||
const toolbar = document.createElement('div');
|
||||
toolbar.className = 'cookbook-diag-toolbar';
|
||||
toolbar.style.cssText = 'display:flex;justify-content:flex-end;align-items:center;gap:4px;margin-bottom:-2px;';
|
||||
|
||||
const copyBtn = document.createElement('button');
|
||||
copyBtn.type = 'button';
|
||||
copyBtn.className = 'cookbook-diag-copy';
|
||||
copyBtn.title = 'Copy diagnosis details';
|
||||
copyBtn.setAttribute('aria-label', 'Copy diagnosis');
|
||||
copyBtn.innerHTML = '<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect x="9" y="9" width="13" height="13" rx="2" ry="2"/><path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"/></svg>';
|
||||
copyBtn.addEventListener('click', async (e) => {
|
||||
e.stopPropagation();
|
||||
const bundle = _diagnosisCopyBundle(task, diagnosis, sourceText, suggestionText);
|
||||
try {
|
||||
await navigator.clipboard.writeText(bundle);
|
||||
copyBtn.classList.add('copied');
|
||||
setTimeout(() => { if (copyBtn.isConnected) copyBtn.classList.remove('copied'); }, 1200);
|
||||
} catch (_) {}
|
||||
});
|
||||
|
||||
const dismissBtn = document.createElement('button');
|
||||
dismissBtn.type = 'button';
|
||||
dismissBtn.className = 'cookbook-diag-dismiss';
|
||||
dismissBtn.title = 'Dismiss diagnosis';
|
||||
dismissBtn.setAttribute('aria-label', 'Dismiss');
|
||||
dismissBtn.textContent = '×';
|
||||
dismissBtn.addEventListener('click', (e) => {
|
||||
e.stopPropagation();
|
||||
panel._diagDismissed = diagnosis.message;
|
||||
_clearDiagnosis(panel);
|
||||
});
|
||||
|
||||
toolbar.appendChild(copyBtn);
|
||||
toolbar.appendChild(dismissBtn);
|
||||
diag.appendChild(toolbar);
|
||||
|
||||
const body = document.createElement('div');
|
||||
body.className = 'cookbook-diag-body';
|
||||
const msg = document.createElement('div');
|
||||
|
||||
+159
-8
@@ -416,9 +416,11 @@ function _hwfitShowError(list, host, detail) {
|
||||
if (rb) rb.addEventListener('click', () => { _resetGpuToggleState(); _hwfitFetch(true); });
|
||||
}
|
||||
|
||||
// Client-side "Engine" filter (llama.cpp / vLLM / SGLang). Empty = show all.
|
||||
// Uses the same _detectBackend() the serve commands use, so what you filter to
|
||||
// is exactly what would be launched. Pure view filter — no refetch needed.
|
||||
// Client-side "Engine" filter (llama.cpp / vLLM / SGLang / Ollama). Empty =
|
||||
// show all. Uses the same _detectBackend() the serve commands use, so what you
|
||||
// filter to is exactly what would be launched. Pure view filter — no refetch
|
||||
// needed. Ollama rows are merged into the main list (see _ensureOllamaLib +
|
||||
// _ollamaToHwfitRows below) so the filter handles all engines uniformly.
|
||||
function _applyEngineFilter(models) {
|
||||
const want = document.getElementById('hwfit-engine')?.value || '';
|
||||
if (!want || !Array.isArray(models)) return models || [];
|
||||
@@ -427,6 +429,86 @@ function _applyEngineFilter(models) {
|
||||
});
|
||||
}
|
||||
|
||||
// Ollama library cache (per-page). Filled lazily on first _hwfitFetch; the raw
|
||||
// list is the same shape returned by /api/cookbook/ollama/library, then turned
|
||||
// into per-tag hwfit rows so they slot into the main list grid alongside HF
|
||||
// scan results.
|
||||
let _ollamaLibCache = null;

/**
 * Return the Ollama library model list, fetching it lazily.
 *
 * A successful response is cached in `_ollamaLibCache` and reused by later
 * calls. A failed request (network error, non-2xx status, unparseable body)
 * resolves to an empty list WITHOUT populating the cache, so the next call
 * retries instead of pinning an empty library for the rest of the page's
 * lifetime (an empty array is truthy and would otherwise short-circuit the
 * cache check forever).
 *
 * @returns {Promise<Array>} The `models` array from
 *   /api/cookbook/ollama/library, or [] when it is unavailable.
 */
async function _ensureOllamaLib() {
  if (_ollamaLibCache) return _ollamaLibCache;
  try {
    const res = await fetch('/api/cookbook/ollama/library');
    // fetch() only rejects on network failure; HTTP errors must be checked
    // explicitly so an error payload is never mistaken for library data.
    if (!res.ok) return [];
    const data = await res.json();
    _ollamaLibCache = Array.isArray(data?.models) ? data.models : [];
    return _ollamaLibCache;
  } catch {
    // Best-effort: the hwfit list still renders without Ollama rows.
    return [];
  }
}
|
||||
|
||||
// Helpers for turning Ollama library entries into per-tag hwfit rows:
// _olParseSize reads a size tag and _ollamaToHwfitRows builds the rows. The row
// shape matches what _hwfitRenderList expects (fit_level, parameter_count,
// required_gb, score, …) so the rows render identically to HF results.
|
||||
/**
 * Parse an Ollama size tag into a parameter count expressed in billions.
 *
 * Examples: "14b" → 14, "1.5b" → 1.5, "8x7b" → 56 (experts × per-expert size,
 * a rough total), "135m" → 0.135. Tags that carry no size ("latest") and
 * empty input yield null.
 *
 * @param {*} s Size tag; non-string values are stringified before matching.
 * @returns {number|null} Parameter count in billions, or null if unparseable.
 */
function _olParseSize(s) {
  if (!s) return null;
  // Coerce and trim rather than assuming a clean string: a numeric or padded
  // value in the library payload must not throw or silently fail to match.
  const low = String(s).trim().toLowerCase();
  // Mixture-of-experts form, e.g. "8x7b".
  let m = low.match(/^(\d+(?:\.\d+)?)x(\d+(?:\.\d+)?)b$/);
  if (m) return parseFloat(m[1]) * parseFloat(m[2]);
  // Plain billions, e.g. "14b" / "1.5b".
  m = low.match(/^(\d+(?:\.\d+)?)b$/);
  if (m) return parseFloat(m[1]);
  // Millions, e.g. "135m" — converted to billions.
  m = low.match(/^(\d+(?:\.\d+)?)m$/);
  if (m) return parseFloat(m[1]) / 1000;
  return null;
}
|
||||
/**
 * Expand Ollama library entries into one hwfit row per size tag.
 *
 * Each entry contributes a row for every item in its `sizes` array; entries
 * without a non-empty `sizes` array get a single "latest" row. Rows are
 * emitted in library order, sizes in their listed order.
 *
 * @param {Array} libModels Library entries (`name`, `sizes`, `description`).
 * @param {number} vramAvail Available GPU VRAM in GB (falsy = unknown).
 * @param {number} ramAvail Available system RAM in GB (falsy = unknown).
 * @returns {Array<Object>} Rows carrying fit_level, parameter_count,
 *   required_gb, score, … plus Ollama-specific `_ol*` fields.
 */
function _ollamaToHwfitRows(libModels, vramAvail, ramAvail) {
  if (!Array.isArray(libModels)) return [];

  // Fit bucket for an estimated memory need, given the available VRAM/RAM.
  const classifyFit = (needGb) => {
    if (!needGb) return 'no_fit';
    const fitsInRam = Boolean(ramAvail) && needGb <= ramAvail;
    if (!vramAvail) return fitsInRam ? 'marginal' : 'no_fit';
    if (needGb <= vramAvail * 0.6) return 'perfect';
    if (needGb <= vramAvail) return 'good';
    return fitsInRam ? 'marginal' : 'too_tight';
  };

  // Human-readable parameter count: "14B", "1.5B", "135M", or "?" if unknown.
  const labelFor = (billions) => {
    if (!billions) return '?';
    if (billions < 1) return (billions * 1000).toFixed(0) + 'M';
    return billions.toFixed(billions >= 10 ? 0 : 1) + 'B';
  };

  const rows = [];
  for (const entry of libModels) {
    const hasSizes = Array.isArray(entry.sizes) && entry.sizes.length;
    const sizeTags = hasSizes ? entry.sizes : ['latest'];
    for (const sizeTag of sizeTags) {
      const billions = _olParseSize(sizeTag);
      // Ollama default GGUF is ~Q4_K_M. Rough VRAM estimate: 0.6 GB / B.
      const needGb = billions ? billions * 0.6 : 0;
      // A modest score so Ollama rows still sort sensibly in the default
      // score view: bigger models get a slightly higher base, capped at 60;
      // rows with an unknown size get a flat 25.
      const rowScore = billions ? Math.min(30 + billions * 0.3, 60) : 25;
      const fullTag = `${entry.name}:${sizeTag}`;
      rows.push({
        name: fullTag,
        repo_id: fullTag,
        quant: 'Q4_K_M',
        parameter_count: labelFor(billions),
        params_b: billions || 0,
        required_gb: needGb,
        fit_level: classifyFit(needGb),
        score: rowScore,
        speed_tps: 0,
        context: 0,
        is_gguf: true,
        backend: 'ollama',
        _isOllama: true,
        _olName: entry.name,
        _olSize: sizeTag,
        _description: entry.description || '',
      });
    }
  }
  return rows;
}
|
||||
|
||||
export async function _hwfitFetch(fresh = false) {
|
||||
const _tk = ++_hwfitFetchToken;
|
||||
const useCase = document.getElementById('hwfit-usecase')?.value || '';
|
||||
@@ -475,7 +557,12 @@ export async function _hwfitFetch(fresh = false) {
|
||||
_setLastCacheHost(remoteKey);
|
||||
const _cacheSrv = _serverByVal(_envState.remoteServerKey || remoteHost);
|
||||
const _cachePort = _cacheSrv?.port || '';
|
||||
const _cacheParams = new URLSearchParams({ host: remoteHost }); if (_cachePort) _cacheParams.set('ssh_port', _cachePort); if (_cacheSrv?.platform) _cacheParams.set('platform', _cacheSrv.platform);
|
||||
const _cacheParams = new URLSearchParams();
|
||||
if (remoteHost) {
|
||||
_cacheParams.set('host', remoteHost);
|
||||
if (_cachePort) _cacheParams.set('ssh_port', _cachePort);
|
||||
if (_cacheSrv?.platform) _cacheParams.set('platform', _cacheSrv.platform);
|
||||
}
|
||||
fetch(`/api/model/cached?${_cacheParams}`, { credentials: 'same-origin' })
|
||||
.then(r => r.json())
|
||||
.then(d => {
|
||||
@@ -543,7 +630,18 @@ export async function _hwfitFetch(fresh = false) {
|
||||
// A newer scan started while this one was in flight (user switched servers
|
||||
// mid-probe) — drop this stale response so it can't clobber the new one.
|
||||
if (_tk !== _hwfitFetchToken) { try { wp.destroy(); } catch {} return; }
|
||||
if (!res.ok) throw new Error(res.statusText);
|
||||
if (!res.ok) {
|
||||
const body = await res.text().catch(() => '');
|
||||
let msg = '';
|
||||
try {
|
||||
const payload = JSON.parse(body);
|
||||
msg = payload && (payload.detail || payload.error || payload.message);
|
||||
} catch {
|
||||
msg = body;
|
||||
}
|
||||
msg = typeof msg === 'string' ? msg.trim() : '';
|
||||
throw new Error(`HTTP ${res.status} ${res.statusText}${msg ? `: ${msg}` : ''}`);
|
||||
}
|
||||
let data = await res.json();
|
||||
if (_tk !== _hwfitFetchToken) { try { wp.destroy(); } catch {} return; }
|
||||
if (!isImageMode && quantPref && !data.error && Array.isArray(data.models) && data.models.length === 0) {
|
||||
@@ -583,6 +681,23 @@ export async function _hwfitFetch(fresh = false) {
|
||||
if (!_cached) { _hwfitShowError(list, remoteHost, data.error); if (hw) hw.innerHTML = ''; }
|
||||
return;
|
||||
}
|
||||
// Merge Ollama library rows into the main list so they appear with the
|
||||
// same Fit/Param/Quant/VRAM/Mode columns as HF results and respond to the
|
||||
// Engine filter. Skipped in image-gen mode (Ollama doesn't serve diffusers).
|
||||
if (!isImageMode) {
|
||||
const _vramAvail = data.system?.gpu_vram_gb || 0;
|
||||
const _ramAvail = data.system?.total_ram_gb || 0;
|
||||
const _lib = await _ensureOllamaLib();
|
||||
const _olRows = _ollamaToHwfitRows(_lib, _vramAvail, _ramAvail);
|
||||
// Search filter on Ollama rows: HF API already filters by search; do the
|
||||
// same client-side over Ollama name + description so the search box
|
||||
// works consistently across both sources.
|
||||
const _s = (search || '').trim().toLowerCase();
|
||||
const _olFiltered = _s
|
||||
? _olRows.filter(r => r.name.toLowerCase().includes(_s) || (r._description || '').toLowerCase().includes(_s))
|
||||
: _olRows;
|
||||
data.models = (data.models || []).concat(_olFiltered);
|
||||
}
|
||||
_hwfitCache = data;
|
||||
_hwfitRenderHw(hw, data.system);
|
||||
// Propagate local platform from hardware probe so _isWindows(task) works
|
||||
@@ -964,14 +1079,36 @@ export function _hwfitRenderList(el, models) {
|
||||
html += `</div>`;
|
||||
}
|
||||
el.innerHTML = html;
|
||||
// Click row → expand inline action panel
|
||||
// Click row → expand inline action panel. Exception: Ollama rows skip the
|
||||
// expand panel (no HF metadata to power it) and just fill the Download
|
||||
// input with the `<name>:<size>` tag — one click → ready to pull.
|
||||
el.querySelectorAll('.hwfit-row:not(.hwfit-header)').forEach(row => {
|
||||
row.addEventListener('click', () => {
|
||||
const name = row.dataset.model;
|
||||
if (!name) return;
|
||||
// Find model data from cache
|
||||
const modelData = (_hwfitCache?.models || []).find(m => m.name === name);
|
||||
if (!modelData) return;
|
||||
if (modelData._isOllama) {
|
||||
// Force-open the Download card if it's been collapsed — otherwise
|
||||
// filling the (hidden) input silently swallows the click.
|
||||
const dlBody = document.getElementById('cookbook-download-card-body');
|
||||
const dlArrow = document.getElementById('cookbook-download-card-arrow');
|
||||
if (dlBody && dlBody.style.display === 'none') {
|
||||
dlBody.style.display = 'block';
|
||||
if (dlArrow) dlArrow.style.transform = 'rotate(90deg)';
|
||||
}
|
||||
const dlInput = document.getElementById('cookbook-dl-repo');
|
||||
if (dlInput) {
|
||||
dlInput.value = modelData.name;
|
||||
dlInput.focus();
|
||||
// Briefly highlight so the user sees what got filled even when the
|
||||
// download card sits far above the (long) hwfit list.
|
||||
dlInput.classList.add('cookbook-dl-flash');
|
||||
setTimeout(() => dlInput.classList.remove('cookbook-dl-flash'), 800);
|
||||
dlInput.scrollIntoView({ behavior: 'smooth', block: 'center' });
|
||||
}
|
||||
return;
|
||||
}
|
||||
_expandModelRow(row, modelData);
|
||||
});
|
||||
});
|
||||
@@ -1297,7 +1434,7 @@ export function _hwfitInit() {
|
||||
if (sort) sort.addEventListener('change', () => _hwfitFetch());
|
||||
if (qpref) qpref.addEventListener('change', () => _hwfitFetch());
|
||||
// Engine filter is a pure client-side view filter over the already-fetched
|
||||
// list, so just re-render from cache instead of re-probing hardware.
|
||||
// list (HF + Ollama merged), so just re-render from cache.
|
||||
const engine = document.getElementById('hwfit-engine');
|
||||
if (engine) engine.addEventListener('change', () => {
|
||||
const list = document.getElementById('hwfit-list');
|
||||
@@ -1694,6 +1831,15 @@ export function _hwfitInit() {
|
||||
saveBtn.addEventListener('click', () => {
|
||||
_syncServers();
|
||||
_rebuildServerSelect();
|
||||
// Broadcast for anything outside the settings tab that depends on
|
||||
// the server list (Serve dialog host picker, Running tasks, etc.).
|
||||
// Without this the user had to hard-refresh to see the new entry
|
||||
// in those other places.
|
||||
try {
|
||||
document.dispatchEvent(new CustomEvent('cookbook:servers-changed', {
|
||||
detail: { servers: _envState.servers.slice() },
|
||||
}));
|
||||
} catch (_) {}
|
||||
saveBtn.classList.add('saved');
|
||||
saveBtn.innerHTML = '<svg width="11" height="11" viewBox="0 0 24 24" fill="none" stroke="#50fa7b" stroke-width="2.6" stroke-linecap="round" stroke-linejoin="round" style="margin-right:4px;flex-shrink:0;"><polyline points="20 6 9 17 4 12"/></svg>Saved';
|
||||
});
|
||||
@@ -1713,6 +1859,11 @@ export function _hwfitInit() {
|
||||
entry.remove();
|
||||
_syncServers();
|
||||
_rebuildServerSelect();
|
||||
try {
|
||||
document.dispatchEvent(new CustomEvent('cookbook:servers-changed', {
|
||||
detail: { servers: _envState.servers.slice() },
|
||||
}));
|
||||
} catch (_) {}
|
||||
_hwfitCache = null;
|
||||
_hwfitFetch();
|
||||
});
|
||||
|
||||
+262
-250
@@ -72,7 +72,7 @@ function _platformIcon(platform) {
|
||||
return '';
|
||||
}
|
||||
|
||||
export let _envState = { env: 'none', envPath: '', hfToken: '', hfTokenConfigured: false, hfTokenMasked: '', gpus: '', remoteHost: '', remoteServerKey: '', servers: [], modelPaths: [], platform: '', defaultServer: '' };
|
||||
export let _envState = { env: 'none', envPath: '', hfToken: '', hfTokenConfigured: false, hfTokenMasked: '', gpus: '', remoteHost: '', servers: [], modelPaths: [], platform: '', defaultServer: '' };
|
||||
let _lastCacheHostVal = null;
|
||||
let _cookbookOpeningSpinners = [];
|
||||
/** Read accessor for the module-private `_lastCacheHostVal`. */
export function _lastCacheHost() {
  return _lastCacheHostVal;
}
|
||||
@@ -89,8 +89,8 @@ function _setCookbookOpening(on) {
|
||||
].filter(Boolean);
|
||||
if (!on) {
|
||||
_cookbookOpeningSpinners.forEach(({ spinner, wrap, target }) => {
|
||||
try { spinner?.stop?.(); } catch { }
|
||||
try { wrap?.remove?.(); } catch { }
|
||||
try { spinner?.stop?.(); } catch {}
|
||||
try { wrap?.remove?.(); } catch {}
|
||||
target?.classList?.remove('cookbook-opening');
|
||||
});
|
||||
_cookbookOpeningSpinners = [];
|
||||
@@ -114,44 +114,18 @@ function _setCookbookOpening(on) {
|
||||
// True for the local server entry (empty / "local" / "localhost" host).
|
||||
/**
 * Tell whether a server entry denotes the local machine.
 *
 * @param {Object|null|undefined} s Server entry; only `host` is inspected.
 * @returns {boolean} true for a missing entry, an empty host, the exact
 *   string "local", or "localhost" in any letter case.
 */
function _isLocalEntry(s) {
  const host = s ? s.host : '';
  if (!host) return true;
  if (host === 'local') return true;
  return host.toLowerCase() === 'localhost';
}
|
||||
|
||||
// Resolve a dropdown option value to a server entry. New option values are
|
||||
// stable per-profile keys, so same-host SSH profiles stay distinguishable.
|
||||
// Host strings and numeric indices remain accepted for stale saved state.
|
||||
export function _serverKey(s) {
|
||||
if (_isLocalEntry(s)) return 'local';
|
||||
return 'srv:' + [
|
||||
s?.name || '',
|
||||
s?.host || '',
|
||||
s?.port || '',
|
||||
s?.envPath || '',
|
||||
s?.platform || '',
|
||||
].map(v => encodeURIComponent(String(v).trim())).join('|');
|
||||
}
|
||||
|
||||
// Resolve a dropdown option value to a server entry. Option values are the
|
||||
// stable HOST string ('local' for the local box) — NOT array indices — because
|
||||
// `_envState.servers` gets deduped/reordered, which made index-based selection
|
||||
// silently resolve to the wrong (or local) server. Accepts a numeric index too
|
||||
// for backwards-compat with any stale value.
|
||||
function _serverByVal(val) {
|
||||
if (val == null || val === 'local' || val === '') return null;
|
||||
const raw = String(val);
|
||||
let s = _envState.servers.find(x => _serverKey(x) === raw);
|
||||
if (!s) s = _envState.servers.find(x => x.host === raw);
|
||||
let s = _envState.servers.find(x => x.host === val);
|
||||
if (!s && /^\d+$/.test(String(val))) s = _envState.servers[parseInt(val)];
|
||||
return s || null;
|
||||
}
|
||||
|
||||
export function _selectedServer() {
|
||||
if (_envState.remoteServerKey) {
|
||||
const keyed = _serverByVal(_envState.remoteServerKey);
|
||||
if (keyed) return keyed;
|
||||
}
|
||||
if (_envState.remoteHost) return _envState.servers.find(s => s.host === _envState.remoteHost) || null;
|
||||
return null;
|
||||
}
|
||||
|
||||
export function _currentServerValue() {
|
||||
const selected = _selectedServer();
|
||||
if (selected) return _serverKey(selected);
|
||||
return _envState.remoteHost || 'local';
|
||||
}
|
||||
|
||||
function _buildServerOpts(excludeLocal = false) {
|
||||
// The local server is ALWAYS represented by the synthetic value="local" option
|
||||
// (showing its custom name from the "server name" feature). We must therefore
|
||||
@@ -160,20 +134,13 @@ function _buildServerOpts(excludeLocal = false) {
|
||||
const _localSrv = _localIdx >= 0 ? _envState.servers[_localIdx] : null;
|
||||
const _localLabel = (_localSrv && _localSrv.name) ? _localSrv.name : 'Local';
|
||||
let html = `<option value="local"${!_envState.remoteHost ? ' selected' : ''}>${esc(_localLabel)}</option>`;
|
||||
const selectedKey = _envState.remoteServerKey || '';
|
||||
let legacyHostSelected = false;
|
||||
for (let i = 0; i < _envState.servers.length; i++) {
|
||||
const s = _envState.servers[i];
|
||||
if (i === _localIdx) continue; // already the synthetic "local" option
|
||||
if (excludeLocal && _isLocalEntry(s)) continue;
|
||||
const label = s.name || s.host || `Server ${i + 1}`;
|
||||
const value = _serverKey(s);
|
||||
let selected = selectedKey ? value === selectedKey : false;
|
||||
if (!selectedKey && _envState.remoteHost === s.host && !legacyHostSelected) {
|
||||
selected = true;
|
||||
legacyHostSelected = true;
|
||||
}
|
||||
html += `<option value="${esc(value)}"${selected ? ' selected' : ''}>${esc(label)}</option>`;
|
||||
const selected = _envState.remoteHost === s.host ? ' selected' : '';
|
||||
html += `<option value="${esc(s.host)}"${selected}>${esc(label)}</option>`;
|
||||
}
|
||||
return html;
|
||||
}
|
||||
@@ -187,41 +154,16 @@ export function _sshCmd(host, cmd, port) {
|
||||
/** Get SSH port for a given host (or task object) */
|
||||
function _getPort(hostOrTask) {
|
||||
if (!hostOrTask) return '';
|
||||
if (typeof hostOrTask === 'object') return hostOrTask.sshPort || _getPort(hostOrTask.remoteServerKey || hostOrTask.remoteHost);
|
||||
const selected = hostOrTask === _envState.remoteHost ? _selectedServer() : null;
|
||||
const srv = selected || _serverByVal(hostOrTask);
|
||||
if (typeof hostOrTask === 'object') return hostOrTask.sshPort || _getPort(hostOrTask.remoteHost);
|
||||
const srv = _envState.servers.find(s => s.host === hostOrTask);
|
||||
return srv?.port || '';
|
||||
}
|
||||
|
||||
/** Get platform for a given host (or task object). Returns 'windows', 'termux', 'linux', or '' */
|
||||
export function _getPlatform(hostOrTask) {
|
||||
const isWinBrowser = (window.navigator.userAgent || window.navigator.platform || '').toLowerCase().includes('win');
|
||||
// The browser's OS is NOT the server's OS when the UI is opened remotely —
|
||||
// e.g. a Windows browser driving a Mac/Linux homeserver. Trusting the
|
||||
// user-agent there makes the serve builder emit the Windows python-only
|
||||
// shape (`python -m llama_cpp.server`, no `llama-server ||` fallback), which
|
||||
// then fails on the actual Unix server. The local hardware probe is
|
||||
// authoritative: it reports a backend (metal/cuda/rocm/cpu_*) for any Unix
|
||||
// server and carries platform:"windows" for local Windows (which sets
|
||||
// _envState.platform, short-circuiting below). So only fall back to the
|
||||
// browser hint when we have no server-side signal at all.
|
||||
const localPlatform = () => {
|
||||
if (_envState.platform) return _envState.platform;
|
||||
if (String(_hwfitCache?.system?.backend || '')) return '';
|
||||
return isWinBrowser ? 'windows' : '';
|
||||
};
|
||||
if (!hostOrTask || hostOrTask === 'local') {
|
||||
return localPlatform();
|
||||
}
|
||||
if (typeof hostOrTask === 'object') {
|
||||
const h = hostOrTask.remoteHost;
|
||||
if (!h || h === 'local') {
|
||||
return hostOrTask.platform || localPlatform();
|
||||
}
|
||||
return hostOrTask.platform || _getPlatform(hostOrTask.remoteServerKey || h);
|
||||
}
|
||||
const selected = hostOrTask === _envState.remoteHost ? _selectedServer() : null;
|
||||
const srv = selected || _serverByVal(hostOrTask);
|
||||
if (!hostOrTask) return _envState.platform || '';
|
||||
if (typeof hostOrTask === 'object') return hostOrTask.platform || _getPlatform(hostOrTask.remoteHost);
|
||||
const srv = _envState.servers.find(s => s.host === hostOrTask);
|
||||
return srv?.platform || '';
|
||||
}
|
||||
|
||||
@@ -237,19 +179,6 @@ export function _isMetal() {
|
||||
return ['metal', 'mps', 'apple'].includes(String(_hwfitCache?.system?.backend || '').toLowerCase());
|
||||
}
|
||||
|
||||
const GEMMA4_THINKING_CHAT_TEMPLATE = `{% for message in messages %}{% if message['role'] == 'system' %}<|turn>system\n<|think|>{{ message['content'] }}<turn|>\n{% elif message['role'] == 'user' %}<|turn>user\n{{ message['content'] }}<turn|>\n{% elif message['role'] == 'assistant' %}<|turn>model\n{{ message['content'] }}<turn|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|turn>model\n<|channel>thought{% endif %}`;
|
||||
|
||||
function _isGemma4ThinkingModel(modelName) {
|
||||
const n = (modelName || '').toLowerCase();
|
||||
return n.includes('gemma-4') || n.includes('gemma4');
|
||||
}
|
||||
|
||||
function _gemma4ThinkingChatTemplateArg(modelName) {
|
||||
return _isGemma4ThinkingModel(modelName)
|
||||
? _shellQuote(GEMMA4_THINKING_CHAT_TEMPLATE)
|
||||
: '';
|
||||
}
|
||||
|
||||
/** Detect model-specific vLLM optimizations */
|
||||
function _detectModelOptimizations(modelName) {
|
||||
const n = (modelName || '').toLowerCase();
|
||||
@@ -326,7 +255,10 @@ export function _detectToolParser(modelName) {
|
||||
// ── Backend detection ──
|
||||
|
||||
export function _detectBackend(model) {
|
||||
if (model?.backend === 'ollama' || model?.is_ollama) {
|
||||
const _ollamaName = String(model?.repo_id || model?.name || model?.id || '').trim();
|
||||
const _ollamaMeta = `${model?.backend || ''} ${model?.endpoint_kind || ''} ${model?.provider || ''} ${model?.source || ''}`.toLowerCase();
|
||||
const _looksLikeOllamaTag = /^[A-Za-z0-9][A-Za-z0-9._-]*(?::[A-Za-z0-9][A-Za-z0-9._-]*)$/.test(_ollamaName);
|
||||
if (model?.backend === 'ollama' || model?.is_ollama || _ollamaMeta.includes('ollama') || _looksLikeOllamaTag) {
|
||||
return { backend: 'ollama', label: 'Ollama' };
|
||||
}
|
||||
const q = (model.quant || '').toUpperCase();
|
||||
@@ -450,8 +382,6 @@ export function _buildServeCmd(f, modelName, backend) {
|
||||
const _extraEnv = (f.extra_env ?? '').toString().replace(/\s+/g, ' ').trim();
|
||||
if (_extraEnv) cmd += _extraEnv + ' ';
|
||||
cmd += `${_vllmBin} serve ${modelName} --host 0.0.0.0 --port ${f.port || '8000'}`;
|
||||
const _gemma4ChatTemplate = _gemma4ThinkingChatTemplateArg(modelName);
|
||||
if (_gemma4ChatTemplate) cmd += ` --chat-template ${_gemma4ChatTemplate}`;
|
||||
cmd += ` --tensor-parallel-size ${f.tp || '1'}`;
|
||||
cmd += ` --max-model-len ${f.ctx || '8192'}`;
|
||||
cmd += ` --gpu-memory-utilization ${f.gpu_mem || '0.90'}`;
|
||||
@@ -482,8 +412,6 @@ export function _buildServeCmd(f, modelName, backend) {
|
||||
const _extraEnv = (f.extra_env ?? '').toString().replace(/\s+/g, ' ').trim();
|
||||
if (_extraEnv) cmd += _extraEnv + ' ';
|
||||
cmd += `${_py3Bin} -m sglang.launch_server --model-path ${modelName} --host 0.0.0.0 --port ${f.port || '30000'}`;
|
||||
const _gemma4ChatTemplate = _gemma4ThinkingChatTemplateArg(modelName);
|
||||
if (_gemma4ChatTemplate) cmd += ` --chat-template ${_gemma4ChatTemplate}`;
|
||||
if (f.tp && f.tp !== '1') cmd += ` --tp ${f.tp}`;
|
||||
if (f.ctx) cmd += ` --context-length ${f.ctx}`;
|
||||
if (f.gpu_mem && f.gpu_mem !== '0.90') cmd += ` --mem-fraction-static ${f.gpu_mem}`;
|
||||
@@ -585,9 +513,34 @@ export function _buildServeCmd(f, modelName, backend) {
|
||||
}
|
||||
} else if (backend === 'ollama') {
|
||||
const ollamaPort = f.port || '11434';
|
||||
const bindHost = _envState.remoteHost ? '0.0.0.0' : '127.0.0.1';
|
||||
const hostEnv = ollamaPort !== '11434' ? `OLLAMA_HOST=${bindHost}:${ollamaPort} ` : '';
|
||||
cmd = `${hostEnv}ollama serve`;
|
||||
// GGUF + Ollama: delegate to the iGPU-bound ollama-test container via
|
||||
// its /usr/local/bin/ollama-import helper. Plain `ollama serve` errors
|
||||
// 127 on hosts where ollama isn't on PATH (and even when it is, it
|
||||
// doesn't import the GGUF — it just starts the daemon). Args are all
|
||||
// literal so the cookbook validator (which bans &&/||/;/$() ) is
|
||||
// happy: `docker exec ollama-test ollama-import <repo> <name> <ctx>
|
||||
// <file>`. The helper handles the find/Modelfile/preload dance.
|
||||
if (modelName.includes('/') && (f.gguf_file || /-GGUF$/i.test(modelName))) {
|
||||
// HF-GGUF repo → import + preload + tail
|
||||
const _name = (modelName.split('/').pop() || modelName)
|
||||
.replace(/-GGUF$/i, '')
|
||||
.toLowerCase()
|
||||
.replace(/[^a-z0-9._:-]+/g, '-')
|
||||
.replace(/^-+|-+$/g, '');
|
||||
const _ctx = f.ctx || '8192';
|
||||
const _file = (f.gguf_file || '').split('/').pop() || '';
|
||||
// Trailing GGUF_FILE is optional; helper picks the first match if empty.
|
||||
cmd = `docker exec ollama-test ollama-import ${modelName} ${_name} ${_ctx}${_file ? ' ' + _file : ''}`;
|
||||
} else if (!modelName.includes('/') && modelName) {
|
||||
// Already-pulled Ollama tag (e.g. `qwen2.5:7b`). On kierkegaard the
|
||||
// runtime is the ROCm Ollama sidecar; this quick command verifies the
|
||||
// tag exists, then the backend auto-registers http://host.docker.internal:11434/v1.
|
||||
cmd = `docker exec ollama-rocm ollama show ${modelName}`;
|
||||
} else {
|
||||
const bindHost = _envState.remoteHost ? '0.0.0.0' : '127.0.0.1';
|
||||
const hostEnv = ollamaPort !== '11434' ? `OLLAMA_HOST=${bindHost}:${ollamaPort} ` : '';
|
||||
cmd = `${hostEnv}ollama serve`;
|
||||
}
|
||||
} else if (backend === 'diffusers') {
|
||||
const gpuStr = f.gpus?.trim();
|
||||
if (gpuStr) cmd += `CUDA_VISIBLE_DEVICES=${gpuStr} `;
|
||||
@@ -630,7 +583,7 @@ function _fallbackCopy(text) {
|
||||
ta.style.cssText = 'position:fixed;left:-9999px;top:-9999px';
|
||||
document.body.appendChild(ta);
|
||||
ta.select();
|
||||
try { document.execCommand('copy'); } catch (_) { }
|
||||
try { document.execCommand('copy'); } catch (_) {}
|
||||
document.body.removeChild(ta);
|
||||
return Promise.resolve();
|
||||
}
|
||||
@@ -663,7 +616,7 @@ function _readStoredEnvState() {
|
||||
|
||||
export function _persistEnvState() {
|
||||
try { localStorage.setItem(LAST_STATE_KEY, JSON.stringify(_envStateForStorage())); }
|
||||
catch (_) { }
|
||||
catch (_) {}
|
||||
_saveTasks(_loadTasks());
|
||||
}
|
||||
|
||||
@@ -712,24 +665,22 @@ async function _fetchDependencies() {
|
||||
const data = await resp.json();
|
||||
const pkgs = data.packages || [];
|
||||
if (!pkgs.length) { list.innerHTML = '<div class="hwfit-loading">No packages found</div>'; return; }
|
||||
const _winUnsupported = new Set(['vllm', 'rembg', 'gfpgan']);
|
||||
const _winUnsupported = new Set(['diffusers', 'hf_transfer', 'vllm', 'rembg', 'gfpgan']);
|
||||
|
||||
const _statusTag = (pkg, isLocal, isSystemDep, winBlocked) => {
|
||||
if (winBlocked) return `<span class="cookbook-dep-tag cookbook-dep-na">N/A</span>`;
|
||||
const hasCustomInstall = !!pkg.install_cmd;
|
||||
const hasCustomUpdate = !!pkg.update_cmd;
|
||||
if (pkg.installed && isSystemDep && !hasCustomUpdate) return `<span class="cookbook-dep-tag cookbook-dep-installed" title="Found on selected server">Installed</span>`;
|
||||
if (pkg.installed && pkg.pip_update_available === false && !hasCustomUpdate) {
|
||||
if (pkg.installed && isSystemDep) return `<span class="cookbook-dep-tag cookbook-dep-installed" title="Found on selected server">Installed</span>`;
|
||||
if (pkg.installed && pkg.pip_update_available === false) {
|
||||
const tip = esc(pkg.update_note || pkg.status_note || 'Found externally; update outside Odysseus.');
|
||||
return `<span class="cookbook-dep-tag cookbook-dep-installed" title="${tip}">Installed</span>`;
|
||||
}
|
||||
if (pkg.installed) return `<button class="cookbook-dep-tag cookbook-dep-installed cookbook-dep-installed-btn" title="Installed — click for actions"><span class="cookbook-dep-installed-label">Installed</span><span class="cookbook-dep-caret">▾</span></button>`;
|
||||
if (isSystemDep && !hasCustomInstall) {
|
||||
if (isSystemDep) {
|
||||
const depTip = esc(pkg.install_hint || 'Install this OS package on the selected server.');
|
||||
const depLabel = pkg.applicable === false ? 'N/A ?' : 'Missing';
|
||||
return `<span class="cookbook-dep-tag cookbook-dep-na" title="${depTip}">${depLabel}</span>`;
|
||||
}
|
||||
return `<button class="cookbook-dep-tag cookbook-dep-install" data-dep-pip="${esc(pkg.pip || '')}" data-dep-install-cmd="${esc(pkg.install_cmd || '')}" data-dep-update-cmd="${esc(pkg.update_cmd || '')}" data-dep-target="${isLocal ? 'local' : 'remote'}">Install</button>`;
|
||||
return `<button class="cookbook-dep-tag cookbook-dep-install" data-dep-pip="${esc(pkg.pip)}" data-dep-target="${isLocal ? 'local' : 'remote'}">Install</button>`;
|
||||
};
|
||||
|
||||
const _depRow = (pkg) => {
|
||||
@@ -752,7 +703,7 @@ async function _fetchDependencies() {
|
||||
} else if (pkg.name === 'sglang' && pkg.installed) {
|
||||
_rebuildBtn = `<button type="button" class="cookbook-dep-tag cookbook-dep-rebuild cookbook-dep-reinstall" data-reinstall-pkg="sglang" title="Force-reinstall SGLang (pulls a matching torch). Runs as a tmux task in the Running tab.">Reinstall</button>`;
|
||||
}
|
||||
return `<div class="cookbook-dep-row${winBlocked ? ' cookbook-dep-blocked' : ''}" data-pkg-name="${esc(pkg.name)}" data-dep-pip="${esc(pkg.pip || '')}" data-dep-install-cmd="${esc(pkg.install_cmd || '')}" data-dep-update-cmd="${esc(pkg.update_cmd || '')}" data-dep-target="${isLocal ? 'local' : 'remote'}" data-dep-kind="${esc(pkg.kind || 'python')}">`
|
||||
return `<div class="cookbook-dep-row${winBlocked ? ' cookbook-dep-blocked' : ''}" data-pkg-name="${esc(pkg.name)}" data-dep-pip="${esc(pkg.pip || '')}" data-dep-target="${isLocal ? 'local' : 'remote'}" data-dep-kind="${esc(pkg.kind || 'python')}">`
|
||||
+ `<div class="cookbook-dep-info">`
|
||||
+ `<div class="memory-item-title">${esc(pkg.name)}</div>`
|
||||
+ `<div class="memory-item-meta" style="font-size:10px;opacity:0.5;margin-top:2px;">${esc(pkg.desc)}</div>`
|
||||
@@ -782,7 +733,7 @@ async function _fetchDependencies() {
|
||||
// Shared install/update routine — used by the Install button and the
|
||||
// "Update" item in an installed package's ⋮ menu. `upgrade` adds pip -U;
|
||||
// `statusEl`, when given, shows "Installing…/Updating…" and is disabled.
|
||||
async function _installDep(pipName, pkgName, isLocalOnly, upgrade, statusEl, actionCmd = '') {
|
||||
async function _installDep(pipName, pkgName, isLocalOnly, upgrade, statusEl) {
|
||||
if (isLocalOnly) {
|
||||
_envState.remoteHost = '';
|
||||
_envState.env = 'none';
|
||||
@@ -827,43 +778,6 @@ async function _fetchDependencies() {
|
||||
envPrefix = 'eval "$(conda shell.bash hook)" && conda activate ' + _shellQuote(_envState.envPath);
|
||||
}
|
||||
}
|
||||
|
||||
if (actionCmd) {
|
||||
const shellCmd = envPrefix ? `${envPrefix} ${actionCmd}` : actionCmd;
|
||||
const fullCmd = (!isLocalOnly && _envState.remoteHost)
|
||||
? _sshCmd(_envState.remoteHost, shellCmd, _getPort(_envState.remoteHost))
|
||||
: shellCmd;
|
||||
try {
|
||||
if (statusEl) { statusEl.textContent = upgrade ? 'Updating...' : 'Installing...'; statusEl.disabled = true; }
|
||||
const res = await fetch('/api/shell/stream', {
|
||||
method: 'POST', credentials: 'same-origin',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ command: fullCmd }),
|
||||
});
|
||||
uiModule.showToast(`${upgrade ? 'Updating' : 'Installing'} ${pkgName} on ${targetHost}...`);
|
||||
const body = await res.text();
|
||||
if (!res.ok) throw new Error(`HTTP ${res.status}`);
|
||||
const exitMatches = [...body.matchAll(/"exit_code":\s*(-?\d+)/g)].map(m => Number(m[1]));
|
||||
const exitCode = exitMatches.length ? exitMatches[exitMatches.length - 1] : 0;
|
||||
if (exitCode !== 0) {
|
||||
throw new Error((body.slice(-500).trim() || `${pkgName} command failed`) + ` (exit ${exitCode})`);
|
||||
}
|
||||
|
||||
if (upgrade) { uiModule.showToast(`Successfully updated ${pkgName} on ${targetHost}.`); } else { uiModule.showToast(`Successfully installed ${pkgName} on ${targetHost}.`); }
|
||||
await _fetchDependencies();
|
||||
return;
|
||||
} catch (err) {
|
||||
if (statusEl) { statusEl.textContent = 'Install'; statusEl.disabled = false; }
|
||||
uiModule.showToast(`${upgrade ? 'Update' : 'Install'} failed: ` + err.message);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Always go through `python -m pip` so the leading token is `python`
|
||||
// — matches the /api/model/serve allow-list (bare `pip` is blocked).
|
||||
// Inside a venv/conda env, `--user` is invalid (pip refuses), so we
|
||||
// only add `--user --break-system-packages` when there's no env —
|
||||
// for PEP-668-locked system pythons (Arch, newer Debian).
|
||||
try {
|
||||
const reqBody = {
|
||||
repo_id: pipName,
|
||||
@@ -902,9 +816,8 @@ async function _fetchDependencies() {
|
||||
btn.addEventListener('click', async (e) => {
|
||||
e.stopPropagation();
|
||||
const pipName = btn.dataset.depPip;
|
||||
const installCmd = btn.dataset.depInstallCmd || '';
|
||||
const pkgName = btn.closest('.cookbook-dep-row')?.querySelector('.memory-item-title')?.textContent || pipName;
|
||||
await _installDep(pipName, pkgName, btn.dataset.depTarget === 'local', !!btn.dataset.upgrade, btn, installCmd);
|
||||
await _installDep(pipName, pkgName, btn.dataset.depTarget === 'local', !!btn.dataset.upgrade, btn);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -927,12 +840,11 @@ async function _fetchDependencies() {
|
||||
const it = document.createElement('div');
|
||||
it.className = 'dropdown-item-compact';
|
||||
it.innerHTML = `<span class="dropdown-icon">${upIco}</span><span>Update</span>`;
|
||||
it.title = row.dataset.depUpdateCmd ? `Update ${pkgName} using its custom command` : `Update ${pkgName} to the latest version (pip install -U)`;
|
||||
it.title = `Update ${pkgName} to the latest version (pip install -U)`;
|
||||
it.addEventListener('click', async (e) => {
|
||||
e.stopPropagation();
|
||||
dropdown.remove();
|
||||
const updateCmd = row.dataset.depUpdateCmd || '';
|
||||
await _installDep(pipName, pkgName, isLocalOnly, true, null, updateCmd);
|
||||
await _installDep(pipName, pkgName, isLocalOnly, true, null);
|
||||
});
|
||||
dropdown.appendChild(it);
|
||||
document.body.appendChild(dropdown);
|
||||
@@ -964,7 +876,6 @@ async function _fetchDependencies() {
|
||||
function _applyServerSelection(val) {
|
||||
if (val === 'local') {
|
||||
_envState.remoteHost = '';
|
||||
_envState.remoteServerKey = '';
|
||||
_envState.env = 'none';
|
||||
_envState.envPath = '';
|
||||
_envState.platform = '';
|
||||
@@ -972,7 +883,6 @@ function _applyServerSelection(val) {
|
||||
const s = _serverByVal(val);
|
||||
if (s) {
|
||||
_envState.remoteHost = s.host;
|
||||
_envState.remoteServerKey = _serverKey(s);
|
||||
_envState.env = s.env || 'none';
|
||||
_envState.envPath = s.envPath || '';
|
||||
_envState.platform = s.platform || '';
|
||||
@@ -983,9 +893,10 @@ function _applyServerSelection(val) {
|
||||
// bug: the Download/Cache/Deps dropdowns set the host but never saved it, so
|
||||
// it silently reverted and downloads/scans hit the wrong server).
|
||||
_persistEnvState();
|
||||
const _want = _currentServerValue();
|
||||
const _want = _envState.remoteHost || 'local';
|
||||
document.querySelectorAll('#hwfit-server-select, #hwfit-dl-server, #hwfit-cache-server, #hwfit-deps-server').forEach(sel => {
|
||||
if (!sel || sel.tagName !== 'SELECT') return;
|
||||
// Option values are host strings now ('local' for the local box).
|
||||
sel.value = _want;
|
||||
// If the host isn't among this select's current options (stale options after
|
||||
// the server list changed), the browser leaves the box BLANK/grey even though
|
||||
@@ -993,7 +904,7 @@ function _applyServerSelection(val) {
|
||||
// re-apply; fall back to 'local' only if it's genuinely gone.
|
||||
if (sel.selectedIndex < 0) {
|
||||
sel.innerHTML = _buildServerOpts(sel.id === 'hwfit-dl-server');
|
||||
sel.value = _currentServerValue();
|
||||
sel.value = _want;
|
||||
if (sel.selectedIndex < 0) sel.value = 'local';
|
||||
}
|
||||
});
|
||||
@@ -1031,7 +942,7 @@ function _wireTabEvents(body) {
|
||||
// Ignore swipes that start in a horizontally-scrollable tag row — those
|
||||
// should scroll the chips, not flip the tab.
|
||||
if (window.innerWidth > 768 || e.touches.length !== 1
|
||||
|| e.target.closest('input, textarea, select, .doclib-lang-chips')) { _sx = null; return; }
|
||||
|| e.target.closest('input, textarea, select, .doclib-lang-chips')) { _sx = null; return; }
|
||||
_sx = e.touches[0].clientX; _sy = e.touches[0].clientY;
|
||||
}, { passive: true });
|
||||
body.addEventListener('touchend', (e) => {
|
||||
@@ -1081,13 +992,11 @@ function _wireTabEvents(body) {
|
||||
const remotes = servers.filter(s => !_isLocalEntry(s));
|
||||
if (remotes.length === 1) {
|
||||
_envState.remoteHost = remotes[0].host;
|
||||
_envState.remoteServerKey = _serverKey(remotes[0]);
|
||||
_envState.env = remotes[0].env || 'none';
|
||||
_envState.envPath = remotes[0].envPath || '';
|
||||
}
|
||||
}
|
||||
const activeSrv = _selectedServer();
|
||||
if (activeSrv) _envState.remoteServerKey = _serverKey(activeSrv);
|
||||
const activeSrv = servers.find(s => s.host === _envState.remoteHost);
|
||||
_envState.platform = activeSrv?.platform || '';
|
||||
localStorage.setItem('cookbook-last-state', JSON.stringify(_envStateForStorage()));
|
||||
_saveTasks(_loadTasks());
|
||||
@@ -1095,7 +1004,7 @@ function _wireTabEvents(body) {
|
||||
// UI matches the resolved host. Done in a microtask so the dropdowns
|
||||
// exist by the time we set their .value.
|
||||
Promise.resolve().then(() => {
|
||||
const _want = _currentServerValue();
|
||||
const _want = _envState.remoteHost || 'local';
|
||||
document.querySelectorAll('#hwfit-server-select, #hwfit-dl-server, #hwfit-cache-server, #hwfit-deps-server').forEach(sel => {
|
||||
if (sel && sel.tagName === 'SELECT') sel.value = _want;
|
||||
});
|
||||
@@ -1361,14 +1270,28 @@ function _wireTabEvents(body) {
|
||||
if (!m) return { repo: raw, include: null };
|
||||
return { repo: m[1], include: `*${m[2]}*` };
|
||||
}
|
||||
/**
 * Recognise an Ollama-library model reference and return it in canonical form.
 *
 * Accepts `<name>:<tag>` (e.g. `qwen2.5:14b`, `llama3:latest`) and the rarer
 * `library/<name>:<tag>` form, which is normalised by dropping the `library/`
 * namespace. Anything containing another slash (such as a HuggingFace
 * `org/model` ID) or lacking a `:<tag>` suffix is rejected. Per the original
 * note on this helper, the backend's _is_ollama_download check expects the
 * same shape (no slash + has a colon).
 *
 * @param {string} raw  User-entered model reference.
 * @returns {string|null} The normalised `<name>:<tag>`, or null when `raw`
 *   is not an Ollama-style name (including non-string input).
 */
function _ollamaName(raw) {
  // Non-string input (null/undefined) is simply "not an Ollama name" rather
  // than a TypeError from calling .replace on it.
  if (typeof raw !== 'string') return null;
  // Surrounding whitespace is never part of a model name; ignore it.
  const stripped = raw.trim().replace(/^library\//, '');
  // Each side of the colon: one alphanumeric, then up to 200 of [A-Za-z0-9._-].
  const shape = /^[A-Za-z0-9][A-Za-z0-9._-]{0,200}:[A-Za-z0-9][A-Za-z0-9._-]{0,200}$/;
  return shape.test(stripped) ? stripped : null;
}
|
||||
const triggerDownload = () => {
|
||||
const rawRepo = _stripHfUrl(dlInput.value);
|
||||
if (!rawRepo) return;
|
||||
const { repo, include: autoInclude } = _splitRepoTag(rawRepo);
|
||||
const ollamaName = _ollamaName(rawRepo);
|
||||
const { repo, include: autoInclude } = ollamaName ? { repo: ollamaName, include: null } : _splitRepoTag(rawRepo);
|
||||
// HuggingFace repo IDs must be `org/model`. A bare model name would 404
|
||||
// at snapshot_download time with a raw traceback, so reject it up front.
|
||||
if (!/^[^\s/]+\/[^\s/]+$/.test(repo)) {
|
||||
uiModule.showToast('Enter a full HuggingFace repo ID like "org/model-name" (or paste the full HF URL).');
|
||||
// Ollama names (single-segment with a tag) skip this check — they go
|
||||
// through `ollama pull` server-side, not snapshot_download.
|
||||
if (!ollamaName && !/^[^\s/]+\/[^\s/]+$/.test(repo)) {
|
||||
uiModule.showToast('Enter a full HuggingFace repo ID like "org/model-name", or an Ollama name like "qwen2.5:14b".');
|
||||
dlInput.focus();
|
||||
return;
|
||||
}
|
||||
@@ -1383,12 +1306,13 @@ function _wireTabEvents(body) {
|
||||
if (srvVal !== 'local') {
|
||||
host = _serverByVal(srvVal)?.host || '';
|
||||
}
|
||||
const _hsrv = srvVal !== 'local' ? (_serverByVal(srvVal) || {}) : {};
|
||||
const _hsrv = _envState.servers.find(sv => sv.host === host) || {};
|
||||
let env = host ? (_hsrv.env || 'none') : _envState.env;
|
||||
let envPath = host ? (_hsrv.envPath || '') : _envState.envPath;
|
||||
const payload = { repo_id: repo };
|
||||
if (ollamaName) payload.backend = 'ollama';
|
||||
if (autoInclude) payload.include = autoInclude;
|
||||
if (_envState.hfToken) payload.hf_token = _envState.hfToken;
|
||||
if (_envState.hfToken && !ollamaName) payload.hf_token = _envState.hfToken;
|
||||
if (host) { payload.remote_host = host; const _sp3 = _getPort(host); if (_sp3) payload.ssh_port = _sp3; }
|
||||
const srvPlatform = _getPlatform(host);
|
||||
if (srvPlatform) payload.platform = srvPlatform;
|
||||
@@ -1432,7 +1356,7 @@ function _wireTabEvents(body) {
|
||||
// the section is collapsed (the body's content normally provides
|
||||
// separation; with no body visible, the line gives the h2 definition).
|
||||
dlFold.classList.toggle('is-folded', !folded);
|
||||
try { localStorage.setItem('cookbook_dl_tab_folded_v1', folded ? '0' : '1'); } catch { }
|
||||
try { localStorage.setItem('cookbook_dl_tab_folded_v1', folded ? '0' : '1'); } catch {}
|
||||
});
|
||||
}
|
||||
const hfToggle = document.getElementById('cookbook-hf-latest-toggle');
|
||||
@@ -1478,7 +1402,7 @@ function _wireTabEvents(body) {
|
||||
_hwCache[cacheKey] = hw;
|
||||
return hw;
|
||||
}
|
||||
} catch { }
|
||||
} catch {}
|
||||
_hwCache[cacheKey] = { vram: 0, backend: '' };
|
||||
return _hwCache[cacheKey];
|
||||
}
|
||||
@@ -1591,6 +1515,84 @@ function _wireTabEvents(body) {
|
||||
document.getElementById('hwfit-server-select')?.addEventListener('change', _onServerChange);
|
||||
}
|
||||
|
||||
// Browse Ollama library — popular models from ollama.com via cached backend
|
||||
// proxy. Click a row → fills the download input with `<name>:<size>` so the
|
||||
// existing Download button kicks off `ollama pull`.
|
||||
const olToggle = document.getElementById('cookbook-ollama-toggle');
const olArrow = document.getElementById('cookbook-ollama-arrow');
const olList = document.getElementById('cookbook-ollama-list');
const olRefresh = document.getElementById('cookbook-ollama-refresh');
if (olToggle && olList) {
  // True while a load has been started (or has succeeded) for the current
  // list contents; reset to false on failure so reopening the fold retries.
  let _olLoaded = false;
  // Monotonic request counter: only the most recently started load may touch
  // the DOM, so a slow cached fetch cannot overwrite a newer refresh result.
  let _olReqSeq = 0;

  // Put `<name>:<size>` into the download input and focus it, so the existing
  // Download button can be pressed straight away.
  const _fillDownload = (name, size) => {
    if (!dlInput) return;
    dlInput.value = `${name}:${size}`;
    dlInput.focus();
  };

  // Show or hide the list and rotate the disclosure arrow to match.
  const _setOpen = (open) => {
    olList.style.display = open ? 'flex' : 'none';
    if (olArrow) olArrow.style.transform = open ? 'rotate(90deg)' : 'rotate(0deg)';
  };

  /**
   * Fetch the Ollama library listing and render it into the list element.
   *
   * @param {boolean} [refresh=false] When true, `?refresh=1` is appended to
   *   the request URL.
   * @returns {Promise<void>} Resolves once the list has been rendered (or the
   *   failure placeholder shown); never rejects.
   */
  async function _loadOllama(refresh = false) {
    const seq = ++_olReqSeq;
    olList.innerHTML = '<div class="hwfit-loading" style="opacity:0.5;font-size:11px;text-align:center;padding:12px;">Loading…</div>';
    try {
      const res = await fetch(`/api/cookbook/ollama/library${refresh ? '?refresh=1' : ''}`);
      // A non-2xx reply is a failure, not an empty library: without this a
      // JSON error payload would be rendered as "No models".
      if (!res.ok) throw new Error(`HTTP ${res.status}`);
      const data = await res.json();
      if (seq !== _olReqSeq) return; // superseded by a newer load
      const models = Array.isArray(data?.models) ? data.models : [];
      if (!models.length) {
        olList.innerHTML = '<div class="hwfit-loading">No models</div>';
        return;
      }
      let html = '';
      for (const m of models) {
        // A model with no size list still gets one chip so it stays clickable.
        const sizes = Array.isArray(m.sizes) && m.sizes.length ? m.sizes : ['latest'];
        const sizeChips = sizes.map(s => `<button type="button" class="memory-toolbar-btn cookbook-ol-size" data-name="${esc(m.name)}" data-size="${esc(s)}" style="height:20px;padding:0 6px;font-size:10px;border-radius:3px;">${esc(s)}</button>`).join('');
        html += `<div class="doclib-card memory-item cookbook-ollama-card" data-name="${esc(m.name)}">`;
        html += `<div style="flex:1;min-width:0;">`;
        html += `<div class="memory-item-title">${esc(m.name)} <a href="https://ollama.com/library/${esc(m.name)}" target="_blank" rel="noopener" class="cookbook-hf-link">ollama ↗</a></div>`;
        if (m.description) html += `<div class="memory-item-meta" style="font-size:10px;opacity:0.55;margin-top:2px;">${esc(m.description)}</div>`;
        html += `<div style="display:flex;flex-wrap:wrap;gap:3px;margin-top:4px;">${sizeChips}</div>`;
        html += `</div></div>`;
      }
      olList.innerHTML = html;
      // Size chip → fill the input with that exact tag.
      olList.querySelectorAll('.cookbook-ol-size').forEach(btn => {
        btn.addEventListener('click', (e) => {
          e.stopPropagation();
          _fillDownload(btn.dataset.name, btn.dataset.size);
        });
      });
      // Clicking the card body (not a size chip / link) → default to first size
      olList.querySelectorAll('.cookbook-ollama-card').forEach(card => {
        card.addEventListener('click', (e) => {
          if (e.target.closest('a') || e.target.closest('.cookbook-ol-size')) return;
          const firstSize = card.querySelector('.cookbook-ol-size')?.dataset.size || 'latest';
          _fillDownload(card.dataset.name, firstSize);
        });
      });
    } catch (e) {
      if (seq !== _olReqSeq) return; // a newer load owns the list now
      // Allow the next open of the fold to retry instead of leaving the
      // failure placeholder in place until the user finds the refresh button.
      _olLoaded = false;
      console.warn('Ollama library load failed:', e);
      olList.innerHTML = '<div class="hwfit-loading">Failed to load</div>';
    }
  }

  olToggle.addEventListener('click', () => {
    const isOpen = olList.style.display !== 'none';
    _setOpen(!isOpen);
    // Lazy-load on first open (and again after a failed load).
    if (!isOpen && !_olLoaded) {
      _olLoaded = true;
      _loadOllama(false);
    }
  });

  if (olRefresh) olRefresh.addEventListener('click', (e) => {
    e.stopPropagation();
    _olLoaded = true;
    _loadOllama(true);
    // Refreshing a collapsed list opens it so the result is visible.
    if (olList.style.display === 'none') _setOpen(true);
  });
}
|
||||
|
||||
// Server add button, row removal, model-dir add/remove, and per-row wiring
|
||||
// are ALL owned by cookbook-hwfit.js's _hwfitInit / _wireServerEntry.
|
||||
// A duplicate add handler used to live here and fired alongside the hwfit
|
||||
@@ -1603,7 +1605,7 @@ function _wireTabEvents(body) {
|
||||
hfInput.addEventListener('change', async () => {
|
||||
const val = hfInput.value.trim();
|
||||
_envState.hfToken = val;
|
||||
try { await _persistEnvState(); } catch { }
|
||||
try { await _persistEnvState(); } catch {}
|
||||
if (val) {
|
||||
_envState.hfTokenConfigured = true;
|
||||
const masked = val.length > 6 ? val.slice(0, 3) + '…' + val.slice(-3) : '••••';
|
||||
@@ -1643,9 +1645,8 @@ export function _serverEntryHtml(s, i, defaultServer, forceRemote, isNew) {
|
||||
let html = '';
|
||||
html += `<div class="cookbook-server-entry" data-idx="${i}" data-platform="${esc(s.platform || '')}">`;
|
||||
const _srvTitle = s.name || (isLocal ? 'Local' : (s.host || `Server ${i + 1}`));
|
||||
const _srvKey = isLocal ? 'local' : _serverKey(s);
|
||||
const _legacyDefault = !String(defaultServer || '').startsWith('srv:') && !isLocal && (defaultServer || '') === (s.host || '');
|
||||
const _isDefaultSrv = (defaultServer || '') === _srvKey || _legacyDefault;
|
||||
const _srvKey = isLocal ? 'local' : (s.host || '');
|
||||
const _isDefaultSrv = (defaultServer || '') === _srvKey;
|
||||
const _pIco = _platformIcon(s.platform);
|
||||
const _keyBtn = `<button class="cookbook-server-key-btn" title="Set up SSH key for this server" style="height:22px;box-sizing:border-box;display:inline-flex;align-items:center;position:relative;top:-2px;"><svg width="11" height="11" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="margin-right:4px;flex-shrink:0;"><circle cx="7.5" cy="15.5" r="5.5"/><path d="M12 11l8-8"/><path d="M17 6l3 3"/></svg>Key</button>`;
|
||||
const _checkBtn = `<button class="cookbook-server-check-btn" title="Check SSH connection" style="height:22px;box-sizing:border-box;display:inline-flex;align-items:center;position:relative;top:-2px;"><svg width="11" height="11" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.2" stroke-linecap="round" stroke-linejoin="round" style="margin-right:4px;flex-shrink:0;"><polyline points="20 6 9 17 4 12"/></svg>Check</button>`;
|
||||
@@ -1775,11 +1776,24 @@ function _renderRecipes() {
|
||||
html += `<button class="memory-toolbar-btn cookbook-dl-add-server" title="Add server in Settings" style="height:28px;">add server</button>`;
|
||||
html += `</div>`;
|
||||
html += `<div class="cookbook-dl-input" style="margin-top:0;">`;
|
||||
html += `<input type="text" class="cookbook-dl-repo" id="cookbook-dl-repo" placeholder="org/model-name, HF URL, or org/model:QUANT_TAG" />`;
|
||||
html += `<input type="text" class="cookbook-dl-repo" id="cookbook-dl-repo" placeholder="org/model-name, qwen2.5:14b, or HF URL" />`;
|
||||
html += `<button class="cookbook-btn cookbook-dl-btn" id="cookbook-dl-btn">Download</button>`;
|
||||
html += `</div>`;
|
||||
// Browse Ollama library — fetches popular models from ollama.com via the
|
||||
// /api/cookbook/ollama/library cached proxy, click → fills the input with
|
||||
// `<name>:<size>` so the existing Download button kicks off `ollama pull`.
|
||||
html += `<div style="margin-top:5px;position:relative;top:-3px;">`;
|
||||
html += `<div style="display:flex;gap:4px;align-items:center;">`;
|
||||
html += `<button type="button" class="memory-toolbar-btn" id="cookbook-ollama-toggle" style="flex:1;text-align:left;height:26px;display:flex;align-items:center;gap:6px;border-radius:4px;">`;
|
||||
html += `<span id="cookbook-ollama-arrow" style="display:inline-block;transition:transform 0.15s;pointer-events:none;">▸</span>`;
|
||||
html += `<span style="pointer-events:none;">Browse Ollama library</span>`;
|
||||
html += `</button>`;
|
||||
html += `<button type="button" class="memory-toolbar-btn" id="cookbook-ollama-refresh" title="Refresh" style="height:26px;width:26px;padding:0;border-radius:4px;">↻</button>`;
|
||||
html += `</div>`;
|
||||
html += `<div id="cookbook-ollama-list" style="display:none;margin-top:4px;max-height:320px;overflow-y:auto;flex-direction:column;gap:4px;"></div>`;
|
||||
html += `</div>`;
|
||||
// Latest HF models that fit — collapsible card list
|
||||
html += `<div style="margin-top:5px;position:relative;top:-7px;">`;
|
||||
html += `<div style="margin-top:5px;position:relative;top:-3px;">`;
|
||||
html += `<div style="display:flex;gap:4px;align-items:center;">`;
|
||||
html += `<button type="button" class="memory-toolbar-btn" id="cookbook-hf-latest-toggle" style="flex:1;text-align:left;height:26px;display:flex;align-items:center;gap:6px;border-radius:4px;">`;
|
||||
html += `<span id="cookbook-hf-latest-arrow" style="display:inline-block;transition:transform 0.15s;pointer-events:none;">\u25B8</span>`;
|
||||
@@ -1804,7 +1818,7 @@ function _renderRecipes() {
|
||||
html += '<option value="general" selected>Standard</option><option value="coding">Coding</option>';
|
||||
html += '<option value="reasoning">Reasoning</option><option value="chat">Chat</option>';
|
||||
// Image tab removed — text→image gen is gone from this build (only inpaint
|
||||
// remains, which uses its own settings panel). Vision (multimodal) stays.
|
||||
// remains, which uses its own settings panel). Vision (multimodal) stays.
|
||||
html += '<option value="multimodal">Vision</option></select>';
|
||||
// Engine sits next to the type filter so the "what category / which serving
|
||||
// path" filters live together; Quant + Context are storage-format and budget
|
||||
@@ -1813,6 +1827,7 @@ function _renderRecipes() {
|
||||
html += '<select class="cookbook-field-input hwfit-engine" id="hwfit-engine" style="height:28px;" title="Filter by serving engine">';
|
||||
html += '<option value="">Engine</option>';
|
||||
html += '<option value="llamacpp">llama.cpp</option>';
|
||||
html += '<option value="ollama">Ollama</option>';
|
||||
html += '<option value="vllm">vLLM</option>';
|
||||
html += '<option value="sglang">SGLang</option>';
|
||||
html += '</select>';
|
||||
@@ -1869,13 +1884,13 @@ function _renderRecipes() {
|
||||
// Footer: link to the public discussion where users can request additions
|
||||
// to the curated model list. Sits below the list so it reads as a callout
|
||||
// after browsing, not a header.
|
||||
html += '<div class="hwfit-list-footer" style="margin-top:8px;padding-top:6px;border-top:1px solid color-mix(in srgb, var(--border) 50%, transparent);font-size:9.5px;opacity:0.65;text-align:right;">'
|
||||
+ 'Don\'t see a model? '
|
||||
+ '<a href="https://github.com/pewdiepie-archdaemon/odysseus/discussions/1962" target="_blank" rel="noopener" style="color:var(--accent,var(--red));text-decoration:none;display:inline-flex;align-items:center;gap:4px;vertical-align:middle;">'
|
||||
+ 'Request it →'
|
||||
+ '<svg width="11" height="11" viewBox="0 0 16 16" fill="currentColor" aria-hidden="true" style="flex-shrink:0;"><path d="M8 0C3.58 0 0 3.58 0 8a8 8 0 0 0 5.47 7.59c.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0 0 16 8c0-4.42-3.58-8-8-8z"/></svg>'
|
||||
+ '</a>'
|
||||
+ '</div>';
|
||||
html += '<div class="hwfit-list-footer" style="display:none;">'
|
||||
+ 'Don\'t see a model? '
|
||||
+ '<a href="https://github.com/pewdiepie-archdaemon/odysseus/discussions/1962" target="_blank" rel="noopener" style="color:var(--accent,var(--red));text-decoration:none;display:inline-flex;align-items:center;gap:4px;vertical-align:middle;position:relative;top:-1px;">'
|
||||
+ 'Request it →'
|
||||
+ '<svg width="11" height="11" viewBox="0 0 16 16" fill="currentColor" aria-hidden="true" style="flex-shrink:0;"><path d="M8 0C3.58 0 0 3.58 0 8a8 8 0 0 0 5.47 7.59c.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0 0 16 8c0-4.42-3.58-8-8-8z"/></svg>'
|
||||
+ '</a>'
|
||||
+ '</div>';
|
||||
|
||||
html += '</div></div>';
|
||||
|
||||
@@ -1885,7 +1900,7 @@ function _renderRecipes() {
|
||||
html += '<div style="display:flex;align-items:baseline;gap:8px;margin-bottom:2px;">';
|
||||
html += '<h2 style="margin:0;padding:0;line-height:1;">Serve <span id="serve-stats" class="memory-count" style="font-size:0.6em;opacity:0.6;font-weight:normal"></span></h2>';
|
||||
html += '</div>';
|
||||
const _selSrv = _selectedServer() || _es.servers[0] || {};
|
||||
const _selSrv = _es.servers.find(s => s.host === _es.remoteHost) || _es.servers[0] || {};
|
||||
const _srvDirs = (Array.isArray(_selSrv.modelDirs) ? _selSrv.modelDirs : [_selSrv.modelDir || '~/.cache/huggingface/hub']).map(d => d.replaceAll('✕', '').replaceAll('✖', '').trim()).filter(Boolean);
|
||||
html += '<div class="cookbook-serve-dirs" style="margin-top:6px;">';
|
||||
html += _srvDirs.map(d => `<span class="cookbook-serve-dir-pill">${esc(d)}</span>`).join('');
|
||||
@@ -1909,7 +1924,7 @@ function _renderRecipes() {
|
||||
html += '<label class="memory-bulk-check-all"><input type="checkbox" id="serve-select-all"> All</label>';
|
||||
html += '<span id="serve-bulk-count" style="font-size:10px;opacity:0.5;">0 selected</span>';
|
||||
html += '<button class="memory-toolbar-btn danger" id="serve-bulk-delete" style="position:relative;top:-3px;"><svg width="11" height="11" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="vertical-align:-1px;margin-right:3px;"><polyline points="3 6 5 6 21 6"/><path d="M19 6l-1 14a2 2 0 0 1-2 2H8a2 2 0 0 1-2-2L5 6"/><path d="M10 11v6"/><path d="M14 11v6"/></svg>Delete</button>';
|
||||
html += '<button class="memory-toolbar-btn" id="serve-bulk-cancel" title="Cancel (Esc)" style="margin-left:4px;padding:3px 6px;position:relative;top:-3px;"><svg width="11" height="11" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5" stroke-linecap="round"><line x1="18" y1="6" x2="6" y2="18"/><line x1="6" y1="6" x2="18" y2="18"/></svg></button>';
|
||||
html += '<button class="memory-toolbar-btn" id="serve-bulk-cancel" title="Cancel (Esc)" style="margin-left:4px;padding:3px 6px;position:relative;top:-7px;"><svg width="11" height="11" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5" stroke-linecap="round"><line x1="18" y1="6" x2="6" y2="18"/><line x1="6" y1="6" x2="18" y2="18"/></svg></button>';
|
||||
html += '</div>';
|
||||
|
||||
html += '<div class="doclib-grid hwfit-cached-list" id="hwfit-cached-list"></div>';
|
||||
@@ -1963,7 +1978,7 @@ function _renderRecipes() {
|
||||
html += '<div style="display:flex;align-items:baseline;gap:8px;margin-bottom:2px;margin-top:-4px;">';
|
||||
html += '<h2 style="margin:0;padding:0;line-height:1;">Servers</h2>';
|
||||
// Reuse the calendar +New pill: spinning plus, label fades in idea uses
|
||||
// the same `.cal-add-btn-text` rules, so styling stays consistent.
|
||||
// the same `.cal-add-btn-text` rules, so styling stays consistent.
|
||||
html += '<button class="cal-add-btn cal-add-btn-text" id="cookbook-server-add" title="Add server" style="margin-left:auto;"><span class="cal-add-plus">+</span><span class="cal-add-label">Add</span></button>';
|
||||
html += '</div>';
|
||||
html += '<p class="memory-desc doclib-desc">Configure SSH servers, install Odysseus keys, choose model directories, and set the default server. Local is this machine.</p>';
|
||||
@@ -2059,73 +2074,73 @@ export async function open(opts) {
|
||||
}
|
||||
_setCookbookOpening(true);
|
||||
try {
|
||||
// Invalidate any pending close() animation handlers so they won't re-hide us
|
||||
_closeGen++;
|
||||
// Clear any leftover inline styles from a previous swipe-dismiss or close animation
|
||||
const _content = modal.querySelector('.modal-content');
|
||||
if (_content) {
|
||||
_content.classList.remove('modal-closing', 'sheet-ready', 'cookbook-modal-entering');
|
||||
_content.style.transform = '';
|
||||
_content.style.transition = '';
|
||||
_content.style.animation = '';
|
||||
_content.style.opacity = '';
|
||||
// Invalidate any pending close() animation handlers so they won't re-hide us
|
||||
_closeGen++;
|
||||
// Clear any leftover inline styles from a previous swipe-dismiss or close animation
|
||||
const _content = modal.querySelector('.modal-content');
|
||||
if (_content) {
|
||||
_content.classList.remove('modal-closing', 'sheet-ready', 'cookbook-modal-entering');
|
||||
_content.style.transform = '';
|
||||
_content.style.transition = '';
|
||||
_content.style.animation = '';
|
||||
_content.style.opacity = '';
|
||||
}
|
||||
modal.style.display = '';
|
||||
Modals.register('cookbook-modal', {
|
||||
railBtnId: 'rail-cookbook',
|
||||
sidebarBtnId: 'tool-cookbook-btn',
|
||||
closeFn: () => _doClose(),
|
||||
restoreFn: () => { _renderRunningTab(); },
|
||||
});
|
||||
_wireCookbookDrag(modal);
|
||||
await _syncFromServer();
|
||||
// `_syncFromServer` lives in cookbookRunning.js and populates *its* _envState
|
||||
// (a different object reference than this module's), then mirrors the merged
|
||||
// state to localStorage. So ALWAYS hydrate our _envState from that mirror —
|
||||
// on a successful sync it holds the freshly-fetched servers; on failure it
|
||||
// holds the last-known state. Gating this on `!synced` left the render's
|
||||
// _envState empty whenever sync succeeded → "servers don't show".
|
||||
try { Object.assign(_envState, _readStoredEnvState()); } catch {}
|
||||
// Honour a user-set default server: always land on it when Cookbook opens, so
|
||||
// every dropdown (scan/download/serve/cache/deps) starts on the same machine.
|
||||
if (_envState.defaultServer) {
|
||||
const _dk = _envState.defaultServer;
|
||||
if (_dk === 'local') {
|
||||
_envState.remoteHost = ''; _envState.env = 'none'; _envState.envPath = ''; _envState.platform = '';
|
||||
} else {
|
||||
const _ds = (_envState.servers || []).find(s => s.host === _dk);
|
||||
if (_ds) { _envState.remoteHost = _ds.host; _envState.env = _ds.env || 'none'; _envState.envPath = _ds.envPath || ''; _envState.platform = _ds.platform || ''; }
|
||||
}
|
||||
modal.style.display = '';
|
||||
Modals.register('cookbook-modal', {
|
||||
railBtnId: 'rail-cookbook',
|
||||
sidebarBtnId: 'tool-cookbook-btn',
|
||||
closeFn: () => _doClose(),
|
||||
restoreFn: () => { _renderRunningTab(); },
|
||||
});
|
||||
_wireCookbookDrag(modal);
|
||||
await _syncFromServer();
|
||||
// `_syncFromServer` lives in cookbookRunning.js and populates *its* _envState
|
||||
// (a different object reference than this module's), then mirrors the merged
|
||||
// state to localStorage. So ALWAYS hydrate our _envState from that mirror —
|
||||
// on a successful sync it holds the freshly-fetched servers; on failure it
|
||||
// holds the last-known state. Gating this on `!synced` left the render's
|
||||
// _envState empty whenever sync succeeded → "servers don't show".
|
||||
try { Object.assign(_envState, _readStoredEnvState()); } catch { }
|
||||
// Honour a user-set default server: always land on it when Cookbook opens, so
|
||||
// every dropdown (scan/download/serve/cache/deps) starts on the same machine.
|
||||
if (_envState.defaultServer) {
|
||||
const _dk = _envState.defaultServer;
|
||||
if (_dk === 'local') {
|
||||
_envState.remoteHost = ''; _envState.remoteServerKey = ''; _envState.env = 'none'; _envState.envPath = ''; _envState.platform = '';
|
||||
} else {
|
||||
const _ds = _serverByVal(_dk);
|
||||
if (_ds) { _envState.remoteHost = _ds.host; _envState.remoteServerKey = _serverKey(_ds); _envState.env = _ds.env || 'none'; _envState.envPath = _ds.envPath || ''; _envState.platform = _ds.platform || ''; }
|
||||
}
|
||||
}
|
||||
// Re-render on every open AFTER sync so the freshly-fetched state (servers,
|
||||
// HF token, presets) is always reflected. Gating this to once-per-page used
|
||||
// to freeze a stale/empty servers list whenever the first sync raced or
|
||||
// returned before hydration — and since close/reopen doesn't reset the page,
|
||||
// only a full reload recovered it. Re-rendering is cheap and the in-progress
|
||||
// Running tab is rendered separately just below.
|
||||
_renderRecipes();
|
||||
_rendered = true;
|
||||
_clearCookbookNotif();
|
||||
_renderRunningTab();
|
||||
// Self-heal: revive any download tasks whose tmux session is still alive
|
||||
// but were persisted as done/error (covers the "restarted server while a
|
||||
// big multi-shard download was in flight" case — the task survived in
|
||||
// tmux, the cookbook just lost track of it).
|
||||
try { _selfHealStaleTasks({ oneShot: true }); } catch { }
|
||||
if (_content) {
|
||||
// Put the panel in its entering state before it becomes visible. On
|
||||
// mobile, showing first and adding the class a frame later can paint the
|
||||
// sheet at its final position, which makes the slide-up look like a snap.
|
||||
_content.classList.add('cookbook-modal-entering');
|
||||
}
|
||||
modal.classList.remove('hidden');
|
||||
if (_content) {
|
||||
void _content.offsetWidth;
|
||||
_content.addEventListener('animationend', () => {
|
||||
_content.classList.remove('cookbook-modal-entering');
|
||||
}, { once: true });
|
||||
}
|
||||
setTimeout(_applyIntent, 0);
|
||||
}
|
||||
// Re-render on every open AFTER sync so the freshly-fetched state (servers,
|
||||
// HF token, presets) is always reflected. Gating this to once-per-page used
|
||||
// to freeze a stale/empty servers list whenever the first sync raced or
|
||||
// returned before hydration — and since close/reopen doesn't reset the page,
|
||||
// only a full reload recovered it. Re-rendering is cheap and the in-progress
|
||||
// Running tab is rendered separately just below.
|
||||
_renderRecipes();
|
||||
_rendered = true;
|
||||
_clearCookbookNotif();
|
||||
_renderRunningTab();
|
||||
// Self-heal: revive any download tasks whose tmux session is still alive
|
||||
// but were persisted as done/error (covers the "restarted server while a
|
||||
// big multi-shard download was in flight" case — the task survived in
|
||||
// tmux, the cookbook just lost track of it).
|
||||
try { _selfHealStaleTasks({ oneShot: true }); } catch {}
|
||||
if (_content) {
|
||||
// Put the panel in its entering state before it becomes visible. On
|
||||
// mobile, showing first and adding the class a frame later can paint the
|
||||
// sheet at its final position, which makes the slide-up look like a snap.
|
||||
_content.classList.add('cookbook-modal-entering');
|
||||
}
|
||||
modal.classList.remove('hidden');
|
||||
if (_content) {
|
||||
void _content.offsetWidth;
|
||||
_content.addEventListener('animationend', () => {
|
||||
_content.classList.remove('cookbook-modal-entering');
|
||||
}, { once: true });
|
||||
}
|
||||
setTimeout(_applyIntent, 0);
|
||||
} finally {
|
||||
_setCookbookOpening(false);
|
||||
}
|
||||
@@ -2217,9 +2232,6 @@ const shared = {
|
||||
_getPort,
|
||||
_sshPrefix,
|
||||
_getPlatform,
|
||||
_serverByVal,
|
||||
_selectedServer,
|
||||
_currentServerValue,
|
||||
_isWindows,
|
||||
_isMetal,
|
||||
_buildEnvPrefix,
|
||||
|
||||
@@ -242,11 +242,7 @@ export function _wirePanelEvents(panel, model, backend) {
|
||||
const dlBtn = panel.querySelector('.hwfit-dl-btn');
|
||||
if (dlBtn) {
|
||||
dlBtn.addEventListener('click', () => {
|
||||
if (backend === 'ollama') {
|
||||
_runPanelCmd(panel, _buildDownloadCmd(model, backend), { timeout: 0 });
|
||||
} else {
|
||||
_runModelDownload(panel, model, backend);
|
||||
}
|
||||
_runModelDownload(panel, model, backend)
|
||||
});
|
||||
}
|
||||
|
||||
@@ -459,7 +455,9 @@ export async function _runModelDownload(panel, model, backend, hostOverride) {
|
||||
uiModule.showToast(_missingGgufMessage(model));
|
||||
return;
|
||||
}
|
||||
const repo = ggufSource?.repo || model.quant_repo || model.name;
|
||||
const repo = backend === 'ollama'
|
||||
? (model.ollama || model.ollama_name || model.name)
|
||||
: (ggufSource?.repo || model.quant_repo || model.name);
|
||||
const include = backend === 'llamacpp' ? _ggufIncludePattern(model, ggufSource) : null;
|
||||
|
||||
_syncEnvFromPanel(panel);
|
||||
@@ -494,7 +492,7 @@ export async function _runModelDownload(panel, model, backend, hostOverride) {
|
||||
const platform = host ? (srv.platform || '') : (_envState.platform || '');
|
||||
const isWin = host ? (platform === 'windows') : _isWindows();
|
||||
|
||||
const payload = { repo_id: repo };
|
||||
const payload = { repo_id: repo, backend };
|
||||
if (include) payload.include = include;
|
||||
// Large downloads are where hf_transfer most often dies near the end. Use the
|
||||
// plain HuggingFace downloader up front for big model files; it is slower, but
|
||||
|
||||
@@ -1564,6 +1564,10 @@ export async function _launchServeTask(shortName, repo, cmd, fields, hostOverrid
|
||||
const payload = { repo_id: repo, remote_host: _host || undefined, ssh_port: _sp || undefined, _cmd: cmd, _fields: fields || undefined, _env: _usedEnv, _envPath: _usedEnvPath, _gpus: _usedGpus };
|
||||
_addTask(data.session_id, shortName, 'serve', payload);
|
||||
uiModule.showToast(`Serving ${shortName}...`);
|
||||
// Auto-register may have enabled an existing (offline) endpoint for this
|
||||
// host:port. Refresh the picker so the row is no longer dimmed, and the
|
||||
// user doesn't see "offline" on a serve they just started.
|
||||
try { _refreshModelsAfterEndpointChange(); } catch (_) {}
|
||||
} catch (e) {
|
||||
uiModule.showToast('Failed: ' + e.message);
|
||||
}
|
||||
@@ -3032,6 +3036,11 @@ async function _reconnectTask(el, task) {
|
||||
if (info.status === 'ready' && !task._serveReady) {
|
||||
task._serveReady = true;
|
||||
_updateTask(task.sessionId, { _serveReady: true });
|
||||
// The auto-registered endpoint was marked offline while the
|
||||
// server was coming up. Now that it's reachable, nudge the
|
||||
// picker to re-probe so the row stops showing as offline (dimmed) without the
|
||||
// user having to reopen Settings or refresh the page.
|
||||
try { _refreshModelsAfterEndpointChange(); } catch (_) {}
|
||||
}
|
||||
if (info.phase) {
|
||||
badge.textContent = info.phase;
|
||||
|
||||
@@ -129,7 +129,7 @@ try { (function () {
|
||||
</label>
|
||||
</div>
|
||||
|
||||
<div class="hwfit-schedule-row">
|
||||
<div class="hwfit-schedule-row hwfit-schedule-when-row">
|
||||
<label class="hwfit-schedule-field">
|
||||
<span>From</span>
|
||||
<input type="time" class="hwfit-sched-start cookbook-field-input" value="09:00" />
|
||||
@@ -138,24 +138,24 @@ try { (function () {
|
||||
<span>Until</span>
|
||||
<input type="time" class="hwfit-sched-end cookbook-field-input" value="17:00" />
|
||||
</label>
|
||||
</div>
|
||||
|
||||
<div class="hwfit-schedule-row hwfit-schedule-days-row">
|
||||
<span class="hwfit-schedule-label">Days</span>
|
||||
<div class="hwfit-sched-days">
|
||||
${DAYS.map(d => `
|
||||
<button type="button" class="hwfit-sched-day-chip${WEEKDAYS.has(d.k) ? " is-on" : ""}" data-day="${d.k}">${d.l}</button>
|
||||
`).join("")}
|
||||
<label class="hwfit-schedule-field hwfit-schedule-days-field">
|
||||
<span>Days</span>
|
||||
<div class="hwfit-sched-days">
|
||||
${DAYS.map(d => `
|
||||
<button type="button" class="hwfit-sched-day-chip${WEEKDAYS.has(d.k) ? " is-on" : ""}" data-day="${d.k}">${d.l}</button>
|
||||
`).join("")}
|
||||
</div>
|
||||
</label>
|
||||
<div class="hwfit-schedule-actions-inline">
|
||||
<button type="button" class="cookbook-btn hwfit-sched-cancel" title="Cancel">
|
||||
<svg width="11" height="11" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.4" stroke-linecap="round" stroke-linejoin="round" style="vertical-align:-1px;margin-right:5px;flex-shrink:0;"><line x1="18" y1="6" x2="6" y2="18"/><line x1="6" y1="6" x2="18" y2="18"/></svg>
|
||||
<span>Cancel</span>
|
||||
</button>
|
||||
<button type="button" class="cookbook-btn hwfit-sched-save" title="Save schedule" aria-label="Save schedule">
|
||||
<svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="vertical-align:-1px;margin-right:5px;flex-shrink:0;"><rect x="3" y="4" width="18" height="18" rx="2"/><line x1="16" y1="2" x2="16" y2="6"/><line x1="8" y1="2" x2="8" y2="6"/><line x1="3" y1="10" x2="21" y2="10"/></svg>
|
||||
<span>Save</span>
|
||||
</button>
|
||||
</div>
|
||||
<span class="hwfit-schedule-actions-spacer"></span>
|
||||
<button type="button" class="cookbook-btn hwfit-sched-cancel" title="Cancel">
|
||||
<svg width="11" height="11" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.4" stroke-linecap="round" stroke-linejoin="round" style="vertical-align:-1px;margin-right:5px;flex-shrink:0;"><line x1="18" y1="6" x2="6" y2="18"/><line x1="6" y1="6" x2="18" y2="18"/></svg>
|
||||
<span>Cancel</span>
|
||||
</button>
|
||||
<button type="button" class="cookbook-btn hwfit-sched-save" title="Save schedule" aria-label="Save schedule">
|
||||
<svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="vertical-align:-1px;margin-right:5px;flex-shrink:0;"><rect x="3" y="4" width="18" height="18" rx="2"/><line x1="16" y1="2" x2="16" y2="6"/><line x1="8" y1="2" x2="8" y2="6"/><line x1="3" y1="10" x2="21" y2="10"/></svg>
|
||||
<span>Save</span>
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<div class="hwfit-sched-err"></div>
|
||||
|
||||
+169
-132
@@ -14,7 +14,6 @@ import { bindMenuDismiss, dismissOrRemove } from './escMenuStack.js';
|
||||
let _envState;
|
||||
let _sshCmd;
|
||||
let _getPort;
|
||||
let _serverByVal;
|
||||
let _sshPrefix;
|
||||
let _getPlatform;
|
||||
let _isWindows;
|
||||
@@ -98,14 +97,14 @@ function _selectedServeTarget(panel) {
|
||||
const select = document.getElementById('hwfit-server-select') || document.getElementById('hwfit-dl-server');
|
||||
const servers = Array.isArray(_envState.servers) ? _envState.servers : [];
|
||||
let host = _envState.remoteHost || '';
|
||||
let server = host ? (_serverByVal?.(_envState.remoteServerKey || host) || servers.find(s => s.host === host)) : null;
|
||||
let server = host ? servers.find(s => s.host === host) : null;
|
||||
if (select && select.value != null) {
|
||||
if (select.value === 'local') {
|
||||
host = '';
|
||||
server = servers.find(s => !s.host || s.host === 'local') || null;
|
||||
} else {
|
||||
const idx = /^\d+$/.test(String(select.value)) ? parseInt(select.value, 10) : -1;
|
||||
server = _serverByVal?.(select.value) || (idx >= 0 ? servers[idx] : null) || null;
|
||||
server = servers.find(s => s.host === select.value) || (idx >= 0 ? servers[idx] : null) || null;
|
||||
host = server?.host || '';
|
||||
}
|
||||
}
|
||||
@@ -115,7 +114,7 @@ function _selectedServeTarget(panel) {
|
||||
: (server?.name || 'local server');
|
||||
return {
|
||||
host,
|
||||
port: host ? (server?.port || _getPort(host) || '') : '',
|
||||
port: host ? (_getPort(host) || server?.port || '') : '',
|
||||
venv,
|
||||
label,
|
||||
};
|
||||
@@ -243,21 +242,6 @@ function _shellPathExpr(path) {
|
||||
function _selectedGgufExpr(model, repo, relPath) {
|
||||
const rel = String(relPath || '').replace(/^\/+/, '');
|
||||
if (!rel) return '';
|
||||
if (_isWindows()) {
|
||||
// PowerShell: plain path — no bash $() syntax (backend validator rejects
|
||||
// $( ) in non-prelude commands, and PowerShell doesn't have printf).
|
||||
const relW = rel.replace(/\//g, '\\');
|
||||
if (model.is_local_dir && model.path) {
|
||||
const base = String(model.path || '').replace(/\/+$/, '').replace(/\//g, '\\');
|
||||
return `${base}\\${repo.replace(/\//g, '\\')}\\${relW}`;
|
||||
}
|
||||
if (model.path) {
|
||||
const base = String(model.path || '').replace(/\/+$/, '').replace(/\//g, '\\');
|
||||
return `${base}\\models--${repo.replace(/\//g, '--')}\\snapshots\\${relW}`;
|
||||
}
|
||||
const cacheRepo = repo.replace(/\//g, '--');
|
||||
return `$env:USERPROFILE\\.cache\\huggingface\\hub\\models--${cacheRepo}\\snapshots\\${relW}`;
|
||||
}
|
||||
if (model.is_local_dir && model.path) {
|
||||
const base = String(model.path || '').replace(/\/+$/, '');
|
||||
return `$(printf %s ${_shellPathExpr(`${base}/${repo}/${rel}`)})`;
|
||||
@@ -271,15 +255,6 @@ function _selectedGgufExpr(model, repo, relPath) {
|
||||
}
|
||||
|
||||
function _ggufSearchDirExpr(model, repo) {
|
||||
if (_isWindows()) {
|
||||
if (model.is_local_dir && model.path) {
|
||||
return `${String(model.path || '').replace(/\/+$/, '').replace(/\//g, '\\')}\\${repo.replace(/\//g, '\\')}`;
|
||||
}
|
||||
if (model.path) {
|
||||
return `${String(model.path || '').replace(/\/+$/, '').replace(/\//g, '\\')}\\models--${repo.replace(/\//g, '--')}\\snapshots`;
|
||||
}
|
||||
return `$env:USERPROFILE\\.cache\\huggingface\\hub\\models--${repo.replace(/\//g, '--')}\\snapshots`;
|
||||
}
|
||||
if (model.is_local_dir && model.path) return _shellQuote(`${String(model.path || '').replace(/\/+$/, '')}/${repo}`);
|
||||
if (model.path) return _shellQuote(`${String(model.path || '').replace(/\/+$/, '')}/models--${repo.replace(/\//g, '--')}/snapshots`);
|
||||
return `"$HOME/.cache/huggingface/hub/models--${repo.replace(/\//g, '--')}/snapshots"`;
|
||||
@@ -537,7 +512,7 @@ function _rerenderCachedModels() {
|
||||
// The venv set per-server in Settings (server.envPath). Used as the venv
|
||||
// field default when the global active env path isn't carrying it, so a
|
||||
// configured server venv shows up without re-typing it.
|
||||
const _selSrv = _serverByVal?.(_es.remoteServerKey || _es.remoteHost || '') || {};
|
||||
const _selSrv = (_es.servers || []).find(s => s.host === (_es.remoteHost || '')) || {};
|
||||
const _srvVenv = _selSrv.envPath || '';
|
||||
// Serve state schema: { _byRepo: { <repo>: {...} }, _lastUsed: {...} }.
|
||||
// Loading priority: this-repo's saved settings → last-used (from any
|
||||
@@ -600,7 +575,7 @@ function _rerenderCachedModels() {
|
||||
+ `<button type="button" class="cookbook-slot-btn cookbook-saved-arrow" title="${esc(_arrowTitle)}">${_arrowLabel}</button>`
|
||||
+ `</div>`;
|
||||
|
||||
let panelHtml = `<div class="hwfit-serve-panel">${_slotsHtml}`;
|
||||
let panelHtml = `<div class="hwfit-serve-panel">`;
|
||||
// Warn when serving a model whose download hasn't fully completed —
|
||||
// the user CAN still hit Launch (vLLM/llama-server will start, then
|
||||
// crash trying to read missing shards), but they should know.
|
||||
@@ -633,26 +608,48 @@ function _rerenderCachedModels() {
|
||||
_gpuBtnsHtml += `<button type="button" class="cookbook-gpu-btn${on ? ' active' : ''}" data-gpu="${i}">${i}</button>`;
|
||||
}
|
||||
panelHtml += `<label>${_l('GPUs','Toggle which GPUs to use')}<div class="cookbook-gpu-group">${_gpuBtnsHtml}</div><input type="hidden" class="hwfit-sf" data-field="gpus" value="${esc(defaultGpus)}" /></label>`;
|
||||
// Save / saved-configs split button — moved into Row 1 (next to GPUs)
|
||||
// so it shares the same baseline as the rest of the top controls.
|
||||
panelHtml += _slotsHtml;
|
||||
panelHtml += `</div>`;
|
||||
panelHtml += `<div class="hwfit-serve-runtime-note" style="display:none;font-size:11px;line-height:1.35;color:var(--fg-muted);margin-top:-4px;"></div>`;
|
||||
if (_ggufChoices.length > 1) {
|
||||
panelHtml += `<div class="hwfit-serve-row hwfit-backend-llamacpp">`;
|
||||
panelHtml += `<label class="hwfit-backend-llamacpp">${_l('GGUF File','Choose the exact GGUF artifact to serve from this cached model folder.')}<select class="hwfit-sf hwfit-sf-wide" data-field="gguf_file">${_ggufOptions}</select></label>`;
|
||||
// Show the GGUF File dropdown for BOTH llama.cpp and Ollama — Ollama
|
||||
// also needs to know which exact .gguf to import via the new
|
||||
// `docker exec ollama-test ollama-import` auto-fill (otherwise the
|
||||
// helper falls back to "first sorted gguf", which may not match what
|
||||
// the user picked).
|
||||
panelHtml += `<div class="hwfit-serve-row hwfit-backend-llamacpp hwfit-backend-ollama">`;
|
||||
panelHtml += `<label class="hwfit-backend-llamacpp hwfit-backend-ollama">${_l('GGUF File','Choose the exact GGUF artifact to serve from this cached model folder.')}<select class="hwfit-sf hwfit-sf-wide" data-field="gguf_file">${_ggufOptions}</select></label>`;
|
||||
panelHtml += `</div>`;
|
||||
} else if (_defaultGguf) {
|
||||
panelHtml += `<input type="hidden" class="hwfit-sf" data-field="gguf_file" value="${esc(_defaultGguf)}" />`;
|
||||
}
|
||||
// Row 2: Core settings
|
||||
panelHtml += `<div class="hwfit-serve-row hwfit-backend-vllm hwfit-backend-sglang hwfit-backend-llamacpp">`;
|
||||
// Row 2: Core settings — the handful you actually touch every launch.
|
||||
// TP / Context / GPU / GPU Mem / Max Seqs / Dtype. Everything else
|
||||
// (Swap, KV Cache, Attention backend, Env vars, llama.cpp batch/ubatch)
|
||||
// moved to the Advanced fold below to keep this row scannable.
|
||||
panelHtml += `<div class="hwfit-serve-row hwfit-backend-vllm hwfit-backend-sglang hwfit-backend-llamacpp hwfit-backend-ollama">`;
|
||||
panelHtml += `<label class="hwfit-backend-vllm hwfit-backend-sglang">${_l('TP','Tensor Parallelism — split model across N GPUs')}<select class="hwfit-sf" data-field="tp">${tpOpts}</select></label>`;
|
||||
// ctx resets to the model's max on every panel open (the real ctx slider
|
||||
// lives in the Scan/Download toolbar — see cookbook.js .hwfit-ctx-control).
|
||||
panelHtml += `<label>${_l('Context','Max tokens per request — resets to the model max on every open. Lower = less VRAM')}<input type="text" class="hwfit-sf" data-field="ctx" value="${esc(m.context_length || m.context || '20000')}" /></label>`;
|
||||
panelHtml += `<label>${_l('GPU','Which GPU to use. Leave empty for default')}<input type="text" class="hwfit-sf" data-field="gpu_id" value="${esc(sv('gpu_id', ''))}" placeholder="auto" style="width:50px;" /></label>`;
|
||||
panelHtml += `<label class="hwfit-backend-vllm hwfit-backend-sglang">${_l('GPU Mem','Fraction of GPU memory (0.0–1.0). Lower if OOM')}<input type="text" class="hwfit-sf" data-field="gpu_mem" value="${esc(sv('gpu_mem', '0.90'))}" /></label>`;
|
||||
panelHtml += `<label class="hwfit-backend-vllm">${_l('Swap','CPU swap space in GB. Leave empty to omit (removed in newer vLLM)')}<input type="text" class="hwfit-sf" data-field="swap" value="${esc(sv('swap', ''))}" placeholder="off" /></label>`;
|
||||
panelHtml += `<label class="hwfit-backend-vllm hwfit-backend-sglang">${_l('Max Seqs','Maximum concurrent requests. Lower = less memory. Default 4 — prosumer GPUs often OOM on vLLM default 256 during CUDA graph capture.')}<input type="text" class="hwfit-sf" data-field="max_seqs" value="${esc(sv('max_seqs', '4'))}" placeholder="4" /></label>`;
|
||||
panelHtml += `<label>${_l('Dtype','Data type for weights. auto picks best for GPU')}<select class="hwfit-sf" data-field="dtype">${dtypeOpts}</select></label>`;
|
||||
panelHtml += `</div>`;
|
||||
// ── Advanced (collapsed by default) ──
|
||||
// Everything below the fold is tuning users only touch occasionally:
|
||||
// vLLM kernel/env knobs, llama.cpp fit/cache/split controls, the
|
||||
// GGUF batch sizes, the speculative-decoding row, and the live VRAM
|
||||
// monitor. Wrapped in a native <details> so toggle state survives
|
||||
// re-renders cheaply and a closed fold doesn't trigger any layout
|
||||
// work for the dozens of nested inputs.
|
||||
panelHtml += `<details class="hwfit-serve-advanced">`;
|
||||
panelHtml += `<summary class="hwfit-serve-advanced-summary">Advanced</summary>`;
|
||||
// Advanced vLLM/SGLang row (KV Cache, Attention, Swap, Env)
|
||||
panelHtml += `<div class="hwfit-serve-row hwfit-backend-vllm hwfit-backend-sglang">`;
|
||||
panelHtml += `<label class="hwfit-backend-vllm">${_l('KV Cache','vLLM --kv-cache-dtype. auto uses the model/runtime default; fp8 reduces KV memory for long context.')}<select class="hwfit-sf" data-field="vllm_kv_cache_dtype" style="height:32px;">${vllmKvCacheOpts}</select></label>`;
|
||||
// Attention backend selector — pin the kernel impl. Default `auto` lets
|
||||
// vLLM pick FlashInfer (which JITs on first use and breaks on older
|
||||
@@ -662,6 +659,7 @@ function _rerenderCachedModels() {
|
||||
const vllmAttnBackendOpts = ['auto', 'FLASH_ATTN', 'XFORMERS', 'FLASHINFER', 'TORCH_SDPA']
|
||||
.map(b => `<option value="${b === 'auto' ? '' : b}"${(sv('vllm_attn_backend','') === (b === 'auto' ? '' : b)) ? ' selected' : ''}>${b}</option>`).join('');
|
||||
panelHtml += `<label class="hwfit-backend-vllm">${_l('Attention','vLLM VLLM_ATTENTION_BACKEND. auto = vLLM picks (often FLASHINFER, which JITs and can fail on old nvcc). FLASH_ATTN skips the JIT entirely.')}<select class="hwfit-sf" data-field="vllm_attn_backend" style="height:32px;">${vllmAttnBackendOpts}</select></label>`;
|
||||
panelHtml += `<label class="hwfit-backend-vllm">${_l('Swap','CPU swap space in GB. Leave empty to omit (removed in newer vLLM)')}<input type="text" class="hwfit-sf" data-field="swap" value="${esc(sv('swap', ''))}" placeholder="off" /></label>`;
|
||||
// Free-text env-vars field. Anything pasted here is prepended to the
|
||||
// launch command verbatim. Use for CUDACXX, PATH overrides, NCCL_*
|
||||
// tuning, or any other KEY=VALUE pair that doesn't have a dedicated
|
||||
@@ -669,6 +667,12 @@ function _rerenderCachedModels() {
|
||||
// already exported so they expand correctly here.
|
||||
panelHtml += `<label class="hwfit-backend-vllm hwfit-backend-sglang" style="flex:1 1 100%;">${_l('Env','Extra KEY=VALUE env-var pairs prepended to the launch (space-separated). Example: CUDACXX=$VIRTUAL_ENV/lib/python3.10/site-packages/nvidia/cuda_nvcc/bin/nvcc — points flashinfer at the venv-bundled nvcc when the system one is too old for your GPU.')}<input type="text" class="hwfit-sf" data-field="extra_env" value="${esc(sv('extra_env',''))}" placeholder="CUDACXX=/path/to/nvcc NCCL_P2P_DISABLE=1" style="width:100%;" /></label>`;
|
||||
panelHtml += `</div>`;
|
||||
// Advanced llama.cpp row (Batch / UBatch — moved out of Core for the
|
||||
// same "rarely touched" reason as the vLLM extras above).
|
||||
panelHtml += `<div class="hwfit-serve-row hwfit-backend-llamacpp">`;
|
||||
panelHtml += `<label class="hwfit-backend-llamacpp">${_l('Batch','llama.cpp prompt batch size. Leave blank for llama.cpp default.')}<input type="text" class="hwfit-sf" data-field="llama_batch_size" value="${esc(sv('llama_batch_size', ''))}" placeholder="2048" /></label>`;
|
||||
panelHtml += `<label class="hwfit-backend-llamacpp">${_l('UBatch','llama.cpp physical micro-batch size. Leave blank for llama.cpp default.')}<input type="text" class="hwfit-sf" data-field="llama_ubatch_size" value="${esc(sv('llama_ubatch_size', ''))}" placeholder="512" /></label>`;
|
||||
panelHtml += `</div>`;
|
||||
// Row 2b: Diffusers settings
|
||||
const diffDtypeOpts = ['bfloat16','float16','float32'].map(d => `<option value="${d}"${sv('diff_dtype','bfloat16')===d?' selected':''}>${d}</option>`).join('');
|
||||
const deviceMapOpts = ['balanced','auto','sequential'].map(d => `<option value="${d}"${sv('diff_device_map','balanced')===d?' selected':''}>${d}</option>`).join('');
|
||||
@@ -691,7 +695,7 @@ function _rerenderCachedModels() {
|
||||
const llamaFitOpts = ['', 'off', 'on'].map(d => `<option value="${d}"${sv('llama_fit','')===d?' selected':''}>${d||'default'}</option>`).join('');
|
||||
const llamaSplitModeOpts = ['', 'layer', 'tensor', 'row', 'none'].map(d => `<option value="${d}"${sv('llama_split_mode','')===d?' selected':''}>${d||'default'}</option>`).join('');
|
||||
panelHtml += `<div class="hwfit-serve-row hwfit-backend-llamacpp">`;
|
||||
panelHtml += `<label>${_l('CPU MoE','n-cpu-moe: number of MoE expert layers to run on CPU when the model is bigger than VRAM. 0 = all on GPU. Set automatically by the Auto profiles below.')}<input type="text" class="hwfit-sf" data-field="n_cpu_moe" value="${esc(sv('n_cpu_moe',''))}" placeholder="0" style="width:54px;" /></label>`;
|
||||
panelHtml += `<label>${_l('CPU MoE','n-cpu-moe: number of MoE expert layers to run on CPU when the model is bigger than VRAM. 0 = all on GPU. Set automatically by the Auto profiles below.')}<input type="text" class="hwfit-sf" data-field="n_cpu_moe" value="${esc(sv('n_cpu_moe',''))}" placeholder="0" style="width:54px;position:relative;top:-8px;" /></label>`;
|
||||
panelHtml += `<label>${_l('KV Cache','cache-type-k/v: quantize the KV cache. q4_0 = smallest (more context), q8_0 = sharp long-context, f16 = full. Blank = llama.cpp default.')}<select class="hwfit-sf" data-field="cache_type">${_kvOpts}</select></label>`;
|
||||
panelHtml += `<label class="hwfit-sf-cb" style="align-self:end;"><input type="checkbox" class="hwfit-sf" data-field="flash_attn"${sv('flash_attn',false)?' checked':''} /> Flash Attn${_h('--flash-attn on: faster attention + needed for quantized KV cache.')}</label>`;
|
||||
panelHtml += `<label class="hwfit-sf-cb" style="align-self:end;"><input type="checkbox" class="hwfit-sf" data-field="vision"${sv('vision',false)?' checked':''} /> Vision${_h('Serve with the vision encoder so the model can read images. Auto-finds an mmproj-*.gguf next to the model (download one into the model folder). Adds ~1 GB VRAM + a small per-image cost.')}</label>`;
|
||||
@@ -701,19 +705,16 @@ function _rerenderCachedModels() {
|
||||
// explicit overrides for known-good advanced presets; blank keeps
|
||||
// llama.cpp/profile defaults.
|
||||
panelHtml += `<div class="hwfit-serve-row hwfit-backend-llamacpp">`;
|
||||
panelHtml += `<label>${_l('Split Mode','llama.cpp GPU placement. layer is the usual default; tensor splits weights and KV across GPUs.')}<select class="hwfit-sf" data-field="llama_split_mode">${llamaSplitModeOpts}</select></label>`;
|
||||
panelHtml += `<label>${_l('Split Mode','llama.cpp GPU placement. layer is the usual default; tensor splits weights and KV across GPUs.')}<select class="hwfit-sf" data-field="llama_split_mode" style="position:relative;top:-8px;">${llamaSplitModeOpts}</select></label>`;
|
||||
panelHtml += `<label>${_l('Tensor Split','GPU proportions for llama.cpp, e.g. 50,50 across two visible GPUs. Leave blank for auto.')}<input type="text" class="hwfit-sf" data-field="llama_tensor_split" value="${esc(sv('llama_tensor_split', ''))}" placeholder="50,50" /></label>`;
|
||||
panelHtml += `<label>${_l('Main GPU','llama.cpp --main-gpu index inside the visible GPU set. Mostly useful for split mode none/row.')}<input type="text" class="hwfit-sf" data-field="llama_main_gpu" value="${esc(sv('llama_main_gpu', ''))}" placeholder="auto" /></label>`;
|
||||
panelHtml += `<label>${_l('Parallel','llama.cpp parallel slots. Leave blank for llama.cpp default; 1 matches single-lane presets.')}<input type="text" class="hwfit-sf" data-field="llama_parallel" value="${esc(sv('llama_parallel', ''))}" placeholder="1" /></label>`;
|
||||
panelHtml += `<label>${_l('Batch','llama.cpp prompt batch size. Leave blank for llama.cpp default.')}<input type="text" class="hwfit-sf" data-field="llama_batch_size" value="${esc(sv('llama_batch_size', ''))}" placeholder="2048" /></label>`;
|
||||
panelHtml += `<label>${_l('UBatch','llama.cpp physical micro-batch size. Leave blank for llama.cpp default.')}<input type="text" class="hwfit-sf" data-field="llama_ubatch_size" value="${esc(sv('llama_ubatch_size', ''))}" placeholder="512" /></label>`;
|
||||
panelHtml += `</div>`;
|
||||
// Row 2d: Auto profiles — computed from detected hardware (see profiles.py).
|
||||
// Buttons are injected after the panel mounts (needs an async fetch).
|
||||
panelHtml += `<div class="hwfit-serve-row hwfit-backend-llamacpp hwfit-serve-profiles" style="align-items:center;gap:8px;">`;
|
||||
panelHtml += `<span style="opacity:0.7;font-size:11px;">Auto profiles:</span>`;
|
||||
panelHtml += `<span class="hwfit-profile-btns" style="display:flex;gap:6px;flex-wrap:wrap;"><span style="opacity:0.5;font-size:11px;">computing…</span></span>`;
|
||||
panelHtml += `</div>`;
|
||||
// Auto-profile chips row removed — visual fit with the rest of the
|
||||
// serve panel was off, and the manual ctx/n_cpu_moe/cache controls
|
||||
// above are already sufficient. The hwfit profile API
|
||||
// (/api/hwfit/profiles) is still available for any caller that
|
||||
// wants it.
|
||||
// Live VRAM / RAM-spillover monitor for the serve target's GPU. Polls
|
||||
// /api/cookbook/gpus while the panel is open so you can SEE whether the
|
||||
// config fits VRAM (fast) or spills to system RAM (slow). Populated after mount.
|
||||
@@ -745,7 +746,7 @@ function _rerenderCachedModels() {
|
||||
// even for models the auto-detector doesn't recognize. Expert-parallel,
|
||||
// reasoning-parser and MoE-env still only appear when auto-detected.
|
||||
const _opts2 = _detectModelOptimizations(repo);
|
||||
panelHtml += `<div class="hwfit-serve-checks hwfit-backend-vllm" style="margin-top:2px;">`;
|
||||
panelHtml += `<div class="hwfit-serve-checks hwfit-backend-vllm">`;
|
||||
if (_opts2.flags.includes('--enable-expert-parallel')) panelHtml += `<label class="hwfit-sf-cb"><input type="checkbox" class="hwfit-sf" data-field="expert_parallel" /> Expert Parallel</label>`;
|
||||
if (_opts2.flags.some(f => f.includes('--reasoning-parser'))) { const rp = _opts2.flags.find(f => f.includes('--reasoning-parser')).split(' ')[1]; panelHtml += `<label class="hwfit-sf-cb"><input type="checkbox" class="hwfit-sf" data-field="reasoning_parser" data-parser="${rp}" /> Reasoning Parser <span class="hwfit-parser-tag">${rp}</span></label>`; }
|
||||
{
|
||||
@@ -764,6 +765,8 @@ function _rerenderCachedModels() {
|
||||
}
|
||||
if (_opts2.envVars.length) panelHtml += `<label class="hwfit-sf-cb"><input type="checkbox" class="hwfit-sf" data-field="moe_env" /> MoE Env Vars</label>`;
|
||||
panelHtml += `</div>`;
|
||||
// ── End Advanced fold ──
|
||||
panelHtml += `</details>`;
|
||||
// Command preview + actions. Wrap the textarea so a floating Copy
|
||||
// button can sit at its top-right corner — same pattern as the chat
|
||||
// run-output panel.
|
||||
@@ -825,27 +828,17 @@ function _rerenderCachedModels() {
|
||||
// model the file lives under "<path>/<repo>" — search there just like we
|
||||
// search the HF snapshots dir, so serving a GGUF from a custom dir works
|
||||
// instead of handing llama.cpp a directory (which fails).
|
||||
const _ldir = m.path
|
||||
? (_isWindows() ? `${m.path.replace(/\//g, '\\')}\\${repo.replace(/\//g, '\\')}` : _shellQuote(`${m.path}/${repo}`))
|
||||
: (_isWindows() ? '' : '""');
|
||||
if (selectedGguf) {
|
||||
f._gguf_path = _selectedGgufExpr(m, repo, selectedGguf.rel_path);
|
||||
} else if (_isWindows()) {
|
||||
// Windows fallback: no bash $() available; validator rejects it.
|
||||
// Return empty so the serve fails with a clear message.
|
||||
f._gguf_path = '';
|
||||
} else if (m.is_local_dir && m.path) {
|
||||
f._gguf_path = `$({ find ${_ldir} -name '*-00001-of-*.gguf' 2>/dev/null | sort; find ${_ldir} -name '*.gguf' 2>/dev/null | sort; } | head -1)`;
|
||||
} else {
|
||||
f._gguf_path = `$({ find ${dir} -name '*-00001-of-*.gguf' 2>/dev/null | sort; find ${dir} -name '*.gguf' 2>/dev/null | sort; } | head -1)`;
|
||||
}
|
||||
const _ldir = m.path ? _shellQuote(`${m.path}/${repo}`) : '""';
|
||||
f._gguf_path = selectedGguf
|
||||
? _selectedGgufExpr(m, repo, selectedGguf.rel_path)
|
||||
: m.is_local_dir && m.path
|
||||
? `$({ find ${_ldir} -name '*-00001-of-*.gguf' 2>/dev/null | sort; find ${_ldir} -name '*.gguf' 2>/dev/null | sort; } | head -1)`
|
||||
: `$({ find ${dir} -name '*-00001-of-*.gguf' 2>/dev/null | sort; find ${dir} -name '*.gguf' 2>/dev/null | sort; } | head -1)`;
|
||||
// Vision: auto-find the mmproj (CLIP/projector) file in the same dir.
|
||||
// Resolved at runtime so the toggle just works if an mmproj-*.gguf is
|
||||
// present (downloaded alongside the model). Empty if none → cmd omits it.
|
||||
const _vsearchdir = (m.is_local_dir && m.path) ? _ldir : dir;
|
||||
f._mmproj_path = _isWindows()
|
||||
? (_vsearchdir ? `${_vsearchdir}\\mmproj*.gguf` : '')
|
||||
: `$(find ${_vsearchdir} -iname 'mmproj*.gguf' 2>/dev/null | sort | head -1)`;
|
||||
f._mmproj_path = `$(find ${_vsearchdir} -iname 'mmproj*.gguf' 2>/dev/null | sort | head -1)`;
|
||||
}
|
||||
if (f.reasoning_parser) {
|
||||
const _rpEl2 = panel.querySelector('[data-field="reasoning_parser"]');
|
||||
@@ -886,72 +879,29 @@ function _rerenderCachedModels() {
|
||||
_clampCtx(false); // fix any stale/preset value already present
|
||||
}
|
||||
|
||||
// Auto profiles — fetch hardware-computed llama.cpp profiles and render
|
||||
// them as clickable chips. Clicking one fills the ctx/CPU-MoE/KV/flash
|
||||
// fields and rebuilds the command. Computed from detected VRAM (see
|
||||
// services/hwfit/profiles.py); rough on t/s, accurate on fit.
|
||||
async function _loadServeProfiles() {
|
||||
const wrap = panel.querySelector('.hwfit-profile-btns');
|
||||
if (!wrap) return;
|
||||
// Tighten the ctx field's upper bound to the model's trained limit.
|
||||
// Asking llama.cpp for ctx > n_ctx_train overflows and, with a quantized
|
||||
// KV cache, can crash the GPU (radv ErrorDeviceLost). The auto-profile
|
||||
// chip row that used to also live here was removed — visual fit with
|
||||
// the rest of the serve panel was off — but this clamp is essential.
|
||||
(async () => {
|
||||
try {
|
||||
const host = (_es.remoteHost || '').trim();
|
||||
const selected = _serverByVal?.(_es.remoteServerKey || host);
|
||||
const params = new URLSearchParams({ model: repo });
|
||||
if (host) {
|
||||
params.set('host', host);
|
||||
const _sp = selected?.port;
|
||||
const _sp = (_es.servers || []).find(s => s.host === host)?.port;
|
||||
if (_sp) params.set('ssh_port', _sp);
|
||||
}
|
||||
// SERVE mode: this is a specific GGUF file already on disk, so its quant
|
||||
// is fixed — tell the profiler the file's real size + quant so it varies
|
||||
// only the serving knobs (KV/ctx/offload), not the quant. Parse the size
|
||||
// from m.size (e.g. "20.6 GB") and the quant from the file/repo name.
|
||||
const _sizeMatch = String(m.size || '').match(/([\d.]+)\s*GB/i);
|
||||
if (_sizeMatch) params.set('serve_weights_gb', _sizeMatch[1]);
|
||||
const _qMatch = String(repo).match(/(Q\d[\w]*|IQ\d[\w]*|F16|BF16|FP8)/i);
|
||||
if (_qMatch) params.set('serve_quant', _qMatch[1]);
|
||||
const res = await fetch(`/api/hwfit/profiles?${params}`);
|
||||
const data = await res.json();
|
||||
// Remember the model's trained context limit and clamp the ctx field
|
||||
// to it — asking llama.cpp for ctx > n_ctx_train overflows and, with a
|
||||
// quantized KV cache, can crash the GPU (radv ErrorDeviceLost).
|
||||
const ctxMax = Number(data && data.model_ctx_max) || 0;
|
||||
if (ctxMax > 0) {
|
||||
panel._modelCtxMax = ctxMax; // tighten the clamp to the real limit
|
||||
_clampCtx(false); // re-apply now that we know the model's max
|
||||
panel._modelCtxMax = ctxMax;
|
||||
_clampCtx(false);
|
||||
}
|
||||
const profs = (data && Array.isArray(data.profiles)) ? data.profiles : [];
|
||||
if (!profs.length) { wrap.innerHTML = `<span style="opacity:0.5;font-size:11px;">no auto profile for this model</span>`; return; }
|
||||
wrap.innerHTML = '';
|
||||
for (const p of profs) {
|
||||
const b = document.createElement('button');
|
||||
b.type = 'button';
|
||||
b.className = 'cookbook-btn hwfit-profile-chip';
|
||||
b.style.cssText = 'height:24px;padding:0 9px;font-size:11px;';
|
||||
const off = p.offloads ? `, ncm${p.n_cpu_moe}` : ', all-GPU';
|
||||
b.textContent = `${p.label} · ${p.quant} · ${Math.round(p.ctx/1024)}k${off}`;
|
||||
b.title = `${p.note}\nKV ${p.cache_type}, ~${p.est_vram_gb} GB VRAM`;
|
||||
b.addEventListener('click', () => {
|
||||
const set = (field, val) => {
|
||||
const el = panel.querySelector(`[data-field="${field}"]`);
|
||||
if (!el) return;
|
||||
if (el.type === 'checkbox') el.checked = !!val; else el.value = val;
|
||||
};
|
||||
set('ctx', p.ctx);
|
||||
set('n_cpu_moe', p.n_cpu_moe || '');
|
||||
set('cache_type', p.cache_type || '');
|
||||
set('flash_attn', true); // required for a quantized KV cache
|
||||
wrap.querySelectorAll('.hwfit-profile-chip').forEach(x => x.classList.remove('cookbook-btn-active'));
|
||||
b.classList.add('cookbook-btn-active');
|
||||
updateCmd();
|
||||
});
|
||||
wrap.appendChild(b);
|
||||
}
|
||||
} catch {
|
||||
wrap.innerHTML = `<span style="opacity:0.5;font-size:11px;">profile compute failed</span>`;
|
||||
}
|
||||
}
|
||||
_loadServeProfiles();
|
||||
} catch { /* clamp falls back to the static default */ }
|
||||
})();
|
||||
|
||||
// Live GPU-memory monitor: poll /api/cookbook/gpus and show VRAM usage +
|
||||
// RAM-spillover, with a plain-language health/speed hint. Lets you tell at
|
||||
@@ -962,11 +912,10 @@ function _rerenderCachedModels() {
|
||||
if (!el || !document.body.contains(el)) return false; // panel closed → stop
|
||||
try {
|
||||
const host = (_es.remoteHost || '').trim();
|
||||
const selected = _serverByVal?.(_es.remoteServerKey || host);
|
||||
const params = new URLSearchParams();
|
||||
if (host) {
|
||||
params.set('host', host);
|
||||
const _sp = selected?.port;
|
||||
const _sp = (_es.servers || []).find(s => s.host === host)?.port;
|
||||
if (_sp) params.set('ssh_port', _sp);
|
||||
}
|
||||
const res = await fetch('/api/cookbook/gpus' + (params.toString() ? '?' + params : ''));
|
||||
@@ -1535,6 +1484,38 @@ function _rerenderCachedModels() {
|
||||
}
|
||||
panel._gpuProbe.byIdx = new Map(data.gpus.map(g => [g.index, g]));
|
||||
panel._gpuProbe.host = remoteHost;
|
||||
// If the probe found more GPUs than the panel originally
|
||||
// rendered (e.g. host switched from a 1-iGPU local box to an
|
||||
// 8-GPU remote), append buttons for the missing indexes so the
|
||||
// user can actually toggle them. Reuse the parent <div> from
|
||||
// the first existing button as the insertion target.
|
||||
try {
|
||||
const _existing = Array.from(panel.querySelectorAll('.cookbook-gpu-btn'));
|
||||
const _grp = _existing[0] && _existing[0].parentElement;
|
||||
if (_grp) {
|
||||
const _have = new Set(_existing.map(b => parseInt(b.dataset.gpu, 10)));
|
||||
const _activeStr = (panel.querySelector('[data-field="gpus"]')?.value || '').split(',').map(s => s.trim());
|
||||
data.gpus.forEach(g => {
|
||||
if (_have.has(g.index)) return;
|
||||
const _b = document.createElement('button');
|
||||
_b.type = 'button';
|
||||
_b.className = 'cookbook-gpu-btn' + (_activeStr.includes(String(g.index)) ? ' active' : '');
|
||||
_b.dataset.gpu = String(g.index);
|
||||
_b.textContent = String(g.index);
|
||||
_grp.appendChild(_b);
|
||||
// Re-wire the click handler the same way the panel did
|
||||
// on first render. Toggles active + rewrites the hidden
|
||||
// gpus input from the live set of active buttons.
|
||||
_b.addEventListener('click', () => {
|
||||
_b.classList.toggle('active');
|
||||
const activeBtns = [...panel.querySelectorAll('.cookbook-gpu-btn.active')];
|
||||
const ids = activeBtns.map(x => x.dataset.gpu).sort((a, b) => +a - +b).join(',');
|
||||
const hidden = panel.querySelector('[data-field="gpus"]');
|
||||
if (hidden) { hidden.value = ids; hidden.dispatchEvent(new Event('change', { bubbles: true })); }
|
||||
});
|
||||
});
|
||||
}
|
||||
} catch (_) {}
|
||||
panel.querySelectorAll('.cookbook-gpu-btn').forEach(b => {
|
||||
const idx = parseInt(b.dataset.gpu);
|
||||
const g = panel._gpuProbe.byIdx.get(idx);
|
||||
@@ -1790,7 +1771,7 @@ function _rerenderCachedModels() {
|
||||
const _probeParams = new URLSearchParams();
|
||||
if (_probeHost) {
|
||||
_probeParams.set('host', _probeHost);
|
||||
const _sp = (_serverByVal?.(_envState.remoteServerKey || _probeHost) || {}).port;
|
||||
const _sp = (_envState.servers || []).find(s => s.host === _probeHost)?.port;
|
||||
if (_sp) _probeParams.set('ssh_port', _sp);
|
||||
}
|
||||
const _probeRes = await fetch('/api/cookbook/gpus' + (_probeParams.toString() ? '?' + _probeParams : ''), { credentials: 'same-origin' });
|
||||
@@ -1861,12 +1842,20 @@ function _rerenderCachedModels() {
|
||||
}
|
||||
// Save in the { _byRepo, _lastUsed } schema — no legacy flat keys at
|
||||
// the root so per-model state doesn't leak between models.
|
||||
// Stamp `_forceBackend: true` so the next open of this model defaults
|
||||
// to the launched configuration end-to-end, even when the detector
|
||||
// would have picked a different backend. Without this flag, the
|
||||
// `savedMatchesBackend` gate inside sv() throws away every saved
|
||||
// value when the detected backend doesn't match — the user opens
|
||||
// Serve again and the panel looks like a fresh form despite a
|
||||
// known-good prior launch.
|
||||
try {
|
||||
let cur = {};
|
||||
try { cur = JSON.parse(localStorage.getItem(SERVE_STATE_KEY)) || {}; } catch {}
|
||||
const byRepo = (cur && cur._byRepo && typeof cur._byRepo === 'object') ? cur._byRepo : {};
|
||||
byRepo[repo] = serveState;
|
||||
localStorage.setItem(SERVE_STATE_KEY, JSON.stringify({ _byRepo: byRepo, _lastUsed: serveState }));
|
||||
const _saved = { ...serveState, _forceBackend: true };
|
||||
byRepo[repo] = _saved;
|
||||
localStorage.setItem(SERVE_STATE_KEY, JSON.stringify({ _byRepo: byRepo, _lastUsed: _saved }));
|
||||
} catch {}
|
||||
const origEnv = _envState.env;
|
||||
const origEnvPath = _envState.envPath;
|
||||
@@ -1882,7 +1871,8 @@ function _rerenderCachedModels() {
|
||||
if (_ssEl && _ssEl.value != null) {
|
||||
if (_ssEl.value === 'local') serveHost = '';
|
||||
else {
|
||||
const _srv = _serverByVal?.(_ssEl.value) || _envState.servers[parseInt(_ssEl.value)];
|
||||
// Values are host strings now; resolve by host (numeric fallback).
|
||||
const _srv = _envState.servers.find(s => s.host === _ssEl.value) || _envState.servers[parseInt(_ssEl.value)];
|
||||
if (_srv) {
|
||||
serveHost = _srv.host;
|
||||
_srvEnv = _srv.env || '';
|
||||
@@ -1938,10 +1928,24 @@ function _rerenderCachedModels() {
|
||||
/**
 * Resolve which host the cached-model listing should target.
 *
 * Reads the `#hwfit-cache-server` select when it exists; otherwise falls back
 * to `_envState.remoteHost`. The select value `'local'` maps to the empty
 * string. Any other value is matched against `_envState.servers` by host
 * string first, then (only for purely numeric values) by array index, then by
 * server name. When nothing matches, the `_envState.remoteHost` default is kept.
 *
 * @returns {string} host to query, or '' for the local machine.
 */
function _resolveCacheHost() {
  let host = _envState.remoteHost || '';
  const cacheSrv = document.getElementById('hwfit-cache-server');

  // Look up a server entry from a select value. The numeric-index fallback is
  // gated on an all-digits value so an IP-like host ("10.0.0.5") is never
  // misread as index 10 by parseInt.
  function _serverByCacheValue(val) {
    if (val === 'local') return null;
    const servers = _envState.servers || [];
    return servers.find(x => x.host === val)
      || (/^\d+$/.test(String(val)) ? servers[parseInt(val, 10)] : null)
      || servers.find(x => x.name === val)
      || null;
  }

  if (cacheSrv) {
    const val = cacheSrv.value;
    if (val === 'local') {
      host = '';
    } else {
      const s = _serverByCacheValue(val);
      if (s) host = s.host;
    }
  }
  return host;
}
|
||||
@@ -2071,8 +2075,12 @@ export async function openServePanelForRepo(repo, fields) {
|
||||
let cur = {};
|
||||
try { cur = JSON.parse(localStorage.getItem(SERVE_STATE_KEY)) || {}; } catch {}
|
||||
const byRepo = (cur && cur._byRepo && typeof cur._byRepo === 'object') ? cur._byRepo : {};
|
||||
byRepo[repo] = fields;
|
||||
localStorage.setItem(SERVE_STATE_KEY, JSON.stringify({ _byRepo: byRepo, _lastUsed: fields }));
|
||||
// Mirror the launch-time save: stamp _forceBackend so the panel's
|
||||
// sv() helper treats these seeded fields as authoritative, not as
|
||||
// overridable defaults.
|
||||
const _seeded = { ...fields, _forceBackend: true };
|
||||
byRepo[repo] = _seeded;
|
||||
localStorage.setItem(SERVE_STATE_KEY, JSON.stringify({ _byRepo: byRepo, _lastUsed: _seeded }));
|
||||
} catch {}
|
||||
}
|
||||
// Switch to the Serve tab (its click handler triggers _fetchCachedModels).
|
||||
@@ -2099,7 +2107,18 @@ export async function openServePanelForRepo(repo, fields) {
|
||||
.find(el => (el.dataset.repo || '').split('/').pop() === _short);
|
||||
}
|
||||
if (card) {
|
||||
if (!card.classList.contains('doclib-card-expanded')) card.click();
|
||||
// If we were given fields to restore, force a fresh render of the
|
||||
// serve panel so it reads the just-written _byRepo[repo] values
|
||||
// from localStorage. Without this, an already-expanded card kept
|
||||
// its stale form and the "Edit serve" → previous settings round-
|
||||
// trip looked broken from the user's side.
|
||||
if (fields && card.classList.contains('doclib-card-expanded')) {
|
||||
card.click();
|
||||
await new Promise(r => setTimeout(r, 40));
|
||||
card.click();
|
||||
} else if (!card.classList.contains('doclib-card-expanded')) {
|
||||
card.click();
|
||||
}
|
||||
try { card.scrollIntoView({ behavior: 'smooth', block: 'center' }); } catch {}
|
||||
return true;
|
||||
}
|
||||
@@ -2130,6 +2149,14 @@ export async function _fetchCachedModels() {
|
||||
try {
|
||||
let host = _envState.remoteHost || '';
|
||||
let selectedServer = null;
|
||||
const _serverByCacheValue = (val) => {
|
||||
if (val === 'local') return null;
|
||||
return _envState.servers.find(x => x.host === val)
|
||||
|| (/^\d+$/.test(String(val)) ? _envState.servers[parseInt(val)] : null)
|
||||
|| _envState.servers.find(x => x.name === val)
|
||||
|| null;
|
||||
};
|
||||
|
||||
const cacheSrv = document.getElementById('hwfit-cache-server');
|
||||
if (cacheSrv) {
|
||||
const val = cacheSrv.value;
|
||||
@@ -2137,11 +2164,11 @@ export async function _fetchCachedModels() {
|
||||
host = '';
|
||||
selectedServer = _envState.servers.find(s => !s.host || s.host === 'local') || _envState.servers[0];
|
||||
} else {
|
||||
const s = _serverByVal?.(val) || _envState.servers[parseInt(val)];
|
||||
const s = _serverByCacheValue(val);
|
||||
if (s) { host = s.host; selectedServer = s; }
|
||||
}
|
||||
} else {
|
||||
selectedServer = _serverByVal?.(_envState.remoteServerKey || host) || _envState.servers[0];
|
||||
selectedServer = _envState.servers.find(s => s.host === host) || _envState.servers[0];
|
||||
}
|
||||
// Read extra model dirs from the SELECTED server's modelDirs (canonical source)
|
||||
const modelDirs = [];
|
||||
@@ -2171,7 +2198,18 @@ export async function _fetchCachedModels() {
|
||||
if (modelDirs.length) qp.set('model_dir', modelDirs.join(','));
|
||||
const params = qp.toString() ? `?${qp}` : '';
|
||||
const res = await fetch(`/api/model/cached${params}`);
|
||||
if (!res.ok) throw new Error(res.statusText);
|
||||
if (!res.ok) {
|
||||
const body = await res.text().catch(() => '');
|
||||
let msg = '';
|
||||
try {
|
||||
const payload = JSON.parse(body);
|
||||
msg = payload && (payload.detail || payload.error || payload.message);
|
||||
} catch {
|
||||
msg = body;
|
||||
}
|
||||
msg = typeof msg === 'string' ? msg.trim() : '';
|
||||
throw new Error(`HTTP ${res.status} ${res.statusText}${msg ? `: ${msg}` : ''}`);
|
||||
}
|
||||
const data = await res.json();
|
||||
_dlWp.destroy();
|
||||
|
||||
@@ -2268,7 +2306,6 @@ export function initServe(shared) {
|
||||
_envState = shared._envState;
|
||||
_sshCmd = shared._sshCmd;
|
||||
_getPort = shared._getPort;
|
||||
_serverByVal = shared._serverByVal;
|
||||
_sshPrefix = shared._sshPrefix;
|
||||
_getPlatform = shared._getPlatform;
|
||||
_isWindows = shared._isWindows;
|
||||
|
||||
@@ -578,13 +578,12 @@ let _libraryArchivedView = false; // Documents tab showing archived docs?
|
||||
const pieces = [];
|
||||
if (doc.session_name) pieces.push(`<span>${_esc(doc.session_name)}</span>`);
|
||||
if (doc.language && doc.language !== 'text') {
|
||||
const ic = langIcon(doc.language, 11, { style: 'vertical-align:-2px;flex-shrink:0;opacity:0.65;color:currentColor;' });
|
||||
pieces.push(`<span style="display:inline-flex;align-items:center;gap:3px;">${ic}${_esc(doc.language)}</span>`);
|
||||
// Per-language icon lives in the title row above; just the language
|
||||
// name here keeps the meta line scannable without duplicating the icon.
|
||||
pieces.push(`<span>${_esc(doc.language)}</span>`);
|
||||
}
|
||||
pieces.push(`<span>${_esc(libraryRelativeTime(doc.updated_at))}</span>`);
|
||||
meta.innerHTML = pieces.join('<span style="opacity:0.5;">\u00b7</span>');
|
||||
// Strip the per-language icon from the meta line \u2014 it now sits next to the
|
||||
// title above, so duplicating it here was redundant.
|
||||
content.appendChild(meta);
|
||||
card.appendChild(content);
|
||||
|
||||
|
||||
@@ -788,7 +788,7 @@ export function openEmailLibrary(opts = {}) {
|
||||
<div class="admin-card" style="flex:1;flex-direction:column;display:flex;overflow:hidden;">
|
||||
<p class="memory-desc doclib-desc">All emails. Click to open as a document.</p>
|
||||
<div class="email-accounts-row">
|
||||
<div id="email-lib-accounts" style="display:flex;gap:4px;flex-wrap:wrap;flex:1;"></div>
|
||||
<div id="email-lib-accounts" style="display:flex;gap:4px;flex:1;min-width:0;"></div>
|
||||
<button class="memory-toolbar-btn email-compose-jiggle" id="email-lib-compose-btn">
|
||||
<svg width="11" height="11" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" style="vertical-align:-2px;margin-right:3px;"><rect x="2" y="4" width="20" height="16" rx="2"/><path d="m22 7-8.97 5.7a1.94 1.94 0 0 1-2.06 0L2 7"/></svg>
|
||||
New
|
||||
|
||||
+149
-2
@@ -36,6 +36,17 @@ function linkHtml(text, url) {
|
||||
return `<a href="${escapeHtml(safeUrl)}" target="_blank" rel="noopener noreferrer">${safeText}</a>`;
|
||||
}
|
||||
|
||||
/**
 * Tell whether a link target looks like the base URL of an OpenAI-compatible
 * model endpoint: an http(s) URL whose path, ignoring trailing slashes, is
 * exactly `/v1`.
 *
 * Relative inputs are resolved against the current page origin.
 *
 * @param {string} rawUrl - href or URL string to inspect; nullish is treated as ''.
 * @returns {boolean} true when the URL parses, uses http/https and its path is `/v1`;
 *   false otherwise (including when parsing throws).
 */
function _isModelEndpointUrl(rawUrl) {
  let parsed;
  try {
    parsed = new URL(String(rawUrl || ''), window.location.origin);
  } catch (_) {
    return false;
  }
  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') return false;
  // Strip trailing slashes so ".../v1" and ".../v1/" are treated alike.
  const path = parsed.pathname.replace(/\/+$/, '');
  return path === '/v1';
}
|
||||
|
||||
/**
|
||||
* Sanitize the raw-HTML fragments that mdToHtml deliberately preserves from
|
||||
* the source text — <details> blocks (collapsible agent output) and <a> tags
|
||||
@@ -327,6 +338,17 @@ function createThinkingSection(thinkingContent, index = 0, thinkingTime = null)
|
||||
`;
|
||||
}
|
||||
|
||||
/**
 * Build the HTML for the "Task completed" status marker: a check-mark icon
 * followed by the label, wrapped in a `role="status"` container.
 *
 * @returns {string} HTML fragment for the marker.
 */
function createTaskCompletedMarker() {
  return `
    <div class="task-completed-marker" role="status" aria-label="Task completed">
      <span class="task-completed-icon" aria-hidden="true">
        <svg viewBox="0 0 24 24" width="14" height="14" fill="none" stroke="currentColor" stroke-width="2.6" stroke-linecap="round" stroke-linejoin="round"><polyline points="20 6 9 17 4 12"/></svg>
      </span>
      <span>Task completed</span>
    </div>
  `;
}
|
||||
|
||||
/**
|
||||
* Process text and render with thinking sections
|
||||
*/
|
||||
@@ -422,6 +444,9 @@ export function processWithThinking(text) {
|
||||
const { thinkingBlocks, content, thinkingTime } = extractThinkingBlocks(text);
|
||||
|
||||
let html = '';
|
||||
let visibleContent = content || '';
|
||||
const doneOnly = /^\s*\[DONE\]\s*$/i.test(visibleContent);
|
||||
const hadTrailingDone = !doneOnly && /(?:^|\n)\s*\[DONE\]\s*$/i.test(visibleContent);
|
||||
|
||||
// Add thinking sections (collapsed by default)
|
||||
thinkingBlocks.forEach((block, index) => {
|
||||
@@ -429,8 +454,12 @@ export function processWithThinking(text) {
|
||||
});
|
||||
|
||||
// Add the actual content
|
||||
if (content) {
|
||||
html += mdToHtml(content);
|
||||
if (doneOnly) {
|
||||
html += createTaskCompletedMarker();
|
||||
} else {
|
||||
if (hadTrailingDone) visibleContent = visibleContent.replace(/\n?\s*\[DONE\]\s*$/i, '').trimEnd();
|
||||
if (visibleContent) html += mdToHtml(visibleContent);
|
||||
if (hadTrailingDone) html += createTaskCompletedMarker();
|
||||
}
|
||||
|
||||
return _useSvgEmoji() ? svgifyEmoji(html) : html;
|
||||
@@ -885,3 +914,121 @@ document.addEventListener('click', function(e) {
|
||||
start();
|
||||
}
|
||||
})();
|
||||
|
||||
/**
 * Derive a display name for an endpoint from its URL.
 *
 * @param {string} url - endpoint URL; relative values resolve against the page origin.
 * @returns {string} the URL's `host` (hostname plus any non-default port), falling
 *   back to `hostname`, or the literal 'Model endpoint' when neither is available
 *   or the URL cannot be parsed.
 */
function _endpointNameFromUrl(url) {
  const FALLBACK = 'Model endpoint';
  try {
    const parsed = new URL(url, window.location.origin);
    return parsed.host || parsed.hostname || FALLBACK;
  } catch (_) {
    return FALLBACK;
  }
}
|
||||
|
||||
/**
 * Insert an "Add to model picker" button after every anchor under `root`
 * (or `root` itself when it is an anchor) whose href passes
 * `_isModelEndpointUrl`.
 *
 * Each anchor is inspected at most once: it is stamped with
 * `data-endpoint-add-checked="1"` before the href test, so repeated calls on
 * the same subtree skip anchors already seen.
 *
 * @param {Element} root - element to scan; ignored when falsy or lacking `querySelectorAll`.
 * @returns {void}
 */
function _appendEndpointAddButtons(root) {
  if (!root || !root.querySelectorAll) return;

  const anchors = root.matches?.('a[href]')
    ? [root]
    : [...root.querySelectorAll('a[href]')];

  for (const anchor of anchors) {
    if (anchor.dataset.endpointAddChecked === '1') continue;
    anchor.dataset.endpointAddChecked = '1';

    const href = anchor.getAttribute('href') || '';
    if (!_isModelEndpointUrl(href)) continue;
    // Guard against a button that is already sitting right after the anchor.
    if (anchor.nextElementSibling?.classList?.contains('model-endpoint-add-btn')) continue;

    const btn = document.createElement('button');
    btn.type = 'button';
    btn.className = 'model-endpoint-add-btn';
    // Store the absolute URL without trailing slashes; the click handler reads it back.
    btn.dataset.endpointUrl = new URL(href, window.location.origin).href.replace(/\/+$/, '');
    btn.title = 'Add this OpenAI-compatible endpoint to the model picker';
    btn.innerHTML = '<span aria-hidden="true">+</span><span>Add to model picker</span>';
    anchor.insertAdjacentElement('afterend', btn);
  }
}
|
||||
|
||||
/**
 * Announce that the saved model-endpoint list changed and refresh dependent UI.
 *
 * Dispatches `ge:model-endpoints-updated` on `window`, then refreshes the
 * model list and the model picker when those modules are present. Refresh
 * errors are logged rather than rethrown so that a UI refresh problem is
 * never reported to the user as a failed endpoint registration.
 *
 * @param {string} baseUrl - endpoint base URL placed in the event detail.
 * @returns {Promise<void>}
 */
async function _notifyModelEndpointsChanged(baseUrl) {
  window.dispatchEvent(new CustomEvent('ge:model-endpoints-updated', { detail: { baseUrl } }));
  try {
    if (window.modelsModule?.refreshModels) await window.modelsModule.refreshModels(true);
    if (window.sessionModule?.updateModelPicker) window.sessionModule.updateModelPicker();
  } catch (err) {
    console.warn('Model picker refresh failed after endpoint update:', err);
  }
}

/**
 * Click handler body for a `.model-endpoint-add-btn`: register the endpoint
 * stored in `btn.dataset.endpointUrl` via `/api/model-endpoints`.
 *
 * Flow:
 *  1. GET the existing endpoints; if one already has the same base URL
 *     (trailing slashes ignored) the button is marked "Already added".
 *  2. Otherwise POST a form with the base URL, a name derived from the URL,
 *     `model_type=llm`, `endpoint_kind=auto`, `skip_probe=true`, plus
 *     `container_local=true` for localhost / 127.0.0.1 / 0.0.0.0 hosts.
 *  3. On success the button is marked "Added" and dependent UI is refreshed.
 *
 * On any failure of the lookup or the POST the button is re-enabled with its
 * original content and the error is shown through `uiModule.showError`.
 *
 * @param {HTMLButtonElement} btn - the clicked add button.
 * @returns {Promise<void>}
 */
async function _registerEndpointFromButton(btn) {
  const baseUrl = String(btn?.dataset?.endpointUrl || '').trim();
  if (!baseUrl || !_isModelEndpointUrl(baseUrl)) return;

  const original = btn.innerHTML;
  btn.disabled = true;
  btn.innerHTML = '<span aria-hidden="true">...</span><span>Adding</span>';

  try {
    const existingRes = await fetch('/api/model-endpoints', { credentials: 'same-origin' });
    if (existingRes.ok) {
      const endpoints = await existingRes.json();
      const existing = Array.isArray(endpoints)
        ? endpoints.find((ep) => String(ep?.base_url || '').replace(/\/+$/, '') === baseUrl)
        : null;
      if (existing) {
        btn.classList.add('added');
        btn.innerHTML = '<span aria-hidden="true">✓</span><span>Already added</span>';
        await _notifyModelEndpointsChanged(baseUrl);
        uiModule.showToast?.(`Already in model picker: ${existing.name || _endpointNameFromUrl(baseUrl)}`);
        return;
      }
    }

    const parsed = new URL(baseUrl, window.location.origin);
    const fd = new FormData();
    fd.append('base_url', baseUrl);
    fd.append('name', _endpointNameFromUrl(baseUrl));
    fd.append('model_type', 'llm');
    fd.append('endpoint_kind', 'auto');
    fd.append('skip_probe', 'true');
    if (/^(localhost|127\.0\.0\.1|0\.0\.0\.0)$/i.test(parsed.hostname)) {
      fd.append('container_local', 'true');
    }

    const res = await fetch('/api/model-endpoints', {
      method: 'POST',
      credentials: 'same-origin',
      body: fd,
    });
    if (!res.ok) {
      const body = await res.text().catch(() => '');
      throw new Error(`HTTP ${res.status}${body ? ': ' + body.slice(0, 160) : ''}`);
    }

    btn.classList.add('added');
    btn.innerHTML = '<span aria-hidden="true">✓</span><span>Added</span>';
    // The endpoint is saved at this point; a refresh hiccup must not undo the
    // "Added" state, so the notifier handles its own errors.
    await _notifyModelEndpointsChanged(baseUrl);
    uiModule.showToast?.(`Model endpoint added: ${_endpointNameFromUrl(baseUrl)}`);
  } catch (err) {
    btn.disabled = false;
    btn.innerHTML = original;
    uiModule.showError?.(`Add endpoint failed: ${err.message || err}`);
  }
}
|
||||
|
||||
/**
 * One-time wiring for model-endpoint links.
 *
 *  - Installs a delegated document click handler that routes clicks on
 *    `.model-endpoint-add-btn` to `_registerEndpointFromButton`.
 *  - Decorates anchors already in the page, then observes `document.body`
 *    for added element nodes and decorates anchors inside them.
 *
 * The `window._modelEndpointLinkWatcherWired` flag makes a second evaluation
 * of this code a no-op.
 */
(function _watchModelEndpointLinks() {
  if (window._modelEndpointLinkWatcherWired) return;
  window._modelEndpointLinkWatcherWired = true;

  document.addEventListener('click', (e) => {
    const btn = e.target.closest?.('.model-endpoint-add-btn');
    if (!btn) return;
    // Keep the click from reaching surrounding handlers.
    e.preventDefault();
    e.stopPropagation();
    _registerEndpointFromButton(btn);
  });

  const start = () => {
    const root = document.body;
    if (!root) return;
    _appendEndpointAddButtons(root);
    const observer = new MutationObserver((mutations) => {
      for (const m of mutations) {
        for (const node of m.addedNodes) {
          // nodeType 1 === element; text and comment nodes cannot contain anchors.
          if (node.nodeType === 1) _appendEndpointAddButtons(node);
        }
      }
    });
    observer.observe(root, { childList: true, subtree: true });
  };

  if (document.readyState === 'loading') {
    document.addEventListener('DOMContentLoaded', start, { once: true });
  } else {
    start();
  }
})();
|
||||
|
||||
@@ -327,13 +327,10 @@ function _initModelPickerDropdown() {
|
||||
// hover so the suffix/variant tag is still discoverable (#1982).
|
||||
nameSpan.title = m.display;
|
||||
row.appendChild(nameSpan);
|
||||
if (m.stale) {
|
||||
const badge = document.createElement('span');
|
||||
badge.className = 'model-switch-stale-badge';
|
||||
badge.textContent = 'offline';
|
||||
badge.style.cssText = 'font-size:10px;opacity:0.7;padding:1px 6px;border:1px solid var(--border);border-radius:8px;margin-left:6px;';
|
||||
row.appendChild(badge);
|
||||
}
|
||||
// Offline state is already conveyed by the row's reduced opacity —
|
||||
// a redundant "offline" pill on top of that just added clutter.
|
||||
// (Class kept on `row` so the opacity rule still applies; the text
|
||||
// badge is gone.)
|
||||
const epSpan = document.createElement('span');
|
||||
epSpan.className = 'model-switch-ep';
|
||||
// Don't show endpoint name if it matches the model name (local self-hosted)
|
||||
|
||||
+8
-1
@@ -178,7 +178,14 @@ export async function refreshModels(force = false) {
|
||||
_loadingSpinner.start();
|
||||
try {
|
||||
if (!_fetchInflight) {
|
||||
_fetchInflight = fetch(`${API_BASE}/api/models`, { credentials: 'same-origin' })
|
||||
// Pass ?refresh=true on forced refreshes so the BACKEND's 30s
|
||||
// per-user cache also gets bypassed. Without this, `force=true`
|
||||
// only clears the frontend cache and the same stale list comes
|
||||
// back — newly-served endpoints don't appear until the cache
|
||||
// ages out. (Bug repro: serve a model, picker is empty for ~30s
|
||||
// even though the endpoint is in the DB and online.)
|
||||
const _url = `${API_BASE}/api/models` + (force ? '?refresh=true' : '');
|
||||
_fetchInflight = fetch(_url, { credentials: 'same-origin' })
|
||||
.then(async (res) => {
|
||||
if (!res.ok) throw new Error(`HTTP ${res.status}`);
|
||||
return res.json();
|
||||
|
||||
@@ -1559,6 +1559,7 @@ async function initResearchSearchSettings() {
|
||||
async function initAgentSettings() {
|
||||
var toolsInput = el('set-agentMaxTools');
|
||||
var roundsInput = el('set-agentMaxRounds');
|
||||
var supInput = el('set-agentSupervisorLadder');
|
||||
var msg = el('set-agentMsg');
|
||||
if (!toolsInput) return;
|
||||
|
||||
@@ -1567,6 +1568,7 @@ async function initAgentSettings() {
|
||||
var settings = await res.json();
|
||||
if (settings.agent_max_tool_calls) toolsInput.value = settings.agent_max_tool_calls;
|
||||
if (roundsInput && settings.agent_max_rounds) roundsInput.value = settings.agent_max_rounds;
|
||||
if (supInput) supInput.checked = !!settings.agent_supervisor_ladder;
|
||||
} catch (e) {}
|
||||
|
||||
// Clamp + coerce a raw input to an int in [lo, hi]; falls back to `dflt`
|
||||
@@ -1584,23 +1586,27 @@ async function initAgentSettings() {
|
||||
if (roundsInput) roundsInput.value = rounds;
|
||||
var payload = { agent_max_tool_calls: tools };
|
||||
if (rounds != null) payload.agent_max_rounds = rounds;
|
||||
if (supInput) payload.agent_supervisor_ladder = !!supInput.checked;
|
||||
try {
|
||||
await fetch('/api/auth/settings', { method: 'POST', credentials: 'same-origin',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify(payload)
|
||||
});
|
||||
msg.textContent = (tools > 0 ? 'Limit: ' + tools + ' tool calls' : 'Unlimited tool calls') +
|
||||
(rounds != null ? ' · ' + rounds + ' steps/message' : '');
|
||||
(rounds != null ? ' · ' + rounds + ' steps/message' : '') +
|
||||
(supInput && supInput.checked ? ' · supervisor on' : '');
|
||||
msg.style.color = 'var(--fg)';
|
||||
} catch (e) { msg.textContent = 'Failed to save'; msg.style.color = 'var(--red)'; }
|
||||
}
|
||||
|
||||
toolsInput.addEventListener('change', save);
|
||||
if (roundsInput) roundsInput.addEventListener('change', save);
|
||||
if (supInput) supInput.addEventListener('change', save);
|
||||
var cur = parseInt(toolsInput.value, 10) || 0;
|
||||
var curR = roundsInput ? (parseInt(roundsInput.value, 10) || 20) : null;
|
||||
msg.textContent = (cur > 0 ? 'Limit: ' + cur + ' tool calls' : 'Unlimited tool calls') +
|
||||
(curR != null ? ' · ' + curR + ' steps/message' : '');
|
||||
(curR != null ? ' · ' + curR + ' steps/message' : '') +
|
||||
(supInput && supInput.checked ? ' · supervisor on' : '');
|
||||
}
|
||||
|
||||
/* ═══════════════════════════════════════════
|
||||
|
||||
+4
-4
@@ -890,10 +890,10 @@ function renderSkillsList() {
|
||||
});
|
||||
}
|
||||
|
||||
// Background-load the visible skills' SKILL.md so expanding any of them is
|
||||
// instant (no first-time async fetch → no jump). Deferred so it never
|
||||
// competes with the render/cascade paint.
|
||||
setTimeout(_preloadVisibleMarkdown, 0);
|
||||
// Do not eager-load every visible SKILL.md. On large skill libraries this
|
||||
// creates dozens of simultaneous /api/skills/<name>/markdown requests during
|
||||
// app startup and can peg uvicorn. Markdown is fetched lazily when a card is
|
||||
// expanded.
|
||||
}
|
||||
|
||||
// ---- Card expand / edit / actions ----
|
||||
|
||||
+266
-14
@@ -2048,12 +2048,64 @@ body.bg-pattern-sparkles {
|
||||
.msg-user .body {
|
||||
color: var(--fg);
|
||||
}
|
||||
.msg-ai .body {
|
||||
color: var(--fg);
|
||||
}
|
||||
.rag-sources {
|
||||
margin-top: 12px;
|
||||
border: 1px solid var(--border);
|
||||
.msg-ai .body {
|
||||
color: var(--fg);
|
||||
}
|
||||
/* Inline pill button placed next to a model-endpoint link. Default state uses
   the --red accent; the .added state switches to the save-green palette. */
.model-endpoint-add-btn {
  display: inline-flex;
  align-items: center;
  gap: 4px;
  margin-left: 7px;
  padding: 2px 7px;
  border: 1px solid color-mix(in srgb, var(--red) 34%, var(--border));
  border-radius: 999px;
  background: color-mix(in srgb, var(--red) 8%, transparent);
  color: var(--red);
  font: inherit;
  font-size: 0.78em;
  line-height: 1.45;
  cursor: pointer;
  vertical-align: 1px;
}
.model-endpoint-add-btn:hover {
  background: color-mix(in srgb, var(--red) 14%, transparent);
  border-color: color-mix(in srgb, var(--red) 55%, var(--border));
}
.model-endpoint-add-btn:disabled {
  cursor: default;
  opacity: 0.72;
}
.model-endpoint-add-btn.added {
  color: var(--color-save-green, #4caf50);
  border-color: color-mix(in srgb, var(--color-save-green, #4caf50) 45%, var(--border));
  background: color-mix(in srgb, var(--color-save-green, #4caf50) 9%, transparent);
}
|
||||
/* "Task completed" status pill: save-green tinted border, background and text. */
.task-completed-marker {
  display: inline-flex;
  align-items: center;
  gap: 7px;
  margin: 7px 0 2px;
  padding: 5px 9px;
  border: 1px solid color-mix(in srgb, var(--color-save-green, #4caf50) 42%, var(--border));
  border-radius: 999px;
  background: color-mix(in srgb, var(--color-save-green, #4caf50) 9%, transparent);
  color: var(--color-save-green, #4caf50);
  font-size: 0.86em;
  font-weight: 600;
}
/* Fixed-size circular badge holding the check icon inside the pill. */
.task-completed-icon {
  display: inline-flex;
  align-items: center;
  justify-content: center;
  width: 17px;
  height: 17px;
  border-radius: 50%;
  background: color-mix(in srgb, var(--color-save-green, #4caf50) 18%, transparent);
  flex: 0 0 auto;
}
|
||||
.rag-sources {
|
||||
margin-top: 12px;
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 6px;
|
||||
padding: 8px;
|
||||
font-size: 12px;
|
||||
@@ -2182,7 +2234,7 @@ body.bg-pattern-sparkles {
|
||||
position: absolute;
|
||||
top: 0;
|
||||
right: 0;
|
||||
z-index: 2;
|
||||
z-index: 250;
|
||||
transform-origin: top right;
|
||||
transition: opacity 0.22s ease, transform 0.22s ease;
|
||||
will-change: opacity, transform;
|
||||
@@ -2704,7 +2756,7 @@ body.bg-pattern-sparkles {
|
||||
position: absolute;
|
||||
bottom: calc(100% + 16px);
|
||||
right: 0;
|
||||
z-index: 300;
|
||||
z-index: 250;
|
||||
min-width: 260px;
|
||||
max-width: 360px;
|
||||
background: var(--panel);
|
||||
@@ -8367,6 +8419,14 @@ body.hide-thinking .thinking-section { display: none !important; }
|
||||
transition: background 0.2s ease;
|
||||
}
|
||||
|
||||
.thinking-header > .token-new {
|
||||
display: none;
|
||||
}
|
||||
|
||||
.thinking-header > div:last-child {
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
.thinking-header:hover {
|
||||
background: color-mix(in srgb, var(--red) 12%, transparent);
|
||||
}
|
||||
@@ -8382,6 +8442,7 @@ body.hide-thinking .thinking-section { display: none !important; }
|
||||
min-width: 0;
|
||||
}
|
||||
.thinking-header-left span {
|
||||
display: block;
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
white-space: nowrap;
|
||||
@@ -8760,6 +8821,22 @@ body.hide-thinking .thinking-section { display: none !important; }
|
||||
.agent-thread-node + .agent-thread-node {
|
||||
margin-top: 2px;
|
||||
}
|
||||
/* Supervisor ladder cards — same chrome as tool cards but tinted so the
   user can tell at a glance "this is the agent recovering" vs "this is
   the agent doing work". Stop rung gets the red accent. */
.agent-thread-node.supervisor-step .agent-thread-tool {
  color: color-mix(in srgb, var(--accent, #c08a3e) 80%, var(--fg));
  font-style: italic;
}
.agent-thread-node.supervisor-step .agent-thread-dot {
  background: color-mix(in srgb, var(--accent, #c08a3e) 60%, transparent);
}
/* data-rung="stop" overrides the accent tint with red for both label and dot. */
.agent-thread-node.supervisor-step[data-rung="stop"] .agent-thread-tool {
  color: var(--red, #d65a5a);
}
.agent-thread-node.supervisor-step[data-rung="stop"] .agent-thread-dot {
  background: color-mix(in srgb, var(--red, #d65a5a) 60%, transparent);
}
|
||||
.agent-thread-dot {
|
||||
position: absolute;
|
||||
left: -20px;
|
||||
@@ -15144,10 +15221,28 @@ body.right-dock-active:not(.email-doc-split-active) .doc-editor-pane {
|
||||
}
|
||||
}
|
||||
|
||||
/* Cookbook's cached-model list should scale with viewport height, not be capped at 400px */
|
||||
/* Cookbook's cached-model list: NO inner-scroll cap. Two nested scroll
|
||||
surfaces (this + the outer .admin-card) trapped the wheel so an expanded
|
||||
serve panel couldn't be reached on tall content. Let the outer
|
||||
.admin-card (overflow-y:auto) be the single scroll surface. */
|
||||
.hwfit-cached-list {
|
||||
max-height: min(75vh, 900px) !important;
|
||||
overflow-y: auto;
|
||||
max-height: none !important;
|
||||
overflow-y: visible !important;
|
||||
}
|
||||
/* Serve panel specifically: the admin-card inline style is
|
||||
`overflow:hidden` (so the toolbar/header don't drift), and the list
|
||||
inside has overflow:visible. On short windows that combination
|
||||
clipped the cards off the bottom with no scrollbar. Make the list
|
||||
itself the scroll surface so the rest of the card stays put. */
|
||||
.cookbook-group[data-backend-group="Serve"] > .admin-card {
|
||||
min-height: 0;
|
||||
}
|
||||
.cookbook-group[data-backend-group="Serve"] > .admin-card > #hwfit-cached-list,
|
||||
.cookbook-group[data-backend-group="Serve"] > .admin-card > .hwfit-cached-list {
|
||||
flex: 1 1 0;
|
||||
min-height: 0;
|
||||
overflow-y: auto !important;
|
||||
overscroll-behavior: contain;
|
||||
}
|
||||
/* Drag-and-drop visual hint for the email compose pane. Subtle accent
|
||||
outline + tinted overlay so it's obvious files will attach if dropped. */
|
||||
@@ -17924,8 +18019,11 @@ body.gallery-selecting .gallery-dl-btn,
|
||||
}
|
||||
#cookbook-modal .cookbook-group > .admin-card {
|
||||
min-height: 0;
|
||||
overflow-y: auto !important;
|
||||
overflow-x: hidden !important;
|
||||
/* Let .cookbook-body be the SINGLE scroll surface. Nesting another
|
||||
overflow:auto here trapped the wheel inside the cached-list when a
|
||||
serve panel expanded — the page couldn't scroll past the panel's
|
||||
bottom (Launch button got hidden). */
|
||||
overflow: visible !important;
|
||||
}
|
||||
#cookbook-modal .cookbook-section-body {
|
||||
min-height: 0;
|
||||
@@ -18733,6 +18831,13 @@ body.gallery-selecting .gallery-dl-btn,
|
||||
justify-content: flex-end;
|
||||
margin-bottom: 4px;
|
||||
}
|
||||
/* When the Save split sits inside Row 1 (next to GPUs), align it with the
|
||||
input baseline (the row's grid cells stretch top-down; without this the
|
||||
Save buttons sit above the GPU button group). */
|
||||
.hwfit-serve-row .cookbook-serve-slots {
|
||||
align-self: end;
|
||||
margin-bottom: 4px;
|
||||
}
|
||||
.cookbook-slot-btn {
|
||||
min-width: 22px; height: 22px;
|
||||
padding: 0 6px;
|
||||
@@ -20207,6 +20312,21 @@ body.gallery-selecting .gallery-dl-btn,
|
||||
background: color-mix(in srgb, var(--color-error) 8%, transparent);
|
||||
border: 1px solid color-mix(in srgb, var(--color-error) 30%, transparent);
|
||||
border-radius: 6px;
|
||||
/* The diagnosis body can carry traceback fragments and long unbroken
|
||||
paths (e.g. /home/.../snapshots/<sha>/<file>.gguf). Without these,
|
||||
a single long token pushes the card wider than the cookbook modal,
|
||||
scrolling the row right and clipping the action buttons. */
|
||||
min-width: 0;
|
||||
max-width: 100%;
|
||||
overflow-wrap: anywhere;
|
||||
word-break: break-word;
|
||||
}
|
||||
.cookbook-diagnosis pre,
|
||||
.cookbook-diagnosis code {
|
||||
white-space: pre-wrap;
|
||||
word-break: break-word;
|
||||
overflow-wrap: anywhere;
|
||||
max-width: 100%;
|
||||
}
|
||||
.cookbook-diag-header {
|
||||
display: flex;
|
||||
@@ -20400,6 +20520,14 @@ body.gallery-selecting .gallery-dl-btn,
|
||||
opacity: 0.5;
|
||||
font-family: inherit;
|
||||
}
|
||||
/* Brief border+glow flash when an Ollama row in the hwfit list autofills the
|
||||
Download input — helps the user see what landed when the input is offscreen
|
||||
or above a tall list. */
|
||||
.cookbook-dl-repo.cookbook-dl-flash {
|
||||
border-color: var(--red) !important;
|
||||
box-shadow: 0 0 0 3px color-mix(in srgb, var(--red) 25%, transparent) !important;
|
||||
transition: border-color 0.2s, box-shadow 0.2s;
|
||||
}
|
||||
.cookbook-dl-btn {
|
||||
background: var(--accent, var(--red));
|
||||
color: #fff;
|
||||
@@ -22446,6 +22574,88 @@ input.settings-select::placeholder { color: color-mix(in srgb, var(--fg) 35%, tr
|
||||
text-align: right;
|
||||
}
|
||||
.settings-fallback-row .settings-select { flex: 1; min-width: 0; }
|
||||
/* Cookbook Serve Advanced fold — wraps the rarely-touched tuning rows
|
||||
(KV/Attention/Swap/Env for vLLM, llama.cpp batch/cache/split, VRAM
|
||||
monitor, speculative, extra args). Matches the existing
|
||||
.hwfit-panel-advanced look: muted-gray label, no caps, no letter-spacing, no
|
||||
warning-y opacity. Content flows into the parent's existing scroll
|
||||
surface (no inner max-height) and inner rows reset their margin so
|
||||
stacking gaps don't double when the fold opens. */
|
||||
/* Styled to match the Add Models page collapsible sections
|
||||
(.adm-section-toggle) — same border/background/caret pattern, so the
|
||||
two folds across the app read consistently. */
|
||||
details.hwfit-serve-advanced {
|
||||
margin-top: 8px;
|
||||
overflow: visible;
|
||||
}
|
||||
details.hwfit-serve-advanced > summary.hwfit-serve-advanced-summary {
|
||||
cursor: pointer;
|
||||
user-select: none;
|
||||
list-style: none;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 6px;
|
||||
font-size: 11px;
|
||||
color: var(--fg);
|
||||
opacity: 0.8;
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 6px;
|
||||
padding: 6px 9px;
|
||||
background: color-mix(in srgb, var(--fg) 4%, transparent);
|
||||
transition: border-color 0.12s, background 0.12s, opacity 0.12s, border-radius 0s;
|
||||
}
|
||||
details.hwfit-serve-advanced > summary.hwfit-serve-advanced-summary::-webkit-details-marker {
|
||||
display: none;
|
||||
}
|
||||
details.hwfit-serve-advanced > summary.hwfit-serve-advanced-summary:hover {
|
||||
opacity: 1;
|
||||
border-color: var(--red);
|
||||
background: color-mix(in srgb, var(--red) 8%, transparent);
|
||||
}
|
||||
/* Caret on the right, rotates open/closed. A CSS triangle drawn with
|
||||
borders keeps this glyph-free + crisp at small sizes. */
|
||||
details.hwfit-serve-advanced > summary.hwfit-serve-advanced-summary::after {
|
||||
content: '';
|
||||
margin-left: auto;
|
||||
width: 0;
|
||||
height: 0;
|
||||
border-left: 4px solid currentColor;
|
||||
border-top: 3px solid transparent;
|
||||
border-bottom: 3px solid transparent;
|
||||
opacity: 0.6;
|
||||
transform: rotate(90deg);
|
||||
transition: transform 0.18s ease;
|
||||
}
|
||||
details.hwfit-serve-advanced:not([open]) > summary.hwfit-serve-advanced-summary::after {
|
||||
transform: rotate(0deg);
|
||||
}
|
||||
/* Body rows below the header — tight rhythm so the fold doesn't
|
||||
feel airy. The cookbook modal's existing .cookbook-body is the
|
||||
scroll surface; nothing inside the fold should add its own scroll. */
|
||||
details.hwfit-serve-advanced[open] > summary.hwfit-serve-advanced-summary {
|
||||
margin-bottom: 6px;
|
||||
}
|
||||
details.hwfit-serve-advanced > .hwfit-serve-row,
|
||||
details.hwfit-serve-advanced > .hwfit-serve-checks,
|
||||
details.hwfit-serve-advanced > .hwfit-serve-cmd-wrap,
|
||||
details.hwfit-serve-advanced > .hwfit-serve-extra {
|
||||
margin-top: 0;
|
||||
margin-bottom: 0;
|
||||
}
|
||||
/* Pull the vLLM/SGLang checks row, Extra args, and the trailing
|
||||
model-specific (Speculative) checks row up tight against the row
|
||||
above — the previous 4px gap plus per-row baseline padding left a
|
||||
~8px gap that read as too airy in the Advanced fold. */
|
||||
details.hwfit-serve-advanced > .hwfit-serve-checks.hwfit-backend-vllm,
|
||||
details.hwfit-serve-advanced > .hwfit-serve-checks.hwfit-backend-sglang,
|
||||
details.hwfit-serve-advanced > .hwfit-serve-extra {
|
||||
margin-top: -8px;
|
||||
}
|
||||
details.hwfit-serve-advanced > .hwfit-serve-row:last-of-type,
|
||||
details.hwfit-serve-advanced > .hwfit-serve-checks:last-of-type {
|
||||
margin-bottom: 0;
|
||||
}
|
||||
|
||||
.settings-fallback-remove {
|
||||
flex-shrink: 0;
|
||||
margin-right: 4px;
|
||||
@@ -22463,6 +22673,9 @@ input.settings-select::placeholder { color: color-mix(in srgb, var(--fg) 35%, tr
|
||||
transition: border-color 0.12s, color 0.12s, background 0.12s;
|
||||
position: relative;
|
||||
top: -6px;
|
||||
/* Glyph baseline trim: nudge × up 1px inside the button without moving the
|
||||
button. line-height < 1 lets the glyph float toward the top of its line box. */
|
||||
line-height: 0.85;
|
||||
}
|
||||
.settings-fallback-remove:hover {
|
||||
border-color: var(--red);
|
||||
@@ -33593,7 +33806,24 @@ button.cal-add-btn.cal-add-btn-text.cal-add-btn-sm:hover .cal-add-label {
|
||||
/* Only the direct-child compose button gets pushed right; nested chips
|
||||
inside #email-lib-accounts pack to the left as normal flex items. */
|
||||
.email-accounts-row > .memory-toolbar-btn { flex-shrink: 0; margin-left: auto; }
|
||||
#email-lib-accounts { justify-content: flex-start; }
|
||||
#email-lib-accounts { justify-content: flex-start; flex-wrap: wrap; }
|
||||
/* Mobile: collapse the account chips to a single horizontally-scrollable
|
||||
strip instead of stacking onto multiple rows. The compose "New" button
|
||||
stays outside the scroller (it's a sibling of #email-lib-accounts inside
|
||||
.email-accounts-row) so it remains pinned on the right. */
|
||||
@media (max-width: 768px) {
|
||||
#email-lib-accounts {
|
||||
flex-wrap: nowrap;
|
||||
overflow-x: auto;
|
||||
overflow-y: hidden;
|
||||
scrollbar-width: none;
|
||||
-ms-overflow-style: none;
|
||||
scroll-snap-type: x proximity;
|
||||
-webkit-overflow-scrolling: touch;
|
||||
}
|
||||
#email-lib-accounts::-webkit-scrollbar { display: none; height: 0; }
|
||||
#email-lib-accounts > * { flex-shrink: 0; scroll-snap-align: start; }
|
||||
}
|
||||
.email-accounts-loading-whirlpool {
|
||||
width: 14px;
|
||||
height: 14px;
|
||||
@@ -36198,6 +36428,16 @@ body.theme-frosted .modal {
|
||||
justify-content: center;
|
||||
}
|
||||
|
||||
/* Mobile: drop the inline icons on Launch + Cancel in the serve panel so
|
||||
the buttons are text-only and don't wrap on narrow screens. Icons stay
|
||||
on desktop where horizontal space isn't tight. */
|
||||
@media (max-width: 600px) {
|
||||
.hwfit-serve-launch > svg,
|
||||
.hwfit-serve-cancel > svg {
|
||||
display: none !important;
|
||||
}
|
||||
}
|
||||
|
||||
/* Schedule form — mounted inside the cookbook serve panel. Uses the
|
||||
theme tokens (--bg, --panel, --border, --accent, --red) so it
|
||||
matches the rest of the cookbook chrome instead of inline whites. */
|
||||
@@ -36249,6 +36489,18 @@ body.theme-frosted .modal {
|
||||
flex-wrap: wrap;
|
||||
gap: 5px;
|
||||
}
|
||||
/* Days field inline with From / Until — push it + the action buttons to
|
||||
the right end of the row so the row reads: From | Until | …gap… | Days | Cancel | Save. */
|
||||
.hwfit-schedule-days-field {
|
||||
margin-left: auto;
|
||||
}
|
||||
.hwfit-schedule-actions-inline {
|
||||
display: inline-flex;
|
||||
align-items: flex-end;
|
||||
gap: 6px;
|
||||
align-self: flex-end;
|
||||
padding-bottom: 1px;
|
||||
}
|
||||
.hwfit-sched-day-chip {
|
||||
width: 32px;
|
||||
height: 32px;
|
||||
|
||||
Reference in New Issue
Block a user