@router.post("/import-from-url")
async def import_skill_from_url(request: Request, body: SkillImportUrlRequest):
    """Install a SKILL.md bundle from a public GitHub URL (skills.sh links supported).

    Admin-only. Downloads the bundle, installs it through
    ``skills_manager.import_bundle_from_files`` and fires the skill-added hook.

    Returns:
        ``{"ok": True, "skill": <installed skill dict>, "files": <file count>}``.

    Raises:
        HTTPException: 400 when the importer rejects the URL or bundle
            (``SkillImportError``), 502 when the download fails at the HTTP
            layer (``httpx.HTTPError``), 500 for anything unexpected.
    """
    require_admin(request)
    user = _owner(request)
    # Local imports, as in the original: the importer is only needed here.
    from fastapi.concurrency import run_in_threadpool

    from services.memory.skill_importer import (
        SkillImportError,
        fetch_skill_bundle,
    )

    url = body.url.strip()
    try:
        # fetch_skill_bundle performs many synchronous httpx requests; calling
        # it inline from this coroutine would block the event loop (and every
        # other request) for the whole download, so run it in a worker thread.
        files, _src = await run_in_threadpool(fetch_skill_bundle, url)
        # The install itself stays on the event-loop thread, exactly as
        # before, so skills_manager is not touched from a second thread.
        entry = skills_manager.import_bundle_from_files(
            files,
            owner=user,
            source_url=url,
        )
    except SkillImportError as e:
        raise HTTPException(400, str(e)) from e
    except httpx.HTTPError as e:
        logger.warning("skill import fetch failed: %s", e)
        detail = str(e).strip() or "Could not download skill from URL"
        raise HTTPException(502, detail) from e
    except Exception as e:
        # The client only sees a generic message, so keep the traceback in
        # the log for diagnosis.
        logger.exception("skill import failed: %s", e)
        raise HTTPException(500, "Skill import failed") from e

    _fire_skill_added(user)
    return {"ok": True, "skill": entry, "files": len(files)}
b/services/memory/skill_importer.py new file mode 100644 index 000000000..65f4b21c0 --- /dev/null +++ b/services/memory/skill_importer.py @@ -0,0 +1,283 @@ +"""Import SKILL.md bundles from public GitHub (or skills.sh → GitHub) URLs.""" +from __future__ import annotations + +import logging +import os +import re +from dataclasses import dataclass +from typing import Dict, List, Optional, Tuple +from urllib.parse import quote, urlparse + +import httpx + +from src.url_safety import check_outbound_url + +logger = logging.getLogger(__name__) + +MAX_FILES = 64 +MAX_TOTAL_BYTES = 2_000_000 +MAX_FILE_BYTES = 400_000 +ALLOWED_SUFFIXES = ( + ".md", ".txt", ".json", ".yaml", ".yml", ".py", ".sh", ".toml", + ".js", ".ts", ".css", ".html", ".xml", ".csv", +) +TEXT_NAMES = {"skill.md", "license", "license.md", "readme.md"} +_GITHUB_HOSTS = frozenset({ + "github.com", "www.github.com", "api.github.com", "raw.githubusercontent.com", +}) + + +def _github_host(url: str) -> str: + return (urlparse(str(url)).hostname or "").lower() + + +def _assert_github_url(url: str, *, context: str = "URL") -> None: + host = _github_host(url) + if host not in _GITHUB_HOSTS: + raise SkillImportError( + f"{context} must stay on GitHub (got {host or 'unknown host'})" + ) + + +@dataclass +class ResolvedSource: + owner: str + repo: str + ref: str + path: str # directory or file path inside repo (no leading slash) + + +class SkillImportError(ValueError): + pass + + +def _safe_relpath(rel: str) -> str: + rel = (rel or "").replace("\\", "/").strip().lstrip("/") + if not rel or rel.startswith("..") or "/../" in f"/{rel}/": + raise SkillImportError(f"unsafe path: {rel!r}") + parts = [p for p in rel.split("/") if p and p != "."] + if any(p == ".." 
for p in parts): + raise SkillImportError(f"unsafe path: {rel!r}") + return "/".join(parts) + + +def _is_text_file(name: str) -> bool: + low = name.lower() + if low in TEXT_NAMES: + return True + return any(low.endswith(s) for s in ALLOWED_SUFFIXES) + + +def parse_skill_source(url: str) -> ResolvedSource: + """Normalize skills.sh / GitHub web URLs into owner/repo/ref/path.""" + raw = (url or "").strip() + if not raw: + raise SkillImportError("URL is required") + + # skills.sh often links to GitHub; try to unwrap ?url= or redirect target later. + if "skills.sh" in raw and "github.com" not in raw: + ok, reason = check_outbound_url(raw) + if not ok: + raise SkillImportError(reason) + with httpx.Client(follow_redirects=True, timeout=20.0) as client: + r = client.get(raw) + if r.status_code >= 400: + raise _github_response_error(r) + final = str(r.url) + _assert_github_url(final, context="redirect target") + # Page may embed a github link; prefer final URL if redirected. + if "github.com" in final: + raw = final + else: + m = re.search(r"https?://github\.com/[^\s\"')]+", r.text or "") + if m: + raw = m.group(0).rstrip(".,)") + + parsed = urlparse(raw) + host = _github_host(raw) + if host not in _GITHUB_HOSTS: + raise SkillImportError( + "Only GitHub URLs are supported (https://github.com/... 
or raw.githubusercontent.com/...)" + ) + + if host == "raw.githubusercontent.com": + # /owner/repo/ref/path/to/file + bits = [p for p in parsed.path.split("/") if p] + if len(bits) < 4: + raise SkillImportError("Invalid raw GitHub URL") + owner, repo, ref = bits[0], bits[1], bits[2] + path = "/".join(bits[3:]) + return ResolvedSource(owner=owner, repo=repo, ref=ref, path=path) + + bits = [p for p in parsed.path.split("/") if p] + if len(bits) < 2: + raise SkillImportError("Invalid GitHub URL") + owner, repo = bits[0], bits[1] + ref = "main" + path = "" + + if len(bits) >= 4 and bits[2] in ("tree", "blob"): + ref = bits[3] + path = "/".join(bits[4:]) + elif len(bits) == 2: + path = "" + else: + raise SkillImportError("GitHub URL must include /tree//... or /blob//...") + + return ResolvedSource(owner=owner, repo=repo, ref=ref, path=path) + + +def _raw_url(src: ResolvedSource, rel_path: str) -> str: + rel = _safe_relpath(rel_path) + return f"https://raw.githubusercontent.com/{src.owner}/{src.repo}/{quote(src.ref, safe='')}/{quote(rel, safe='/')}" + + +def _api_contents_url(src: ResolvedSource, rel_path: str = "") -> str: + rel = _safe_relpath(rel_path) if rel_path else "" + base = f"https://api.github.com/repos/{src.owner}/{src.repo}/contents" + if rel: + base += f"/{quote(rel, safe='/')}" + return f"{base}?ref={quote(src.ref, safe='')}" + + +def _github_response_error(response: httpx.Response) -> SkillImportError: + """Turn a failed GitHub HTTP response into a user-visible import error.""" + status = response.status_code + detail = "" + try: + body = response.json() + if isinstance(body, dict): + detail = str(body.get("message") or "").strip() + except Exception: + detail = (response.text or "").strip()[:200] + + low = detail.lower() + if status == 403 and "rate limit" in low: + return SkillImportError( + "GitHub API rate limit exceeded — try again in a bit" + + (f" ({detail})" if detail else "") + ) + if status == 404: + return SkillImportError("path not found on 
GitHub") + if detail: + return SkillImportError(f"GitHub request failed ({status}): {detail}") + return SkillImportError(f"GitHub request failed ({status})") + + +def _fetch_bytes(url: str) -> bytes: + ok, reason = check_outbound_url(url) + if not ok: + raise SkillImportError(reason) + with httpx.Client(follow_redirects=True, timeout=30.0) as client: + r = client.get(url, headers={"Accept": "application/vnd.github+json"}) + if r.status_code >= 400: + raise _github_response_error(r) + _assert_github_url(str(r.url), context="redirect target") + if len(r.content) > MAX_FILE_BYTES: + raise SkillImportError(f"file too large: {url}") + return r.content + + +def _fetch_text(url: str) -> str: + data = _fetch_bytes(url) + try: + return data.decode("utf-8") + except UnicodeDecodeError as e: + raise SkillImportError(f"non-text file: {url}") from e + + +def _list_github_dir(src: ResolvedSource, rel_dir: str, out: Dict[str, str], *, depth: int = 0) -> None: + if depth > 4 or len(out) >= MAX_FILES: + return + url = _api_contents_url(src, rel_dir) + ok, reason = check_outbound_url(url) + if not ok: + raise SkillImportError(reason) + with httpx.Client(follow_redirects=True, timeout=30.0) as client: + r = client.get(url, headers={"Accept": "application/vnd.github+json"}) + if r.status_code >= 400: + raise _github_response_error(r) + _assert_github_url(str(r.url), context="redirect target") + entries = r.json() + if not isinstance(entries, list): + raise SkillImportError("expected a directory on GitHub") + total = sum(len(v.encode("utf-8")) for v in out.values()) + for ent in entries: + if len(out) >= MAX_FILES or total >= MAX_TOTAL_BYTES: + break + if not isinstance(ent, dict): + continue + name = ent.get("name") or "" + ent_type = ent.get("type") + rel = _safe_relpath(f"{rel_dir}/{name}" if rel_dir else name) + if ent_type == "dir": + _list_github_dir(src, rel, out, depth=depth + 1) + total = sum(len(v.encode("utf-8")) for v in out.values()) + continue + if ent_type != "file" or 
not _is_text_file(name): + continue + dl = ent.get("download_url") + if not dl: + continue + _assert_github_url(dl, context="download URL") + text = _fetch_text(dl) + total += len(text.encode("utf-8")) + if total > MAX_TOTAL_BYTES: + raise SkillImportError("skill bundle exceeds size limit") + out[rel] = text + + +def fetch_skill_bundle(url: str) -> Tuple[Dict[str, str], ResolvedSource]: + """Download SKILL.md and sibling text assets. Returns relative_path → content.""" + src = parse_skill_source(url) + files: Dict[str, str] = {} + + path = _safe_relpath(src.path) if src.path else "" + if path.lower().endswith("skill.md"): + files[path] = _fetch_text(_raw_url(src, path)) + parent = "/".join(path.split("/")[:-1]) + if parent: + try: + _list_github_dir(src, parent, files) + except SkillImportError: + pass + return files, src + + if path: + try: + _fetch_text(_raw_url(src, f"{path}/SKILL.md")) + _list_github_dir(src, path, files) + return files, src + except Exception: + pass + try: + text = _fetch_text(_raw_url(src, path)) + if path.lower().endswith(".md"): + files[path] = text + return files, src + except Exception: + pass + _list_github_dir(src, path, files) + else: + _list_github_dir(src, "", files) + + if not any(p.lower().endswith("skill.md") for p in files): + # Flat repo root with SKILL.md only + try: + files["SKILL.md"] = _fetch_text(_raw_url(src, "SKILL.md")) + except Exception as e: + raise SkillImportError( + "No SKILL.md found — link to a skill folder or SKILL.md on GitHub" + ) from e + return files, src + + +def pick_skill_md(files: Dict[str, str]) -> Tuple[str, str]: + for rel, content in files.items(): + if rel.lower().endswith("skill.md"): + return rel, content + raise SkillImportError("bundle has no SKILL.md") + + +def default_category_from_source(src: ResolvedSource) -> str: + return "imported" diff --git a/services/memory/skills.py b/services/memory/skills.py index 87f74d57c..9cfe801e1 100644 --- a/services/memory/skills.py +++ 
def import_bundle_from_files(
    self,
    files: Dict[str, str],
    *,
    owner: Optional[str] = None,
    source_url: str = "",
    category: str = "imported",
) -> Dict:
    """Install a fetched skill bundle (relative path → text) under skills/.

    Args:
        files: Bundle contents keyed by relative path; must contain a SKILL.md.
        owner: Stored on the installed skill as its owner.
        source_url: When non-empty, an "Imported from …" note is appended to
            the skill's extra body text.
        category: Category to file the skill under (slugified).

    Returns:
        The installed skill as a dict (``Skill.to_dict()``).

    Raises:
        SkillImportError: the bundle is empty, has no SKILL.md, or contains
            an unsafe path.
    """
    from .skill_importer import SkillImportError, pick_skill_md, _safe_relpath
    from core.atomic_io import atomic_write_text

    if not files:
        raise SkillImportError("empty bundle")
    skill_rel, skill_md = pick_skill_md(files)
    sk = Skill.from_markdown(skill_md)

    # Fall back to the containing folder's name. A root-level SKILL.md has
    # no folder; indexing [-2] unconditionally raised IndexError for it.
    rel_parts = skill_rel.split("/")
    folder = rel_parts[-2] if len(rel_parts) >= 2 else ""
    nm = slugify(sk.name or folder or "skill")
    cat = slugify(category or sk.category or "imported", fallback="imported")

    # Validate every path before touching the disk, so an unsafe entry cannot
    # leave a half-written skill directory behind.
    planned = [(_safe_relpath(rel), content) for rel, content in files.items()]

    # Bundle keys are repo-relative (e.g. "skills/pdf/templates/a.md"). When
    # everything lives under SKILL.md's own folder, drop that prefix so that
    # templates/, references/, etc. land directly under the skill dir.
    prefix = _safe_relpath(skill_rel).rpartition("/")[0]
    if prefix and all(rel.startswith(prefix + "/") for rel, _ in planned):
        cut = len(prefix) + 1
        planned = [(rel[cut:], content) for rel, content in planned]

    # Pick a free slug: name, name-2, name-3, ...
    existing = {s["name"] for s in self.load_all()}
    base = nm
    i = 2
    while nm in existing:
        nm = f"{base}-{i}"
        i += 1

    skill_dir = self._skill_dir(cat, nm)
    os.makedirs(skill_dir, exist_ok=True)

    for rel, content in planned:
        dest = os.path.join(skill_dir, rel)
        os.makedirs(os.path.dirname(dest), exist_ok=True)
        atomic_write_text(dest, content)

    sk.name = nm
    sk.category = cat
    sk.owner = owner
    sk.source = "imported"
    if source_url:
        extra = (sk.body_extra or "").strip()
        note = f"Imported from {source_url}"
        sk.body_extra = f"{extra}\n\n{note}".strip() if extra else note
    # Written last: the normalised skill file (final name, category, owner).
    skill_file = self._skill_file(cat, nm)
    atomic_write_text(skill_file, sk.to_markdown())
    sk.path = skill_file
    return sk.to_dict()

Add Skill

-

Create a skill by hand — title, what it solves, and an approach.

+

Import a skill from GitHub or skills.sh (folder with SKILL.md and optional templates).

+
+
+ + Import URL — e.g. GitHub tree link to a skill folder +
+ +
+

Or create a skill by hand — title, what it solves, and an approach.

Title — short name, e.g. “build-vllm-wheel” @@ -332,7 +340,7 @@ Tags — comma-separated, e.g. python, build, vllm
- +
/**
 * Import a skill bundle from the URL typed into #skill-import-url.
 *
 * POSTs the URL to /api/skills/import-from-url, reloads the skills list,
 * shows a toast and opens the imported skill. Failures are reported through
 * uiModule.showError; the import button is re-enabled in every case.
 */
async function importSkillFromUrl() {
  const input = document.getElementById('skill-import-url');
  const btn = document.getElementById('skill-import-url-btn');
  // The Enter-key handler bypasses the button, so the disabled state is the
  // only in-flight marker: ignore a second submit while one is running.
  if (btn?.disabled) return;

  const url = (input?.value || '').trim();
  if (!url) {
    uiModule.showError('Paste a GitHub or skills.sh URL first');
    return;
  }

  if (btn) btn.disabled = true;
  try {
    const res = await fetch(`${API}/api/skills/import-from-url`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ url }),
    });
    // A non-JSON body (e.g. a proxy error page) must not mask the HTTP status.
    const data = await res.json().catch(() => ({}));
    if (!res.ok) throw new Error(data.detail || data.error || `HTTP ${res.status}`);
    if (input) input.value = '';
    await loadSkills();
    const name = data.skill?.name;
    uiModule.showToast(`Imported ${name || 'skill'} (${data.files || 1} file(s))`);
    // Only open what the server actually named; the 'skill' placeholder
    // above is for display only and does not identify a real skill.
    if (name) openSkill(name);
  } catch (err) {
    uiModule.showError('Import failed: ' + err.message);
  } finally {
    if (btn) btn.disabled = false;
  }
}
"""Skill URL importer — URL parsing, redirect guarding and GitHub error surfacing."""
import pytest

from services.memory.skill_importer import (
    ResolvedSource,
    SkillImportError,
    _assert_github_url,
    _fetch_bytes,
    _list_github_dir,
    parse_skill_source,
)


def _mock_httpx_client(monkeypatch, response):
    """Make the importer's ``httpx.Client`` return *response* for every GET.

    Also stubs ``check_outbound_url`` to allow everything, so the tests only
    exercise the importer's own checks.
    """

    class _Client:
        def __init__(self, *args, **kwargs):
            pass

        def __enter__(self):
            return self

        def __exit__(self, *args):
            return False

        def get(self, url, headers=None):
            return response

    monkeypatch.setattr("services.memory.skill_importer.httpx.Client", _Client)
    monkeypatch.setattr(
        "services.memory.skill_importer.check_outbound_url",
        lambda url: (True, ""),
    )


def test_parse_github_blob_skill_md():
    src = parse_skill_source(
        "https://github.com/anthropics/skills/blob/main/skills/pdf/SKILL.md"
    )
    assert src.owner == "anthropics"
    assert src.repo == "skills"
    assert src.ref == "main"
    assert src.path.endswith("skills/pdf/SKILL.md")


def test_parse_github_tree_directory():
    src = parse_skill_source(
        "https://github.com/example/my-skills/tree/develop/caveman-skill"
    )
    assert src.owner == "example"
    assert src.repo == "my-skills"
    assert src.ref == "develop"
    assert src.path == "caveman-skill"


def test_parse_raw_github():
    src = parse_skill_source(
        "https://raw.githubusercontent.com/o/r/main/path/SKILL.md"
    )
    assert src.owner == "o"
    assert src.repo == "r"
    assert src.ref == "main"
    assert src.path == "path/SKILL.md"


def test_rejects_non_github():
    with pytest.raises(SkillImportError):
        parse_skill_source("https://example.com/skill.md")


def test_fetch_bytes_rejects_cross_host_redirect(monkeypatch):
    # The response claims to come from a non-GitHub host.
    class _Resp:
        url = "https://evil.example/secret"
        status_code = 200
        content = b"x"

    _mock_httpx_client(monkeypatch, _Resp())
    with pytest.raises(SkillImportError, match="redirect target"):
        _fetch_bytes("https://raw.githubusercontent.com/o/r/main/SKILL.md")


def test_assert_github_url_allows_api_host():
    _assert_github_url(
        "https://api.github.com/repos/o/r/contents?ref=main",
        context="redirect target",
    )


def test_list_github_dir_accepts_api_github_response(monkeypatch):
    class _Resp:
        url = "https://api.github.com/repos/o/r/contents?ref=main"
        status_code = 200

        def json(self):
            return [{
                "name": "SKILL.md",
                "type": "file",
                "download_url": "https://raw.githubusercontent.com/o/r/main/SKILL.md",
            }]

    _mock_httpx_client(monkeypatch, _Resp())
    # File downloads are stubbed out; only the directory listing is exercised.
    monkeypatch.setattr(
        "services.memory.skill_importer._fetch_text",
        lambda url: "# skill\n",
    )

    out = {}
    src = ResolvedSource(owner="o", repo="r", ref="main", path="")
    _list_github_dir(src, "", out)
    assert "SKILL.md" in out


def test_list_github_dir_surfaces_rate_limit(monkeypatch):
    class _Resp:
        url = "https://api.github.com/repos/o/r/contents?ref=main"
        status_code = 403

        def json(self):
            return {"message": "API rate limit exceeded for 203.0.113.1"}

    _mock_httpx_client(monkeypatch, _Resp())
    src = ResolvedSource(owner="o", repo="r", ref="main", path="")
    with pytest.raises(SkillImportError, match="rate limit"):
        _list_github_dir(src, "", {})


def test_fetch_bytes_surfaces_github_error_detail(monkeypatch):
    class _Resp:
        url = "https://raw.githubusercontent.com/o/r/main/SKILL.md"
        status_code = 403
        content = b""

        def json(self):
            return {"message": "Forbidden"}

    _mock_httpx_client(monkeypatch, _Resp())
    with pytest.raises(SkillImportError, match="GitHub request failed \\(403\\): Forbidden"):
        _fetch_bytes("https://raw.githubusercontent.com/o/r/main/SKILL.md")