feat(skills): import SKILL.md bundles from public GitHub URLs (#2576)

* feat(skills): import SKILL.md bundles from public GitHub URLs

Supports GitHub tree/blob/raw links and skills.sh pages that resolve to GitHub.
Installs SKILL.md plus sibling text assets under data/skills/imported/.

Co-authored-by: Cursor <cursoragent@cursor.com>

* fix(skills): admin-gate URL import and validate redirect hosts

- require_admin on POST /api/skills/import-from-url (matches other skill admin routes)
- reject cross-host redirects after httpx follow_redirects
- test for redirect host validation

Co-authored-by: Cursor <cursoragent@cursor.com>

* fix(skills): match Brain Add panel import/submit button styles

- Skill URL Import: theme-io-btn + download icon (same as memory Import)
- Add Skill submit: confirm-btn confirm-btn-primary

Co-authored-by: Cursor <cursoragent@cursor.com>

* fix(skills): allow api.github.com during directory import

Real imports hit the GitHub contents API after redirects; whitelist
api.github.com and add regression tests. Shrink Import button with flex:none.

Co-authored-by: Cursor <cursoragent@cursor.com>

* fix(skills): align skill Import button with URL input row

Match memory-add-input height (28px) in memory-add-row and center the
download icon with flexbox instead of vertical-align hacks.

Co-authored-by: Cursor <cursoragent@cursor.com>

* fix(skills): cancel modal-body margin on skill Import button

The skill Import button sits in .memory-add-row beside an input; the
global .modal-body button { margin-top: 6px } rule only affected buttons,
pushing Import down and misaligning the download icon. Reset margin-top
and match Memory Import SVG markup at 28px row height.

Co-authored-by: Cursor <cursoragent@cursor.com>

* fix(skills): surface GitHub API errors on URL import

Pass through GitHub response messages (especially 403 rate limits) as
SkillImportError instead of a generic download failure.

Co-authored-by: Cursor <cursoragent@cursor.com>

---------

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Giulio Zelante
2026-06-05 19:48:23 +02:00
committed by GitHub
parent 977daf0643
commit b448119919
7 changed files with 597 additions and 2 deletions
+36
View File
@@ -11,6 +11,8 @@ import logging
import re
from typing import List, Optional
import httpx
from fastapi import APIRouter, HTTPException, Request
from pydantic import BaseModel, Field
@@ -51,6 +53,10 @@ class SkillAddRequest(BaseModel):
steps: List[str] = Field(default_factory=list)
class SkillImportUrlRequest(BaseModel):
    # Request body for POST /import-from-url.
    # `url` is required; Field limits it to 8..2000 characters.
    url: str = Field(
        ...,
        min_length=8,
        max_length=2000,
    )
class SkillUpdateRequest(BaseModel):
name: Optional[str] = None
description: Optional[str] = None
@@ -1203,6 +1209,36 @@ def setup_skills_routes(skills_manager: SkillsManager) -> APIRouter:
save_settings(settings)
return {"ok": True, "name": name, "is_overridden": False}
@router.post("/import-from-url")
async def import_skill_from_url(request: Request, body: SkillImportUrlRequest):
    """Install a SKILL.md bundle from a public GitHub URL (skills.sh links supported).

    Calls ``require_admin(request)`` before doing any work, downloads the
    bundle, installs it through ``skills_manager`` and fires the
    "skill added" hook for the owner.

    Returns:
        ``{"ok": True, "skill": <installed skill dict>, "files": <file count>}``.

    Raises:
        HTTPException: 400 when the importer reports a ``SkillImportError``,
            502 when the HTTP download fails, 500 for any other failure.
    """
    require_admin(request)
    user = _owner(request)
    from fastapi.concurrency import run_in_threadpool
    from services.memory.skill_importer import (
        SkillImportError,
        fetch_skill_bundle,
    )

    source_url = body.url.strip()
    try:
        # fetch_skill_bundle performs many blocking HTTP requests; running it
        # directly in this coroutine would stall the event loop for every
        # other request until the download finishes.
        files, _src = await run_in_threadpool(fetch_skill_bundle, source_url)
        entry = skills_manager.import_bundle_from_files(
            files,
            owner=user,
            source_url=source_url,
        )
    except SkillImportError as e:
        raise HTTPException(400, str(e)) from e
    except httpx.HTTPError as e:
        logger.warning("skill import fetch failed: %s", e)
        detail = str(e).strip() or "Could not download skill from URL"
        raise HTTPException(502, detail) from e
    except Exception as e:
        # logger.exception keeps the traceback; the client only sees a generic message.
        logger.exception("skill import failed: %s", e)
        raise HTTPException(500, "Skill import failed") from e
    _fire_skill_added(user)
    return {"ok": True, "skill": entry, "files": len(files)}
@router.post("/add")
async def add_skill(request: Request, body: SkillAddRequest):
user = _owner(request)
+283
View File
@@ -0,0 +1,283 @@
"""Import SKILL.md bundles from public GitHub (or skills.sh → GitHub) URLs."""
from __future__ import annotations
import logging
import os
import re
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple
from urllib.parse import quote, urlparse
import httpx
from src.url_safety import check_outbound_url
logger = logging.getLogger(__name__)
# Upper bound on the number of files collected into one bundle.
MAX_FILES = 64
# Upper bound on the combined UTF-8 size of all collected files.
MAX_TOTAL_BYTES = 2_000_000
# Upper bound on the size of a single downloaded response body.
MAX_FILE_BYTES = 400_000
# File-name suffixes (compared lower-case) that are treated as importable text.
ALLOWED_SUFFIXES = (
    ".md", ".txt", ".json", ".yaml", ".yml", ".py", ".sh", ".toml",
    ".js", ".ts", ".css", ".html", ".xml", ".csv",
)
# Exact lower-case file names accepted regardless of suffix.
TEXT_NAMES = {"skill.md", "license", "license.md", "readme.md"}
# Hosts a request (and any redirect it follows) is allowed to end on.
_GITHUB_HOSTS = frozenset({
    "github.com", "www.github.com", "api.github.com", "raw.githubusercontent.com",
})
def _github_host(url: str) -> str:
return (urlparse(str(url)).hostname or "").lower()
def _assert_github_url(url: str, *, context: str = "URL") -> None:
    """Raise ``SkillImportError`` unless *url* points at an allowed GitHub host.

    *context* names the kind of URL in the error message.
    """
    host = _github_host(url)
    if host in _GITHUB_HOSTS:
        return
    raise SkillImportError(
        f"{context} must stay on GitHub (got {host or 'unknown host'})"
    )
@dataclass
class ResolvedSource:
    """Owner/repo/ref/path coordinates parsed from a skill URL."""

    owner: str
    repo: str
    ref: str
    # Directory or file path inside the repo (no leading slash).
    path: str
class SkillImportError(ValueError):
    """Raised by the importer for problems that should be reported to the user."""
def _safe_relpath(rel: str) -> str:
rel = (rel or "").replace("\\", "/").strip().lstrip("/")
if not rel or rel.startswith("..") or "/../" in f"/{rel}/":
raise SkillImportError(f"unsafe path: {rel!r}")
parts = [p for p in rel.split("/") if p and p != "."]
if any(p == ".." for p in parts):
raise SkillImportError(f"unsafe path: {rel!r}")
return "/".join(parts)
def _is_text_file(name: str) -> bool:
    """Return True when *name* is a known text file name or has an allowed suffix."""
    lowered = name.lower()
    # str.endswith accepts the whole suffix tuple in a single call.
    return lowered in TEXT_NAMES or lowered.endswith(ALLOWED_SUFFIXES)
def _is_skills_sh_host(host: str) -> bool:
    """Return True when *host* is skills.sh or one of its subdomains."""
    return host == "skills.sh" or host.endswith(".skills.sh")


def parse_skill_source(url: str) -> ResolvedSource:
    """Normalize skills.sh / GitHub web URLs into owner/repo/ref/path.

    Accepted forms:
      * ``https://github.com/<owner>/<repo>`` (ref defaults to ``main``)
      * ``https://github.com/<owner>/<repo>/tree|blob/<ref>/<path>``
      * ``https://raw.githubusercontent.com/<owner>/<repo>/<ref>/<path>``,
        including the ``refs/heads/<ref>`` and ``refs/tags/<ref>`` variants
      * a skills.sh page that redirects to, or links to, one of the above

    The ref is read as a single path segment, so branch names containing
    ``/`` cannot be expressed through tree/blob links.

    Raises:
        SkillImportError: for empty input, non-GitHub targets, failed
            skills.sh lookups, or URL shapes that cannot be resolved.
    """
    # Local import: ``unquote`` is not among this module's top-level imports.
    from urllib.parse import unquote

    raw = (url or "").strip()
    if not raw:
        raise SkillImportError("URL is required")

    # Decide by host rather than substring, so a GitHub URL that merely
    # mentions "skills.sh" is never fetched as if it were a skills.sh page.
    if _is_skills_sh_host(_github_host(raw)):
        ok, reason = check_outbound_url(raw)
        if not ok:
            raise SkillImportError(reason)
        with httpx.Client(follow_redirects=True, timeout=20.0) as client:
            r = client.get(raw)
        final = str(r.url)
        final_host = _github_host(final)
        on_github = final_host in _GITHUB_HOSTS
        if not on_github and not _is_skills_sh_host(final_host):
            # Redirected somewhere that is neither skills.sh nor GitHub.
            _assert_github_url(final, context="redirect target")
        if r.status_code >= 400:
            raise _github_response_error(r)
        if on_github:
            raw = final
        else:
            # No redirect: the page itself must carry the GitHub link.
            m = re.search(r"https?://github\.com/[^\s\"'<>)]+", r.text or "")
            if not m:
                raise SkillImportError(
                    "skills.sh page does not link to a GitHub repository"
                )
            raw = m.group(0).rstrip(".,)")

    parsed = urlparse(raw)
    host = _github_host(raw)
    if host not in _GITHUB_HOSTS:
        raise SkillImportError(
            "Only GitHub URLs are supported (https://github.com/... or raw.githubusercontent.com/...)"
        )

    bits = [p for p in parsed.path.split("/") if p]
    if host == "raw.githubusercontent.com":
        # /owner/repo/ref/path/to/file  or  /owner/repo/refs/heads/ref/path/to/file
        if len(bits) < 4:
            raise SkillImportError("Invalid raw GitHub URL")
        owner, repo = bits[0], bits[1]
        if len(bits) >= 6 and bits[2] == "refs" and bits[3] in ("heads", "tags"):
            ref, rest = bits[4], bits[5:]
        else:
            ref, rest = bits[2], bits[3:]
    else:
        if len(bits) < 2:
            raise SkillImportError("Invalid GitHub URL")
        owner, repo = bits[0], bits[1]
        if len(bits) >= 4 and bits[2] in ("tree", "blob"):
            ref, rest = bits[3], bits[4:]
        elif len(bits) == 2:
            ref, rest = "main", []
        else:
            raise SkillImportError("GitHub URL must include /tree/<branch>/... or /blob/<branch>/...")

    # URL paths arrive percent-encoded and the URL builders quote them again;
    # decode here so "my%20skill" is not turned into "my%2520skill".
    return ResolvedSource(
        owner=owner,
        repo=repo,
        ref=unquote(ref),
        path="/".join(unquote(p) for p in rest),
    )
def _raw_url(src: ResolvedSource, rel_path: str) -> str:
    """Build the raw.githubusercontent.com URL for *rel_path* at ``src.ref``."""
    path_part = quote(_safe_relpath(rel_path), safe='/')
    ref_part = quote(src.ref, safe='')
    return f"https://raw.githubusercontent.com/{src.owner}/{src.repo}/{ref_part}/{path_part}"
def _api_contents_url(src: ResolvedSource, rel_path: str = "") -> str:
    """Build the GitHub contents-API URL for *rel_path* (repo root when empty)."""
    rel = _safe_relpath(rel_path) if rel_path else ""
    pieces = [f"https://api.github.com/repos/{src.owner}/{src.repo}/contents"]
    if rel:
        pieces.append(quote(rel, safe='/'))
    return "/".join(pieces) + f"?ref={quote(src.ref, safe='')}"
def _github_response_error(response: httpx.Response) -> SkillImportError:
    """Build (not raise) a user-facing import error for a failed HTTP response.

    The detail is the JSON ``message`` field when the body is a JSON object,
    otherwise the first 200 characters of the response text.
    """
    code = response.status_code
    detail = ""
    try:
        payload = response.json()
        if isinstance(payload, dict):
            detail = str(payload.get("message") or "").strip()
    except Exception:
        # Body is not JSON: fall back to a short excerpt of the text.
        detail = (response.text or "").strip()[:200]

    if code == 403 and "rate limit" in detail.lower():
        suffix = f" ({detail})" if detail else ""
        return SkillImportError(
            "GitHub API rate limit exceeded — try again in a bit" + suffix
        )
    if code == 404:
        return SkillImportError("path not found on GitHub")
    if not detail:
        return SkillImportError(f"GitHub request failed ({code})")
    return SkillImportError(f"GitHub request failed ({code}): {detail}")
def _fetch_bytes(url: str) -> bytes:
    """Download *url* and return the response body.

    The URL must pass ``check_outbound_url`` and the final URL after
    redirects must be on an allowed GitHub host. The size limit is applied
    to the body once it has been received.

    Raises:
        SkillImportError: if the URL is rejected, a redirect leaves GitHub,
            the response status is >= 400, or the body exceeds
            ``MAX_FILE_BYTES``.
    """
    ok, reason = check_outbound_url(url)
    if not ok:
        raise SkillImportError(reason)
    with httpx.Client(follow_redirects=True, timeout=30.0) as client:
        r = client.get(url, headers={"Accept": "application/vnd.github+json"})
    # Validate where we ended up before reading anything from the response,
    # so an off-GitHub host's error body is never reported as a GitHub error.
    _assert_github_url(str(r.url), context="redirect target")
    if r.status_code >= 400:
        raise _github_response_error(r)
    if len(r.content) > MAX_FILE_BYTES:
        raise SkillImportError(f"file too large: {url}")
    return r.content
def _fetch_text(url: str) -> str:
    """Download *url* and decode it as UTF-8.

    Raises:
        SkillImportError: if the body is not valid UTF-8 (or the download
            itself is rejected by ``_fetch_bytes``).
    """
    payload = _fetch_bytes(url)
    try:
        text = payload.decode("utf-8")
    except UnicodeDecodeError as e:
        raise SkillImportError(f"non-text file: {url}") from e
    return text
def _list_github_dir(src: ResolvedSource, rel_dir: str, out: Dict[str, str], *, depth: int = 0) -> None:
    """Collect the text files under *rel_dir* into *out* (relative path -> text).

    Uses the GitHub contents API and recurses into sub-directories. Stops
    quietly once the recursion depth exceeds 4 or ``MAX_FILES`` entries have
    been gathered. Files already present in *out* are left untouched.

    Raises:
        SkillImportError: if the URL is rejected, a redirect or download URL
            leaves GitHub, the API responds with an error, the path is not a
            directory, or the bundle grows past ``MAX_TOTAL_BYTES``.
    """
    if depth > 4 or len(out) >= MAX_FILES:
        return
    url = _api_contents_url(src, rel_dir)
    ok, reason = check_outbound_url(url)
    if not ok:
        raise SkillImportError(reason)
    with httpx.Client(follow_redirects=True, timeout=30.0) as client:
        r = client.get(url, headers={"Accept": "application/vnd.github+json"})
    # Check the final host first so only GitHub's own error bodies are surfaced.
    _assert_github_url(str(r.url), context="redirect target")
    if r.status_code >= 400:
        raise _github_response_error(r)
    entries = r.json()
    if not isinstance(entries, list):
        raise SkillImportError("expected a directory on GitHub")
    total = sum(len(v.encode("utf-8")) for v in out.values())
    for ent in entries:
        if len(out) >= MAX_FILES or total >= MAX_TOTAL_BYTES:
            break
        if not isinstance(ent, dict):
            continue
        name = ent.get("name") or ""
        if not name:
            # A nameless entry would resolve to the directory itself.
            continue
        ent_type = ent.get("type")
        rel = _safe_relpath(f"{rel_dir}/{name}" if rel_dir else name)
        if ent_type == "dir":
            _list_github_dir(src, rel, out, depth=depth + 1)
            # The recursive call added files; refresh the running size.
            total = sum(len(v.encode("utf-8")) for v in out.values())
            continue
        if ent_type != "file" or not _is_text_file(name):
            continue
        if rel in out:
            # Already fetched by the caller: downloading it again would waste
            # a request and count its size twice against the cap.
            continue
        dl = ent.get("download_url")
        if not dl:
            continue
        _assert_github_url(dl, context="download URL")
        text = _fetch_text(dl)
        total += len(text.encode("utf-8"))
        if total > MAX_TOTAL_BYTES:
            raise SkillImportError("skill bundle exceeds size limit")
        out[rel] = text
def fetch_skill_bundle(url: str) -> Tuple[Dict[str, str], ResolvedSource]:
    """Download SKILL.md and sibling text assets. Returns relative_path → content.

    Resolution order for the path inside the repository:
      1. a path ending in ``skill.md`` — fetch it, then best-effort list its
         parent directory for sibling files;
      2. a directory containing ``SKILL.md`` — list the directory;
      3. a single ``.md`` file — return just that file;
      4. anything else — list it as a directory (repo root when empty).
    If nothing ending in ``skill.md`` was collected, the repo-root
    ``SKILL.md`` is tried as a last resort.

    Raises:
        SkillImportError: when the URL cannot be resolved or no SKILL.md
            can be found.
        httpx.HTTPError: when a required download fails at transport level.
    """
    # Failures that mean "this probe did not match", not "abort the import".
    probe_errors = (SkillImportError, httpx.HTTPError, httpx.InvalidURL)

    src = parse_skill_source(url)
    files: Dict[str, str] = {}
    path = _safe_relpath(src.path) if src.path else ""

    if path.lower().endswith("skill.md"):
        files[path] = _fetch_text(_raw_url(src, path))
        parent = "/".join(path.split("/")[:-1])
        if parent:
            try:
                _list_github_dir(src, parent, files)
            except SkillImportError as e:
                # Siblings are optional; SKILL.md alone is still importable.
                logger.warning("could not list sibling files of %s: %s", path, e)
        return files, src

    if path:
        # Probe for <path>/SKILL.md to decide whether path is a skill folder.
        try:
            _fetch_text(_raw_url(src, f"{path}/SKILL.md"))
            is_skill_dir = True
        except probe_errors as e:
            logger.debug("no SKILL.md directly under %s: %s", path, e)
            is_skill_dir = False
        if is_skill_dir:
            # Listing errors (e.g. rate limits) propagate instead of being
            # swallowed and retried through the fallbacks below.
            _list_github_dir(src, path, files)
            return files, src

        # Not a skill folder: maybe the link points at a single markdown file.
        try:
            text = _fetch_text(_raw_url(src, path))
        except probe_errors as e:
            logger.debug("%s is not a downloadable file: %s", path, e)
            text = None
        if text is not None and path.lower().endswith(".md"):
            files[path] = text
            return files, src
        _list_github_dir(src, path, files)
    else:
        _list_github_dir(src, "", files)

    if not any(p.lower().endswith("skill.md") for p in files):
        # Flat repo root with SKILL.md only
        try:
            files["SKILL.md"] = _fetch_text(_raw_url(src, "SKILL.md"))
        except Exception as e:
            raise SkillImportError(
                "No SKILL.md found — link to a skill folder or SKILL.md on GitHub"
            ) from e
    return files, src
def pick_skill_md(files: Dict[str, str]) -> Tuple[str, str]:
    """Return ``(relative_path, content)`` of the bundle's SKILL.md.

    Files whose base name is exactly ``skill.md`` (any case) are preferred,
    and among those the one closest to the bundle root wins, so a nested
    skill or a file such as ``not-a-skill.md`` cannot shadow the real
    manifest. If there is no exact match, any path ending in ``skill.md`` is
    accepted, as before.

    Raises:
        SkillImportError: if no candidate exists.
    """
    exact = [rel for rel in files if rel.rsplit("/", 1)[-1].lower() == "skill.md"]
    loose = [rel for rel in files if rel.lower().endswith("skill.md")]
    candidates = exact or loose
    if not candidates:
        raise SkillImportError("bundle has no SKILL.md")
    # min() returns the first of equally shallow paths, keeping insertion order.
    chosen = min(candidates, key=lambda rel: rel.count("/"))
    return chosen, files[chosen]
def default_category_from_source(src: ResolvedSource) -> str:
    """Return the category for imported skills; *src* is not consulted."""
    category = "imported"
    return category
+48
View File
@@ -381,6 +381,54 @@ class SkillsManager:
return sk.to_dict()
def import_bundle_from_files(
    self,
    files: Dict[str, str],
    *,
    owner: Optional[str] = None,
    source_url: str = "",
    category: str = "imported",
) -> Dict:
    """Install a fetched skill bundle (relative path → text) under skills/.

    The skill name comes from the SKILL.md front matter, falling back to the
    folder that holds SKILL.md and finally to ``"skill"``; a numeric suffix
    is appended until the name is unused. Bundle files are written relative
    to the folder that contains SKILL.md, then the normalized SKILL.md is
    written to the skill file path.

    Args:
        files: Relative path → text content, as returned by the importer.
        owner: Stored on the skill as its owner.
        source_url: When non-empty, appended to the skill body as a note.
        category: Category slug to install under.

    Returns:
        The installed skill as a dict.

    Raises:
        SkillImportError: if the bundle is empty, has no SKILL.md, or
            contains an unsafe path.
    """
    from .skill_importer import SkillImportError, pick_skill_md, _safe_relpath
    from core.atomic_io import atomic_write_text

    if not files:
        raise SkillImportError("empty bundle")

    manifest_rel, skill_md = pick_skill_md(files)
    sk = Skill.from_markdown(skill_md)

    # Folder holding SKILL.md inside the bundle ("" for a root-level file).
    # rpartition never raises, unlike indexing split("/")[-2] on "SKILL.md".
    bundle_root = _safe_relpath(manifest_rel).rpartition("/")[0]
    folder_name = bundle_root.rpartition("/")[2]
    nm = slugify(sk.name or folder_name or "skill")
    cat = slugify(category or sk.category or "imported", fallback="imported")

    existing = {s["name"] for s in self.load_all()}
    base = nm
    i = 2
    while nm in existing:
        nm = f"{base}-{i}"
        i += 1

    skill_dir = self._skill_dir(cat, nm)
    os.makedirs(skill_dir, exist_ok=True)

    # Preserve bundle layout (templates/, references/, etc.) under the skill dir.
    # Paths are re-rooted at the SKILL.md folder so relative references inside
    # SKILL.md keep working; files outside that folder keep their full path.
    prefix = f"{bundle_root}/" if bundle_root else ""
    for rel, content in files.items():
        safe = _safe_relpath(rel)
        if prefix and safe.startswith(prefix):
            safe = safe[len(prefix):]
        if not safe:
            continue
        dest = os.path.join(skill_dir, safe)
        os.makedirs(os.path.dirname(dest), exist_ok=True)
        atomic_write_text(dest, content)

    sk.name = nm
    sk.category = cat
    sk.owner = owner
    sk.source = "imported"
    if source_url:
        extra = (sk.body_extra or "").strip()
        note = f"Imported from {source_url}"
        sk.body_extra = f"{extra}\n\n{note}".strip() if extra else note

    # Written last so the normalized SKILL.md wins over the raw copy.
    skill_file = self._skill_file(cat, nm)
    atomic_write_text(skill_file, sk.to_markdown())
    sk.path = skill_file
    return sk.to_dict()
def update_skill(self, skill_id: str, updates: Dict, owner: Optional[str] = None) -> bool:
"""`skill_id` is the slug name. Allows updating any field plus
renames if `name` changes (file is moved on disk).
+10 -2
View File
@@ -314,7 +314,15 @@
<div style="display:flex;align-items:baseline;gap:8px;margin-bottom:2px;">
<h2 style="margin:0;padding:0;line-height:1;"><svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="vertical-align:-2px;margin-right:6px"><polygon points="13 2 3 14 12 14 11 22 21 10 12 10 13 2"/></svg>Add Skill</h2>
</div>
<p class="memory-desc doclib-desc" style="margin-top:6px;">Create a skill by hand — title, what it solves, and an approach.</p>
<p class="memory-desc doclib-desc" style="margin-top:6px;">Import a skill from GitHub or <a href="https://skills.sh" target="_blank" rel="noopener noreferrer">skills.sh</a> (folder with <code>SKILL.md</code> and optional templates).</p>
<div class="memory-add-row" style="margin-top:6px;margin-bottom:10px;">
<div class="skill-ph-wrap" style="flex:1;min-width:0;">
<input type="url" id="skill-import-url" placeholder=" " class="memory-add-input skill-hint-input" aria-label="Skill import URL" />
<span class="skill-rich-ph"><span class="k">Import URL</span> — e.g. GitHub tree link to a skill folder</span>
</div>
<button type="button" id="skill-import-url-btn" class="theme-io-btn" title="Import skill from URL" style="flex:none;height:28px;font-size:12px;"><svg width="13" height="13" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="vertical-align:-2px;margin-right:4px;" aria-hidden="true"><path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/><polyline points="7 10 12 15 17 10"/><line x1="12" y1="15" x2="12" y2="3"/></svg>Import</button>
</div>
<p class="memory-desc doclib-desc" style="margin-top:0;">Or create a skill by hand — title, what it solves, and an approach.</p>
<div class="skill-ph-wrap" style="margin-top:4px;margin-bottom:6px;">
<input type="text" id="new-skill-title" placeholder=" " class="memory-add-input skill-hint-input" aria-label="Skill title" />
<span class="skill-rich-ph"><span class="k">Title</span> — short name, e.g. “build-vllm-wheel”</span>
@@ -332,7 +340,7 @@
<span class="skill-rich-ph"><span class="k">Tags</span> — comma-separated, e.g. python, build, vllm</span>
</div>
<div style="display:flex;justify-content:flex-end;">
<button id="add-skill-btn" class="memory-toolbar-btn">Add Skill</button>
<button id="add-skill-btn" class="confirm-btn confirm-btn-primary">Add Skill</button>
</div>
</div>
</div>
+33
View File
@@ -1818,6 +1818,35 @@ async function _showSkillSource(name) {
});
}
/**
 * Import a skill from the URL typed into #skill-import-url.
 *
 * POSTs the URL to /api/skills/import-from-url, reloads the skill list,
 * shows a toast and opens the imported skill. The Import button is
 * disabled while the request is in flight.
 */
async function importSkillFromUrl() {
  const input = document.getElementById('skill-import-url');
  const btn = document.getElementById('skill-import-url-btn');
  // Enter in the URL field bypasses the disabled button, so guard re-entry here.
  if (btn?.disabled) return;

  const url = (input?.value || '').trim();
  if (!url) {
    uiModule.showError('Paste a GitHub or skills.sh URL first');
    return;
  }

  // Validation errors carry `detail` as an array of objects; flatten it so
  // the user never sees "[object Object]".
  const describe = (detail) => {
    if (!detail) return '';
    if (typeof detail === 'string') return detail;
    if (Array.isArray(detail)) {
      return detail
        .map((d) => (d && d.msg) || (typeof d === 'string' ? d : JSON.stringify(d)))
        .join('; ');
    }
    return JSON.stringify(detail);
  };

  if (btn) btn.disabled = true;
  try {
    const res = await fetch(`${API}/api/skills/import-from-url`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ url }),
    });
    const data = await res.json().catch(() => ({}));
    if (!res.ok) {
      throw new Error(describe(data.detail) || describe(data.error) || `HTTP ${res.status}`);
    }
    if (input) input.value = '';
    await loadSkills();
    const name = data.skill?.name;
    uiModule.showToast(`Imported ${name || 'skill'} (${data.files || 1} file(s))`);
    // Only open the skill when the server told us its real name.
    if (name) openSkill(name);
  } catch (err) {
    uiModule.showError('Import failed: ' + err.message);
  } finally {
    if (btn) btn.disabled = false;
  }
}
async function addSkill() {
const name = document.getElementById('new-skill-name')?.value.trim()
|| document.getElementById('new-skill-title')?.value.trim();
@@ -1866,6 +1895,10 @@ async function addSkill() {
}
document.addEventListener('DOMContentLoaded', () => {
document.getElementById('skill-import-url-btn')?.addEventListener('click', importSkillFromUrl);
document.getElementById('skill-import-url')?.addEventListener('keydown', (e) => {
if (e.key === 'Enter') importSkillFromUrl();
});
document.getElementById('add-skill-btn')?.addEventListener('click', addSkill);
document.getElementById('skills-search')?.addEventListener('input', renderSkillsList);
document.getElementById('skills-sort')?.addEventListener('change', (e) => {
+9
View File
@@ -10126,6 +10126,15 @@ details a:hover {
height: 32px;
}
/* Skill Import beside URL field — match input height; cancel modal-body button margin. */
.memory-add-row .theme-io-btn {
flex: none;
height: 28px;
box-sizing: border-box;
margin-top: 0;
padding: 5px 10px;
}
.memory-add-input {
flex: 1;
height: 28px;
+178
View File
@@ -0,0 +1,178 @@
"""Skill URL importer — GitHub path parsing."""
import pytest
from services.memory.skill_importer import (
ResolvedSource,
SkillImportError,
_assert_github_url,
_fetch_bytes,
_list_github_dir,
parse_skill_source,
)
def test_parse_github_blob_skill_md():
    """A /blob/ link to SKILL.md resolves to owner, repo, ref and file path."""
    link = "https://github.com/anthropics/skills/blob/main/skills/pdf/SKILL.md"
    resolved = parse_skill_source(link)
    assert (resolved.owner, resolved.repo, resolved.ref) == (
        "anthropics",
        "skills",
        "main",
    )
    assert resolved.path.endswith("skills/pdf/SKILL.md")
def test_parse_github_tree_directory():
    """A /tree/ link resolves the branch and the directory path."""
    link = "https://github.com/example/my-skills/tree/develop/caveman-skill"
    resolved = parse_skill_source(link)
    assert (resolved.owner, resolved.repo) == ("example", "my-skills")
    assert resolved.ref == "develop"
    assert resolved.path == "caveman-skill"
def test_parse_raw_github():
    """A raw.githubusercontent.com link resolves to owner/repo/ref/path."""
    link = "https://raw.githubusercontent.com/o/r/main/path/SKILL.md"
    resolved = parse_skill_source(link)
    assert (resolved.owner, resolved.repo, resolved.ref) == ("o", "r", "main")
    assert resolved.path == "path/SKILL.md"
def test_rejects_non_github():
    """URLs on hosts other than GitHub are refused."""
    foreign_url = "https://example.com/skill.md"
    with pytest.raises(SkillImportError):
        parse_skill_source(foreign_url)
def test_fetch_bytes_rejects_cross_host_redirect(monkeypatch):
    """A response whose final URL left GitHub must be rejected."""

    class _Resp:
        url = "https://evil.example/secret"
        status_code = 200
        content = b"x"

    # Reuse the module's shared fake client instead of a private copy; it
    # also stubs check_outbound_url to allow the request.
    _mock_httpx_client(monkeypatch, _Resp())
    with pytest.raises(SkillImportError, match="redirect target"):
        _fetch_bytes("https://raw.githubusercontent.com/o/r/main/SKILL.md")
def test_assert_github_url_allows_api_host():
    """api.github.com is an accepted host; the check returns without raising."""
    api_url = "https://api.github.com/repos/o/r/contents?ref=main"
    _assert_github_url(api_url, context="redirect target")
def test_list_github_dir_accepts_api_github_response(monkeypatch):
    """A listing served from api.github.com is accepted and its file collected."""
    monkeypatch.setattr(
        "services.memory.skill_importer._fetch_text",
        lambda url: "# skill\n",
    )

    class _Resp:
        url = "https://api.github.com/repos/o/r/contents?ref=main"
        status_code = 200

        def json(self):
            return [{
                "name": "SKILL.md",
                "type": "file",
                "download_url": "https://raw.githubusercontent.com/o/r/main/SKILL.md",
            }]

    # Shared fake client; it also stubs check_outbound_url to allow the request.
    _mock_httpx_client(monkeypatch, _Resp())
    out = {}
    src = ResolvedSource(owner="o", repo="r", ref="main", path="")
    _list_github_dir(src, "", out)
    assert "SKILL.md" in out
def _mock_httpx_client(monkeypatch, response):
    """Patch the importer so every HTTP GET returns *response*.

    Replaces ``httpx.Client`` inside ``services.memory.skill_importer`` with
    a context-manager stub and makes ``check_outbound_url`` accept any URL.
    """

    class _FakeClient:
        def __init__(self, *_args, **_kwargs):
            pass

        def __enter__(self):
            return self

        def __exit__(self, *_exc_info):
            return False

        def get(self, url, headers=None):
            return response

    target = "services.memory.skill_importer"
    monkeypatch.setattr(f"{target}.httpx.Client", _FakeClient)
    monkeypatch.setattr(f"{target}.check_outbound_url", lambda url: (True, ""))
def test_list_github_dir_surfaces_rate_limit(monkeypatch):
    """A 403 whose message mentions the rate limit is reported as such."""

    class _RateLimited:
        url = "https://api.github.com/repos/o/r/contents?ref=main"
        status_code = 403

        def json(self):
            return {"message": "API rate limit exceeded for 203.0.113.1"}

    _mock_httpx_client(monkeypatch, _RateLimited())
    source = ResolvedSource(owner="o", repo="r", ref="main", path="")
    collected = {}
    with pytest.raises(SkillImportError, match="rate limit"):
        _list_github_dir(source, "", collected)
def test_fetch_bytes_surfaces_github_error_detail(monkeypatch):
    """The JSON ``message`` of a failed response is included in the error."""
    target = "https://raw.githubusercontent.com/o/r/main/SKILL.md"

    class _Forbidden:
        url = target
        status_code = 403
        content = b""

        def json(self):
            return {"message": "Forbidden"}

    _mock_httpx_client(monkeypatch, _Forbidden())
    with pytest.raises(SkillImportError, match="GitHub request failed \\(403\\): Forbidden"):
        _fetch_bytes(target)