diff --git a/src/agent_tools/filesystem_tools.py b/src/agent_tools/filesystem_tools.py index 7ba22161c..a55944ae5 100644 --- a/src/agent_tools/filesystem_tools.py +++ b/src/agent_tools/filesystem_tools.py @@ -1,6 +1,7 @@ import asyncio import json import os +import re import difflib import fnmatch import shutil @@ -16,6 +17,31 @@ _CODENAV_SKIP_DIRS = frozenset({ _CODENAV_MAX_HITS = 200 _CODENAV_MAX_LINE = 400 + +def _glob_to_regex(pat: str) -> "re.Pattern": + """Translate a forward-slash glob (**, *, ?) into a compiled regex. + `**/` matches zero or more complete directories. + `*` matches within a single path segment (does not cross /). + """ + i, n, out = 0, len(pat), [] + while i < n: + if pat[i : i + 3] == "**/": + out.append("(?:[^/]+/)*") + i += 3 + elif pat[i : i + 2] == "**": + out.append(".*") + i += 2 + elif pat[i] == "*": + out.append("[^/]*") + i += 1 + elif pat[i] == "?": + out.append("[^/]") + i += 1 + else: + out.append(re.escape(pat[i])) + i += 1 + return re.compile("".join(out)) + def _unified_diff(old: str, new: str, path: str) -> Optional[Dict[str, Any]]: if old == new: return None @@ -259,23 +285,38 @@ class GlobTool: return {"error": f"glob: {e}", "exit_code": 1} def _glob(): - from pathlib import Path - base = Path(root) - if not base.is_dir(): + base = os.path.abspath(root) + if not os.path.isdir(base): return None, f"glob: {root}: not a directory" + norm_pat = pattern.replace("\\", "/") + # Fast path: literal pattern (no wildcards) → direct path lookup. + if not any(c in norm_pat for c in "*?["): + cand = os.path.normpath(os.path.join(base, norm_pat)) + if os.path.exists(cand): + return [cand], None + # Literal not at exact path — fall through to walk so + # e.g. "foo.py" still matches at any depth (like rglob). + # Compile glob to regex: * stays within one segment, **/ spans dirs. + regex = _glob_to_regex(norm_pat) matched = [] + cap = _CODENAV_MAX_HITS * 5 try: - for p in base.rglob(pattern): - if set(p.relative_to(base).parts) & _CODENAV_SKIP_DIRS: - continue - try: - mtime = p.stat().st_mtime - except OSError: - mtime = 0 - matched.append((mtime, str(p))) - if len(matched) > _CODENAV_MAX_HITS * 5: + for dp, dns, fns in os.walk(base): + # Prune skipped dirs before descending (unlike rglob which + # descends first then filters — fatal on large node_modules). + dns[:] = [d for d in dns if d not in _CODENAV_SKIP_DIRS] + for name in fns + dns: + full = os.path.join(dp, name) + rel = os.path.relpath(full, base).replace(os.sep, "/") + if regex.fullmatch(rel) or regex.fullmatch(name): + try: + mtime = os.stat(full).st_mtime + except OSError: + mtime = 0 + matched.append((mtime, full)) + if len(matched) > cap: break - except (OSError, ValueError) as _e: + except OSError as _e: return None, f"glob: {_e}" matched.sort(key=lambda t: t[0], reverse=True) return [pth for _, pth in matched[:_CODENAV_MAX_HITS]], None diff --git a/tests/test_code_nav_tools.py b/tests/test_code_nav_tools.py index 40e9b2ba0..d02840e18 100644 --- a/tests/test_code_nav_tools.py +++ b/tests/test_code_nav_tools.py @@ -24,6 +24,9 @@ def repo(): os.mkdir(os.path.join(root, "sub")) with open(os.path.join(root, "sub", "b.txt"), "w") as f: f.write("nothing\nNEEDLE upper\n") + os.mkdir(os.path.join(root, "sub", "deep")) + with open(os.path.join(root, "sub", "deep", "c.py"), "w") as f: + f.write("# deep python\n") os.mkdir(os.path.join(root, "node_modules")) with open(os.path.join(root, "node_modules", "dep.py"), "w") as f: f.write("needle in dep\n") @@ -107,6 +110,37 @@ def test_glob_requires_pattern(repo): assert r["exit_code"] == 1 +def test_glob_literal_in_subdir(repo): + """Bare literal should match at any depth (like rglob), not only at root.""" + r = _run("glob", f'{{"pattern": "b.txt", "path": "{repo}"}}') + assert r["exit_code"] == 0 + assert "b.txt" in r["output"] + + +def test_glob_multi_segment_single_star(repo): + """sub/*.txt matches sub/b.txt but NOT sub/deep/c.py (single * stays in one segment).""" + r = _run("glob", f'{{"pattern": "sub/*.txt", "path": "{repo}"}}') + assert r["exit_code"] == 0 + assert "b.txt" in r["output"] + assert "c.py" not in r["output"] + + +def test_glob_star_does_not_cross_slash(repo): + """src/*.py must NOT match src/a/b/x.py — * is single-segment only.""" + r = _run("glob", f'{{"pattern": "sub/*.py", "path": "{repo}"}}') + assert r["exit_code"] == 0 + # sub/ has no .py directly, only sub/deep/c.py — should NOT match + assert "No files matching" in r["output"] + + +def test_glob_double_star_matches_deep(repo): + """**/*.py should match files at any depth.""" + r = _run("glob", f'{{"pattern": "**/*.py", "path": "{repo}"}}') + assert r["exit_code"] == 0 + assert "a.py" in r["output"] + assert "c.py" in r["output"] + + # ── ls ──────────────────────────────────────────────────────────────────── def test_ls_lists_entries(repo):