mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-20 11:45:24 -04:00
fix(tools): prune skipped dirs before descending in glob tool (#4538)
* fix(tools): prune skipped dirs before descending in glob tool

  GlobTool used pathlib.Path.rglob, which descends into every directory (including node_modules, .git, dist, etc.) and filters AFTER the walk. On repos with large junk directories this causes the glob tool to hang for minutes.

  Replace rglob with os.walk that prunes _CODENAV_SKIP_DIRS before descending — matching the approach GrepTool already uses. Also add a fast path for literal patterns (no wildcards → direct path lookup).

  Fixes #4493

* fix(tools): use regex glob matching to fix * semantics and literal fallback

  Replace fnmatch with _glob_to_regex so that * stays within a single path segment (matching pathlib/rglob semantics) and **/ spans zero or more directories. Literal patterns now fall through to os.walk when the direct path lookup misses, so e.g. 'foo.py' still finds files at any depth.

  Add tests for:
  - bare literal matching in subdirectories
  - multi-segment single-star patterns (sub/*.txt)
  - * not crossing / boundaries
  - ** matching at arbitrary depth

  Closes #4493

---------

Co-authored-by: michaelxer <michaelxer@users.noreply.github.com>
This commit is contained in:
@@ -24,6 +24,9 @@ def repo():
|
||||
os.mkdir(os.path.join(root, "sub"))
|
||||
with open(os.path.join(root, "sub", "b.txt"), "w") as f:
|
||||
f.write("nothing\nNEEDLE upper\n")
|
||||
os.mkdir(os.path.join(root, "sub", "deep"))
|
||||
with open(os.path.join(root, "sub", "deep", "c.py"), "w") as f:
|
||||
f.write("# deep python\n")
|
||||
os.mkdir(os.path.join(root, "node_modules"))
|
||||
with open(os.path.join(root, "node_modules", "dep.py"), "w") as f:
|
||||
f.write("needle in dep\n")
|
||||
@@ -107,6 +110,37 @@ def test_glob_requires_pattern(repo):
|
||||
assert r["exit_code"] == 1
|
||||
|
||||
|
||||
def test_glob_literal_in_subdir(repo):
    """A bare literal pattern matches at any depth (like rglob), not only at root.

    The ``repo`` fixture creates ``sub/b.txt``, so finding ``b.txt`` shows the
    literal pattern was searched for below the root directory.
    """
    import json

    # Serialize with json.dumps instead of an f-string so that a repo path
    # containing backslashes or quotes still produces valid JSON.
    args = json.dumps({"pattern": "b.txt", "path": str(repo)})
    r = _run("glob", args)
    assert r["exit_code"] == 0
    assert "b.txt" in r["output"]
|
||||
|
||||
|
||||
def test_glob_multi_segment_single_star(repo):
    """sub/*.txt matches sub/b.txt but NOT sub/deep/c.py (single * stays in one segment)."""
    import json

    # Serialize with json.dumps instead of an f-string so that a repo path
    # containing backslashes or quotes still produces valid JSON.
    args = json.dumps({"pattern": "sub/*.txt", "path": str(repo)})
    r = _run("glob", args)
    assert r["exit_code"] == 0
    assert "b.txt" in r["output"]
    assert "c.py" not in r["output"]
|
||||
|
||||
|
||||
def test_glob_star_does_not_cross_slash(repo):
    """sub/*.py must NOT match sub/deep/c.py — * is single-segment only."""
    import json

    # Serialize with json.dumps instead of an f-string so that a repo path
    # containing backslashes or quotes still produces valid JSON.
    args = json.dumps({"pattern": "sub/*.py", "path": str(repo)})
    r = _run("glob", args)
    assert r["exit_code"] == 0
    # sub/ has no .py directly, only sub/deep/c.py — should NOT match
    assert "No files matching" in r["output"]
|
||||
|
||||
|
||||
def test_glob_double_star_matches_deep(repo):
    """**/*.py should match files at any depth.

    Expects both ``a.py`` and the nested ``sub/deep/c.py`` created by the
    ``repo`` fixture to appear in the output.
    """
    import json

    # Serialize with json.dumps instead of an f-string so that a repo path
    # containing backslashes or quotes still produces valid JSON.
    args = json.dumps({"pattern": "**/*.py", "path": str(repo)})
    r = _run("glob", args)
    assert r["exit_code"] == 0
    assert "a.py" in r["output"]
    assert "c.py" in r["output"]
|
||||
|
||||
|
||||
# ── ls ────────────────────────────────────────────────────────────────────
|
||||
|
||||
def test_ls_lists_entries(repo):
|
||||
|
||||
Reference in New Issue
Block a user