Ignore non-string personal doc text (#1832)

This commit is contained in:
red person
2026-06-29 11:24:29 -07:00
committed by GitHub
parent 387f95187e
commit bbbe145247
2 changed files with 19 additions and 2 deletions
+15 -1
View File
@@ -1,4 +1,4 @@
from src.personal_docs import retrieve_personal_keyword
from src.personal_docs import retrieve_personal_keyword, split_chunks
def test_retrieve_personal_keyword_skips_non_dict_rows():
@@ -19,3 +19,17 @@ def test_retrieve_personal_keyword_tolerates_missing_chunks_key():
index = [{"name": "empty.txt"}, {"name": "doc.txt", "chunks": ["alpha beta gamma"]}]
out = retrieve_personal_keyword(index, "beta", k=5)
assert out == ["[doc.txt :: chunk 1]\nalpha beta gamma"]
def test_retrieve_personal_keyword_ignores_non_string_text():
    """Check that non-string queries and non-string chunks produce no hits.

    The document mixes ``None``, a list, and one real string chunk. A list
    query must return nothing, while a string query must return only the
    string chunk, still labelled by its position in the chunk list (3).
    """
    docs = [{"name": "doc.txt", "chunks": [None, ["beta"], "alpha beta gamma"]}]

    # A query that is not a string matches nothing at all.
    assert retrieve_personal_keyword(docs, ["beta"], k=5) == []

    # A string query skips the None and list entries and hits the third chunk.
    expected_hits = ["[doc.txt :: chunk 3]\nalpha beta gamma"]
    assert retrieve_personal_keyword(docs, "beta", k=5) == expected_hits
def test_split_chunks_ignores_non_string_text():
    """Check that ``split_chunks`` returns an empty list for non-string text."""
    # Both None and a list of strings are invalid text and must yield no chunks.
    for invalid_text in (None, ["hello"]):
        assert split_chunks(invalid_text, size=1000, overlap=200) == []