fix: topic analysis false-matches keywords as substrings (e.g. 'ai' in 'email') (#687)

* fix: match topic keywords on word boundaries, not substrings

* fix: apply word-boundary matching to topic example snippets too

* test: topic keywords match whole words, not substrings
This commit is contained in:
Afonso Coutinho
2026-06-02 03:42:04 +01:00
committed by GitHub
parent 9d8eebfa63
commit 48d3b7abab
2 changed files with 32 additions and 2 deletions
+2 -2
View File
@@ -60,11 +60,11 @@ def analyze_topics(session_manager, owner: str = None) -> Dict[str, Any]:
for topic, keywords in TOPIC_KEYWORDS.items():
for kw in keywords:
- if kw in content:
+ if re.search(rf"\b{re.escape(kw)}\b", content):
topic_counts[topic] += 1
sentences = re.split(r'[.!?]', str(content_raw))
for sentence in sentences:
- if kw in sentence.lower():
+ if re.search(rf"\b{re.escape(kw)}\b", sentence.lower()):
topic_matches[topic].append({
"session_id": session_id,
"session_name": session_name,