mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-17 10:15:27 -04:00
Topics: hydrate session history before analysis
analyze_topics() iterates session_manager.sessions and reads
session_data.get("history", []) directly. But SessionManager.load_sessions
seeds sessions metadata-only with empty history — messages are loaded
lazily, only when get_session(session_id) is called. So analyze_topics saw
empty history for every session that hadn't been individually opened during
the current process lifetime and reported total_topics: 0, even when the
database held plenty of matching messages.
Hydrate each candidate session via session_manager.get_session(session_id)
(the existing lazy-load path) before reading its history. Hydration happens
after the owner/archived filters, so skipped sessions aren't loaded. When the
manager has no get_session (test stubs), analysis falls back to the raw
cached history.
tests/test_topic_analyzer.py: new test_topic_analyzer_hydrates_sessions
seeds a real SQLite DB with a session + message, runs the real
SessionManager (asserting cached history starts empty), then asserts
analyze_topics finds the topic. Fails before this change. The existing
keyword tests now pass an explicit owner to satisfy the owner-required
early return.
This commit is contained in:
@@ -49,7 +49,15 @@ def analyze_topics(session_manager, owner: str = None) -> Dict[str, Any]:
|
|||||||
if sess_owner != owner:
|
if sess_owner != owner:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
for msg in session_data.get("history", []):
|
# Hydrate session to load history from DB if needed
|
||||||
|
if hasattr(session_manager, "get_session"):
|
||||||
|
hydrated_session = session_manager.get_session(session_id)
|
||||||
|
history = hydrated_session.history
|
||||||
|
else:
|
||||||
|
hydrated_session = session_data
|
||||||
|
history = session_data.get("history", [])
|
||||||
|
|
||||||
|
for msg in history:
|
||||||
content_raw = msg.get("content") if isinstance(msg, dict) else getattr(msg, "content", None)
|
content_raw = msg.get("content") if isinstance(msg, dict) else getattr(msg, "content", None)
|
||||||
if not content_raw:
|
if not content_raw:
|
||||||
continue
|
continue
|
||||||
|
|||||||
@@ -1,12 +1,17 @@
|
|||||||
"""Tests for topic keyword matching (src/topic_analyzer.py)."""
|
"""Tests for topic keyword matching (src/topic_analyzer.py)."""
|
||||||
from types import SimpleNamespace
|
from types import SimpleNamespace
|
||||||
|
import pytest
|
||||||
|
from sqlalchemy import create_engine
|
||||||
|
from sqlalchemy.orm import sessionmaker
|
||||||
|
from core.database import Base, Session as DbSession, ChatMessage as DbChatMessage
|
||||||
|
from core.session_manager import SessionManager
|
||||||
from src.topic_analyzer import analyze_topics
|
from src.topic_analyzer import analyze_topics
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
||||||
def _sm(*messages):
|
def _sm(*messages):
|
||||||
history = [{"role": "user", "content": c} for c in messages]
|
history = [{"role": "user", "content": c} for c in messages]
|
||||||
return SimpleNamespace(sessions={"s1": {"owner": None, "name": "S", "history": history}})
|
return SimpleNamespace(sessions={"s1": {"owner": "alice", "name": "S", "history": history}})
|
||||||
|
|
||||||
|
|
||||||
def _freq(result):
|
def _freq(result):
|
||||||
@@ -16,15 +21,76 @@ def _freq(result):
|
|||||||
def test_substring_does_not_false_match_technology():
|
def test_substring_does_not_false_match_technology():
|
||||||
# Regression: "ai" matched inside "email"/"again"/"rain"/"wait", flagging
|
# Regression: "ai" matched inside "email"/"again"/"rain"/"wait", flagging
|
||||||
# Technology for messages with no technical content at all.
|
# Technology for messages with no technical content at all.
|
||||||
result = analyze_topics(_sm("Can you send me an email again about the rain? I will wait."))
|
result = analyze_topics(_sm("Can you send me an email again about the rain? I will wait."), owner="alice")
|
||||||
assert "Technology" not in _freq(result)
|
assert "Technology" not in _freq(result)
|
||||||
|
|
||||||
|
|
||||||
def test_real_keywords_still_match():
|
def test_real_keywords_still_match():
|
||||||
result = analyze_topics(_sm("I wrote some Python code to test the algorithm."))
|
result = analyze_topics(_sm("I wrote some Python code to test the algorithm."), owner="alice")
|
||||||
assert _freq(result).get("Technology", 0) >= 1
|
assert _freq(result).get("Technology", 0) >= 1
|
||||||
|
|
||||||
|
|
||||||
def test_multiword_keyword_matches():
|
def test_multiword_keyword_matches():
|
||||||
result = analyze_topics(_sm("Can you explain how to set this up?"))
|
result = analyze_topics(_sm("Can you explain how to set this up?"), owner="alice")
|
||||||
assert "Learning" in _freq(result)
|
assert "Learning" in _freq(result)
|
||||||
|
|
||||||
|
|
||||||
|
def test_topic_analyzer_hydrates_sessions(monkeypatch):
|
||||||
|
# 1. Create clean in-memory database
|
||||||
|
engine = create_engine("sqlite:///:memory:")
|
||||||
|
Base.metadata.create_all(bind=engine)
|
||||||
|
|
||||||
|
# 2. Create test session factory
|
||||||
|
TestSessionLocal = sessionmaker(bind=engine)
|
||||||
|
|
||||||
|
# 3. Populate test database with a session and a message about Python
|
||||||
|
db = TestSessionLocal()
|
||||||
|
session_id = "session-1"
|
||||||
|
|
||||||
|
s = DbSession(
|
||||||
|
id=session_id,
|
||||||
|
name="Python chat",
|
||||||
|
endpoint_url="http://localhost:8000",
|
||||||
|
model="gpt-4",
|
||||||
|
owner="alice",
|
||||||
|
message_count=1,
|
||||||
|
created_at=datetime.utcnow(),
|
||||||
|
updated_at=datetime.utcnow()
|
||||||
|
)
|
||||||
|
m = DbChatMessage(
|
||||||
|
id="msg-1",
|
||||||
|
session_id=session_id,
|
||||||
|
role="user",
|
||||||
|
content="I love writing python code.",
|
||||||
|
timestamp=datetime.utcnow()
|
||||||
|
)
|
||||||
|
|
||||||
|
db.add(s)
|
||||||
|
db.add(m)
|
||||||
|
db.commit()
|
||||||
|
db.close()
|
||||||
|
|
||||||
|
# 4. Patch SessionLocal to use our in-memory DB
|
||||||
|
import core.session_manager
|
||||||
|
import core.database
|
||||||
|
monkeypatch.setattr(core.session_manager, "SessionLocal", TestSessionLocal)
|
||||||
|
monkeypatch.setattr(core.database, "SessionLocal", TestSessionLocal)
|
||||||
|
|
||||||
|
# 5. Initialize the real SessionManager and load metadata (seeds sessions with empty history)
|
||||||
|
sm = SessionManager()
|
||||||
|
|
||||||
|
# Verify that the session is in sm.sessions, and its history is currently empty
|
||||||
|
assert session_id in sm.sessions
|
||||||
|
assert len(sm.sessions[session_id].history) == 0
|
||||||
|
|
||||||
|
# 6. Execute the topic analysis
|
||||||
|
res = analyze_topics(sm, owner="alice")
|
||||||
|
|
||||||
|
# 7. Assertions
|
||||||
|
# There should be 1 topic found (Technology, since "python" / "code" are keywords)
|
||||||
|
assert res["total_topics"] > 0
|
||||||
|
|
||||||
|
# Check that the topic is Technology
|
||||||
|
tech_topic = next((t for t in res["topics"] if t["topic"] == "Technology"), None)
|
||||||
|
assert tech_topic is not None
|
||||||
|
assert tech_topic["frequency"] >= 1
|
||||||
|
|||||||
Reference in New Issue
Block a user