# routes/session_routes.py import re import json import uuid from datetime import datetime from fastapi import APIRouter, Form, HTTPException, Response, Request import logging from core.session_manager import SessionManager from core.models import ChatMessage from src.request_models import SessionResponse from core.database import Session as DbSession, SessionLocal, Document, GalleryImage from src.auth_helpers import get_current_user def _verify_session_owner(request: Request, session_id: str): """Verify the current user owns the session. Raises 404 if not.""" user = get_current_user(request) if not user: raise HTTPException(403, "Authentication required") db = SessionLocal() try: row = db.query(DbSession.owner).filter(DbSession.id == session_id).first() if not row: raise HTTPException(404, f"Session {session_id} not found") if row.owner != user: raise HTTPException(404, f"Session {session_id} not found") finally: db.close() logger = logging.getLogger(__name__) router = APIRouter(prefix="/api", tags=["sessions"]) def _pick_endpoint_for_sort(): """Pick model endpoint for auto-sort LLM call — uses utility endpoint setting, falls back to default.""" from src.endpoint_resolver import resolve_endpoint # Try utility endpoint first (what the user configured for background tasks) url, model, headers = resolve_endpoint("utility") if url and model: return url, model, headers # Fall back to task endpoint try: from src.task_endpoint import resolve_task_endpoint url, model, headers = resolve_task_endpoint() if url and model: return url, model, headers except Exception: pass # Fall back to default url, model, headers = resolve_endpoint("default") if url and model: return url, model, headers return None, None, None def setup_session_routes(session_manager: SessionManager, config: dict, webhook_manager=None): """Setup session routes with the provided manager and config""" REQUEST_TIMEOUT = config.get("REQUEST_TIMEOUT", 20) OPENAI_API_KEY = config.get("OPENAI_API_KEY") SESSIONS_FILE = config.get("SESSIONS_FILE") @router.get("/sessions") def list_sessions(request: Request): user = get_current_user(request) # Lazy purge: incognito sessions are ephemeral by design — wipe leftovers # from the DB and session_manager so they vanish on the next page refresh. # BUT: skip sessions that were created within the last 10 minutes. # Without that guard, the purge nukes the active "Nobody" session on the # very first /api/sessions call after creation, killing the in-flight # chat. The frontend's own _cleanupIncognitoSessions handler knows which # session is current and won't delete the live one — this server-side # purge exists only to catch ghosts the frontend missed (tab close, # crash). Only clean up rows old enough to be definitely orphaned. try: from datetime import datetime as _dt, timedelta as _td _cutoff = _dt.utcnow() - _td(minutes=10) _purge_db = SessionLocal() try: from core.database import ChatMessage as _DbMsg _ghosts = _purge_db.query(DbSession).filter( DbSession.name.in_(("Nobody", "Incognito")), DbSession.created_at < _cutoff, ).all() for _g in _ghosts: _purge_db.query(_DbMsg).filter(_DbMsg.session_id == _g.id).delete() _purge_db.delete(_g) if hasattr(session_manager, "delete_session"): try: session_manager.delete_session(_g.id) except Exception: pass if _ghosts: _purge_db.commit() finally: _purge_db.close() except Exception: pass user_sessions = session_manager.get_sessions_for_user(user) # Fetch folder info from DB for each session db = SessionLocal() try: folder_map = {} token_map = {} important_map = {} created_map = {} updated_map = {} last_msg_map = {} mode_map = {} msg_count_map = {} rows = db.query(DbSession.id, DbSession.folder, DbSession.total_input_tokens, DbSession.total_output_tokens, DbSession.is_important, DbSession.created_at, DbSession.updated_at, DbSession.last_message_at, DbSession.mode, DbSession.message_count).filter(DbSession.archived == False).all() for row in rows: folder_map[row.id] = row.folder token_map[row.id] = (row.total_input_tokens or 0) + (row.total_output_tokens or 0) important_map[row.id] = row.is_important or False created_map[row.id] = row.created_at.isoformat() if row.created_at else None updated_map[row.id] = row.updated_at.isoformat() if row.updated_at else None # Fall back to updated_at then created_at so sessions that # predate the column (or have no messages) still sort sanely. last_msg_map[row.id] = ( row.last_message_at.isoformat() if row.last_message_at else (row.updated_at.isoformat() if row.updated_at else (row.created_at.isoformat() if row.created_at else None)) ) mode_map[row.id] = row.mode msg_count_map[row.id] = row.message_count or 0 # Sessions with active documents that have content from sqlalchemy import func doc_session_ids = set( r[0] for r in db.query(Document.session_id) .filter(Document.is_active == True, Document.current_content != None, func.trim(Document.current_content) != "") .distinct().all() ) img_session_ids = set( r[0] for r in db.query(GalleryImage.session_id) .filter(GalleryImage.session_id != None) .distinct().all() ) finally: db.close() sessions = [{"id": s.id, "name": s.name, "model": s.model, "endpoint_url": s.endpoint_url, "rag": s.rag, "archived": s.archived, "folder": folder_map.get(s.id), "total_tokens": token_map.get(s.id, 0), "is_important": important_map.get(s.id, False), "created_at": created_map.get(s.id), "updated_at": updated_map.get(s.id), "last_message_at": last_msg_map.get(s.id), "has_documents": s.id in doc_session_ids, "has_images": s.id in img_session_ids, "mode": mode_map.get(s.id), "message_count": msg_count_map.get(s.id, 0)} for s in user_sessions.values() if not s.archived and (s.name or "").strip() not in ("Nobody", "Incognito")] return sessions @router.post("/session", response_model=SessionResponse) def create_session( request: Request, name: str = Form(""), endpoint_url: str = Form(""), model: str = Form(""), rag: str = Form(None), skip_validation: str = Form(None), api_key: str = Form(""), endpoint_id: str = Form(""), ): skip_val = str(skip_validation).lower() == "true" if not endpoint_url and not skip_val: raise HTTPException(400, "endpoint_url is required (choose from /api/models)") model_to_use = model if skip_val: # skip_validation = trust the caller and do NOT probe /v1/models. # Used for custom endpoints AND for bare placeholder sessions with no # model at all (e.g. an email reply draft just needs a session to live # in). Probing here was 400-ing those with "Cannot reach /v1/models". pass elif not model_to_use: from src.llm_core import list_model_ids ids = list_model_ids(endpoint_url, timeout=REQUEST_TIMEOUT, headers={"Authorization": f"Bearer {api_key}"} if api_key.strip() else None) if not ids: raise HTTPException(400, "Cannot reach /v1/models") # Default to the first CHAT model — endpoints often list embedding/ # tts/whisper models first (e.g. text-embedding-ada-002), which # can't hold a conversation. _NON_CHAT = ("text-embedding", "embedding", "tts-", "whisper", "text-moderation", "moderation-", "dall-e", "rerank") chat_ids = [m for m in ids if not any(p in m.lower() for p in _NON_CHAT)] model_to_use = (chat_ids or ids)[0] else: from src.llm_core import list_model_ids import os as _os req_base = _os.path.basename(model_to_use.rstrip("/")) avail = list_model_ids(endpoint_url, timeout=REQUEST_TIMEOUT, headers={"Authorization": f"Bearer {api_key}"} if api_key.strip() else None) if not avail: raise HTTPException(400, "Cannot reach /v1/models") if model_to_use not in avail: found = None for a in avail: if _os.path.basename(a.rstrip("/")) == req_base: found = a break if not found: raise HTTPException(400, f"Model not found at server. Available: {', '.join(avail)}") model_to_use = found sid = str(uuid.uuid4()) user = get_current_user(request) session = session_manager.create_session( session_id=sid, name=name or "", endpoint_url=endpoint_url or "", model=model_to_use, rag=str(rag).lower() == "true" if rag else False, owner=user, ) # Set auth headers for custom API-key endpoints resolved_key = api_key.strip() if api_key else "" resolved_base = endpoint_url if not resolved_key and endpoint_id and endpoint_id.strip(): from core.database import ModelEndpoint _db = SessionLocal() try: ep = _db.query(ModelEndpoint).filter(ModelEndpoint.id == endpoint_id.strip()).first() if ep and ep.api_key: resolved_key = ep.api_key resolved_base = ep.base_url finally: _db.close() if resolved_key: from src.endpoint_resolver import build_headers session.headers = build_headers(resolved_key, resolved_base) session_manager.save_sessions() # Fire webhook (sync-safe) if webhook_manager: webhook_manager.fire_and_forget("session.created", { "session_id": sid, "name": session.name, "model": model_to_use, }) # Fire event for automation tasks from src.event_bus import fire_event fire_event("session_created", user) return SessionResponse( id=sid, name=session.name, model=model_to_use, rag=str(rag).lower() == "true" if rag else False, archived=False ) @router.patch("/session/{sid}") def rename_session( request: Request, sid: str, name: str = Form(None), folder: str = Form(None), model: str = Form(None), endpoint_url: str = Form(None), endpoint_id: str = Form(None), ): _verify_session_owner(request, sid) try: session = session_manager.get_session(sid) except KeyError: raise HTTPException(404, f"Session {sid} not found") result = {"id": sid} if name is not None: session_manager.update_session_name(sid, name) result["name"] = name # Update folder assignment if folder is not None: db = SessionLocal() try: db_session = db.query(DbSession).filter(DbSession.id == sid).first() if db_session: db_session.folder = folder if folder else None db_session.updated_at = datetime.utcnow() db.commit() result["folder"] = folder if folder else None finally: db.close() # Switch model/endpoint mid-session if model is not None and endpoint_url is not None: if endpoint_id: from core.database import ModelEndpoint _db = SessionLocal() try: ep = _db.query(ModelEndpoint).filter(ModelEndpoint.id == endpoint_id).first() if not ep: raise HTTPException(400, "Model endpoint no longer exists") finally: _db.close() session.model = model session.endpoint_url = endpoint_url # Update auth headers from the endpoint's stored API key if endpoint_id: _db = SessionLocal() try: ep = _db.query(ModelEndpoint).filter(ModelEndpoint.id == endpoint_id).first() if ep and ep.api_key: from src.endpoint_resolver import build_headers session.headers = build_headers(ep.api_key, ep.base_url) finally: _db.close() # Persist to DB db = SessionLocal() try: db_session = db.query(DbSession).filter(DbSession.id == sid).first() if db_session: db_session.model = model db_session.endpoint_url = endpoint_url db_session.updated_at = datetime.utcnow() db.commit() finally: db.close() result["model"] = model result["endpoint_url"] = endpoint_url return result @router.post("/session/{sid}/inject_messages") async def inject_messages(request: Request, sid: str): """Bulk-inject messages into a session's history (for group chat sync).""" _verify_session_owner(request, sid) try: sess = session_manager.get_session(sid) except KeyError: raise HTTPException(404, f"Session {sid} not found") body = await request.json() messages = body.get("messages", []) from core.models import ChatMessage for m in messages: sess.add_message(ChatMessage(m["role"], m["content"], metadata=m.get("metadata"))) session_manager.save_sessions() return {"ok": True, "count": len(messages)} @router.post("/session/{sid}/delete") def delete_session_beacon(request: Request, sid: str): """Delete session via POST (for navigator.sendBeacon on page close).""" return delete_session(request, sid) @router.post("/sessions/bulk-delete") async def bulk_delete_sessions(request: Request): """Delete multiple sessions (for compare cleanup via sendBeacon).""" from core.database import ChatMessage as _CM try: body = await request.json() ids = body.get("ids", []) except Exception: ids = [] for sid in ids: try: _verify_session_owner(request, sid) session_manager.delete_session(sid) db = SessionLocal() try: db.query(_CM).filter(_CM.session_id == sid).delete() db.query(DbSession).filter(DbSession.id == sid).delete() db.commit() except Exception: db.rollback() finally: db.close() except Exception: pass return {"deleted": len(ids)} @router.delete("/session/{sid}") def delete_session(request: Request, sid: str): """Permanently delete a session and all its messages.""" _verify_session_owner(request, sid) try: # Block deletion of starred/favorited sessions db = SessionLocal() try: db_sess = db.query(DbSession).filter(DbSession.id == sid).first() if db_sess and db_sess.is_important: raise HTTPException( status_code=403, detail={"error": "SESSION_STARRED", "message": "Unstar the session before deleting it"} ) finally: db.close() # Delete the session and all its messages if session_manager.delete_session(sid): return {"status": "deleted"} else: raise HTTPException(404, "Session not found") except HTTPException: raise except Exception as e: logger.error(f"Error deleting session {sid}: {e}") raise HTTPException( status_code=500, detail={ "error": "SESSION_DELETE_ERROR", "message": "Failed to delete session" } ) @router.delete("/sessions/all") def delete_all_sessions(request: Request): """Admin only: permanently delete ALL sessions and their messages.""" from core.middleware import require_admin require_admin(request) db = SessionLocal() try: from core.database import ChatMessage as DbChatMessage count = db.query(DbSession).count() db.query(DbChatMessage).delete() db.query(DbSession).delete() db.commit() session_manager.sessions.clear() logger.info(f"Admin deleted all {count} sessions") return {"status": "deleted", "count": count} except Exception as e: db.rollback() logger.error(f"Error deleting all sessions: {e}") raise HTTPException(500, "Failed to delete sessions") finally: db.close() @router.post("/session/{sid}/archive") def archive_session(request: Request, sid: str): """Archive a session, keeping its data but removing it from active sessions.""" _verify_session_owner(request, sid) try: # First check if session exists session_manager.get_session(sid) # Archive the session db = SessionLocal() try: db_session = db.query(DbSession).filter(DbSession.id == sid).first() if db_session: db_session.archived = True db_session.updated_at = datetime.utcnow() db.commit() # Update in memory if it exists if sid in session_manager.sessions: session_manager.sessions[sid].archived = True logger.info(f"Archived session {sid}") return {"status": "archived"} else: raise HTTPException(404, f"Session {sid} not found") except HTTPException: raise except Exception as e: db.rollback() logger.error(f"Error archiving session {sid}: {e}") raise HTTPException(500, "Failed to archive session") finally: db.close() except KeyError: raise HTTPException(404, f"Session '{sid}' not found") @router.post("/session/{sid}/unarchive") def unarchive_session(request: Request, sid: str): """Restore an archived session back to the active session list.""" _verify_session_owner(request, sid) db = SessionLocal() try: db_session = db.query(DbSession).filter(DbSession.id == sid).first() if not db_session: raise HTTPException(404, f"Session {sid} not found") db_session.archived = False db_session.updated_at = datetime.utcnow() db.commit() # Reload into session manager so it appears in the active list try: if sid in session_manager.sessions: session_manager.sessions[sid].archived = False else: session_manager._load_session_from_db(sid) except Exception: pass # Non-fatal — session will load on next access return {"status": "unarchived"} except HTTPException: raise except Exception as e: db.rollback() logger.error(f"Error unarchiving session {sid}: {e}") raise HTTPException(500, "Failed to unarchive session") finally: db.close() @router.get("/sessions/archived") def list_archived_sessions(request: Request, search: str = "", offset: int = 0, limit: int = 20, sort: str = "recent", model: str = ""): """List archived sessions for the archive browser.""" user = get_current_user(request) db = SessionLocal() try: q = db.query(DbSession).filter(DbSession.archived == True) if not user: raise HTTPException(403, "Authentication required") q = q.filter(DbSession.owner == user) if search: safe_search = search.replace('%', r'\%').replace('_', r'\_') q = q.filter(DbSession.name.ilike(f"%{safe_search}%", escape='\\')) if model: q = q.filter(DbSession.model.ilike(f"%{model}")) total = q.count() sort_map = { "recent": DbSession.updated_at.desc(), "oldest": DbSession.updated_at.asc(), "most-messages": DbSession.message_count.desc().nulls_last(), "alpha": DbSession.name.asc(), } order = sort_map.get(sort, DbSession.updated_at.desc()) rows = q.order_by(order).offset(offset).limit(limit).all() sessions = [] for s in rows: sessions.append({ "id": s.id, "name": s.name, "model": s.model, "message_count": s.message_count or 0, "created_at": s.created_at.isoformat() if s.created_at else None, "updated_at": s.updated_at.isoformat() if s.updated_at else None, "is_important": s.is_important, }) return {"sessions": sessions, "total": total} finally: db.close() @router.get("/history/{sid}") def get_history(request: Request, sid: str): _verify_session_owner(request, sid) try: session = session_manager.get_session(sid) except KeyError: raise HTTPException(404, f"Session {sid} not found") return {"history": [msg.to_dict() for msg in session.history]} @router.get("/session/{sid}/export") def export_session(request: Request, sid: str, fmt: str = "md", filename: str = ""): """Export conversation history as a downloadable file. Supported formats: md (markdown), txt (plain text), json, html """ _verify_session_owner(request, sid) try: session = session_manager.get_session(sid) except KeyError: raise HTTPException(404, f"Session {sid} not found") safe_name = re.sub(r'[^\w\-_]', '_', session.name) timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') if fmt == "json": import json as _json data = { "name": session.name, "model": session.model, "exported": datetime.now().isoformat(), "messages": [{"role": m.role, "content": m.content} for m in session.history], } out_name = filename or f"conversation_{safe_name}_{timestamp}.json" return Response( content=_json.dumps(data, indent=2, ensure_ascii=False), media_type="application/json", headers={"Content-Disposition": f"attachment; filename={out_name}"}, ) if fmt == "txt": lines = [] for m in session.history: lines.append(f"[{m.role.upper()}]") lines.append(m.content) lines.append("") out_name = filename or f"conversation_{safe_name}_{timestamp}.txt" return Response( content="\n".join(lines), media_type="text/plain", headers={"Content-Disposition": f"attachment; filename={out_name}"}, ) if fmt == "html": html_parts = [ "", f"{session.name}", "", f"

{session.name}

", ] for m in session.history: cls = "user" if m.role == "user" else "ai" content = m.content.replace("&", "&").replace("<", "<").replace(">", ">") content = content.replace("\n", "
") html_parts.append(f'
{m.role}
{content}
') html_parts.append("") out_name = filename or f"conversation_{safe_name}_{timestamp}.html" return Response( content="\n".join(html_parts), media_type="text/html", headers={"Content-Disposition": f"attachment; filename={out_name}"}, ) # Default: markdown markdown_lines = [] markdown_lines.append(f"# Conversation: {session.name}") markdown_lines.append(f"*Exported on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*") markdown_lines.append(f"*Model: {session.model}*") markdown_lines.append("\n---\n") for message in session.history: role = message.role.upper() content = message.content markdown_lines.append(f"### {role}") markdown_lines.append(f"{content}\n") markdown_lines.append("---\n") if len(markdown_lines) > 3: markdown_lines.pop() out_name = filename or f"conversation_{safe_name}_{timestamp}.md" return Response( content="\n".join(markdown_lines), media_type="text/markdown", headers={"Content-Disposition": f"attachment; filename={out_name}"}, ) @router.post("/sessions/save") def sessions_save_now(request: Request): user = get_current_user(request) if not user: raise HTTPException(401, "Not authenticated") session_manager.save_sessions() return {"ok": True, "path": SESSIONS_FILE} @router.post("/session/openai") def create_session_openai( request: Request, name: str = Form("New Chat (OpenAI)"), model: str = Form("gpt-4o"), rag: str = Form(None) ): if not OPENAI_API_KEY: raise HTTPException(400, "Server missing OPENAI_API_KEY") sid = str(uuid.uuid4()) user = get_current_user(request) session = session_manager.create_session( session_id=sid, name="", endpoint_url="https://api.openai.com/v1/chat/completions", model=model, rag=str(rag).lower() == "true", owner=user, ) session.headers = {"Authorization": f"Bearer {OPENAI_API_KEY}"} session_manager.save_sessions() from src.event_bus import fire_event fire_event("session_created", user) return {"id": sid, "name": "", "model": model} @router.post("/session/{session_id}/important") async def mark_session_important(request: Request, session_id: str, important: bool = Form(True)): """Mark a session as important to protect it from automatic cleanup.""" _verify_session_owner(request, session_id) try: # Validate session exists session_manager.get_session(session_id) # Update in database db = SessionLocal() try: db_session = db.query(DbSession).filter(DbSession.id == session_id).first() if db_session: db_session.is_important = important db_session.updated_at = datetime.utcnow() db.commit() # Update in memory if it exists if session_id in session_manager.sessions: session_manager.sessions[session_id].is_important = important return {"status": "success", "is_important": important} else: raise HTTPException(404, f"Session {session_id} not found") except HTTPException: raise except Exception as e: db.rollback() logger.error(f"Error updating session {session_id} importance: {e}") raise HTTPException(500, "Failed to update session importance") finally: db.close() except KeyError: raise HTTPException(404, f"Session {session_id} not found") @router.post("/session/{session_id}/compact") async def compact_session(request: Request, session_id: str): """Summarize older messages into one compacted history entry.""" _verify_session_owner(request, session_id) try: session = session_manager.get_session(session_id) except KeyError: raise HTTPException(404, f"Session {session_id} not found") history = list(session.history or []) if len(history) < 6: raise HTTPException(400, "Not enough messages to compact") # Keep a small recent tail verbatim. The prior half-chat/20-message # tail made manual compaction look like it did nothing on normal chats. recent_keep = min(8, max(4, len(history) // 4)) older = history[:-recent_keep] recent = history[-recent_keep:] if not older: raise HTTPException(400, "Nothing old enough to compact") from src.context_compactor import SELF_SUMMARY_SYSTEM_PROMPT from src.endpoint_resolver import resolve_endpoint from src.llm_core import llm_call_async url, model, headers = resolve_endpoint("utility") if not url or not model: url, model, headers = session.endpoint_url, session.model, session.headers if not url or not model: raise HTTPException(400, "No model configured for compaction") prior_compactions = sum( 1 for m in history if (m.metadata or {}).get("compacted") or "[Conversation summary" in (m.content or "") ) prompt = SELF_SUMMARY_SYSTEM_PROMPT.replace( "{count}", str(len(older)) ).replace( "{n}", str(prior_compactions + 1) ) convo_text = "\n".join( f"{m.role.upper()}: {(m.content or '')[:2000]}" for m in older ) try: summary = await llm_call_async( url, model, [{"role": "system", "content": prompt}, {"role": "user", "content": convo_text}], temperature=0.2, max_tokens=1024, headers=headers, timeout=60, ) except Exception as e: logger.error("Manual compaction failed: %s", e) raise HTTPException(500, "Compaction failed") summary_msg = ChatMessage( role="system", content=f"[Conversation summary]\n{summary}", metadata={ "compacted": True, "summarized_count": len(older), "timestamp": datetime.utcnow().isoformat(), }, ) new_history = [summary_msg] + recent if not session_manager.replace_messages(session_id, new_history): raise HTTPException(500, "Failed to save compacted history") return { "ok": True, "summarized": len(older), "kept": len(recent), "message_count": len(new_history), } @router.post("/sessions/auto-sort") def auto_sort_sessions(request: Request, skip_llm: bool = False): """Use AI to categorize all sessions into folders. Phase 1 deletes empty/throwaway sessions and Phase 2 asks the LLM to assign folders. When `skip_llm=true` the endpoint returns after Phase 1 — used by the "Tidy (no AI)" UI affordance so users can clean junk without spending tokens. """ from src.llm_core import llm_call user = get_current_user(request) user_sessions = session_manager.get_sessions_for_user(user) # Delete empty and throwaway sessions before sorting from core.database import ChatMessage as DbMsg db = SessionLocal() deleted_empty = 0 deleted_throwaway = 0 # Names that indicate a throwaway/test session (case-insensitive exact or prefix match) _THROWAWAY_NAMES = { "test", "testing", "asdf", "asd", "hello", "hi", "hey", "yo", "sup", "hola", "hii", "hiii", "heyo", "foo", "bar", "baz", "tmp", "temp", "scratch", "untitled", "new chat", "delete", "remove", "junk", "trash", "xxx", "abc", "qwerty", "blah", "stuff", "whatever", "idk", "ok", "lol", "bruh", "hmm", "hm", "meh", } _THROWAWAY_MAX_MESSAGES = 4 # only delete if <= this many messages try: rows = db.query(DbSession).filter(DbSession.archived == False).all() folder_map = {r.id: r.folder for r in rows} # Precompute per-session message counts in TWO aggregate queries # instead of 1–3 queries PER session — with many chats the per-row # loop was doing thousands of round-trips and blowing the timeout. from sqlalchemy import func as _sa_func _counts = dict(db.query(DbMsg.session_id, _sa_func.count(DbMsg.id)).group_by(DbMsg.session_id).all()) _asst_counts = dict( db.query(DbMsg.session_id, _sa_func.count(DbMsg.id)) .filter(DbMsg.role == "assistant").group_by(DbMsg.session_id).all() ) for row in rows: # Never delete important sessions if getattr(row, 'is_important', False): continue # Always delete incognito sessions during cleanup if (row.name or "").strip() == "Incognito": should_delete = True deleted_throwaway += 1 db.delete(row) if hasattr(session_manager, 'delete_session'): session_manager.delete_session(row.id) continue msg_count = _counts.get(row.id, 0) should_delete = False if msg_count == 0: should_delete = True deleted_empty += 1 elif msg_count <= _THROWAWAY_MAX_MESSAGES: name = (row.name or "").strip().lower() # Check first user message content (AI renames sessions, so # "hi" becomes "Casual Greeting Exchange" — name alone won't match) first_msg = db.query(DbMsg.content).filter( DbMsg.session_id == row.id, DbMsg.role == "user" ).order_by(DbMsg.timestamp).first() first_text = (first_msg[0] or "").strip().lower() if first_msg else "" # Count assistant messages — if user sent something but AI never replied, it's dead assistant_count = _asst_counts.get(row.id, 0) if name in _THROWAWAY_NAMES or name.startswith("chat:") or first_text in _THROWAWAY_NAMES: should_delete = True deleted_throwaway += 1 # Single user message with no AI response = dead session elif msg_count == 1 and assistant_count == 0: should_delete = True deleted_throwaway += 1 # Short phrase (1-3 words) with no real AI conversation (<=2 msgs) elif msg_count <= 2 and first_text and len(first_text.split()) <= 3 and len(first_text) <= 40: should_delete = True deleted_throwaway += 1 if should_delete: db.delete(row) if hasattr(session_manager, 'delete_session'): session_manager.delete_session(row.id) if deleted_empty or deleted_throwaway: db.commit() logger.info(f"Auto-sort: deleted {deleted_empty} empty + {deleted_throwaway} throwaway sessions") finally: db.close() # Re-fetch after cleanup if deleted_empty or deleted_throwaway: user_sessions = session_manager.get_sessions_for_user(user) # Short-circuit when the caller only wanted the cleanup phase # (the "Tidy (no AI)" path). Shape mirrors the post-Phase-1 # branch below so the frontend can render the same toast. if skip_llm: return { "status": "ok", "updated": 0, "folders": [], "deleted_empty": deleted_empty, "deleted_throwaway": deleted_throwaway, "unfiled_remaining": 0, "skipped_llm": True, } # Tidy works in batches: only sessions that don't already have a # folder, capped at TIDY_BATCH_SIZE (most recent first). Sending # all 100+ chats to one LLM call blows the context window, makes # the request slow, and re-bills the same tokens every click for # already-sorted chats. Skipping sessions with `current_folder` # means each Tidy press only handles new unfiled chats. TIDY_BATCH_SIZE = 15 all_candidates = [] for s in user_sessions.values(): if s.archived or s.name == "Incognito": continue if folder_map.get(s.id): # Already in a folder — skip on this pass. continue name = s.name or "(unnamed)" all_candidates.append({ "id": s.id, "name": name, "updated_at": getattr(s, "updated_at", None) or getattr(s, "created_at", None) or "", "current_folder": None, }) # Most-recent first, then take the top N for this batch. all_candidates.sort(key=lambda x: x.get("updated_at") or "", reverse=True) unfiled_total = len(all_candidates) session_list = all_candidates[:TIDY_BATCH_SIZE] if len(session_list) < 2: if deleted_empty or deleted_throwaway: return { "status": "ok", "updated": 0, "folders": [], "deleted_empty": deleted_empty, "deleted_throwaway": deleted_throwaway, "unfiled_remaining": unfiled_total, } return {"status": "skipped", "reason": "No unfiled sessions to sort"} # Pick an endpoint — prefer admin-configured task endpoint from src.task_endpoint import resolve_task_endpoint url, model, headers = resolve_task_endpoint() if not url: url, model, headers = _pick_endpoint_for_sort() if not url: raise HTTPException(503, "No available model endpoint for auto-sort") # Build prompt names_text = "\n".join(f' "{s["id"][:8]}": "{s["name"]}"' for s in session_list) prompt = ( "You are a session organizer. Group these chat sessions into folders by topic.\n\n" "Rules:\n" "- Be aggressive about grouping — put EVERY session in a folder\n" "- Use short folder names (2-4 words max)\n" "- Use the 8-char ID prefixes exactly as given\n" "- Output ONLY raw JSON, no markdown fences, no explanation\n\n" "Required JSON format:\n" '{"folders": {"Folder Name": ["id_prefix1", "id_prefix2"], "Other Folder": ["id_prefix3"]}}\n\n' f"Sessions (id_prefix: name):\n{{\n{names_text}\n}}" ) try: logger.info(f"Auto-sort: using model={model} at {url}") # 16384 (was 4096): with many chats the folder JSON is large, and a # reasoning model spends tokens thinking first — 4096 truncated the # JSON mid-output, so it never parsed ("invalid JSON for auto-sort"). raw = llm_call(url, model, [{"role": "user", "content": prompt}], temperature=0.3, max_tokens=16384, headers=headers, timeout=120) logger.info(f"Auto-sort raw response ({len(raw)} chars): {raw[:300]}") # Extract JSON from response — handle markdown fences, leading text, # reasoning-model blocks, and trailing commas. text = raw.strip() # Reasoning models emit (often containing { } that # would derail the brace scan) before the answer — drop it first. text = re.sub(r'[\s\S]*?', '', text, flags=re.I).strip() def _loads_lenient(s): """Parse JSON, retrying once with trailing commas stripped.""" if not s: return None for cand in (s, re.sub(r',(\s*[}\]])', r'\1', s)): try: return json.loads(cand) except json.JSONDecodeError: continue return None result = _loads_lenient(text) # Markdown code fence if result is None: fence_match = re.search(r'```(?:json)?\s*\n?([\s\S]*?)```', text) if fence_match: result = _loads_lenient(fence_match.group(1).strip()) # First { … last } block if result is None: brace_start = text.find('{') brace_end = text.rfind('}') if brace_start >= 0 and brace_end > brace_start: result = _loads_lenient(text[brace_start:brace_end + 1]) if result is None: logger.error(f"Auto-sort: could not parse JSON from: {text[:500]}") raise HTTPException(502, "AI returned invalid JSON for auto-sort — the model may not follow JSON instructions; try a different utility model in Settings.") except HTTPException: raise except Exception as e: logger.error(f"Auto-sort LLM call failed: {e}") raise HTTPException(502, f"Auto-sort failed: {str(e)}") folders = result.get("folders", {}) if not folders: return {"status": "skipped", "reason": "AI found no groupings"} # Build id -> folder map id_prefix_map = {s["id"][:8]: s["id"] for s in session_list} assignments = {} for folder_name, ids in folders.items(): for sid_or_prefix in ids: # Match by full ID or prefix full_id = None if sid_or_prefix in id_prefix_map.values(): full_id = sid_or_prefix else: # Try prefix match prefix = sid_or_prefix.rstrip(".").rstrip(" ") if prefix in id_prefix_map: full_id = id_prefix_map[prefix] else: # Fuzzy prefix match for p, fid in id_prefix_map.items(): if fid.startswith(prefix) or prefix.startswith(p): full_id = fid break if full_id: assignments[full_id] = folder_name # Apply folder assignments updated = 0 db = SessionLocal() try: for sid, folder_name in assignments.items(): db_session = db.query(DbSession).filter(DbSession.id == sid).first() if db_session: db_session.folder = folder_name db_session.updated_at = datetime.utcnow() updated += 1 db.commit() except Exception as e: db.rollback() logger.error(f"Auto-sort DB update failed: {e}") raise HTTPException(500, "Failed to apply folder assignments") finally: db.close() # How many unfiled chats are left after this batch — the # frontend uses this to decide whether to show "Tidy more" or # "All sorted!" in the toast. unfiled_remaining_after = max(0, unfiled_total - updated) return { "status": "ok", "folders": list(folders.keys()), "updated": updated, "deleted_empty": deleted_empty, "deleted_throwaway": deleted_throwaway, "unfiled_remaining": unfiled_remaining_after, } @router.get("/session/{session_id}/context_info") async def get_context_info(request: Request, session_id: str): """Get the real context length for a session's model from the endpoint.""" _verify_session_owner(request, session_id) session = session_manager.get_session(session_id) if not session: raise HTTPException(404, "Session not found") if not session.endpoint_url or not session.model: return {"context_length": None} try: from src.model_context import get_context_length ctx = get_context_length(session.endpoint_url, session.model) return {"context_length": ctx, "model": session.model} except Exception: return {"context_length": None} return router