From 290cd7f1cd6fbed6d6634155cdc556d03ac1426a Mon Sep 17 00:00:00 2001
From: Abeelha <dev.theodoro@gmail.com>
Date: Mon, 1 Jun 2026 23:16:54 -0300
Subject: [PATCH] fix(stt): make local microphone transcription work without
 torch (#801)

faster-whisper runs on CTranslate2, not torch, but _get_whisper()
imported torch (only to check CUDA availability) inside the same try as
the faster-whisper import. On a torch-less machine the torch import
raised ImportError, which was reported as the misleading
'faster-whisper not installed' even when it was installed, so local mic
transcription silently failed.

Probe torch separately and optionally: if torch imports and reports
CUDA as available -> cuda; if it is absent, fails to import, or reports
no CUDA -> cpu. Also declare faster-whisper in requirements-optional.txt
(torch stays an optional extra for GPU).
---
 requirements-optional.txt   |  8 ++++++++
 services/stt/stt_service.py | 28 ++++++++++++++++++++--------
 2 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/requirements-optional.txt b/requirements-optional.txt
index 72d9f7e69..d4900fe83 100644
--- a/requirements-optional.txt
+++ b/requirements-optional.txt
@@ -4,6 +4,14 @@
 # Note: chromadb-client + fastembed moved to requirements.txt — RAG, semantic
 # memory, and tool selection are core paths, so they ship by default now.
 
+# Local speech-to-text (microphone -> text) via faster-whisper, for the
+# "local" STT provider. Runs on CPU out of the box (CTranslate2 backend, no
+# torch needed). Install if you want to dictate/transcribe with the mic
+# without sending audio to an external endpoint.
+# Optional extra: install `torch` too if you have a CUDA GPU and want
+# GPU-accelerated transcription — it's auto-detected, CPU is used otherwise.
+faster-whisper
+
 # DuckDuckGo as a search provider option.
 # Install if you want DDG in the search-provider dropdown.
 # Alternatives: SearXNG, Brave, Tavily, Serper, Google PSE.
diff --git a/services/stt/stt_service.py b/services/stt/stt_service.py
index 55e57afb1..0587128e6 100644
--- a/services/stt/stt_service.py
+++ b/services/stt/stt_service.py
@@ -59,17 +59,29 @@ class STTService:
         if self._whisper_model is None:
             try:
                 from faster_whisper import WhisperModel
-                settings = self._load_settings()
-                model_size = settings.get("stt_model", "base")
-                # Use CPU by default; will use CUDA if available
-                import torch
-                device = "cuda" if torch.cuda.is_available() else "cpu"
-                compute_type = "float16" if device == "cuda" else "int8"
-                self._whisper_model = WhisperModel(model_size, device=device, compute_type=compute_type)
-                logger.info(f"faster-whisper model '{model_size}' loaded on {device}")
             except ImportError:
                 logger.warning("faster-whisper not installed. Install with: pip install faster-whisper")
                 return None
+            try:
+                settings = self._load_settings()
+                model_size = settings.get("stt_model", "base")
+                # faster-whisper runs on CTranslate2, not torch. torch is only
+                # used (optionally) to detect a CUDA device for acceleration —
+                # if it's missing or unusable we just run on CPU. Keeping this
+                # probe separate (and tolerant of any failure, e.g. a broken
+                # CUDA/torch install that raises OSError on import) means a
+                # torch-less or torch-broken machine still does CPU
+                # transcription instead of failing with a misleading
+                # "faster-whisper not installed" error.
+                try:
+                    import torch
+                    use_cuda = torch.cuda.is_available()
+                except Exception:
+                    use_cuda = False
+                device = "cuda" if use_cuda else "cpu"
+                compute_type = "float16" if device == "cuda" else "int8"
+                self._whisper_model = WhisperModel(model_size, device=device, compute_type=compute_type)
+                logger.info(f"faster-whisper model '{model_size}' loaded on {device}")
             except Exception as e:
                 logger.error(f"Failed to load whisper model: {e}")
                 return None