fix(stt): make local microphone transcription work without torch (#801)

faster-whisper runs on CTranslate2, not torch, but _get_whisper() imported torch (only to check CUDA availability) inside the same try block as the faster-whisper import. On a torch-less machine that import raised ImportError, which was reported as the misleading 'faster-whisper not installed' even when faster-whisper was installed, so local mic transcription silently failed. Probe torch separately and optionally: present and CUDA available -> cuda; absent or unusable -> cpu. Also declare faster-whisper in requirements-optional.txt (torch stays an optional extra for GPU).
2026-06-17 10:15:27 -04:00 · 2026-06-01 23:16:54 -03:00
parent 7448b88652
commit 290cd7f1cd
2 changed files with 28 additions and 8 deletions
@@ -4,6 +4,14 @@
 # Note: chromadb-client + fastembed moved to requirements.txt — RAG, semantic
 # memory, and tool selection are core paths, so they ship by default now.
 # Local speech-to-text (microphone -> text) via faster-whisper, for the
 # "local" STT provider. Runs on CPU out of the box (CTranslate2 backend, no
 # torch needed). Install if you want to dictate/transcribe with the mic
 # without sending audio to an external endpoint.
 # Optional extra: install `torch` too if you have a CUDA GPU and want
 # GPU-accelerated transcription — it's auto-detected, CPU is used otherwise.
 faster-whisper
 # DuckDuckGo as a search provider option.
 # Install if you want DDG in the search-provider dropdown.
 # Alternatives: SearXNG, Brave, Tavily, Serper, Google PSE.
@@ -59,17 +59,29 @@ class STTService:
        if self._whisper_model is None:
            try:
                from faster_whisper import WhisperModel
                settings = self._load_settings()
                model_size = settings.get("stt_model", "base")
                # Use CPU by default; will use CUDA if available
                import torch
                device = "cuda" if torch.cuda.is_available() else "cpu"
                compute_type = "float16" if device == "cuda" else "int8"
                self._whisper_model = WhisperModel(model_size, device=device, compute_type=compute_type)
                logger.info(f"faster-whisper model '{model_size}' loaded on {device}")
            except ImportError:
                logger.warning("faster-whisper not installed. Install with: pip install faster-whisper")
                return None
            try:
                settings = self._load_settings()
                model_size = settings.get("stt_model", "base")
                # faster-whisper runs on CTranslate2, not torch. torch is only
                # used (optionally) to detect a CUDA device for acceleration —
                # if it's missing or unusable we just run on CPU. Keeping this
                # probe separate (and tolerant of any failure, e.g. a broken
                # CUDA/torch install that raises OSError on import) means a
                # torch-less or torch-broken machine still does CPU
                # transcription instead of failing with a misleading
                # "faster-whisper not installed" error.
                try:
                    import torch
                    use_cuda = torch.cuda.is_available()
                except Exception:
                    use_cuda = False
                device = "cuda" if use_cuda else "cpu"
                compute_type = "float16" if device == "cuda" else "int8"
                self._whisper_model = WhisperModel(model_size, device=device, compute_type=compute_type)
                logger.info(f"faster-whisper model '{model_size}' loaded on {device}")
            except Exception as e:
                logger.error(f"Failed to load whisper model: {e}")
                return None