From 290cd7f1cd6fbed6d6634155cdc556d03ac1426a Mon Sep 17 00:00:00 2001 From: Abeelha Date: Mon, 1 Jun 2026 23:16:54 -0300 Subject: [PATCH] fix(stt): make local microphone transcription work without torch (#801) faster-whisper runs on CTranslate2, not torch, but _get_whisper() imported torch (only to check CUDA availability) inside the same try block as the faster-whisper import. On a torch-less machine that raised ImportError and reported the misleading 'faster-whisper not installed' even when it was installed, so local mic transcription silently failed. Probe torch separately and optionally: torch present and CUDA available -> cuda; torch absent, unusable, or without CUDA -> cpu. Also declare faster-whisper in requirements-optional.txt (torch stays an optional extra for GPU). --- requirements-optional.txt | 8 ++++++++ services/stt/stt_service.py | 28 ++++++++++++++++++++-------- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/requirements-optional.txt b/requirements-optional.txt index 72d9f7e69..d4900fe83 100644 --- a/requirements-optional.txt +++ b/requirements-optional.txt @@ -4,6 +4,14 @@ # Note: chromadb-client + fastembed moved to requirements.txt — RAG, semantic # memory, and tool selection are core paths, so they ship by default now. +# Local speech-to-text (microphone -> text) via faster-whisper, for the +# "local" STT provider. Runs on CPU out of the box (CTranslate2 backend, no +# torch needed). Install if you want to dictate/transcribe with the mic +# without sending audio to an external endpoint. +# Optional extra: install `torch` too if you have a CUDA GPU and want +# GPU-accelerated transcription — it's auto-detected, CPU is used otherwise. +faster-whisper + # DuckDuckGo as a search provider option. # Install if you want DDG in the search-provider dropdown. # Alternatives: SearXNG, Brave, Tavily, Serper, Google PSE. 
diff --git a/services/stt/stt_service.py b/services/stt/stt_service.py index 55e57afb1..0587128e6 100644 --- a/services/stt/stt_service.py +++ b/services/stt/stt_service.py @@ -59,17 +59,29 @@ class STTService: if self._whisper_model is None: try: from faster_whisper import WhisperModel - settings = self._load_settings() - model_size = settings.get("stt_model", "base") - # Use CPU by default; will use CUDA if available - import torch - device = "cuda" if torch.cuda.is_available() else "cpu" - compute_type = "float16" if device == "cuda" else "int8" - self._whisper_model = WhisperModel(model_size, device=device, compute_type=compute_type) - logger.info(f"faster-whisper model '{model_size}' loaded on {device}") except ImportError: logger.warning("faster-whisper not installed. Install with: pip install faster-whisper") return None + try: + settings = self._load_settings() + model_size = settings.get("stt_model", "base") + # faster-whisper runs on CTranslate2, not torch. torch is only + # used (optionally) to detect a CUDA device for acceleration — + # if it's missing or unusable we just run on CPU. Keeping this + # probe separate (and tolerant of any failure, e.g. a broken + # CUDA/torch install that raises OSError on import) means a + # torch-less or torch-broken machine still does CPU + # transcription instead of failing with a misleading + # "faster-whisper not installed" error. + try: + import torch + use_cuda = torch.cuda.is_available() + except Exception: + use_cuda = False + device = "cuda" if use_cuda else "cpu" + compute_type = "float16" if device == "cuda" else "int8" + self._whisper_model = WhisperModel(model_size, device=device, compute_type=compute_type) + logger.info(f"faster-whisper model '{model_size}' loaded on {device}") except Exception as e: logger.error(f"Failed to load whisper model: {e}") return None