STT: clean temp audio files on transcription failure

STTService._transcribe_local writes the audio to a NamedTemporaryFile
(delete=False) and unlinks it only on the success path, inside the try body.
If model.transcribe() raises (corrupt audio, model/runtime error, etc.) the
function logs, returns None, and leaves the .webm temp file behind — so
every failed local transcription leaks a file in the system temp dir.

Initialize tmp_path = None up front and move the unlink into a finally
block so the temp file is cleaned up whether transcription succeeds or
raises.

tests/test_stt_leak.py stubs the whisper model to raise during transcribe,
runs _transcribe_local, and asserts it returns None and leaves no new .webm
file in the temp dir. The test fails before this change.
This commit is contained in:
Tatlatat
2026-06-02 18:43:24 +07:00
committed by GitHub
parent f8e3bfeaff
commit 3885f9fa90
2 changed files with 34 additions and 3 deletions
+4 -3
View File
@@ -91,6 +91,7 @@ class STTService:
model = self._get_whisper()
if not model:
return None
tmp_path = None
try:
# Write to temp file (faster-whisper needs a file path or file-like)
with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as tmp:
@@ -104,14 +105,14 @@ class STTService:
segments, info = model.transcribe(tmp_path, **kwargs)
text = " ".join(seg.text.strip() for seg in segments)
# Cleanup
Path(tmp_path).unlink(missing_ok=True)
logger.info(f"Local STT: {len(text)} chars, lang={info.language}, prob={info.language_probability:.2f}")
return text
except Exception as e:
logger.error(f"Local STT transcription failed: {e}", exc_info=True)
return None
finally:
if tmp_path:
Path(tmp_path).unlink(missing_ok=True)
# ── API endpoint ──