// Mirror of https://github.com/pewdiepie-archdaemon/odysseus.git
// Synced 2026-06-16 17:55:26 -04:00
// Commit: "Odysseus v1.0"
// Diff hunk: @@ -0,0 +1,283 @@
// static/js/voiceRecorder.js
|
||||
|
||||
/**
 * Voice recording with optional Speech-to-Text (STT) transcription.
 *
 * STT providers:
 *   "disabled"      — record audio and attach it as a file (original behavior)
 *   "browser"       — use the Web Speech API for real-time transcription
 *   "local"         — send the recording to the server at /api/stt/transcribe (Whisper)
 *   "endpoint:<id>" — send the recording to the server at /api/stt/transcribe (API)
 */

// ---- Audio capture state ----------------------------------------------

// MediaRecorder created by startRecording(); null until the first recording.
let mediaRecorder = null;
// Non-empty data chunks delivered by the recorder's `dataavailable` events.
let audioChunks = [];
// Set to true once the recorder has started; cleared by _resetRecordingUI().
let isRecording = false;
// Date captured when the recorder was started.
let recordingStartTime = null;
// Interval handle that _resetRecordingUI() clears when it is set.
let recordingInterval = null;

// ---- Browser (Web Speech API) STT state -------------------------------

// SpeechRecognition instance created by startBrowserSTT(), or null.
let _recognition = null;
// Final recognition results accumulated for the current session.
let _browserTranscript = '';

// ---- Provider ---------------------------------------------------------

// Cached STT provider; refreshSttProvider() overwrites it from /api/stt/stats.
let _sttProvider = 'disabled';

/**
 * Reload the cached STT provider from the server.
 *
 * Requests `/api/stt/stats` and stores the response's `provider` field in
 * `_sttProvider`, falling back to 'disabled' when the field is missing or
 * falsy. A non-OK response leaves the cached value untouched. After a
 * successful update, `window._updateSendBtnIcon` is invoked when present.
 * Any failure is logged as a warning and never propagated.
 *
 * @returns {Promise<void>}
 */
async function refreshSttProvider() {
  try {
    const response = await fetch('/api/stt/stats', { credentials: 'same-origin' });
    if (!response.ok) {
      return;
    }

    const payload = await response.json();
    _sttProvider = payload.provider || 'disabled';

    // Let the send button pick the icon that matches the new provider.
    if (window._updateSendBtnIcon) {
      window._updateSendBtnIcon();
    }
  } catch (e) {
    console.warn('Failed to fetch STT stats:', e);
  }
}

/**
 * Format a duration in seconds as `MM:SS`.
 *
 * The input is floored to whole seconds first, so fractional values (for
 * example an elapsed time computed from millisecond timestamps) never leak
 * decimals into the seconds field. Negative or non-finite input is treated
 * as zero. Minutes are not wrapped at 60, so one hour renders as `60:00`.
 *
 * @param {number} seconds - Elapsed time in seconds.
 * @returns {string} Zero-padded `MM:SS` string.
 */
function formatTime(seconds) {
  const numeric = Number(seconds);
  const totalSeconds = Number.isFinite(numeric) ? Math.max(0, Math.floor(numeric)) : 0;

  const mins = String(Math.floor(totalSeconds / 60)).padStart(2, '0');
  const secs = String(totalSeconds % 60).padStart(2, '0');
  return `${mins}:${secs}`;
}

/**
 * Return the module and the send button to the "not recording" state.
 *
 * Clears `isRecording`, cancels the `recordingInterval` timer when one is
 * set, removes the `recording` class and empties `data-mode` on the
 * `.send-btn` element if it exists, and schedules
 * `window._updateSendBtnIcon` (when defined) to run 50 ms later.
 */
function _resetRecordingUI() {
  isRecording = false;

  if (recordingInterval) {
    clearInterval(recordingInterval);
    recordingInterval = null;
  }

  // Strip the recording appearance from the send button, if it is on the page.
  const button = document.querySelector('.send-btn');
  if (button) {
    button.classList.remove('recording');
    button.dataset.mode = '';
  }

  // The icon refresh is deferred rather than run synchronously.
  if (window._updateSendBtnIcon) {
    setTimeout(window._updateSendBtnIcon, 50);
  }
}

/**
 * Start a Web Speech API recognition session that runs alongside recording.
 *
 * Resets the accumulated transcript, stops any recognition session left
 * over from a previous call, and then starts a new continuous session that
 * appends every final result (followed by a space) to `_browserTranscript`.
 * When the browser exposes neither `SpeechRecognition` nor
 * `webkitSpeechRecognition` the function does nothing further.
 *
 * This function never throws: constructing or starting the recognizer can
 * fail, and such a failure must not abort the audio recording that the
 * caller has already started. The failure is logged and `_recognition` is
 * left as null.
 */
function startBrowserSTT() {
  _browserTranscript = '';

  // Do not leave an earlier session running once we replace the reference.
  if (_recognition) {
    try {
      _recognition.stop();
    } catch (e) {
      /* already stopped */
    }
    _recognition = null;
  }

  const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
  if (!SpeechRecognition) return;

  try {
    const recognition = new SpeechRecognition();
    recognition.continuous = true;
    recognition.interimResults = false;
    recognition.lang = '';

    recognition.onresult = (event) => {
      // Ignore results from a session that has been stopped or superseded,
      // so they cannot leak into a newer session's transcript.
      if (_recognition !== recognition) return;

      for (let i = event.resultIndex; i < event.results.length; i++) {
        if (event.results[i].isFinal) {
          _browserTranscript += event.results[i][0].transcript + ' ';
        }
      }
    };

    recognition.onerror = (e) => {
      console.warn('Browser STT error:', e.error);
    };

    _recognition = recognition;
    recognition.start();
  } catch (e) {
    console.warn('Browser STT failed to start:', e);
    _recognition = null;
  }
}

/**
 * Stop the active browser recognition session, if any, and return the text
 * collected so far.
 *
 * Errors thrown by `stop()` are ignored and the `_recognition` reference is
 * cleared either way. The return value is whatever has been accumulated in
 * `_browserTranscript` at the moment of the call, with surrounding
 * whitespace trimmed.
 *
 * @returns {string} Trimmed transcript (possibly empty).
 */
function stopBrowserSTT() {
  if (_recognition) {
    try {
      _recognition.stop();
    } catch {
      // Best effort: a recognizer that cannot be stopped is simply dropped.
    } finally {
      _recognition = null;
    }
  }

  const spokenText = _browserTranscript.trim();
  return spokenText;
}

/**
 * Upload a recording to the server and return the transcribed text.
 *
 * The blob is POSTed to `/api/stt/transcribe` as multipart form field
 * `file` with the file name `audio.webm`.
 *
 * @param {Blob} audioBlob - Recorded audio.
 * @returns {Promise<string>} The response's `text` field, or '' when it is
 *   missing or empty.
 * @throws {Error} When the response is not OK. The message is taken from
 *   the error body's `detail` (either a plain string or an object with a
 *   `message` field); if neither is available it is 'Transcription failed'.
 */
async function transcribeOnServer(audioBlob) {
  const formData = new FormData();
  formData.append('file', audioBlob, 'audio.webm');

  const res = await fetch('/api/stt/transcribe', {
    method: 'POST',
    credentials: 'same-origin',
    body: formData,
  });

  if (!res.ok) {
    // The error body may be missing, not JSON, or JSON `null`.
    const err = await res.json().catch(() => null);
    const detail = err?.detail;
    // Accept both `{ detail: "text" }` and `{ detail: { message: "text" } }`
    // so a string detail is not silently replaced by the generic message.
    const message = (typeof detail === 'string' ? detail : detail?.message) || 'Transcription failed';
    throw new Error(message);
  }

  const data = await res.json();
  return data?.text || '';
}

/**
 * Append transcribed text to the chat input (`#message`).
 *
 * Nothing happens when `text` is empty or the input element is missing.
 * Otherwise the input's current value is trimmed and the new text is added
 * after a single space (or used alone when the input was blank). A bubbling
 * `input` event is then dispatched so listeners can react (the original
 * author's note mentions auto-resize and the icon update), the input is
 * focused, and `showToast('Transcribed')` is called when a toast callback
 * was supplied.
 *
 * @param {string} text - Transcribed text to insert.
 * @param {Function} [showToast] - Optional toast callback.
 */
function insertTranscription(text, showToast) {
  const field = text ? document.getElementById('message') : null;
  if (!field) {
    return;
  }

  const current = field.value.trim();
  field.value = current ? `${current} ${text}` : text;

  // Notify listeners that the value changed programmatically.
  const changeEvent = new Event('input', { bubbles: true });
  field.dispatchEvent(changeEvent);
  field.focus();

  if (showToast) {
    showToast('Transcribed');
  }
}

// Container type requested from MediaRecorder whenever the browser supports it.
const _PREFERRED_RECORDING_TYPE = 'audio/webm';

// File extension used for each base MIME type a recording may end up with.
const _RECORDING_EXTENSIONS = new Map([
  ['audio/webm', 'webm'],
  ['audio/mp4', 'mp4'],
  ['audio/ogg', 'ogg'],
  ['audio/wav', 'wav'],
]);

/** Strip parameters such as `;codecs=...` from a MIME type and lower-case it. */
function _baseMimeType(mimeType) {
  return String(mimeType || '').split(';')[0].trim().toLowerCase();
}

/** Wrap a recorded blob in a timestamped `voice-message-<ms>.<ext>` File. */
function _createVoiceFile(audioBlob, mimeType) {
  const extension = _RECORDING_EXTENSIONS.get(mimeType) ?? 'webm';
  return new File([audioBlob], `voice-message-${Date.now()}.${extension}`, { type: mimeType });
}

/**
 * Create a MediaRecorder for `stream`, requesting audio/webm when the
 * browser reports support for it (or cannot be asked) and otherwise letting
 * the browser choose its own default container.
 */
function _createRecorder(stream) {
  const canProbe = typeof MediaRecorder.isTypeSupported === 'function';
  const usesPreferredType = !canProbe || MediaRecorder.isTypeSupported(_PREFERRED_RECORDING_TYPE);
  const recorder = usesPreferredType
    ? new MediaRecorder(stream, { mimeType: _PREFERRED_RECORDING_TYPE })
    : new MediaRecorder(stream);
  return { recorder, usesPreferredType };
}

/**
 * Start a voice recording.
 *
 * Requests microphone access and records until `stopRecording()` stops the
 * recorder. What happens with the audio then depends on `_sttProvider` at
 * stop time:
 *   - 'browser': the Web Speech API transcript is inserted into the chat
 *     input; with no transcript, a "No speech detected" toast is shown and
 *     the audio is attached as a file.
 *   - 'local' / 'endpoint:<id>': the audio is sent to the server for
 *     transcription; on failure the error is reported and the audio is
 *     attached as a file.
 *   - anything else: the audio is attached as a file.
 *
 * The call is ignored while a recorder is already recording. Problems are
 * reported through `showError`; the function itself returns nothing and
 * does not throw for unsupported or insecure environments.
 *
 * @param {Function} [onFileCreated] - Receives the recorded audio as a File.
 * @param {Function} [showToast] - Called with status messages.
 * @param {Function} [showError] - Called with error messages.
 */
export function startRecording(onFileCreated, showToast, showError) {
  // getUserMedia is only available in secure contexts (HTTPS or localhost).
  if (!window.isSecureContext) {
    if (showError) showError('Microphone requires HTTPS. Use a reverse proxy with SSL or access via localhost.');
    _resetRecordingUI();
    return;
  }

  if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
    if (showError) showError('Microphone not supported in this browser.');
    _resetRecordingUI();
    return;
  }

  // Starting again while recording would orphan the running recorder and
  // leave its microphone stream open with no way to stop it.
  if (mediaRecorder && mediaRecorder.state === 'recording') {
    return;
  }

  navigator.mediaDevices.getUserMedia({ audio: true })
    .then((stream) => {
      // Chunks belong to this recording only, so a later recording cannot
      // wipe them before this recorder's `stop` event has been handled.
      const chunks = [];
      let browserSttStarted = false;
      let recorder;
      let usesPreferredType;

      try {
        ({ recorder, usesPreferredType } = _createRecorder(stream));

        recorder.ondataavailable = (event) => {
          if (event.data.size > 0) {
            chunks.push(event.data);
          }
        };

        recorder.onstop = async () => {
          try {
            stream.getTracks().forEach((track) => track.stop());

            const mimeType = usesPreferredType
              ? _PREFERRED_RECORDING_TYPE
              : _baseMimeType(recorder.mimeType) || _PREFERRED_RECORDING_TYPE;
            const audioBlob = new Blob(chunks, { type: mimeType });
            const attachAsFile = () => {
              if (onFileCreated) onFileCreated(_createVoiceFile(audioBlob, mimeType));
            };

            const provider = _sttProvider;
            // Stop recognition whenever it was started for this recording,
            // even if the provider setting changed in the meantime.
            const browserTranscript = browserSttStarted ? stopBrowserSTT() : '';

            if (provider === 'browser') {
              if (browserTranscript) {
                insertTranscription(browserTranscript, showToast);
              } else {
                if (showToast) showToast('No speech detected');
                attachAsFile();
              }
            } else if (provider === 'local' || (typeof provider === 'string' && provider.startsWith('endpoint:'))) {
              // Show "Transcribing..." feedback
              if (showToast) showToast('Transcribing...', 5000);
              try {
                const transcript = await transcribeOnServer(audioBlob);
                if (transcript) {
                  insertTranscription(transcript, showToast);
                } else if (showToast) {
                  showToast('No speech detected');
                }
              } catch (e) {
                console.error('STT transcription error:', e);
                if (showError) showError('Transcription failed: ' + e.message);
                // Fallback: attach as file
                attachAsFile();
              }
            } else {
              // STT disabled — attach audio file
              attachAsFile();
            }
          } finally {
            // Always restore the UI, even when a callback above throws.
            _resetRecordingUI();
          }
        };

        recorder.start();
      } catch (error) {
        // The microphone is already open at this point; release it before
        // reporting the failure so the capture indicator does not stay on.
        stream.getTracks().forEach((track) => track.stop());
        throw error;
      }

      mediaRecorder = recorder;
      audioChunks = chunks;
      isRecording = true;
      recordingStartTime = new Date();

      // Start browser STT if that's the provider. A failure here must not
      // abort the audio recording that is already running.
      if (_sttProvider === 'browser') {
        try {
          startBrowserSTT();
          browserSttStarted = true;
        } catch (e) {
          console.warn('Browser STT failed to start:', e);
        }
      }

      if (showToast) {
        showToast('Recording...');
      }
    })
    .catch((error) => {
      console.error('Microphone access error:', error);
      if (showError) {
        if (error.name === 'NotAllowedError') {
          showError('Microphone access denied. Check browser permissions.');
        } else if (error.name === 'NotFoundError') {
          showError('No microphone found.');
        } else {
          showError('Microphone error: ' + error.message);
        }
      }
      _resetRecordingUI();
    });
}

/**
 * Stop the current voice recording.
 *
 * When a recorder exists and is in the `recording` state it is stopped;
 * the UI reset then happens from the recorder's `stop` handler. In every
 * other case the UI is reset immediately.
 */
export function stopRecording() {
  const recorderIsActive = Boolean(mediaRecorder) && mediaRecorder.state === 'recording';

  if (!recorderIsActive) {
    _resetRecordingUI();
    return;
  }

  // isRecording is cleared by _resetRecordingUI, called from the stop handler.
  mediaRecorder.stop();
}

/**
 * Report the module's recording flag.
 *
 * @returns {boolean} Current value of `isRecording`.
 */
export function getIsRecording() {
  return isRecording;
}

/**
 * Initialize the recorder module.
 *
 * Clears the recording flag and kicks off a refresh of the cached STT
 * provider without waiting for it to finish.
 */
export function init() {
  isRecording = false;
  // Fire-and-forget: the refresh result is not awaited here.
  void refreshSttProvider();
}

/**
 * Default export: the module's public functions bundled into one object,
 * plus a `_sttProvider` accessor pair that reads and writes the module's
 * cached provider value.
 */
const voiceRecorderModule = {
  startRecording,
  stopRecording,
  getIsRecording,
  init,
  refreshSttProvider,

  // Live view of the cached provider, not a snapshot taken at export time.
  get _sttProvider() {
    return _sttProvider;
  },
  set _sttProvider(nextProvider) {
    _sttProvider = nextProvider;
  },
};

export default voiceRecorderModule;

Reference in New Issue
Block a user