mirror of
https://github.com/barelyprofessional/KfChatDotNet.git
synced 2026-05-02 04:22:04 -04:00
Add Whisper transcription for BossmanJack Discord voice messages (#107)
* Add Whisper transcription for BossmanJack Discord voice messages

  Detect Discord voice message attachments (audio with IS_VOICE_MESSAGE flag) from the monitored user and transcribe them via OpenAI Whisper API before relaying to chat. Reuses the existing OpenAi.ApiKey setting. Feature is disabled by default via Whisper.Enabled setting.

* Use separate API key setting for Whisper transcription

* Switch to local Whisper and post-then-edit transcription flow

  Voice messages are now relayed immediately with a "transcribing..." placeholder, then transcribed locally via the whisper CLI and the message is edited to append the result. Removes OpenAI API dependency in favor of a local whisper binary.

  Settings: Whisper.BinaryPath, Whisper.Model, Whisper.Enabled

---------

Co-authored-by: DFE <dfe@dfe.com>
Co-authored-by: barelyprofessional <150058423+barelyprofessional@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
1a49fe1976
commit
3d269716e8
@@ -944,13 +944,72 @@ public class BotServices
|
||||
}
|
||||
|
||||
var result = $"[img]{settings[BuiltIn.Keys.DiscordIcon].Value}[/img] {message.Author.GlobalName ?? message.Author.Username}: {message.Content?.Replace("❤️", ":feels:")}";
|
||||
var voiceMessages = new List<(string Url, string Filename)>();
|
||||
foreach (var attachment in message.Attachments ?? [])
|
||||
{
|
||||
result += $"[br]Attachment: {attachment.GetProperty("filename").GetString()} {attachment.GetProperty("url").GetString()}";
|
||||
var filename = attachment.GetProperty("filename").GetString() ?? "unknown";
|
||||
var url = attachment.GetProperty("url").GetString() ?? "";
|
||||
|
||||
// Discord voice messages have content_type audio/ogg and the IS_VOICE_MESSAGE flag (1 << 13)
|
||||
if (attachment.TryGetProperty("content_type", out var contentTypeProp) &&
|
||||
contentTypeProp.GetString()?.StartsWith("audio/") == true &&
|
||||
attachment.TryGetProperty("flags", out var flagsProp) &&
|
||||
flagsProp.TryGetInt32(out var flags) &&
|
||||
(flags & (1 << 13)) != 0)
|
||||
{
|
||||
result += "[br]🎤 Voice message (transcribing...)";
|
||||
voiceMessages.Add((url, filename));
|
||||
}
|
||||
else
|
||||
{
|
||||
result += $"[br]Attachment: {filename} {url}";
|
||||
}
|
||||
}
|
||||
|
||||
_chatBot.SendChatMessage(result, TemporarilyBypassGambaSeshForDiscord);
|
||||
|
||||
var sentMsg = _chatBot.SendChatMessage(result, TemporarilyBypassGambaSeshForDiscord);
|
||||
UpdateBossmanLastSighting("talking in Discord").Wait(_cancellationToken);
|
||||
|
||||
// Transcribe voice messages in the background, then edit the sent message
|
||||
if (voiceMessages.Count > 0)
|
||||
{
|
||||
_ = Task.Run(async () =>
|
||||
{
|
||||
try
|
||||
{
|
||||
// Wait for the message to be echoed so we have a UUID to edit
|
||||
if (!await _chatBot.WaitForChatMessageAsync(sentMsg, TimeSpan.FromSeconds(10), _cancellationToken))
|
||||
{
|
||||
_logger.Warn("Voice message never got echoed, can't edit with transcription");
|
||||
return;
|
||||
}
|
||||
|
||||
var edited = result;
|
||||
foreach (var (url, filename) in voiceMessages)
|
||||
{
|
||||
var transcription = await WhisperTranscription.TranscribeFromUrlAsync(url, filename, _cancellationToken);
|
||||
if (transcription != null)
|
||||
{
|
||||
edited = edited.Replace("🎤 Voice message (transcribing...)",
|
||||
$"🎤 Voice message: [i]{transcription}[/i]");
|
||||
}
|
||||
else
|
||||
{
|
||||
edited = edited.Replace("🎤 Voice message (transcribing...)",
|
||||
"🎤 Voice message (transcription unavailable)");
|
||||
}
|
||||
}
|
||||
|
||||
if (sentMsg.ChatMessageUuid != null)
|
||||
{
|
||||
await _chatBot.KfClient.EditMessageAsync(sentMsg.ChatMessageUuid, edited);
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.Error(ex, "Failed to transcribe Discord voice message");
|
||||
}
|
||||
}, _cancellationToken);
|
||||
}
|
||||
}
|
||||
|
||||
private async Task DiscordFlashText(SentMessageTrackerModel msg)
|
||||
|
||||
Reference in New Issue
Block a user