diff --git a/KfChatDotNetBot/Services/BotServices.cs b/KfChatDotNetBot/Services/BotServices.cs
index f98d495..424c531 100644
--- a/KfChatDotNetBot/Services/BotServices.cs
+++ b/KfChatDotNetBot/Services/BotServices.cs
@@ -944,13 +944,72 @@ public class BotServices
}
var result = $"[img]{settings[BuiltIn.Keys.DiscordIcon].Value}[/img] {message.Author.GlobalName ?? message.Author.Username}: {message.Content?.Replace("❤️", ":feels:")}";
+ var voiceMessages = new List<(string Url, string Filename)>();
foreach (var attachment in message.Attachments ?? [])
{
- result += $"[br]Attachment: {attachment.GetProperty("filename").GetString()} {attachment.GetProperty("url").GetString()}";
+ var filename = attachment.GetProperty("filename").GetString() ?? "unknown";
+ var url = attachment.GetProperty("url").GetString() ?? "";
+
+ // Discord voice messages have content_type audio/ogg and the IS_VOICE_MESSAGE flag (1 << 13)
+ if (attachment.TryGetProperty("content_type", out var contentTypeProp) &&
+ contentTypeProp.GetString()?.StartsWith("audio/") == true &&
+ attachment.TryGetProperty("flags", out var flagsProp) &&
+ flagsProp.TryGetInt32(out var flags) &&
+ (flags & (1 << 13)) != 0)
+ {
+ result += "[br]🎤 Voice message (transcribing...)";
+ voiceMessages.Add((url, filename));
+ }
+ else
+ {
+ result += $"[br]Attachment: {filename} {url}";
+ }
}
-
- _chatBot.SendChatMessage(result, TemporarilyBypassGambaSeshForDiscord);
+
+ var sentMsg = _chatBot.SendChatMessage(result, TemporarilyBypassGambaSeshForDiscord);
UpdateBossmanLastSighting("talking in Discord").Wait(_cancellationToken);
+
+ // Transcribe voice messages in the background, then edit the sent message
+ if (voiceMessages.Count > 0)
+ {
+            _ = Task.Run(async () =>
+            {
+                try
+                {
+                    // Wait for the message to be echoed so we have a UUID to edit
+                    if (!await _chatBot.WaitForChatMessageAsync(sentMsg, TimeSpan.FromSeconds(10), _cancellationToken))
+                    {
+                        _logger.Warn("Voice message never got echoed, can't edit with transcription");
+                        return;
+                    }
+
+                    const string placeholder = "🎤 Voice message (transcribing...)";
+                    var edited = result;
+                    var searchFrom = 0;
+                    foreach (var (url, filename) in voiceMessages)
+                    {
+                        var transcription = await WhisperTranscription.TranscribeFromUrlAsync(url, filename, _cancellationToken);
+                        var replacement = transcription != null
+                            ? $"🎤 Voice message: [i]{transcription}[/i]"
+                            : "🎤 Voice message (transcription unavailable)";
+                        // string.Replace would rewrite every placeholder at once; swap only the next one per attachment
+                        var index = edited.IndexOf(placeholder, searchFrom, StringComparison.Ordinal);
+                        if (index < 0) break;
+                        edited = edited.Remove(index, placeholder.Length).Insert(index, replacement);
+                        searchFrom = index + replacement.Length;
+                    }
+
+                    if (sentMsg.ChatMessageUuid != null)
+                    {
+                        await _chatBot.KfClient.EditMessageAsync(sentMsg.ChatMessageUuid, edited);
+                    }
+                }
+                catch (Exception ex)
+                {
+                    _logger.Error(ex, "Failed to transcribe Discord voice message");
+                }
+            }, _cancellationToken);
+ }
}
private async Task DiscordFlashText(SentMessageTrackerModel msg)
diff --git a/KfChatDotNetBot/Services/WhisperTranscription.cs b/KfChatDotNetBot/Services/WhisperTranscription.cs
new file mode 100644
index 0000000..916e085
--- /dev/null
+++ b/KfChatDotNetBot/Services/WhisperTranscription.cs
@@ -0,0 +1,166 @@
+using System.Diagnostics;
+using System.Net;
+using KfChatDotNetBot.Settings;
+using NLog;
+
+namespace KfChatDotNetBot.Services;
+
+/// <summary>
+/// Local OpenAI Whisper integration for voice message transcription.
+///
+/// Downloads audio from a URL and runs it through a local Whisper binary
+/// (openai-whisper CLI) for speech-to-text transcription.
+///
+/// Requires: pip install openai-whisper (or faster-whisper)
+///
+/// Configuration:
+/// - Whisper.Enabled: Feature toggle (default: false)
+/// - Whisper.BinaryPath: Path to the whisper binary (default: whisper)
+/// - Whisper.Model: Model size - tiny, base, small, medium, large (default: base)
+/// - Proxy: Global proxy setting (optional, used for downloading)
+/// </summary>
+public static class WhisperTranscription
+{
+    private static readonly Logger Logger = LogManager.GetCurrentClassLogger();
+
+    /// <summary>
+    /// Downloads audio from a URL and transcribes it using local Whisper.
+    ///
+    /// Flow:
+    /// 1. Check if feature is enabled
+    /// 2. Download audio from URL to a temp file
+    /// 3. Run whisper CLI on the temp file with --output_format txt
+    /// 4. Read the output text file and return the transcription
+    /// 5. Clean up temp files
+    ///
+    /// Returns null on any failure. All errors are logged via NLog.
+    /// </summary>
+    /// <param name="url">URL of the audio attachment to download.</param>
+    /// <param name="fileName">Original attachment name; only its extension is used (falls back to .ogg).</param>
+    /// <param name="ct">Cancels the download, the Whisper run and the file reads.</param>
+    /// <returns>The trimmed transcription, or null when the feature is disabled, the result is empty, or anything fails.</returns>
+    public static async Task<string?> TranscribeFromUrlAsync(string url, string fileName, CancellationToken ct = default)
+    {
+        var settings = await SettingsProvider.GetMultipleValuesAsync([
+            BuiltIn.Keys.WhisperEnabled,
+            BuiltIn.Keys.WhisperBinaryPath,
+            BuiltIn.Keys.WhisperModel,
+            BuiltIn.Keys.Proxy
+        ]);
+
+        if (!settings[BuiltIn.Keys.WhisperEnabled].ToBoolean())
+        {
+            Logger.Debug("Whisper transcription is disabled");
+            return null;
+        }
+
+        var whisperBinary = settings[BuiltIn.Keys.WhisperBinaryPath].Value ?? "whisper";
+        var model = settings[BuiltIn.Keys.WhisperModel].Value ?? "base";
+        var proxy = settings[BuiltIn.Keys.Proxy].Value;
+
+        var tempDir = Path.Combine(Path.GetTempPath(), $"whisper_{Guid.NewGuid()}");
+        var ext = Path.GetExtension(fileName);
+        if (string.IsNullOrEmpty(ext)) ext = ".ogg";
+        var tempAudioPath = Path.Combine(tempDir, $"voice{ext}");
+
+        try
+        {
+            Directory.CreateDirectory(tempDir);
+
+            // Download the audio file
+            Logger.Info($"Downloading voice message from {url}");
+            var handler = new HttpClientHandler { AutomaticDecompression = DecompressionMethods.All };
+            using var client = new HttpClient(handler);
+            if (proxy != null)
+            {
+                handler.UseProxy = true;
+                handler.Proxy = new WebProxy(proxy);
+            }
+            var audioBytes = await client.GetByteArrayAsync(url, ct);
+            await File.WriteAllBytesAsync(tempAudioPath, audioBytes, ct);
+            Logger.Info($"Downloaded {audioBytes.Length} bytes to {tempAudioPath}");
+
+            // Run local whisper. ArgumentList escapes each value, so neither the attachment's
+            // extension nor the configured model name can smuggle in extra arguments.
+            var processInfo = new ProcessStartInfo
+            {
+                FileName = whisperBinary,
+                RedirectStandardOutput = true,
+                RedirectStandardError = true,
+                UseShellExecute = false,
+                CreateNoWindow = true
+            };
+            foreach (var arg in new[] { tempAudioPath, "--model", model, "--output_format", "txt", "--output_dir", tempDir })
+            {
+                processInfo.ArgumentList.Add(arg);
+            }
+            Logger.Info($"Running: {whisperBinary} {string.Join(" ", processInfo.ArgumentList)}");
+
+            using var process = Process.Start(processInfo);
+            if (process == null)
+            {
+                Logger.Error("Failed to start Whisper process");
+                return null;
+            }
+
+            // Drain both pipes while Whisper runs: an unread redirected pipe blocks the child
+            // once its buffer fills, and WaitForExitAsync would then never complete.
+            var stdoutTask = process.StandardOutput.ReadToEndAsync(ct);
+            var stderrTask = process.StandardError.ReadToEndAsync(ct);
+            string stderr;
+            try
+            {
+                await process.WaitForExitAsync(ct);
+                await stdoutTask;
+                stderr = await stderrTask;
+            }
+            catch (OperationCanceledException)
+            {
+                // Cancelling the wait does not stop the child; kill it so no Whisper run is orphaned
+                if (!process.HasExited) process.Kill(entireProcessTree: true);
+                throw;
+            }
+
+            if (process.ExitCode != 0)
+            {
+                Logger.Error($"Whisper exited with code {process.ExitCode}: {stderr}");
+                return null;
+            }
+
+            // Whisper outputs a .txt file with the same base name as the input
+            var outputPath = Path.Combine(tempDir, "voice.txt");
+            if (!File.Exists(outputPath))
+            {
+                Logger.Error($"Whisper output file not found at {outputPath}");
+                return null;
+            }
+
+            var transcription = (await File.ReadAllTextAsync(outputPath, ct)).Trim();
+
+            if (string.IsNullOrWhiteSpace(transcription))
+            {
+                Logger.Warn("Whisper returned empty transcription");
+                return null;
+            }
+
+            Logger.Info($"Transcription received: {transcription.Length} characters");
+            return transcription;
+        }
+        catch (Exception ex)
+        {
+            Logger.Error(ex, "Error during Whisper transcription");
+            return null;
+        }
+        finally
+        {
+            try
+            {
+                if (Directory.Exists(tempDir)) Directory.Delete(tempDir, true);
+            }
+            catch (Exception ex)
+            {
+                Logger.Warn(ex, "Failed to cleanup Whisper temp files");
+            }
+        }
+    }
+}
diff --git a/KfChatDotNetBot/Settings/BuiltIn.cs b/KfChatDotNetBot/Settings/BuiltIn.cs
index 024c60c..d31fd27 100644
--- a/KfChatDotNetBot/Settings/BuiltIn.cs
+++ b/KfChatDotNetBot/Settings/BuiltIn.cs
@@ -574,6 +574,12 @@ public static class BuiltIn
public static string KasinoKrashEnabled = "Kasino.Krash.Enabled";
[BuiltInSetting("Delay in milliseconds before cleaning up krash", SettingValueType.Text, "10000", WholeNumberRegex)]
public static string KasinoKrashCleanupDelay = "Kasino.Krash.CleanupDelay";
+ [BuiltInSetting("Whether Whisper transcription is enabled", SettingValueType.Boolean, "false", BooleanRegex)]
+ public static string WhisperEnabled = "Whisper.Enabled";
+ [BuiltInSetting("Path of the Whisper binary", SettingValueType.Text, "whisper")]
+ public static string WhisperBinaryPath = "Whisper.BinaryPath";
+ [BuiltInSetting("Whisper model name (e.g. tiny, base, small, medium, large)", SettingValueType.Text, "base")]
+ public static string WhisperModel = "Whisper.Model";
[BuiltInSetting("Whether Winna is enabled", SettingValueType.Boolean, "false", BooleanRegex)]
public static string WinnaEnabled = "Winna.Enabled";
[BuiltInSetting("BossmanJack's Winna username", SettingValueType.Text, "ImBossmanJack")]