Attempt to improve the way long strings are split up. There's a new extension method called FancySplitMessage to achieve this. Truncation options now also work on byte length instead of string length.

This commit is contained in:
barelyprofessional
2025-02-09 15:59:39 +08:00
parent 4448a6a70a
commit f0fc79c142
3 changed files with 85 additions and 7 deletions

View File

@@ -1,4 +1,5 @@
using System.Text.RegularExpressions;
using System.Text;
using System.Text.RegularExpressions;
namespace KfChatDotNetBot;
@@ -44,4 +45,80 @@ public static class Extensions
}
}
/// <summary>
/// Split messages to x number of bytes while avoiding splitting mid-word where possible
/// </summary>
/// <remarks>
/// The input is split on single spaces and words are packed greedily into parts. Sizes are measured in
/// UTF-8 bytes. A word that is larger than <paramref name="partLengthBytes"/> on its own is handed to
/// ChunkBytes and its chunks become parts of their own. Trailing whitespace is trimmed from every packed
/// part, and parts that would be empty after trimming are dropped, except that the result always holds
/// at least one element (an empty string when the input contains nothing but spaces).
/// </remarks>
/// <param name="s">String that should get split</param>
/// <param name="partLengthBytes">Length limit, no part should be > than the number of bytes specified. Must be at least 1.</param>
/// <param name="partLimit">Limit for how many parts to return (returns first n elements). Set to 0 to disable.</param>
/// <returns>The parts in their original order, cut down to the first <paramref name="partLimit"/> when a limit is set</returns>
/// <exception cref="ArgumentNullException"><paramref name="s"/> is null</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="partLengthBytes"/> is zero or negative</exception>
public static List<string> FancySplitMessage(this string s, int partLengthBytes = 1023, int partLimit = 5)
{
    ArgumentNullException.ThrowIfNull(s);
    if (partLengthBytes <= 0)
    {
        // A non-positive limit can never be satisfied and used to make the chunking below spin or fail
        throw new ArgumentOutOfRangeException(nameof(partLengthBytes), partLengthBytes,
            "Part length must be at least one byte.");
    }

    var output = new List<string>();
    var part = new StringBuilder();
    // UTF-8 size of the buffered part including its trailing space, kept up to date so the
    // buffer does not have to be re-encoded for every word
    var partBytes = 0;

    foreach (var word in s.Split(' '))
    {
        var wordBytes = word.Utf8LengthBytes();
        if (wordBytes > partLengthBytes)
        {
            // Add the part already in memory if there is one
            FlushPart();
            // Breaks into chunks of x size which will break really long URLs etc. but no other way really
            output.AddRange(word.ChunkBytes(partLengthBytes));
            continue;
        }

        if (partBytes + wordBytes > partLengthBytes)
        {
            // The word does not fit any more, so it starts a new part
            FlushPart();
        }

        part.Append(word).Append(' ');
        partBytes += wordBytes + 1;
    }

    // Add on whatever remains. An empty remainder is only kept when nothing else was produced,
    // so the result is never an empty list.
    var remainder = part.ToString().TrimEnd();
    if (remainder.Length > 0 || output.Count == 0)
    {
        output.Add(remainder);
    }

    if (partLimit != 0 && output.Count > partLimit)
    {
        return output.Take(partLimit).ToList();
    }

    return output;

    // Moves the buffered part into the output, skipping buffers that only held whitespace
    void FlushPart()
    {
        // TrimEnd() to remove trailing spaces
        var text = part.ToString().TrimEnd();
        part.Clear();
        partBytes = 0;
        if (text.Length > 0)
        {
            output.Add(text);
        }
    }
}
/// <summary>
/// Gets the number of bytes the string occupies when encoded as UTF-8
/// </summary>
/// <param name="s">String to measure</param>
/// <returns>Byte count reported by <see cref="Encoding.UTF8"/> for <paramref name="s"/></returns>
public static int Utf8LengthBytes(this string s) => Encoding.UTF8.GetByteCount(s);
/// <summary>
/// Breaks a string into consecutive pieces of at most <paramref name="bytesPerChunk"/> UTF-8 bytes each,
/// cutting only between characters so that no piece contains a partially encoded character
/// </summary>
/// <param name="input">String to break up</param>
/// <param name="bytesPerChunk">Maximum UTF-8 size of a piece in bytes. Must be at least 1.</param>
/// <returns>
/// The pieces in order. A single character whose encoding is larger than <paramref name="bytesPerChunk"/>
/// is returned whole as its own piece, which is the only case where a piece exceeds the limit.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="input"/> is null</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="bytesPerChunk"/> is zero or negative</exception>
public static IEnumerable<string> ChunkBytes(this string input, int bytesPerChunk)
{
    // Validation lives outside the iterator so bad arguments fail at the call site
    // instead of on first enumeration
    ArgumentNullException.ThrowIfNull(input);
    if (bytesPerChunk <= 0)
    {
        // Zero used to loop forever because the offset never advanced
        throw new ArgumentOutOfRangeException(nameof(bytesPerChunk), bytesPerChunk,
            "Chunk size must be at least one byte.");
    }

    return SplitOnCharacterBoundaries(Encoding.UTF8.GetBytes(input), bytesPerChunk);

    static IEnumerable<string> SplitOnCharacterBoundaries(byte[] bytes, int limit)
    {
        var start = 0;
        while (start < bytes.Length)
        {
            var end = start + Math.Min(limit, bytes.Length - start);

            // UTF-8 continuation bytes look like 10xxxxxx. If the cut would land on one, the
            // character straddles the limit, so back off to the byte that starts it.
            while (end > start && end < bytes.Length && (bytes[end] & 0xC0) == 0x80)
            {
                end--;
            }

            if (end == start)
            {
                // The character at the start is wider than the limit on its own. Emit it whole
                // rather than corrupting it or never advancing.
                end = start + 1;
                while (end < bytes.Length && (bytes[end] & 0xC0) == 0x80)
                {
                    end++;
                }
            }

            yield return Encoding.UTF8.GetString(bytes, start, end - start);
            start = end;
        }
    }
}
/// <summary>
/// Cuts a string down to at most <paramref name="limitBytes"/> UTF-8 bytes without cutting through the
/// middle of a character, then trims trailing whitespace
/// </summary>
/// <param name="s">String to truncate</param>
/// <param name="limitBytes">Maximum UTF-8 size of the result in bytes. Zero or less gives an empty string.</param>
/// <returns>
/// The longest prefix of <paramref name="s"/> made of whole characters that fits in
/// <paramref name="limitBytes"/>, with trailing whitespace removed
/// </returns>
public static string TruncateBytes(this string s, int limitBytes)
{
    var bytes = Encoding.UTF8.GetBytes(s);
    var end = Math.Max(0, Math.Min(limitBytes, bytes.Length));

    // UTF-8 continuation bytes look like 10xxxxxx. If the cut would land on one, the last character
    // does not fit completely, so drop it instead of decoding half of it into U+FFFD.
    while (end > 0 && end < bytes.Length && (bytes[end] & 0xC0) == 0x80)
    {
        end--;
    }

    return Encoding.UTF8.GetString(bytes, 0, end).TrimEnd();
}
}