Skip to content

Instantly share code, notes, and snippets.

@grandsilence
Last active October 28, 2019 21:56
Show Gist options
  • Save grandsilence/371615c953e5286a3d5eb55905782435 to your computer and use it in GitHub Desktop.
Save grandsilence/371615c953e5286a3d5eb55905782435 to your computer and use it in GitHub Desktop.
C#: correctly split messages into chunks for a Telegram bot (limit of 4096 per message), with emoji and Unicode support.
// Telegram has a limit of 4096 per message, so use chunkSize = 4096.
/// <summary>
/// Splits a string into consecutive chunks whose weighted size never exceeds
/// <paramref name="chunkSize"/>, without ever cutting a surrogate pair in half.
/// </summary>
/// <remarks>
/// The budget is counted in weighted units per code point, not in <c>string.Length</c>:
/// code points below 128 cost 1, code points below 32768 cost 2, and all others cost 4.
/// A chunk is closed as soon as the next code point would push its cost above the budget.
/// Unpaired surrogates are emitted as U+FFFD (cost 4).
/// Splitting happens between code points, so sequences made of several code points
/// (combining marks, joined emoji) can still end up in different chunks.
/// NOTE(review): the 128 / 32768 thresholds match neither UTF-8 nor UTF-16 sizes;
/// confirm them against the limit actually enforced by the receiving service.
/// </remarks>
/// <param name="self">Text to split. <c>null</c> and empty strings yield no chunks.</param>
/// <param name="chunkSize">Maximum weighted size of one chunk; must be at least 4.</param>
/// <returns>The chunks in original order; concatenating them reproduces the text
/// (with unpaired surrogates replaced by U+FFFD).</returns>
/// <exception cref="ArgumentException">
/// <paramref name="chunkSize"/> is less than 4 and <paramref name="self"/> is not empty.
/// </exception>
public static IEnumerable<string> ChunkUnicode(this string self, int chunkSize)
{
    // The emptiness check intentionally comes first: empty input never throws,
    // whatever chunkSize is.
    if (string.IsNullOrEmpty(self))
    {
        return Enumerable.Empty<string>();
    }

    // The most expensive code point costs 4, so a smaller budget could never hold one.
    if (chunkSize < 4)
    {
        throw new ArgumentException("Minimum chunk size is 4.", nameof(chunkSize));
    }

    const int ReplacementCodepoint = 0xFFFD;

    var results = new List<string>();
    var msg = new StringBuilder();
    int messageLength = 0;

    // Decode code points straight from the UTF-16 text. This avoids a temporary
    // UTF-32 byte buffer and any dependence on the byte order of the host machine.
    int index = 0;
    while (index < self.Length)
    {
        int codepoint;
        int consumed;

        if (char.IsSurrogatePair(self, index))
        {
            codepoint = char.ConvertToUtf32(self, index);
            consumed = 2;
        }
        else
        {
            char unit = self[index];

            // A surrogate that is not part of a valid pair cannot be represented
            // on its own; substitute the Unicode replacement character.
            codepoint = char.IsSurrogate(unit) ? ReplacementCodepoint : unit;
            consumed = 1;
        }

        int charSize;
        if (codepoint < 128)
        {
            charSize = 1;
        }
        else if (codepoint < 32768)
        {
            charSize = 2;
        }
        else
        {
            charSize = 4;
        }

        // Close the current chunk before it would overflow. Because chunkSize >= 4,
        // this can only trigger when the chunk already holds something.
        if (messageLength + charSize > chunkSize)
        {
            results.Add(msg.ToString());
            msg.Clear();
            messageLength = 0;
        }

        if (consumed == 2)
        {
            // Copy both halves of the pair together so it is never split.
            msg.Append(self, index, 2);
        }
        else
        {
            msg.Append((char)codepoint);
        }

        messageLength += charSize;
        index += consumed;
    }

    if (msg.Length > 0)
    {
        results.Add(msg.ToString());
    }

    return results;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment