Last active
October 28, 2019 21:56
-
-
Save grandsilence/371615c953e5286a3d5eb55905782435 to your computer and use it in GitHub Desktop.
C# Correct chunk messages for telegram bot (limit 4096 per message) with emoji and unicode support.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Splits a string into consecutive chunks without ever cutting a surrogate pair
/// (for example an emoji) in half.
/// Telegram has a 4096 limit per message, so use <c>chunkSize = 4096</c> for Telegram bots.
/// </summary>
/// <remarks>
/// Every Unicode code point is charged a weight against <paramref name="chunkSize"/>:
/// 1 for code points below 128, 2 for code points below 32768, and 4 for everything else
/// (including all code points that need a surrogate pair). The weight of a code point is
/// never smaller than the number of UTF-16 chars it occupies, so the <c>Length</c> of each
/// returned chunk never exceeds <paramref name="chunkSize"/>.
/// NOTE(review): the 1/2/4 weights are kept as originally written; they are not UTF-8 byte
/// counts. Confirm against how the target service measures message length before tuning them.
/// A surrogate char that is not part of a valid pair is emitted as U+FFFD with weight 4.
/// </remarks>
/// <param name="self">The text to split. May be <c>null</c> or empty.</param>
/// <param name="chunkSize">Maximum total weight per chunk. Must be at least 4.</param>
/// <returns>
/// The chunks in original order; an empty sequence when <paramref name="self"/> is
/// <c>null</c> or empty.
/// </returns>
/// <exception cref="ArgumentException">
/// <paramref name="chunkSize"/> is less than 4 and <paramref name="self"/> is not empty.
/// </exception>
public static IEnumerable<string> ChunkUnicode(this string self, int chunkSize)
{
    if (string.IsNullOrEmpty(self))
    {
        return Enumerable.Empty<string>();
    }

    // The heaviest code point weighs 4, so a smaller budget could never hold one.
    if (chunkSize < 4)
    {
        throw new ArgumentException("Minimum chunk size is 4.", nameof(chunkSize));
    }

    var results = new List<string>();
    var chunk = new StringBuilder();
    int chunkWeight = 0;

    // Walk the UTF-16 string directly instead of round-tripping through a UTF-32 byte
    // array: this avoids a 4x temporary allocation and does not depend on the host's
    // byte order (BitConverter reads in machine endianness, Encoding.UTF32 is little-endian).
    for (int i = 0; i < self.Length; i++)
    {
        char current = self[i];
        bool isPair = char.IsSurrogatePair(self, i);

        int weight;
        if (isPair)
        {
            // Surrogate pairs encode code points >= 0x10000, which always weigh 4.
            weight = 4;
        }
        else
        {
            if (char.IsSurrogate(current))
            {
                // Lone surrogate: substitute the Unicode replacement character.
                current = '\uFFFD';
            }

            if (current < 128)
            {
                weight = 1;
            }
            else if (current < 32768)
            {
                weight = 2;
            }
            else
            {
                weight = 4;
            }
        }

        // Start a new chunk when this code point would overflow the current one.
        if (chunkWeight + weight > chunkSize)
        {
            results.Add(chunk.ToString());
            chunk.Clear();
            chunkWeight = 0;
        }

        chunk.Append(current);
        if (isPair)
        {
            // Keep both halves of the pair together and skip the low surrogate.
            chunk.Append(self[i + 1]);
            i++;
        }

        chunkWeight += weight;
    }

    if (chunk.Length > 0)
    {
        results.Add(chunk.ToString());
    }

    return results;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment