Created
October 15, 2021 03:29
-
-
Save namazso/0b417ea1dc8a587a3b720b7fbfb8be44 to your computer and use it in GitHub Desktop.
IRC formatting parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "irc_format.h"

#include <cstdio>
#include <string>
#include <utility>
struct IRCParser | |
{ | |
enum State : int | |
{ | |
Normal, | |
Color1_1, | |
Color1_2, | |
ColorComma, | |
Color2_1, | |
Color2_2, | |
ColorFinish, | |
ColorHex1_1, | |
ColorHex1_2, | |
ColorHex1_3, | |
ColorHex1_4, | |
ColorHex1_5, | |
ColorHex1_6, | |
ColorHexComma, | |
ColorHex2_1, | |
ColorHex2_2, | |
ColorHex2_3, | |
ColorHex2_4, | |
ColorHex2_5, | |
ColorHex2_6, | |
ColorHexFinish | |
}; | |
std::vector<std::pair<char, IRCFormat>> parsed; | |
int state = Normal; | |
IRCFormat current_format{}; | |
uint32_t color_fg{}; | |
uint32_t color_bg{}; | |
static bool push_dec(uint32_t& v, char c); | |
static bool push_hex(uint32_t& v, char c); | |
void color_finish(bool fg_set, bool bg_set); | |
void colorhex_finish(bool fg_set, bool bg_set); | |
void normal(char c); | |
void process(char c); | |
}; | |
// Packs three 8-bit channels into one 32-bit value:
// red in bits 0-7, green in bits 8-15, blue in bits 16-23.
constexpr static uint32_t rgb(uint8_t r, uint8_t g, uint8_t b)
{
    return (static_cast<uint32_t>(b) << 16)
         | (static_cast<uint32_t>(g) << 8)
         | static_cast<uint32_t>(r);
}
// Converts one hexadecimal digit ('0'-'9', 'a'-'f', 'A'-'F') to its value 0-15.
// Returns 0xFF when `c` is not a hexadecimal digit.
constexpr static uint8_t unhex(char c)
{
    const uint8_t u = static_cast<uint8_t>(c);

    if (u >= static_cast<uint8_t>('0') && u <= static_cast<uint8_t>('9'))
        return static_cast<uint8_t>(u - static_cast<uint8_t>('0'));

    if (u >= static_cast<uint8_t>('a') && u <= static_cast<uint8_t>('f'))
        return static_cast<uint8_t>(u - static_cast<uint8_t>('a') + 0xa);

    if (u >= static_cast<uint8_t>('A') && u <= static_cast<uint8_t>('F'))
        return static_cast<uint8_t>(u - static_cast<uint8_t>('A') + 0xA);

    return 0xFF;
}
constexpr static uint32_t k_irc_colors[] = | |
{ | |
rgb(255, 255, 255), | |
rgb(0, 0, 0), | |
rgb(0, 0, 127), | |
rgb(0, 147, 0), | |
rgb(255, 0, 0), | |
rgb(127, 0, 0), | |
rgb(156, 0, 156), | |
rgb(252, 127, 0), | |
rgb(255, 255, 0), | |
rgb(0, 252, 0), | |
rgb(0, 147, 147), | |
rgb(0, 255, 255), | |
rgb(0, 0, 252), | |
rgb(255, 0, 255), | |
rgb(127, 127, 127), | |
rgb(210, 210, 210), | |
rgb(0x47, 0x00, 0x00), | |
rgb(0x47, 0x21, 0x00), | |
rgb(0x47, 0x47, 0x00), | |
rgb(0x32, 0x47, 0x00), | |
rgb(0x00, 0x47, 0x00), | |
rgb(0x00, 0x47, 0x2c), | |
rgb(0x00, 0x47, 0x47), | |
rgb(0x00, 0x27, 0x47), | |
rgb(0x00, 0x00, 0x47), | |
rgb(0x2e, 0x00, 0x47), | |
rgb(0x47, 0x00, 0x47), | |
rgb(0x47, 0x00, 0x2a), | |
rgb(0x74, 0x00, 0x00), | |
rgb(0x74, 0x3a, 0x00), | |
rgb(0x74, 0x74, 0x00), | |
rgb(0x51, 0x74, 0x00), | |
rgb(0x00, 0x74, 0x00), | |
rgb(0x00, 0x74, 0x49), | |
rgb(0x00, 0x74, 0x74), | |
rgb(0x00, 0x40, 0x74), | |
rgb(0x00, 0x00, 0x74), | |
rgb(0x4b, 0x00, 0x74), | |
rgb(0x74, 0x00, 0x74), | |
rgb(0x74, 0x00, 0x45), | |
rgb(0xb5, 0x00, 0x00), | |
rgb(0xb5, 0x63, 0x00), | |
rgb(0xb5, 0xb5, 0x00), | |
rgb(0x7d, 0xb5, 0x00), | |
rgb(0x00, 0xb5, 0x00), | |
rgb(0x00, 0xb5, 0x71), | |
rgb(0x00, 0xb5, 0xb5), | |
rgb(0x00, 0x63, 0xb5), | |
rgb(0x00, 0x00, 0xb5), | |
rgb(0x75, 0x00, 0xb5), | |
rgb(0xb5, 0x00, 0xb5), | |
rgb(0xb5, 0x00, 0x6b), | |
rgb(0xff, 0x00, 0x00), | |
rgb(0xff, 0x8c, 0x00), | |
rgb(0xff, 0xff, 0x00), | |
rgb(0xb2, 0xff, 0x00), | |
rgb(0x00, 0xff, 0x00), | |
rgb(0x00, 0xff, 0xa0), | |
rgb(0x00, 0xff, 0xff), | |
rgb(0x00, 0x8c, 0xff), | |
rgb(0x00, 0x00, 0xff), | |
rgb(0xa5, 0x00, 0xff), | |
rgb(0xff, 0x00, 0xff), | |
rgb(0xff, 0x00, 0x98), | |
rgb(0xff, 0x59, 0x59), | |
rgb(0xff, 0xb4, 0x59), | |
rgb(0xff, 0xff, 0x71), | |
rgb(0xcf, 0xff, 0x60), | |
rgb(0x6f, 0xff, 0x6f), | |
rgb(0x65, 0xff, 0xc9), | |
rgb(0x6d, 0xff, 0xff), | |
rgb(0x59, 0xb4, 0xff), | |
rgb(0x59, 0x59, 0xff), | |
rgb(0xc4, 0x59, 0xff), | |
rgb(0xff, 0x66, 0xff), | |
rgb(0xff, 0x59, 0xbc), | |
rgb(0xff, 0x9c, 0x9c), | |
rgb(0xff, 0xd3, 0x9c), | |
rgb(0xff, 0xff, 0x9c), | |
rgb(0xe2, 0xff, 0x9c), | |
rgb(0x9c, 0xff, 0x9c), | |
rgb(0x9c, 0xff, 0xdb), | |
rgb(0x9c, 0xff, 0xff), | |
rgb(0x9c, 0xd3, 0xff), | |
rgb(0x9c, 0x9c, 0xff), | |
rgb(0xdc, 0x9c, 0xff), | |
rgb(0xff, 0x9c, 0xff), | |
rgb(0xff, 0x94, 0xd3), | |
rgb(0x00, 0x00, 0x00), | |
rgb(0x13, 0x13, 0x13), | |
rgb(0x28, 0x28, 0x28), | |
rgb(0x36, 0x36, 0x36), | |
rgb(0x4d, 0x4d, 0x4d), | |
rgb(0x65, 0x65, 0x65), | |
rgb(0x81, 0x81, 0x81), | |
rgb(0x9f, 0x9f, 0x9f), | |
rgb(0xbc, 0xbc, 0xbc), | |
rgb(0xe2, 0xe2, 0xe2), | |
rgb(0xff, 0xff, 0xff), | |
IRCFormat::color_unset | |
}; | |
// Appends the decimal digit `c` to the accumulator `v` (v = v * 10 + digit).
// Returns false and leaves `v` untouched when `c` is not '0'-'9'.
bool IRCParser::push_dec(uint32_t& v, char c)
{
    const bool is_digit = (c >= '0') && (c <= '9');
    if (is_digit)
        v = v * 10 + static_cast<uint32_t>(c - '0');
    return is_digit;
}
bool IRCParser::push_hex(uint32_t& v, char c) | |
{ | |
const auto cv = unhex(c); | |
if (cv == 0xFF) | |
return false; | |
v = (v << 4) | cv; | |
return true; | |
} | |
void IRCParser::color_finish(bool fg_set, bool bg_set) | |
{ | |
if (fg_set) | |
current_format.fg_color = k_irc_colors[color_fg]; | |
if (bg_set) | |
current_format.bg_color = k_irc_colors[color_bg]; | |
if (!fg_set && !bg_set) | |
{ | |
current_format.fg_color = IRCFormat::color_unset; | |
current_format.bg_color = IRCFormat::color_unset; | |
} | |
} | |
void IRCParser::colorhex_finish(bool fg_set, bool bg_set) | |
{ | |
if (fg_set) | |
current_format.fg_color = rgb((uint8_t)(color_fg >> 16), (uint8_t)(color_fg >> 8), (uint8_t)color_fg); | |
if (bg_set) | |
current_format.bg_color = rgb((uint8_t)(color_bg >> 16), (uint8_t)(color_bg >> 8), (uint8_t)color_bg); | |
if (!fg_set && !bg_set) | |
{ | |
current_format.fg_color = IRCFormat::color_unset; | |
current_format.bg_color = IRCFormat::color_unset; | |
} | |
} | |
void IRCParser::normal(char c) | |
{ | |
switch (c) | |
{ | |
case 0x02: | |
current_format.bold ^= 1; | |
break; | |
case 0x1D: | |
current_format.italic ^= 1; | |
break; | |
case 0x1F: | |
current_format.underline ^= 1; | |
break; | |
case 0x1E: | |
current_format.strikethrough ^= 1; | |
break; | |
case 0x11: | |
current_format.monospace ^= 1; | |
break; | |
case 0x16: | |
std::swap(current_format.fg_color, current_format.bg_color); | |
break; | |
case 0x0F: | |
current_format = {}; | |
break; | |
case 0x03: | |
state = Color1_1; | |
color_fg = 0; | |
color_bg = 0; | |
break; | |
case 0x04: | |
state = ColorHex1_1; | |
color_fg = 0; | |
color_bg = 0; | |
break; | |
case 0x00: | |
break; // end of string | |
default: | |
if (c >= 1 && c < 32) | |
{ | |
break; // swallow unknown control character | |
} | |
if (current_format.fg_color == current_format.bg_color && current_format.fg_color != IRCFormat::color_unset) | |
{ | |
auto copy_format = current_format; | |
copy_format.fg_color = IRCFormat::color_unset; | |
copy_format.bg_color = IRCFormat::color_unset; | |
copy_format.spoiler = true; | |
parsed.emplace_back(c, copy_format); | |
} | |
else | |
{ | |
parsed.emplace_back(c, current_format); | |
} | |
break; | |
} | |
} | |
void IRCParser::process(char c) | |
{ | |
switch (state) | |
{ | |
case Normal: | |
normal(c); | |
break; | |
case Color1_1: | |
if (!push_dec(color_fg, c)) | |
{ | |
color_finish(false, false); | |
state = Normal; | |
return process(c); | |
} | |
++state; | |
break; | |
case Color1_2: | |
if (c == ',') | |
{ | |
++state; | |
return process(c); | |
} | |
if (!push_dec(color_fg, c)) | |
{ | |
color_finish(true, false); | |
state = Normal; | |
return process(c); | |
} | |
++state; | |
break; | |
case ColorComma: | |
if (c != ',') | |
{ | |
color_finish(true, false); | |
state = Normal; | |
return process(c); | |
} | |
++state; | |
break; | |
case Color2_1: | |
if (!push_dec(color_bg, c)) | |
{ | |
color_finish(true, false); | |
state = Normal; | |
process(','); // add back the comma we ate | |
return process(c); | |
} | |
++state; | |
break; | |
case Color2_2: | |
if (!push_dec(color_bg, c)) | |
{ | |
color_finish(true, true); | |
state = Normal; | |
return process(c); | |
} | |
++state; | |
break; | |
case ColorFinish: | |
color_finish(true, true); | |
state = Normal; | |
return process(c); | |
case ColorHex1_1: | |
if (!push_hex(color_fg, c)) | |
{ | |
colorhex_finish(false, false); | |
state = Normal; | |
return process(c); | |
} | |
++state; | |
break; | |
case ColorHex1_2: | |
case ColorHex1_3: | |
case ColorHex1_4: | |
case ColorHex1_5: | |
case ColorHex1_6: | |
if (!push_hex(color_fg, c)) | |
{ | |
// some invalid garbage, just reset color | |
colorhex_finish(false, false); | |
state = Normal; | |
return process(c); | |
} | |
++state; | |
break; | |
case ColorHexComma: | |
if (c != ',') | |
{ | |
colorhex_finish(true, false); | |
state = Normal; | |
return process(c); | |
} | |
++state; | |
break; | |
case ColorHex2_1: | |
if (!push_hex(color_bg, c)) | |
{ | |
color_finish(true, false); | |
state = Normal; | |
process(','); // add back the comma we ate | |
return process(c); | |
} | |
++state; | |
break; | |
case ColorHex2_2: | |
case ColorHex2_3: | |
case ColorHex2_4: | |
case ColorHex2_5: | |
case ColorHex2_6: | |
if (!push_hex(color_bg, c)) | |
{ | |
// invalid stuff, just use the first color | |
colorhex_finish(true, false); | |
state = Normal; | |
return process(c); | |
} | |
++state; | |
break; | |
case ColorHexFinish: | |
colorhex_finish(true, true); | |
state = Normal; | |
return process(c); | |
} | |
} | |
std::vector<std::pair<char, IRCFormat>> IRCFormat::parse_message(const char* str) | |
{ | |
IRCParser p; | |
while (const auto c = *str++) | |
p.process(c); | |
p.process(0); | |
return p.parsed; | |
} | |
std::string IRCFormat::to_html(const std::vector<std::pair<char, IRCFormat>>& msg) | |
{ | |
IRCFormat last{}; | |
std::string html = last.html_open(); | |
for (const auto ch : msg) | |
{ | |
if(ch.second != last) | |
{ | |
html += last.html_close(); | |
last = ch.second; | |
html += last.html_open(); | |
} | |
html += ch.first; | |
} | |
html += last.html_close(); | |
return html; | |
} | |
// Formats a packed colour (red in bits 0-7, green in bits 8-15, blue in bits
// 16-23) as six uppercase hex digits in "RRGGBB" order, without a leading '#'.
static std::string rgb_to_html(uint32_t c)
{
    char color[7]; // 6 hex digits + terminating NUL
    // std::snprintf is standard and bounded; sprintf_s with array-size
    // deduction is only available on MSVC.
    std::snprintf(color, sizeof(color), "%02X%02X%02X",
                  static_cast<unsigned>(c & 0xFF),
                  static_cast<unsigned>((c >> 8) & 0xFF),
                  static_cast<unsigned>((c >> 16) & 0xFF));
    return color;
}
std::string IRCFormat::html_open() const | |
{ | |
std::ostringstream out; | |
if (bold) | |
out << "<b>"; | |
if (italic) | |
out << "<i>"; | |
if (underline) | |
out << "<u>"; | |
if (strikethrough) | |
out << "<strike>"; | |
if (monospace) | |
out << "<code>"; | |
if (spoiler) | |
out << "<span data-mx-spoiler>"; | |
if (fg_color != color_unset) | |
out << "<span data-mx-color=\"#" << rgb_to_html(fg_color) << "\">"; | |
if (bg_color != color_unset) | |
out << "<span data-mx-bg-color=\"#" << rgb_to_html(bg_color) << "\">"; | |
return out.str(); | |
} | |
std::string IRCFormat::html_close() const | |
{ | |
std::ostringstream out; | |
if (bg_color != color_unset) | |
out << "</span>"; | |
if (fg_color != color_unset) | |
out << "</span>"; | |
if (spoiler) | |
out << "</span>"; | |
if (monospace) | |
out << "</code>"; | |
if (strikethrough) | |
out << "</strike>"; | |
if (underline) | |
out << "</u>"; | |
if (italic) | |
out << "</i>"; | |
if (bold) | |
out << "</b>"; | |
return out.str(); | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#pragma once | |
#include <cstdint> | |
#include <string> | |
#include <sstream> | |
#include <vector> | |
// Formatting attributes attached to one character of an IRC message.
struct IRCFormat
{
    // Text style flags; all default to off.
    bool bold{};
    bool italic{};
    bool underline{};
    bool strikethrough{};
    bool monospace{};
    bool spoiler{};

    // Packed colours, or color_unset when no colour applies.
    uint32_t fg_color = color_unset;
    uint32_t bg_color = color_unset;

    // Opening / closing HTML tags that express this format.
    std::string html_open() const;
    std::string html_close() const;

    // Marker value meaning "no colour set".
    constexpr static auto color_unset = 0xFF000000;

    // Two formats are equal when every flag and both colours match.
    // bg_color must take part in the comparison: otherwise a change that only
    // affects the background is not seen as a format change.
    bool operator==(const IRCFormat& rhs) const
    {
        return bold == rhs.bold
            && italic == rhs.italic
            && underline == rhs.underline
            && strikethrough == rhs.strikethrough
            && monospace == rhs.monospace
            && spoiler == rhs.spoiler
            && fg_color == rhs.fg_color
            && bg_color == rhs.bg_color;
    }

    bool operator!=(const IRCFormat& rhs) const { return !(*this == rhs); }

    // Parses a NUL-terminated IRC message into (character, format) pairs.
    static std::vector<std::pair<char, IRCFormat>> parse_message(const char* str);

    // Renders (character, format) pairs as an HTML string.
    static std::string to_html(const std::vector<std::pair<char, IRCFormat>>& msg);
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment