Created
November 20, 2021 12:45
-
-
Save MrSmith33/d4c92d548a749dc4c9e31a7daae7f0e3 to your computer and use it in GitHub Desktop.
FSA based lexer for Vox
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import std.stdio; | |
import std.format; | |
// States of the lexer automaton.
//
// Intermediate states (prefixed with `i_`) are declared first and end with
// `i_start`; every member declared after `i_start` is a final state, so
// `state > State.i_start` tells whether a token has been recognized.
// Member order is significant: values are used as table indices.
//
// The first attribute of each member goes into the IN_TOKEN table.
enum State : ubyte {
    // intermediate states
    @(1) i_invalid,
    @(1) i_id1,         // _a-zA-Z
    @(1) i_id2,         // _a-zA-Z0-9
    @(1) i_dig1_0,      // 0
    @(1) i_dig2,        // 1-9 followed by 0-9_
    @(1) i_dig2_b,      // 0b 0B
    @(1) i_dig2_x,      // 0x 0X
    @(1) i_dig3_bin,    // _ right after 0b
    @(1) i_dig4_bin,    // 01_
    @(1) i_dig3_hex,    // _ right after 0x
    @(1) i_dig4_hex,    // 0-9A-Fa-f_
    @(1) i_and,         // &
    @(1) i_and2,        // &&
    @(1) i_and_eq,      // &=
    @(1) i_at,          // @
    @(1) i_backslash,   // \
    @(1) i_colon,       // :
    @(1) i_comma,       // ,
    @(1) i_dot,         // .
    @(1) i_dot2,        // ..
    @(1) i_dot3,        // ...
    @(1) i_eq,          // =
    @(1) i_eq2,         // ==
    @(1) i_more,        // >
    @(1) i_more_eq,     // >=
    @(1) i_more2,       // >>
    @(1) i_more2_eq,    // >>=
    @(1) i_more3,       // >>>
    @(1) i_more3_eq,    // >>>=
    @(1) i_less,        // <
    @(1) i_less_eq,     // <=
    @(1) i_less2,       // <<
    @(1) i_less2_eq,    // <<=
    @(1) i_minus,       // -
    @(1) i_minus_eq,    // -=
    @(1) i_minus2,      // --
    @(1) i_not,         // !
    @(1) i_not_eq,      // !=
    @(1) i_or,          // |
    @(1) i_or_eq,       // |=
    @(1) i_or2,         // ||
    @(1) i_percent,     // %
    @(1) i_percent_eq,  // %=
    @(1) i_plus,        // +
    @(1) i_plus_eq,     // +=
    @(1) i_plus2,       // ++
    @(1) i_question,    // ?
    @(1) i_semicolon,   // ;
    @(1) i_slash,       // /
    @(1) i_slash2,      // //
    @(1) i_comment,     // //\n or /**/
    @(1) i_slash_star,  // /*
    @(1) i_slash_star2, // /* ... *
    @(1) i_slash_eq,    // /=
    @(1) i_star,        // *
    @(1) i_star_eq,     // *=
    @(1) i_tilde,       // ~
    @(1) i_tilde_eq,    // ~=
    @(1) i_xor,         // ^
    @(1) i_xor_eq,      // ^=
    @(1) i_quote2,      // inside a string, after the opening "
    @(1) i_quote2_esc,  // \ inside a string
    @(1) i_lit_string,  // closing "
    @(1) i_lparen,      // (
    @(1) i_rparen,      // )
    @(1) i_lbracket,    // [
    @(1) i_rbracket,    // ]
    @(1) i_lcurly,      // {
    @(1) i_rcurly,      // }
    @(0) i_start,       // starting state; must stay the last intermediate state

    // final states
    @(0) invalid,
    @(0) id,
    @(0) and,           // &
    @(0) and2,          // &&
    @(0) and_eq,        // &=
    @(0) at,            // @
    @(0) backslash,     // \
    @(0) colon,         // :
    @(0) comma,         // ,
    @(0) dot,           // .
    @(0) dot2,          // ..
    @(0) dot3,          // ...
    @(0) eq,            // =
    @(0) eq2,           // ==
    @(0) more,          // >
    @(0) more_eq,       // >=
    @(0) more2,         // >>
    @(0) more2_eq,      // >>=
    @(0) more3,         // >>>
    @(0) more3_eq,      // >>>=
    @(0) less,          // <
    @(0) less_eq,       // <=
    @(0) less2,         // <<
    @(0) less2_eq,      // <<=
    @(0) minus,         // -
    @(0) minus_eq,      // -=
    @(0) minus2,        // --
    @(0) not,           // !
    @(0) not_eq,        // !=
    @(0) or,            // |
    @(0) or_eq,         // |=
    @(0) or2,           // ||
    @(0) percent,       // %
    @(0) percent_eq,    // %=
    @(0) plus,          // +
    @(0) plus_eq,       // +=
    @(0) plus2,         // ++
    @(0) question,      // ?
    @(0) semicolon,     // ;
    @(0) slash,         // /
    @(0) slash_eq,      // /=
    @(0) star,          // *
    @(0) star_eq,       // *=
    @(0) tilde,         // ~
    @(0) tilde_eq,      // ~=
    @(0) xor,           // ^
    @(0) xor_eq,        // ^=
    @(0) lparen,        // (
    @(0) rparen,        // )
    @(0) lbracket,      // [
    @(0) rbracket,      // ]
    @(0) lcurly,        // {
    @(0) rcurly,        // }
    @(0) lit_int_dec,   // 0|[1-9][0-9_]*
    @(0) lit_int_hex,   // ("0x"|"0X")_*[0-9A-Fa-f][0-9A-Fa-f_]*
    @(0) lit_int_bin,   // ("0b"|"0B")_*[01][01_]*
    @(1) lit_string,    // ""
    @(0) comment,       // // /*
    @(0) inval_comment, // /* EOI
    @(1) eoi,
}
// Table with one entry per State member, built at compile time.
// Entry N holds the first user-defined attribute of the N-th declared member
// of State (members are numbered consecutively from 0, so N is also the
// member's value).
immutable IN_TOKEN = (){
    ubyte[State.max+1] flags;
    foreach (memberIndex, memberName; __traits(allMembers, State)) {
        flags[memberIndex] =
            __traits(getAttributes, __traits(getMember, State, memberName))[0];
    }
    return flags;
}();
// Character classes. Input bytes are mapped to one of these classes so that
// the state transition table can be described per class instead of per byte.
// `other` is the first member, hence the default class of unlisted bytes.
enum CharClass : ubyte {
    other,

    // whitespace and control
    space,      // ' ' and '\t'
    underscore, // _

    // letters
    letter,     // a-z A-Z (those not covered by the three classes below)
    letter_b,   // b B
    letter_x,   // x X
    letter_hex, // a c d e f A C D E F

    // digits
    zero,       // 0
    one,        // 1
    digit,      // 2-9

    // line ends and end of input
    cr,         // \r
    lf,         // \n
    eoi,        // \3

    // brackets
    lparen,     // (
    rparen,     // )
    lbracket,   // [
    rbracket,   // ]
    lcurly,     // {
    rcurly,     // }

    // punctuation and operators
    and,        // &
    eq,         // =
    at,         // @
    backslash,  // \
    colon,      // :
    comma,      // ,
    dot,        // .
    more,       // >
    less,       // <
    minus,      // -
    not,        // !
    dollar,     // $
    hash,       // #
    or,         // |
    percent,    // %
    plus,       // +
    question,   // ?
    semicolon,  // ;
    slash,      // /
    star,       // *
    tilde,      // ~
    xor,        // ^
    quote,      // '
    quote2,     // "
}
// Maps every possible byte value to its CharClass; built at compile time.
// Bytes that are not mentioned below keep the default class, CharClass.other.
immutable CHAR_TO_CLASS = (){
    CharClass[256] classes;

    // Gives class `cls` to every byte of the inclusive range [first, last].
    void markRange(char first, char last, CharClass cls) {
        classes[first .. last + 1] = cls;
    }

    // Gives class `cls` to every byte listed in `chars`.
    void mark(string chars, CharClass cls) {
        foreach (char c; chars)
            classes[c] = cls;
    }

    // Letters. Order matters: the narrower classes overwrite the wider ones.
    markRange('a', 'z', CharClass.letter);
    markRange('A', 'Z', CharClass.letter);
    markRange('a', 'f', CharClass.letter_hex);
    markRange('A', 'F', CharClass.letter_hex);
    mark("bB", CharClass.letter_b);
    mark("xX", CharClass.letter_x);

    // Digits
    mark("0", CharClass.zero);
    mark("1", CharClass.one);
    markRange('2', '9', CharClass.digit);

    // Whitespace, line ends, end of input
    mark(" \t", CharClass.space);
    mark("\r", CharClass.cr);
    mark("\n", CharClass.lf);
    mark("\3", CharClass.eoi);

    // Everything else has a class of its own
    mark("_", CharClass.underscore);
    mark("(", CharClass.lparen);
    mark(")", CharClass.rparen);
    mark("[", CharClass.lbracket);
    mark("]", CharClass.rbracket);
    mark("{", CharClass.lcurly);
    mark("}", CharClass.rcurly);
    mark("&", CharClass.and);
    mark("=", CharClass.eq);
    mark("@", CharClass.at);
    mark("\\", CharClass.backslash);
    mark(":", CharClass.colon);
    mark(",", CharClass.comma);
    mark(".", CharClass.dot);
    mark(">", CharClass.more);
    mark("<", CharClass.less);
    mark("-", CharClass.minus);
    mark("!", CharClass.not);
    mark("$", CharClass.dollar);
    mark("#", CharClass.hash);
    mark("|", CharClass.or);
    mark("%", CharClass.percent);
    mark("+", CharClass.plus);
    mark("?", CharClass.question);
    mark(";", CharClass.semicolon);
    mark("/", CharClass.slash);
    mark("*", CharClass.star);
    mark("~", CharClass.tilde);
    mark("^", CharClass.xor);
    mark("'", CharClass.quote);
    mark("\"", CharClass.quote2);

    return classes;
}();
// Prints the type of the transition table during compilation.
pragma(msg, typeof(NEXT_STATE));

// State transition table of the lexer, built at compile time.
//
// Indexed as NEXT_STATE[inputByte][currentState], where currentState is an
// intermediate state (<= State.i_start). The result is either another
// intermediate state (the byte becomes part of the token) or a final state
// (the token ended before this byte).
//
// Transitions are first described per CharClass, then expanded to all 256
// byte values through CHAR_TO_CLASS and finally transposed so that the byte
// is the outer index.
immutable NEXT_STATE = (){
    // Row width of the resulting table. Kept at the original value of 128;
    // it only has to be large enough to hold every intermediate state.
    enum STATE_STRIDE = 128;
    static assert(State.i_start + 1 <= STATE_STRIDE,
        "too many intermediate states for the NEXT_STATE row width");

    // table[state][charClass]; cells that are never assigned stay State.init
    State[CharClass.max+1][State.i_start+1] table;

    // Every char class moves `from` to `to`.
    // Must be called before the `only` calls for the same `from` state.
    void all(State from, State to) {
        table[from][] = to;
    }

    // Only the listed char classes move `from` to `to`.
    void only(State from, CharClass[] ch_classes, State to) {
        foreach(CharClass ch_class; ch_classes)
            table[from][ch_class] = to;
    }

    enum CharClass[] idStart = [CharClass.underscore, CharClass.letter, CharClass.letter_b, CharClass.letter_x, CharClass.letter_hex];
    enum CharClass[] idCont = idStart ~ [CharClass.zero, CharClass.one, CharClass.digit];
    enum CharClass[] binDigits = [CharClass.zero, CharClass.one];
    enum CharClass[] hexDigits = [CharClass.zero, CharClass.one, CharClass.digit, CharClass.letter_b, CharClass.letter_hex];

    // anything not listed below is an invalid one-byte token
    all(State.i_start, State.i_invalid);
    all(State.i_invalid, State.invalid);

    // end of input is recognized immediately, without consuming the byte
    only(State.i_start, [CharClass.eoi], State.eoi);

    // identifiers
    only(State.i_start, idStart, State.i_id1);
    all(State.i_id1, State.id);
    all(State.i_id2, State.id);
    only(State.i_id1, idCont, State.i_id2);
    only(State.i_id2, idCont, State.i_id2);

    // brackets
    only(State.i_start, [CharClass.lparen], State.i_lparen);
    all(State.i_lparen, State.lparen);
    only(State.i_start, [CharClass.rparen], State.i_rparen);
    all(State.i_rparen, State.rparen);
    only(State.i_start, [CharClass.lbracket], State.i_lbracket);
    all(State.i_lbracket, State.lbracket);
    only(State.i_start, [CharClass.rbracket], State.i_rbracket);
    all(State.i_rbracket, State.rbracket);
    only(State.i_start, [CharClass.lcurly], State.i_lcurly);
    all(State.i_lcurly, State.lcurly);
    only(State.i_start, [CharClass.rcurly], State.i_rcurly);
    all(State.i_rcurly, State.rcurly);

    // & && &=
    only(State.i_start, [CharClass.and], State.i_and);
    all(State.i_and, State.and);
    only(State.i_and, [CharClass.eq], State.i_and_eq);
    all(State.i_and_eq, State.and_eq);
    only(State.i_and, [CharClass.and], State.i_and2);
    all(State.i_and2, State.and2);

    // @ \ : ,
    only(State.i_start, [CharClass.at], State.i_at);
    all(State.i_at, State.at);
    only(State.i_start, [CharClass.backslash], State.i_backslash);
    all(State.i_backslash, State.backslash);
    only(State.i_start, [CharClass.colon], State.i_colon);
    all(State.i_colon, State.colon);
    only(State.i_start, [CharClass.comma], State.i_comma);
    all(State.i_comma, State.comma);

    // . .. ...
    only(State.i_start, [CharClass.dot], State.i_dot);
    all(State.i_dot, State.dot);
    only(State.i_dot, [CharClass.dot], State.i_dot2);
    all(State.i_dot2, State.dot2);
    only(State.i_dot2, [CharClass.dot], State.i_dot3);
    all(State.i_dot3, State.dot3);

    // = ==
    only(State.i_start, [CharClass.eq], State.i_eq);
    all(State.i_eq, State.eq);
    only(State.i_eq, [CharClass.eq], State.i_eq2);
    all(State.i_eq2, State.eq2);

    // > >= >> >>= >>> >>>=
    only(State.i_start, [CharClass.more], State.i_more);
    all(State.i_more, State.more);
    only(State.i_more, [CharClass.more], State.i_more2);
    all(State.i_more2, State.more2);
    only(State.i_more, [CharClass.eq], State.i_more_eq);
    all(State.i_more_eq, State.more_eq);
    only(State.i_more2, [CharClass.more], State.i_more3);
    all(State.i_more3, State.more3);
    only(State.i_more2, [CharClass.eq], State.i_more2_eq);
    all(State.i_more2_eq, State.more2_eq);
    only(State.i_more3, [CharClass.eq], State.i_more3_eq);
    all(State.i_more3_eq, State.more3_eq);

    // < <= << <<=
    only(State.i_start, [CharClass.less], State.i_less);
    all(State.i_less, State.less);
    only(State.i_less, [CharClass.less], State.i_less2);
    all(State.i_less2, State.less2);
    only(State.i_less, [CharClass.eq], State.i_less_eq);
    all(State.i_less_eq, State.less_eq);
    only(State.i_less2, [CharClass.eq], State.i_less2_eq);
    all(State.i_less2_eq, State.less2_eq);

    // - -- -=
    only(State.i_start, [CharClass.minus], State.i_minus);
    all(State.i_minus, State.minus);
    only(State.i_minus, [CharClass.minus], State.i_minus2);
    all(State.i_minus2, State.minus2);
    only(State.i_minus, [CharClass.eq], State.i_minus_eq);
    all(State.i_minus_eq, State.minus_eq);

    // + ++ +=
    only(State.i_start, [CharClass.plus], State.i_plus);
    all(State.i_plus, State.plus);
    only(State.i_plus, [CharClass.plus], State.i_plus2);
    all(State.i_plus2, State.plus2);
    only(State.i_plus, [CharClass.eq], State.i_plus_eq);
    all(State.i_plus_eq, State.plus_eq);

    // | || |=
    only(State.i_start, [CharClass.or], State.i_or);
    all(State.i_or, State.or);
    only(State.i_or, [CharClass.or], State.i_or2);
    all(State.i_or2, State.or2);
    only(State.i_or, [CharClass.eq], State.i_or_eq);
    all(State.i_or_eq, State.or_eq);

    // ! !=
    only(State.i_start, [CharClass.not], State.i_not);
    all(State.i_not, State.not);
    only(State.i_not, [CharClass.eq], State.i_not_eq);
    all(State.i_not_eq, State.not_eq);

    // % %=
    only(State.i_start, [CharClass.percent], State.i_percent);
    all(State.i_percent, State.percent);
    only(State.i_percent, [CharClass.eq], State.i_percent_eq);
    all(State.i_percent_eq, State.percent_eq);

    // ?
    only(State.i_start, [CharClass.question], State.i_question);
    all(State.i_question, State.question);

    // / /=
    only(State.i_start, [CharClass.slash], State.i_slash);
    all(State.i_slash, State.slash);
    only(State.i_slash, [CharClass.eq], State.i_slash_eq);
    all(State.i_slash_eq, State.slash_eq);

    // line comment: runs up to and including the next \n
    only(State.i_slash, [CharClass.slash], State.i_slash2);
    all(State.i_slash2, State.i_slash2);
    only(State.i_slash2, [CharClass.lf], State.i_comment);
    // A line comment on the last line has no \n after it. Without this
    // transition the automaton stays in i_slash2 and runs past the
    // end-of-input byte. The comment ends right before the eoi byte.
    only(State.i_slash2, [CharClass.eoi], State.comment);
    all(State.i_comment, State.comment);

    // block comment: /* ... */, unterminated at end of input is inval_comment
    only(State.i_slash, [CharClass.star], State.i_slash_star);
    all(State.i_slash_star, State.i_slash_star);
    only(State.i_slash_star, [CharClass.star], State.i_slash_star2);
    only(State.i_slash_star, [CharClass.eoi], State.inval_comment);
    all(State.i_slash_star2, State.i_slash_star);
    only(State.i_slash_star2, [CharClass.star], State.i_slash_star2);
    only(State.i_slash_star2, [CharClass.slash], State.i_comment);
    only(State.i_slash_star2, [CharClass.eoi], State.inval_comment);

    // ;
    only(State.i_start, [CharClass.semicolon], State.i_semicolon);
    all(State.i_semicolon, State.semicolon);

    // * *=
    only(State.i_start, [CharClass.star], State.i_star);
    all(State.i_star, State.star);
    only(State.i_star, [CharClass.eq], State.i_star_eq);
    all(State.i_star_eq, State.star_eq);

    // ~ ~=
    only(State.i_start, [CharClass.tilde], State.i_tilde);
    all(State.i_tilde, State.tilde);
    only(State.i_tilde, [CharClass.eq], State.i_tilde_eq);
    all(State.i_tilde_eq, State.tilde_eq);

    // ^ ^=
    only(State.i_start, [CharClass.xor], State.i_xor);
    all(State.i_xor, State.xor);
    only(State.i_xor, [CharClass.eq], State.i_xor_eq);
    all(State.i_xor_eq, State.xor_eq);

    // integer literals starting with 0: "0", binary "0b...", hex "0x..."
    only(State.i_start, [CharClass.zero], State.i_dig1_0);
    all(State.i_dig1_0, State.lit_int_dec);

    // binary: at least one digit is required after the prefix and optional _
    only(State.i_dig1_0, [CharClass.letter_b], State.i_dig2_b);
    all(State.i_dig2_b, State.invalid);
    only(State.i_dig2_b, binDigits, State.i_dig4_bin);
    all(State.i_dig4_bin, State.lit_int_bin);
    only(State.i_dig4_bin, binDigits ~ [CharClass.underscore], State.i_dig4_bin);
    only(State.i_dig2_b, [CharClass.underscore], State.i_dig3_bin);
    all(State.i_dig3_bin, State.invalid);
    only(State.i_dig3_bin, [CharClass.underscore], State.i_dig3_bin);
    only(State.i_dig3_bin, binDigits, State.i_dig4_bin);

    // hex: at least one digit is required after the prefix and optional _
    only(State.i_dig1_0, [CharClass.letter_x], State.i_dig2_x);
    all(State.i_dig2_x, State.invalid);
    only(State.i_dig2_x, hexDigits, State.i_dig4_hex);
    all(State.i_dig4_hex, State.lit_int_hex);
    only(State.i_dig4_hex, hexDigits ~ [CharClass.underscore], State.i_dig4_hex);
    only(State.i_dig2_x, [CharClass.underscore], State.i_dig3_hex);
    all(State.i_dig3_hex, State.invalid);
    only(State.i_dig3_hex, [CharClass.underscore], State.i_dig3_hex);
    only(State.i_dig3_hex, hexDigits, State.i_dig4_hex);

    // decimal literals starting with 1-9
    only(State.i_start, [CharClass.one, CharClass.digit], State.i_dig2);
    all(State.i_dig2, State.lit_int_dec);
    only(State.i_dig2, [CharClass.zero, CharClass.one, CharClass.digit, CharClass.underscore], State.i_dig2);

    // whitespace keeps the automaton in the starting state
    only(State.i_start, [CharClass.space, CharClass.cr, CharClass.lf], State.i_start);

    // string literals; \ makes the next byte part of the string
    only(State.i_start, [CharClass.quote2], State.i_quote2);
    all(State.i_quote2, State.i_quote2);
    only(State.i_quote2, [CharClass.backslash], State.i_quote2_esc);
    all(State.i_quote2_esc, State.i_quote2);
    only(State.i_quote2, [CharClass.quote2], State.i_lit_string);
    all(State.i_lit_string, State.lit_string);
    only(State.i_quote2, [CharClass.eoi], State.invalid);
    only(State.i_quote2_esc, [CharClass.eoi], State.invalid);

    // Expand char classes to bytes: table2[state][byte].
    // Every byte belongs to exactly one class, so a direct lookup is enough.
    State[256][State.i_start+1] table2;
    foreach(stateIndex, ref row; table2) {
        foreach(charIndex, ref cell; row) {
            cell = table[stateIndex][CHAR_TO_CLASS[charIndex]];
        }
    }

    // Transpose into table3[byte][state]
    State[STATE_STRIDE][256] table3;
    foreach(i, ref row; table2) {
        foreach(j, cell; row) {
            table3[j][i] = cell;
        }
    }
    return table3;
}();
// A token: a slice [start, end) of the input together with the position of
// its first character and the final automaton state that classifies it.
struct Token {
    uint start; // index of the first byte of the token
    uint end;   // index one past the last byte of the token
    uint line;  // stored zero-based, printed one-based by toString
    uint col;   // stored zero-based, printed one-based by toString
    State tok;  // kind of the token

    // Returns the text of this token, sliced out of `input`.
    const(char)[] getTokenString(const(char)[] input) pure const {
        const(char)[] text = input[start .. end];
        return text;
    }

    // Writes position, length and kind of the token into `sink`.
    void toString(scope void delegate(const(char)[]) sink) const {
        immutable uint len = end - start;
        formattedWrite(sink, "line %s col %s start %s end %s len %s %s",
            line + 1, col + 1, start, end, len, tok);
    }
}
// Table-driven lexer over `input`.
//
// `input` is indexed without checking against its length, so the text is
// expected to contain an end-of-input byte ('\3', as appended by the test
// sources) on which the automaton stops.
struct Lexer
{
    const(char)[] input; // text being lexed
    uint position;       // index of the next unread byte
    uint line;           // zero-based line of `position`
    uint column;         // zero-based column of `position`

    // Skips whitespace, then runs the automaton until it reaches a final
    // state. Returns the recognized token and advances the lexer past it.
    // The byte that caused the final state is not consumed.
    Token nextToken() {
        uint pos = position;
        uint ln = line;
        uint col = column;

        // skip whitespace, keeping line and column up to date
        skipping: for (;; ++pos) {
            switch (input[pos]) {
                case ' ', '\t', '\r':
                    ++col;
                    break;
                case '\n':
                    col = 0;
                    ++ln;
                    break;
                default:
                    break skipping;
            }
        }

        // remember where the token starts
        immutable uint tokStart = pos, tokLine = ln, tokCol = col;

        State state = State.i_start;
        for (;; ++pos) {
            immutable char ch = input[pos];
            state = NEXT_STATE[ch][state];
            // anything after i_start is a final state
            if (state > State.i_start) break;
            if (ch != '\n') {
                ++col;
            } else {
                col = 0;
                ++ln;
            }
        }

        position = pos;
        line = ln;
        column = col;
        return Token(tokStart, pos, tokLine, tokCol, state);
    }
}
// Lexes a set of sample sources and prints every token, one per line.
// Each source ends with the end-of-input byte '\3'.
void test() {
    string[] sources = [
        // all operators, brackets, identifiers and comments
        "[](){}aaa _ a A _0 a0 A0 _1 a1 A1 _a aa Aa _A aA AA && & &= @ \\ : , . .. ... = == > >= >> >>= >>> >>>= < <= << <<= - -= -- ! != | |= || % %= + += ++ ? ; / /= // comment \r\n /**/ /***/ * *= ~ ~= ^ ^= \r\n \3",
        // unterminated block comments
        "/**\3",
        "/*\3",
        // integer literals, valid and invalid
        "0 1 1_ 0x 0x_ 0x0 0x_0 0x0_ 0b 0b_ 0b0 0b_0 0b0_ 0xfff 0x_fff \3",
        // comments and line counting
        "\n//\r\na\3",
        "/*\n*/\3",
        // string literals
        "\"\" \"\\\"\" \"\n\"" ~ "\3",
        "\"\3",
        "\"\\\3",
        // single hex literal
        "0x_fff\3",
    ];

    foreach (source; sources) {
        auto lexer = Lexer(source);
        Token tok;
        do {
            tok = lexer.nextToken;
            writefln("%s %s", tok, tok.getTokenString(source));
        } while (tok.tok != State.eoi);
    }
}
// State shared between bench and test_bench.
size_t total_bytes; // number of input bytes lexed so far, summed over all runs
size_t sm;          // running sum of token kinds; printed by bench
string source_code; // text to lex

// One benchmark iteration: lexes the whole of `source_code`, adds its length
// to `total_bytes` and the kind of every token before eoi to `sm`.
void test_bench() {
    total_bytes += source_code.length;
    auto lexer = Lexer(source_code);
    for (auto tok = lexer.nextToken; tok.tok != State.eoi; tok = lexer.nextToken) {
        sm += tok.tok;
    }
}
// Reads a source file, lexes it 100 times through test_bench and prints the
// amount of lexed data, the elapsed time and the throughput.
void bench() {
    import std.datetime.stopwatch;
    import std.file : readText;

    // Other inputs that were used here: "types.vx", "fannkuch100000.vx"
    enum benchFile = "fib1000000.vx";
    enum runs = 100;

    // the lexer needs the end-of-input byte after the text
    source_code = readText!(string)(benchFile) ~ '\3';
    writefln("%s", source_code.length);
    writeln("benchmarking...");

    total_bytes = 0;
    auto results = benchmark!(test_bench)(runs);

    double seconds = results[0].total!"nsecs" / 1_000_000_000.0;
    auto bps = total_bytes / seconds;
    writefln("lexed = %sB in %ss (%sB/s) - %s",
        scaledNumberFmt(total_bytes),
        scaledNumberFmt(seconds),
        scaledNumberFmt(bps),
        sm);
}
// Entry point: runs the lexer tests. The benchmark is disabled.
void main() {
    test();
    // bench();
}
import core.time : Duration; | |
/// Wrapper that formats a number with at most 3 significant digits followed
/// by a metric prefix (for example 1.50K, 999m).
/// Use 'i' format char to get binary prefixes (like Ki, instead of K), only for integers
/// Use '#' flag to get greek letter in the output (not compatible with 'i')
struct ScaledNumberFmt(T) {
    import std.algorithm : min, max;
    import std.format : formattedWrite, FormatSpec;
    import std.range : walkLength;

    T value;

    /// Writes the scaled value and its prefix into `sink`.
    /// The width given in `fmt` covers the number together with the prefix.
    void toString(scope void delegate(const(char)[]) sink, const ref FormatSpec!char fmt) const {
        if (fmt.spec == 'i') {
            // Use binary prefixes instead of decimal prefixes
            long intVal = cast(long)value;
            int scale = calcScale2(intVal);
            double scaledValue = scaled2(value, scale);
            int digits = numDigitsInNumber10(scaledValue);
            string prefix = scalePrefixesAscii[scaleToScaleIndex2(scale)]; // length is 1 or 0
            int width = max(fmt.width - (cast(int)prefix.length * 2), 0); // account for 'i' prefix
            // fewer decimals the more integer digits there are, never negative
            int precision = max(min(3-digits, fmt.precision), 0);
            string fmtString = (scale == 0) ? "%*.*f%s" : "%*.*f%si";
            sink.formattedWrite(fmtString, width, precision, scaledValue, prefix);
        } else {
            int scale = calcScale10(value);
            auto scaledValue = scaled10(value, -scale);
            int digits = numDigitsInNumber10(scaledValue);
            immutable string[] prefixes = (fmt.flHash) ? scalePrefixesGreek : scalePrefixesAscii;
            string prefix = prefixes[scaleToScaleIndex10(scale)];
            // Count characters, not bytes: "µ" takes 2 bytes in UTF-8 but
            // only one column in the output.
            int prefixColumns = cast(int)prefix.walkLength;
            int width = max(fmt.width - prefixColumns, 0);
            // fewer decimals the more integer digits there are, never negative
            int precision = max(min(3-digits, fmt.precision), 0);
            sink.formattedWrite("%*.*f%s", width, precision, scaledValue, prefix);
        }
    }
}
/// Wraps `value` into a ScaledNumberFmt, so that it is printed with a
/// metric prefix when formatted.
auto scaledNumberFmt(T)(T value) {
    ScaledNumberFmt!T wrapped = ScaledNumberFmt!T(value);
    return wrapped;
}

/// Same for a Duration: converts it to seconds (as double), multiplies by
/// `scale` and wraps the result.
auto scaledNumberFmt(Duration value, double scale = 1) {
    enum double nsecsPerSecond = 1_000_000_000.0;
    immutable double seconds = value.total!"nsecs" / nsecsPerSecond;
    return ScaledNumberFmt!double(seconds * scale);
}
// Metric prefixes for decimal scales -30 .. 30, with step of 3
// (or -10 to 10 with step of 1). The empty prefix sits in the middle,
// at index NUM_SCALE_PREFIXES.
immutable string[] scalePrefixesAscii = [
    "q", "r", "y", "z", "a", "f", "p", "n", "u", "m", // 10^-30 .. 10^-3
    "",                                               // 10^0
    "K", "M", "G", "T", "P", "E", "Z", "Y", "R", "Q", // 10^3 .. 10^30
];

// Same table, with the greek letter for micro
immutable string[] scalePrefixesGreek = [
    "q", "r", "y", "z", "a", "f", "p", "n", "µ", "m", // 10^-30 .. 10^-3
    "",                                               // 10^0
    "K", "M", "G", "T", "P", "E", "Z", "Y", "R", "Q", // 10^3 .. 10^30
];

enum NUM_SCALE_PREFIXES = 10; // number of prefixes on each side of the empty one
enum MIN_SCALE_PREFIX = -30;  // smallest supported decimal scale
enum MAX_SCALE_PREFIX = 30;   // largest supported scale
/// Returns the number of decimal digits in the absolute value of `val`
/// rounded to the nearest integer. The result is at least 1.
int numDigitsInNumber10(Num)(const Num val) {
    import std.math: abs, round;
    ulong remaining = cast(ulong)val.abs.round;
    int count = 1;
    // every division by 10 that leaves something >= 10 behind is one more digit
    for (; remaining >= 10; remaining /= 10)
        ++count;
    return count;
}
/// Returns 1 for positive `x`, -1 for negative `x` and 0 otherwise.
private int signum(T)(const T x) pure nothrow {
    if (x > 0) return 1;
    if (x < 0) return -1;
    return 0;
}
/// Picks the decimal scale (a multiple of 3) used to print `val` with a
/// metric prefix.
/// Returns number in range of [-30; 30]
int calcScale10(Num)(Num val) {
    import std.math: abs, round, log10;

    // cast to double is necessary in case of long.min, which overflows integral abs
    auto lg = log10(abs(cast(double)val));

    // zero has no magnitude
    if (lg == -double.infinity) return 0;

    // nearest multiple of 3 to the magnitude, with the sign of the logarithm
    double absLog = abs(lg);
    int magnitude = cast(int)(round(absLog/3.0))*3;
    int result = magnitude * signum(lg);

    // Rounding may pick a scale that is one step too big. We want
    // 0.9994 to be formatted as 999m
    // 0.9995 to be formatted as 1.0
    // 0.9996 to be formatted as 1.0
    if (abs(scaled10(val, -result)) < 0.9995) result -= 3;

    // values smaller than the min scale are shown without a prefix
    if (result < MIN_SCALE_PREFIX) return 0;
    if (result > MAX_SCALE_PREFIX) return MAX_SCALE_PREFIX;
    return result;
}
/// Picks the binary scale (a multiple of 10, i.e. a power of 1024) used to
/// print `val` with a binary prefix.
/// Returns number in range of [0; MAX_SCALE_PREFIX], which is [0; 30].
/// Zero and non-finite values give 0.
int calcScale2(Num)(Num val) {
    import std.algorithm: clamp;
    import std.math: abs, round, log2, isFinite;

    // cast to double is necessary in case of long.min, which overflows integral abs
    auto lg = log2(abs(cast(double)val));

    // log2(0) is -infinity; converting an infinite or NaN value to int below
    // is not defined, so such values are shown without a prefix
    if (!isFinite(lg)) return 0;

    double absLog = abs(lg);
    int scale = cast(int)(round(absLog/10.0))*10;
    int logSign = signum(lg);

    // Clamp before the scale is used: negative scale should not happen for
    // binary numbers, and scaled2 must not get a scale outside of the
    // supported range.
    int clampedScale = clamp(scale * logSign, 0, MAX_SCALE_PREFIX);

    // Rounding may pick a scale that is one step too big. We want
    // 0.9994 to be formatted as 999m
    // 0.9995 to be formatted as 1.0
    // 0.9996 to be formatted as 1.0
    if (abs(scaled2(val, clampedScale)) < 0.9995) clampedScale -= 10;

    if (clampedScale < 0)
        clampedScale = 0;
    return clampedScale;
}
/// Converts a decimal scale into an index into the prefix tables.
int scaleToScaleIndex10(int scale) {
    // -30...30 -> -10...10 -> 0...20
    immutable int steps = scale / 3;
    return steps + NUM_SCALE_PREFIXES;
}
/// Converts a binary scale into an index into the prefix tables.
int scaleToScaleIndex2(int scale) {
    // -100...100 -> -10...10 -> 0...20
    immutable int steps = scale / 10;
    return steps + NUM_SCALE_PREFIXES;
}
/// Returns `num` multiplied by 10 to the power of `scale`.
double scaled10(Num)(Num num, int scale) {
    import std.math: pow;
    immutable factor = pow(10.0, scale);
    return num * factor;
}
/// Returns `num` divided by 2 to the power of `scale`.
double scaled2(Num)(Num num, int scale) {
    import std.math: pow;
    // The divisor is computed in floating point: shifting a 32-bit int by a
    // negative amount or by 31 and more does not give a power of two.
    // Powers of two are exact in double, so results for small non-negative
    // scales are the same as with an integer shift.
    immutable double divisor = pow(2.0, scale);
    return num / divisor;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment