Last active
September 19, 2025 02:52
-
-
Save DreamOfTranscendence/1660dd5d50857ea3686e321d0f6a89fe to your computer and use it in GitHub Desktop.
c++ Syntax highlighter in JavaScript that outputs HTML formated highlighting from c++ code files input as a string
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//Important: original code author: see the repository below
//grabbed from: https://github.com/AdrS/CppToHTML
//GNU license or something IDK
//USAGE: Select and upload a file with the file input element that this script appends to the page body,
//run this file on example.com or a blank webpage
// run: var htmlString=cpp_syntax_hi.htmlPrettyPrint(cpp_syntax_hi.file_e.files[0].textC), od2=document.createElement("div"); od2.innerHTML=htmlString; document.body.appendChild(od2);
//I might need this to make: https://gist.github.com/DreamOfTranscendence/0dbb2267f4ebd19441869fe793f46cab
| /* | |
| #include <string> | |
| #include <iostream> | |
| #include <cstdio> | |
| */ | |
| (function cpp_syntax_hi(){ //context isolator wrapper function? | |
/* enum TokenType: numeric tag stored in the `type` field of every token. */
var TokenType = {
  PREPROCESSOR: 0,
  COMMENT: 1,
  KEYWORD: 2,
  IDENTIFIER: 3,
  CONSTANT: 4,
  STRING: 5,
  SYMBOLS: 6,
  S_SYMBOL: 7
};
// Loose aliases so the tokenizer can write `COMMENT` instead of `TokenType.COMMENT`.
var PREPROCESSOR = TokenType.PREPROCESSOR;
var COMMENT = TokenType.COMMENT;
var KEYWORD = TokenType.KEYWORD;
var IDENTIFIER = TokenType.IDENTIFIER;
var CONSTANT = TokenType.CONSTANT;
var STRING = TokenType.STRING;
var SYMBOLS = TokenType.SYMBOLS;
var S_SYMBOL = TokenType.S_SYMBOL;
// The enum is also published on the global object.
self.enum_TokenType = TokenType;

// Tokenizer state shared by the functions in this file:
var pos = 0;                  // read cursor into the string being tokenized
var n_blockbrak = [];         // stack of currently open brackets; items are [bracketChar, idString], e.g. ["{", "sbbp_209"]
var s_blockbrak = "{}[]()";   // bracket characters: openers at even indexes, each closer right after its opener
var saPos = 0;                // counter used to build bracket ids; bumped once per bracket token
var _sym_ids;                 // bracket id -> [openIndex, closeIndex]; filled by htmlPrettyPrint when ofmt is 2
/**
 * Maps a TokenType value to its name.
 * @param {number} t - one of the TokenType constants
 * @returns {string} the constant's name, or "bs" when `t` is not a known token type
 */
function tokenTypeToText(/*TokenType*/ t) {
  // Map lookups compare without type coercion, so "2" does not match KEYWORD.
  var names = new Map([
    [PREPROCESSOR, "PREPROCESSOR"],
    [COMMENT, "COMMENT"],
    [STRING, "STRING"],
    [CONSTANT, "CONSTANT"],
    [SYMBOLS, "SYMBOLS"],
    [KEYWORD, "KEYWORD"],
    [IDENTIFIER, "IDENTIFIER"],
    [S_SYMBOL, "S_SYMBOL"]
  ]);
  return names.has(t) ? names.get(t) : "bs";
}
/*struct*/
/**
 * Token(s, t): factory for token records (works with or without `new`).
 * Every token inherits from Token.prototype and also carries its own copy of
 * each prototype field; `type` is set to `t` and `contents` to `s` as a string.
 */
var Token;
(function struct_Token() {
  var shared = { type: undefined, contents: undefined };
  Token = function Token(s, t) {
    var tok = Object.create(shared);
    // Copy the prototype's fields onto the instance so they are own properties too.
    Object.assign(tok, shared);
    tok.type = t;
    // Non-string contents are stringified.
    tok.contents = typeof s == "string" ? s : String(s);
    return tok;
  };
  shared.constructor = Token;
  Token.prototype = shared;
})();
/**
 * Recognises a bracket character ("{", "}", "[", "]", "(", ")") and pairs it
 * with its partner through the shared bracket stack `n_blockbrak`.
 *
 * Side effects: pushes/pops `n_blockbrak` and increments `saPos` for every
 * bracket seen, so call it exactly once per bracket in the input.
 *
 * @param {string} c - a single character
 * @returns {object|false} false when `c` is not a bracket; otherwise an
 *   S_SYMBOL token with two extra fields:
 *     sym_id - "sbbp_N" shared by an opener and the closer that matches it,
 *              or "lonely_sbbp_N" for a closer with no matching opener
 *     isEnd  - true for closing brackets
 */
function isS_Symbol(c) {
  // Exactly one character: indexOf("") is 0 and would otherwise look like "{".
  if (typeof c != "string" || c.length != 1) return false;
  var idx = s_blockbrak.indexOf(c);
  if (idx == -1) return false;

  var isEnd = (idx & 1) == 1; // closers sit at odd indexes, right after their opener
  var rid;
  if (isEnd) {
    var top = n_blockbrak[n_blockbrak.length - 1];
    // `top` is undefined when no bracket is open; that closer is simply unmatched.
    if (top && s_blockbrak.indexOf(top[0]) + 1 == idx) {
      n_blockbrak.pop();
      rid = top[1];
    } else {
      rid = "lonely_sbbp_" + saPos;
    }
  } else {
    rid = "sbbp_" + saPos;
    n_blockbrak.push([c, rid]);
  }

  var tok = Token(c, S_SYMBOL);
  tok.sym_id = rid;
  tok.isEnd = isEnd;
  saPos++;
  return tok;
}
/**
 * Tells whether `c` is one of the whitespace characters of C++ source text:
 * space, horizontal tab, newline, vertical tab, form feed or carriage return.
 * @param {string} c - a single character (anything else yields false)
 * @returns {boolean}
 */
function isWhiteSpace(c) {
  // '\v' was missing before, so a vertical tab ended up in an "analysis fail" token.
  return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r';
}
/**
 * Tells whether `c` lies in the character range '0'..'9'.
 * @param {string} c - a single character; undefined yields false
 * @returns {boolean}
 */
function isDecimalDigit(c) {
  var notBelowZero = '0' <= c;
  return notBelowZero && '9' >= c;
}
/**
 * Tells whether `c` lies in the character range '0'..'7'.
 * @param {string} c - a single character; undefined yields false
 * @returns {boolean}
 */
function isOctalDigit(c) {
  var notBelowZero = '0' <= c;
  return notBelowZero && '8' > c;
}
/**
 * Tells whether `c` is a hexadecimal digit: '0'..'9', 'A'..'F' or 'a'..'f'.
 * @param {string} c - a single character; undefined yields false
 * @returns {boolean}
 */
function isHexDigit(c) {
  if ('0' <= c && '9' >= c) return true;
  if ('A' <= c && 'F' >= c) return true;
  return 'a' <= c && 'f' >= c;
}
/**
 * Advances the shared cursor `pos` past any run of whitespace in `s`.
 * Leaves `pos` unchanged when it already points at a non-whitespace
 * character or past the end of `s`.
 * @param {string} s - the text being tokenized
 */
function skipWhiteSpace(/*const std::string&*/ s) {
  var end = s.length;
  for (; pos < end; pos++) {
    if (!isWhiteSpace(s[pos])) break;
  }
}
/**
 * Tells whether `c` is one of the punctuation characters the tokenizer
 * groups into SYMBOLS tokens (brackets included).
 * @param {string} c - a single character
 * @returns {boolean}
 */
function isSymbol(/*char*/ c) {
  for (const candidate of "~!%^&*()-+={[}]|:;,<.>/?") {
    if (c == candidate) return true;
  }
  return false;
}
/**
 * Tells whether `c` may appear in an identifier: an ASCII letter, a decimal
 * digit or an underscore.
 * @param {string} c - a single character; undefined yields false
 * @returns {boolean}
 */
function isValidNameCharacter(c) {
  if (c == '_') return true;
  if ('A' <= c && 'Z' >= c) return true;
  if ('a' <= c && 'z' >= c) return true;
  return '0' <= c && '9' >= c;
}
/* C++ keywords and alternative operator spellings highlighted as KEYWORD. */
const /*char**/ keywords_ = [ "alignas", "alignof", "and", "and_eq", "asm", "auto", "bitand", "bitor", "bool", "break", "case", "catch", "char", "char16_t", "char32_t", "class", "compl", "const", "constexpr", "const_cast", "continue", "decltype", "default", "delete", "do", "double", "dynamic_cast", "else", "enum", "explicit", "export", "extern", "false", "final", "float", "for", "friend", "goto", "if", "inline", "int", "long", "mutable", "namespace", "new", "noexcept", "not", "not_eq", "nullptr", "operator", "or", "or_eq", "override", "private", "protected", "public", "register", "reinterpret_cast", "return", "short", "signed", "sizeof", "static", "static_assert", "static_cast", "struct", "switch", "template", "this", "thread_local", "throw", "true", "try", "typedef", "typeid", "typename", "union", "unsigned", "using", "virtual", "void", "volatile", "wchar_t", "while", "xor", "xor_eq" ];
// Lookup set built once. The previous binary search needed the list sorted by
// character code, which it is not ('_' sorts before 'e', so "const_cast"
// belongs before "constexpr"); as a result "const_cast" was never found.
const keywordSet_ = new Set(keywords_);

/**
 * Tells whether `s` is one of the words in `keywords_`.
 * @param {string} s - a complete identifier-like word (case-sensitive)
 * @returns {boolean}
 */
function isKeyword(/*std::string&*/ s) {
  return keywordSet_.has(s);
}
/*
 Tokenizer coverage:
  - line and block comments
  - preprocessor directives, including backslash-newline continuation lines
  - string and character literals with backslash escapes
  - numeric constants: 0x.. hex, 0o.. octal, decimal with optional fraction and exponent (ex: 1.09E-5)
 Not handled: raw string literals, numeric suffixes (10u, 1.5f), digit separators (1'000).
*/
/**
 * Reads one token from `s` at the shared cursor `pos` and moves `pos` just
 * past it. The caller is expected to have skipped leading whitespace.
 *
 * @param {string} s - the complete text being tokenized
 * @returns {object} a Token. Brackets come back as the S_SYMBOL token built
 *   by isS_Symbol. Characters that fit no category are wrapped in a COMMENT
 *   token whose text starts with "/* analysis fail? :". When `pos` is already
 *   at or past the end of `s`, the COMMENT token "//end" is returned and
 *   `pos` is left unchanged.
 */
function getNextToken(s) {
  const len = s.length;
  const start = pos;
  if (pos >= len) return Token("//end", COMMENT);

  const first = s[pos];
  const second = s[pos + 1];
  // Token spanning everything consumed since `start`.
  const finish = function (type) { return Token(s.substring(start, pos), type); };
  const eatWhile = function (accepts) { while (pos < len && accepts(s[pos])) pos++; };
  const startsComment = function (at) { return s[at] == '/' && (s[at + 1] == '/' || s[at + 1] == '*'); };

  if (first == '/') {
    if (second == '/') { // single line comment: everything up to (not including) the newline
      pos += 2;
      eatWhile(function (ch) { return ch != '\n'; });
      return finish(COMMENT);
    }
    if (second == '*') { // block comment: up to the closing marker, or end of input
      // Search after the opener so its own '*' cannot close the comment ("/*/" is still open).
      const close = s.indexOf("*/", pos + 2);
      pos = close == -1 ? len : close + 2;
      return finish(COMMENT);
    }
    pos++; // plain division operator
    return Token("/", SYMBOLS);
  }

  if (first == '#') { // preprocessor directive: to end of line, end of input, or start of a comment
    while (pos < len && s[pos] != '\n' && !startsComment(pos)) {
      // A backslash directly before the line break continues the directive on the next line.
      if (s[pos] == '\\' && s[pos + 1] == '\n') pos += 2;
      else if (s[pos] == '\\' && s[pos + 1] == '\r' && s[pos + 2] == '\n') pos += 3;
      else pos++;
    }
    return finish(PREPROCESSOR);
  }

  if (first == '"' || first == "'") { // string or character literal, both reported as STRING
    pos++; // opening quote
    while (pos < len) {
      const ch = s[pos];
      // A backslash always consumes the following character, so \\ and \" are both handled.
      pos += ch == '\\' ? 2 : 1;
      if (ch == first) break;
    }
    if (pos > len) pos = len; // input ended right after a backslash
    return finish(STRING);
  }

  if (isDecimalDigit(first)) { // numeric constant
    const third = s[pos + 2];
    if (first == '0' && (second == 'o' || second == 'O') && isOctalDigit(third)) {
      pos += 3;
      eatWhile(isOctalDigit);
    } else if (first == '0' && (second == 'x' || second == 'X') && isHexDigit(third)) {
      pos += 3;
      eatWhile(isHexDigit);
    } else {
      eatWhile(isDecimalDigit);
      // Fraction: only when a digit follows the dot, so "a[1].x" keeps its member access.
      if (s[pos] == '.' && isDecimalDigit(s[pos + 1])) {
        pos++;
        eatWhile(isDecimalDigit);
      }
      // Exponent: e or E, optional sign, at least one digit.
      if (s[pos] == 'e' || s[pos] == 'E') {
        let digitsAt = pos + 1;
        if (s[digitsAt] == '+' || s[digitsAt] == '-') digitsAt++;
        if (isDecimalDigit(s[digitsAt])) {
          pos = digitsAt;
          eatWhile(isDecimalDigit);
        }
      }
    }
    return finish(CONSTANT);
  }

  // isS_Symbol updates the bracket stack, so it is called only once the earlier categories are ruled out.
  const bracket = isS_Symbol(first);
  if (bracket) {
    pos++;
    return bracket;
  }

  if (isSymbol(first)) { // run of operator characters
    pos++;
    // Stop at a comment start and at any bracket: brackets must become their
    // own S_SYMBOL tokens or bracket pairing loses track of them (e.g. "=(").
    while (pos < len && isSymbol(s[pos]) && s_blockbrak.indexOf(s[pos]) == -1 && !startsComment(pos)) { pos++; }
    return finish(SYMBOLS);
  }

  if (isValidNameCharacter(first)) { // keyword or identifier (digits were handled above)
    pos++;
    eatWhile(isValidNameCharacter);
    const word = s.substring(start, pos);
    return Token(word, isKeyword(word) ? KEYWORD : IDENTIFIER);
  }

  // Anything else: collect the unrecognised run and surface it as a comment.
  eatWhile(function (ch) { return ch != '/' && ch != '"' && !isValidNameCharacter(ch) && !isSymbol(ch); });
  return Token("/* analysis fail? : \n" + s.substring(start, pos).replace(/\/\*/g, "//END original multiline comment*/ /*") + "*/", COMMENT);
}
/**
 * Escapes the characters that are significant in HTML text and attribute values.
 * @param {*} text - value to escape (stringified first)
 * @returns {string}
 */
function escapeHtml_(text) {
  return String(text).replace(/[&<>"']/g, function (ch) {
    return { "&": "&amp;", "<": "&lt;", ">": "&gt;", "\"": "&quot;", "'": "&#39;" }[ch];
  });
}

/**
 * Tokenizes C++ source text and renders it as highlighted HTML, or returns
 * the raw token list.
 *
 * Resets the shared tokenizer state (`pos`, `n_blockbrak`, `_sym_ids`) on
 * every call.
 *
 * @param {string} s - C++ source text; null/undefined is treated as empty
 * @param {number|boolean} [ofmt] - output format:
 *   falsy: a <style> block followed by a <pre class="CODE"> element;
 *   1 (or true): the same, wrapped in a complete HTML document;
 *   2: no HTML at all - an array of tokens (raw, unescaped text) interleaved
 *      with {type:"ws", contents} entries for whitespace, plus a `sym_ids`
 *      property mapping each bracket id to [openIndex, closeIndex] in that array.
 * @returns {string|Array}
 */
function htmlPrettyPrint(/*const std::string&*/ s, /*bool*/ ofmt) {
  var src = s == null ? "" : String(s);
  var asArray = ofmt == 2;
  pos = 0;
  n_blockbrak = [];
  _sym_ids = {};

  var ret;
  if (ofmt == 1) { ret = "<!DOCTYPE html>\n<html>\n<head>\n </head>\n<body>\n"; } else if (asArray) ret = []; else ret = "";
  // The disabled bigCollapseAble rule is wrapped in a real CSS comment ("//" is not one in CSS).
  if (!asArray) ret += "<style>\n pre.CODE { background-color:#000000;}\n span.PREPROCESSOR { color:#FFBBBB; }\n span.COMMENT { color:#33FF33; }\n span.KEYWORD { color:#FF33FF; }\n span.IDENTIFIER { color:#FFFFFF; }\n span.CONSTANT { color:#FF9933; }\n span.STRING { color:#EE8726; }\n span.SYMBOLS { color:#63E3FF; }\n span.S_SYMBOL { color:#5050FF; font-weight:bold; }\n /* span.bigCollapseAble::before { content:\"\\/\"; background-color:#DDD; } */\n </style>\n<pre class=\"CODE\">\n";

  while (pos < src.length) {
    var start = pos;
    skipWhiteSpace(src); // whitespace is copied through untouched
    if (pos != start) {
      var gap = src.substring(start, pos);
      if (asArray) ret.push({ "type": "ws", "contents": gap }); else ret += gap;
    }
    // Only trailing whitespace was left: stop here, otherwise the tokenizer's
    // end-of-input placeholder ("//end") would be appended to the output.
    if (pos >= src.length) break;

    var t = getNextToken(src);
    if (asArray) {
      if (t.type == S_SYMBOL) {
        if (!_sym_ids[t.sym_id]) _sym_ids[t.sym_id] = [];
        _sym_ids[t.sym_id][t.isEnd & 1] = ret.length; // slot 0: opener index, slot 1: closer index
      }
      ret.push(t);
    } else if (t.type == S_SYMBOL) {
      // Each opener starts a wrapper span that its matching closer ends. A
      // closer without a partner (id prefixed "lonely_") has no wrapper to end.
      var hasPartner = String(t.sym_id).indexOf("lonely_") != 0;
      if (!t.isEnd) ret += "<span class=\"bigCollapseAble\" >";
      ret += ("<span class=\"S_SYMBOL\" name=\"" + escapeHtml_(t.sym_id) + "\">" + escapeHtml_(t.contents) + "</span>");
      if (t.isEnd && hasPartner) ret += "</span>";
    } else {
      var cls = tokenTypeToText(t.type);
      // "bs" marks an unknown token type; such tokens produce no output.
      if (cls != "bs") ret += ("<span class=\"" + cls + "\">" + escapeHtml_(t.contents) + "</span>");
    }
  }

  if (asArray) ret.sym_ids = _sym_ids; else ret += "</pre>";
  if (ofmt == 1) ret += "\n</body>\n</html>";
  return ret;
}
| //int main(int argc, const char** argv) { | |
| /*if (argc != 2) { | |
| std::cout << "usage: main src.cpp" << std::endl; | |
| return 0; | |
| }*/ /* | |
| FILE* f = fopen("G:\\cppToHtml\\cppPrettyHtml.cpp", "r"); | |
| if (!f)return 0; | |
| fseek(f, 0, SEEK_END); | |
| size_t size = ftell(f); | |
| fseek(f, 0, SEEK_SET); | |
| char* data = new char[size + 1]; | |
| fread(data, 1, size, f); | |
| fclose(f); | |
| data[size] = '\0'; | |
| htmlPrettyPrint(std::string(data)); //this here is the ONE that does it. | |
| return 0; | |
| } */ | |
| "fin / end"; | |
// File picker: once files are chosen, each File object gets its text stored
// on a `textC` property. After every file has been processed, the optional
// callback `fi.ontextready` (assign a function to the input element) is called.
var fi = document.createElement("input");
fi.type = "file";
fi.multiple = true;
//fi.setAttribute("webkitdirectory","true");
fi.onchange = function () {
  var files = Array.prototype.slice.call(fi.files);
  var pending = files.length;
  var settle = function () {
    pending--;
    if (pending == 0 && typeof fi.ontextready == "function") fi.ontextready();
  };
  // forEach gives every callback its own `file`; a shared loop variable would
  // make all results land on whichever file was iterated last.
  files.forEach(function (file) {
    // .text() decodes the file as UTF-8.
    file.text().then(function (text) {
      file.textC = text;
    }, function (err) {
      // An unreadable file is reported and skipped so the remaining files still complete.
      console.error("cpp_syntax_hi: could not read file " + file.name, err);
    }).then(settle);
  });
};
document.body.appendChild(fi);
self.cpp_syntax_hi = { "htmlPrettyPrint": htmlPrettyPrint, "file_e": fi };
// Hover highlighting: while the pointer is over a bracket (class S_SYMBOL),
// every element sharing its `name` - the bracket and its partner - gets a
// yellow background.
var lasterHi, // name of the bracket pair currently highlighted, if any
changeDisplayBrak = function (name, ONstate) {
  var matches = document.getElementsByName(name);
  for (var n = 0, L = matches.length; n != L; n++) matches[n].style.backgroundColor = ONstate ? "#FFFF00" : "";
};
self.onmouseover = function (e) { //highlight matching brackets / blocks / parentheses
  var t = e.target || e.srcElement;
  // Turn the previous highlight off as soon as the pointer moves elsewhere.
  if (lasterHi) {
    changeDisplayBrak(lasterHi, false);
    lasterHi = null;
  }
  // Some event targets are not elements and have no attributes to inspect.
  if (!t || typeof t.getAttribute != "function") return;
  var cs = t.getAttribute("class");
  if (!cs || cs.indexOf("S_SYMBOL") == -1) return;
  // Only bracket names are remembered, so un-highlighting can never reset the
  // inline background of an unrelated element that merely has a `name`.
  var pairName = t.getAttribute("name");
  if (pairName) {
    lasterHi = pairName;
    changeDisplayBrak(pairName, true);
  }
};
| })(); //end of //context isolator wrapper function? |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment